blob: 883f3f4bb0ea3ac45cd8aa62e5cb8baec4a5fdad [file] [log] [blame]
/**
 * @file common.c
 * @author Michal Vasko <mvasko@cesnet.cz>
 * @brief common internal definitions for libyang
 *
 * Copyright (c) 2018 CESNET, z.s.p.o.
 *
 * This source code is licensed under BSD 3-Clause License (the "License").
 * You may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://opensource.org/licenses/BSD-3-Clause
 */
Radek Krejcib7db73a2018-10-24 14:18:40 +020014
Radek Krejci535ea9f2020-05-29 16:01:05 +020015#define _GNU_SOURCE
16
Radek Krejcib7db73a2018-10-24 14:18:40 +020017#include "common.h"
Michal Vasko1324b6c2018-09-07 11:16:23 +020018
Radek Krejci86d106e2018-10-18 09:53:19 +020019#include <assert.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020020#include <ctype.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020021#include <errno.h>
Michal Vasko15dc9fa2021-05-03 14:33:05 +020022#include <inttypes.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020023#include <stdarg.h>
Radek Krejci535ea9f2020-05-29 16:01:05 +020024#include <stdio.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020025#include <stdlib.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020026#include <string.h>
Radek Krejci86d106e2018-10-18 09:53:19 +020027#include <sys/mman.h>
28#include <sys/stat.h>
Radek Krejci86d106e2018-10-18 09:53:19 +020029#include <unistd.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020030
Radek Krejciaa45bda2020-07-20 07:43:38 +020031#include "compat.h"
Radek Krejcib4a4a272019-06-10 12:44:52 +020032#include "tree_schema_internal.h"
Michal Vasko1324b6c2018-09-07 11:16:23 +020033
/**
 * @brief Wrapper for realloc() that releases the original memory when the reallocation fails.
 *
 * @param[in] ptr Memory to resize, may be NULL.
 * @param[in] size Requested size in bytes. For 0, @p ptr is released and NULL is returned.
 * @return Pointer to the resized memory.
 * @return NULL on failure or for zero @p size, @p ptr is not valid any more in both cases.
 */
void *
ly_realloc(void *ptr, size_t size)
{
    void *new_mem;

    if (!size) {
        /* realloc(ptr, 0) is implementation-defined and may release ptr on its own before returning NULL,
         * freeing it again afterwards would be a double free, so never pass zero size to realloc() */
        free(ptr);
        return NULL;
    }

    new_mem = realloc(ptr, size);
    if (!new_mem) {
        /* the original block is untouched on failure, do not leak it */
        free(ptr);
    }

    return new_mem;
}
Michal Vasko841d1a92018-09-07 15:40:31 +020046
/**
 * @brief Find the first occurrence of a character in the first @p len bytes of a buffer.
 *
 * The search is bounded only by @p len, a NUL byte does not stop it.
 *
 * @param[in] s Buffer to search in.
 * @param[in] c Character to look for, compared after conversion to char.
 * @param[in] len Number of bytes of @p s to examine.
 * @return Pointer to the first matching byte, NULL if there is none.
 */
char *
ly_strnchr(const char *s, int c, size_t len)
{
    const char wanted = (char)c;
    size_t idx;

    for (idx = 0; idx < len; ++idx) {
        if (s[idx] == wanted) {
            return (char *)&s[idx];
        }
    }

    return NULL;
}
53
/**
 * @brief Compare a NUL-terminated reference string with a string of the given length.
 *
 * @param[in] refstr NUL-terminated reference string.
 * @param[in] str String to compare, only its first @p str_len bytes are used.
 * @param[in] str_len Number of bytes of @p str to compare.
 * @return 0 if the first @p str_len bytes are equal and @p refstr ends right after them.
 * @return Non-zero strncmp() result if the compared bytes differ, 1 if @p refstr is longer than @p str_len.
 */
int
ly_strncmp(const char *refstr, const char *str, size_t str_len)
{
    const int diff = strncmp(refstr, str, str_len);

    if (diff) {
        return diff;
    }

    /* the compared parts are equal, the reference string must not continue */
    return (refstr[str_len] == '\0') ? 0 : 1;
}
65
Michal Vasko15dc9fa2021-05-03 14:33:05 +020066LY_ERR
67ly_strntou8(const char *nptr, size_t len, uint8_t *ret)
68{
69 uint8_t num = 0, dig, dec_pow;
70
71 if (len > 3) {
72 /* overflow for sure */
73 return LY_EDENIED;
74 }
75
76 dec_pow = 1;
77 for ( ; len && isdigit(nptr[len - 1]); --len) {
78 dig = nptr[len - 1] - 48;
79
80 if (LY_OVERFLOW_MUL(UINT8_MAX, dig, dec_pow)) {
81 return LY_EDENIED;
82 }
83 dig *= dec_pow;
84
85 if (LY_OVERFLOW_ADD(UINT8_MAX, num, dig)) {
86 return LY_EDENIED;
87 }
88 num += dig;
89
90 dec_pow *= 10;
91 }
92
93 if (len) {
94 return LY_EVALID;
95 }
96 *ret = num;
97 return LY_SUCCESS;
98}
99
aPieceke3f828d2021-05-10 15:34:41 +0200100LY_ERR
101ly_value_prefix_next(const char *str_begin, const char *str_end, uint32_t *len, ly_bool *is_prefix, const char **str_next)
aPiecekf102d4d2021-03-30 12:18:38 +0200102{
103 const char *stop, *prefix;
aPieceke3f828d2021-05-10 15:34:41 +0200104 size_t bytes_read;
aPiecekf102d4d2021-03-30 12:18:38 +0200105 uint32_t c;
106 ly_bool prefix_found;
aPieceke3f828d2021-05-10 15:34:41 +0200107 LY_ERR ret = LY_SUCCESS;
aPiecekf102d4d2021-03-30 12:18:38 +0200108
aPieceke3f828d2021-05-10 15:34:41 +0200109 assert(len && is_prefix && str_next);
aPiecekf102d4d2021-03-30 12:18:38 +0200110
111#define IS_AT_END(PTR, STR_END) (STR_END ? PTR == STR_END : !(*PTR))
112
113 *str_next = NULL;
114 *is_prefix = 0;
aPieceke3f828d2021-05-10 15:34:41 +0200115 *len = 0;
aPiecekf102d4d2021-03-30 12:18:38 +0200116
117 if (!str_begin || !(*str_begin) || (str_begin == str_end)) {
118 return ret;
119 }
120
121 stop = str_begin;
122 prefix = NULL;
123 prefix_found = 0;
124
125 do {
126 /* look for the beginning of the YANG value */
aPieceke3f828d2021-05-10 15:34:41 +0200127 do {
128 LY_CHECK_RET(ly_getutf8(&stop, &c, &bytes_read));
129 } while (!is_xmlqnamestartchar(c) && !IS_AT_END(stop, str_end));
aPiecekf102d4d2021-03-30 12:18:38 +0200130
131 if (IS_AT_END(stop, str_end)) {
132 break;
133 }
134
135 /* maybe the prefix was found */
aPieceke3f828d2021-05-10 15:34:41 +0200136 prefix = stop - bytes_read;
aPiecekf102d4d2021-03-30 12:18:38 +0200137
138 /* look for the the end of the prefix */
aPieceke3f828d2021-05-10 15:34:41 +0200139 do {
140 LY_CHECK_RET(ly_getutf8(&stop, &c, &bytes_read));
141 } while (is_xmlqnamechar(c) && !IS_AT_END(stop, str_end));
aPiecekf102d4d2021-03-30 12:18:38 +0200142
143 prefix_found = c == ':' ? 1 : 0;
144
145 /* if it wasn't the prefix, keep looking */
146 } while (!IS_AT_END(stop, str_end) && !prefix_found);
147
148 if ((str_begin == prefix) && prefix_found) {
149 /* prefix found at the beginning of the input string */
150 *is_prefix = 1;
151 *str_next = IS_AT_END(stop, str_end) ? NULL : stop;
aPieceke3f828d2021-05-10 15:34:41 +0200152 *len = (stop - bytes_read) - str_begin;
aPiecekf102d4d2021-03-30 12:18:38 +0200153 } else if ((str_begin != prefix) && (prefix_found)) {
154 /* there is a some string before prefix */
155 *str_next = prefix;
aPieceke3f828d2021-05-10 15:34:41 +0200156 *len = prefix - str_begin;
aPiecekf102d4d2021-03-30 12:18:38 +0200157 } else {
158 /* no prefix found */
aPieceke3f828d2021-05-10 15:34:41 +0200159 *len = stop - str_begin;
aPiecekf102d4d2021-03-30 12:18:38 +0200160 }
161
162#undef IS_AT_END
163
164 return ret;
165}
166
Radek Krejcib416be62018-10-01 14:51:45 +0200167LY_ERR
Michal Vaskob36053d2020-03-26 15:49:30 +0100168ly_getutf8(const char **input, uint32_t *utf8_char, size_t *bytes_read)
Radek Krejcib416be62018-10-01 14:51:45 +0200169{
Radek Krejci1deb5be2020-08-26 16:43:36 +0200170 uint32_t c, aux;
171 size_t len;
Radek Krejcib416be62018-10-01 14:51:45 +0200172
Radek Krejcicc6a45c2019-05-13 10:16:14 +0200173 if (bytes_read) {
174 (*bytes_read) = 0;
175 }
176
Radek Krejcib416be62018-10-01 14:51:45 +0200177 c = (*input)[0];
178 LY_CHECK_RET(!c, LY_EINVAL);
179
180 if (!(c & 0x80)) {
181 /* one byte character */
182 len = 1;
183
Michal Vasko69730152020-10-09 16:30:07 +0200184 if ((c < 0x20) && (c != 0x9) && (c != 0xa) && (c != 0xd)) {
Radek Krejcib416be62018-10-01 14:51:45 +0200185 return LY_EINVAL;
186 }
187 } else if ((c & 0xe0) == 0xc0) {
188 /* two bytes character */
189 len = 2;
190
191 aux = (*input)[1];
192 if ((aux & 0xc0) != 0x80) {
193 return LY_EINVAL;
194 }
195 c = ((c & 0x1f) << 6) | (aux & 0x3f);
196
197 if (c < 0x80) {
198 return LY_EINVAL;
199 }
200 } else if ((c & 0xf0) == 0xe0) {
201 /* three bytes character */
202 len = 3;
203
204 c &= 0x0f;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200205 for (uint64_t i = 1; i <= 2; i++) {
Radek Krejcib416be62018-10-01 14:51:45 +0200206 aux = (*input)[i];
207 if ((aux & 0xc0) != 0x80) {
208 return LY_EINVAL;
209 }
210
211 c = (c << 6) | (aux & 0x3f);
212 }
213
Michal Vasko69730152020-10-09 16:30:07 +0200214 if ((c < 0x800) || ((c > 0xd7ff) && (c < 0xe000)) || (c > 0xfffd)) {
Radek Krejcib416be62018-10-01 14:51:45 +0200215 return LY_EINVAL;
216 }
217 } else if ((c & 0xf8) == 0xf0) {
218 /* four bytes character */
219 len = 4;
220
221 c &= 0x07;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200222 for (uint64_t i = 1; i <= 3; i++) {
Radek Krejcib416be62018-10-01 14:51:45 +0200223 aux = (*input)[i];
224 if ((aux & 0xc0) != 0x80) {
225 return LY_EINVAL;
226 }
227
228 c = (c << 6) | (aux & 0x3f);
229 }
230
Michal Vasko69730152020-10-09 16:30:07 +0200231 if ((c < 0x1000) || (c > 0x10ffff)) {
Radek Krejcib416be62018-10-01 14:51:45 +0200232 return LY_EINVAL;
233 }
234 } else {
235 return LY_EINVAL;
236 }
237
238 (*utf8_char) = c;
239 (*input) += len;
240 if (bytes_read) {
241 (*bytes_read) = len;
242 }
243 return LY_SUCCESS;
244}
245
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200246LY_ERR
247ly_pututf8(char *dst, uint32_t value, size_t *bytes_written)
248{
249 if (value < 0x80) {
250 /* one byte character */
Michal Vasko69730152020-10-09 16:30:07 +0200251 if ((value < 0x20) &&
252 (value != 0x09) &&
253 (value != 0x0a) &&
254 (value != 0x0d)) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200255 return LY_EINVAL;
256 }
257
258 dst[0] = value;
259 (*bytes_written) = 1;
260 } else if (value < 0x800) {
261 /* two bytes character */
262 dst[0] = 0xc0 | (value >> 6);
263 dst[1] = 0x80 | (value & 0x3f);
264 (*bytes_written) = 2;
265 } else if (value < 0xfffe) {
266 /* three bytes character */
267 if (((value & 0xf800) == 0xd800) ||
Michal Vasko69730152020-10-09 16:30:07 +0200268 ((value >= 0xfdd0) && (value <= 0xfdef))) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200269 /* exclude surrogate blocks %xD800-DFFF */
270 /* exclude noncharacters %xFDD0-FDEF */
271 return LY_EINVAL;
272 }
273
274 dst[0] = 0xe0 | (value >> 12);
275 dst[1] = 0x80 | ((value >> 6) & 0x3f);
276 dst[2] = 0x80 | (value & 0x3f);
277
278 (*bytes_written) = 3;
279 } else if (value < 0x10fffe) {
280 if ((value & 0xffe) == 0xffe) {
281 /* exclude noncharacters %xFFFE-FFFF, %x1FFFE-1FFFF, %x2FFFE-2FFFF, %x3FFFE-3FFFF, %x4FFFE-4FFFF,
282 * %x5FFFE-5FFFF, %x6FFFE-6FFFF, %x7FFFE-7FFFF, %x8FFFE-8FFFF, %x9FFFE-9FFFF, %xAFFFE-AFFFF,
283 * %xBFFFE-BFFFF, %xCFFFE-CFFFF, %xDFFFE-DFFFF, %xEFFFE-EFFFF, %xFFFFE-FFFFF, %x10FFFE-10FFFF */
284 return LY_EINVAL;
285 }
286 /* four bytes character */
287 dst[0] = 0xf0 | (value >> 18);
288 dst[1] = 0x80 | ((value >> 12) & 0x3f);
289 dst[2] = 0x80 | ((value >> 6) & 0x3f);
290 dst[3] = 0x80 | (value & 0x3f);
291
292 (*bytes_written) = 4;
293 } else {
294 return LY_EINVAL;
295 }
296 return LY_SUCCESS;
297}
298
/**
 * @brief Static table of the UTF8 characters lengths according to their first byte.
 *
 * Continuation bytes (0x80 - 0xBF) and the bytes 0xFE and 0xFF have the length 1 so that
 * a scan of malformed data always moves forward.
 */
static const unsigned char utf8_char_length_table[] = {
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
};

/**
 * @brief Count the UTF-8 characters in a string.
 *
 * The characters are not validated, their lengths are taken from the first byte only.
 * A multi-byte character cut by the terminating NUL byte or by @p bytes is counted as one character.
 *
 * @param[in] str String to examine.
 * @param[in] bytes Maximum number of bytes of @p str to examine, the scan also stops on a NUL byte.
 * @return Number of characters.
 */
size_t
ly_utf8len(const char *str, size_t bytes)
{
    size_t len = 0, pos = 0;
    unsigned char width;

    while ((pos < bytes) && str[pos]) {
        ++len;

        /* consume the character byte by byte, jumping by the whole expected length could skip
         * the terminating NUL byte of a truncated sequence and continue reading behind the string */
        width = utf8_char_length_table[(unsigned char)str[pos]];
        do {
            ++pos;
            --width;
        } while (width && (pos < bytes) && str[pos]);
    }

    return len;
}
333
Radek Krejcid972c252018-09-25 13:23:39 +0200334size_t
335LY_VCODE_INSTREXP_len(const char *str)
336{
337 size_t len = 0;
Michal Vasko69730152020-10-09 16:30:07 +0200338
Radek Krejcid972c252018-09-25 13:23:39 +0200339 if (!str) {
340 return len;
341 } else if (!str[0]) {
342 return 1;
343 }
Radek Krejci1e008d22020-08-17 11:37:37 +0200344 for (len = 1; len < LY_VCODE_INSTREXP_MAXLEN && str[len]; ++len) {}
Radek Krejcid972c252018-09-25 13:23:39 +0200345 return len;
346}
347
Radek Krejcif345c012018-09-19 11:12:59 +0200348LY_ERR
Radek Krejci86d106e2018-10-18 09:53:19 +0200349ly_mmap(struct ly_ctx *ctx, int fd, size_t *length, void **addr)
Michal Vasko841d1a92018-09-07 15:40:31 +0200350{
Radek Krejci86d106e2018-10-18 09:53:19 +0200351 struct stat sb;
352 long pagesize;
353 size_t m;
Michal Vasko841d1a92018-09-07 15:40:31 +0200354
Radek Krejci86d106e2018-10-18 09:53:19 +0200355 assert(length);
356 assert(addr);
357 assert(fd >= 0);
Michal Vasko841d1a92018-09-07 15:40:31 +0200358
Radek Krejci86d106e2018-10-18 09:53:19 +0200359 if (fstat(fd, &sb) == -1) {
360 LOGERR(ctx, LY_ESYS, "Failed to stat the file descriptor (%s) for the mmap().", strerror(errno));
361 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200362 }
Radek Krejci86d106e2018-10-18 09:53:19 +0200363 if (!S_ISREG(sb.st_mode)) {
364 LOGERR(ctx, LY_EINVAL, "File to mmap() is not a regular file.");
365 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200366 }
Radek Krejci86d106e2018-10-18 09:53:19 +0200367 if (!sb.st_size) {
368 *addr = NULL;
369 return LY_SUCCESS;
370 }
371 pagesize = sysconf(_SC_PAGESIZE);
372
373 m = sb.st_size % pagesize;
Michal Vasko69730152020-10-09 16:30:07 +0200374 if (m && (pagesize - m >= 1)) {
Radek Krejci86d106e2018-10-18 09:53:19 +0200375 /* there will be enough space (at least 1 byte) after the file content mapping to provide zeroed NULL-termination byte */
376 *length = sb.st_size + 1;
377 *addr = mmap(NULL, *length, PROT_READ, MAP_PRIVATE, fd, 0);
378 } else {
379 /* there will not be enough bytes after the file content mapping for the additional bytes and some of them
380 * would overflow into another page that would not be zerroed and any access into it would generate SIGBUS.
381 * Therefore we have to do the following hack with double mapping. First, the required number of bytes
382 * (including the additinal bytes) is required as anonymous and thus they will be really provided (actually more
383 * because of using whole pages) and also initialized by zeros. Then, the file is mapped to the same address
384 * where the anonymous mapping starts. */
385 *length = sb.st_size + pagesize;
386 *addr = mmap(NULL, *length, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
387 *addr = mmap(*addr, sb.st_size, PROT_READ, MAP_PRIVATE | MAP_FIXED, fd, 0);
388 }
389 if (*addr == MAP_FAILED) {
390 LOGERR(ctx, LY_ESYS, "mmap() failed (%s).", strerror(errno));
391 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200392 }
393
Radek Krejcif345c012018-09-19 11:12:59 +0200394 return LY_SUCCESS;
Radek Krejci86d106e2018-10-18 09:53:19 +0200395}
Michal Vasko841d1a92018-09-07 15:40:31 +0200396
Radek Krejci86d106e2018-10-18 09:53:19 +0200397LY_ERR
398ly_munmap(void *addr, size_t length)
399{
400 if (munmap(addr, length)) {
401 return LY_ESYS;
402 }
403 return LY_SUCCESS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200404}
Radek Krejci4f28eda2018-11-12 11:46:16 +0100405
406LY_ERR
Radek Krejci4546aa62019-07-15 16:53:32 +0200407ly_strcat(char **dest, const char *format, ...)
408{
409 va_list fp;
410 char *addition = NULL;
411 size_t len;
412
413 va_start(fp, format);
414 len = vasprintf(&addition, format, fp);
415 len += (*dest ? strlen(*dest) : 0) + 1;
416
417 if (*dest) {
418 *dest = ly_realloc(*dest, len);
419 if (!*dest) {
Radek Krejci1cd812f2020-12-01 12:17:53 +0100420 va_end(fp);
Radek Krejci4546aa62019-07-15 16:53:32 +0200421 return LY_EMEM;
422 }
423 *dest = strcat(*dest, addition);
424 free(addition);
425 } else {
426 *dest = addition;
427 }
428
429 va_end(fp);
430 return LY_SUCCESS;
431}
432
433LY_ERR
Radek Krejci249973a2019-06-10 10:50:54 +0200434ly_parse_int(const char *val_str, size_t val_len, int64_t min, int64_t max, int base, int64_t *ret)
Radek Krejci4f28eda2018-11-12 11:46:16 +0100435{
Michal Vaskob4d40d62021-05-04 11:42:44 +0200436 LY_ERR rc = LY_SUCCESS;
437 char *ptr, *str;
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200438 int64_t i;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100439
Radek Krejci249973a2019-06-10 10:50:54 +0200440 LY_CHECK_ARG_RET(NULL, val_str, val_str[0], val_len, LY_EINVAL);
Radek Krejci4f28eda2018-11-12 11:46:16 +0100441
Michal Vaskob4d40d62021-05-04 11:42:44 +0200442 /* duplicate the value */
443 str = strndup(val_str, val_len);
444 LY_CHECK_RET(!str, LY_EMEM);
Radek Krejci4f28eda2018-11-12 11:46:16 +0100445
Michal Vaskob4d40d62021-05-04 11:42:44 +0200446 /* parse the value to avoid accessing following bytes */
447 errno = 0;
448 i = strtoll(str, &ptr, base);
449 if (errno || (ptr == str)) {
450 /* invalid string */
451 rc = LY_EVALID;
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200452 } else if ((i < min) || (i > max)) {
Michal Vaskob4d40d62021-05-04 11:42:44 +0200453 /* invalid number */
454 rc = LY_EDENIED;
455 } else if (*ptr) {
456 while (isspace(*ptr)) {
457 ++ptr;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100458 }
Michal Vaskob4d40d62021-05-04 11:42:44 +0200459 if (*ptr) {
460 /* invalid characters after some number */
461 rc = LY_EVALID;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100462 }
463 }
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200464
Michal Vaskob4d40d62021-05-04 11:42:44 +0200465 /* cleanup */
466 free(str);
467 if (!rc) {
468 *ret = i;
469 }
470 return rc;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100471}
472
473LY_ERR
Radek Krejci249973a2019-06-10 10:50:54 +0200474ly_parse_uint(const char *val_str, size_t val_len, uint64_t max, int base, uint64_t *ret)
Radek Krejci4f28eda2018-11-12 11:46:16 +0100475{
Michal Vaskob4d40d62021-05-04 11:42:44 +0200476 LY_ERR rc = LY_SUCCESS;
477 char *ptr, *str;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100478 uint64_t u;
479
Michal Vaskob4d40d62021-05-04 11:42:44 +0200480 LY_CHECK_ARG_RET(NULL, val_str, val_str[0], val_len, LY_EINVAL);
Radek Krejci4f28eda2018-11-12 11:46:16 +0100481
Michal Vaskob4d40d62021-05-04 11:42:44 +0200482 /* duplicate the value to avoid accessing following bytes */
483 str = strndup(val_str, val_len);
484 LY_CHECK_RET(!str, LY_EMEM);
485
486 /* parse the value */
Radek Krejci4f28eda2018-11-12 11:46:16 +0100487 errno = 0;
Michal Vaskob4d40d62021-05-04 11:42:44 +0200488 u = strtoull(str, &ptr, base);
489 if (errno || (ptr == str)) {
490 /* invalid string */
491 rc = LY_EVALID;
492 } else if ((u > max) || (u && (str[0] == '-'))) {
493 /* invalid number */
494 rc = LY_EDENIED;
495 } else if (*ptr) {
496 while (isspace(*ptr)) {
497 ++ptr;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100498 }
Michal Vaskob4d40d62021-05-04 11:42:44 +0200499 if (*ptr) {
500 /* invalid characters after some number */
501 rc = LY_EVALID;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100502 }
503 }
504
Michal Vaskob4d40d62021-05-04 11:42:44 +0200505 /* cleanup */
506 free(str);
507 if (!rc) {
508 *ret = u;
509 }
510 return rc;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100511}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200512
513/**
514 * @brief Parse an identifier.
515 *
516 * ;; An identifier MUST NOT start with (('X'|'x') ('M'|'m') ('L'|'l'))
517 * identifier = (ALPHA / "_")
518 * *(ALPHA / DIGIT / "_" / "-" / ".")
519 *
520 * @param[in,out] id Identifier to parse. When returned, it points to the first character which is not part of the identifier.
521 * @return LY_ERR value: LY_SUCCESS or LY_EINVAL in case of invalid starting character.
522 */
523static LY_ERR
524lys_parse_id(const char **id)
525{
526 assert(id && *id);
527
528 if (!is_yangidentstartchar(**id)) {
529 return LY_EINVAL;
530 }
531 ++(*id);
532
533 while (is_yangidentchar(**id)) {
534 ++(*id);
535 }
536 return LY_SUCCESS;
537}
538
539LY_ERR
540ly_parse_nodeid(const char **id, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
541{
542 assert(id && *id);
543 assert(prefix && prefix_len);
544 assert(name && name_len);
545
546 *prefix = *id;
547 *prefix_len = 0;
548 *name = NULL;
549 *name_len = 0;
550
551 LY_CHECK_RET(lys_parse_id(id));
552 if (**id == ':') {
553 /* there is prefix */
554 *prefix_len = *id - *prefix;
555 ++(*id);
556 *name = *id;
557
558 LY_CHECK_RET(lys_parse_id(id));
559 *name_len = *id - *name;
560 } else {
561 /* there is no prefix, so what we have as prefix now is actually the name */
562 *name = *prefix;
563 *name_len = *id - *name;
564 *prefix = NULL;
565 }
566
567 return LY_SUCCESS;
568}
569
570LY_ERR
Radek Krejci084289f2019-07-09 17:35:30 +0200571ly_parse_instance_predicate(const char **pred, size_t limit, LYD_FORMAT format,
Radek Krejci0f969882020-08-21 16:56:47 +0200572 const char **prefix, size_t *prefix_len, const char **id, size_t *id_len, const char **value, size_t *value_len,
573 const char **errmsg)
Radek Krejcib4a4a272019-06-10 12:44:52 +0200574{
575 LY_ERR ret = LY_EVALID;
576 const char *in = *pred;
577 size_t offset = 1;
Radek Krejci857189e2020-09-01 13:26:36 +0200578 uint8_t expr = 0; /* 0 - position predicate; 1 - leaf-list-predicate; 2 - key-predicate */
Radek Krejcib4a4a272019-06-10 12:44:52 +0200579 char quot;
580
Radek Krejci4607f542020-12-01 12:18:49 +0100581 assert(in[0] == '[');
Radek Krejcib4a4a272019-06-10 12:44:52 +0200582
583 *prefix = *id = *value = NULL;
584 *prefix_len = *id_len = *value_len = 0;
585
586 /* leading *WSP */
Michal Vaskod989ba02020-08-24 10:59:24 +0200587 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200588
589 if (isdigit(in[offset])) {
590 /* pos: "[" *WSP positive-integer-value *WSP "]" */
591 if (in[offset] == '0') {
592 /* zero */
593 *errmsg = "The position predicate cannot be zero.";
594 goto error;
595 }
596
597 /* positive-integer-value */
Radek Krejci10bfdf82019-06-10 14:08:13 +0200598 *value = &in[offset++];
Michal Vaskod989ba02020-08-24 10:59:24 +0200599 for ( ; isdigit(in[offset]); offset++) {}
Radek Krejci10bfdf82019-06-10 14:08:13 +0200600 *value_len = &in[offset] - *value;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200601
602 } else if (in[offset] == '.') {
603 /* leaf-list-predicate: "[" *WSP "." *WSP "=" *WSP quoted-string *WSP "]" */
604 *id = &in[offset];
605 *id_len = 1;
606 offset++;
607 expr = 1;
Radek Krejci10bfdf82019-06-10 14:08:13 +0200608 } else if (in[offset] == '-') {
609 /* typically negative value */
610 *errmsg = "Invalid instance predicate format (negative position or invalid node-identifier).";
611 goto error;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200612 } else {
613 /* key-predicate: "[" *WSP node-identifier *WSP "=" *WSP quoted-string *WSP "]" */
614 in = &in[offset];
615 if (ly_parse_nodeid(&in, prefix, prefix_len, id, id_len)) {
616 *errmsg = "Invalid node-identifier.";
617 goto error;
618 }
Michal Vasko69730152020-10-09 16:30:07 +0200619 if ((format == LYD_XML) && !(*prefix)) {
Radek Krejci084289f2019-07-09 17:35:30 +0200620 /* all node names MUST be qualified with explicit namespace prefix */
621 *errmsg = "Missing prefix of a node name.";
622 goto error;
623 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200624 offset = in - *pred;
625 in = *pred;
Radek Krejci10bfdf82019-06-10 14:08:13 +0200626 expr = 2;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200627 }
628
629 if (expr) {
630 /* *WSP "=" *WSP quoted-string *WSP "]" */
Michal Vaskod989ba02020-08-24 10:59:24 +0200631 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200632
633 if (in[offset] != '=') {
Radek Krejci10bfdf82019-06-10 14:08:13 +0200634 if (expr == 1) {
635 *errmsg = "Unexpected character instead of \'=\' in leaf-list-predicate.";
636 } else { /* 2 */
637 *errmsg = "Unexpected character instead of \'=\' in key-predicate.";
638 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200639 goto error;
640 }
641 offset++;
Michal Vaskod989ba02020-08-24 10:59:24 +0200642 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200643
644 /* quoted-string */
645 quot = in[offset++];
Michal Vasko69730152020-10-09 16:30:07 +0200646 if ((quot != '\'') && (quot != '\"')) {
Radek Krejcib4a4a272019-06-10 12:44:52 +0200647 *errmsg = "String value is not quoted.";
648 goto error;
649 }
650 *value = &in[offset];
Michal Vaskod989ba02020-08-24 10:59:24 +0200651 for ( ; offset < limit && (in[offset] != quot || (offset && in[offset - 1] == '\\')); offset++) {}
Radek Krejci10bfdf82019-06-10 14:08:13 +0200652 if (in[offset] == quot) {
653 *value_len = &in[offset] - *value;
654 offset++;
655 } else {
656 *errmsg = "Value is not terminated quoted-string.";
657 goto error;
658 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200659 }
660
661 /* *WSP "]" */
Michal Vaskod989ba02020-08-24 10:59:24 +0200662 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200663 if (in[offset] != ']') {
Radek Krejci10bfdf82019-06-10 14:08:13 +0200664 if (expr == 0) {
665 *errmsg = "Predicate (pos) is not terminated by \']\' character.";
666 } else if (expr == 1) {
667 *errmsg = "Predicate (leaf-list-predicate) is not terminated by \']\' character.";
668 } else { /* 2 */
669 *errmsg = "Predicate (key-predicate) is not terminated by \']\' character.";
670 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200671 goto error;
672 }
Radek Krejci10bfdf82019-06-10 14:08:13 +0200673 offset++;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200674
Radek Krejci10bfdf82019-06-10 14:08:13 +0200675 if (offset <= limit) {
676 *pred = &in[offset];
Radek Krejcib4a4a272019-06-10 12:44:52 +0200677 return LY_SUCCESS;
678 }
679
680 /* we read after the limit */
681 *errmsg = "Predicate is incomplete.";
682 *prefix = *id = *value = NULL;
683 *prefix_len = *id_len = *value_len = 0;
684 offset = limit;
685 ret = LY_EINVAL;
686
687error:
688 *pred = &in[offset];
689 return ret;
690}