blob: a43423528382d597c328441117179b6cd927ee10 [file] [log] [blame]
/**
 * @file common.c
 * @author Michal Vasko <mvasko@cesnet.cz>
 * @brief common internal definitions for libyang
 *
 * Copyright (c) 2018 CESNET, z.s.p.o.
 *
 * This source code is licensed under BSD 3-Clause License (the "License").
 * You may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://opensource.org/licenses/BSD-3-Clause
 */
Radek Krejcib7db73a2018-10-24 14:18:40 +020014
Radek Krejci535ea9f2020-05-29 16:01:05 +020015#define _GNU_SOURCE
16
Radek Krejcib7db73a2018-10-24 14:18:40 +020017#include "common.h"
Michal Vasko1324b6c2018-09-07 11:16:23 +020018
Radek Krejci86d106e2018-10-18 09:53:19 +020019#include <assert.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020020#include <ctype.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020021#include <errno.h>
Michal Vasko15dc9fa2021-05-03 14:33:05 +020022#include <inttypes.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020023#include <stdarg.h>
Radek Krejci535ea9f2020-05-29 16:01:05 +020024#include <stdio.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020025#include <stdlib.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020026#include <string.h>
Radek Krejci86d106e2018-10-18 09:53:19 +020027#include <sys/mman.h>
28#include <sys/stat.h>
Radek Krejci86d106e2018-10-18 09:53:19 +020029#include <unistd.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020030
Radek Krejciaa45bda2020-07-20 07:43:38 +020031#include "compat.h"
Radek Krejcib4a4a272019-06-10 12:44:52 +020032#include "tree_schema_internal.h"
Michal Vasko1324b6c2018-09-07 11:16:23 +020033
/**
 * @brief realloc() wrapper that releases the original memory when the reallocation fails.
 *
 * Because the original block is freed on failure, the result may be assigned straight back
 * to the pointer that was passed in without leaking it.
 *
 * @param[in] ptr Memory to resize, may be NULL.
 * @param[in] size Requested size in bytes. If 0 and @p ptr is not NULL, @p ptr is freed and NULL returned.
 * @return Pointer to the resized memory.
 * @return NULL on failure (or on zero-size request for existing memory), @p ptr is no longer valid then.
 */
void *
ly_realloc(void *ptr, size_t size)
{
    void *new_mem;

    if (ptr && !size) {
        /* realloc(ptr, 0) is implementation-defined and may free ptr itself before returning NULL,
         * the free() on the failure path below would then be a double free */
        free(ptr);
        return NULL;
    }

    new_mem = realloc(ptr, size);
    if (!new_mem) {
        free(ptr);
    }

    return new_mem;
}
Michal Vasko841d1a92018-09-07 15:40:31 +020046
/**
 * @brief Find the first occurrence of a byte in the first @p len bytes of a string.
 *
 * Unlike strchr(), the search is bounded only by @p len, a NUL byte does not stop it.
 *
 * @param[in] s String to search in.
 * @param[in] c Character to look for (compared as a single byte).
 * @param[in] len Number of bytes of @p s to examine.
 * @return Pointer to the first matching byte, NULL if there is none in the examined range.
 */
char *
ly_strnchr(const char *s, int c, size_t len)
{
    if (!len) {
        /* nothing to examine, do not touch s at all */
        return NULL;
    }

    return memchr(s, c, len);
}
53
/**
 * @brief Compare a NUL-terminated reference string with a string given by its length.
 *
 * The strings are equal only if the first @p str_len bytes match and @p refstr is exactly
 * @p str_len bytes long.
 *
 * @param[in] refstr NUL-terminated reference string.
 * @param[in] str String to compare, does not need to be NUL-terminated.
 * @param[in] str_len Length of @p str in bytes.
 * @return 0 if the strings are equal, non-zero otherwise.
 */
int
ly_strncmp(const char *refstr, const char *str, size_t str_len)
{
    int rc = strncmp(refstr, str, str_len);

    if (rc) {
        return rc;
    }

    if (memchr(refstr, '\0', str_len)) {
        /* refstr ends before str_len bytes (strncmp() stopped on a NUL in both strings),
         * refstr[str_len] would be read behind its terminator */
        return 1;
    }

    /* same first str_len bytes, refstr must not continue */
    return refstr[str_len] ? 1 : 0;
}
65
/**
 * @brief Check whether the sum X + Y would exceed MAX.
 *
 * Intended for unsigned operands with Y <= MAX. Arguments may be evaluated more than once.
 *
 * @return 1 if the addition would overflow MAX, 0 otherwise.
 */
#define LY_OVERFLOW_ADD(MAX, X, Y) (((X) > (MAX) - (Y)) ? 1 : 0)

/**
 * @brief Check whether the product X * Y would exceed MAX.
 *
 * A zero Y never overflows (and must not be used as a divisor). Arguments may be evaluated
 * more than once.
 *
 * @return 1 if the multiplication would overflow MAX, 0 otherwise.
 */
#define LY_OVERFLOW_MUL(MAX, X, Y) (((Y) && ((X) > (MAX) / (Y))) ? 1 : 0)
69
70LY_ERR
71ly_strntou8(const char *nptr, size_t len, uint8_t *ret)
72{
73 uint8_t num = 0, dig, dec_pow;
74
75 if (len > 3) {
76 /* overflow for sure */
77 return LY_EDENIED;
78 }
79
80 dec_pow = 1;
81 for ( ; len && isdigit(nptr[len - 1]); --len) {
82 dig = nptr[len - 1] - 48;
83
84 if (LY_OVERFLOW_MUL(UINT8_MAX, dig, dec_pow)) {
85 return LY_EDENIED;
86 }
87 dig *= dec_pow;
88
89 if (LY_OVERFLOW_ADD(UINT8_MAX, num, dig)) {
90 return LY_EDENIED;
91 }
92 num += dig;
93
94 dec_pow *= 10;
95 }
96
97 if (len) {
98 return LY_EVALID;
99 }
100 *ret = num;
101 return LY_SUCCESS;
102}
103
aPiecekf102d4d2021-03-30 12:18:38 +0200104uint32_t
105ly_value_prefix_next(const char *str_begin, const char *str_end, ly_bool *is_prefix, const char **str_next)
106{
107 const char *stop, *prefix;
108 size_t bytes;
109 uint32_t c;
110 ly_bool prefix_found;
111 uint32_t ret;
112
113 assert(is_prefix && str_next);
114
115#define IS_AT_END(PTR, STR_END) (STR_END ? PTR == STR_END : !(*PTR))
116
117 *str_next = NULL;
118 *is_prefix = 0;
119 ret = 0;
120
121 if (!str_begin || !(*str_begin) || (str_begin == str_end)) {
122 return ret;
123 }
124
125 stop = str_begin;
126 prefix = NULL;
127 prefix_found = 0;
128
129 do {
130 /* look for the beginning of the YANG value */
131 for (ly_getutf8(&stop, &c, &bytes);
132 !is_xmlqnamestartchar(c) && !IS_AT_END(stop, str_end);
133 ly_getutf8(&stop, &c, &bytes)) {}
134
135 if (IS_AT_END(stop, str_end)) {
136 break;
137 }
138
139 /* maybe the prefix was found */
140 prefix = stop - bytes;
141
142 /* look for the the end of the prefix */
143 for (ly_getutf8(&stop, &c, &bytes);
144 is_xmlqnamechar(c) && !IS_AT_END(stop, str_end);
145 ly_getutf8(&stop, &c, &bytes)) {}
146
147 prefix_found = c == ':' ? 1 : 0;
148
149 /* if it wasn't the prefix, keep looking */
150 } while (!IS_AT_END(stop, str_end) && !prefix_found);
151
152 if ((str_begin == prefix) && prefix_found) {
153 /* prefix found at the beginning of the input string */
154 *is_prefix = 1;
155 *str_next = IS_AT_END(stop, str_end) ? NULL : stop;
156 ret = (stop - bytes) - str_begin;
157 } else if ((str_begin != prefix) && (prefix_found)) {
158 /* there is a some string before prefix */
159 *str_next = prefix;
160 ret = prefix - str_begin;
161 } else {
162 /* no prefix found */
163 ret = stop - str_begin;
164 }
165
166#undef IS_AT_END
167
168 return ret;
169}
170
Radek Krejcib416be62018-10-01 14:51:45 +0200171LY_ERR
Michal Vaskob36053d2020-03-26 15:49:30 +0100172ly_getutf8(const char **input, uint32_t *utf8_char, size_t *bytes_read)
Radek Krejcib416be62018-10-01 14:51:45 +0200173{
Radek Krejci1deb5be2020-08-26 16:43:36 +0200174 uint32_t c, aux;
175 size_t len;
Radek Krejcib416be62018-10-01 14:51:45 +0200176
Radek Krejcicc6a45c2019-05-13 10:16:14 +0200177 if (bytes_read) {
178 (*bytes_read) = 0;
179 }
180
Radek Krejcib416be62018-10-01 14:51:45 +0200181 c = (*input)[0];
182 LY_CHECK_RET(!c, LY_EINVAL);
183
184 if (!(c & 0x80)) {
185 /* one byte character */
186 len = 1;
187
Michal Vasko69730152020-10-09 16:30:07 +0200188 if ((c < 0x20) && (c != 0x9) && (c != 0xa) && (c != 0xd)) {
Radek Krejcib416be62018-10-01 14:51:45 +0200189 return LY_EINVAL;
190 }
191 } else if ((c & 0xe0) == 0xc0) {
192 /* two bytes character */
193 len = 2;
194
195 aux = (*input)[1];
196 if ((aux & 0xc0) != 0x80) {
197 return LY_EINVAL;
198 }
199 c = ((c & 0x1f) << 6) | (aux & 0x3f);
200
201 if (c < 0x80) {
202 return LY_EINVAL;
203 }
204 } else if ((c & 0xf0) == 0xe0) {
205 /* three bytes character */
206 len = 3;
207
208 c &= 0x0f;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200209 for (uint64_t i = 1; i <= 2; i++) {
Radek Krejcib416be62018-10-01 14:51:45 +0200210 aux = (*input)[i];
211 if ((aux & 0xc0) != 0x80) {
212 return LY_EINVAL;
213 }
214
215 c = (c << 6) | (aux & 0x3f);
216 }
217
Michal Vasko69730152020-10-09 16:30:07 +0200218 if ((c < 0x800) || ((c > 0xd7ff) && (c < 0xe000)) || (c > 0xfffd)) {
Radek Krejcib416be62018-10-01 14:51:45 +0200219 return LY_EINVAL;
220 }
221 } else if ((c & 0xf8) == 0xf0) {
222 /* four bytes character */
223 len = 4;
224
225 c &= 0x07;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200226 for (uint64_t i = 1; i <= 3; i++) {
Radek Krejcib416be62018-10-01 14:51:45 +0200227 aux = (*input)[i];
228 if ((aux & 0xc0) != 0x80) {
229 return LY_EINVAL;
230 }
231
232 c = (c << 6) | (aux & 0x3f);
233 }
234
Michal Vasko69730152020-10-09 16:30:07 +0200235 if ((c < 0x1000) || (c > 0x10ffff)) {
Radek Krejcib416be62018-10-01 14:51:45 +0200236 return LY_EINVAL;
237 }
238 } else {
239 return LY_EINVAL;
240 }
241
242 (*utf8_char) = c;
243 (*input) += len;
244 if (bytes_read) {
245 (*bytes_read) = len;
246 }
247 return LY_SUCCESS;
248}
249
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200250LY_ERR
251ly_pututf8(char *dst, uint32_t value, size_t *bytes_written)
252{
253 if (value < 0x80) {
254 /* one byte character */
Michal Vasko69730152020-10-09 16:30:07 +0200255 if ((value < 0x20) &&
256 (value != 0x09) &&
257 (value != 0x0a) &&
258 (value != 0x0d)) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200259 return LY_EINVAL;
260 }
261
262 dst[0] = value;
263 (*bytes_written) = 1;
264 } else if (value < 0x800) {
265 /* two bytes character */
266 dst[0] = 0xc0 | (value >> 6);
267 dst[1] = 0x80 | (value & 0x3f);
268 (*bytes_written) = 2;
269 } else if (value < 0xfffe) {
270 /* three bytes character */
271 if (((value & 0xf800) == 0xd800) ||
Michal Vasko69730152020-10-09 16:30:07 +0200272 ((value >= 0xfdd0) && (value <= 0xfdef))) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200273 /* exclude surrogate blocks %xD800-DFFF */
274 /* exclude noncharacters %xFDD0-FDEF */
275 return LY_EINVAL;
276 }
277
278 dst[0] = 0xe0 | (value >> 12);
279 dst[1] = 0x80 | ((value >> 6) & 0x3f);
280 dst[2] = 0x80 | (value & 0x3f);
281
282 (*bytes_written) = 3;
283 } else if (value < 0x10fffe) {
284 if ((value & 0xffe) == 0xffe) {
285 /* exclude noncharacters %xFFFE-FFFF, %x1FFFE-1FFFF, %x2FFFE-2FFFF, %x3FFFE-3FFFF, %x4FFFE-4FFFF,
286 * %x5FFFE-5FFFF, %x6FFFE-6FFFF, %x7FFFE-7FFFF, %x8FFFE-8FFFF, %x9FFFE-9FFFF, %xAFFFE-AFFFF,
287 * %xBFFFE-BFFFF, %xCFFFE-CFFFF, %xDFFFE-DFFFF, %xEFFFE-EFFFF, %xFFFFE-FFFFF, %x10FFFE-10FFFF */
288 return LY_EINVAL;
289 }
290 /* four bytes character */
291 dst[0] = 0xf0 | (value >> 18);
292 dst[1] = 0x80 | ((value >> 12) & 0x3f);
293 dst[2] = 0x80 | ((value >> 6) & 0x3f);
294 dst[3] = 0x80 | (value & 0x3f);
295
296 (*bytes_written) = 4;
297 } else {
298 return LY_EINVAL;
299 }
300 return LY_SUCCESS;
301}
302
/**
 * @brief Static table of the UTF8 characters lengths according to their first byte.
 */
static const unsigned char utf8_char_length_table[] = {
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
};

/**
 * @brief Count the UTF-8 characters in a string.
 *
 * The length of every character is taken from its first byte, the following bytes are not validated.
 *
 * @param[in] str String to examine.
 * @param[in] bytes Maximum number of bytes of @p str to examine, a NUL byte ends the counting sooner.
 * @return Number of characters that start within the examined bytes.
 */
size_t
ly_utf8len(const char *str, size_t bytes)
{
    size_t len = 0, pos = 0;
    unsigned char char_len, i;

    while ((pos < bytes) && str[pos]) {
        ++len;
        char_len = utf8_char_length_table[(unsigned char)str[pos]];

        /* move byte by byte so that a truncated character cannot carry us behind the terminating NUL */
        for (i = 0; (i < char_len) && (pos < bytes) && str[pos]; ++i) {
            ++pos;
        }
    }
    return len;
}
337
Radek Krejcid972c252018-09-25 13:23:39 +0200338size_t
339LY_VCODE_INSTREXP_len(const char *str)
340{
341 size_t len = 0;
Michal Vasko69730152020-10-09 16:30:07 +0200342
Radek Krejcid972c252018-09-25 13:23:39 +0200343 if (!str) {
344 return len;
345 } else if (!str[0]) {
346 return 1;
347 }
Radek Krejci1e008d22020-08-17 11:37:37 +0200348 for (len = 1; len < LY_VCODE_INSTREXP_MAXLEN && str[len]; ++len) {}
Radek Krejcid972c252018-09-25 13:23:39 +0200349 return len;
350}
351
Radek Krejcif345c012018-09-19 11:12:59 +0200352LY_ERR
Radek Krejci86d106e2018-10-18 09:53:19 +0200353ly_mmap(struct ly_ctx *ctx, int fd, size_t *length, void **addr)
Michal Vasko841d1a92018-09-07 15:40:31 +0200354{
Radek Krejci86d106e2018-10-18 09:53:19 +0200355 struct stat sb;
356 long pagesize;
357 size_t m;
Michal Vasko841d1a92018-09-07 15:40:31 +0200358
Radek Krejci86d106e2018-10-18 09:53:19 +0200359 assert(length);
360 assert(addr);
361 assert(fd >= 0);
Michal Vasko841d1a92018-09-07 15:40:31 +0200362
Radek Krejci86d106e2018-10-18 09:53:19 +0200363 if (fstat(fd, &sb) == -1) {
364 LOGERR(ctx, LY_ESYS, "Failed to stat the file descriptor (%s) for the mmap().", strerror(errno));
365 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200366 }
Radek Krejci86d106e2018-10-18 09:53:19 +0200367 if (!S_ISREG(sb.st_mode)) {
368 LOGERR(ctx, LY_EINVAL, "File to mmap() is not a regular file.");
369 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200370 }
Radek Krejci86d106e2018-10-18 09:53:19 +0200371 if (!sb.st_size) {
372 *addr = NULL;
373 return LY_SUCCESS;
374 }
375 pagesize = sysconf(_SC_PAGESIZE);
376
377 m = sb.st_size % pagesize;
Michal Vasko69730152020-10-09 16:30:07 +0200378 if (m && (pagesize - m >= 1)) {
Radek Krejci86d106e2018-10-18 09:53:19 +0200379 /* there will be enough space (at least 1 byte) after the file content mapping to provide zeroed NULL-termination byte */
380 *length = sb.st_size + 1;
381 *addr = mmap(NULL, *length, PROT_READ, MAP_PRIVATE, fd, 0);
382 } else {
383 /* there will not be enough bytes after the file content mapping for the additional bytes and some of them
384 * would overflow into another page that would not be zerroed and any access into it would generate SIGBUS.
385 * Therefore we have to do the following hack with double mapping. First, the required number of bytes
386 * (including the additinal bytes) is required as anonymous and thus they will be really provided (actually more
387 * because of using whole pages) and also initialized by zeros. Then, the file is mapped to the same address
388 * where the anonymous mapping starts. */
389 *length = sb.st_size + pagesize;
390 *addr = mmap(NULL, *length, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
391 *addr = mmap(*addr, sb.st_size, PROT_READ, MAP_PRIVATE | MAP_FIXED, fd, 0);
392 }
393 if (*addr == MAP_FAILED) {
394 LOGERR(ctx, LY_ESYS, "mmap() failed (%s).", strerror(errno));
395 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200396 }
397
Radek Krejcif345c012018-09-19 11:12:59 +0200398 return LY_SUCCESS;
Radek Krejci86d106e2018-10-18 09:53:19 +0200399}
Michal Vasko841d1a92018-09-07 15:40:31 +0200400
Radek Krejci86d106e2018-10-18 09:53:19 +0200401LY_ERR
402ly_munmap(void *addr, size_t length)
403{
404 if (munmap(addr, length)) {
405 return LY_ESYS;
406 }
407 return LY_SUCCESS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200408}
Radek Krejci4f28eda2018-11-12 11:46:16 +0100409
410LY_ERR
Radek Krejci4546aa62019-07-15 16:53:32 +0200411ly_strcat(char **dest, const char *format, ...)
412{
413 va_list fp;
414 char *addition = NULL;
415 size_t len;
416
417 va_start(fp, format);
418 len = vasprintf(&addition, format, fp);
419 len += (*dest ? strlen(*dest) : 0) + 1;
420
421 if (*dest) {
422 *dest = ly_realloc(*dest, len);
423 if (!*dest) {
Radek Krejci1cd812f2020-12-01 12:17:53 +0100424 va_end(fp);
Radek Krejci4546aa62019-07-15 16:53:32 +0200425 return LY_EMEM;
426 }
427 *dest = strcat(*dest, addition);
428 free(addition);
429 } else {
430 *dest = addition;
431 }
432
433 va_end(fp);
434 return LY_SUCCESS;
435}
436
437LY_ERR
Radek Krejci249973a2019-06-10 10:50:54 +0200438ly_parse_int(const char *val_str, size_t val_len, int64_t min, int64_t max, int base, int64_t *ret)
Radek Krejci4f28eda2018-11-12 11:46:16 +0100439{
Michal Vaskob4d40d62021-05-04 11:42:44 +0200440 LY_ERR rc = LY_SUCCESS;
441 char *ptr, *str;
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200442 int64_t i;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100443
Radek Krejci249973a2019-06-10 10:50:54 +0200444 LY_CHECK_ARG_RET(NULL, val_str, val_str[0], val_len, LY_EINVAL);
Radek Krejci4f28eda2018-11-12 11:46:16 +0100445
Michal Vaskob4d40d62021-05-04 11:42:44 +0200446 /* duplicate the value */
447 str = strndup(val_str, val_len);
448 LY_CHECK_RET(!str, LY_EMEM);
Radek Krejci4f28eda2018-11-12 11:46:16 +0100449
Michal Vaskob4d40d62021-05-04 11:42:44 +0200450 /* parse the value to avoid accessing following bytes */
451 errno = 0;
452 i = strtoll(str, &ptr, base);
453 if (errno || (ptr == str)) {
454 /* invalid string */
455 rc = LY_EVALID;
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200456 } else if ((i < min) || (i > max)) {
Michal Vaskob4d40d62021-05-04 11:42:44 +0200457 /* invalid number */
458 rc = LY_EDENIED;
459 } else if (*ptr) {
460 while (isspace(*ptr)) {
461 ++ptr;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100462 }
Michal Vaskob4d40d62021-05-04 11:42:44 +0200463 if (*ptr) {
464 /* invalid characters after some number */
465 rc = LY_EVALID;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100466 }
467 }
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200468
Michal Vaskob4d40d62021-05-04 11:42:44 +0200469 /* cleanup */
470 free(str);
471 if (!rc) {
472 *ret = i;
473 }
474 return rc;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100475}
476
477LY_ERR
Radek Krejci249973a2019-06-10 10:50:54 +0200478ly_parse_uint(const char *val_str, size_t val_len, uint64_t max, int base, uint64_t *ret)
Radek Krejci4f28eda2018-11-12 11:46:16 +0100479{
Michal Vaskob4d40d62021-05-04 11:42:44 +0200480 LY_ERR rc = LY_SUCCESS;
481 char *ptr, *str;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100482 uint64_t u;
483
Michal Vaskob4d40d62021-05-04 11:42:44 +0200484 LY_CHECK_ARG_RET(NULL, val_str, val_str[0], val_len, LY_EINVAL);
Radek Krejci4f28eda2018-11-12 11:46:16 +0100485
Michal Vaskob4d40d62021-05-04 11:42:44 +0200486 /* duplicate the value to avoid accessing following bytes */
487 str = strndup(val_str, val_len);
488 LY_CHECK_RET(!str, LY_EMEM);
489
490 /* parse the value */
Radek Krejci4f28eda2018-11-12 11:46:16 +0100491 errno = 0;
Michal Vaskob4d40d62021-05-04 11:42:44 +0200492 u = strtoull(str, &ptr, base);
493 if (errno || (ptr == str)) {
494 /* invalid string */
495 rc = LY_EVALID;
496 } else if ((u > max) || (u && (str[0] == '-'))) {
497 /* invalid number */
498 rc = LY_EDENIED;
499 } else if (*ptr) {
500 while (isspace(*ptr)) {
501 ++ptr;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100502 }
Michal Vaskob4d40d62021-05-04 11:42:44 +0200503 if (*ptr) {
504 /* invalid characters after some number */
505 rc = LY_EVALID;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100506 }
507 }
508
Michal Vaskob4d40d62021-05-04 11:42:44 +0200509 /* cleanup */
510 free(str);
511 if (!rc) {
512 *ret = u;
513 }
514 return rc;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100515}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200516
517/**
518 * @brief Parse an identifier.
519 *
520 * ;; An identifier MUST NOT start with (('X'|'x') ('M'|'m') ('L'|'l'))
521 * identifier = (ALPHA / "_")
522 * *(ALPHA / DIGIT / "_" / "-" / ".")
523 *
524 * @param[in,out] id Identifier to parse. When returned, it points to the first character which is not part of the identifier.
525 * @return LY_ERR value: LY_SUCCESS or LY_EINVAL in case of invalid starting character.
526 */
527static LY_ERR
528lys_parse_id(const char **id)
529{
530 assert(id && *id);
531
532 if (!is_yangidentstartchar(**id)) {
533 return LY_EINVAL;
534 }
535 ++(*id);
536
537 while (is_yangidentchar(**id)) {
538 ++(*id);
539 }
540 return LY_SUCCESS;
541}
542
543LY_ERR
544ly_parse_nodeid(const char **id, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
545{
546 assert(id && *id);
547 assert(prefix && prefix_len);
548 assert(name && name_len);
549
550 *prefix = *id;
551 *prefix_len = 0;
552 *name = NULL;
553 *name_len = 0;
554
555 LY_CHECK_RET(lys_parse_id(id));
556 if (**id == ':') {
557 /* there is prefix */
558 *prefix_len = *id - *prefix;
559 ++(*id);
560 *name = *id;
561
562 LY_CHECK_RET(lys_parse_id(id));
563 *name_len = *id - *name;
564 } else {
565 /* there is no prefix, so what we have as prefix now is actually the name */
566 *name = *prefix;
567 *name_len = *id - *name;
568 *prefix = NULL;
569 }
570
571 return LY_SUCCESS;
572}
573
574LY_ERR
Radek Krejci084289f2019-07-09 17:35:30 +0200575ly_parse_instance_predicate(const char **pred, size_t limit, LYD_FORMAT format,
Radek Krejci0f969882020-08-21 16:56:47 +0200576 const char **prefix, size_t *prefix_len, const char **id, size_t *id_len, const char **value, size_t *value_len,
577 const char **errmsg)
Radek Krejcib4a4a272019-06-10 12:44:52 +0200578{
579 LY_ERR ret = LY_EVALID;
580 const char *in = *pred;
581 size_t offset = 1;
Radek Krejci857189e2020-09-01 13:26:36 +0200582 uint8_t expr = 0; /* 0 - position predicate; 1 - leaf-list-predicate; 2 - key-predicate */
Radek Krejcib4a4a272019-06-10 12:44:52 +0200583 char quot;
584
Radek Krejci4607f542020-12-01 12:18:49 +0100585 assert(in[0] == '[');
Radek Krejcib4a4a272019-06-10 12:44:52 +0200586
587 *prefix = *id = *value = NULL;
588 *prefix_len = *id_len = *value_len = 0;
589
590 /* leading *WSP */
Michal Vaskod989ba02020-08-24 10:59:24 +0200591 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200592
593 if (isdigit(in[offset])) {
594 /* pos: "[" *WSP positive-integer-value *WSP "]" */
595 if (in[offset] == '0') {
596 /* zero */
597 *errmsg = "The position predicate cannot be zero.";
598 goto error;
599 }
600
601 /* positive-integer-value */
Radek Krejci10bfdf82019-06-10 14:08:13 +0200602 *value = &in[offset++];
Michal Vaskod989ba02020-08-24 10:59:24 +0200603 for ( ; isdigit(in[offset]); offset++) {}
Radek Krejci10bfdf82019-06-10 14:08:13 +0200604 *value_len = &in[offset] - *value;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200605
606 } else if (in[offset] == '.') {
607 /* leaf-list-predicate: "[" *WSP "." *WSP "=" *WSP quoted-string *WSP "]" */
608 *id = &in[offset];
609 *id_len = 1;
610 offset++;
611 expr = 1;
Radek Krejci10bfdf82019-06-10 14:08:13 +0200612 } else if (in[offset] == '-') {
613 /* typically negative value */
614 *errmsg = "Invalid instance predicate format (negative position or invalid node-identifier).";
615 goto error;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200616 } else {
617 /* key-predicate: "[" *WSP node-identifier *WSP "=" *WSP quoted-string *WSP "]" */
618 in = &in[offset];
619 if (ly_parse_nodeid(&in, prefix, prefix_len, id, id_len)) {
620 *errmsg = "Invalid node-identifier.";
621 goto error;
622 }
Michal Vasko69730152020-10-09 16:30:07 +0200623 if ((format == LYD_XML) && !(*prefix)) {
Radek Krejci084289f2019-07-09 17:35:30 +0200624 /* all node names MUST be qualified with explicit namespace prefix */
625 *errmsg = "Missing prefix of a node name.";
626 goto error;
627 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200628 offset = in - *pred;
629 in = *pred;
Radek Krejci10bfdf82019-06-10 14:08:13 +0200630 expr = 2;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200631 }
632
633 if (expr) {
634 /* *WSP "=" *WSP quoted-string *WSP "]" */
Michal Vaskod989ba02020-08-24 10:59:24 +0200635 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200636
637 if (in[offset] != '=') {
Radek Krejci10bfdf82019-06-10 14:08:13 +0200638 if (expr == 1) {
639 *errmsg = "Unexpected character instead of \'=\' in leaf-list-predicate.";
640 } else { /* 2 */
641 *errmsg = "Unexpected character instead of \'=\' in key-predicate.";
642 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200643 goto error;
644 }
645 offset++;
Michal Vaskod989ba02020-08-24 10:59:24 +0200646 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200647
648 /* quoted-string */
649 quot = in[offset++];
Michal Vasko69730152020-10-09 16:30:07 +0200650 if ((quot != '\'') && (quot != '\"')) {
Radek Krejcib4a4a272019-06-10 12:44:52 +0200651 *errmsg = "String value is not quoted.";
652 goto error;
653 }
654 *value = &in[offset];
Michal Vaskod989ba02020-08-24 10:59:24 +0200655 for ( ; offset < limit && (in[offset] != quot || (offset && in[offset - 1] == '\\')); offset++) {}
Radek Krejci10bfdf82019-06-10 14:08:13 +0200656 if (in[offset] == quot) {
657 *value_len = &in[offset] - *value;
658 offset++;
659 } else {
660 *errmsg = "Value is not terminated quoted-string.";
661 goto error;
662 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200663 }
664
665 /* *WSP "]" */
Michal Vaskod989ba02020-08-24 10:59:24 +0200666 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200667 if (in[offset] != ']') {
Radek Krejci10bfdf82019-06-10 14:08:13 +0200668 if (expr == 0) {
669 *errmsg = "Predicate (pos) is not terminated by \']\' character.";
670 } else if (expr == 1) {
671 *errmsg = "Predicate (leaf-list-predicate) is not terminated by \']\' character.";
672 } else { /* 2 */
673 *errmsg = "Predicate (key-predicate) is not terminated by \']\' character.";
674 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200675 goto error;
676 }
Radek Krejci10bfdf82019-06-10 14:08:13 +0200677 offset++;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200678
Radek Krejci10bfdf82019-06-10 14:08:13 +0200679 if (offset <= limit) {
680 *pred = &in[offset];
Radek Krejcib4a4a272019-06-10 12:44:52 +0200681 return LY_SUCCESS;
682 }
683
684 /* we read after the limit */
685 *errmsg = "Predicate is incomplete.";
686 *prefix = *id = *value = NULL;
687 *prefix_len = *id_len = *value_len = 0;
688 offset = limit;
689 ret = LY_EINVAL;
690
691error:
692 *pred = &in[offset];
693 return ret;
694}