blob: 9c0d15a48604d09456679862dfdbed4d2461d386 [file] [log] [blame]
/**
 * @file common.c
 * @author Michal Vasko <mvasko@cesnet.cz>
 * @brief common internal definitions for libyang
 *
 * Copyright (c) 2018 CESNET, z.s.p.o.
 *
 * This source code is licensed under BSD 3-Clause License (the "License").
 * You may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://opensource.org/licenses/BSD-3-Clause
 */
Radek Krejcib7db73a2018-10-24 14:18:40 +020014
Radek Krejci535ea9f2020-05-29 16:01:05 +020015#define _GNU_SOURCE
16
Radek Krejcib7db73a2018-10-24 14:18:40 +020017#include "common.h"
Michal Vasko1324b6c2018-09-07 11:16:23 +020018
Radek Krejci86d106e2018-10-18 09:53:19 +020019#include <assert.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020020#include <ctype.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020021#include <errno.h>
22#include <stdarg.h>
Radek Krejci535ea9f2020-05-29 16:01:05 +020023#include <stdio.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020024#include <stdlib.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020025#include <string.h>
Radek Krejci86d106e2018-10-18 09:53:19 +020026#include <sys/mman.h>
27#include <sys/stat.h>
Radek Krejci86d106e2018-10-18 09:53:19 +020028#include <unistd.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020029
Radek Krejciaa45bda2020-07-20 07:43:38 +020030#include "compat.h"
Radek Krejcib4a4a272019-06-10 12:44:52 +020031#include "tree_schema_internal.h"
Michal Vasko1324b6c2018-09-07 11:16:23 +020032
/**
 * @brief realloc() wrapper that never leaks the original block.
 *
 * On allocation failure the original memory is freed, so the caller can simply
 * overwrite its pointer with the result.
 *
 * A request to shrink an existing block to zero bytes is handled explicitly:
 * some realloc() implementations free the block and return NULL in that case,
 * which would make the failure path below free the same block a second time.
 *
 * @param[in] ptr Memory to resize, may be NULL.
 * @param[in] size New size in bytes.
 * @return Resized memory, NULL on failure or when an existing block was released
 * because @p size is 0 (@p ptr is no longer valid in either case).
 */
void *
ly_realloc(void *ptr, size_t size)
{
    void *new_mem;

    if (ptr && !size) {
        /* avoid the implementation-defined realloc(ptr, 0) and a possible double free */
        free(ptr);
        return NULL;
    }

    new_mem = realloc(ptr, size);
    if (!new_mem) {
        free(ptr);
    }

    return new_mem;
}
Michal Vasko841d1a92018-09-07 15:40:31 +020045
/**
 * @brief Find the first occurrence of a character in at most @p len bytes of a string.
 *
 * The search stops at the terminating NUL byte or after @p len bytes, whichever
 * comes first. The length is checked before each byte is read, so no byte at
 * index @p len or beyond is ever examined.
 *
 * @param[in] s String to search.
 * @param[in] c Character to look for (converted to char).
 * @param[in] len Maximum number of bytes of @p s to examine.
 * @return Pointer to the first match, NULL if there is none within the limit.
 */
char *
ly_strnchr(const char *s, int c, size_t len)
{
    for ( ; len; ++s, --len) {
        if (*s == (char)c) {
            return (char *)s;
        }
        if (*s == '\0') {
            /* end of the string reached before the limit */
            return NULL;
        }
    }

    return NULL;
}
56
/**
 * @brief Compare a NUL-terminated reference string with a length-limited string.
 *
 * @param[in] refstr NUL-terminated reference string.
 * @param[in] str String to compare, only the first @p str_len bytes are used.
 * @param[in] str_len Number of bytes of @p str to compare.
 * @return 0 if the first @p str_len bytes are equal and @p refstr ends right after them,
 * the strncmp() result if the compared bytes differ, 1 if @p refstr is longer.
 */
int
ly_strncmp(const char *refstr, const char *str, size_t str_len)
{
    int diff = strncmp(refstr, str, str_len);

    if (diff) {
        /* the compared parts differ */
        return diff;
    }

    /* the compared parts match, the strings are equal only if refstr ends here */
    return (refstr[str_len] == '\0') ? 0 : 1;
}
68
aPiecekf102d4d2021-03-30 12:18:38 +020069uint32_t
70ly_value_prefix_next(const char *str_begin, const char *str_end, ly_bool *is_prefix, const char **str_next)
71{
72 const char *stop, *prefix;
73 size_t bytes;
74 uint32_t c;
75 ly_bool prefix_found;
76 uint32_t ret;
77
78 assert(is_prefix && str_next);
79
80#define IS_AT_END(PTR, STR_END) (STR_END ? PTR == STR_END : !(*PTR))
81
82 *str_next = NULL;
83 *is_prefix = 0;
84 ret = 0;
85
86 if (!str_begin || !(*str_begin) || (str_begin == str_end)) {
87 return ret;
88 }
89
90 stop = str_begin;
91 prefix = NULL;
92 prefix_found = 0;
93
94 do {
95 /* look for the beginning of the YANG value */
96 for (ly_getutf8(&stop, &c, &bytes);
97 !is_xmlqnamestartchar(c) && !IS_AT_END(stop, str_end);
98 ly_getutf8(&stop, &c, &bytes)) {}
99
100 if (IS_AT_END(stop, str_end)) {
101 break;
102 }
103
104 /* maybe the prefix was found */
105 prefix = stop - bytes;
106
107 /* look for the the end of the prefix */
108 for (ly_getutf8(&stop, &c, &bytes);
109 is_xmlqnamechar(c) && !IS_AT_END(stop, str_end);
110 ly_getutf8(&stop, &c, &bytes)) {}
111
112 prefix_found = c == ':' ? 1 : 0;
113
114 /* if it wasn't the prefix, keep looking */
115 } while (!IS_AT_END(stop, str_end) && !prefix_found);
116
117 if ((str_begin == prefix) && prefix_found) {
118 /* prefix found at the beginning of the input string */
119 *is_prefix = 1;
120 *str_next = IS_AT_END(stop, str_end) ? NULL : stop;
121 ret = (stop - bytes) - str_begin;
122 } else if ((str_begin != prefix) && (prefix_found)) {
123 /* there is a some string before prefix */
124 *str_next = prefix;
125 ret = prefix - str_begin;
126 } else {
127 /* no prefix found */
128 ret = stop - str_begin;
129 }
130
131#undef IS_AT_END
132
133 return ret;
134}
135
Radek Krejcib416be62018-10-01 14:51:45 +0200136LY_ERR
Michal Vaskob36053d2020-03-26 15:49:30 +0100137ly_getutf8(const char **input, uint32_t *utf8_char, size_t *bytes_read)
Radek Krejcib416be62018-10-01 14:51:45 +0200138{
Radek Krejci1deb5be2020-08-26 16:43:36 +0200139 uint32_t c, aux;
140 size_t len;
Radek Krejcib416be62018-10-01 14:51:45 +0200141
Radek Krejcicc6a45c2019-05-13 10:16:14 +0200142 if (bytes_read) {
143 (*bytes_read) = 0;
144 }
145
Radek Krejcib416be62018-10-01 14:51:45 +0200146 c = (*input)[0];
147 LY_CHECK_RET(!c, LY_EINVAL);
148
149 if (!(c & 0x80)) {
150 /* one byte character */
151 len = 1;
152
Michal Vasko69730152020-10-09 16:30:07 +0200153 if ((c < 0x20) && (c != 0x9) && (c != 0xa) && (c != 0xd)) {
Radek Krejcib416be62018-10-01 14:51:45 +0200154 return LY_EINVAL;
155 }
156 } else if ((c & 0xe0) == 0xc0) {
157 /* two bytes character */
158 len = 2;
159
160 aux = (*input)[1];
161 if ((aux & 0xc0) != 0x80) {
162 return LY_EINVAL;
163 }
164 c = ((c & 0x1f) << 6) | (aux & 0x3f);
165
166 if (c < 0x80) {
167 return LY_EINVAL;
168 }
169 } else if ((c & 0xf0) == 0xe0) {
170 /* three bytes character */
171 len = 3;
172
173 c &= 0x0f;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200174 for (uint64_t i = 1; i <= 2; i++) {
Radek Krejcib416be62018-10-01 14:51:45 +0200175 aux = (*input)[i];
176 if ((aux & 0xc0) != 0x80) {
177 return LY_EINVAL;
178 }
179
180 c = (c << 6) | (aux & 0x3f);
181 }
182
Michal Vasko69730152020-10-09 16:30:07 +0200183 if ((c < 0x800) || ((c > 0xd7ff) && (c < 0xe000)) || (c > 0xfffd)) {
Radek Krejcib416be62018-10-01 14:51:45 +0200184 return LY_EINVAL;
185 }
186 } else if ((c & 0xf8) == 0xf0) {
187 /* four bytes character */
188 len = 4;
189
190 c &= 0x07;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200191 for (uint64_t i = 1; i <= 3; i++) {
Radek Krejcib416be62018-10-01 14:51:45 +0200192 aux = (*input)[i];
193 if ((aux & 0xc0) != 0x80) {
194 return LY_EINVAL;
195 }
196
197 c = (c << 6) | (aux & 0x3f);
198 }
199
Michal Vasko69730152020-10-09 16:30:07 +0200200 if ((c < 0x1000) || (c > 0x10ffff)) {
Radek Krejcib416be62018-10-01 14:51:45 +0200201 return LY_EINVAL;
202 }
203 } else {
204 return LY_EINVAL;
205 }
206
207 (*utf8_char) = c;
208 (*input) += len;
209 if (bytes_read) {
210 (*bytes_read) = len;
211 }
212 return LY_SUCCESS;
213}
214
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200215LY_ERR
216ly_pututf8(char *dst, uint32_t value, size_t *bytes_written)
217{
218 if (value < 0x80) {
219 /* one byte character */
Michal Vasko69730152020-10-09 16:30:07 +0200220 if ((value < 0x20) &&
221 (value != 0x09) &&
222 (value != 0x0a) &&
223 (value != 0x0d)) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200224 return LY_EINVAL;
225 }
226
227 dst[0] = value;
228 (*bytes_written) = 1;
229 } else if (value < 0x800) {
230 /* two bytes character */
231 dst[0] = 0xc0 | (value >> 6);
232 dst[1] = 0x80 | (value & 0x3f);
233 (*bytes_written) = 2;
234 } else if (value < 0xfffe) {
235 /* three bytes character */
236 if (((value & 0xf800) == 0xd800) ||
Michal Vasko69730152020-10-09 16:30:07 +0200237 ((value >= 0xfdd0) && (value <= 0xfdef))) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200238 /* exclude surrogate blocks %xD800-DFFF */
239 /* exclude noncharacters %xFDD0-FDEF */
240 return LY_EINVAL;
241 }
242
243 dst[0] = 0xe0 | (value >> 12);
244 dst[1] = 0x80 | ((value >> 6) & 0x3f);
245 dst[2] = 0x80 | (value & 0x3f);
246
247 (*bytes_written) = 3;
248 } else if (value < 0x10fffe) {
249 if ((value & 0xffe) == 0xffe) {
250 /* exclude noncharacters %xFFFE-FFFF, %x1FFFE-1FFFF, %x2FFFE-2FFFF, %x3FFFE-3FFFF, %x4FFFE-4FFFF,
251 * %x5FFFE-5FFFF, %x6FFFE-6FFFF, %x7FFFE-7FFFF, %x8FFFE-8FFFF, %x9FFFE-9FFFF, %xAFFFE-AFFFF,
252 * %xBFFFE-BFFFF, %xCFFFE-CFFFF, %xDFFFE-DFFFF, %xEFFFE-EFFFF, %xFFFFE-FFFFF, %x10FFFE-10FFFF */
253 return LY_EINVAL;
254 }
255 /* four bytes character */
256 dst[0] = 0xf0 | (value >> 18);
257 dst[1] = 0x80 | ((value >> 12) & 0x3f);
258 dst[2] = 0x80 | ((value >> 6) & 0x3f);
259 dst[3] = 0x80 | (value & 0x3f);
260
261 (*bytes_written) = 4;
262 } else {
263 return LY_EINVAL;
264 }
265 return LY_SUCCESS;
266}
267
/**
 * @brief Static table of the UTF8 characters lengths according to their first byte.
 *
 * Continuation bytes (0x80 - 0xBF) and the bytes 0xFE and 0xFF are counted as
 * single-byte characters so that the scan always moves forward.
 */
static const unsigned char utf8_char_length_table[] = {
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
};

/**
 * @brief Count the UTF-8 characters in (a part of) a string.
 *
 * Characters are counted by the length implied by their first byte only, the
 * continuation bytes are not validated.
 *
 * @param[in] str String to examine.
 * @param[in] bytes Maximum number of bytes of @p str to examine.
 * @return Number of characters that start within the first @p bytes bytes of @p str
 * and before its NUL terminator.
 */
size_t
ly_utf8len(const char *str, size_t bytes)
{
    size_t len = 0;
    const char *ptr = str;

    /* check the limit first so that the byte right behind it is never read */
    while (((size_t)(ptr - str) < bytes) && *ptr) {
        ++len;
        ptr += utf8_char_length_table[((unsigned char)(*ptr))];
    }
    return len;
}
302
Radek Krejcid972c252018-09-25 13:23:39 +0200303size_t
304LY_VCODE_INSTREXP_len(const char *str)
305{
306 size_t len = 0;
Michal Vasko69730152020-10-09 16:30:07 +0200307
Radek Krejcid972c252018-09-25 13:23:39 +0200308 if (!str) {
309 return len;
310 } else if (!str[0]) {
311 return 1;
312 }
Radek Krejci1e008d22020-08-17 11:37:37 +0200313 for (len = 1; len < LY_VCODE_INSTREXP_MAXLEN && str[len]; ++len) {}
Radek Krejcid972c252018-09-25 13:23:39 +0200314 return len;
315}
316
Radek Krejcif345c012018-09-19 11:12:59 +0200317LY_ERR
Radek Krejci86d106e2018-10-18 09:53:19 +0200318ly_mmap(struct ly_ctx *ctx, int fd, size_t *length, void **addr)
Michal Vasko841d1a92018-09-07 15:40:31 +0200319{
Radek Krejci86d106e2018-10-18 09:53:19 +0200320 struct stat sb;
321 long pagesize;
322 size_t m;
Michal Vasko841d1a92018-09-07 15:40:31 +0200323
Radek Krejci86d106e2018-10-18 09:53:19 +0200324 assert(length);
325 assert(addr);
326 assert(fd >= 0);
Michal Vasko841d1a92018-09-07 15:40:31 +0200327
Radek Krejci86d106e2018-10-18 09:53:19 +0200328 if (fstat(fd, &sb) == -1) {
329 LOGERR(ctx, LY_ESYS, "Failed to stat the file descriptor (%s) for the mmap().", strerror(errno));
330 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200331 }
Radek Krejci86d106e2018-10-18 09:53:19 +0200332 if (!S_ISREG(sb.st_mode)) {
333 LOGERR(ctx, LY_EINVAL, "File to mmap() is not a regular file.");
334 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200335 }
Radek Krejci86d106e2018-10-18 09:53:19 +0200336 if (!sb.st_size) {
337 *addr = NULL;
338 return LY_SUCCESS;
339 }
340 pagesize = sysconf(_SC_PAGESIZE);
341
342 m = sb.st_size % pagesize;
Michal Vasko69730152020-10-09 16:30:07 +0200343 if (m && (pagesize - m >= 1)) {
Radek Krejci86d106e2018-10-18 09:53:19 +0200344 /* there will be enough space (at least 1 byte) after the file content mapping to provide zeroed NULL-termination byte */
345 *length = sb.st_size + 1;
346 *addr = mmap(NULL, *length, PROT_READ, MAP_PRIVATE, fd, 0);
347 } else {
348 /* there will not be enough bytes after the file content mapping for the additional bytes and some of them
349 * would overflow into another page that would not be zerroed and any access into it would generate SIGBUS.
350 * Therefore we have to do the following hack with double mapping. First, the required number of bytes
351 * (including the additinal bytes) is required as anonymous and thus they will be really provided (actually more
352 * because of using whole pages) and also initialized by zeros. Then, the file is mapped to the same address
353 * where the anonymous mapping starts. */
354 *length = sb.st_size + pagesize;
355 *addr = mmap(NULL, *length, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
356 *addr = mmap(*addr, sb.st_size, PROT_READ, MAP_PRIVATE | MAP_FIXED, fd, 0);
357 }
358 if (*addr == MAP_FAILED) {
359 LOGERR(ctx, LY_ESYS, "mmap() failed (%s).", strerror(errno));
360 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200361 }
362
Radek Krejcif345c012018-09-19 11:12:59 +0200363 return LY_SUCCESS;
Radek Krejci86d106e2018-10-18 09:53:19 +0200364}
Michal Vasko841d1a92018-09-07 15:40:31 +0200365
Radek Krejci86d106e2018-10-18 09:53:19 +0200366LY_ERR
367ly_munmap(void *addr, size_t length)
368{
369 if (munmap(addr, length)) {
370 return LY_ESYS;
371 }
372 return LY_SUCCESS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200373}
Radek Krejci4f28eda2018-11-12 11:46:16 +0100374
375LY_ERR
Radek Krejci4546aa62019-07-15 16:53:32 +0200376ly_strcat(char **dest, const char *format, ...)
377{
378 va_list fp;
379 char *addition = NULL;
380 size_t len;
381
382 va_start(fp, format);
383 len = vasprintf(&addition, format, fp);
384 len += (*dest ? strlen(*dest) : 0) + 1;
385
386 if (*dest) {
387 *dest = ly_realloc(*dest, len);
388 if (!*dest) {
Radek Krejci1cd812f2020-12-01 12:17:53 +0100389 va_end(fp);
Radek Krejci4546aa62019-07-15 16:53:32 +0200390 return LY_EMEM;
391 }
392 *dest = strcat(*dest, addition);
393 free(addition);
394 } else {
395 *dest = addition;
396 }
397
398 va_end(fp);
399 return LY_SUCCESS;
400}
401
402LY_ERR
Radek Krejci249973a2019-06-10 10:50:54 +0200403ly_parse_int(const char *val_str, size_t val_len, int64_t min, int64_t max, int base, int64_t *ret)
Radek Krejci4f28eda2018-11-12 11:46:16 +0100404{
405 char *strptr;
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200406 int64_t i;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100407
Radek Krejci249973a2019-06-10 10:50:54 +0200408 LY_CHECK_ARG_RET(NULL, val_str, val_str[0], val_len, LY_EINVAL);
Radek Krejci4f28eda2018-11-12 11:46:16 +0100409
410 /* convert to 64-bit integer, all the redundant characters are handled */
411 errno = 0;
412 strptr = NULL;
413
414 /* parse the value */
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200415 i = strtoll(val_str, &strptr, base);
Michal Vasko69730152020-10-09 16:30:07 +0200416 if (errno || (strptr == val_str)) {
Radek Krejci4f28eda2018-11-12 11:46:16 +0100417 return LY_EVALID;
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200418 } else if ((i < min) || (i > max)) {
Radek Krejci4f28eda2018-11-12 11:46:16 +0100419 return LY_EDENIED;
420 } else if (strptr && *strptr) {
421 while (isspace(*strptr)) {
422 ++strptr;
423 }
Michal Vasko69730152020-10-09 16:30:07 +0200424 if (*strptr && (strptr < val_str + val_len)) {
Radek Krejci4f28eda2018-11-12 11:46:16 +0100425 return LY_EVALID;
426 }
427 }
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200428
429 *ret = i;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100430 return LY_SUCCESS;
431}
432
433LY_ERR
Radek Krejci249973a2019-06-10 10:50:54 +0200434ly_parse_uint(const char *val_str, size_t val_len, uint64_t max, int base, uint64_t *ret)
Radek Krejci4f28eda2018-11-12 11:46:16 +0100435{
436 char *strptr;
437 uint64_t u;
438
439 LY_CHECK_ARG_RET(NULL, val_str, val_str[0], LY_EINVAL);
440
441 errno = 0;
442 strptr = NULL;
443 u = strtoull(val_str, &strptr, base);
Michal Vasko69730152020-10-09 16:30:07 +0200444 if (errno || (strptr == val_str)) {
Radek Krejci4f28eda2018-11-12 11:46:16 +0100445 return LY_EVALID;
Michal Vasko69730152020-10-09 16:30:07 +0200446 } else if ((u > max) || (u && (val_str[0] == '-'))) {
Radek Krejci4f28eda2018-11-12 11:46:16 +0100447 return LY_EDENIED;
448 } else if (strptr && *strptr) {
449 while (isspace(*strptr)) {
450 ++strptr;
451 }
Michal Vasko69730152020-10-09 16:30:07 +0200452 if (*strptr && (strptr < val_str + val_len)) {
Radek Krejci4f28eda2018-11-12 11:46:16 +0100453 return LY_EVALID;
454 }
455 }
456
457 *ret = u;
458 return LY_SUCCESS;
459}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200460
461/**
462 * @brief Parse an identifier.
463 *
464 * ;; An identifier MUST NOT start with (('X'|'x') ('M'|'m') ('L'|'l'))
465 * identifier = (ALPHA / "_")
466 * *(ALPHA / DIGIT / "_" / "-" / ".")
467 *
468 * @param[in,out] id Identifier to parse. When returned, it points to the first character which is not part of the identifier.
469 * @return LY_ERR value: LY_SUCCESS or LY_EINVAL in case of invalid starting character.
470 */
471static LY_ERR
472lys_parse_id(const char **id)
473{
474 assert(id && *id);
475
476 if (!is_yangidentstartchar(**id)) {
477 return LY_EINVAL;
478 }
479 ++(*id);
480
481 while (is_yangidentchar(**id)) {
482 ++(*id);
483 }
484 return LY_SUCCESS;
485}
486
487LY_ERR
488ly_parse_nodeid(const char **id, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
489{
490 assert(id && *id);
491 assert(prefix && prefix_len);
492 assert(name && name_len);
493
494 *prefix = *id;
495 *prefix_len = 0;
496 *name = NULL;
497 *name_len = 0;
498
499 LY_CHECK_RET(lys_parse_id(id));
500 if (**id == ':') {
501 /* there is prefix */
502 *prefix_len = *id - *prefix;
503 ++(*id);
504 *name = *id;
505
506 LY_CHECK_RET(lys_parse_id(id));
507 *name_len = *id - *name;
508 } else {
509 /* there is no prefix, so what we have as prefix now is actually the name */
510 *name = *prefix;
511 *name_len = *id - *name;
512 *prefix = NULL;
513 }
514
515 return LY_SUCCESS;
516}
517
518LY_ERR
Radek Krejci084289f2019-07-09 17:35:30 +0200519ly_parse_instance_predicate(const char **pred, size_t limit, LYD_FORMAT format,
Radek Krejci0f969882020-08-21 16:56:47 +0200520 const char **prefix, size_t *prefix_len, const char **id, size_t *id_len, const char **value, size_t *value_len,
521 const char **errmsg)
Radek Krejcib4a4a272019-06-10 12:44:52 +0200522{
523 LY_ERR ret = LY_EVALID;
524 const char *in = *pred;
525 size_t offset = 1;
Radek Krejci857189e2020-09-01 13:26:36 +0200526 uint8_t expr = 0; /* 0 - position predicate; 1 - leaf-list-predicate; 2 - key-predicate */
Radek Krejcib4a4a272019-06-10 12:44:52 +0200527 char quot;
528
Radek Krejci4607f542020-12-01 12:18:49 +0100529 assert(in[0] == '[');
Radek Krejcib4a4a272019-06-10 12:44:52 +0200530
531 *prefix = *id = *value = NULL;
532 *prefix_len = *id_len = *value_len = 0;
533
534 /* leading *WSP */
Michal Vaskod989ba02020-08-24 10:59:24 +0200535 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200536
537 if (isdigit(in[offset])) {
538 /* pos: "[" *WSP positive-integer-value *WSP "]" */
539 if (in[offset] == '0') {
540 /* zero */
541 *errmsg = "The position predicate cannot be zero.";
542 goto error;
543 }
544
545 /* positive-integer-value */
Radek Krejci10bfdf82019-06-10 14:08:13 +0200546 *value = &in[offset++];
Michal Vaskod989ba02020-08-24 10:59:24 +0200547 for ( ; isdigit(in[offset]); offset++) {}
Radek Krejci10bfdf82019-06-10 14:08:13 +0200548 *value_len = &in[offset] - *value;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200549
550 } else if (in[offset] == '.') {
551 /* leaf-list-predicate: "[" *WSP "." *WSP "=" *WSP quoted-string *WSP "]" */
552 *id = &in[offset];
553 *id_len = 1;
554 offset++;
555 expr = 1;
Radek Krejci10bfdf82019-06-10 14:08:13 +0200556 } else if (in[offset] == '-') {
557 /* typically negative value */
558 *errmsg = "Invalid instance predicate format (negative position or invalid node-identifier).";
559 goto error;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200560 } else {
561 /* key-predicate: "[" *WSP node-identifier *WSP "=" *WSP quoted-string *WSP "]" */
562 in = &in[offset];
563 if (ly_parse_nodeid(&in, prefix, prefix_len, id, id_len)) {
564 *errmsg = "Invalid node-identifier.";
565 goto error;
566 }
Michal Vasko69730152020-10-09 16:30:07 +0200567 if ((format == LYD_XML) && !(*prefix)) {
Radek Krejci084289f2019-07-09 17:35:30 +0200568 /* all node names MUST be qualified with explicit namespace prefix */
569 *errmsg = "Missing prefix of a node name.";
570 goto error;
571 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200572 offset = in - *pred;
573 in = *pred;
Radek Krejci10bfdf82019-06-10 14:08:13 +0200574 expr = 2;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200575 }
576
577 if (expr) {
578 /* *WSP "=" *WSP quoted-string *WSP "]" */
Michal Vaskod989ba02020-08-24 10:59:24 +0200579 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200580
581 if (in[offset] != '=') {
Radek Krejci10bfdf82019-06-10 14:08:13 +0200582 if (expr == 1) {
583 *errmsg = "Unexpected character instead of \'=\' in leaf-list-predicate.";
584 } else { /* 2 */
585 *errmsg = "Unexpected character instead of \'=\' in key-predicate.";
586 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200587 goto error;
588 }
589 offset++;
Michal Vaskod989ba02020-08-24 10:59:24 +0200590 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200591
592 /* quoted-string */
593 quot = in[offset++];
Michal Vasko69730152020-10-09 16:30:07 +0200594 if ((quot != '\'') && (quot != '\"')) {
Radek Krejcib4a4a272019-06-10 12:44:52 +0200595 *errmsg = "String value is not quoted.";
596 goto error;
597 }
598 *value = &in[offset];
Michal Vaskod989ba02020-08-24 10:59:24 +0200599 for ( ; offset < limit && (in[offset] != quot || (offset && in[offset - 1] == '\\')); offset++) {}
Radek Krejci10bfdf82019-06-10 14:08:13 +0200600 if (in[offset] == quot) {
601 *value_len = &in[offset] - *value;
602 offset++;
603 } else {
604 *errmsg = "Value is not terminated quoted-string.";
605 goto error;
606 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200607 }
608
609 /* *WSP "]" */
Michal Vaskod989ba02020-08-24 10:59:24 +0200610 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200611 if (in[offset] != ']') {
Radek Krejci10bfdf82019-06-10 14:08:13 +0200612 if (expr == 0) {
613 *errmsg = "Predicate (pos) is not terminated by \']\' character.";
614 } else if (expr == 1) {
615 *errmsg = "Predicate (leaf-list-predicate) is not terminated by \']\' character.";
616 } else { /* 2 */
617 *errmsg = "Predicate (key-predicate) is not terminated by \']\' character.";
618 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200619 goto error;
620 }
Radek Krejci10bfdf82019-06-10 14:08:13 +0200621 offset++;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200622
Radek Krejci10bfdf82019-06-10 14:08:13 +0200623 if (offset <= limit) {
624 *pred = &in[offset];
Radek Krejcib4a4a272019-06-10 12:44:52 +0200625 return LY_SUCCESS;
626 }
627
628 /* we read after the limit */
629 *errmsg = "Predicate is incomplete.";
630 *prefix = *id = *value = NULL;
631 *prefix_len = *id_len = *value_len = 0;
632 offset = limit;
633 ret = LY_EINVAL;
634
635error:
636 *pred = &in[offset];
637 return ret;
638}