blob: 25e19f1202c978713e2cf292648fea13c3f5af32 [file] [log] [blame]
/**
 * @file common.c
 * @author Michal Vasko <mvasko@cesnet.cz>
 * @brief common internal definitions for libyang
 *
 * Copyright (c) 2018 CESNET, z.s.p.o.
 *
 * This source code is licensed under BSD 3-Clause License (the "License").
 * You may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://opensource.org/licenses/BSD-3-Clause
 */
Radek Krejcib7db73a2018-10-24 14:18:40 +020014
Radek Krejci535ea9f2020-05-29 16:01:05 +020015#define _GNU_SOURCE
16
Radek Krejcib7db73a2018-10-24 14:18:40 +020017#include "common.h"
Michal Vasko1324b6c2018-09-07 11:16:23 +020018
Radek Krejci86d106e2018-10-18 09:53:19 +020019#include <assert.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020020#include <ctype.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020021#include <errno.h>
Michal Vasko7f401462021-05-03 14:33:05 +020022#include <inttypes.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020023#include <stdarg.h>
Radek Krejci535ea9f2020-05-29 16:01:05 +020024#include <stdio.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020025#include <stdlib.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020026#include <string.h>
Radek Krejci86d106e2018-10-18 09:53:19 +020027#include <sys/mman.h>
28#include <sys/stat.h>
Radek Krejci86d106e2018-10-18 09:53:19 +020029#include <unistd.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020030
Radek Krejciaa45bda2020-07-20 07:43:38 +020031#include "compat.h"
Radek Krejcib4a4a272019-06-10 12:44:52 +020032#include "tree_schema_internal.h"
Michal Vasko1324b6c2018-09-07 11:16:23 +020033
/**
 * @brief Wrapper for realloc() that frees the original memory when the reallocation fails.
 *
 * A zero @p size is promoted to a single byte. Calling realloc() with size 0 may
 * free @p ptr and return NULL, which would make the failure path below free the
 * same block a second time.
 *
 * @param[in] ptr Memory to reallocate, may be NULL.
 * @param[in] size New size in bytes.
 * @return Pointer to the reallocated memory.
 * @return NULL on failure, @p ptr has been freed in that case.
 */
void *
ly_realloc(void *ptr, size_t size)
{
    void *new_mem;

    /* never ask realloc() for 0 bytes, its result would be ambiguous */
    new_mem = realloc(ptr, size ? size : 1);
    if (!new_mem) {
        /* the original block is still valid here, release it for the caller */
        free(ptr);
    }

    return new_mem;
}
Michal Vasko841d1a92018-09-07 15:40:31 +020046
/**
 * @brief Find the first occurrence of a character in at most @p len bytes of a string.
 *
 * The search stops at the terminating NUL byte or after @p len bytes, whichever
 * comes first. No byte at index @p len or beyond is ever read.
 *
 * @param[in] s String to search in.
 * @param[in] c Character to look for (converted to char).
 * @param[in] len Maximum number of bytes of @p s to examine.
 * @return Pointer to the first matching byte within the limit.
 * @return NULL if the character was not found.
 */
char *
ly_strnchr(const char *s, int c, size_t len)
{
    for ( ; len; ++s, --len) {
        if (*s == (char)c) {
            return (char *)s;
        }
        if (*s == '\0') {
            /* end of the string reached without a match */
            return NULL;
        }
    }

    return NULL;
}
57
/**
 * @brief Compare a NUL-terminated reference string with the first @p str_len bytes of another string.
 *
 * The strings are considered equal only when the first @p str_len bytes match
 * and the reference string ends exactly after them.
 *
 * @param[in] refstr NUL-terminated reference string.
 * @param[in] str String to compare, only @p str_len bytes are considered.
 * @param[in] str_len Number of bytes of @p str to compare.
 * @return 0 if the strings are equal.
 * @return Non-zero strncmp() result if the compared bytes differ, 1 if @p refstr is longer than @p str_len.
 */
int
ly_strncmp(const char *refstr, const char *str, size_t str_len)
{
    int diff;

    diff = strncmp(refstr, str, str_len);
    if (diff) {
        return diff;
    }

    /* the compared bytes match, so the result depends on whether refstr continues */
    return (refstr[str_len] == '\0') ? 0 : 1;
}
69
Michal Vasko7f401462021-05-03 14:33:05 +020070#define LY_OVERFLOW_ADD(MAX, X, Y) ((X > MAX - Y) ? 1 : 0)
71
72#define LY_OVERFLOW_MUL(MAX, X, Y) ((X > MAX / Y) ? 1 : 0)
73
74LY_ERR
75ly_strntou8(const char *nptr, size_t len, uint8_t *ret)
76{
77 uint8_t num = 0, dig, dec_pow;
78
79 if (len > 3) {
80 /* overflow for sure */
81 return LY_EDENIED;
82 }
83
84 dec_pow = 1;
85 for ( ; len && isdigit(nptr[len - 1]); --len) {
86 dig = nptr[len - 1] - 48;
87
88 if (LY_OVERFLOW_MUL(UINT8_MAX, dig, dec_pow)) {
89 return LY_EDENIED;
90 }
91 dig *= dec_pow;
92
93 if (LY_OVERFLOW_ADD(UINT8_MAX, num, dig)) {
94 return LY_EDENIED;
95 }
96 num += dig;
97
98 dec_pow *= 10;
99 }
100
101 if (len) {
102 return LY_EVALID;
103 }
104 *ret = num;
105 return LY_SUCCESS;
106}
107
aPiecekf102d4d2021-03-30 12:18:38 +0200108uint32_t
109ly_value_prefix_next(const char *str_begin, const char *str_end, ly_bool *is_prefix, const char **str_next)
110{
111 const char *stop, *prefix;
112 size_t bytes;
113 uint32_t c;
114 ly_bool prefix_found;
115 uint32_t ret;
116
117 assert(is_prefix && str_next);
118
119#define IS_AT_END(PTR, STR_END) (STR_END ? PTR == STR_END : !(*PTR))
120
121 *str_next = NULL;
122 *is_prefix = 0;
123 ret = 0;
124
125 if (!str_begin || !(*str_begin) || (str_begin == str_end)) {
126 return ret;
127 }
128
129 stop = str_begin;
130 prefix = NULL;
131 prefix_found = 0;
132
133 do {
134 /* look for the beginning of the YANG value */
135 for (ly_getutf8(&stop, &c, &bytes);
136 !is_xmlqnamestartchar(c) && !IS_AT_END(stop, str_end);
137 ly_getutf8(&stop, &c, &bytes)) {}
138
139 if (IS_AT_END(stop, str_end)) {
140 break;
141 }
142
143 /* maybe the prefix was found */
144 prefix = stop - bytes;
145
146 /* look for the the end of the prefix */
147 for (ly_getutf8(&stop, &c, &bytes);
148 is_xmlqnamechar(c) && !IS_AT_END(stop, str_end);
149 ly_getutf8(&stop, &c, &bytes)) {}
150
151 prefix_found = c == ':' ? 1 : 0;
152
153 /* if it wasn't the prefix, keep looking */
154 } while (!IS_AT_END(stop, str_end) && !prefix_found);
155
156 if ((str_begin == prefix) && prefix_found) {
157 /* prefix found at the beginning of the input string */
158 *is_prefix = 1;
159 *str_next = IS_AT_END(stop, str_end) ? NULL : stop;
160 ret = (stop - bytes) - str_begin;
161 } else if ((str_begin != prefix) && (prefix_found)) {
162 /* there is a some string before prefix */
163 *str_next = prefix;
164 ret = prefix - str_begin;
165 } else {
166 /* no prefix found */
167 ret = stop - str_begin;
168 }
169
170#undef IS_AT_END
171
172 return ret;
173}
174
Radek Krejcib416be62018-10-01 14:51:45 +0200175LY_ERR
Michal Vaskob36053d2020-03-26 15:49:30 +0100176ly_getutf8(const char **input, uint32_t *utf8_char, size_t *bytes_read)
Radek Krejcib416be62018-10-01 14:51:45 +0200177{
Radek Krejci1deb5be2020-08-26 16:43:36 +0200178 uint32_t c, aux;
179 size_t len;
Radek Krejcib416be62018-10-01 14:51:45 +0200180
Radek Krejcicc6a45c2019-05-13 10:16:14 +0200181 if (bytes_read) {
182 (*bytes_read) = 0;
183 }
184
Radek Krejcib416be62018-10-01 14:51:45 +0200185 c = (*input)[0];
186 LY_CHECK_RET(!c, LY_EINVAL);
187
188 if (!(c & 0x80)) {
189 /* one byte character */
190 len = 1;
191
Michal Vasko69730152020-10-09 16:30:07 +0200192 if ((c < 0x20) && (c != 0x9) && (c != 0xa) && (c != 0xd)) {
Radek Krejcib416be62018-10-01 14:51:45 +0200193 return LY_EINVAL;
194 }
195 } else if ((c & 0xe0) == 0xc0) {
196 /* two bytes character */
197 len = 2;
198
199 aux = (*input)[1];
200 if ((aux & 0xc0) != 0x80) {
201 return LY_EINVAL;
202 }
203 c = ((c & 0x1f) << 6) | (aux & 0x3f);
204
205 if (c < 0x80) {
206 return LY_EINVAL;
207 }
208 } else if ((c & 0xf0) == 0xe0) {
209 /* three bytes character */
210 len = 3;
211
212 c &= 0x0f;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200213 for (uint64_t i = 1; i <= 2; i++) {
Radek Krejcib416be62018-10-01 14:51:45 +0200214 aux = (*input)[i];
215 if ((aux & 0xc0) != 0x80) {
216 return LY_EINVAL;
217 }
218
219 c = (c << 6) | (aux & 0x3f);
220 }
221
Michal Vasko69730152020-10-09 16:30:07 +0200222 if ((c < 0x800) || ((c > 0xd7ff) && (c < 0xe000)) || (c > 0xfffd)) {
Radek Krejcib416be62018-10-01 14:51:45 +0200223 return LY_EINVAL;
224 }
225 } else if ((c & 0xf8) == 0xf0) {
226 /* four bytes character */
227 len = 4;
228
229 c &= 0x07;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200230 for (uint64_t i = 1; i <= 3; i++) {
Radek Krejcib416be62018-10-01 14:51:45 +0200231 aux = (*input)[i];
232 if ((aux & 0xc0) != 0x80) {
233 return LY_EINVAL;
234 }
235
236 c = (c << 6) | (aux & 0x3f);
237 }
238
Michal Vasko69730152020-10-09 16:30:07 +0200239 if ((c < 0x1000) || (c > 0x10ffff)) {
Radek Krejcib416be62018-10-01 14:51:45 +0200240 return LY_EINVAL;
241 }
242 } else {
243 return LY_EINVAL;
244 }
245
246 (*utf8_char) = c;
247 (*input) += len;
248 if (bytes_read) {
249 (*bytes_read) = len;
250 }
251 return LY_SUCCESS;
252}
253
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200254LY_ERR
255ly_pututf8(char *dst, uint32_t value, size_t *bytes_written)
256{
257 if (value < 0x80) {
258 /* one byte character */
Michal Vasko69730152020-10-09 16:30:07 +0200259 if ((value < 0x20) &&
260 (value != 0x09) &&
261 (value != 0x0a) &&
262 (value != 0x0d)) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200263 return LY_EINVAL;
264 }
265
266 dst[0] = value;
267 (*bytes_written) = 1;
268 } else if (value < 0x800) {
269 /* two bytes character */
270 dst[0] = 0xc0 | (value >> 6);
271 dst[1] = 0x80 | (value & 0x3f);
272 (*bytes_written) = 2;
273 } else if (value < 0xfffe) {
274 /* three bytes character */
275 if (((value & 0xf800) == 0xd800) ||
Michal Vasko69730152020-10-09 16:30:07 +0200276 ((value >= 0xfdd0) && (value <= 0xfdef))) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200277 /* exclude surrogate blocks %xD800-DFFF */
278 /* exclude noncharacters %xFDD0-FDEF */
279 return LY_EINVAL;
280 }
281
282 dst[0] = 0xe0 | (value >> 12);
283 dst[1] = 0x80 | ((value >> 6) & 0x3f);
284 dst[2] = 0x80 | (value & 0x3f);
285
286 (*bytes_written) = 3;
287 } else if (value < 0x10fffe) {
288 if ((value & 0xffe) == 0xffe) {
289 /* exclude noncharacters %xFFFE-FFFF, %x1FFFE-1FFFF, %x2FFFE-2FFFF, %x3FFFE-3FFFF, %x4FFFE-4FFFF,
290 * %x5FFFE-5FFFF, %x6FFFE-6FFFF, %x7FFFE-7FFFF, %x8FFFE-8FFFF, %x9FFFE-9FFFF, %xAFFFE-AFFFF,
291 * %xBFFFE-BFFFF, %xCFFFE-CFFFF, %xDFFFE-DFFFF, %xEFFFE-EFFFF, %xFFFFE-FFFFF, %x10FFFE-10FFFF */
292 return LY_EINVAL;
293 }
294 /* four bytes character */
295 dst[0] = 0xf0 | (value >> 18);
296 dst[1] = 0x80 | ((value >> 12) & 0x3f);
297 dst[2] = 0x80 | ((value >> 6) & 0x3f);
298 dst[3] = 0x80 | (value & 0x3f);
299
300 (*bytes_written) = 4;
301 } else {
302 return LY_EINVAL;
303 }
304 return LY_SUCCESS;
305}
306
/**
 * @brief Static table of the UTF8 characters lengths according to their first byte.
 */
static const unsigned char utf8_char_length_table[] = {
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
};

/**
 * @brief Count the UTF-8 characters in at most @p bytes bytes of a string.
 *
 * The character length is taken from the first byte of every character, the
 * following bytes are not validated. Counting stops at the terminating NUL byte
 * or at the byte limit and no byte behind either of them is read, not even
 * when the last character is truncated.
 *
 * @param[in] str String to examine.
 * @param[in] bytes Maximum number of bytes of @p str to examine.
 * @return Number of characters found, a truncated last character is counted as well.
 */
size_t
ly_utf8len(const char *str, size_t bytes)
{
    size_t len = 0, pos = 0, rest;

    /* the limit is checked first so that str[bytes] is never read */
    while ((pos < bytes) && str[pos]) {
        rest = utf8_char_length_table[(unsigned char)str[pos]];
        ++len;

        /* skip the whole character but never jump over the terminator or the limit */
        do {
            ++pos;
            --rest;
        } while (rest && (pos < bytes) && str[pos]);
    }
    return len;
}
341
Radek Krejcid972c252018-09-25 13:23:39 +0200342size_t
343LY_VCODE_INSTREXP_len(const char *str)
344{
345 size_t len = 0;
Michal Vasko69730152020-10-09 16:30:07 +0200346
Radek Krejcid972c252018-09-25 13:23:39 +0200347 if (!str) {
348 return len;
349 } else if (!str[0]) {
350 return 1;
351 }
Radek Krejci1e008d22020-08-17 11:37:37 +0200352 for (len = 1; len < LY_VCODE_INSTREXP_MAXLEN && str[len]; ++len) {}
Radek Krejcid972c252018-09-25 13:23:39 +0200353 return len;
354}
355
Radek Krejcif345c012018-09-19 11:12:59 +0200356LY_ERR
Radek Krejci86d106e2018-10-18 09:53:19 +0200357ly_mmap(struct ly_ctx *ctx, int fd, size_t *length, void **addr)
Michal Vasko841d1a92018-09-07 15:40:31 +0200358{
Radek Krejci86d106e2018-10-18 09:53:19 +0200359 struct stat sb;
360 long pagesize;
361 size_t m;
Michal Vasko841d1a92018-09-07 15:40:31 +0200362
Radek Krejci86d106e2018-10-18 09:53:19 +0200363 assert(length);
364 assert(addr);
365 assert(fd >= 0);
Michal Vasko841d1a92018-09-07 15:40:31 +0200366
Radek Krejci86d106e2018-10-18 09:53:19 +0200367 if (fstat(fd, &sb) == -1) {
368 LOGERR(ctx, LY_ESYS, "Failed to stat the file descriptor (%s) for the mmap().", strerror(errno));
369 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200370 }
Radek Krejci86d106e2018-10-18 09:53:19 +0200371 if (!S_ISREG(sb.st_mode)) {
372 LOGERR(ctx, LY_EINVAL, "File to mmap() is not a regular file.");
373 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200374 }
Radek Krejci86d106e2018-10-18 09:53:19 +0200375 if (!sb.st_size) {
376 *addr = NULL;
377 return LY_SUCCESS;
378 }
379 pagesize = sysconf(_SC_PAGESIZE);
380
381 m = sb.st_size % pagesize;
Michal Vasko69730152020-10-09 16:30:07 +0200382 if (m && (pagesize - m >= 1)) {
Radek Krejci86d106e2018-10-18 09:53:19 +0200383 /* there will be enough space (at least 1 byte) after the file content mapping to provide zeroed NULL-termination byte */
384 *length = sb.st_size + 1;
385 *addr = mmap(NULL, *length, PROT_READ, MAP_PRIVATE, fd, 0);
386 } else {
387 /* there will not be enough bytes after the file content mapping for the additional bytes and some of them
388 * would overflow into another page that would not be zerroed and any access into it would generate SIGBUS.
389 * Therefore we have to do the following hack with double mapping. First, the required number of bytes
390 * (including the additinal bytes) is required as anonymous and thus they will be really provided (actually more
391 * because of using whole pages) and also initialized by zeros. Then, the file is mapped to the same address
392 * where the anonymous mapping starts. */
393 *length = sb.st_size + pagesize;
394 *addr = mmap(NULL, *length, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
395 *addr = mmap(*addr, sb.st_size, PROT_READ, MAP_PRIVATE | MAP_FIXED, fd, 0);
396 }
397 if (*addr == MAP_FAILED) {
398 LOGERR(ctx, LY_ESYS, "mmap() failed (%s).", strerror(errno));
399 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200400 }
401
Radek Krejcif345c012018-09-19 11:12:59 +0200402 return LY_SUCCESS;
Radek Krejci86d106e2018-10-18 09:53:19 +0200403}
Michal Vasko841d1a92018-09-07 15:40:31 +0200404
Radek Krejci86d106e2018-10-18 09:53:19 +0200405LY_ERR
406ly_munmap(void *addr, size_t length)
407{
408 if (munmap(addr, length)) {
409 return LY_ESYS;
410 }
411 return LY_SUCCESS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200412}
Radek Krejci4f28eda2018-11-12 11:46:16 +0100413
414LY_ERR
Radek Krejci4546aa62019-07-15 16:53:32 +0200415ly_strcat(char **dest, const char *format, ...)
416{
417 va_list fp;
418 char *addition = NULL;
419 size_t len;
420
421 va_start(fp, format);
422 len = vasprintf(&addition, format, fp);
423 len += (*dest ? strlen(*dest) : 0) + 1;
424
425 if (*dest) {
426 *dest = ly_realloc(*dest, len);
427 if (!*dest) {
Radek Krejci1cd812f2020-12-01 12:17:53 +0100428 va_end(fp);
Radek Krejci4546aa62019-07-15 16:53:32 +0200429 return LY_EMEM;
430 }
431 *dest = strcat(*dest, addition);
432 free(addition);
433 } else {
434 *dest = addition;
435 }
436
437 va_end(fp);
438 return LY_SUCCESS;
439}
440
441LY_ERR
Radek Krejci249973a2019-06-10 10:50:54 +0200442ly_parse_int(const char *val_str, size_t val_len, int64_t min, int64_t max, int base, int64_t *ret)
Radek Krejci4f28eda2018-11-12 11:46:16 +0100443{
444 char *strptr;
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200445 int64_t i;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100446
Radek Krejci249973a2019-06-10 10:50:54 +0200447 LY_CHECK_ARG_RET(NULL, val_str, val_str[0], val_len, LY_EINVAL);
Radek Krejci4f28eda2018-11-12 11:46:16 +0100448
449 /* convert to 64-bit integer, all the redundant characters are handled */
450 errno = 0;
451 strptr = NULL;
452
453 /* parse the value */
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200454 i = strtoll(val_str, &strptr, base);
Michal Vasko69730152020-10-09 16:30:07 +0200455 if (errno || (strptr == val_str)) {
Radek Krejci4f28eda2018-11-12 11:46:16 +0100456 return LY_EVALID;
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200457 } else if ((i < min) || (i > max)) {
Radek Krejci4f28eda2018-11-12 11:46:16 +0100458 return LY_EDENIED;
459 } else if (strptr && *strptr) {
460 while (isspace(*strptr)) {
461 ++strptr;
462 }
Michal Vasko69730152020-10-09 16:30:07 +0200463 if (*strptr && (strptr < val_str + val_len)) {
Radek Krejci4f28eda2018-11-12 11:46:16 +0100464 return LY_EVALID;
465 }
466 }
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200467
468 *ret = i;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100469 return LY_SUCCESS;
470}
471
472LY_ERR
Radek Krejci249973a2019-06-10 10:50:54 +0200473ly_parse_uint(const char *val_str, size_t val_len, uint64_t max, int base, uint64_t *ret)
Radek Krejci4f28eda2018-11-12 11:46:16 +0100474{
475 char *strptr;
476 uint64_t u;
477
478 LY_CHECK_ARG_RET(NULL, val_str, val_str[0], LY_EINVAL);
479
480 errno = 0;
481 strptr = NULL;
482 u = strtoull(val_str, &strptr, base);
Michal Vasko69730152020-10-09 16:30:07 +0200483 if (errno || (strptr == val_str)) {
Radek Krejci4f28eda2018-11-12 11:46:16 +0100484 return LY_EVALID;
Michal Vasko69730152020-10-09 16:30:07 +0200485 } else if ((u > max) || (u && (val_str[0] == '-'))) {
Radek Krejci4f28eda2018-11-12 11:46:16 +0100486 return LY_EDENIED;
487 } else if (strptr && *strptr) {
488 while (isspace(*strptr)) {
489 ++strptr;
490 }
Michal Vasko69730152020-10-09 16:30:07 +0200491 if (*strptr && (strptr < val_str + val_len)) {
Radek Krejci4f28eda2018-11-12 11:46:16 +0100492 return LY_EVALID;
493 }
494 }
495
496 *ret = u;
497 return LY_SUCCESS;
498}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200499
500/**
501 * @brief Parse an identifier.
502 *
503 * ;; An identifier MUST NOT start with (('X'|'x') ('M'|'m') ('L'|'l'))
504 * identifier = (ALPHA / "_")
505 * *(ALPHA / DIGIT / "_" / "-" / ".")
506 *
507 * @param[in,out] id Identifier to parse. When returned, it points to the first character which is not part of the identifier.
508 * @return LY_ERR value: LY_SUCCESS or LY_EINVAL in case of invalid starting character.
509 */
510static LY_ERR
511lys_parse_id(const char **id)
512{
513 assert(id && *id);
514
515 if (!is_yangidentstartchar(**id)) {
516 return LY_EINVAL;
517 }
518 ++(*id);
519
520 while (is_yangidentchar(**id)) {
521 ++(*id);
522 }
523 return LY_SUCCESS;
524}
525
526LY_ERR
527ly_parse_nodeid(const char **id, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
528{
529 assert(id && *id);
530 assert(prefix && prefix_len);
531 assert(name && name_len);
532
533 *prefix = *id;
534 *prefix_len = 0;
535 *name = NULL;
536 *name_len = 0;
537
538 LY_CHECK_RET(lys_parse_id(id));
539 if (**id == ':') {
540 /* there is prefix */
541 *prefix_len = *id - *prefix;
542 ++(*id);
543 *name = *id;
544
545 LY_CHECK_RET(lys_parse_id(id));
546 *name_len = *id - *name;
547 } else {
548 /* there is no prefix, so what we have as prefix now is actually the name */
549 *name = *prefix;
550 *name_len = *id - *name;
551 *prefix = NULL;
552 }
553
554 return LY_SUCCESS;
555}
556
557LY_ERR
Radek Krejci084289f2019-07-09 17:35:30 +0200558ly_parse_instance_predicate(const char **pred, size_t limit, LYD_FORMAT format,
Radek Krejci0f969882020-08-21 16:56:47 +0200559 const char **prefix, size_t *prefix_len, const char **id, size_t *id_len, const char **value, size_t *value_len,
560 const char **errmsg)
Radek Krejcib4a4a272019-06-10 12:44:52 +0200561{
562 LY_ERR ret = LY_EVALID;
563 const char *in = *pred;
564 size_t offset = 1;
Radek Krejci857189e2020-09-01 13:26:36 +0200565 uint8_t expr = 0; /* 0 - position predicate; 1 - leaf-list-predicate; 2 - key-predicate */
Radek Krejcib4a4a272019-06-10 12:44:52 +0200566 char quot;
567
Radek Krejci4607f542020-12-01 12:18:49 +0100568 assert(in[0] == '[');
Radek Krejcib4a4a272019-06-10 12:44:52 +0200569
570 *prefix = *id = *value = NULL;
571 *prefix_len = *id_len = *value_len = 0;
572
573 /* leading *WSP */
Michal Vaskod989ba02020-08-24 10:59:24 +0200574 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200575
576 if (isdigit(in[offset])) {
577 /* pos: "[" *WSP positive-integer-value *WSP "]" */
578 if (in[offset] == '0') {
579 /* zero */
580 *errmsg = "The position predicate cannot be zero.";
581 goto error;
582 }
583
584 /* positive-integer-value */
Radek Krejci10bfdf82019-06-10 14:08:13 +0200585 *value = &in[offset++];
Michal Vaskod989ba02020-08-24 10:59:24 +0200586 for ( ; isdigit(in[offset]); offset++) {}
Radek Krejci10bfdf82019-06-10 14:08:13 +0200587 *value_len = &in[offset] - *value;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200588
589 } else if (in[offset] == '.') {
590 /* leaf-list-predicate: "[" *WSP "." *WSP "=" *WSP quoted-string *WSP "]" */
591 *id = &in[offset];
592 *id_len = 1;
593 offset++;
594 expr = 1;
Radek Krejci10bfdf82019-06-10 14:08:13 +0200595 } else if (in[offset] == '-') {
596 /* typically negative value */
597 *errmsg = "Invalid instance predicate format (negative position or invalid node-identifier).";
598 goto error;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200599 } else {
600 /* key-predicate: "[" *WSP node-identifier *WSP "=" *WSP quoted-string *WSP "]" */
601 in = &in[offset];
602 if (ly_parse_nodeid(&in, prefix, prefix_len, id, id_len)) {
603 *errmsg = "Invalid node-identifier.";
604 goto error;
605 }
Michal Vasko69730152020-10-09 16:30:07 +0200606 if ((format == LYD_XML) && !(*prefix)) {
Radek Krejci084289f2019-07-09 17:35:30 +0200607 /* all node names MUST be qualified with explicit namespace prefix */
608 *errmsg = "Missing prefix of a node name.";
609 goto error;
610 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200611 offset = in - *pred;
612 in = *pred;
Radek Krejci10bfdf82019-06-10 14:08:13 +0200613 expr = 2;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200614 }
615
616 if (expr) {
617 /* *WSP "=" *WSP quoted-string *WSP "]" */
Michal Vaskod989ba02020-08-24 10:59:24 +0200618 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200619
620 if (in[offset] != '=') {
Radek Krejci10bfdf82019-06-10 14:08:13 +0200621 if (expr == 1) {
622 *errmsg = "Unexpected character instead of \'=\' in leaf-list-predicate.";
623 } else { /* 2 */
624 *errmsg = "Unexpected character instead of \'=\' in key-predicate.";
625 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200626 goto error;
627 }
628 offset++;
Michal Vaskod989ba02020-08-24 10:59:24 +0200629 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200630
631 /* quoted-string */
632 quot = in[offset++];
Michal Vasko69730152020-10-09 16:30:07 +0200633 if ((quot != '\'') && (quot != '\"')) {
Radek Krejcib4a4a272019-06-10 12:44:52 +0200634 *errmsg = "String value is not quoted.";
635 goto error;
636 }
637 *value = &in[offset];
Michal Vaskod989ba02020-08-24 10:59:24 +0200638 for ( ; offset < limit && (in[offset] != quot || (offset && in[offset - 1] == '\\')); offset++) {}
Radek Krejci10bfdf82019-06-10 14:08:13 +0200639 if (in[offset] == quot) {
640 *value_len = &in[offset] - *value;
641 offset++;
642 } else {
643 *errmsg = "Value is not terminated quoted-string.";
644 goto error;
645 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200646 }
647
648 /* *WSP "]" */
Michal Vaskod989ba02020-08-24 10:59:24 +0200649 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200650 if (in[offset] != ']') {
Radek Krejci10bfdf82019-06-10 14:08:13 +0200651 if (expr == 0) {
652 *errmsg = "Predicate (pos) is not terminated by \']\' character.";
653 } else if (expr == 1) {
654 *errmsg = "Predicate (leaf-list-predicate) is not terminated by \']\' character.";
655 } else { /* 2 */
656 *errmsg = "Predicate (key-predicate) is not terminated by \']\' character.";
657 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200658 goto error;
659 }
Radek Krejci10bfdf82019-06-10 14:08:13 +0200660 offset++;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200661
Radek Krejci10bfdf82019-06-10 14:08:13 +0200662 if (offset <= limit) {
663 *pred = &in[offset];
Radek Krejcib4a4a272019-06-10 12:44:52 +0200664 return LY_SUCCESS;
665 }
666
667 /* we read after the limit */
668 *errmsg = "Predicate is incomplete.";
669 *prefix = *id = *value = NULL;
670 *prefix_len = *id_len = *value_len = 0;
671 offset = limit;
672 ret = LY_EINVAL;
673
674error:
675 *pred = &in[offset];
676 return ret;
677}