blob: fa275889f4d51077ba6fa50eeee839594479f0a3 [file] [log] [blame]
/**
 * @file common.c
 * @author Michal Vasko <mvasko@cesnet.cz>
 * @brief common internal definitions for libyang
 *
 * Copyright (c) 2018 CESNET, z.s.p.o.
 *
 * This source code is licensed under BSD 3-Clause License (the "License").
 * You may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://opensource.org/licenses/BSD-3-Clause
 */
Radek Krejcib7db73a2018-10-24 14:18:40 +020014
Radek Krejci535ea9f2020-05-29 16:01:05 +020015#define _GNU_SOURCE
16
Radek Krejcib7db73a2018-10-24 14:18:40 +020017#include "common.h"
Michal Vasko1324b6c2018-09-07 11:16:23 +020018
Radek Krejci86d106e2018-10-18 09:53:19 +020019#include <assert.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020020#include <ctype.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020021#include <errno.h>
Michal Vasko15dc9fa2021-05-03 14:33:05 +020022#include <inttypes.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020023#include <stdarg.h>
Radek Krejci535ea9f2020-05-29 16:01:05 +020024#include <stdio.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020025#include <stdlib.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020026#include <string.h>
Radek Krejci86d106e2018-10-18 09:53:19 +020027#include <sys/mman.h>
28#include <sys/stat.h>
Radek Krejci86d106e2018-10-18 09:53:19 +020029#include <unistd.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020030
Radek Krejciaa45bda2020-07-20 07:43:38 +020031#include "compat.h"
Radek Krejcib4a4a272019-06-10 12:44:52 +020032#include "tree_schema_internal.h"
aPiecek704f8e92021-08-25 13:35:05 +020033#include "xml.h"
Michal Vasko1324b6c2018-09-07 11:16:23 +020034
/**
 * @brief Wrapper for realloc() that never leaves the original memory allocated on failure.
 *
 * @param[in] ptr Memory to resize, may be NULL.
 * @param[in] size New size in bytes.
 * @return Pointer to the resized memory.
 * @return NULL on failure or when releasing an existing block with @p size 0, @p ptr is freed in both cases.
 */
void *
ly_realloc(void *ptr, size_t size)
{
    void *new_mem;

    if (ptr && !size) {
        /* realloc(ptr, 0) is implementation-defined and may free ptr and still return NULL,
         * freeing ptr again after such a call would be a double free */
        free(ptr);
        return NULL;
    }

    new_mem = realloc(ptr, size);
    if (!new_mem) {
        /* the original block is untouched by a failed realloc(), release it for the caller */
        free(ptr);
    }

    return new_mem;
}
Michal Vasko841d1a92018-09-07 15:40:31 +020047
/**
 * @brief Find the first occurrence of a character in the first @p len bytes of a buffer.
 *
 * The search is bounded only by @p len, a NUL byte does not stop it.
 *
 * @param[in] s Buffer to search.
 * @param[in] c Character to look for, compared after conversion to char.
 * @param[in] len Number of bytes of @p s to examine.
 * @return Pointer to the first matching byte, NULL if there is none.
 */
char *
ly_strnchr(const char *s, int c, size_t len)
{
    const char wanted = (char)c;
    size_t idx;

    for (idx = 0; idx < len; ++idx) {
        if (s[idx] == wanted) {
            return (char *)&s[idx];
        }
    }

    return NULL;
}
54
/**
 * @brief Compare a NUL-terminated reference string with a length-limited string.
 *
 * @param[in] refstr NUL-terminated reference string.
 * @param[in] str String to compare, only its first @p str_len bytes are used.
 * @param[in] str_len Number of bytes of @p str to compare.
 * @return 0 if the first @p str_len bytes match and @p refstr ends right after them.
 * @return The non-zero strncmp() result if the compared bytes differ.
 * @return 1 if the compared bytes match but @p refstr continues.
 */
int
ly_strncmp(const char *refstr, const char *str, size_t str_len)
{
    const int diff = strncmp(refstr, str, str_len);

    if (diff) {
        return diff;
    }

    /* the compared part is the same, the strings are equal only if refstr ends here as well */
    return (refstr[str_len] == '\0') ? 0 : 1;
}
66
Michal Vasko15dc9fa2021-05-03 14:33:05 +020067LY_ERR
68ly_strntou8(const char *nptr, size_t len, uint8_t *ret)
69{
70 uint8_t num = 0, dig, dec_pow;
71
72 if (len > 3) {
73 /* overflow for sure */
74 return LY_EDENIED;
75 }
76
77 dec_pow = 1;
78 for ( ; len && isdigit(nptr[len - 1]); --len) {
79 dig = nptr[len - 1] - 48;
80
81 if (LY_OVERFLOW_MUL(UINT8_MAX, dig, dec_pow)) {
82 return LY_EDENIED;
83 }
84 dig *= dec_pow;
85
86 if (LY_OVERFLOW_ADD(UINT8_MAX, num, dig)) {
87 return LY_EDENIED;
88 }
89 num += dig;
90
91 dec_pow *= 10;
92 }
93
94 if (len) {
95 return LY_EVALID;
96 }
97 *ret = num;
98 return LY_SUCCESS;
99}
100
aPieceke3f828d2021-05-10 15:34:41 +0200101LY_ERR
102ly_value_prefix_next(const char *str_begin, const char *str_end, uint32_t *len, ly_bool *is_prefix, const char **str_next)
aPiecekf102d4d2021-03-30 12:18:38 +0200103{
104 const char *stop, *prefix;
aPieceke3f828d2021-05-10 15:34:41 +0200105 size_t bytes_read;
aPiecekf102d4d2021-03-30 12:18:38 +0200106 uint32_t c;
107 ly_bool prefix_found;
aPieceke3f828d2021-05-10 15:34:41 +0200108 LY_ERR ret = LY_SUCCESS;
aPiecekf102d4d2021-03-30 12:18:38 +0200109
aPieceke3f828d2021-05-10 15:34:41 +0200110 assert(len && is_prefix && str_next);
aPiecekf102d4d2021-03-30 12:18:38 +0200111
112#define IS_AT_END(PTR, STR_END) (STR_END ? PTR == STR_END : !(*PTR))
113
114 *str_next = NULL;
115 *is_prefix = 0;
aPieceke3f828d2021-05-10 15:34:41 +0200116 *len = 0;
aPiecekf102d4d2021-03-30 12:18:38 +0200117
118 if (!str_begin || !(*str_begin) || (str_begin == str_end)) {
119 return ret;
120 }
121
122 stop = str_begin;
123 prefix = NULL;
124 prefix_found = 0;
125
126 do {
127 /* look for the beginning of the YANG value */
aPieceke3f828d2021-05-10 15:34:41 +0200128 do {
129 LY_CHECK_RET(ly_getutf8(&stop, &c, &bytes_read));
130 } while (!is_xmlqnamestartchar(c) && !IS_AT_END(stop, str_end));
aPiecekf102d4d2021-03-30 12:18:38 +0200131
132 if (IS_AT_END(stop, str_end)) {
133 break;
134 }
135
136 /* maybe the prefix was found */
aPieceke3f828d2021-05-10 15:34:41 +0200137 prefix = stop - bytes_read;
aPiecekf102d4d2021-03-30 12:18:38 +0200138
139 /* look for the the end of the prefix */
aPieceke3f828d2021-05-10 15:34:41 +0200140 do {
141 LY_CHECK_RET(ly_getutf8(&stop, &c, &bytes_read));
142 } while (is_xmlqnamechar(c) && !IS_AT_END(stop, str_end));
aPiecekf102d4d2021-03-30 12:18:38 +0200143
144 prefix_found = c == ':' ? 1 : 0;
145
146 /* if it wasn't the prefix, keep looking */
147 } while (!IS_AT_END(stop, str_end) && !prefix_found);
148
149 if ((str_begin == prefix) && prefix_found) {
150 /* prefix found at the beginning of the input string */
151 *is_prefix = 1;
152 *str_next = IS_AT_END(stop, str_end) ? NULL : stop;
aPieceke3f828d2021-05-10 15:34:41 +0200153 *len = (stop - bytes_read) - str_begin;
aPiecekf102d4d2021-03-30 12:18:38 +0200154 } else if ((str_begin != prefix) && (prefix_found)) {
155 /* there is a some string before prefix */
156 *str_next = prefix;
aPieceke3f828d2021-05-10 15:34:41 +0200157 *len = prefix - str_begin;
aPiecekf102d4d2021-03-30 12:18:38 +0200158 } else {
159 /* no prefix found */
aPieceke3f828d2021-05-10 15:34:41 +0200160 *len = stop - str_begin;
aPiecekf102d4d2021-03-30 12:18:38 +0200161 }
162
163#undef IS_AT_END
164
165 return ret;
166}
167
Radek Krejcib416be62018-10-01 14:51:45 +0200168LY_ERR
Michal Vaskob36053d2020-03-26 15:49:30 +0100169ly_getutf8(const char **input, uint32_t *utf8_char, size_t *bytes_read)
Radek Krejcib416be62018-10-01 14:51:45 +0200170{
Radek Krejci1deb5be2020-08-26 16:43:36 +0200171 uint32_t c, aux;
172 size_t len;
Radek Krejcib416be62018-10-01 14:51:45 +0200173
Radek Krejcicc6a45c2019-05-13 10:16:14 +0200174 if (bytes_read) {
175 (*bytes_read) = 0;
176 }
177
Radek Krejcib416be62018-10-01 14:51:45 +0200178 c = (*input)[0];
179 LY_CHECK_RET(!c, LY_EINVAL);
180
181 if (!(c & 0x80)) {
182 /* one byte character */
183 len = 1;
184
Michal Vasko69730152020-10-09 16:30:07 +0200185 if ((c < 0x20) && (c != 0x9) && (c != 0xa) && (c != 0xd)) {
Radek Krejcib416be62018-10-01 14:51:45 +0200186 return LY_EINVAL;
187 }
188 } else if ((c & 0xe0) == 0xc0) {
189 /* two bytes character */
190 len = 2;
191
192 aux = (*input)[1];
193 if ((aux & 0xc0) != 0x80) {
194 return LY_EINVAL;
195 }
196 c = ((c & 0x1f) << 6) | (aux & 0x3f);
197
198 if (c < 0x80) {
199 return LY_EINVAL;
200 }
201 } else if ((c & 0xf0) == 0xe0) {
202 /* three bytes character */
203 len = 3;
204
205 c &= 0x0f;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200206 for (uint64_t i = 1; i <= 2; i++) {
Radek Krejcib416be62018-10-01 14:51:45 +0200207 aux = (*input)[i];
208 if ((aux & 0xc0) != 0x80) {
209 return LY_EINVAL;
210 }
211
212 c = (c << 6) | (aux & 0x3f);
213 }
214
Michal Vasko69730152020-10-09 16:30:07 +0200215 if ((c < 0x800) || ((c > 0xd7ff) && (c < 0xe000)) || (c > 0xfffd)) {
Radek Krejcib416be62018-10-01 14:51:45 +0200216 return LY_EINVAL;
217 }
218 } else if ((c & 0xf8) == 0xf0) {
219 /* four bytes character */
220 len = 4;
221
222 c &= 0x07;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200223 for (uint64_t i = 1; i <= 3; i++) {
Radek Krejcib416be62018-10-01 14:51:45 +0200224 aux = (*input)[i];
225 if ((aux & 0xc0) != 0x80) {
226 return LY_EINVAL;
227 }
228
229 c = (c << 6) | (aux & 0x3f);
230 }
231
Michal Vasko69730152020-10-09 16:30:07 +0200232 if ((c < 0x1000) || (c > 0x10ffff)) {
Radek Krejcib416be62018-10-01 14:51:45 +0200233 return LY_EINVAL;
234 }
235 } else {
236 return LY_EINVAL;
237 }
238
239 (*utf8_char) = c;
240 (*input) += len;
241 if (bytes_read) {
242 (*bytes_read) = len;
243 }
244 return LY_SUCCESS;
245}
246
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200247LY_ERR
248ly_pututf8(char *dst, uint32_t value, size_t *bytes_written)
249{
250 if (value < 0x80) {
251 /* one byte character */
Michal Vasko69730152020-10-09 16:30:07 +0200252 if ((value < 0x20) &&
253 (value != 0x09) &&
254 (value != 0x0a) &&
255 (value != 0x0d)) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200256 return LY_EINVAL;
257 }
258
259 dst[0] = value;
260 (*bytes_written) = 1;
261 } else if (value < 0x800) {
262 /* two bytes character */
263 dst[0] = 0xc0 | (value >> 6);
264 dst[1] = 0x80 | (value & 0x3f);
265 (*bytes_written) = 2;
266 } else if (value < 0xfffe) {
267 /* three bytes character */
268 if (((value & 0xf800) == 0xd800) ||
Michal Vasko69730152020-10-09 16:30:07 +0200269 ((value >= 0xfdd0) && (value <= 0xfdef))) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200270 /* exclude surrogate blocks %xD800-DFFF */
271 /* exclude noncharacters %xFDD0-FDEF */
272 return LY_EINVAL;
273 }
274
275 dst[0] = 0xe0 | (value >> 12);
276 dst[1] = 0x80 | ((value >> 6) & 0x3f);
277 dst[2] = 0x80 | (value & 0x3f);
278
279 (*bytes_written) = 3;
280 } else if (value < 0x10fffe) {
281 if ((value & 0xffe) == 0xffe) {
282 /* exclude noncharacters %xFFFE-FFFF, %x1FFFE-1FFFF, %x2FFFE-2FFFF, %x3FFFE-3FFFF, %x4FFFE-4FFFF,
283 * %x5FFFE-5FFFF, %x6FFFE-6FFFF, %x7FFFE-7FFFF, %x8FFFE-8FFFF, %x9FFFE-9FFFF, %xAFFFE-AFFFF,
284 * %xBFFFE-BFFFF, %xCFFFE-CFFFF, %xDFFFE-DFFFF, %xEFFFE-EFFFF, %xFFFFE-FFFFF, %x10FFFE-10FFFF */
285 return LY_EINVAL;
286 }
287 /* four bytes character */
288 dst[0] = 0xf0 | (value >> 18);
289 dst[1] = 0x80 | ((value >> 12) & 0x3f);
290 dst[2] = 0x80 | ((value >> 6) & 0x3f);
291 dst[3] = 0x80 | (value & 0x3f);
292
293 (*bytes_written) = 4;
294 } else {
295 return LY_EINVAL;
296 }
297 return LY_SUCCESS;
298}
299
/**
 * @brief Static table of the UTF8 characters lengths according to their first byte.
 *
 * Continuation bytes (0x80-0xbf) and the bytes 0xfe and 0xff have the length 1.
 */
static const unsigned char utf8_char_length_table[] = {
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
};

/**
 * @brief Count the UTF-8 characters in a string.
 *
 * The bytes are not validated, the length of every character is taken from its first byte only.
 * A character cut short by the NUL byte or by @p bytes is still counted as one character.
 *
 * @param[in] str String to examine.
 * @param[in] bytes Maximum number of bytes of @p str to examine, a NUL byte ends the string earlier.
 * @return Number of characters.
 */
size_t
ly_utf8len(const char *str, size_t bytes)
{
    size_t len = 0, pos = 0, char_end;

    while ((pos < bytes) && str[pos]) {
        ++len;
        char_end = pos + utf8_char_length_table[(unsigned char)str[pos]];

        /* move over the character byte by byte, jumping by the whole expected length could
         * skip the NUL byte of a truncated sequence and continue reading behind the string */
        for (++pos; (pos < char_end) && (pos < bytes) && str[pos]; ++pos) {}
    }
    return len;
}
334
Radek Krejcid972c252018-09-25 13:23:39 +0200335size_t
336LY_VCODE_INSTREXP_len(const char *str)
337{
338 size_t len = 0;
Michal Vasko69730152020-10-09 16:30:07 +0200339
Radek Krejcid972c252018-09-25 13:23:39 +0200340 if (!str) {
341 return len;
342 } else if (!str[0]) {
343 return 1;
344 }
Radek Krejci1e008d22020-08-17 11:37:37 +0200345 for (len = 1; len < LY_VCODE_INSTREXP_MAXLEN && str[len]; ++len) {}
Radek Krejcid972c252018-09-25 13:23:39 +0200346 return len;
347}
348
Radek Krejcif345c012018-09-19 11:12:59 +0200349LY_ERR
Radek Krejci86d106e2018-10-18 09:53:19 +0200350ly_mmap(struct ly_ctx *ctx, int fd, size_t *length, void **addr)
Michal Vasko841d1a92018-09-07 15:40:31 +0200351{
Radek Krejci86d106e2018-10-18 09:53:19 +0200352 struct stat sb;
353 long pagesize;
354 size_t m;
Michal Vasko841d1a92018-09-07 15:40:31 +0200355
Radek Krejci86d106e2018-10-18 09:53:19 +0200356 assert(length);
357 assert(addr);
358 assert(fd >= 0);
Michal Vasko841d1a92018-09-07 15:40:31 +0200359
Radek Krejci86d106e2018-10-18 09:53:19 +0200360 if (fstat(fd, &sb) == -1) {
361 LOGERR(ctx, LY_ESYS, "Failed to stat the file descriptor (%s) for the mmap().", strerror(errno));
362 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200363 }
Radek Krejci86d106e2018-10-18 09:53:19 +0200364 if (!S_ISREG(sb.st_mode)) {
365 LOGERR(ctx, LY_EINVAL, "File to mmap() is not a regular file.");
366 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200367 }
Radek Krejci86d106e2018-10-18 09:53:19 +0200368 if (!sb.st_size) {
369 *addr = NULL;
370 return LY_SUCCESS;
371 }
372 pagesize = sysconf(_SC_PAGESIZE);
373
374 m = sb.st_size % pagesize;
Michal Vasko69730152020-10-09 16:30:07 +0200375 if (m && (pagesize - m >= 1)) {
Radek Krejci86d106e2018-10-18 09:53:19 +0200376 /* there will be enough space (at least 1 byte) after the file content mapping to provide zeroed NULL-termination byte */
377 *length = sb.st_size + 1;
378 *addr = mmap(NULL, *length, PROT_READ, MAP_PRIVATE, fd, 0);
379 } else {
380 /* there will not be enough bytes after the file content mapping for the additional bytes and some of them
381 * would overflow into another page that would not be zerroed and any access into it would generate SIGBUS.
382 * Therefore we have to do the following hack with double mapping. First, the required number of bytes
383 * (including the additinal bytes) is required as anonymous and thus they will be really provided (actually more
384 * because of using whole pages) and also initialized by zeros. Then, the file is mapped to the same address
385 * where the anonymous mapping starts. */
386 *length = sb.st_size + pagesize;
387 *addr = mmap(NULL, *length, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
388 *addr = mmap(*addr, sb.st_size, PROT_READ, MAP_PRIVATE | MAP_FIXED, fd, 0);
389 }
390 if (*addr == MAP_FAILED) {
391 LOGERR(ctx, LY_ESYS, "mmap() failed (%s).", strerror(errno));
392 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200393 }
394
Radek Krejcif345c012018-09-19 11:12:59 +0200395 return LY_SUCCESS;
Radek Krejci86d106e2018-10-18 09:53:19 +0200396}
Michal Vasko841d1a92018-09-07 15:40:31 +0200397
Radek Krejci86d106e2018-10-18 09:53:19 +0200398LY_ERR
399ly_munmap(void *addr, size_t length)
400{
401 if (munmap(addr, length)) {
402 return LY_ESYS;
403 }
404 return LY_SUCCESS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200405}
Radek Krejci4f28eda2018-11-12 11:46:16 +0100406
407LY_ERR
Radek Krejci4546aa62019-07-15 16:53:32 +0200408ly_strcat(char **dest, const char *format, ...)
409{
410 va_list fp;
411 char *addition = NULL;
412 size_t len;
413
414 va_start(fp, format);
415 len = vasprintf(&addition, format, fp);
416 len += (*dest ? strlen(*dest) : 0) + 1;
417
418 if (*dest) {
419 *dest = ly_realloc(*dest, len);
420 if (!*dest) {
Radek Krejci1cd812f2020-12-01 12:17:53 +0100421 va_end(fp);
Radek Krejci4546aa62019-07-15 16:53:32 +0200422 return LY_EMEM;
423 }
424 *dest = strcat(*dest, addition);
425 free(addition);
426 } else {
427 *dest = addition;
428 }
429
430 va_end(fp);
431 return LY_SUCCESS;
432}
433
434LY_ERR
Radek Krejci249973a2019-06-10 10:50:54 +0200435ly_parse_int(const char *val_str, size_t val_len, int64_t min, int64_t max, int base, int64_t *ret)
Radek Krejci4f28eda2018-11-12 11:46:16 +0100436{
Michal Vaskob4d40d62021-05-04 11:42:44 +0200437 LY_ERR rc = LY_SUCCESS;
438 char *ptr, *str;
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200439 int64_t i;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100440
Radek Krejci249973a2019-06-10 10:50:54 +0200441 LY_CHECK_ARG_RET(NULL, val_str, val_str[0], val_len, LY_EINVAL);
Radek Krejci4f28eda2018-11-12 11:46:16 +0100442
Michal Vaskob4d40d62021-05-04 11:42:44 +0200443 /* duplicate the value */
444 str = strndup(val_str, val_len);
445 LY_CHECK_RET(!str, LY_EMEM);
Radek Krejci4f28eda2018-11-12 11:46:16 +0100446
Michal Vaskob4d40d62021-05-04 11:42:44 +0200447 /* parse the value to avoid accessing following bytes */
448 errno = 0;
449 i = strtoll(str, &ptr, base);
450 if (errno || (ptr == str)) {
451 /* invalid string */
452 rc = LY_EVALID;
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200453 } else if ((i < min) || (i > max)) {
Michal Vaskob4d40d62021-05-04 11:42:44 +0200454 /* invalid number */
455 rc = LY_EDENIED;
456 } else if (*ptr) {
457 while (isspace(*ptr)) {
458 ++ptr;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100459 }
Michal Vaskob4d40d62021-05-04 11:42:44 +0200460 if (*ptr) {
461 /* invalid characters after some number */
462 rc = LY_EVALID;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100463 }
464 }
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200465
Michal Vaskob4d40d62021-05-04 11:42:44 +0200466 /* cleanup */
467 free(str);
468 if (!rc) {
469 *ret = i;
470 }
471 return rc;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100472}
473
474LY_ERR
Radek Krejci249973a2019-06-10 10:50:54 +0200475ly_parse_uint(const char *val_str, size_t val_len, uint64_t max, int base, uint64_t *ret)
Radek Krejci4f28eda2018-11-12 11:46:16 +0100476{
Michal Vaskob4d40d62021-05-04 11:42:44 +0200477 LY_ERR rc = LY_SUCCESS;
478 char *ptr, *str;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100479 uint64_t u;
480
Michal Vaskob4d40d62021-05-04 11:42:44 +0200481 LY_CHECK_ARG_RET(NULL, val_str, val_str[0], val_len, LY_EINVAL);
Radek Krejci4f28eda2018-11-12 11:46:16 +0100482
Michal Vaskob4d40d62021-05-04 11:42:44 +0200483 /* duplicate the value to avoid accessing following bytes */
484 str = strndup(val_str, val_len);
485 LY_CHECK_RET(!str, LY_EMEM);
486
487 /* parse the value */
Radek Krejci4f28eda2018-11-12 11:46:16 +0100488 errno = 0;
Michal Vaskob4d40d62021-05-04 11:42:44 +0200489 u = strtoull(str, &ptr, base);
490 if (errno || (ptr == str)) {
491 /* invalid string */
492 rc = LY_EVALID;
493 } else if ((u > max) || (u && (str[0] == '-'))) {
494 /* invalid number */
495 rc = LY_EDENIED;
496 } else if (*ptr) {
497 while (isspace(*ptr)) {
498 ++ptr;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100499 }
Michal Vaskob4d40d62021-05-04 11:42:44 +0200500 if (*ptr) {
501 /* invalid characters after some number */
502 rc = LY_EVALID;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100503 }
504 }
505
Michal Vaskob4d40d62021-05-04 11:42:44 +0200506 /* cleanup */
507 free(str);
508 if (!rc) {
509 *ret = u;
510 }
511 return rc;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100512}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200513
514/**
515 * @brief Parse an identifier.
516 *
517 * ;; An identifier MUST NOT start with (('X'|'x') ('M'|'m') ('L'|'l'))
518 * identifier = (ALPHA / "_")
519 * *(ALPHA / DIGIT / "_" / "-" / ".")
520 *
521 * @param[in,out] id Identifier to parse. When returned, it points to the first character which is not part of the identifier.
522 * @return LY_ERR value: LY_SUCCESS or LY_EINVAL in case of invalid starting character.
523 */
524static LY_ERR
525lys_parse_id(const char **id)
526{
527 assert(id && *id);
528
529 if (!is_yangidentstartchar(**id)) {
530 return LY_EINVAL;
531 }
532 ++(*id);
533
534 while (is_yangidentchar(**id)) {
535 ++(*id);
536 }
537 return LY_SUCCESS;
538}
539
540LY_ERR
541ly_parse_nodeid(const char **id, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
542{
543 assert(id && *id);
544 assert(prefix && prefix_len);
545 assert(name && name_len);
546
547 *prefix = *id;
548 *prefix_len = 0;
549 *name = NULL;
550 *name_len = 0;
551
552 LY_CHECK_RET(lys_parse_id(id));
553 if (**id == ':') {
554 /* there is prefix */
555 *prefix_len = *id - *prefix;
556 ++(*id);
557 *name = *id;
558
559 LY_CHECK_RET(lys_parse_id(id));
560 *name_len = *id - *name;
561 } else {
562 /* there is no prefix, so what we have as prefix now is actually the name */
563 *name = *prefix;
564 *name_len = *id - *name;
565 *prefix = NULL;
566 }
567
568 return LY_SUCCESS;
569}
570
571LY_ERR
Radek Krejci084289f2019-07-09 17:35:30 +0200572ly_parse_instance_predicate(const char **pred, size_t limit, LYD_FORMAT format,
Radek Krejci0f969882020-08-21 16:56:47 +0200573 const char **prefix, size_t *prefix_len, const char **id, size_t *id_len, const char **value, size_t *value_len,
574 const char **errmsg)
Radek Krejcib4a4a272019-06-10 12:44:52 +0200575{
576 LY_ERR ret = LY_EVALID;
577 const char *in = *pred;
578 size_t offset = 1;
Radek Krejci857189e2020-09-01 13:26:36 +0200579 uint8_t expr = 0; /* 0 - position predicate; 1 - leaf-list-predicate; 2 - key-predicate */
Radek Krejcib4a4a272019-06-10 12:44:52 +0200580 char quot;
581
Radek Krejci4607f542020-12-01 12:18:49 +0100582 assert(in[0] == '[');
Radek Krejcib4a4a272019-06-10 12:44:52 +0200583
584 *prefix = *id = *value = NULL;
585 *prefix_len = *id_len = *value_len = 0;
586
587 /* leading *WSP */
Michal Vaskod989ba02020-08-24 10:59:24 +0200588 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200589
590 if (isdigit(in[offset])) {
591 /* pos: "[" *WSP positive-integer-value *WSP "]" */
592 if (in[offset] == '0') {
593 /* zero */
594 *errmsg = "The position predicate cannot be zero.";
595 goto error;
596 }
597
598 /* positive-integer-value */
Radek Krejci10bfdf82019-06-10 14:08:13 +0200599 *value = &in[offset++];
Michal Vaskod989ba02020-08-24 10:59:24 +0200600 for ( ; isdigit(in[offset]); offset++) {}
Radek Krejci10bfdf82019-06-10 14:08:13 +0200601 *value_len = &in[offset] - *value;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200602
603 } else if (in[offset] == '.') {
604 /* leaf-list-predicate: "[" *WSP "." *WSP "=" *WSP quoted-string *WSP "]" */
605 *id = &in[offset];
606 *id_len = 1;
607 offset++;
608 expr = 1;
Radek Krejci10bfdf82019-06-10 14:08:13 +0200609 } else if (in[offset] == '-') {
610 /* typically negative value */
611 *errmsg = "Invalid instance predicate format (negative position or invalid node-identifier).";
612 goto error;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200613 } else {
614 /* key-predicate: "[" *WSP node-identifier *WSP "=" *WSP quoted-string *WSP "]" */
615 in = &in[offset];
616 if (ly_parse_nodeid(&in, prefix, prefix_len, id, id_len)) {
617 *errmsg = "Invalid node-identifier.";
618 goto error;
619 }
Michal Vasko69730152020-10-09 16:30:07 +0200620 if ((format == LYD_XML) && !(*prefix)) {
Radek Krejci084289f2019-07-09 17:35:30 +0200621 /* all node names MUST be qualified with explicit namespace prefix */
622 *errmsg = "Missing prefix of a node name.";
623 goto error;
624 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200625 offset = in - *pred;
626 in = *pred;
Radek Krejci10bfdf82019-06-10 14:08:13 +0200627 expr = 2;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200628 }
629
630 if (expr) {
631 /* *WSP "=" *WSP quoted-string *WSP "]" */
Michal Vaskod989ba02020-08-24 10:59:24 +0200632 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200633
634 if (in[offset] != '=') {
Radek Krejci10bfdf82019-06-10 14:08:13 +0200635 if (expr == 1) {
636 *errmsg = "Unexpected character instead of \'=\' in leaf-list-predicate.";
637 } else { /* 2 */
638 *errmsg = "Unexpected character instead of \'=\' in key-predicate.";
639 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200640 goto error;
641 }
642 offset++;
Michal Vaskod989ba02020-08-24 10:59:24 +0200643 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200644
645 /* quoted-string */
646 quot = in[offset++];
Michal Vasko69730152020-10-09 16:30:07 +0200647 if ((quot != '\'') && (quot != '\"')) {
Radek Krejcib4a4a272019-06-10 12:44:52 +0200648 *errmsg = "String value is not quoted.";
649 goto error;
650 }
651 *value = &in[offset];
Michal Vaskod989ba02020-08-24 10:59:24 +0200652 for ( ; offset < limit && (in[offset] != quot || (offset && in[offset - 1] == '\\')); offset++) {}
Radek Krejci10bfdf82019-06-10 14:08:13 +0200653 if (in[offset] == quot) {
654 *value_len = &in[offset] - *value;
655 offset++;
656 } else {
657 *errmsg = "Value is not terminated quoted-string.";
658 goto error;
659 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200660 }
661
662 /* *WSP "]" */
Michal Vaskod989ba02020-08-24 10:59:24 +0200663 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200664 if (in[offset] != ']') {
Radek Krejci10bfdf82019-06-10 14:08:13 +0200665 if (expr == 0) {
666 *errmsg = "Predicate (pos) is not terminated by \']\' character.";
667 } else if (expr == 1) {
668 *errmsg = "Predicate (leaf-list-predicate) is not terminated by \']\' character.";
669 } else { /* 2 */
670 *errmsg = "Predicate (key-predicate) is not terminated by \']\' character.";
671 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200672 goto error;
673 }
Radek Krejci10bfdf82019-06-10 14:08:13 +0200674 offset++;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200675
Radek Krejci10bfdf82019-06-10 14:08:13 +0200676 if (offset <= limit) {
677 *pred = &in[offset];
Radek Krejcib4a4a272019-06-10 12:44:52 +0200678 return LY_SUCCESS;
679 }
680
681 /* we read after the limit */
682 *errmsg = "Predicate is incomplete.";
683 *prefix = *id = *value = NULL;
684 *prefix_len = *id_len = *value_len = 0;
685 offset = limit;
686 ret = LY_EINVAL;
687
688error:
689 *pred = &in[offset];
690 return ret;
691}