blob: 449c862f9858e1dfb354aa54600469eb7c7b5f45 [file] [log] [blame]
Michal Vasko1324b6c2018-09-07 11:16:23 +02001/**
Michal Vasko8f702ee2024-02-20 15:44:24 +01002 * @file ly_common.c
Michal Vasko1324b6c2018-09-07 11:16:23 +02003 * @author Michal Vasko <mvasko@cesnet.cz>
4 * @brief common internal definitions for libyang
5 *
Michal Vasko8f702ee2024-02-20 15:44:24 +01006 * Copyright (c) 2018 - 2024 CESNET, z.s.p.o.
Michal Vasko1324b6c2018-09-07 11:16:23 +02007 *
8 * This source code is licensed under BSD 3-Clause License (the "License").
9 * You may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * https://opensource.org/licenses/BSD-3-Clause
13 */
Radek Krejcib7db73a2018-10-24 14:18:40 +020014
Radek Krejci535ea9f2020-05-29 16:01:05 +020015#define _GNU_SOURCE
16
Michal Vasko8f702ee2024-02-20 15:44:24 +010017#include "ly_common.h"
Michal Vasko1324b6c2018-09-07 11:16:23 +020018
Radek Krejci86d106e2018-10-18 09:53:19 +020019#include <assert.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020020#include <ctype.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020021#include <errno.h>
Jan Kundrátd31adc12022-07-07 21:36:15 +020022#include <fcntl.h>
Michal Vasko15dc9fa2021-05-03 14:33:05 +020023#include <inttypes.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020024#include <stdarg.h>
Radek Krejci535ea9f2020-05-29 16:01:05 +020025#include <stdio.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020026#include <stdlib.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020027#include <string.h>
Jan Kundrátf1960dc2021-12-12 03:12:23 +010028#ifndef _WIN32
Michal Vasko8f702ee2024-02-20 15:44:24 +010029# ifdef HAVE_MMAP
30# include <sys/mman.h>
31# endif
Jan Kundrátd31adc12022-07-07 21:36:15 +020032#else
Michal Vasko8f702ee2024-02-20 15:44:24 +010033# include <io.h>
Jan Kundrátf1960dc2021-12-12 03:12:23 +010034#endif
Radek Krejci86d106e2018-10-18 09:53:19 +020035#include <sys/stat.h>
Radek Krejci86d106e2018-10-18 09:53:19 +020036#include <unistd.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020037
Radek Krejciaa45bda2020-07-20 07:43:38 +020038#include "compat.h"
Radek Krejcib4a4a272019-06-10 12:44:52 +020039#include "tree_schema_internal.h"
Michal Vasko3ab37cc2024-06-28 10:40:07 +020040#include "version.h"
aPiecek704f8e92021-08-25 13:35:05 +020041#include "xml.h"
Michal Vasko1324b6c2018-09-07 11:16:23 +020042
Michal Vasko11581632024-07-15 15:04:58 +020043LIBYANG_API_DEF uint32_t
44ly_version_so_major(void)
45{
46 return LY_VERSION_MAJOR;
47}
Michal Vasko3ab37cc2024-06-28 10:40:07 +020048
Michal Vasko11581632024-07-15 15:04:58 +020049LIBYANG_API_DEF uint32_t
50ly_version_so_minor(void)
51{
52 return LY_VERSION_MINOR;
53}
54
55LIBYANG_API_DEF uint32_t
56ly_version_so_micro(void)
57{
58 return LY_VERSION_MICRO;
59}
60
61LIBYANG_API_DEF const char *
62ly_version_so_str(void)
63{
64 return LY_VERSION;
65}
66
67LIBYANG_API_DEF uint32_t
68ly_version_proj_major(void)
69{
70 return LY_PROJ_VERSION_MAJOR;
71}
72
73LIBYANG_API_DEF uint32_t
74ly_version_proj_minor(void)
75{
76 return LY_PROJ_VERSION_MINOR;
77}
78
79LIBYANG_API_DEF uint32_t
80ly_version_proj_micro(void)
81{
82 return LY_PROJ_VERSION_MICRO;
83}
84
85LIBYANG_API_DEF const char *
86ly_version_proj_str(void)
87{
88 return LY_PROJ_VERSION;
89}
Michal Vasko3ab37cc2024-06-28 10:40:07 +020090
/**
 * @brief Wrapper around realloc() that never leaves the original memory allocated on failure.
 *
 * Unlike plain realloc(), the caller must NOT use or free @p ptr after a NULL return,
 * the memory has already been released.
 *
 * @param[in] ptr Memory to reallocate, may be NULL.
 * @param[in] size New size in bytes. For 0, @p ptr is freed and NULL is returned.
 * @return Pointer to the reallocated memory.
 * @return NULL on allocation failure or when @p size is 0, @p ptr is freed in both cases.
 */
void *
ly_realloc(void *ptr, size_t size)
{
    void *new_mem;

    if (!size) {
        /* realloc(ptr, 0) may free ptr itself and return NULL, freeing it again below would then be a double free */
        free(ptr);
        return NULL;
    }

    new_mem = realloc(ptr, size);
    if (!new_mem) {
        /* realloc() keeps the original block on failure, release it so that the caller cannot leak it */
        free(ptr);
    }

    return new_mem;
}
Michal Vasko841d1a92018-09-07 15:40:31 +0200103
/**
 * @brief Find the first occurrence of a character in the first @p len bytes of a string.
 *
 * The search does not stop on a NUL byte, exactly @p len bytes are examined at most.
 *
 * @param[in] s String to search in.
 * @param[in] c Character to look for, it is converted to char before the comparison.
 * @param[in] len Number of bytes of @p s to examine.
 * @return Pointer to the first matching byte, NULL if there is none.
 */
char *
ly_strnchr(const char *s, int c, size_t len)
{
    const char wanted = (char)c;
    size_t i;

    for (i = 0; i < len; ++i) {
        if (s[i] == wanted) {
            return (char *)&s[i];
        }
    }

    return NULL;
}
110
/**
 * @brief Compare a NUL-terminated reference string with a string of a given length.
 *
 * The strings are equal only if the first @p str_len bytes match and @p refstr is exactly @p str_len bytes long.
 *
 * @param[in] refstr NUL-terminated reference string.
 * @param[in] str String to compare, does not need to be NUL-terminated.
 * @param[in] str_len Number of bytes of @p str to compare.
 * @return 0 if the strings are equal.
 * @return Non-zero strncmp() result if the first @p str_len bytes differ, 1 if they match but the lengths differ.
 */
int
ly_strncmp(const char *refstr, const char *str, size_t str_len)
{
    int rc = strncmp(refstr, str, str_len);

    if (rc) {
        return rc;
    }

    /* do not index refstr[str_len] directly, strncmp() also returns 0 when both strings end before str_len
     * and the byte at that index would then lie behind the terminating NUL of refstr */
    return (strlen(refstr) == str_len) ? 0 : 1;
}
122
Michal Vasko15dc9fa2021-05-03 14:33:05 +0200123LY_ERR
124ly_strntou8(const char *nptr, size_t len, uint8_t *ret)
125{
Michal Vasko014507d2024-06-26 16:02:44 +0200126 uint8_t num = 0, dig;
127 uint16_t dec_pow;
Michal Vasko15dc9fa2021-05-03 14:33:05 +0200128
129 if (len > 3) {
130 /* overflow for sure */
131 return LY_EDENIED;
132 }
133
134 dec_pow = 1;
135 for ( ; len && isdigit(nptr[len - 1]); --len) {
136 dig = nptr[len - 1] - 48;
137
138 if (LY_OVERFLOW_MUL(UINT8_MAX, dig, dec_pow)) {
139 return LY_EDENIED;
140 }
141 dig *= dec_pow;
142
143 if (LY_OVERFLOW_ADD(UINT8_MAX, num, dig)) {
144 return LY_EDENIED;
145 }
146 num += dig;
147
148 dec_pow *= 10;
149 }
150
151 if (len) {
152 return LY_EVALID;
153 }
154 *ret = num;
155 return LY_SUCCESS;
156}
157
aPieceke3f828d2021-05-10 15:34:41 +0200158LY_ERR
159ly_value_prefix_next(const char *str_begin, const char *str_end, uint32_t *len, ly_bool *is_prefix, const char **str_next)
aPiecekf102d4d2021-03-30 12:18:38 +0200160{
161 const char *stop, *prefix;
aPieceke3f828d2021-05-10 15:34:41 +0200162 size_t bytes_read;
aPiecekf102d4d2021-03-30 12:18:38 +0200163 uint32_t c;
164 ly_bool prefix_found;
aPieceke3f828d2021-05-10 15:34:41 +0200165 LY_ERR ret = LY_SUCCESS;
aPiecekf102d4d2021-03-30 12:18:38 +0200166
aPieceke3f828d2021-05-10 15:34:41 +0200167 assert(len && is_prefix && str_next);
aPiecekf102d4d2021-03-30 12:18:38 +0200168
169#define IS_AT_END(PTR, STR_END) (STR_END ? PTR == STR_END : !(*PTR))
170
171 *str_next = NULL;
172 *is_prefix = 0;
aPieceke3f828d2021-05-10 15:34:41 +0200173 *len = 0;
aPiecekf102d4d2021-03-30 12:18:38 +0200174
175 if (!str_begin || !(*str_begin) || (str_begin == str_end)) {
176 return ret;
177 }
178
179 stop = str_begin;
180 prefix = NULL;
181 prefix_found = 0;
182
183 do {
184 /* look for the beginning of the YANG value */
aPieceke3f828d2021-05-10 15:34:41 +0200185 do {
186 LY_CHECK_RET(ly_getutf8(&stop, &c, &bytes_read));
187 } while (!is_xmlqnamestartchar(c) && !IS_AT_END(stop, str_end));
aPiecekf102d4d2021-03-30 12:18:38 +0200188
189 if (IS_AT_END(stop, str_end)) {
190 break;
191 }
192
193 /* maybe the prefix was found */
aPieceke3f828d2021-05-10 15:34:41 +0200194 prefix = stop - bytes_read;
aPiecekf102d4d2021-03-30 12:18:38 +0200195
196 /* look for the the end of the prefix */
aPieceke3f828d2021-05-10 15:34:41 +0200197 do {
198 LY_CHECK_RET(ly_getutf8(&stop, &c, &bytes_read));
199 } while (is_xmlqnamechar(c) && !IS_AT_END(stop, str_end));
aPiecekf102d4d2021-03-30 12:18:38 +0200200
201 prefix_found = c == ':' ? 1 : 0;
202
203 /* if it wasn't the prefix, keep looking */
204 } while (!IS_AT_END(stop, str_end) && !prefix_found);
205
206 if ((str_begin == prefix) && prefix_found) {
207 /* prefix found at the beginning of the input string */
208 *is_prefix = 1;
209 *str_next = IS_AT_END(stop, str_end) ? NULL : stop;
aPieceke3f828d2021-05-10 15:34:41 +0200210 *len = (stop - bytes_read) - str_begin;
aPiecekf102d4d2021-03-30 12:18:38 +0200211 } else if ((str_begin != prefix) && (prefix_found)) {
212 /* there is a some string before prefix */
213 *str_next = prefix;
aPieceke3f828d2021-05-10 15:34:41 +0200214 *len = prefix - str_begin;
aPiecekf102d4d2021-03-30 12:18:38 +0200215 } else {
216 /* no prefix found */
aPieceke3f828d2021-05-10 15:34:41 +0200217 *len = stop - str_begin;
aPiecekf102d4d2021-03-30 12:18:38 +0200218 }
219
220#undef IS_AT_END
221
222 return ret;
223}
224
Radek Krejcib416be62018-10-01 14:51:45 +0200225LY_ERR
Michal Vaskob36053d2020-03-26 15:49:30 +0100226ly_getutf8(const char **input, uint32_t *utf8_char, size_t *bytes_read)
Radek Krejcib416be62018-10-01 14:51:45 +0200227{
Radek Krejci1deb5be2020-08-26 16:43:36 +0200228 uint32_t c, aux;
229 size_t len;
Radek Krejcib416be62018-10-01 14:51:45 +0200230
231 c = (*input)[0];
Radek Krejcib416be62018-10-01 14:51:45 +0200232
233 if (!(c & 0x80)) {
234 /* one byte character */
235 len = 1;
236
Michal Vasko69730152020-10-09 16:30:07 +0200237 if ((c < 0x20) && (c != 0x9) && (c != 0xa) && (c != 0xd)) {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200238 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200239 }
240 } else if ((c & 0xe0) == 0xc0) {
241 /* two bytes character */
242 len = 2;
243
244 aux = (*input)[1];
245 if ((aux & 0xc0) != 0x80) {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200246 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200247 }
248 c = ((c & 0x1f) << 6) | (aux & 0x3f);
249
250 if (c < 0x80) {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200251 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200252 }
253 } else if ((c & 0xf0) == 0xe0) {
254 /* three bytes character */
255 len = 3;
256
257 c &= 0x0f;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200258 for (uint64_t i = 1; i <= 2; i++) {
Radek Krejcib416be62018-10-01 14:51:45 +0200259 aux = (*input)[i];
260 if ((aux & 0xc0) != 0x80) {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200261 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200262 }
263
264 c = (c << 6) | (aux & 0x3f);
265 }
266
Michal Vasko69730152020-10-09 16:30:07 +0200267 if ((c < 0x800) || ((c > 0xd7ff) && (c < 0xe000)) || (c > 0xfffd)) {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200268 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200269 }
270 } else if ((c & 0xf8) == 0xf0) {
271 /* four bytes character */
272 len = 4;
273
274 c &= 0x07;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200275 for (uint64_t i = 1; i <= 3; i++) {
Radek Krejcib416be62018-10-01 14:51:45 +0200276 aux = (*input)[i];
277 if ((aux & 0xc0) != 0x80) {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200278 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200279 }
280
281 c = (c << 6) | (aux & 0x3f);
282 }
283
Michal Vasko69730152020-10-09 16:30:07 +0200284 if ((c < 0x1000) || (c > 0x10ffff)) {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200285 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200286 }
287 } else {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200288 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200289 }
290
291 (*utf8_char) = c;
292 (*input) += len;
293 if (bytes_read) {
294 (*bytes_read) = len;
295 }
296 return LY_SUCCESS;
Michal Vaskoda09abf2023-10-06 15:53:18 +0200297
298error:
299 if (bytes_read) {
300 (*bytes_read) = 0;
301 }
302 return LY_EINVAL;
Radek Krejcib416be62018-10-01 14:51:45 +0200303}
304
/**
 * @brief Check whether an UTF-8 string is equal to a hex string after a bitwise and.
 *
 * (input & 0x[arg1][arg3][arg5]...) == 0x[arg2][arg4][arg6]...
 *
 * @param[in] input UTF-8 string.
 * @param[in] bytes Number of bytes to compare.
 * @param[in] ... 2x @p bytes number of bytes (passed as int) to perform bitwise and and equality operations.
 * @return 1 if all the masked bytes are equal, 0 otherwise.
 */
static int
ly_utf8_and_equal(const char *input, int bytes, ...)
{
    va_list ap;
    int i, mask, expected, equal = 1;

    va_start(ap, bytes);
    for (i = 0; i < bytes; ++i) {
        mask = va_arg(ap, int);
        expected = va_arg(ap, int);

        /* compare each byte */
        if (((uint8_t)input[i] & mask) != (uint8_t)expected) {
            equal = 0;
            break;
        }
    }
    /* every va_start() must be paired with va_end(), even when a mismatch ends the loop early */
    va_end(ap);

    return equal;
}
335
/**
 * @brief Check whether an UTF-8 string is smaller than a hex string.
 *
 * input < 0x[arg1][arg2]...
 *
 * @param[in] input UTF-8 string.
 * @param[in] bytes Number of bytes to compare.
 * @param[in] ... @p bytes number of bytes (passed as int) to compare with.
 * @return 1 if @p input is smaller, 0 if it is equal or greater.
 */
static int
ly_utf8_less(const char *input, int bytes, ...)
{
    va_list ap;
    int i, result = 0;
    uint8_t in_byte, ref_byte;

    va_start(ap, bytes);
    for (i = 0; i < bytes; ++i) {
        in_byte = (uint8_t)input[i];
        ref_byte = (uint8_t)va_arg(ap, int);

        /* the first differing byte decides */
        if (in_byte != ref_byte) {
            result = (in_byte < ref_byte) ? 1 : 0;
            break;
        }
    }
    /* every va_start() must be paired with va_end(), even when the loop ends early */
    va_end(ap);

    /* result stays 0 if all the bytes are equal */
    return result;
}
368
/**
 * @brief Check whether an UTF-8 string is greater than a hex string.
 *
 * input > 0x[arg1][arg2]...
 *
 * @param[in] input UTF-8 string.
 * @param[in] bytes Number of bytes to compare.
 * @param[in] ... @p bytes number of bytes (passed as int) to compare with.
 * @return 1 if @p input is greater, 0 if it is equal or smaller.
 */
static int
ly_utf8_greater(const char *input, int bytes, ...)
{
    va_list ap;
    int i, result = 0;
    uint8_t in_byte, ref_byte;

    va_start(ap, bytes);
    for (i = 0; i < bytes; ++i) {
        in_byte = (uint8_t)input[i];
        ref_byte = (uint8_t)va_arg(ap, int);

        /* the first differing byte decides */
        if (in_byte != ref_byte) {
            result = (in_byte > ref_byte) ? 1 : 0;
            break;
        }
    }
    /* every va_start() must be paired with va_end(), even when the loop ends early */
    va_end(ap);

    /* result stays 0 if all the bytes are equal */
    return result;
}
401
402LY_ERR
403ly_checkutf8(const char *input, size_t in_len, size_t *utf8_len)
404{
405 size_t len;
406
407 if (!(input[0] & 0x80)) {
408 /* one byte character */
409 len = 1;
410
411 if (ly_utf8_less(input, 1, 0x20) && (input[0] != 0x9) && (input[0] != 0xa) && (input[0] != 0xd)) {
412 /* invalid control characters */
413 return LY_EINVAL;
414 }
415 } else if (((input[0] & 0xe0) == 0xc0) && (in_len > 1)) {
416 /* two bytes character */
417 len = 2;
418
419 /* (input < 0xC280) || (input > 0xDFBF) || ((input & 0xE0C0) != 0xC080) */
420 if (ly_utf8_less(input, 2, 0xC2, 0x80) || ly_utf8_greater(input, 2, 0xDF, 0xBF) ||
421 !ly_utf8_and_equal(input, 2, 0xE0, 0xC0, 0xC0, 0x80)) {
422 return LY_EINVAL;
423 }
424 } else if (((input[0] & 0xf0) == 0xe0) && (in_len > 2)) {
425 /* three bytes character */
426 len = 3;
427
428 /* (input >= 0xEDA080) && (input <= 0xEDBFBF) */
429 if (!ly_utf8_less(input, 3, 0xED, 0xA0, 0x80) && !ly_utf8_greater(input, 3, 0xED, 0xBF, 0xBF)) {
430 /* reject UTF-16 surrogates */
431 return LY_EINVAL;
432 }
433
434 /* (input < 0xE0A080) || (input > 0xEFBFBF) || ((input & 0xF0C0C0) != 0xE08080) */
435 if (ly_utf8_less(input, 3, 0xE0, 0xA0, 0x80) || ly_utf8_greater(input, 3, 0xEF, 0xBF, 0xBF) ||
436 !ly_utf8_and_equal(input, 3, 0xF0, 0xE0, 0xC0, 0x80, 0xC0, 0x80)) {
437 return LY_EINVAL;
438 }
439 } else if (((input[0] & 0xf8) == 0xf0) && (in_len > 3)) {
440 /* four bytes character */
441 len = 4;
442
443 /* (input < 0xF0908080) || (input > 0xF48FBFBF) || ((input & 0xF8C0C0C0) != 0xF0808080) */
444 if (ly_utf8_less(input, 4, 0xF0, 0x90, 0x80, 0x80) || ly_utf8_greater(input, 4, 0xF4, 0x8F, 0xBF, 0xBF) ||
445 !ly_utf8_and_equal(input, 4, 0xF8, 0xF0, 0xC0, 0x80, 0xC0, 0x80, 0xC0, 0x80)) {
446 return LY_EINVAL;
447 }
448 } else {
449 return LY_EINVAL;
450 }
451
452 *utf8_len = len;
453 return LY_SUCCESS;
454}
455
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200456LY_ERR
457ly_pututf8(char *dst, uint32_t value, size_t *bytes_written)
458{
459 if (value < 0x80) {
460 /* one byte character */
Michal Vasko69730152020-10-09 16:30:07 +0200461 if ((value < 0x20) &&
462 (value != 0x09) &&
463 (value != 0x0a) &&
464 (value != 0x0d)) {
Michal Vasko519097f2023-05-25 10:00:44 +0200465 /* valid UTF8 but not YANG string character */
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200466 return LY_EINVAL;
467 }
468
469 dst[0] = value;
470 (*bytes_written) = 1;
471 } else if (value < 0x800) {
472 /* two bytes character */
473 dst[0] = 0xc0 | (value >> 6);
474 dst[1] = 0x80 | (value & 0x3f);
475 (*bytes_written) = 2;
476 } else if (value < 0xfffe) {
477 /* three bytes character */
478 if (((value & 0xf800) == 0xd800) ||
Michal Vasko69730152020-10-09 16:30:07 +0200479 ((value >= 0xfdd0) && (value <= 0xfdef))) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200480 /* exclude surrogate blocks %xD800-DFFF */
481 /* exclude noncharacters %xFDD0-FDEF */
482 return LY_EINVAL;
483 }
484
485 dst[0] = 0xe0 | (value >> 12);
486 dst[1] = 0x80 | ((value >> 6) & 0x3f);
487 dst[2] = 0x80 | (value & 0x3f);
488
489 (*bytes_written) = 3;
490 } else if (value < 0x10fffe) {
491 if ((value & 0xffe) == 0xffe) {
492 /* exclude noncharacters %xFFFE-FFFF, %x1FFFE-1FFFF, %x2FFFE-2FFFF, %x3FFFE-3FFFF, %x4FFFE-4FFFF,
493 * %x5FFFE-5FFFF, %x6FFFE-6FFFF, %x7FFFE-7FFFF, %x8FFFE-8FFFF, %x9FFFE-9FFFF, %xAFFFE-AFFFF,
494 * %xBFFFE-BFFFF, %xCFFFE-CFFFF, %xDFFFE-DFFFF, %xEFFFE-EFFFF, %xFFFFE-FFFFF, %x10FFFE-10FFFF */
495 return LY_EINVAL;
496 }
497 /* four bytes character */
498 dst[0] = 0xf0 | (value >> 18);
499 dst[1] = 0x80 | ((value >> 12) & 0x3f);
500 dst[2] = 0x80 | ((value >> 6) & 0x3f);
501 dst[3] = 0x80 | (value & 0x3f);
502
503 (*bytes_written) = 4;
504 } else {
505 return LY_EINVAL;
506 }
507 return LY_SUCCESS;
508}
509
/**
 * @brief Static table of the UTF8 characters lengths according to their first byte.
 *
 * Continuation bytes (0x80 - 0xBF) and the bytes 0xFE and 0xFF have the length 1.
 */
static const unsigned char utf8_char_length_table[] = {
    /* 0x00 - 0x7F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x80 - 0xBF */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xC0 - 0xDF */
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xE0 - 0xEF */
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0xF0 - 0xFF */
    4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
};

/**
 * @brief Count the UTF-8 characters in a string.
 *
 * The characters are not validated, their lengths are taken from the first byte of each character.
 *
 * @param[in] str String to process.
 * @param[in] bytes Maximum number of bytes of @p str to process, the terminating NUL byte ends
 * the counting earlier.
 * @return Number of characters found.
 */
size_t
ly_utf8len(const char *str, size_t bytes)
{
    size_t len = 0, pos = 0, char_len, i;

    while ((pos < bytes) && str[pos]) {
        ++len;
        char_len = utf8_char_length_table[(unsigned char)str[pos]];

        /* skip the character byte by byte so that a truncated multi-byte sequence can never
         * carry the position over the terminating NUL byte or the byte limit */
        for (i = 0; (i < char_len) && (pos < bytes) && str[pos]; ++i) {
            ++pos;
        }
    }
    return len;
}
544
Michal Vasko7b3a00e2023-08-09 11:58:03 +0200545int
Radek Krejcid972c252018-09-25 13:23:39 +0200546LY_VCODE_INSTREXP_len(const char *str)
547{
Michal Vasko7b3a00e2023-08-09 11:58:03 +0200548 int len = 0;
Michal Vasko69730152020-10-09 16:30:07 +0200549
Radek Krejcid972c252018-09-25 13:23:39 +0200550 if (!str) {
551 return len;
552 } else if (!str[0]) {
553 return 1;
554 }
Radek Krejci1e008d22020-08-17 11:37:37 +0200555 for (len = 1; len < LY_VCODE_INSTREXP_MAXLEN && str[len]; ++len) {}
Radek Krejcid972c252018-09-25 13:23:39 +0200556 return len;
557}
558
Jan Kundrátf1960dc2021-12-12 03:12:23 +0100559#ifdef HAVE_MMAP
Radek Krejcif345c012018-09-19 11:12:59 +0200560LY_ERR
Radek Krejci86d106e2018-10-18 09:53:19 +0200561ly_mmap(struct ly_ctx *ctx, int fd, size_t *length, void **addr)
Michal Vasko841d1a92018-09-07 15:40:31 +0200562{
Radek Krejci86d106e2018-10-18 09:53:19 +0200563 struct stat sb;
564 long pagesize;
565 size_t m;
Michal Vasko841d1a92018-09-07 15:40:31 +0200566
Radek Krejci86d106e2018-10-18 09:53:19 +0200567 assert(length);
568 assert(addr);
569 assert(fd >= 0);
Michal Vasko841d1a92018-09-07 15:40:31 +0200570
Radek Krejci86d106e2018-10-18 09:53:19 +0200571 if (fstat(fd, &sb) == -1) {
572 LOGERR(ctx, LY_ESYS, "Failed to stat the file descriptor (%s) for the mmap().", strerror(errno));
573 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200574 }
Radek Krejci86d106e2018-10-18 09:53:19 +0200575 if (!S_ISREG(sb.st_mode)) {
576 LOGERR(ctx, LY_EINVAL, "File to mmap() is not a regular file.");
577 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200578 }
Radek Krejci86d106e2018-10-18 09:53:19 +0200579 if (!sb.st_size) {
580 *addr = NULL;
581 return LY_SUCCESS;
582 }
583 pagesize = sysconf(_SC_PAGESIZE);
584
585 m = sb.st_size % pagesize;
Michal Vasko69730152020-10-09 16:30:07 +0200586 if (m && (pagesize - m >= 1)) {
Radek Krejci86d106e2018-10-18 09:53:19 +0200587 /* there will be enough space (at least 1 byte) after the file content mapping to provide zeroed NULL-termination byte */
588 *length = sb.st_size + 1;
589 *addr = mmap(NULL, *length, PROT_READ, MAP_PRIVATE, fd, 0);
590 } else {
591 /* there will not be enough bytes after the file content mapping for the additional bytes and some of them
592 * would overflow into another page that would not be zerroed and any access into it would generate SIGBUS.
593 * Therefore we have to do the following hack with double mapping. First, the required number of bytes
594 * (including the additinal bytes) is required as anonymous and thus they will be really provided (actually more
595 * because of using whole pages) and also initialized by zeros. Then, the file is mapped to the same address
596 * where the anonymous mapping starts. */
597 *length = sb.st_size + pagesize;
598 *addr = mmap(NULL, *length, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
599 *addr = mmap(*addr, sb.st_size, PROT_READ, MAP_PRIVATE | MAP_FIXED, fd, 0);
600 }
601 if (*addr == MAP_FAILED) {
602 LOGERR(ctx, LY_ESYS, "mmap() failed (%s).", strerror(errno));
603 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200604 }
605
Radek Krejcif345c012018-09-19 11:12:59 +0200606 return LY_SUCCESS;
Radek Krejci86d106e2018-10-18 09:53:19 +0200607}
Michal Vasko841d1a92018-09-07 15:40:31 +0200608
Radek Krejci86d106e2018-10-18 09:53:19 +0200609LY_ERR
610ly_munmap(void *addr, size_t length)
611{
612 if (munmap(addr, length)) {
613 return LY_ESYS;
614 }
615 return LY_SUCCESS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200616}
Radek Krejci4f28eda2018-11-12 11:46:16 +0100617
Jan Kundrátf1960dc2021-12-12 03:12:23 +0100618#else
619
620LY_ERR
621ly_mmap(struct ly_ctx *ctx, int fd, size_t *length, void **addr)
622{
623 struct stat sb;
624 size_t m;
625
626 assert(length);
627 assert(addr);
628 assert(fd >= 0);
629
Jan Kundrátd31adc12022-07-07 21:36:15 +0200630#if _WIN32
631 if (_setmode(fd, _O_BINARY) == -1) {
632 LOGERR(ctx, LY_ESYS, "Failed to switch the file descriptor to binary mode.", strerror(errno));
633 return LY_ESYS;
634 }
635#endif
636
Jan Kundrátf1960dc2021-12-12 03:12:23 +0100637 if (fstat(fd, &sb) == -1) {
638 LOGERR(ctx, LY_ESYS, "Failed to stat the file descriptor (%s) for the mmap().", strerror(errno));
639 return LY_ESYS;
640 }
641 if (!S_ISREG(sb.st_mode)) {
642 LOGERR(ctx, LY_EINVAL, "File to mmap() is not a regular file.");
643 return LY_ESYS;
644 }
645 if (!sb.st_size) {
646 *addr = NULL;
647 return LY_SUCCESS;
648 }
649 /* On Windows, the mman-win32 mmap() emulation uses CreateFileMapping and MapViewOfFile, and these functions
650 * do not allow mapping more than "length of file" bytes for PROT_READ. Remapping existing mappings is not allowed, either.
651 * At that point the path of least resistance is just reading the file in as-is. */
652 m = sb.st_size + 1;
653 char *buf = calloc(m, 1);
654
655 if (!buf) {
656 LOGERR(ctx, LY_ESYS, "ly_mmap: malloc() failed (%s).", strerror(errno));
657 }
658 *addr = buf;
659 *length = m;
660
661 lseek(fd, 0, SEEK_SET);
662 ssize_t to_read = m - 1;
663
664 while (to_read > 0) {
665 ssize_t n = read(fd, buf, to_read);
Michal Vasko2bf4af42023-01-04 12:08:38 +0100666
Jan Kundrátf1960dc2021-12-12 03:12:23 +0100667 if (n == 0) {
668 return LY_SUCCESS;
669 } else if (n < 0) {
670 if (errno == EINTR) {
671 continue; // can I get this on Windows?
672 }
673 LOGERR(ctx, LY_ESYS, "ly_mmap: read() failed (%s).", strerror(errno));
674 }
675 to_read -= n;
676 buf += n;
677 }
678 return LY_SUCCESS;
679}
680
681LY_ERR
682ly_munmap(void *addr, size_t length)
683{
684 (void)length;
685 free(addr);
686 return LY_SUCCESS;
687}
688
689#endif
690
Radek Krejci4f28eda2018-11-12 11:46:16 +0100691LY_ERR
Radek Krejci4546aa62019-07-15 16:53:32 +0200692ly_strcat(char **dest, const char *format, ...)
693{
694 va_list fp;
695 char *addition = NULL;
696 size_t len;
697
698 va_start(fp, format);
699 len = vasprintf(&addition, format, fp);
700 len += (*dest ? strlen(*dest) : 0) + 1;
701
702 if (*dest) {
703 *dest = ly_realloc(*dest, len);
704 if (!*dest) {
Radek Krejci1cd812f2020-12-01 12:17:53 +0100705 va_end(fp);
Radek Krejci4546aa62019-07-15 16:53:32 +0200706 return LY_EMEM;
707 }
708 *dest = strcat(*dest, addition);
709 free(addition);
710 } else {
711 *dest = addition;
712 }
713
714 va_end(fp);
715 return LY_SUCCESS;
716}
717
718LY_ERR
Radek Krejci249973a2019-06-10 10:50:54 +0200719ly_parse_int(const char *val_str, size_t val_len, int64_t min, int64_t max, int base, int64_t *ret)
Radek Krejci4f28eda2018-11-12 11:46:16 +0100720{
Michal Vaskob4d40d62021-05-04 11:42:44 +0200721 LY_ERR rc = LY_SUCCESS;
722 char *ptr, *str;
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200723 int64_t i;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100724
Radek Krejci249973a2019-06-10 10:50:54 +0200725 LY_CHECK_ARG_RET(NULL, val_str, val_str[0], val_len, LY_EINVAL);
Radek Krejci4f28eda2018-11-12 11:46:16 +0100726
Michal Vaskob4d40d62021-05-04 11:42:44 +0200727 /* duplicate the value */
728 str = strndup(val_str, val_len);
729 LY_CHECK_RET(!str, LY_EMEM);
Radek Krejci4f28eda2018-11-12 11:46:16 +0100730
Michal Vaskob4d40d62021-05-04 11:42:44 +0200731 /* parse the value to avoid accessing following bytes */
732 errno = 0;
733 i = strtoll(str, &ptr, base);
734 if (errno || (ptr == str)) {
735 /* invalid string */
736 rc = LY_EVALID;
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200737 } else if ((i < min) || (i > max)) {
Michal Vaskob4d40d62021-05-04 11:42:44 +0200738 /* invalid number */
739 rc = LY_EDENIED;
740 } else if (*ptr) {
741 while (isspace(*ptr)) {
742 ++ptr;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100743 }
Michal Vaskob4d40d62021-05-04 11:42:44 +0200744 if (*ptr) {
745 /* invalid characters after some number */
746 rc = LY_EVALID;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100747 }
748 }
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200749
Michal Vaskob4d40d62021-05-04 11:42:44 +0200750 /* cleanup */
751 free(str);
752 if (!rc) {
753 *ret = i;
754 }
755 return rc;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100756}
757
758LY_ERR
Radek Krejci249973a2019-06-10 10:50:54 +0200759ly_parse_uint(const char *val_str, size_t val_len, uint64_t max, int base, uint64_t *ret)
Radek Krejci4f28eda2018-11-12 11:46:16 +0100760{
Michal Vaskob4d40d62021-05-04 11:42:44 +0200761 LY_ERR rc = LY_SUCCESS;
762 char *ptr, *str;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100763 uint64_t u;
764
Michal Vaskob4d40d62021-05-04 11:42:44 +0200765 LY_CHECK_ARG_RET(NULL, val_str, val_str[0], val_len, LY_EINVAL);
Radek Krejci4f28eda2018-11-12 11:46:16 +0100766
Michal Vaskob4d40d62021-05-04 11:42:44 +0200767 /* duplicate the value to avoid accessing following bytes */
768 str = strndup(val_str, val_len);
769 LY_CHECK_RET(!str, LY_EMEM);
770
771 /* parse the value */
Radek Krejci4f28eda2018-11-12 11:46:16 +0100772 errno = 0;
Michal Vaskob4d40d62021-05-04 11:42:44 +0200773 u = strtoull(str, &ptr, base);
774 if (errno || (ptr == str)) {
775 /* invalid string */
776 rc = LY_EVALID;
777 } else if ((u > max) || (u && (str[0] == '-'))) {
778 /* invalid number */
779 rc = LY_EDENIED;
780 } else if (*ptr) {
781 while (isspace(*ptr)) {
782 ++ptr;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100783 }
Michal Vaskob4d40d62021-05-04 11:42:44 +0200784 if (*ptr) {
785 /* invalid characters after some number */
786 rc = LY_EVALID;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100787 }
788 }
789
Michal Vaskob4d40d62021-05-04 11:42:44 +0200790 /* cleanup */
791 free(str);
792 if (!rc) {
793 *ret = u;
794 }
795 return rc;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100796}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200797
798/**
799 * @brief Parse an identifier.
800 *
801 * ;; An identifier MUST NOT start with (('X'|'x') ('M'|'m') ('L'|'l'))
802 * identifier = (ALPHA / "_")
803 * *(ALPHA / DIGIT / "_" / "-" / ".")
804 *
805 * @param[in,out] id Identifier to parse. When returned, it points to the first character which is not part of the identifier.
806 * @return LY_ERR value: LY_SUCCESS or LY_EINVAL in case of invalid starting character.
807 */
808static LY_ERR
809lys_parse_id(const char **id)
810{
811 assert(id && *id);
812
813 if (!is_yangidentstartchar(**id)) {
814 return LY_EINVAL;
815 }
816 ++(*id);
817
818 while (is_yangidentchar(**id)) {
819 ++(*id);
820 }
821 return LY_SUCCESS;
822}
823
824LY_ERR
825ly_parse_nodeid(const char **id, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
826{
827 assert(id && *id);
828 assert(prefix && prefix_len);
829 assert(name && name_len);
830
831 *prefix = *id;
832 *prefix_len = 0;
833 *name = NULL;
834 *name_len = 0;
835
836 LY_CHECK_RET(lys_parse_id(id));
837 if (**id == ':') {
838 /* there is prefix */
839 *prefix_len = *id - *prefix;
840 ++(*id);
841 *name = *id;
842
843 LY_CHECK_RET(lys_parse_id(id));
844 *name_len = *id - *name;
845 } else {
846 /* there is no prefix, so what we have as prefix now is actually the name */
847 *name = *prefix;
848 *name_len = *id - *name;
849 *prefix = NULL;
850 }
851
852 return LY_SUCCESS;
853}
854
855LY_ERR
Radek Krejci084289f2019-07-09 17:35:30 +0200856ly_parse_instance_predicate(const char **pred, size_t limit, LYD_FORMAT format,
Radek Krejci0f969882020-08-21 16:56:47 +0200857 const char **prefix, size_t *prefix_len, const char **id, size_t *id_len, const char **value, size_t *value_len,
858 const char **errmsg)
Radek Krejcib4a4a272019-06-10 12:44:52 +0200859{
860 LY_ERR ret = LY_EVALID;
861 const char *in = *pred;
862 size_t offset = 1;
Radek Krejci857189e2020-09-01 13:26:36 +0200863 uint8_t expr = 0; /* 0 - position predicate; 1 - leaf-list-predicate; 2 - key-predicate */
Radek Krejcib4a4a272019-06-10 12:44:52 +0200864 char quot;
865
Radek Krejci4607f542020-12-01 12:18:49 +0100866 assert(in[0] == '[');
Radek Krejcib4a4a272019-06-10 12:44:52 +0200867
868 *prefix = *id = *value = NULL;
869 *prefix_len = *id_len = *value_len = 0;
870
871 /* leading *WSP */
Michal Vaskod989ba02020-08-24 10:59:24 +0200872 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200873
874 if (isdigit(in[offset])) {
875 /* pos: "[" *WSP positive-integer-value *WSP "]" */
876 if (in[offset] == '0') {
877 /* zero */
878 *errmsg = "The position predicate cannot be zero.";
879 goto error;
880 }
881
882 /* positive-integer-value */
Radek Krejci10bfdf82019-06-10 14:08:13 +0200883 *value = &in[offset++];
Michal Vaskod989ba02020-08-24 10:59:24 +0200884 for ( ; isdigit(in[offset]); offset++) {}
Radek Krejci10bfdf82019-06-10 14:08:13 +0200885 *value_len = &in[offset] - *value;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200886
887 } else if (in[offset] == '.') {
888 /* leaf-list-predicate: "[" *WSP "." *WSP "=" *WSP quoted-string *WSP "]" */
889 *id = &in[offset];
890 *id_len = 1;
891 offset++;
892 expr = 1;
Radek Krejci10bfdf82019-06-10 14:08:13 +0200893 } else if (in[offset] == '-') {
894 /* typically negative value */
895 *errmsg = "Invalid instance predicate format (negative position or invalid node-identifier).";
896 goto error;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200897 } else {
898 /* key-predicate: "[" *WSP node-identifier *WSP "=" *WSP quoted-string *WSP "]" */
899 in = &in[offset];
900 if (ly_parse_nodeid(&in, prefix, prefix_len, id, id_len)) {
901 *errmsg = "Invalid node-identifier.";
902 goto error;
903 }
Michal Vasko69730152020-10-09 16:30:07 +0200904 if ((format == LYD_XML) && !(*prefix)) {
Radek Krejci084289f2019-07-09 17:35:30 +0200905 /* all node names MUST be qualified with explicit namespace prefix */
906 *errmsg = "Missing prefix of a node name.";
907 goto error;
908 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200909 offset = in - *pred;
910 in = *pred;
Radek Krejci10bfdf82019-06-10 14:08:13 +0200911 expr = 2;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200912 }
913
914 if (expr) {
915 /* *WSP "=" *WSP quoted-string *WSP "]" */
Michal Vaskod989ba02020-08-24 10:59:24 +0200916 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200917
918 if (in[offset] != '=') {
Radek Krejci10bfdf82019-06-10 14:08:13 +0200919 if (expr == 1) {
920 *errmsg = "Unexpected character instead of \'=\' in leaf-list-predicate.";
921 } else { /* 2 */
922 *errmsg = "Unexpected character instead of \'=\' in key-predicate.";
923 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200924 goto error;
925 }
926 offset++;
Michal Vaskod989ba02020-08-24 10:59:24 +0200927 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200928
929 /* quoted-string */
930 quot = in[offset++];
Michal Vasko69730152020-10-09 16:30:07 +0200931 if ((quot != '\'') && (quot != '\"')) {
Radek Krejcib4a4a272019-06-10 12:44:52 +0200932 *errmsg = "String value is not quoted.";
933 goto error;
934 }
935 *value = &in[offset];
Michal Vaskod989ba02020-08-24 10:59:24 +0200936 for ( ; offset < limit && (in[offset] != quot || (offset && in[offset - 1] == '\\')); offset++) {}
Radek Krejci10bfdf82019-06-10 14:08:13 +0200937 if (in[offset] == quot) {
938 *value_len = &in[offset] - *value;
939 offset++;
940 } else {
941 *errmsg = "Value is not terminated quoted-string.";
942 goto error;
943 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200944 }
945
946 /* *WSP "]" */
Michal Vaskod989ba02020-08-24 10:59:24 +0200947 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200948 if (in[offset] != ']') {
Radek Krejci10bfdf82019-06-10 14:08:13 +0200949 if (expr == 0) {
950 *errmsg = "Predicate (pos) is not terminated by \']\' character.";
951 } else if (expr == 1) {
952 *errmsg = "Predicate (leaf-list-predicate) is not terminated by \']\' character.";
953 } else { /* 2 */
954 *errmsg = "Predicate (key-predicate) is not terminated by \']\' character.";
955 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200956 goto error;
957 }
Radek Krejci10bfdf82019-06-10 14:08:13 +0200958 offset++;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200959
Radek Krejci10bfdf82019-06-10 14:08:13 +0200960 if (offset <= limit) {
961 *pred = &in[offset];
Radek Krejcib4a4a272019-06-10 12:44:52 +0200962 return LY_SUCCESS;
963 }
964
965 /* we read after the limit */
966 *errmsg = "Predicate is incomplete.";
967 *prefix = *id = *value = NULL;
968 *prefix_len = *id_len = *value_len = 0;
969 offset = limit;
970 ret = LY_EINVAL;
971
972error:
973 *pred = &in[offset];
974 return ret;
975}