blob: 28d75141c13ce1d93db9248c3a308f7dfcc11905 [file] [log] [blame]
/**
 * @file ly_common.c
 * @author Michal Vasko <mvasko@cesnet.cz>
 * @brief common internal definitions for libyang
 *
 * Copyright (c) 2018 - 2024 CESNET, z.s.p.o.
 *
 * This source code is licensed under BSD 3-Clause License (the "License").
 * You may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://opensource.org/licenses/BSD-3-Clause
 */
Radek Krejcib7db73a2018-10-24 14:18:40 +020014
Radek Krejci535ea9f2020-05-29 16:01:05 +020015#define _GNU_SOURCE
16
Michal Vasko8f702ee2024-02-20 15:44:24 +010017#include "ly_common.h"
Michal Vasko1324b6c2018-09-07 11:16:23 +020018
Radek Krejci86d106e2018-10-18 09:53:19 +020019#include <assert.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020020#include <ctype.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020021#include <errno.h>
Jan Kundrátd31adc12022-07-07 21:36:15 +020022#include <fcntl.h>
Michal Vasko15dc9fa2021-05-03 14:33:05 +020023#include <inttypes.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020024#include <stdarg.h>
Radek Krejci535ea9f2020-05-29 16:01:05 +020025#include <stdio.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020026#include <stdlib.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020027#include <string.h>
Jan Kundrátf1960dc2021-12-12 03:12:23 +010028#ifndef _WIN32
Michal Vasko8f702ee2024-02-20 15:44:24 +010029# ifdef HAVE_MMAP
30# include <sys/mman.h>
31# endif
Jan Kundrátd31adc12022-07-07 21:36:15 +020032#else
Michal Vasko8f702ee2024-02-20 15:44:24 +010033# include <io.h>
Jan Kundrátf1960dc2021-12-12 03:12:23 +010034#endif
Radek Krejci86d106e2018-10-18 09:53:19 +020035#include <sys/stat.h>
Radek Krejci86d106e2018-10-18 09:53:19 +020036#include <unistd.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020037
Radek Krejciaa45bda2020-07-20 07:43:38 +020038#include "compat.h"
Radek Krejcib4a4a272019-06-10 12:44:52 +020039#include "tree_schema_internal.h"
Michal Vasko3ab37cc2024-06-28 10:40:07 +020040#include "version.h"
aPiecek704f8e92021-08-25 13:35:05 +020041#include "xml.h"
Michal Vasko1324b6c2018-09-07 11:16:23 +020042
Michal Vasko3ab37cc2024-06-28 10:40:07 +020043LIBYANG_API_DEF struct ly_version ly_version_so = {
44 .major = LY_VERSION_MAJOR,
45 .minor = LY_VERSION_MINOR,
46 .micro = LY_VERSION_MICRO,
47 .str = LY_VERSION
48};
49
50LIBYANG_API_DEF struct ly_version ly_version_proj = {
51 .major = LY_PROJ_VERSION_MAJOR,
52 .minor = LY_PROJ_VERSION_MINOR,
53 .micro = LY_PROJ_VERSION_MICRO,
54 .str = LY_PROJ_VERSION
55};
56
/**
 * @brief Wrapper for realloc() that frees the original memory when the reallocation fails.
 *
 * @param[in] ptr Memory to resize, may be NULL.
 * @param[in] size New size in bytes. If zero, @p ptr is freed and NULL is returned.
 * @return Pointer to the resized memory.
 * @return NULL on failure or zero @p size, @p ptr is freed in that case and must not be used any more.
 */
void *
ly_realloc(void *ptr, size_t size)
{
    void *new_mem;

    if (!size) {
        /* realloc(ptr, 0) may free ptr itself and still return NULL,
         * freeing ptr once more afterwards would then be a double free */
        free(ptr);
        return NULL;
    }

    new_mem = realloc(ptr, size);
    if (!new_mem) {
        /* the original block is still valid after a failed realloc(), release it */
        free(ptr);
    }

    return new_mem;
}
Michal Vasko841d1a92018-09-07 15:40:31 +020069
/**
 * @brief Find the first occurrence of a character in the first @p len bytes of a buffer.
 *
 * A zero byte in the buffer does not stop the search, exactly @p len bytes are examined at most.
 *
 * @param[in] s Buffer to search in.
 * @param[in] c Character to look for, compared after conversion to char.
 * @param[in] len Number of bytes of @p s to examine.
 * @return Pointer to the first matching byte, NULL if there is none.
 */
char *
ly_strnchr(const char *s, int c, size_t len)
{
    const char wanted = (char)c;
    size_t idx;

    for (idx = 0; idx < len; ++idx) {
        if (s[idx] == wanted) {
            return (char *)&s[idx];
        }
    }

    return NULL;
}
76
/**
 * @brief Compare a NULL-terminated reference string with a string given by its length.
 *
 * The strings are equal only if the first @p str_len bytes match and @p refstr ends right after them.
 *
 * @param[in] refstr NULL-terminated reference string.
 * @param[in] str String to compare, only @p str_len bytes of it are used.
 * @param[in] str_len Number of bytes of @p str to compare.
 * @return 0 if the strings are equal.
 * @return Non-zero strncmp() result if the compared bytes differ, 1 if only @p refstr is longer.
 */
int
ly_strncmp(const char *refstr, const char *str, size_t str_len)
{
    int diff = strncmp(refstr, str, str_len);

    if (diff) {
        /* the compared parts differ */
        return diff;
    }

    /* same beginning, the reference string must not continue */
    return refstr[str_len] ? 1 : 0;
}
88
Michal Vasko15dc9fa2021-05-03 14:33:05 +020089LY_ERR
90ly_strntou8(const char *nptr, size_t len, uint8_t *ret)
91{
Michal Vasko014507d2024-06-26 16:02:44 +020092 uint8_t num = 0, dig;
93 uint16_t dec_pow;
Michal Vasko15dc9fa2021-05-03 14:33:05 +020094
95 if (len > 3) {
96 /* overflow for sure */
97 return LY_EDENIED;
98 }
99
100 dec_pow = 1;
101 for ( ; len && isdigit(nptr[len - 1]); --len) {
102 dig = nptr[len - 1] - 48;
103
104 if (LY_OVERFLOW_MUL(UINT8_MAX, dig, dec_pow)) {
105 return LY_EDENIED;
106 }
107 dig *= dec_pow;
108
109 if (LY_OVERFLOW_ADD(UINT8_MAX, num, dig)) {
110 return LY_EDENIED;
111 }
112 num += dig;
113
114 dec_pow *= 10;
115 }
116
117 if (len) {
118 return LY_EVALID;
119 }
120 *ret = num;
121 return LY_SUCCESS;
122}
123
aPieceke3f828d2021-05-10 15:34:41 +0200124LY_ERR
125ly_value_prefix_next(const char *str_begin, const char *str_end, uint32_t *len, ly_bool *is_prefix, const char **str_next)
aPiecekf102d4d2021-03-30 12:18:38 +0200126{
127 const char *stop, *prefix;
aPieceke3f828d2021-05-10 15:34:41 +0200128 size_t bytes_read;
aPiecekf102d4d2021-03-30 12:18:38 +0200129 uint32_t c;
130 ly_bool prefix_found;
aPieceke3f828d2021-05-10 15:34:41 +0200131 LY_ERR ret = LY_SUCCESS;
aPiecekf102d4d2021-03-30 12:18:38 +0200132
aPieceke3f828d2021-05-10 15:34:41 +0200133 assert(len && is_prefix && str_next);
aPiecekf102d4d2021-03-30 12:18:38 +0200134
135#define IS_AT_END(PTR, STR_END) (STR_END ? PTR == STR_END : !(*PTR))
136
137 *str_next = NULL;
138 *is_prefix = 0;
aPieceke3f828d2021-05-10 15:34:41 +0200139 *len = 0;
aPiecekf102d4d2021-03-30 12:18:38 +0200140
141 if (!str_begin || !(*str_begin) || (str_begin == str_end)) {
142 return ret;
143 }
144
145 stop = str_begin;
146 prefix = NULL;
147 prefix_found = 0;
148
149 do {
150 /* look for the beginning of the YANG value */
aPieceke3f828d2021-05-10 15:34:41 +0200151 do {
152 LY_CHECK_RET(ly_getutf8(&stop, &c, &bytes_read));
153 } while (!is_xmlqnamestartchar(c) && !IS_AT_END(stop, str_end));
aPiecekf102d4d2021-03-30 12:18:38 +0200154
155 if (IS_AT_END(stop, str_end)) {
156 break;
157 }
158
159 /* maybe the prefix was found */
aPieceke3f828d2021-05-10 15:34:41 +0200160 prefix = stop - bytes_read;
aPiecekf102d4d2021-03-30 12:18:38 +0200161
162 /* look for the the end of the prefix */
aPieceke3f828d2021-05-10 15:34:41 +0200163 do {
164 LY_CHECK_RET(ly_getutf8(&stop, &c, &bytes_read));
165 } while (is_xmlqnamechar(c) && !IS_AT_END(stop, str_end));
aPiecekf102d4d2021-03-30 12:18:38 +0200166
167 prefix_found = c == ':' ? 1 : 0;
168
169 /* if it wasn't the prefix, keep looking */
170 } while (!IS_AT_END(stop, str_end) && !prefix_found);
171
172 if ((str_begin == prefix) && prefix_found) {
173 /* prefix found at the beginning of the input string */
174 *is_prefix = 1;
175 *str_next = IS_AT_END(stop, str_end) ? NULL : stop;
aPieceke3f828d2021-05-10 15:34:41 +0200176 *len = (stop - bytes_read) - str_begin;
aPiecekf102d4d2021-03-30 12:18:38 +0200177 } else if ((str_begin != prefix) && (prefix_found)) {
178 /* there is a some string before prefix */
179 *str_next = prefix;
aPieceke3f828d2021-05-10 15:34:41 +0200180 *len = prefix - str_begin;
aPiecekf102d4d2021-03-30 12:18:38 +0200181 } else {
182 /* no prefix found */
aPieceke3f828d2021-05-10 15:34:41 +0200183 *len = stop - str_begin;
aPiecekf102d4d2021-03-30 12:18:38 +0200184 }
185
186#undef IS_AT_END
187
188 return ret;
189}
190
Radek Krejcib416be62018-10-01 14:51:45 +0200191LY_ERR
Michal Vaskob36053d2020-03-26 15:49:30 +0100192ly_getutf8(const char **input, uint32_t *utf8_char, size_t *bytes_read)
Radek Krejcib416be62018-10-01 14:51:45 +0200193{
Radek Krejci1deb5be2020-08-26 16:43:36 +0200194 uint32_t c, aux;
195 size_t len;
Radek Krejcib416be62018-10-01 14:51:45 +0200196
197 c = (*input)[0];
Radek Krejcib416be62018-10-01 14:51:45 +0200198
199 if (!(c & 0x80)) {
200 /* one byte character */
201 len = 1;
202
Michal Vasko69730152020-10-09 16:30:07 +0200203 if ((c < 0x20) && (c != 0x9) && (c != 0xa) && (c != 0xd)) {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200204 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200205 }
206 } else if ((c & 0xe0) == 0xc0) {
207 /* two bytes character */
208 len = 2;
209
210 aux = (*input)[1];
211 if ((aux & 0xc0) != 0x80) {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200212 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200213 }
214 c = ((c & 0x1f) << 6) | (aux & 0x3f);
215
216 if (c < 0x80) {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200217 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200218 }
219 } else if ((c & 0xf0) == 0xe0) {
220 /* three bytes character */
221 len = 3;
222
223 c &= 0x0f;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200224 for (uint64_t i = 1; i <= 2; i++) {
Radek Krejcib416be62018-10-01 14:51:45 +0200225 aux = (*input)[i];
226 if ((aux & 0xc0) != 0x80) {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200227 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200228 }
229
230 c = (c << 6) | (aux & 0x3f);
231 }
232
Michal Vasko69730152020-10-09 16:30:07 +0200233 if ((c < 0x800) || ((c > 0xd7ff) && (c < 0xe000)) || (c > 0xfffd)) {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200234 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200235 }
236 } else if ((c & 0xf8) == 0xf0) {
237 /* four bytes character */
238 len = 4;
239
240 c &= 0x07;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200241 for (uint64_t i = 1; i <= 3; i++) {
Radek Krejcib416be62018-10-01 14:51:45 +0200242 aux = (*input)[i];
243 if ((aux & 0xc0) != 0x80) {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200244 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200245 }
246
247 c = (c << 6) | (aux & 0x3f);
248 }
249
Michal Vasko69730152020-10-09 16:30:07 +0200250 if ((c < 0x1000) || (c > 0x10ffff)) {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200251 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200252 }
253 } else {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200254 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200255 }
256
257 (*utf8_char) = c;
258 (*input) += len;
259 if (bytes_read) {
260 (*bytes_read) = len;
261 }
262 return LY_SUCCESS;
Michal Vaskoda09abf2023-10-06 15:53:18 +0200263
264error:
265 if (bytes_read) {
266 (*bytes_read) = 0;
267 }
268 return LY_EINVAL;
Radek Krejcib416be62018-10-01 14:51:45 +0200269}
270
/**
 * @brief Check whether an UTF-8 string is equal to a hex string after a bitwise and.
 *
 * (input & 0x[arg1][arg3][arg5]...) == 0x[arg2][arg4][arg6]...
 *
 * @param[in] input UTF-8 string.
 * @param[in] bytes Number of bytes to compare.
 * @param[in] ... 2x @p bytes number of bytes to perform bitwise and and equality operations,
 * always a mask followed by the expected value of the masked byte.
 * @return 1 if all the masked bytes are equal to the expected values, 0 otherwise.
 */
static int
ly_utf8_and_equal(const char *input, int bytes, ...)
{
    va_list ap;
    int i, mask, byte, equal = 1;

    va_start(ap, bytes);
    for (i = 0; i < bytes; ++i) {
        mask = va_arg(ap, int);
        byte = va_arg(ap, int);

        /* compare each byte */
        if (((uint8_t)input[i] & mask) != (uint8_t)byte) {
            /* do not return directly, every va_start() must be paired with va_end() */
            equal = 0;
            break;
        }
    }
    va_end(ap);

    return equal;
}
301
/**
 * @brief Check whether an UTF-8 string is smaller than a hex string.
 *
 * input < 0x[arg1][arg2]...
 *
 * @param[in] input UTF-8 string.
 * @param[in] bytes Number of bytes to compare.
 * @param[in] ... @p bytes number of bytes to compare with.
 * @return 1 if @p input is smaller, 0 if it is greater or equal.
 */
static int
ly_utf8_less(const char *input, int bytes, ...)
{
    va_list ap;
    int i, byte, less = 0;

    va_start(ap, bytes);
    for (i = 0; i < bytes; ++i) {
        byte = va_arg(ap, int);

        /* compare until bytes differ */
        if ((uint8_t)input[i] != (uint8_t)byte) {
            /* the first different byte decides; break instead of returning so that va_end() is always called */
            less = ((uint8_t)input[i] < (uint8_t)byte) ? 1 : 0;
            break;
        }
    }
    va_end(ap);

    /* 0 is kept if all the bytes are equal */
    return less;
}
334
/**
 * @brief Check whether an UTF-8 string is greater than a hex string.
 *
 * input > 0x[arg1][arg2]...
 *
 * @param[in] input UTF-8 string.
 * @param[in] bytes Number of bytes to compare.
 * @param[in] ... @p bytes number of bytes to compare with.
 * @return 1 if @p input is greater, 0 if it is smaller or equal.
 */
static int
ly_utf8_greater(const char *input, int bytes, ...)
{
    va_list ap;
    int i, byte, greater = 0;

    va_start(ap, bytes);
    for (i = 0; i < bytes; ++i) {
        byte = va_arg(ap, int);

        /* compare until bytes differ */
        if ((uint8_t)input[i] != (uint8_t)byte) {
            /* the first different byte decides; break instead of returning so that va_end() is always called */
            greater = ((uint8_t)input[i] > (uint8_t)byte) ? 1 : 0;
            break;
        }
    }
    va_end(ap);

    /* 0 is kept if all the bytes are equal */
    return greater;
}
367
368LY_ERR
369ly_checkutf8(const char *input, size_t in_len, size_t *utf8_len)
370{
371 size_t len;
372
373 if (!(input[0] & 0x80)) {
374 /* one byte character */
375 len = 1;
376
377 if (ly_utf8_less(input, 1, 0x20) && (input[0] != 0x9) && (input[0] != 0xa) && (input[0] != 0xd)) {
378 /* invalid control characters */
379 return LY_EINVAL;
380 }
381 } else if (((input[0] & 0xe0) == 0xc0) && (in_len > 1)) {
382 /* two bytes character */
383 len = 2;
384
385 /* (input < 0xC280) || (input > 0xDFBF) || ((input & 0xE0C0) != 0xC080) */
386 if (ly_utf8_less(input, 2, 0xC2, 0x80) || ly_utf8_greater(input, 2, 0xDF, 0xBF) ||
387 !ly_utf8_and_equal(input, 2, 0xE0, 0xC0, 0xC0, 0x80)) {
388 return LY_EINVAL;
389 }
390 } else if (((input[0] & 0xf0) == 0xe0) && (in_len > 2)) {
391 /* three bytes character */
392 len = 3;
393
394 /* (input >= 0xEDA080) && (input <= 0xEDBFBF) */
395 if (!ly_utf8_less(input, 3, 0xED, 0xA0, 0x80) && !ly_utf8_greater(input, 3, 0xED, 0xBF, 0xBF)) {
396 /* reject UTF-16 surrogates */
397 return LY_EINVAL;
398 }
399
400 /* (input < 0xE0A080) || (input > 0xEFBFBF) || ((input & 0xF0C0C0) != 0xE08080) */
401 if (ly_utf8_less(input, 3, 0xE0, 0xA0, 0x80) || ly_utf8_greater(input, 3, 0xEF, 0xBF, 0xBF) ||
402 !ly_utf8_and_equal(input, 3, 0xF0, 0xE0, 0xC0, 0x80, 0xC0, 0x80)) {
403 return LY_EINVAL;
404 }
405 } else if (((input[0] & 0xf8) == 0xf0) && (in_len > 3)) {
406 /* four bytes character */
407 len = 4;
408
409 /* (input < 0xF0908080) || (input > 0xF48FBFBF) || ((input & 0xF8C0C0C0) != 0xF0808080) */
410 if (ly_utf8_less(input, 4, 0xF0, 0x90, 0x80, 0x80) || ly_utf8_greater(input, 4, 0xF4, 0x8F, 0xBF, 0xBF) ||
411 !ly_utf8_and_equal(input, 4, 0xF8, 0xF0, 0xC0, 0x80, 0xC0, 0x80, 0xC0, 0x80)) {
412 return LY_EINVAL;
413 }
414 } else {
415 return LY_EINVAL;
416 }
417
418 *utf8_len = len;
419 return LY_SUCCESS;
420}
421
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200422LY_ERR
423ly_pututf8(char *dst, uint32_t value, size_t *bytes_written)
424{
425 if (value < 0x80) {
426 /* one byte character */
Michal Vasko69730152020-10-09 16:30:07 +0200427 if ((value < 0x20) &&
428 (value != 0x09) &&
429 (value != 0x0a) &&
430 (value != 0x0d)) {
Michal Vasko519097f2023-05-25 10:00:44 +0200431 /* valid UTF8 but not YANG string character */
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200432 return LY_EINVAL;
433 }
434
435 dst[0] = value;
436 (*bytes_written) = 1;
437 } else if (value < 0x800) {
438 /* two bytes character */
439 dst[0] = 0xc0 | (value >> 6);
440 dst[1] = 0x80 | (value & 0x3f);
441 (*bytes_written) = 2;
442 } else if (value < 0xfffe) {
443 /* three bytes character */
444 if (((value & 0xf800) == 0xd800) ||
Michal Vasko69730152020-10-09 16:30:07 +0200445 ((value >= 0xfdd0) && (value <= 0xfdef))) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200446 /* exclude surrogate blocks %xD800-DFFF */
447 /* exclude noncharacters %xFDD0-FDEF */
448 return LY_EINVAL;
449 }
450
451 dst[0] = 0xe0 | (value >> 12);
452 dst[1] = 0x80 | ((value >> 6) & 0x3f);
453 dst[2] = 0x80 | (value & 0x3f);
454
455 (*bytes_written) = 3;
456 } else if (value < 0x10fffe) {
457 if ((value & 0xffe) == 0xffe) {
458 /* exclude noncharacters %xFFFE-FFFF, %x1FFFE-1FFFF, %x2FFFE-2FFFF, %x3FFFE-3FFFF, %x4FFFE-4FFFF,
459 * %x5FFFE-5FFFF, %x6FFFE-6FFFF, %x7FFFE-7FFFF, %x8FFFE-8FFFF, %x9FFFE-9FFFF, %xAFFFE-AFFFF,
460 * %xBFFFE-BFFFF, %xCFFFE-CFFFF, %xDFFFE-DFFFF, %xEFFFE-EFFFF, %xFFFFE-FFFFF, %x10FFFE-10FFFF */
461 return LY_EINVAL;
462 }
463 /* four bytes character */
464 dst[0] = 0xf0 | (value >> 18);
465 dst[1] = 0x80 | ((value >> 12) & 0x3f);
466 dst[2] = 0x80 | ((value >> 6) & 0x3f);
467 dst[3] = 0x80 | (value & 0x3f);
468
469 (*bytes_written) = 4;
470 } else {
471 return LY_EINVAL;
472 }
473 return LY_SUCCESS;
474}
475
/**
 * @brief Static table of the UTF8 characters lengths according to their first byte.
 */
static const unsigned char utf8_char_length_table[] = {
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
};

/**
 * @brief Count the UTF-8 characters in a string.
 *
 * The characters are not validated, their length is learned only from their first byte.
 *
 * @param[in] str String to process.
 * @param[in] bytes Maximum number of bytes of @p str to process, the processing also stops
 * on a zero byte.
 * @return Number of characters that start in the processed part of @p str.
 */
size_t
ly_utf8len(const char *str, size_t bytes)
{
    size_t count = 0, pos = 0, left;

    while ((pos < bytes) && str[pos]) {
        ++count;
        left = utf8_char_length_table[(unsigned char)str[pos]];

        /* skip the bytes of the character one by one so that neither the terminating zero byte nor the limit
         * is stepped over if the last character is truncated */
        do {
            ++pos;
            --left;
        } while (left && (pos < bytes) && str[pos]);
    }

    return count;
}
510
Michal Vasko7b3a00e2023-08-09 11:58:03 +0200511int
Radek Krejcid972c252018-09-25 13:23:39 +0200512LY_VCODE_INSTREXP_len(const char *str)
513{
Michal Vasko7b3a00e2023-08-09 11:58:03 +0200514 int len = 0;
Michal Vasko69730152020-10-09 16:30:07 +0200515
Radek Krejcid972c252018-09-25 13:23:39 +0200516 if (!str) {
517 return len;
518 } else if (!str[0]) {
519 return 1;
520 }
Radek Krejci1e008d22020-08-17 11:37:37 +0200521 for (len = 1; len < LY_VCODE_INSTREXP_MAXLEN && str[len]; ++len) {}
Radek Krejcid972c252018-09-25 13:23:39 +0200522 return len;
523}
524
Jan Kundrátf1960dc2021-12-12 03:12:23 +0100525#ifdef HAVE_MMAP
Radek Krejcif345c012018-09-19 11:12:59 +0200526LY_ERR
Radek Krejci86d106e2018-10-18 09:53:19 +0200527ly_mmap(struct ly_ctx *ctx, int fd, size_t *length, void **addr)
Michal Vasko841d1a92018-09-07 15:40:31 +0200528{
Radek Krejci86d106e2018-10-18 09:53:19 +0200529 struct stat sb;
530 long pagesize;
531 size_t m;
Michal Vasko841d1a92018-09-07 15:40:31 +0200532
Radek Krejci86d106e2018-10-18 09:53:19 +0200533 assert(length);
534 assert(addr);
535 assert(fd >= 0);
Michal Vasko841d1a92018-09-07 15:40:31 +0200536
Radek Krejci86d106e2018-10-18 09:53:19 +0200537 if (fstat(fd, &sb) == -1) {
538 LOGERR(ctx, LY_ESYS, "Failed to stat the file descriptor (%s) for the mmap().", strerror(errno));
539 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200540 }
Radek Krejci86d106e2018-10-18 09:53:19 +0200541 if (!S_ISREG(sb.st_mode)) {
542 LOGERR(ctx, LY_EINVAL, "File to mmap() is not a regular file.");
543 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200544 }
Radek Krejci86d106e2018-10-18 09:53:19 +0200545 if (!sb.st_size) {
546 *addr = NULL;
547 return LY_SUCCESS;
548 }
549 pagesize = sysconf(_SC_PAGESIZE);
550
551 m = sb.st_size % pagesize;
Michal Vasko69730152020-10-09 16:30:07 +0200552 if (m && (pagesize - m >= 1)) {
Radek Krejci86d106e2018-10-18 09:53:19 +0200553 /* there will be enough space (at least 1 byte) after the file content mapping to provide zeroed NULL-termination byte */
554 *length = sb.st_size + 1;
555 *addr = mmap(NULL, *length, PROT_READ, MAP_PRIVATE, fd, 0);
556 } else {
557 /* there will not be enough bytes after the file content mapping for the additional bytes and some of them
558 * would overflow into another page that would not be zerroed and any access into it would generate SIGBUS.
559 * Therefore we have to do the following hack with double mapping. First, the required number of bytes
560 * (including the additinal bytes) is required as anonymous and thus they will be really provided (actually more
561 * because of using whole pages) and also initialized by zeros. Then, the file is mapped to the same address
562 * where the anonymous mapping starts. */
563 *length = sb.st_size + pagesize;
564 *addr = mmap(NULL, *length, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
565 *addr = mmap(*addr, sb.st_size, PROT_READ, MAP_PRIVATE | MAP_FIXED, fd, 0);
566 }
567 if (*addr == MAP_FAILED) {
568 LOGERR(ctx, LY_ESYS, "mmap() failed (%s).", strerror(errno));
569 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200570 }
571
Radek Krejcif345c012018-09-19 11:12:59 +0200572 return LY_SUCCESS;
Radek Krejci86d106e2018-10-18 09:53:19 +0200573}
Michal Vasko841d1a92018-09-07 15:40:31 +0200574
Radek Krejci86d106e2018-10-18 09:53:19 +0200575LY_ERR
576ly_munmap(void *addr, size_t length)
577{
578 if (munmap(addr, length)) {
579 return LY_ESYS;
580 }
581 return LY_SUCCESS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200582}
Radek Krejci4f28eda2018-11-12 11:46:16 +0100583
Jan Kundrátf1960dc2021-12-12 03:12:23 +0100584#else
585
586LY_ERR
587ly_mmap(struct ly_ctx *ctx, int fd, size_t *length, void **addr)
588{
589 struct stat sb;
590 size_t m;
591
592 assert(length);
593 assert(addr);
594 assert(fd >= 0);
595
Jan Kundrátd31adc12022-07-07 21:36:15 +0200596#if _WIN32
597 if (_setmode(fd, _O_BINARY) == -1) {
598 LOGERR(ctx, LY_ESYS, "Failed to switch the file descriptor to binary mode.", strerror(errno));
599 return LY_ESYS;
600 }
601#endif
602
Jan Kundrátf1960dc2021-12-12 03:12:23 +0100603 if (fstat(fd, &sb) == -1) {
604 LOGERR(ctx, LY_ESYS, "Failed to stat the file descriptor (%s) for the mmap().", strerror(errno));
605 return LY_ESYS;
606 }
607 if (!S_ISREG(sb.st_mode)) {
608 LOGERR(ctx, LY_EINVAL, "File to mmap() is not a regular file.");
609 return LY_ESYS;
610 }
611 if (!sb.st_size) {
612 *addr = NULL;
613 return LY_SUCCESS;
614 }
615 /* On Windows, the mman-win32 mmap() emulation uses CreateFileMapping and MapViewOfFile, and these functions
616 * do not allow mapping more than "length of file" bytes for PROT_READ. Remapping existing mappings is not allowed, either.
617 * At that point the path of least resistance is just reading the file in as-is. */
618 m = sb.st_size + 1;
619 char *buf = calloc(m, 1);
620
621 if (!buf) {
622 LOGERR(ctx, LY_ESYS, "ly_mmap: malloc() failed (%s).", strerror(errno));
623 }
624 *addr = buf;
625 *length = m;
626
627 lseek(fd, 0, SEEK_SET);
628 ssize_t to_read = m - 1;
629
630 while (to_read > 0) {
631 ssize_t n = read(fd, buf, to_read);
Michal Vasko2bf4af42023-01-04 12:08:38 +0100632
Jan Kundrátf1960dc2021-12-12 03:12:23 +0100633 if (n == 0) {
634 return LY_SUCCESS;
635 } else if (n < 0) {
636 if (errno == EINTR) {
637 continue; // can I get this on Windows?
638 }
639 LOGERR(ctx, LY_ESYS, "ly_mmap: read() failed (%s).", strerror(errno));
640 }
641 to_read -= n;
642 buf += n;
643 }
644 return LY_SUCCESS;
645}
646
647LY_ERR
648ly_munmap(void *addr, size_t length)
649{
650 (void)length;
651 free(addr);
652 return LY_SUCCESS;
653}
654
655#endif
656
Radek Krejci4f28eda2018-11-12 11:46:16 +0100657LY_ERR
Radek Krejci4546aa62019-07-15 16:53:32 +0200658ly_strcat(char **dest, const char *format, ...)
659{
660 va_list fp;
661 char *addition = NULL;
662 size_t len;
663
664 va_start(fp, format);
665 len = vasprintf(&addition, format, fp);
666 len += (*dest ? strlen(*dest) : 0) + 1;
667
668 if (*dest) {
669 *dest = ly_realloc(*dest, len);
670 if (!*dest) {
Radek Krejci1cd812f2020-12-01 12:17:53 +0100671 va_end(fp);
Radek Krejci4546aa62019-07-15 16:53:32 +0200672 return LY_EMEM;
673 }
674 *dest = strcat(*dest, addition);
675 free(addition);
676 } else {
677 *dest = addition;
678 }
679
680 va_end(fp);
681 return LY_SUCCESS;
682}
683
684LY_ERR
Radek Krejci249973a2019-06-10 10:50:54 +0200685ly_parse_int(const char *val_str, size_t val_len, int64_t min, int64_t max, int base, int64_t *ret)
Radek Krejci4f28eda2018-11-12 11:46:16 +0100686{
Michal Vaskob4d40d62021-05-04 11:42:44 +0200687 LY_ERR rc = LY_SUCCESS;
688 char *ptr, *str;
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200689 int64_t i;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100690
Radek Krejci249973a2019-06-10 10:50:54 +0200691 LY_CHECK_ARG_RET(NULL, val_str, val_str[0], val_len, LY_EINVAL);
Radek Krejci4f28eda2018-11-12 11:46:16 +0100692
Michal Vaskob4d40d62021-05-04 11:42:44 +0200693 /* duplicate the value */
694 str = strndup(val_str, val_len);
695 LY_CHECK_RET(!str, LY_EMEM);
Radek Krejci4f28eda2018-11-12 11:46:16 +0100696
Michal Vaskob4d40d62021-05-04 11:42:44 +0200697 /* parse the value to avoid accessing following bytes */
698 errno = 0;
699 i = strtoll(str, &ptr, base);
700 if (errno || (ptr == str)) {
701 /* invalid string */
702 rc = LY_EVALID;
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200703 } else if ((i < min) || (i > max)) {
Michal Vaskob4d40d62021-05-04 11:42:44 +0200704 /* invalid number */
705 rc = LY_EDENIED;
706 } else if (*ptr) {
707 while (isspace(*ptr)) {
708 ++ptr;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100709 }
Michal Vaskob4d40d62021-05-04 11:42:44 +0200710 if (*ptr) {
711 /* invalid characters after some number */
712 rc = LY_EVALID;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100713 }
714 }
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200715
Michal Vaskob4d40d62021-05-04 11:42:44 +0200716 /* cleanup */
717 free(str);
718 if (!rc) {
719 *ret = i;
720 }
721 return rc;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100722}
723
724LY_ERR
Radek Krejci249973a2019-06-10 10:50:54 +0200725ly_parse_uint(const char *val_str, size_t val_len, uint64_t max, int base, uint64_t *ret)
Radek Krejci4f28eda2018-11-12 11:46:16 +0100726{
Michal Vaskob4d40d62021-05-04 11:42:44 +0200727 LY_ERR rc = LY_SUCCESS;
728 char *ptr, *str;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100729 uint64_t u;
730
Michal Vaskob4d40d62021-05-04 11:42:44 +0200731 LY_CHECK_ARG_RET(NULL, val_str, val_str[0], val_len, LY_EINVAL);
Radek Krejci4f28eda2018-11-12 11:46:16 +0100732
Michal Vaskob4d40d62021-05-04 11:42:44 +0200733 /* duplicate the value to avoid accessing following bytes */
734 str = strndup(val_str, val_len);
735 LY_CHECK_RET(!str, LY_EMEM);
736
737 /* parse the value */
Radek Krejci4f28eda2018-11-12 11:46:16 +0100738 errno = 0;
Michal Vaskob4d40d62021-05-04 11:42:44 +0200739 u = strtoull(str, &ptr, base);
740 if (errno || (ptr == str)) {
741 /* invalid string */
742 rc = LY_EVALID;
743 } else if ((u > max) || (u && (str[0] == '-'))) {
744 /* invalid number */
745 rc = LY_EDENIED;
746 } else if (*ptr) {
747 while (isspace(*ptr)) {
748 ++ptr;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100749 }
Michal Vaskob4d40d62021-05-04 11:42:44 +0200750 if (*ptr) {
751 /* invalid characters after some number */
752 rc = LY_EVALID;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100753 }
754 }
755
Michal Vaskob4d40d62021-05-04 11:42:44 +0200756 /* cleanup */
757 free(str);
758 if (!rc) {
759 *ret = u;
760 }
761 return rc;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100762}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200763
764/**
765 * @brief Parse an identifier.
766 *
767 * ;; An identifier MUST NOT start with (('X'|'x') ('M'|'m') ('L'|'l'))
768 * identifier = (ALPHA / "_")
769 * *(ALPHA / DIGIT / "_" / "-" / ".")
770 *
771 * @param[in,out] id Identifier to parse. When returned, it points to the first character which is not part of the identifier.
772 * @return LY_ERR value: LY_SUCCESS or LY_EINVAL in case of invalid starting character.
773 */
774static LY_ERR
775lys_parse_id(const char **id)
776{
777 assert(id && *id);
778
779 if (!is_yangidentstartchar(**id)) {
780 return LY_EINVAL;
781 }
782 ++(*id);
783
784 while (is_yangidentchar(**id)) {
785 ++(*id);
786 }
787 return LY_SUCCESS;
788}
789
790LY_ERR
791ly_parse_nodeid(const char **id, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
792{
793 assert(id && *id);
794 assert(prefix && prefix_len);
795 assert(name && name_len);
796
797 *prefix = *id;
798 *prefix_len = 0;
799 *name = NULL;
800 *name_len = 0;
801
802 LY_CHECK_RET(lys_parse_id(id));
803 if (**id == ':') {
804 /* there is prefix */
805 *prefix_len = *id - *prefix;
806 ++(*id);
807 *name = *id;
808
809 LY_CHECK_RET(lys_parse_id(id));
810 *name_len = *id - *name;
811 } else {
812 /* there is no prefix, so what we have as prefix now is actually the name */
813 *name = *prefix;
814 *name_len = *id - *name;
815 *prefix = NULL;
816 }
817
818 return LY_SUCCESS;
819}
820
821LY_ERR
Radek Krejci084289f2019-07-09 17:35:30 +0200822ly_parse_instance_predicate(const char **pred, size_t limit, LYD_FORMAT format,
Radek Krejci0f969882020-08-21 16:56:47 +0200823 const char **prefix, size_t *prefix_len, const char **id, size_t *id_len, const char **value, size_t *value_len,
824 const char **errmsg)
Radek Krejcib4a4a272019-06-10 12:44:52 +0200825{
826 LY_ERR ret = LY_EVALID;
827 const char *in = *pred;
828 size_t offset = 1;
Radek Krejci857189e2020-09-01 13:26:36 +0200829 uint8_t expr = 0; /* 0 - position predicate; 1 - leaf-list-predicate; 2 - key-predicate */
Radek Krejcib4a4a272019-06-10 12:44:52 +0200830 char quot;
831
Radek Krejci4607f542020-12-01 12:18:49 +0100832 assert(in[0] == '[');
Radek Krejcib4a4a272019-06-10 12:44:52 +0200833
834 *prefix = *id = *value = NULL;
835 *prefix_len = *id_len = *value_len = 0;
836
837 /* leading *WSP */
Michal Vaskod989ba02020-08-24 10:59:24 +0200838 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200839
840 if (isdigit(in[offset])) {
841 /* pos: "[" *WSP positive-integer-value *WSP "]" */
842 if (in[offset] == '0') {
843 /* zero */
844 *errmsg = "The position predicate cannot be zero.";
845 goto error;
846 }
847
848 /* positive-integer-value */
Radek Krejci10bfdf82019-06-10 14:08:13 +0200849 *value = &in[offset++];
Michal Vaskod989ba02020-08-24 10:59:24 +0200850 for ( ; isdigit(in[offset]); offset++) {}
Radek Krejci10bfdf82019-06-10 14:08:13 +0200851 *value_len = &in[offset] - *value;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200852
853 } else if (in[offset] == '.') {
854 /* leaf-list-predicate: "[" *WSP "." *WSP "=" *WSP quoted-string *WSP "]" */
855 *id = &in[offset];
856 *id_len = 1;
857 offset++;
858 expr = 1;
Radek Krejci10bfdf82019-06-10 14:08:13 +0200859 } else if (in[offset] == '-') {
860 /* typically negative value */
861 *errmsg = "Invalid instance predicate format (negative position or invalid node-identifier).";
862 goto error;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200863 } else {
864 /* key-predicate: "[" *WSP node-identifier *WSP "=" *WSP quoted-string *WSP "]" */
865 in = &in[offset];
866 if (ly_parse_nodeid(&in, prefix, prefix_len, id, id_len)) {
867 *errmsg = "Invalid node-identifier.";
868 goto error;
869 }
Michal Vasko69730152020-10-09 16:30:07 +0200870 if ((format == LYD_XML) && !(*prefix)) {
Radek Krejci084289f2019-07-09 17:35:30 +0200871 /* all node names MUST be qualified with explicit namespace prefix */
872 *errmsg = "Missing prefix of a node name.";
873 goto error;
874 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200875 offset = in - *pred;
876 in = *pred;
Radek Krejci10bfdf82019-06-10 14:08:13 +0200877 expr = 2;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200878 }
879
880 if (expr) {
881 /* *WSP "=" *WSP quoted-string *WSP "]" */
Michal Vaskod989ba02020-08-24 10:59:24 +0200882 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200883
884 if (in[offset] != '=') {
Radek Krejci10bfdf82019-06-10 14:08:13 +0200885 if (expr == 1) {
886 *errmsg = "Unexpected character instead of \'=\' in leaf-list-predicate.";
887 } else { /* 2 */
888 *errmsg = "Unexpected character instead of \'=\' in key-predicate.";
889 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200890 goto error;
891 }
892 offset++;
Michal Vaskod989ba02020-08-24 10:59:24 +0200893 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200894
895 /* quoted-string */
896 quot = in[offset++];
Michal Vasko69730152020-10-09 16:30:07 +0200897 if ((quot != '\'') && (quot != '\"')) {
Radek Krejcib4a4a272019-06-10 12:44:52 +0200898 *errmsg = "String value is not quoted.";
899 goto error;
900 }
901 *value = &in[offset];
Michal Vaskod989ba02020-08-24 10:59:24 +0200902 for ( ; offset < limit && (in[offset] != quot || (offset && in[offset - 1] == '\\')); offset++) {}
Radek Krejci10bfdf82019-06-10 14:08:13 +0200903 if (in[offset] == quot) {
904 *value_len = &in[offset] - *value;
905 offset++;
906 } else {
907 *errmsg = "Value is not terminated quoted-string.";
908 goto error;
909 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200910 }
911
912 /* *WSP "]" */
Michal Vaskod989ba02020-08-24 10:59:24 +0200913 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200914 if (in[offset] != ']') {
Radek Krejci10bfdf82019-06-10 14:08:13 +0200915 if (expr == 0) {
916 *errmsg = "Predicate (pos) is not terminated by \']\' character.";
917 } else if (expr == 1) {
918 *errmsg = "Predicate (leaf-list-predicate) is not terminated by \']\' character.";
919 } else { /* 2 */
920 *errmsg = "Predicate (key-predicate) is not terminated by \']\' character.";
921 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200922 goto error;
923 }
Radek Krejci10bfdf82019-06-10 14:08:13 +0200924 offset++;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200925
Radek Krejci10bfdf82019-06-10 14:08:13 +0200926 if (offset <= limit) {
927 *pred = &in[offset];
Radek Krejcib4a4a272019-06-10 12:44:52 +0200928 return LY_SUCCESS;
929 }
930
931 /* we read after the limit */
932 *errmsg = "Predicate is incomplete.";
933 *prefix = *id = *value = NULL;
934 *prefix_len = *id_len = *value_len = 0;
935 offset = limit;
936 ret = LY_EINVAL;
937
938error:
939 *pred = &in[offset];
940 return ret;
941}