blob: e80ca93becc99888047bb84c3fdc3d5fb5f96cdf [file] [log] [blame]
Michal Vasko1324b6c2018-09-07 11:16:23 +02001/**
Michal Vasko8f702ee2024-02-20 15:44:24 +01002 * @file ly_common.c
Michal Vasko1324b6c2018-09-07 11:16:23 +02003 * @author Michal Vasko <mvasko@cesnet.cz>
4 * @brief common internal definitions for libyang
5 *
Michal Vasko8f702ee2024-02-20 15:44:24 +01006 * Copyright (c) 2018 - 2024 CESNET, z.s.p.o.
Michal Vasko1324b6c2018-09-07 11:16:23 +02007 *
8 * This source code is licensed under BSD 3-Clause License (the "License").
9 * You may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * https://opensource.org/licenses/BSD-3-Clause
13 */
Radek Krejcib7db73a2018-10-24 14:18:40 +020014
Radek Krejci535ea9f2020-05-29 16:01:05 +020015#define _GNU_SOURCE
16
Michal Vasko8f702ee2024-02-20 15:44:24 +010017#include "ly_common.h"
Michal Vasko1324b6c2018-09-07 11:16:23 +020018
Radek Krejci86d106e2018-10-18 09:53:19 +020019#include <assert.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020020#include <ctype.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020021#include <errno.h>
Jan Kundrátd31adc12022-07-07 21:36:15 +020022#include <fcntl.h>
Michal Vasko15dc9fa2021-05-03 14:33:05 +020023#include <inttypes.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020024#include <stdarg.h>
Radek Krejci535ea9f2020-05-29 16:01:05 +020025#include <stdio.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020026#include <stdlib.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020027#include <string.h>
Jan Kundrátf1960dc2021-12-12 03:12:23 +010028#ifndef _WIN32
Michal Vasko8f702ee2024-02-20 15:44:24 +010029# ifdef HAVE_MMAP
30# include <sys/mman.h>
31# endif
Jan Kundrátd31adc12022-07-07 21:36:15 +020032#else
Michal Vasko8f702ee2024-02-20 15:44:24 +010033# include <io.h>
Jan Kundrátf1960dc2021-12-12 03:12:23 +010034#endif
Radek Krejci86d106e2018-10-18 09:53:19 +020035#include <sys/stat.h>
Radek Krejci86d106e2018-10-18 09:53:19 +020036#include <unistd.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020037
Radek Krejciaa45bda2020-07-20 07:43:38 +020038#include "compat.h"
Radek Krejcib4a4a272019-06-10 12:44:52 +020039#include "tree_schema_internal.h"
aPiecek704f8e92021-08-25 13:35:05 +020040#include "xml.h"
Michal Vasko1324b6c2018-09-07 11:16:23 +020041
/**
 * @brief Wrapper for realloc() that frees the original memory when the reallocation fails.
 *
 * @param[in] ptr Memory to reallocate, may be NULL.
 * @param[in] size New size in bytes. A zero size is treated as a request for 1 byte.
 * @return Pointer to the reallocated memory.
 * @return NULL on failure, @p ptr was freed and must not be used any more.
 */
void *
ly_realloc(void *ptr, size_t size)
{
    void *new_mem;

    /* realloc(ptr, 0) is allowed to free ptr AND return NULL, the free() below would then be a double free;
     * never ask for a zero-sized block */
    new_mem = realloc(ptr, size ? size : 1);
    if (!new_mem) {
        free(ptr);
    }

    return new_mem;
}
Michal Vasko841d1a92018-09-07 15:40:31 +020054
/**
 * @brief Find the first occurrence of a character in the first @p len bytes of a string.
 *
 * All @p len bytes are examined, the search does not stop on a NUL byte.
 *
 * @param[in] s String to search in.
 * @param[in] c Character to look for, compared after a conversion to char.
 * @param[in] len Number of bytes of @p s to examine.
 * @return Pointer to the first matching byte, NULL if there is none.
 */
char *
ly_strnchr(const char *s, int c, size_t len)
{
    const char wanted = (char)c;
    size_t i;

    for (i = 0; i < len; ++i) {
        if (s[i] == wanted) {
            return (char *)&s[i];
        }
    }

    return NULL;
}
61
/**
 * @brief Compare a NUL-terminated reference string with a string given by its length.
 *
 * @param[in] refstr NUL-terminated reference string.
 * @param[in] str String to compare, only its first @p str_len bytes are used.
 * @param[in] str_len Number of bytes of @p str to compare.
 * @return 0 if @p refstr is exactly @p str_len bytes long and those bytes equal @p str.
 * @return Non-zero strncmp() result if the first @p str_len bytes differ.
 * @return 1 if the compared bytes are equal but @p refstr continues.
 */
int
ly_strncmp(const char *refstr, const char *str, size_t str_len)
{
    const int diff = strncmp(refstr, str, str_len);

    if (diff) {
        return diff;
    }

    /* the compared parts are the same, the reference string must end right there */
    return (refstr[str_len] == '\0') ? 0 : 1;
}
73
Michal Vasko15dc9fa2021-05-03 14:33:05 +020074LY_ERR
75ly_strntou8(const char *nptr, size_t len, uint8_t *ret)
76{
77 uint8_t num = 0, dig, dec_pow;
78
79 if (len > 3) {
80 /* overflow for sure */
81 return LY_EDENIED;
82 }
83
84 dec_pow = 1;
85 for ( ; len && isdigit(nptr[len - 1]); --len) {
86 dig = nptr[len - 1] - 48;
87
88 if (LY_OVERFLOW_MUL(UINT8_MAX, dig, dec_pow)) {
89 return LY_EDENIED;
90 }
91 dig *= dec_pow;
92
93 if (LY_OVERFLOW_ADD(UINT8_MAX, num, dig)) {
94 return LY_EDENIED;
95 }
96 num += dig;
97
98 dec_pow *= 10;
99 }
100
101 if (len) {
102 return LY_EVALID;
103 }
104 *ret = num;
105 return LY_SUCCESS;
106}
107
aPieceke3f828d2021-05-10 15:34:41 +0200108LY_ERR
109ly_value_prefix_next(const char *str_begin, const char *str_end, uint32_t *len, ly_bool *is_prefix, const char **str_next)
aPiecekf102d4d2021-03-30 12:18:38 +0200110{
111 const char *stop, *prefix;
aPieceke3f828d2021-05-10 15:34:41 +0200112 size_t bytes_read;
aPiecekf102d4d2021-03-30 12:18:38 +0200113 uint32_t c;
114 ly_bool prefix_found;
aPieceke3f828d2021-05-10 15:34:41 +0200115 LY_ERR ret = LY_SUCCESS;
aPiecekf102d4d2021-03-30 12:18:38 +0200116
aPieceke3f828d2021-05-10 15:34:41 +0200117 assert(len && is_prefix && str_next);
aPiecekf102d4d2021-03-30 12:18:38 +0200118
119#define IS_AT_END(PTR, STR_END) (STR_END ? PTR == STR_END : !(*PTR))
120
121 *str_next = NULL;
122 *is_prefix = 0;
aPieceke3f828d2021-05-10 15:34:41 +0200123 *len = 0;
aPiecekf102d4d2021-03-30 12:18:38 +0200124
125 if (!str_begin || !(*str_begin) || (str_begin == str_end)) {
126 return ret;
127 }
128
129 stop = str_begin;
130 prefix = NULL;
131 prefix_found = 0;
132
133 do {
134 /* look for the beginning of the YANG value */
aPieceke3f828d2021-05-10 15:34:41 +0200135 do {
136 LY_CHECK_RET(ly_getutf8(&stop, &c, &bytes_read));
137 } while (!is_xmlqnamestartchar(c) && !IS_AT_END(stop, str_end));
aPiecekf102d4d2021-03-30 12:18:38 +0200138
139 if (IS_AT_END(stop, str_end)) {
140 break;
141 }
142
143 /* maybe the prefix was found */
aPieceke3f828d2021-05-10 15:34:41 +0200144 prefix = stop - bytes_read;
aPiecekf102d4d2021-03-30 12:18:38 +0200145
146 /* look for the the end of the prefix */
aPieceke3f828d2021-05-10 15:34:41 +0200147 do {
148 LY_CHECK_RET(ly_getutf8(&stop, &c, &bytes_read));
149 } while (is_xmlqnamechar(c) && !IS_AT_END(stop, str_end));
aPiecekf102d4d2021-03-30 12:18:38 +0200150
151 prefix_found = c == ':' ? 1 : 0;
152
153 /* if it wasn't the prefix, keep looking */
154 } while (!IS_AT_END(stop, str_end) && !prefix_found);
155
156 if ((str_begin == prefix) && prefix_found) {
157 /* prefix found at the beginning of the input string */
158 *is_prefix = 1;
159 *str_next = IS_AT_END(stop, str_end) ? NULL : stop;
aPieceke3f828d2021-05-10 15:34:41 +0200160 *len = (stop - bytes_read) - str_begin;
aPiecekf102d4d2021-03-30 12:18:38 +0200161 } else if ((str_begin != prefix) && (prefix_found)) {
162 /* there is a some string before prefix */
163 *str_next = prefix;
aPieceke3f828d2021-05-10 15:34:41 +0200164 *len = prefix - str_begin;
aPiecekf102d4d2021-03-30 12:18:38 +0200165 } else {
166 /* no prefix found */
aPieceke3f828d2021-05-10 15:34:41 +0200167 *len = stop - str_begin;
aPiecekf102d4d2021-03-30 12:18:38 +0200168 }
169
170#undef IS_AT_END
171
172 return ret;
173}
174
Radek Krejcib416be62018-10-01 14:51:45 +0200175LY_ERR
Michal Vaskob36053d2020-03-26 15:49:30 +0100176ly_getutf8(const char **input, uint32_t *utf8_char, size_t *bytes_read)
Radek Krejcib416be62018-10-01 14:51:45 +0200177{
Radek Krejci1deb5be2020-08-26 16:43:36 +0200178 uint32_t c, aux;
179 size_t len;
Radek Krejcib416be62018-10-01 14:51:45 +0200180
181 c = (*input)[0];
Radek Krejcib416be62018-10-01 14:51:45 +0200182
183 if (!(c & 0x80)) {
184 /* one byte character */
185 len = 1;
186
Michal Vasko69730152020-10-09 16:30:07 +0200187 if ((c < 0x20) && (c != 0x9) && (c != 0xa) && (c != 0xd)) {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200188 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200189 }
190 } else if ((c & 0xe0) == 0xc0) {
191 /* two bytes character */
192 len = 2;
193
194 aux = (*input)[1];
195 if ((aux & 0xc0) != 0x80) {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200196 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200197 }
198 c = ((c & 0x1f) << 6) | (aux & 0x3f);
199
200 if (c < 0x80) {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200201 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200202 }
203 } else if ((c & 0xf0) == 0xe0) {
204 /* three bytes character */
205 len = 3;
206
207 c &= 0x0f;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200208 for (uint64_t i = 1; i <= 2; i++) {
Radek Krejcib416be62018-10-01 14:51:45 +0200209 aux = (*input)[i];
210 if ((aux & 0xc0) != 0x80) {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200211 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200212 }
213
214 c = (c << 6) | (aux & 0x3f);
215 }
216
Michal Vasko69730152020-10-09 16:30:07 +0200217 if ((c < 0x800) || ((c > 0xd7ff) && (c < 0xe000)) || (c > 0xfffd)) {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200218 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200219 }
220 } else if ((c & 0xf8) == 0xf0) {
221 /* four bytes character */
222 len = 4;
223
224 c &= 0x07;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200225 for (uint64_t i = 1; i <= 3; i++) {
Radek Krejcib416be62018-10-01 14:51:45 +0200226 aux = (*input)[i];
227 if ((aux & 0xc0) != 0x80) {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200228 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200229 }
230
231 c = (c << 6) | (aux & 0x3f);
232 }
233
Michal Vasko69730152020-10-09 16:30:07 +0200234 if ((c < 0x1000) || (c > 0x10ffff)) {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200235 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200236 }
237 } else {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200238 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200239 }
240
241 (*utf8_char) = c;
242 (*input) += len;
243 if (bytes_read) {
244 (*bytes_read) = len;
245 }
246 return LY_SUCCESS;
Michal Vaskoda09abf2023-10-06 15:53:18 +0200247
248error:
249 if (bytes_read) {
250 (*bytes_read) = 0;
251 }
252 return LY_EINVAL;
Radek Krejcib416be62018-10-01 14:51:45 +0200253}
254
/**
 * @brief Check whether an UTF-8 string is equal to a hex string after a bitwise and.
 *
 * (input & 0x[arg1][arg3][arg5]...) == 0x[arg2][arg4][arg6]...
 *
 * @param[in] input UTF-8 string.
 * @param[in] bytes Number of bytes to compare.
 * @param[in] ... 2x @p bytes number of bytes to perform bitwise and and equality operations,
 * passed as pairs (mask, expected value) for every byte.
 * @return 1 if all the masked bytes are equal to the expected values, 0 otherwise.
 */
static int
ly_utf8_and_equal(const char *input, uint8_t bytes, ...)
{
    va_list ap;
    int i, mask, byte, equal = 1;

    va_start(ap, bytes);
    for (i = 0; i < bytes; ++i) {
        mask = va_arg(ap, int);
        byte = va_arg(ap, int);

        /* compare each byte */
        if (((uint8_t)input[i] & mask) != (uint8_t)byte) {
            equal = 0;
            break;
        }
    }
    /* every va_start() must be paired with va_end() before returning */
    va_end(ap);

    return equal;
}
285
/**
 * @brief Check whether an UTF-8 string is smaller than a hex string.
 *
 * input < 0x[arg1][arg2]...
 *
 * @param[in] input UTF-8 string.
 * @param[in] bytes Number of bytes to compare.
 * @param[in] ... @p bytes number of bytes to compare with.
 * @return 1 if @p input is smaller, 0 if it is greater or equal.
 */
static int
ly_utf8_less(const char *input, uint8_t bytes, ...)
{
    va_list ap;
    int i, byte, less = 0;

    va_start(ap, bytes);
    for (i = 0; i < bytes; ++i) {
        byte = va_arg(ap, int);

        /* the first differing byte decides, equal strings are not smaller */
        if ((uint8_t)input[i] != (uint8_t)byte) {
            less = (uint8_t)input[i] < (uint8_t)byte;
            break;
        }
    }
    /* every va_start() must be paired with va_end() before returning */
    va_end(ap);

    return less;
}
318
/**
 * @brief Check whether an UTF-8 string is greater than a hex string.
 *
 * input > 0x[arg1][arg2]...
 *
 * @param[in] input UTF-8 string.
 * @param[in] bytes Number of bytes to compare.
 * @param[in] ... @p bytes number of bytes to compare with.
 * @return 1 if @p input is greater, 0 if it is smaller or equal.
 */
static int
ly_utf8_greater(const char *input, uint8_t bytes, ...)
{
    va_list ap;
    int i, byte, greater = 0;

    va_start(ap, bytes);
    for (i = 0; i < bytes; ++i) {
        byte = va_arg(ap, int);

        /* the first differing byte decides, equal strings are not greater */
        if ((uint8_t)input[i] != (uint8_t)byte) {
            greater = (uint8_t)input[i] > (uint8_t)byte;
            break;
        }
    }
    /* every va_start() must be paired with va_end() before returning */
    va_end(ap);

    return greater;
}
351
352LY_ERR
353ly_checkutf8(const char *input, size_t in_len, size_t *utf8_len)
354{
355 size_t len;
356
357 if (!(input[0] & 0x80)) {
358 /* one byte character */
359 len = 1;
360
361 if (ly_utf8_less(input, 1, 0x20) && (input[0] != 0x9) && (input[0] != 0xa) && (input[0] != 0xd)) {
362 /* invalid control characters */
363 return LY_EINVAL;
364 }
365 } else if (((input[0] & 0xe0) == 0xc0) && (in_len > 1)) {
366 /* two bytes character */
367 len = 2;
368
369 /* (input < 0xC280) || (input > 0xDFBF) || ((input & 0xE0C0) != 0xC080) */
370 if (ly_utf8_less(input, 2, 0xC2, 0x80) || ly_utf8_greater(input, 2, 0xDF, 0xBF) ||
371 !ly_utf8_and_equal(input, 2, 0xE0, 0xC0, 0xC0, 0x80)) {
372 return LY_EINVAL;
373 }
374 } else if (((input[0] & 0xf0) == 0xe0) && (in_len > 2)) {
375 /* three bytes character */
376 len = 3;
377
378 /* (input >= 0xEDA080) && (input <= 0xEDBFBF) */
379 if (!ly_utf8_less(input, 3, 0xED, 0xA0, 0x80) && !ly_utf8_greater(input, 3, 0xED, 0xBF, 0xBF)) {
380 /* reject UTF-16 surrogates */
381 return LY_EINVAL;
382 }
383
384 /* (input < 0xE0A080) || (input > 0xEFBFBF) || ((input & 0xF0C0C0) != 0xE08080) */
385 if (ly_utf8_less(input, 3, 0xE0, 0xA0, 0x80) || ly_utf8_greater(input, 3, 0xEF, 0xBF, 0xBF) ||
386 !ly_utf8_and_equal(input, 3, 0xF0, 0xE0, 0xC0, 0x80, 0xC0, 0x80)) {
387 return LY_EINVAL;
388 }
389 } else if (((input[0] & 0xf8) == 0xf0) && (in_len > 3)) {
390 /* four bytes character */
391 len = 4;
392
393 /* (input < 0xF0908080) || (input > 0xF48FBFBF) || ((input & 0xF8C0C0C0) != 0xF0808080) */
394 if (ly_utf8_less(input, 4, 0xF0, 0x90, 0x80, 0x80) || ly_utf8_greater(input, 4, 0xF4, 0x8F, 0xBF, 0xBF) ||
395 !ly_utf8_and_equal(input, 4, 0xF8, 0xF0, 0xC0, 0x80, 0xC0, 0x80, 0xC0, 0x80)) {
396 return LY_EINVAL;
397 }
398 } else {
399 return LY_EINVAL;
400 }
401
402 *utf8_len = len;
403 return LY_SUCCESS;
404}
405
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200406LY_ERR
407ly_pututf8(char *dst, uint32_t value, size_t *bytes_written)
408{
409 if (value < 0x80) {
410 /* one byte character */
Michal Vasko69730152020-10-09 16:30:07 +0200411 if ((value < 0x20) &&
412 (value != 0x09) &&
413 (value != 0x0a) &&
414 (value != 0x0d)) {
Michal Vasko519097f2023-05-25 10:00:44 +0200415 /* valid UTF8 but not YANG string character */
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200416 return LY_EINVAL;
417 }
418
419 dst[0] = value;
420 (*bytes_written) = 1;
421 } else if (value < 0x800) {
422 /* two bytes character */
423 dst[0] = 0xc0 | (value >> 6);
424 dst[1] = 0x80 | (value & 0x3f);
425 (*bytes_written) = 2;
426 } else if (value < 0xfffe) {
427 /* three bytes character */
428 if (((value & 0xf800) == 0xd800) ||
Michal Vasko69730152020-10-09 16:30:07 +0200429 ((value >= 0xfdd0) && (value <= 0xfdef))) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200430 /* exclude surrogate blocks %xD800-DFFF */
431 /* exclude noncharacters %xFDD0-FDEF */
432 return LY_EINVAL;
433 }
434
435 dst[0] = 0xe0 | (value >> 12);
436 dst[1] = 0x80 | ((value >> 6) & 0x3f);
437 dst[2] = 0x80 | (value & 0x3f);
438
439 (*bytes_written) = 3;
440 } else if (value < 0x10fffe) {
441 if ((value & 0xffe) == 0xffe) {
442 /* exclude noncharacters %xFFFE-FFFF, %x1FFFE-1FFFF, %x2FFFE-2FFFF, %x3FFFE-3FFFF, %x4FFFE-4FFFF,
443 * %x5FFFE-5FFFF, %x6FFFE-6FFFF, %x7FFFE-7FFFF, %x8FFFE-8FFFF, %x9FFFE-9FFFF, %xAFFFE-AFFFF,
444 * %xBFFFE-BFFFF, %xCFFFE-CFFFF, %xDFFFE-DFFFF, %xEFFFE-EFFFF, %xFFFFE-FFFFF, %x10FFFE-10FFFF */
445 return LY_EINVAL;
446 }
447 /* four bytes character */
448 dst[0] = 0xf0 | (value >> 18);
449 dst[1] = 0x80 | ((value >> 12) & 0x3f);
450 dst[2] = 0x80 | ((value >> 6) & 0x3f);
451 dst[3] = 0x80 | (value & 0x3f);
452
453 (*bytes_written) = 4;
454 } else {
455 return LY_EINVAL;
456 }
457 return LY_SUCCESS;
458}
459
/**
 * @brief Static table of the UTF8 characters lengths according to their first byte.
 *
 * Bytes that cannot start a character (0x80 - 0xBF, 0xFE, 0xFF) have the length 1.
 */
static const unsigned char utf8_char_length_table[] = {
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
};

/**
 * @brief Count the UTF-8 characters in a string.
 *
 * The characters are not validated, their lengths are learned only from their first bytes.
 *
 * @param[in] str String to examine.
 * @param[in] bytes Maximum number of bytes of @p str to examine, the counting ends sooner
 * on a NUL byte.
 * @return Number of the characters that start in the examined part of @p str.
 */
size_t
ly_utf8len(const char *str, size_t bytes)
{
    size_t len = 0, pos = 0, char_end;

    while ((pos < bytes) && str[pos]) {
        ++len;
        char_end = pos + utf8_char_length_table[(unsigned char)str[pos]];

        /* skip the rest of the character, but a truncated character must not carry the position
         * over the terminating NUL byte or the limit */
        for (++pos; (pos < char_end) && (pos < bytes) && str[pos]; ++pos) {}
    }
    return len;
}
494
Michal Vasko7b3a00e2023-08-09 11:58:03 +0200495int
Radek Krejcid972c252018-09-25 13:23:39 +0200496LY_VCODE_INSTREXP_len(const char *str)
497{
Michal Vasko7b3a00e2023-08-09 11:58:03 +0200498 int len = 0;
Michal Vasko69730152020-10-09 16:30:07 +0200499
Radek Krejcid972c252018-09-25 13:23:39 +0200500 if (!str) {
501 return len;
502 } else if (!str[0]) {
503 return 1;
504 }
Radek Krejci1e008d22020-08-17 11:37:37 +0200505 for (len = 1; len < LY_VCODE_INSTREXP_MAXLEN && str[len]; ++len) {}
Radek Krejcid972c252018-09-25 13:23:39 +0200506 return len;
507}
508
Jan Kundrátf1960dc2021-12-12 03:12:23 +0100509#ifdef HAVE_MMAP
Radek Krejcif345c012018-09-19 11:12:59 +0200510LY_ERR
Radek Krejci86d106e2018-10-18 09:53:19 +0200511ly_mmap(struct ly_ctx *ctx, int fd, size_t *length, void **addr)
Michal Vasko841d1a92018-09-07 15:40:31 +0200512{
Radek Krejci86d106e2018-10-18 09:53:19 +0200513 struct stat sb;
514 long pagesize;
515 size_t m;
Michal Vasko841d1a92018-09-07 15:40:31 +0200516
Radek Krejci86d106e2018-10-18 09:53:19 +0200517 assert(length);
518 assert(addr);
519 assert(fd >= 0);
Michal Vasko841d1a92018-09-07 15:40:31 +0200520
Radek Krejci86d106e2018-10-18 09:53:19 +0200521 if (fstat(fd, &sb) == -1) {
522 LOGERR(ctx, LY_ESYS, "Failed to stat the file descriptor (%s) for the mmap().", strerror(errno));
523 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200524 }
Radek Krejci86d106e2018-10-18 09:53:19 +0200525 if (!S_ISREG(sb.st_mode)) {
526 LOGERR(ctx, LY_EINVAL, "File to mmap() is not a regular file.");
527 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200528 }
Radek Krejci86d106e2018-10-18 09:53:19 +0200529 if (!sb.st_size) {
530 *addr = NULL;
531 return LY_SUCCESS;
532 }
533 pagesize = sysconf(_SC_PAGESIZE);
534
535 m = sb.st_size % pagesize;
Michal Vasko69730152020-10-09 16:30:07 +0200536 if (m && (pagesize - m >= 1)) {
Radek Krejci86d106e2018-10-18 09:53:19 +0200537 /* there will be enough space (at least 1 byte) after the file content mapping to provide zeroed NULL-termination byte */
538 *length = sb.st_size + 1;
539 *addr = mmap(NULL, *length, PROT_READ, MAP_PRIVATE, fd, 0);
540 } else {
541 /* there will not be enough bytes after the file content mapping for the additional bytes and some of them
542 * would overflow into another page that would not be zerroed and any access into it would generate SIGBUS.
543 * Therefore we have to do the following hack with double mapping. First, the required number of bytes
544 * (including the additinal bytes) is required as anonymous and thus they will be really provided (actually more
545 * because of using whole pages) and also initialized by zeros. Then, the file is mapped to the same address
546 * where the anonymous mapping starts. */
547 *length = sb.st_size + pagesize;
548 *addr = mmap(NULL, *length, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
549 *addr = mmap(*addr, sb.st_size, PROT_READ, MAP_PRIVATE | MAP_FIXED, fd, 0);
550 }
551 if (*addr == MAP_FAILED) {
552 LOGERR(ctx, LY_ESYS, "mmap() failed (%s).", strerror(errno));
553 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200554 }
555
Radek Krejcif345c012018-09-19 11:12:59 +0200556 return LY_SUCCESS;
Radek Krejci86d106e2018-10-18 09:53:19 +0200557}
Michal Vasko841d1a92018-09-07 15:40:31 +0200558
Radek Krejci86d106e2018-10-18 09:53:19 +0200559LY_ERR
560ly_munmap(void *addr, size_t length)
561{
562 if (munmap(addr, length)) {
563 return LY_ESYS;
564 }
565 return LY_SUCCESS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200566}
Radek Krejci4f28eda2018-11-12 11:46:16 +0100567
Jan Kundrátf1960dc2021-12-12 03:12:23 +0100568#else
569
570LY_ERR
571ly_mmap(struct ly_ctx *ctx, int fd, size_t *length, void **addr)
572{
573 struct stat sb;
574 size_t m;
575
576 assert(length);
577 assert(addr);
578 assert(fd >= 0);
579
Jan Kundrátd31adc12022-07-07 21:36:15 +0200580#if _WIN32
581 if (_setmode(fd, _O_BINARY) == -1) {
582 LOGERR(ctx, LY_ESYS, "Failed to switch the file descriptor to binary mode.", strerror(errno));
583 return LY_ESYS;
584 }
585#endif
586
Jan Kundrátf1960dc2021-12-12 03:12:23 +0100587 if (fstat(fd, &sb) == -1) {
588 LOGERR(ctx, LY_ESYS, "Failed to stat the file descriptor (%s) for the mmap().", strerror(errno));
589 return LY_ESYS;
590 }
591 if (!S_ISREG(sb.st_mode)) {
592 LOGERR(ctx, LY_EINVAL, "File to mmap() is not a regular file.");
593 return LY_ESYS;
594 }
595 if (!sb.st_size) {
596 *addr = NULL;
597 return LY_SUCCESS;
598 }
599 /* On Windows, the mman-win32 mmap() emulation uses CreateFileMapping and MapViewOfFile, and these functions
600 * do not allow mapping more than "length of file" bytes for PROT_READ. Remapping existing mappings is not allowed, either.
601 * At that point the path of least resistance is just reading the file in as-is. */
602 m = sb.st_size + 1;
603 char *buf = calloc(m, 1);
604
605 if (!buf) {
606 LOGERR(ctx, LY_ESYS, "ly_mmap: malloc() failed (%s).", strerror(errno));
607 }
608 *addr = buf;
609 *length = m;
610
611 lseek(fd, 0, SEEK_SET);
612 ssize_t to_read = m - 1;
613
614 while (to_read > 0) {
615 ssize_t n = read(fd, buf, to_read);
Michal Vasko2bf4af42023-01-04 12:08:38 +0100616
Jan Kundrátf1960dc2021-12-12 03:12:23 +0100617 if (n == 0) {
618 return LY_SUCCESS;
619 } else if (n < 0) {
620 if (errno == EINTR) {
621 continue; // can I get this on Windows?
622 }
623 LOGERR(ctx, LY_ESYS, "ly_mmap: read() failed (%s).", strerror(errno));
624 }
625 to_read -= n;
626 buf += n;
627 }
628 return LY_SUCCESS;
629}
630
631LY_ERR
632ly_munmap(void *addr, size_t length)
633{
634 (void)length;
635 free(addr);
636 return LY_SUCCESS;
637}
638
639#endif
640
Radek Krejci4f28eda2018-11-12 11:46:16 +0100641LY_ERR
Radek Krejci4546aa62019-07-15 16:53:32 +0200642ly_strcat(char **dest, const char *format, ...)
643{
644 va_list fp;
645 char *addition = NULL;
646 size_t len;
647
648 va_start(fp, format);
649 len = vasprintf(&addition, format, fp);
650 len += (*dest ? strlen(*dest) : 0) + 1;
651
652 if (*dest) {
653 *dest = ly_realloc(*dest, len);
654 if (!*dest) {
Radek Krejci1cd812f2020-12-01 12:17:53 +0100655 va_end(fp);
Radek Krejci4546aa62019-07-15 16:53:32 +0200656 return LY_EMEM;
657 }
658 *dest = strcat(*dest, addition);
659 free(addition);
660 } else {
661 *dest = addition;
662 }
663
664 va_end(fp);
665 return LY_SUCCESS;
666}
667
668LY_ERR
Radek Krejci249973a2019-06-10 10:50:54 +0200669ly_parse_int(const char *val_str, size_t val_len, int64_t min, int64_t max, int base, int64_t *ret)
Radek Krejci4f28eda2018-11-12 11:46:16 +0100670{
Michal Vaskob4d40d62021-05-04 11:42:44 +0200671 LY_ERR rc = LY_SUCCESS;
672 char *ptr, *str;
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200673 int64_t i;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100674
Radek Krejci249973a2019-06-10 10:50:54 +0200675 LY_CHECK_ARG_RET(NULL, val_str, val_str[0], val_len, LY_EINVAL);
Radek Krejci4f28eda2018-11-12 11:46:16 +0100676
Michal Vaskob4d40d62021-05-04 11:42:44 +0200677 /* duplicate the value */
678 str = strndup(val_str, val_len);
679 LY_CHECK_RET(!str, LY_EMEM);
Radek Krejci4f28eda2018-11-12 11:46:16 +0100680
Michal Vaskob4d40d62021-05-04 11:42:44 +0200681 /* parse the value to avoid accessing following bytes */
682 errno = 0;
683 i = strtoll(str, &ptr, base);
684 if (errno || (ptr == str)) {
685 /* invalid string */
686 rc = LY_EVALID;
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200687 } else if ((i < min) || (i > max)) {
Michal Vaskob4d40d62021-05-04 11:42:44 +0200688 /* invalid number */
689 rc = LY_EDENIED;
690 } else if (*ptr) {
691 while (isspace(*ptr)) {
692 ++ptr;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100693 }
Michal Vaskob4d40d62021-05-04 11:42:44 +0200694 if (*ptr) {
695 /* invalid characters after some number */
696 rc = LY_EVALID;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100697 }
698 }
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200699
Michal Vaskob4d40d62021-05-04 11:42:44 +0200700 /* cleanup */
701 free(str);
702 if (!rc) {
703 *ret = i;
704 }
705 return rc;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100706}
707
708LY_ERR
Radek Krejci249973a2019-06-10 10:50:54 +0200709ly_parse_uint(const char *val_str, size_t val_len, uint64_t max, int base, uint64_t *ret)
Radek Krejci4f28eda2018-11-12 11:46:16 +0100710{
Michal Vaskob4d40d62021-05-04 11:42:44 +0200711 LY_ERR rc = LY_SUCCESS;
712 char *ptr, *str;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100713 uint64_t u;
714
Michal Vaskob4d40d62021-05-04 11:42:44 +0200715 LY_CHECK_ARG_RET(NULL, val_str, val_str[0], val_len, LY_EINVAL);
Radek Krejci4f28eda2018-11-12 11:46:16 +0100716
Michal Vaskob4d40d62021-05-04 11:42:44 +0200717 /* duplicate the value to avoid accessing following bytes */
718 str = strndup(val_str, val_len);
719 LY_CHECK_RET(!str, LY_EMEM);
720
721 /* parse the value */
Radek Krejci4f28eda2018-11-12 11:46:16 +0100722 errno = 0;
Michal Vaskob4d40d62021-05-04 11:42:44 +0200723 u = strtoull(str, &ptr, base);
724 if (errno || (ptr == str)) {
725 /* invalid string */
726 rc = LY_EVALID;
727 } else if ((u > max) || (u && (str[0] == '-'))) {
728 /* invalid number */
729 rc = LY_EDENIED;
730 } else if (*ptr) {
731 while (isspace(*ptr)) {
732 ++ptr;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100733 }
Michal Vaskob4d40d62021-05-04 11:42:44 +0200734 if (*ptr) {
735 /* invalid characters after some number */
736 rc = LY_EVALID;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100737 }
738 }
739
Michal Vaskob4d40d62021-05-04 11:42:44 +0200740 /* cleanup */
741 free(str);
742 if (!rc) {
743 *ret = u;
744 }
745 return rc;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100746}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200747
748/**
749 * @brief Parse an identifier.
750 *
751 * ;; An identifier MUST NOT start with (('X'|'x') ('M'|'m') ('L'|'l'))
752 * identifier = (ALPHA / "_")
753 * *(ALPHA / DIGIT / "_" / "-" / ".")
754 *
755 * @param[in,out] id Identifier to parse. When returned, it points to the first character which is not part of the identifier.
756 * @return LY_ERR value: LY_SUCCESS or LY_EINVAL in case of invalid starting character.
757 */
758static LY_ERR
759lys_parse_id(const char **id)
760{
761 assert(id && *id);
762
763 if (!is_yangidentstartchar(**id)) {
764 return LY_EINVAL;
765 }
766 ++(*id);
767
768 while (is_yangidentchar(**id)) {
769 ++(*id);
770 }
771 return LY_SUCCESS;
772}
773
774LY_ERR
775ly_parse_nodeid(const char **id, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
776{
777 assert(id && *id);
778 assert(prefix && prefix_len);
779 assert(name && name_len);
780
781 *prefix = *id;
782 *prefix_len = 0;
783 *name = NULL;
784 *name_len = 0;
785
786 LY_CHECK_RET(lys_parse_id(id));
787 if (**id == ':') {
788 /* there is prefix */
789 *prefix_len = *id - *prefix;
790 ++(*id);
791 *name = *id;
792
793 LY_CHECK_RET(lys_parse_id(id));
794 *name_len = *id - *name;
795 } else {
796 /* there is no prefix, so what we have as prefix now is actually the name */
797 *name = *prefix;
798 *name_len = *id - *name;
799 *prefix = NULL;
800 }
801
802 return LY_SUCCESS;
803}
804
805LY_ERR
Radek Krejci084289f2019-07-09 17:35:30 +0200806ly_parse_instance_predicate(const char **pred, size_t limit, LYD_FORMAT format,
Radek Krejci0f969882020-08-21 16:56:47 +0200807 const char **prefix, size_t *prefix_len, const char **id, size_t *id_len, const char **value, size_t *value_len,
808 const char **errmsg)
Radek Krejcib4a4a272019-06-10 12:44:52 +0200809{
810 LY_ERR ret = LY_EVALID;
811 const char *in = *pred;
812 size_t offset = 1;
Radek Krejci857189e2020-09-01 13:26:36 +0200813 uint8_t expr = 0; /* 0 - position predicate; 1 - leaf-list-predicate; 2 - key-predicate */
Radek Krejcib4a4a272019-06-10 12:44:52 +0200814 char quot;
815
Radek Krejci4607f542020-12-01 12:18:49 +0100816 assert(in[0] == '[');
Radek Krejcib4a4a272019-06-10 12:44:52 +0200817
818 *prefix = *id = *value = NULL;
819 *prefix_len = *id_len = *value_len = 0;
820
821 /* leading *WSP */
Michal Vaskod989ba02020-08-24 10:59:24 +0200822 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200823
824 if (isdigit(in[offset])) {
825 /* pos: "[" *WSP positive-integer-value *WSP "]" */
826 if (in[offset] == '0') {
827 /* zero */
828 *errmsg = "The position predicate cannot be zero.";
829 goto error;
830 }
831
832 /* positive-integer-value */
Radek Krejci10bfdf82019-06-10 14:08:13 +0200833 *value = &in[offset++];
Michal Vaskod989ba02020-08-24 10:59:24 +0200834 for ( ; isdigit(in[offset]); offset++) {}
Radek Krejci10bfdf82019-06-10 14:08:13 +0200835 *value_len = &in[offset] - *value;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200836
837 } else if (in[offset] == '.') {
838 /* leaf-list-predicate: "[" *WSP "." *WSP "=" *WSP quoted-string *WSP "]" */
839 *id = &in[offset];
840 *id_len = 1;
841 offset++;
842 expr = 1;
Radek Krejci10bfdf82019-06-10 14:08:13 +0200843 } else if (in[offset] == '-') {
844 /* typically negative value */
845 *errmsg = "Invalid instance predicate format (negative position or invalid node-identifier).";
846 goto error;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200847 } else {
848 /* key-predicate: "[" *WSP node-identifier *WSP "=" *WSP quoted-string *WSP "]" */
849 in = &in[offset];
850 if (ly_parse_nodeid(&in, prefix, prefix_len, id, id_len)) {
851 *errmsg = "Invalid node-identifier.";
852 goto error;
853 }
Michal Vasko69730152020-10-09 16:30:07 +0200854 if ((format == LYD_XML) && !(*prefix)) {
Radek Krejci084289f2019-07-09 17:35:30 +0200855 /* all node names MUST be qualified with explicit namespace prefix */
856 *errmsg = "Missing prefix of a node name.";
857 goto error;
858 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200859 offset = in - *pred;
860 in = *pred;
Radek Krejci10bfdf82019-06-10 14:08:13 +0200861 expr = 2;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200862 }
863
864 if (expr) {
865 /* *WSP "=" *WSP quoted-string *WSP "]" */
Michal Vaskod989ba02020-08-24 10:59:24 +0200866 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200867
868 if (in[offset] != '=') {
Radek Krejci10bfdf82019-06-10 14:08:13 +0200869 if (expr == 1) {
870 *errmsg = "Unexpected character instead of \'=\' in leaf-list-predicate.";
871 } else { /* 2 */
872 *errmsg = "Unexpected character instead of \'=\' in key-predicate.";
873 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200874 goto error;
875 }
876 offset++;
Michal Vaskod989ba02020-08-24 10:59:24 +0200877 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200878
879 /* quoted-string */
880 quot = in[offset++];
Michal Vasko69730152020-10-09 16:30:07 +0200881 if ((quot != '\'') && (quot != '\"')) {
Radek Krejcib4a4a272019-06-10 12:44:52 +0200882 *errmsg = "String value is not quoted.";
883 goto error;
884 }
885 *value = &in[offset];
Michal Vaskod989ba02020-08-24 10:59:24 +0200886 for ( ; offset < limit && (in[offset] != quot || (offset && in[offset - 1] == '\\')); offset++) {}
Radek Krejci10bfdf82019-06-10 14:08:13 +0200887 if (in[offset] == quot) {
888 *value_len = &in[offset] - *value;
889 offset++;
890 } else {
891 *errmsg = "Value is not terminated quoted-string.";
892 goto error;
893 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200894 }
895
896 /* *WSP "]" */
Michal Vaskod989ba02020-08-24 10:59:24 +0200897 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200898 if (in[offset] != ']') {
Radek Krejci10bfdf82019-06-10 14:08:13 +0200899 if (expr == 0) {
900 *errmsg = "Predicate (pos) is not terminated by \']\' character.";
901 } else if (expr == 1) {
902 *errmsg = "Predicate (leaf-list-predicate) is not terminated by \']\' character.";
903 } else { /* 2 */
904 *errmsg = "Predicate (key-predicate) is not terminated by \']\' character.";
905 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200906 goto error;
907 }
Radek Krejci10bfdf82019-06-10 14:08:13 +0200908 offset++;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200909
Radek Krejci10bfdf82019-06-10 14:08:13 +0200910 if (offset <= limit) {
911 *pred = &in[offset];
Radek Krejcib4a4a272019-06-10 12:44:52 +0200912 return LY_SUCCESS;
913 }
914
915 /* we read after the limit */
916 *errmsg = "Predicate is incomplete.";
917 *prefix = *id = *value = NULL;
918 *prefix_len = *id_len = *value_len = 0;
919 offset = limit;
920 ret = LY_EINVAL;
921
922error:
923 *pred = &in[offset];
924 return ret;
925}