blob: 03dd81c08116a00eecbafb6ef12d9c9afa1bd55e [file] [log] [blame]
Michal Vasko1324b6c2018-09-07 11:16:23 +02001/**
2 * @file common.c
3 * @author Michal Vasko <mvasko@cesnet.cz>
4 * @brief common internal definitions for libyang
5 *
6 * Copyright (c) 2018 CESNET, z.s.p.o.
7 *
8 * This source code is licensed under BSD 3-Clause License (the "License").
9 * You may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * https://opensource.org/licenses/BSD-3-Clause
13 */
Radek Krejcib7db73a2018-10-24 14:18:40 +020014
Radek Krejci535ea9f2020-05-29 16:01:05 +020015#define _GNU_SOURCE
16
Radek Krejcib7db73a2018-10-24 14:18:40 +020017#include "common.h"
Michal Vasko1324b6c2018-09-07 11:16:23 +020018
Radek Krejci86d106e2018-10-18 09:53:19 +020019#include <assert.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020020#include <ctype.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020021#include <errno.h>
Jan Kundrátd31adc12022-07-07 21:36:15 +020022#include <fcntl.h>
Michal Vasko15dc9fa2021-05-03 14:33:05 +020023#include <inttypes.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020024#include <stdarg.h>
Radek Krejci535ea9f2020-05-29 16:01:05 +020025#include <stdio.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020026#include <stdlib.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020027#include <string.h>
Jan Kundrátf1960dc2021-12-12 03:12:23 +010028#ifndef _WIN32
Radek Krejci86d106e2018-10-18 09:53:19 +020029#include <sys/mman.h>
Jan Kundrátd31adc12022-07-07 21:36:15 +020030#else
31#include <io.h>
Jan Kundrátf1960dc2021-12-12 03:12:23 +010032#endif
Radek Krejci86d106e2018-10-18 09:53:19 +020033#include <sys/stat.h>
Radek Krejci86d106e2018-10-18 09:53:19 +020034#include <unistd.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020035
Radek Krejciaa45bda2020-07-20 07:43:38 +020036#include "compat.h"
Radek Krejcib4a4a272019-06-10 12:44:52 +020037#include "tree_schema_internal.h"
aPiecek704f8e92021-08-25 13:35:05 +020038#include "xml.h"
Michal Vasko1324b6c2018-09-07 11:16:23 +020039
/**
 * @brief realloc() wrapper that releases the original block when the reallocation fails.
 *
 * @param[in] ptr Memory to resize, may be NULL.
 * @param[in] size New size in bytes.
 * @return Resized memory, NULL on failure (@p ptr has been freed in that case).
 */
void *
ly_realloc(void *ptr, size_t size)
{
    void *resized = realloc(ptr, size);

    if (resized) {
        return resized;
    }

    /* the caller is left with nothing to clean up */
    free(ptr);
    return NULL;
}
Michal Vasko841d1a92018-09-07 15:40:31 +020052
/**
 * @brief Find the first occurrence of a byte in the first @p len bytes of a buffer.
 *
 * The search is bounded only by @p len, a terminating NUL byte does not stop it.
 *
 * @param[in] s Buffer to search in.
 * @param[in] c Byte to look for (converted to char).
 * @param[in] len Number of bytes of @p s to examine.
 * @return Pointer to the first matching byte, NULL if there is none.
 */
char *
ly_strnchr(const char *s, int c, size_t len)
{
    const char wanted = (char)c;
    size_t idx;

    for (idx = 0; idx < len; ++idx) {
        if (s[idx] == wanted) {
            return (char *)&s[idx];
        }
    }

    return NULL;
}
59
/**
 * @brief Compare a NUL-terminated reference string with a length-bounded string.
 *
 * @param[in] refstr NUL-terminated reference string.
 * @param[in] str String to compare, only its first @p str_len bytes are used.
 * @param[in] str_len Number of bytes of @p str to compare.
 * @return 0 if @p refstr is exactly the first @p str_len bytes of @p str,
 * the strncmp() result if those bytes differ, 1 if @p refstr is longer than @p str_len.
 */
int
ly_strncmp(const char *refstr, const char *str, size_t str_len)
{
    const int diff = strncmp(refstr, str, str_len);

    if (diff) {
        return diff;
    }

    /* the compared bytes match, the strings are equal only if refstr ends right here */
    return (refstr[str_len] == '\0') ? 0 : 1;
}
71
Michal Vasko15dc9fa2021-05-03 14:33:05 +020072LY_ERR
73ly_strntou8(const char *nptr, size_t len, uint8_t *ret)
74{
75 uint8_t num = 0, dig, dec_pow;
76
77 if (len > 3) {
78 /* overflow for sure */
79 return LY_EDENIED;
80 }
81
82 dec_pow = 1;
83 for ( ; len && isdigit(nptr[len - 1]); --len) {
84 dig = nptr[len - 1] - 48;
85
86 if (LY_OVERFLOW_MUL(UINT8_MAX, dig, dec_pow)) {
87 return LY_EDENIED;
88 }
89 dig *= dec_pow;
90
91 if (LY_OVERFLOW_ADD(UINT8_MAX, num, dig)) {
92 return LY_EDENIED;
93 }
94 num += dig;
95
96 dec_pow *= 10;
97 }
98
99 if (len) {
100 return LY_EVALID;
101 }
102 *ret = num;
103 return LY_SUCCESS;
104}
105
aPieceke3f828d2021-05-10 15:34:41 +0200106LY_ERR
107ly_value_prefix_next(const char *str_begin, const char *str_end, uint32_t *len, ly_bool *is_prefix, const char **str_next)
aPiecekf102d4d2021-03-30 12:18:38 +0200108{
109 const char *stop, *prefix;
aPieceke3f828d2021-05-10 15:34:41 +0200110 size_t bytes_read;
aPiecekf102d4d2021-03-30 12:18:38 +0200111 uint32_t c;
112 ly_bool prefix_found;
aPieceke3f828d2021-05-10 15:34:41 +0200113 LY_ERR ret = LY_SUCCESS;
aPiecekf102d4d2021-03-30 12:18:38 +0200114
aPieceke3f828d2021-05-10 15:34:41 +0200115 assert(len && is_prefix && str_next);
aPiecekf102d4d2021-03-30 12:18:38 +0200116
117#define IS_AT_END(PTR, STR_END) (STR_END ? PTR == STR_END : !(*PTR))
118
119 *str_next = NULL;
120 *is_prefix = 0;
aPieceke3f828d2021-05-10 15:34:41 +0200121 *len = 0;
aPiecekf102d4d2021-03-30 12:18:38 +0200122
123 if (!str_begin || !(*str_begin) || (str_begin == str_end)) {
124 return ret;
125 }
126
127 stop = str_begin;
128 prefix = NULL;
129 prefix_found = 0;
130
131 do {
132 /* look for the beginning of the YANG value */
aPieceke3f828d2021-05-10 15:34:41 +0200133 do {
134 LY_CHECK_RET(ly_getutf8(&stop, &c, &bytes_read));
135 } while (!is_xmlqnamestartchar(c) && !IS_AT_END(stop, str_end));
aPiecekf102d4d2021-03-30 12:18:38 +0200136
137 if (IS_AT_END(stop, str_end)) {
138 break;
139 }
140
141 /* maybe the prefix was found */
aPieceke3f828d2021-05-10 15:34:41 +0200142 prefix = stop - bytes_read;
aPiecekf102d4d2021-03-30 12:18:38 +0200143
144 /* look for the the end of the prefix */
aPieceke3f828d2021-05-10 15:34:41 +0200145 do {
146 LY_CHECK_RET(ly_getutf8(&stop, &c, &bytes_read));
147 } while (is_xmlqnamechar(c) && !IS_AT_END(stop, str_end));
aPiecekf102d4d2021-03-30 12:18:38 +0200148
149 prefix_found = c == ':' ? 1 : 0;
150
151 /* if it wasn't the prefix, keep looking */
152 } while (!IS_AT_END(stop, str_end) && !prefix_found);
153
154 if ((str_begin == prefix) && prefix_found) {
155 /* prefix found at the beginning of the input string */
156 *is_prefix = 1;
157 *str_next = IS_AT_END(stop, str_end) ? NULL : stop;
aPieceke3f828d2021-05-10 15:34:41 +0200158 *len = (stop - bytes_read) - str_begin;
aPiecekf102d4d2021-03-30 12:18:38 +0200159 } else if ((str_begin != prefix) && (prefix_found)) {
160 /* there is a some string before prefix */
161 *str_next = prefix;
aPieceke3f828d2021-05-10 15:34:41 +0200162 *len = prefix - str_begin;
aPiecekf102d4d2021-03-30 12:18:38 +0200163 } else {
164 /* no prefix found */
aPieceke3f828d2021-05-10 15:34:41 +0200165 *len = stop - str_begin;
aPiecekf102d4d2021-03-30 12:18:38 +0200166 }
167
168#undef IS_AT_END
169
170 return ret;
171}
172
Radek Krejcib416be62018-10-01 14:51:45 +0200173LY_ERR
Michal Vaskob36053d2020-03-26 15:49:30 +0100174ly_getutf8(const char **input, uint32_t *utf8_char, size_t *bytes_read)
Radek Krejcib416be62018-10-01 14:51:45 +0200175{
Radek Krejci1deb5be2020-08-26 16:43:36 +0200176 uint32_t c, aux;
177 size_t len;
Radek Krejcib416be62018-10-01 14:51:45 +0200178
179 c = (*input)[0];
Radek Krejcib416be62018-10-01 14:51:45 +0200180
181 if (!(c & 0x80)) {
182 /* one byte character */
183 len = 1;
184
Michal Vasko69730152020-10-09 16:30:07 +0200185 if ((c < 0x20) && (c != 0x9) && (c != 0xa) && (c != 0xd)) {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200186 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200187 }
188 } else if ((c & 0xe0) == 0xc0) {
189 /* two bytes character */
190 len = 2;
191
192 aux = (*input)[1];
193 if ((aux & 0xc0) != 0x80) {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200194 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200195 }
196 c = ((c & 0x1f) << 6) | (aux & 0x3f);
197
198 if (c < 0x80) {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200199 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200200 }
201 } else if ((c & 0xf0) == 0xe0) {
202 /* three bytes character */
203 len = 3;
204
205 c &= 0x0f;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200206 for (uint64_t i = 1; i <= 2; i++) {
Radek Krejcib416be62018-10-01 14:51:45 +0200207 aux = (*input)[i];
208 if ((aux & 0xc0) != 0x80) {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200209 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200210 }
211
212 c = (c << 6) | (aux & 0x3f);
213 }
214
Michal Vasko69730152020-10-09 16:30:07 +0200215 if ((c < 0x800) || ((c > 0xd7ff) && (c < 0xe000)) || (c > 0xfffd)) {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200216 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200217 }
218 } else if ((c & 0xf8) == 0xf0) {
219 /* four bytes character */
220 len = 4;
221
222 c &= 0x07;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200223 for (uint64_t i = 1; i <= 3; i++) {
Radek Krejcib416be62018-10-01 14:51:45 +0200224 aux = (*input)[i];
225 if ((aux & 0xc0) != 0x80) {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200226 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200227 }
228
229 c = (c << 6) | (aux & 0x3f);
230 }
231
Michal Vasko69730152020-10-09 16:30:07 +0200232 if ((c < 0x1000) || (c > 0x10ffff)) {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200233 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200234 }
235 } else {
Michal Vaskoda09abf2023-10-06 15:53:18 +0200236 goto error;
Radek Krejcib416be62018-10-01 14:51:45 +0200237 }
238
239 (*utf8_char) = c;
240 (*input) += len;
241 if (bytes_read) {
242 (*bytes_read) = len;
243 }
244 return LY_SUCCESS;
Michal Vaskoda09abf2023-10-06 15:53:18 +0200245
246error:
247 if (bytes_read) {
248 (*bytes_read) = 0;
249 }
250 return LY_EINVAL;
Radek Krejcib416be62018-10-01 14:51:45 +0200251}
252
/**
 * @brief Check whether an UTF-8 string is equal to a hex string after a bitwise and.
 *
 * (input & 0x[arg1][arg3][arg5]...) == 0x[arg2][arg4][arg6]...
 *
 * @param[in] input UTF-8 string.
 * @param[in] bytes Number of bytes to compare.
 * @param[in] ... 2x @p bytes number of bytes (passed as int) to perform bitwise and and equality operations.
 * @return 1 if all the masked bytes are equal, 0 otherwise.
 */
static int
ly_utf8_and_equal(const char *input, uint8_t bytes, ...)
{
    va_list ap;
    int i, mask, byte, equal = 1;

    va_start(ap, bytes);
    for (i = 0; i < bytes; ++i) {
        mask = va_arg(ap, int);
        byte = va_arg(ap, int);

        /* compare each byte */
        if (((uint8_t)input[i] & mask) != (uint8_t)byte) {
            /* do not return directly, va_end() must be called first */
            equal = 0;
            break;
        }
    }
    va_end(ap);

    return equal;
}
283
/**
 * @brief Check whether an UTF-8 string is smaller than a hex string.
 *
 * input < 0x[arg1][arg2]...
 *
 * @param[in] input UTF-8 string.
 * @param[in] bytes Number of bytes to compare.
 * @param[in] ... @p bytes number of bytes (passed as int) to compare with.
 * @return 1 if @p input is smaller, 0 if it is greater or equal.
 */
static int
ly_utf8_less(const char *input, uint8_t bytes, ...)
{
    va_list ap;
    int i, byte, less = 0;

    va_start(ap, bytes);
    for (i = 0; i < bytes; ++i) {
        byte = va_arg(ap, int);

        /* the first differing byte decides, do not return directly so that va_end() is always called */
        if ((uint8_t)input[i] != (uint8_t)byte) {
            less = ((uint8_t)input[i] < (uint8_t)byte) ? 1 : 0;
            break;
        }
    }
    va_end(ap);

    /* 0 is also returned if all the bytes are equal */
    return less;
}
316
/**
 * @brief Check whether an UTF-8 string is greater than a hex string.
 *
 * input > 0x[arg1][arg2]...
 *
 * @param[in] input UTF-8 string.
 * @param[in] bytes Number of bytes to compare.
 * @param[in] ... @p bytes number of bytes (passed as int) to compare with.
 * @return 1 if @p input is greater, 0 if it is smaller or equal.
 */
static int
ly_utf8_greater(const char *input, uint8_t bytes, ...)
{
    va_list ap;
    int i, byte, greater = 0;

    va_start(ap, bytes);
    for (i = 0; i < bytes; ++i) {
        byte = va_arg(ap, int);

        /* the first differing byte decides, do not return directly so that va_end() is always called */
        if ((uint8_t)input[i] != (uint8_t)byte) {
            greater = ((uint8_t)input[i] > (uint8_t)byte) ? 1 : 0;
            break;
        }
    }
    va_end(ap);

    /* 0 is also returned if all the bytes are equal */
    return greater;
}
349
350LY_ERR
351ly_checkutf8(const char *input, size_t in_len, size_t *utf8_len)
352{
353 size_t len;
354
355 if (!(input[0] & 0x80)) {
356 /* one byte character */
357 len = 1;
358
359 if (ly_utf8_less(input, 1, 0x20) && (input[0] != 0x9) && (input[0] != 0xa) && (input[0] != 0xd)) {
360 /* invalid control characters */
361 return LY_EINVAL;
362 }
363 } else if (((input[0] & 0xe0) == 0xc0) && (in_len > 1)) {
364 /* two bytes character */
365 len = 2;
366
367 /* (input < 0xC280) || (input > 0xDFBF) || ((input & 0xE0C0) != 0xC080) */
368 if (ly_utf8_less(input, 2, 0xC2, 0x80) || ly_utf8_greater(input, 2, 0xDF, 0xBF) ||
369 !ly_utf8_and_equal(input, 2, 0xE0, 0xC0, 0xC0, 0x80)) {
370 return LY_EINVAL;
371 }
372 } else if (((input[0] & 0xf0) == 0xe0) && (in_len > 2)) {
373 /* three bytes character */
374 len = 3;
375
376 /* (input >= 0xEDA080) && (input <= 0xEDBFBF) */
377 if (!ly_utf8_less(input, 3, 0xED, 0xA0, 0x80) && !ly_utf8_greater(input, 3, 0xED, 0xBF, 0xBF)) {
378 /* reject UTF-16 surrogates */
379 return LY_EINVAL;
380 }
381
382 /* (input < 0xE0A080) || (input > 0xEFBFBF) || ((input & 0xF0C0C0) != 0xE08080) */
383 if (ly_utf8_less(input, 3, 0xE0, 0xA0, 0x80) || ly_utf8_greater(input, 3, 0xEF, 0xBF, 0xBF) ||
384 !ly_utf8_and_equal(input, 3, 0xF0, 0xE0, 0xC0, 0x80, 0xC0, 0x80)) {
385 return LY_EINVAL;
386 }
387 } else if (((input[0] & 0xf8) == 0xf0) && (in_len > 3)) {
388 /* four bytes character */
389 len = 4;
390
391 /* (input < 0xF0908080) || (input > 0xF48FBFBF) || ((input & 0xF8C0C0C0) != 0xF0808080) */
392 if (ly_utf8_less(input, 4, 0xF0, 0x90, 0x80, 0x80) || ly_utf8_greater(input, 4, 0xF4, 0x8F, 0xBF, 0xBF) ||
393 !ly_utf8_and_equal(input, 4, 0xF8, 0xF0, 0xC0, 0x80, 0xC0, 0x80, 0xC0, 0x80)) {
394 return LY_EINVAL;
395 }
396 } else {
397 return LY_EINVAL;
398 }
399
400 *utf8_len = len;
401 return LY_SUCCESS;
402}
403
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200404LY_ERR
405ly_pututf8(char *dst, uint32_t value, size_t *bytes_written)
406{
407 if (value < 0x80) {
408 /* one byte character */
Michal Vasko69730152020-10-09 16:30:07 +0200409 if ((value < 0x20) &&
410 (value != 0x09) &&
411 (value != 0x0a) &&
412 (value != 0x0d)) {
Michal Vasko519097f2023-05-25 10:00:44 +0200413 /* valid UTF8 but not YANG string character */
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200414 return LY_EINVAL;
415 }
416
417 dst[0] = value;
418 (*bytes_written) = 1;
419 } else if (value < 0x800) {
420 /* two bytes character */
421 dst[0] = 0xc0 | (value >> 6);
422 dst[1] = 0x80 | (value & 0x3f);
423 (*bytes_written) = 2;
424 } else if (value < 0xfffe) {
425 /* three bytes character */
426 if (((value & 0xf800) == 0xd800) ||
Michal Vasko69730152020-10-09 16:30:07 +0200427 ((value >= 0xfdd0) && (value <= 0xfdef))) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200428 /* exclude surrogate blocks %xD800-DFFF */
429 /* exclude noncharacters %xFDD0-FDEF */
430 return LY_EINVAL;
431 }
432
433 dst[0] = 0xe0 | (value >> 12);
434 dst[1] = 0x80 | ((value >> 6) & 0x3f);
435 dst[2] = 0x80 | (value & 0x3f);
436
437 (*bytes_written) = 3;
438 } else if (value < 0x10fffe) {
439 if ((value & 0xffe) == 0xffe) {
440 /* exclude noncharacters %xFFFE-FFFF, %x1FFFE-1FFFF, %x2FFFE-2FFFF, %x3FFFE-3FFFF, %x4FFFE-4FFFF,
441 * %x5FFFE-5FFFF, %x6FFFE-6FFFF, %x7FFFE-7FFFF, %x8FFFE-8FFFF, %x9FFFE-9FFFF, %xAFFFE-AFFFF,
442 * %xBFFFE-BFFFF, %xCFFFE-CFFFF, %xDFFFE-DFFFF, %xEFFFE-EFFFF, %xFFFFE-FFFFF, %x10FFFE-10FFFF */
443 return LY_EINVAL;
444 }
445 /* four bytes character */
446 dst[0] = 0xf0 | (value >> 18);
447 dst[1] = 0x80 | ((value >> 12) & 0x3f);
448 dst[2] = 0x80 | ((value >> 6) & 0x3f);
449 dst[3] = 0x80 | (value & 0x3f);
450
451 (*bytes_written) = 4;
452 } else {
453 return LY_EINVAL;
454 }
455 return LY_SUCCESS;
456}
457
/**
 * @brief Lengths (in bytes) of UTF-8 characters indexed by the value of their first byte.
 *
 * Each row covers 16 consecutive values of the first byte.
 */
static const unsigned char utf8_char_length_table[] = {
    /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xa0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xb0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xe0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0xf0 */ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
};

/**
 * @brief Count the UTF-8 characters in a string.
 *
 * The characters are not validated, only their first bytes are used to learn their lengths.
 *
 * @param[in] str String to process.
 * @param[in] bytes Maximum number of bytes of @p str to process, a NUL byte ends the counting earlier.
 * @return Number of the characters.
 */
size_t
ly_utf8len(const char *str, size_t bytes)
{
    size_t chars = 0, pos;

    for (pos = 0; (pos < bytes) && str[pos]; pos += utf8_char_length_table[(unsigned char)str[pos]]) {
        ++chars;
    }

    return chars;
}
492
Michal Vasko7b3a00e2023-08-09 11:58:03 +0200493int
Radek Krejcid972c252018-09-25 13:23:39 +0200494LY_VCODE_INSTREXP_len(const char *str)
495{
Michal Vasko7b3a00e2023-08-09 11:58:03 +0200496 int len = 0;
Michal Vasko69730152020-10-09 16:30:07 +0200497
Radek Krejcid972c252018-09-25 13:23:39 +0200498 if (!str) {
499 return len;
500 } else if (!str[0]) {
501 return 1;
502 }
Radek Krejci1e008d22020-08-17 11:37:37 +0200503 for (len = 1; len < LY_VCODE_INSTREXP_MAXLEN && str[len]; ++len) {}
Radek Krejcid972c252018-09-25 13:23:39 +0200504 return len;
505}
506
Jan Kundrátf1960dc2021-12-12 03:12:23 +0100507#ifdef HAVE_MMAP
Radek Krejcif345c012018-09-19 11:12:59 +0200508LY_ERR
Radek Krejci86d106e2018-10-18 09:53:19 +0200509ly_mmap(struct ly_ctx *ctx, int fd, size_t *length, void **addr)
Michal Vasko841d1a92018-09-07 15:40:31 +0200510{
Radek Krejci86d106e2018-10-18 09:53:19 +0200511 struct stat sb;
512 long pagesize;
513 size_t m;
Michal Vasko841d1a92018-09-07 15:40:31 +0200514
Radek Krejci86d106e2018-10-18 09:53:19 +0200515 assert(length);
516 assert(addr);
517 assert(fd >= 0);
Michal Vasko841d1a92018-09-07 15:40:31 +0200518
Radek Krejci86d106e2018-10-18 09:53:19 +0200519 if (fstat(fd, &sb) == -1) {
520 LOGERR(ctx, LY_ESYS, "Failed to stat the file descriptor (%s) for the mmap().", strerror(errno));
521 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200522 }
Radek Krejci86d106e2018-10-18 09:53:19 +0200523 if (!S_ISREG(sb.st_mode)) {
524 LOGERR(ctx, LY_EINVAL, "File to mmap() is not a regular file.");
525 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200526 }
Radek Krejci86d106e2018-10-18 09:53:19 +0200527 if (!sb.st_size) {
528 *addr = NULL;
529 return LY_SUCCESS;
530 }
531 pagesize = sysconf(_SC_PAGESIZE);
532
533 m = sb.st_size % pagesize;
Michal Vasko69730152020-10-09 16:30:07 +0200534 if (m && (pagesize - m >= 1)) {
Radek Krejci86d106e2018-10-18 09:53:19 +0200535 /* there will be enough space (at least 1 byte) after the file content mapping to provide zeroed NULL-termination byte */
536 *length = sb.st_size + 1;
537 *addr = mmap(NULL, *length, PROT_READ, MAP_PRIVATE, fd, 0);
538 } else {
539 /* there will not be enough bytes after the file content mapping for the additional bytes and some of them
540 * would overflow into another page that would not be zerroed and any access into it would generate SIGBUS.
541 * Therefore we have to do the following hack with double mapping. First, the required number of bytes
542 * (including the additinal bytes) is required as anonymous and thus they will be really provided (actually more
543 * because of using whole pages) and also initialized by zeros. Then, the file is mapped to the same address
544 * where the anonymous mapping starts. */
545 *length = sb.st_size + pagesize;
546 *addr = mmap(NULL, *length, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
547 *addr = mmap(*addr, sb.st_size, PROT_READ, MAP_PRIVATE | MAP_FIXED, fd, 0);
548 }
549 if (*addr == MAP_FAILED) {
550 LOGERR(ctx, LY_ESYS, "mmap() failed (%s).", strerror(errno));
551 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200552 }
553
Radek Krejcif345c012018-09-19 11:12:59 +0200554 return LY_SUCCESS;
Radek Krejci86d106e2018-10-18 09:53:19 +0200555}
Michal Vasko841d1a92018-09-07 15:40:31 +0200556
Radek Krejci86d106e2018-10-18 09:53:19 +0200557LY_ERR
558ly_munmap(void *addr, size_t length)
559{
560 if (munmap(addr, length)) {
561 return LY_ESYS;
562 }
563 return LY_SUCCESS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200564}
Radek Krejci4f28eda2018-11-12 11:46:16 +0100565
Jan Kundrátf1960dc2021-12-12 03:12:23 +0100566#else
567
568LY_ERR
569ly_mmap(struct ly_ctx *ctx, int fd, size_t *length, void **addr)
570{
571 struct stat sb;
572 size_t m;
573
574 assert(length);
575 assert(addr);
576 assert(fd >= 0);
577
Jan Kundrátd31adc12022-07-07 21:36:15 +0200578#if _WIN32
579 if (_setmode(fd, _O_BINARY) == -1) {
580 LOGERR(ctx, LY_ESYS, "Failed to switch the file descriptor to binary mode.", strerror(errno));
581 return LY_ESYS;
582 }
583#endif
584
Jan Kundrátf1960dc2021-12-12 03:12:23 +0100585 if (fstat(fd, &sb) == -1) {
586 LOGERR(ctx, LY_ESYS, "Failed to stat the file descriptor (%s) for the mmap().", strerror(errno));
587 return LY_ESYS;
588 }
589 if (!S_ISREG(sb.st_mode)) {
590 LOGERR(ctx, LY_EINVAL, "File to mmap() is not a regular file.");
591 return LY_ESYS;
592 }
593 if (!sb.st_size) {
594 *addr = NULL;
595 return LY_SUCCESS;
596 }
597 /* On Windows, the mman-win32 mmap() emulation uses CreateFileMapping and MapViewOfFile, and these functions
598 * do not allow mapping more than "length of file" bytes for PROT_READ. Remapping existing mappings is not allowed, either.
599 * At that point the path of least resistance is just reading the file in as-is. */
600 m = sb.st_size + 1;
601 char *buf = calloc(m, 1);
602
603 if (!buf) {
604 LOGERR(ctx, LY_ESYS, "ly_mmap: malloc() failed (%s).", strerror(errno));
605 }
606 *addr = buf;
607 *length = m;
608
609 lseek(fd, 0, SEEK_SET);
610 ssize_t to_read = m - 1;
611
612 while (to_read > 0) {
613 ssize_t n = read(fd, buf, to_read);
Michal Vasko2bf4af42023-01-04 12:08:38 +0100614
Jan Kundrátf1960dc2021-12-12 03:12:23 +0100615 if (n == 0) {
616 return LY_SUCCESS;
617 } else if (n < 0) {
618 if (errno == EINTR) {
619 continue; // can I get this on Windows?
620 }
621 LOGERR(ctx, LY_ESYS, "ly_mmap: read() failed (%s).", strerror(errno));
622 }
623 to_read -= n;
624 buf += n;
625 }
626 return LY_SUCCESS;
627}
628
629LY_ERR
630ly_munmap(void *addr, size_t length)
631{
632 (void)length;
633 free(addr);
634 return LY_SUCCESS;
635}
636
637#endif
638
Radek Krejci4f28eda2018-11-12 11:46:16 +0100639LY_ERR
Radek Krejci4546aa62019-07-15 16:53:32 +0200640ly_strcat(char **dest, const char *format, ...)
641{
642 va_list fp;
643 char *addition = NULL;
644 size_t len;
645
646 va_start(fp, format);
647 len = vasprintf(&addition, format, fp);
648 len += (*dest ? strlen(*dest) : 0) + 1;
649
650 if (*dest) {
651 *dest = ly_realloc(*dest, len);
652 if (!*dest) {
Radek Krejci1cd812f2020-12-01 12:17:53 +0100653 va_end(fp);
Radek Krejci4546aa62019-07-15 16:53:32 +0200654 return LY_EMEM;
655 }
656 *dest = strcat(*dest, addition);
657 free(addition);
658 } else {
659 *dest = addition;
660 }
661
662 va_end(fp);
663 return LY_SUCCESS;
664}
665
666LY_ERR
Radek Krejci249973a2019-06-10 10:50:54 +0200667ly_parse_int(const char *val_str, size_t val_len, int64_t min, int64_t max, int base, int64_t *ret)
Radek Krejci4f28eda2018-11-12 11:46:16 +0100668{
Michal Vaskob4d40d62021-05-04 11:42:44 +0200669 LY_ERR rc = LY_SUCCESS;
670 char *ptr, *str;
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200671 int64_t i;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100672
Radek Krejci249973a2019-06-10 10:50:54 +0200673 LY_CHECK_ARG_RET(NULL, val_str, val_str[0], val_len, LY_EINVAL);
Radek Krejci4f28eda2018-11-12 11:46:16 +0100674
Michal Vaskob4d40d62021-05-04 11:42:44 +0200675 /* duplicate the value */
676 str = strndup(val_str, val_len);
677 LY_CHECK_RET(!str, LY_EMEM);
Radek Krejci4f28eda2018-11-12 11:46:16 +0100678
Michal Vaskob4d40d62021-05-04 11:42:44 +0200679 /* parse the value to avoid accessing following bytes */
680 errno = 0;
681 i = strtoll(str, &ptr, base);
682 if (errno || (ptr == str)) {
683 /* invalid string */
684 rc = LY_EVALID;
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200685 } else if ((i < min) || (i > max)) {
Michal Vaskob4d40d62021-05-04 11:42:44 +0200686 /* invalid number */
687 rc = LY_EDENIED;
688 } else if (*ptr) {
689 while (isspace(*ptr)) {
690 ++ptr;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100691 }
Michal Vaskob4d40d62021-05-04 11:42:44 +0200692 if (*ptr) {
693 /* invalid characters after some number */
694 rc = LY_EVALID;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100695 }
696 }
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200697
Michal Vaskob4d40d62021-05-04 11:42:44 +0200698 /* cleanup */
699 free(str);
700 if (!rc) {
701 *ret = i;
702 }
703 return rc;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100704}
705
706LY_ERR
Radek Krejci249973a2019-06-10 10:50:54 +0200707ly_parse_uint(const char *val_str, size_t val_len, uint64_t max, int base, uint64_t *ret)
Radek Krejci4f28eda2018-11-12 11:46:16 +0100708{
Michal Vaskob4d40d62021-05-04 11:42:44 +0200709 LY_ERR rc = LY_SUCCESS;
710 char *ptr, *str;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100711 uint64_t u;
712
Michal Vaskob4d40d62021-05-04 11:42:44 +0200713 LY_CHECK_ARG_RET(NULL, val_str, val_str[0], val_len, LY_EINVAL);
Radek Krejci4f28eda2018-11-12 11:46:16 +0100714
Michal Vaskob4d40d62021-05-04 11:42:44 +0200715 /* duplicate the value to avoid accessing following bytes */
716 str = strndup(val_str, val_len);
717 LY_CHECK_RET(!str, LY_EMEM);
718
719 /* parse the value */
Radek Krejci4f28eda2018-11-12 11:46:16 +0100720 errno = 0;
Michal Vaskob4d40d62021-05-04 11:42:44 +0200721 u = strtoull(str, &ptr, base);
722 if (errno || (ptr == str)) {
723 /* invalid string */
724 rc = LY_EVALID;
725 } else if ((u > max) || (u && (str[0] == '-'))) {
726 /* invalid number */
727 rc = LY_EDENIED;
728 } else if (*ptr) {
729 while (isspace(*ptr)) {
730 ++ptr;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100731 }
Michal Vaskob4d40d62021-05-04 11:42:44 +0200732 if (*ptr) {
733 /* invalid characters after some number */
734 rc = LY_EVALID;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100735 }
736 }
737
Michal Vaskob4d40d62021-05-04 11:42:44 +0200738 /* cleanup */
739 free(str);
740 if (!rc) {
741 *ret = u;
742 }
743 return rc;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100744}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200745
746/**
747 * @brief Parse an identifier.
748 *
749 * ;; An identifier MUST NOT start with (('X'|'x') ('M'|'m') ('L'|'l'))
750 * identifier = (ALPHA / "_")
751 * *(ALPHA / DIGIT / "_" / "-" / ".")
752 *
753 * @param[in,out] id Identifier to parse. When returned, it points to the first character which is not part of the identifier.
754 * @return LY_ERR value: LY_SUCCESS or LY_EINVAL in case of invalid starting character.
755 */
756static LY_ERR
757lys_parse_id(const char **id)
758{
759 assert(id && *id);
760
761 if (!is_yangidentstartchar(**id)) {
762 return LY_EINVAL;
763 }
764 ++(*id);
765
766 while (is_yangidentchar(**id)) {
767 ++(*id);
768 }
769 return LY_SUCCESS;
770}
771
772LY_ERR
773ly_parse_nodeid(const char **id, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
774{
775 assert(id && *id);
776 assert(prefix && prefix_len);
777 assert(name && name_len);
778
779 *prefix = *id;
780 *prefix_len = 0;
781 *name = NULL;
782 *name_len = 0;
783
784 LY_CHECK_RET(lys_parse_id(id));
785 if (**id == ':') {
786 /* there is prefix */
787 *prefix_len = *id - *prefix;
788 ++(*id);
789 *name = *id;
790
791 LY_CHECK_RET(lys_parse_id(id));
792 *name_len = *id - *name;
793 } else {
794 /* there is no prefix, so what we have as prefix now is actually the name */
795 *name = *prefix;
796 *name_len = *id - *name;
797 *prefix = NULL;
798 }
799
800 return LY_SUCCESS;
801}
802
803LY_ERR
Radek Krejci084289f2019-07-09 17:35:30 +0200804ly_parse_instance_predicate(const char **pred, size_t limit, LYD_FORMAT format,
Radek Krejci0f969882020-08-21 16:56:47 +0200805 const char **prefix, size_t *prefix_len, const char **id, size_t *id_len, const char **value, size_t *value_len,
806 const char **errmsg)
Radek Krejcib4a4a272019-06-10 12:44:52 +0200807{
808 LY_ERR ret = LY_EVALID;
809 const char *in = *pred;
810 size_t offset = 1;
Radek Krejci857189e2020-09-01 13:26:36 +0200811 uint8_t expr = 0; /* 0 - position predicate; 1 - leaf-list-predicate; 2 - key-predicate */
Radek Krejcib4a4a272019-06-10 12:44:52 +0200812 char quot;
813
Radek Krejci4607f542020-12-01 12:18:49 +0100814 assert(in[0] == '[');
Radek Krejcib4a4a272019-06-10 12:44:52 +0200815
816 *prefix = *id = *value = NULL;
817 *prefix_len = *id_len = *value_len = 0;
818
819 /* leading *WSP */
Michal Vaskod989ba02020-08-24 10:59:24 +0200820 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200821
822 if (isdigit(in[offset])) {
823 /* pos: "[" *WSP positive-integer-value *WSP "]" */
824 if (in[offset] == '0') {
825 /* zero */
826 *errmsg = "The position predicate cannot be zero.";
827 goto error;
828 }
829
830 /* positive-integer-value */
Radek Krejci10bfdf82019-06-10 14:08:13 +0200831 *value = &in[offset++];
Michal Vaskod989ba02020-08-24 10:59:24 +0200832 for ( ; isdigit(in[offset]); offset++) {}
Radek Krejci10bfdf82019-06-10 14:08:13 +0200833 *value_len = &in[offset] - *value;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200834
835 } else if (in[offset] == '.') {
836 /* leaf-list-predicate: "[" *WSP "." *WSP "=" *WSP quoted-string *WSP "]" */
837 *id = &in[offset];
838 *id_len = 1;
839 offset++;
840 expr = 1;
Radek Krejci10bfdf82019-06-10 14:08:13 +0200841 } else if (in[offset] == '-') {
842 /* typically negative value */
843 *errmsg = "Invalid instance predicate format (negative position or invalid node-identifier).";
844 goto error;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200845 } else {
846 /* key-predicate: "[" *WSP node-identifier *WSP "=" *WSP quoted-string *WSP "]" */
847 in = &in[offset];
848 if (ly_parse_nodeid(&in, prefix, prefix_len, id, id_len)) {
849 *errmsg = "Invalid node-identifier.";
850 goto error;
851 }
Michal Vasko69730152020-10-09 16:30:07 +0200852 if ((format == LYD_XML) && !(*prefix)) {
Radek Krejci084289f2019-07-09 17:35:30 +0200853 /* all node names MUST be qualified with explicit namespace prefix */
854 *errmsg = "Missing prefix of a node name.";
855 goto error;
856 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200857 offset = in - *pred;
858 in = *pred;
Radek Krejci10bfdf82019-06-10 14:08:13 +0200859 expr = 2;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200860 }
861
862 if (expr) {
863 /* *WSP "=" *WSP quoted-string *WSP "]" */
Michal Vaskod989ba02020-08-24 10:59:24 +0200864 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200865
866 if (in[offset] != '=') {
Radek Krejci10bfdf82019-06-10 14:08:13 +0200867 if (expr == 1) {
868 *errmsg = "Unexpected character instead of \'=\' in leaf-list-predicate.";
869 } else { /* 2 */
870 *errmsg = "Unexpected character instead of \'=\' in key-predicate.";
871 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200872 goto error;
873 }
874 offset++;
Michal Vaskod989ba02020-08-24 10:59:24 +0200875 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200876
877 /* quoted-string */
878 quot = in[offset++];
Michal Vasko69730152020-10-09 16:30:07 +0200879 if ((quot != '\'') && (quot != '\"')) {
Radek Krejcib4a4a272019-06-10 12:44:52 +0200880 *errmsg = "String value is not quoted.";
881 goto error;
882 }
883 *value = &in[offset];
Michal Vaskod989ba02020-08-24 10:59:24 +0200884 for ( ; offset < limit && (in[offset] != quot || (offset && in[offset - 1] == '\\')); offset++) {}
Radek Krejci10bfdf82019-06-10 14:08:13 +0200885 if (in[offset] == quot) {
886 *value_len = &in[offset] - *value;
887 offset++;
888 } else {
889 *errmsg = "Value is not terminated quoted-string.";
890 goto error;
891 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200892 }
893
894 /* *WSP "]" */
Michal Vaskod989ba02020-08-24 10:59:24 +0200895 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200896 if (in[offset] != ']') {
Radek Krejci10bfdf82019-06-10 14:08:13 +0200897 if (expr == 0) {
898 *errmsg = "Predicate (pos) is not terminated by \']\' character.";
899 } else if (expr == 1) {
900 *errmsg = "Predicate (leaf-list-predicate) is not terminated by \']\' character.";
901 } else { /* 2 */
902 *errmsg = "Predicate (key-predicate) is not terminated by \']\' character.";
903 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200904 goto error;
905 }
Radek Krejci10bfdf82019-06-10 14:08:13 +0200906 offset++;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200907
Radek Krejci10bfdf82019-06-10 14:08:13 +0200908 if (offset <= limit) {
909 *pred = &in[offset];
Radek Krejcib4a4a272019-06-10 12:44:52 +0200910 return LY_SUCCESS;
911 }
912
913 /* we read after the limit */
914 *errmsg = "Predicate is incomplete.";
915 *prefix = *id = *value = NULL;
916 *prefix_len = *id_len = *value_len = 0;
917 offset = limit;
918 ret = LY_EINVAL;
919
920error:
921 *pred = &in[offset];
922 return ret;
923}