blob: 8f69dda4ec2d34656f8db4b2a27ad0b3d6d34251 [file] [log] [blame]
Michal Vasko1324b6c2018-09-07 11:16:23 +02001/**
2 * @file common.c
3 * @author Michal Vasko <mvasko@cesnet.cz>
4 * @brief common internal definitions for libyang
5 *
6 * Copyright (c) 2018 CESNET, z.s.p.o.
7 *
8 * This source code is licensed under BSD 3-Clause License (the "License").
9 * You may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * https://opensource.org/licenses/BSD-3-Clause
13 */
Radek Krejcib7db73a2018-10-24 14:18:40 +020014
Radek Krejci535ea9f2020-05-29 16:01:05 +020015#define _GNU_SOURCE
16
Radek Krejcib7db73a2018-10-24 14:18:40 +020017#include "common.h"
Michal Vasko1324b6c2018-09-07 11:16:23 +020018
Radek Krejci86d106e2018-10-18 09:53:19 +020019#include <assert.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020020#include <ctype.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020021#include <errno.h>
Jan Kundrátd31adc12022-07-07 21:36:15 +020022#include <fcntl.h>
Michal Vasko15dc9fa2021-05-03 14:33:05 +020023#include <inttypes.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020024#include <stdarg.h>
Radek Krejci535ea9f2020-05-29 16:01:05 +020025#include <stdio.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020026#include <stdlib.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020027#include <string.h>
Jan Kundrátf1960dc2021-12-12 03:12:23 +010028#ifndef _WIN32
Radek Krejci86d106e2018-10-18 09:53:19 +020029#include <sys/mman.h>
Jan Kundrátd31adc12022-07-07 21:36:15 +020030#else
31#include <io.h>
Jan Kundrátf1960dc2021-12-12 03:12:23 +010032#endif
Radek Krejci86d106e2018-10-18 09:53:19 +020033#include <sys/stat.h>
Radek Krejci86d106e2018-10-18 09:53:19 +020034#include <unistd.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020035
Radek Krejciaa45bda2020-07-20 07:43:38 +020036#include "compat.h"
Radek Krejcib4a4a272019-06-10 12:44:52 +020037#include "tree_schema_internal.h"
aPiecek704f8e92021-08-25 13:35:05 +020038#include "xml.h"
Michal Vasko1324b6c2018-09-07 11:16:23 +020039
/**
 * @brief Wrapper for realloc() that frees the original memory on failure.
 *
 * @param[in] ptr Memory to reallocate, may be NULL.
 * @param[in] size New size of the memory in bytes.
 * @return Pointer to the reallocated memory.
 * @return NULL on failure or when an existing block is resized to 0 bytes, @p ptr is freed in both cases.
 */
void *
ly_realloc(void *ptr, size_t size)
{
    void *new_mem;

    if (ptr && !size) {
        /* realloc(ptr, 0) is allowed to release ptr and return NULL, the free() below would then be a double free */
        free(ptr);
        return NULL;
    }

    new_mem = realloc(ptr, size);
    if (!new_mem) {
        free(ptr);
    }

    return new_mem;
}
Michal Vasko841d1a92018-09-07 15:40:31 +020052
/**
 * @brief Find the first occurrence of a character in a length-limited string.
 *
 * The terminating NUL byte gets no special treatment, only @p len limits the search.
 *
 * @param[in] s String to search in.
 * @param[in] c Character to look for (converted to char).
 * @param[in] len Number of bytes of @p s to examine.
 * @return Pointer to the first matching byte, NULL if there is none in the first @p len bytes.
 */
char *
ly_strnchr(const char *s, int c, size_t len)
{
    const char wanted = (char)c;
    size_t idx;

    for (idx = 0; idx < len; ++idx) {
        if (s[idx] == wanted) {
            return (char *)&s[idx];
        }
    }

    return NULL;
}
59
/**
 * @brief Compare a NUL-terminated reference string with a length-limited string.
 *
 * The strings are equal only if the first @p str_len bytes match and @p refstr ends right after them.
 *
 * @param[in] refstr NUL-terminated reference string.
 * @param[in] str String to compare, only @p str_len bytes are considered.
 * @param[in] str_len Number of bytes of @p str to compare.
 * @return 0 if the strings are equal, the non-zero strncmp() result if the compared bytes differ,
 * 1 if @p refstr is longer than @p str_len.
 */
int
ly_strncmp(const char *refstr, const char *str, size_t str_len)
{
    int diff = strncmp(refstr, str, str_len);

    if (diff) {
        /* the compared bytes already differ */
        return diff;
    }

    /* same prefix, the reference string must not continue */
    return refstr[str_len] ? 1 : 0;
}
71
Michal Vasko15dc9fa2021-05-03 14:33:05 +020072LY_ERR
73ly_strntou8(const char *nptr, size_t len, uint8_t *ret)
74{
75 uint8_t num = 0, dig, dec_pow;
76
77 if (len > 3) {
78 /* overflow for sure */
79 return LY_EDENIED;
80 }
81
82 dec_pow = 1;
83 for ( ; len && isdigit(nptr[len - 1]); --len) {
84 dig = nptr[len - 1] - 48;
85
86 if (LY_OVERFLOW_MUL(UINT8_MAX, dig, dec_pow)) {
87 return LY_EDENIED;
88 }
89 dig *= dec_pow;
90
91 if (LY_OVERFLOW_ADD(UINT8_MAX, num, dig)) {
92 return LY_EDENIED;
93 }
94 num += dig;
95
96 dec_pow *= 10;
97 }
98
99 if (len) {
100 return LY_EVALID;
101 }
102 *ret = num;
103 return LY_SUCCESS;
104}
105
aPieceke3f828d2021-05-10 15:34:41 +0200106LY_ERR
107ly_value_prefix_next(const char *str_begin, const char *str_end, uint32_t *len, ly_bool *is_prefix, const char **str_next)
aPiecekf102d4d2021-03-30 12:18:38 +0200108{
109 const char *stop, *prefix;
aPieceke3f828d2021-05-10 15:34:41 +0200110 size_t bytes_read;
aPiecekf102d4d2021-03-30 12:18:38 +0200111 uint32_t c;
112 ly_bool prefix_found;
aPieceke3f828d2021-05-10 15:34:41 +0200113 LY_ERR ret = LY_SUCCESS;
aPiecekf102d4d2021-03-30 12:18:38 +0200114
aPieceke3f828d2021-05-10 15:34:41 +0200115 assert(len && is_prefix && str_next);
aPiecekf102d4d2021-03-30 12:18:38 +0200116
117#define IS_AT_END(PTR, STR_END) (STR_END ? PTR == STR_END : !(*PTR))
118
119 *str_next = NULL;
120 *is_prefix = 0;
aPieceke3f828d2021-05-10 15:34:41 +0200121 *len = 0;
aPiecekf102d4d2021-03-30 12:18:38 +0200122
123 if (!str_begin || !(*str_begin) || (str_begin == str_end)) {
124 return ret;
125 }
126
127 stop = str_begin;
128 prefix = NULL;
129 prefix_found = 0;
130
131 do {
132 /* look for the beginning of the YANG value */
aPieceke3f828d2021-05-10 15:34:41 +0200133 do {
134 LY_CHECK_RET(ly_getutf8(&stop, &c, &bytes_read));
135 } while (!is_xmlqnamestartchar(c) && !IS_AT_END(stop, str_end));
aPiecekf102d4d2021-03-30 12:18:38 +0200136
137 if (IS_AT_END(stop, str_end)) {
138 break;
139 }
140
141 /* maybe the prefix was found */
aPieceke3f828d2021-05-10 15:34:41 +0200142 prefix = stop - bytes_read;
aPiecekf102d4d2021-03-30 12:18:38 +0200143
144 /* look for the the end of the prefix */
aPieceke3f828d2021-05-10 15:34:41 +0200145 do {
146 LY_CHECK_RET(ly_getutf8(&stop, &c, &bytes_read));
147 } while (is_xmlqnamechar(c) && !IS_AT_END(stop, str_end));
aPiecekf102d4d2021-03-30 12:18:38 +0200148
149 prefix_found = c == ':' ? 1 : 0;
150
151 /* if it wasn't the prefix, keep looking */
152 } while (!IS_AT_END(stop, str_end) && !prefix_found);
153
154 if ((str_begin == prefix) && prefix_found) {
155 /* prefix found at the beginning of the input string */
156 *is_prefix = 1;
157 *str_next = IS_AT_END(stop, str_end) ? NULL : stop;
aPieceke3f828d2021-05-10 15:34:41 +0200158 *len = (stop - bytes_read) - str_begin;
aPiecekf102d4d2021-03-30 12:18:38 +0200159 } else if ((str_begin != prefix) && (prefix_found)) {
160 /* there is a some string before prefix */
161 *str_next = prefix;
aPieceke3f828d2021-05-10 15:34:41 +0200162 *len = prefix - str_begin;
aPiecekf102d4d2021-03-30 12:18:38 +0200163 } else {
164 /* no prefix found */
aPieceke3f828d2021-05-10 15:34:41 +0200165 *len = stop - str_begin;
aPiecekf102d4d2021-03-30 12:18:38 +0200166 }
167
168#undef IS_AT_END
169
170 return ret;
171}
172
Radek Krejcib416be62018-10-01 14:51:45 +0200173LY_ERR
Michal Vaskob36053d2020-03-26 15:49:30 +0100174ly_getutf8(const char **input, uint32_t *utf8_char, size_t *bytes_read)
Radek Krejcib416be62018-10-01 14:51:45 +0200175{
Radek Krejci1deb5be2020-08-26 16:43:36 +0200176 uint32_t c, aux;
177 size_t len;
Radek Krejcib416be62018-10-01 14:51:45 +0200178
179 c = (*input)[0];
Radek Krejcib416be62018-10-01 14:51:45 +0200180
181 if (!(c & 0x80)) {
182 /* one byte character */
183 len = 1;
184
Michal Vasko69730152020-10-09 16:30:07 +0200185 if ((c < 0x20) && (c != 0x9) && (c != 0xa) && (c != 0xd)) {
Radek Krejcib416be62018-10-01 14:51:45 +0200186 return LY_EINVAL;
187 }
188 } else if ((c & 0xe0) == 0xc0) {
189 /* two bytes character */
190 len = 2;
191
192 aux = (*input)[1];
193 if ((aux & 0xc0) != 0x80) {
194 return LY_EINVAL;
195 }
196 c = ((c & 0x1f) << 6) | (aux & 0x3f);
197
198 if (c < 0x80) {
199 return LY_EINVAL;
200 }
201 } else if ((c & 0xf0) == 0xe0) {
202 /* three bytes character */
203 len = 3;
204
205 c &= 0x0f;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200206 for (uint64_t i = 1; i <= 2; i++) {
Radek Krejcib416be62018-10-01 14:51:45 +0200207 aux = (*input)[i];
208 if ((aux & 0xc0) != 0x80) {
209 return LY_EINVAL;
210 }
211
212 c = (c << 6) | (aux & 0x3f);
213 }
214
Michal Vasko69730152020-10-09 16:30:07 +0200215 if ((c < 0x800) || ((c > 0xd7ff) && (c < 0xe000)) || (c > 0xfffd)) {
Radek Krejcib416be62018-10-01 14:51:45 +0200216 return LY_EINVAL;
217 }
218 } else if ((c & 0xf8) == 0xf0) {
219 /* four bytes character */
220 len = 4;
221
222 c &= 0x07;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200223 for (uint64_t i = 1; i <= 3; i++) {
Radek Krejcib416be62018-10-01 14:51:45 +0200224 aux = (*input)[i];
225 if ((aux & 0xc0) != 0x80) {
226 return LY_EINVAL;
227 }
228
229 c = (c << 6) | (aux & 0x3f);
230 }
231
Michal Vasko69730152020-10-09 16:30:07 +0200232 if ((c < 0x1000) || (c > 0x10ffff)) {
Radek Krejcib416be62018-10-01 14:51:45 +0200233 return LY_EINVAL;
234 }
235 } else {
Michal Vaskobe9136a2023-10-06 15:25:20 +0200236 if (bytes_read) {
237 (*bytes_read) = 0;
238 }
Radek Krejcib416be62018-10-01 14:51:45 +0200239 return LY_EINVAL;
240 }
241
242 (*utf8_char) = c;
243 (*input) += len;
244 if (bytes_read) {
245 (*bytes_read) = len;
246 }
247 return LY_SUCCESS;
248}
249
/**
 * @brief Check whether an UTF-8 string is equal to a hex string after a bitwise and.
 *
 * (input & 0x[arg1][arg3][arg5]...) == 0x[arg2][arg4][arg6]...
 *
 * @param[in] input UTF-8 string.
 * @param[in] bytes Number of bytes to compare.
 * @param[in] ... 2x @p bytes number of bytes to perform bitwise and and equality operations.
 * @return Result of the operation.
 */
static int
ly_utf8_and_equal(const char *input, uint8_t bytes, ...)
{
    va_list ap;
    int i, mask, byte, equal = 1;

    va_start(ap, bytes);
    for (i = 0; i < bytes; ++i) {
        mask = va_arg(ap, int);
        byte = va_arg(ap, int);

        /* compare each byte */
        if (((uint8_t)input[i] & mask) != (uint8_t)byte) {
            equal = 0;
            break;
        }
    }
    /* every va_start() must be paired with va_end() before returning */
    va_end(ap);

    return equal;
}
280
/**
 * @brief Check whether an UTF-8 string is smaller than a hex string.
 *
 * input < 0x[arg1][arg2]...
 *
 * @param[in] input UTF-8 string.
 * @param[in] bytes Number of bytes to compare.
 * @param[in] ... @p bytes number of bytes to compare with.
 * @return Result of the operation.
 */
static int
ly_utf8_less(const char *input, uint8_t bytes, ...)
{
    va_list ap;
    int i, byte, less = 0;

    va_start(ap, bytes);
    for (i = 0; i < bytes; ++i) {
        byte = va_arg(ap, int);

        /* compare until bytes differ, the first differing byte decides */
        if ((uint8_t)input[i] != (uint8_t)byte) {
            less = (uint8_t)input[i] < (uint8_t)byte;
            break;
        }
    }
    /* every va_start() must be paired with va_end() before returning */
    va_end(ap);

    /* 0 also if all the bytes are equal */
    return less;
}
313
/**
 * @brief Check whether an UTF-8 string is greater than a hex string.
 *
 * input > 0x[arg1][arg2]...
 *
 * @param[in] input UTF-8 string.
 * @param[in] bytes Number of bytes to compare.
 * @param[in] ... @p bytes number of bytes to compare with.
 * @return Result of the operation.
 */
static int
ly_utf8_greater(const char *input, uint8_t bytes, ...)
{
    va_list ap;
    int i, byte, greater = 0;

    va_start(ap, bytes);
    for (i = 0; i < bytes; ++i) {
        byte = va_arg(ap, int);

        /* compare until bytes differ, the first differing byte decides */
        if ((uint8_t)input[i] != (uint8_t)byte) {
            greater = (uint8_t)input[i] > (uint8_t)byte;
            break;
        }
    }
    /* every va_start() must be paired with va_end() before returning */
    va_end(ap);

    /* 0 also if all the bytes are equal */
    return greater;
}
346
347LY_ERR
348ly_checkutf8(const char *input, size_t in_len, size_t *utf8_len)
349{
350 size_t len;
351
352 if (!(input[0] & 0x80)) {
353 /* one byte character */
354 len = 1;
355
356 if (ly_utf8_less(input, 1, 0x20) && (input[0] != 0x9) && (input[0] != 0xa) && (input[0] != 0xd)) {
357 /* invalid control characters */
358 return LY_EINVAL;
359 }
360 } else if (((input[0] & 0xe0) == 0xc0) && (in_len > 1)) {
361 /* two bytes character */
362 len = 2;
363
364 /* (input < 0xC280) || (input > 0xDFBF) || ((input & 0xE0C0) != 0xC080) */
365 if (ly_utf8_less(input, 2, 0xC2, 0x80) || ly_utf8_greater(input, 2, 0xDF, 0xBF) ||
366 !ly_utf8_and_equal(input, 2, 0xE0, 0xC0, 0xC0, 0x80)) {
367 return LY_EINVAL;
368 }
369 } else if (((input[0] & 0xf0) == 0xe0) && (in_len > 2)) {
370 /* three bytes character */
371 len = 3;
372
373 /* (input >= 0xEDA080) && (input <= 0xEDBFBF) */
374 if (!ly_utf8_less(input, 3, 0xED, 0xA0, 0x80) && !ly_utf8_greater(input, 3, 0xED, 0xBF, 0xBF)) {
375 /* reject UTF-16 surrogates */
376 return LY_EINVAL;
377 }
378
379 /* (input < 0xE0A080) || (input > 0xEFBFBF) || ((input & 0xF0C0C0) != 0xE08080) */
380 if (ly_utf8_less(input, 3, 0xE0, 0xA0, 0x80) || ly_utf8_greater(input, 3, 0xEF, 0xBF, 0xBF) ||
381 !ly_utf8_and_equal(input, 3, 0xF0, 0xE0, 0xC0, 0x80, 0xC0, 0x80)) {
382 return LY_EINVAL;
383 }
384 } else if (((input[0] & 0xf8) == 0xf0) && (in_len > 3)) {
385 /* four bytes character */
386 len = 4;
387
388 /* (input < 0xF0908080) || (input > 0xF48FBFBF) || ((input & 0xF8C0C0C0) != 0xF0808080) */
389 if (ly_utf8_less(input, 4, 0xF0, 0x90, 0x80, 0x80) || ly_utf8_greater(input, 4, 0xF4, 0x8F, 0xBF, 0xBF) ||
390 !ly_utf8_and_equal(input, 4, 0xF8, 0xF0, 0xC0, 0x80, 0xC0, 0x80, 0xC0, 0x80)) {
391 return LY_EINVAL;
392 }
393 } else {
394 return LY_EINVAL;
395 }
396
397 *utf8_len = len;
398 return LY_SUCCESS;
399}
400
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200401LY_ERR
402ly_pututf8(char *dst, uint32_t value, size_t *bytes_written)
403{
404 if (value < 0x80) {
405 /* one byte character */
Michal Vasko69730152020-10-09 16:30:07 +0200406 if ((value < 0x20) &&
407 (value != 0x09) &&
408 (value != 0x0a) &&
409 (value != 0x0d)) {
Michal Vasko519097f2023-05-25 10:00:44 +0200410 /* valid UTF8 but not YANG string character */
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200411 return LY_EINVAL;
412 }
413
414 dst[0] = value;
415 (*bytes_written) = 1;
416 } else if (value < 0x800) {
417 /* two bytes character */
418 dst[0] = 0xc0 | (value >> 6);
419 dst[1] = 0x80 | (value & 0x3f);
420 (*bytes_written) = 2;
421 } else if (value < 0xfffe) {
422 /* three bytes character */
423 if (((value & 0xf800) == 0xd800) ||
Michal Vasko69730152020-10-09 16:30:07 +0200424 ((value >= 0xfdd0) && (value <= 0xfdef))) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200425 /* exclude surrogate blocks %xD800-DFFF */
426 /* exclude noncharacters %xFDD0-FDEF */
427 return LY_EINVAL;
428 }
429
430 dst[0] = 0xe0 | (value >> 12);
431 dst[1] = 0x80 | ((value >> 6) & 0x3f);
432 dst[2] = 0x80 | (value & 0x3f);
433
434 (*bytes_written) = 3;
435 } else if (value < 0x10fffe) {
436 if ((value & 0xffe) == 0xffe) {
437 /* exclude noncharacters %xFFFE-FFFF, %x1FFFE-1FFFF, %x2FFFE-2FFFF, %x3FFFE-3FFFF, %x4FFFE-4FFFF,
438 * %x5FFFE-5FFFF, %x6FFFE-6FFFF, %x7FFFE-7FFFF, %x8FFFE-8FFFF, %x9FFFE-9FFFF, %xAFFFE-AFFFF,
439 * %xBFFFE-BFFFF, %xCFFFE-CFFFF, %xDFFFE-DFFFF, %xEFFFE-EFFFF, %xFFFFE-FFFFF, %x10FFFE-10FFFF */
440 return LY_EINVAL;
441 }
442 /* four bytes character */
443 dst[0] = 0xf0 | (value >> 18);
444 dst[1] = 0x80 | ((value >> 12) & 0x3f);
445 dst[2] = 0x80 | ((value >> 6) & 0x3f);
446 dst[3] = 0x80 | (value & 0x3f);
447
448 (*bytes_written) = 4;
449 } else {
450 return LY_EINVAL;
451 }
452 return LY_SUCCESS;
453}
454
/**
 * @brief Static table of the UTF8 characters lengths according to their first byte.
 *
 * Continuation bytes (0x80 - 0xBF) and the bytes 0xFE and 0xFF have the length 1 so that they are always skipped.
 */
static const unsigned char utf8_char_length_table[] = {
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
};

/**
 * @brief Count the UTF-8 characters in a string.
 *
 * The string is not validated, the length of every character is taken from its first byte.
 *
 * @param[in] str String to examine.
 * @param[in] bytes Maximum number of bytes of @p str to examine, the counting also stops on a NUL byte.
 * @return Number of characters that start in the examined part of the string.
 */
size_t
ly_utf8len(const char *str, size_t bytes)
{
    size_t len = 0, char_len;
    const char *ptr = str;

    while (((size_t)(ptr - str) < bytes) && *ptr) {
        ++len;
        char_len = utf8_char_length_table[((unsigned char)(*ptr))];

        /* move byte by byte, a truncated character must not take the pointer behind the terminating NUL byte */
        while (char_len && ((size_t)(ptr - str) < bytes) && *ptr) {
            ++ptr;
            --char_len;
        }
    }
    return len;
}
489
Michal Vasko7b3a00e2023-08-09 11:58:03 +0200490int
Radek Krejcid972c252018-09-25 13:23:39 +0200491LY_VCODE_INSTREXP_len(const char *str)
492{
Michal Vasko7b3a00e2023-08-09 11:58:03 +0200493 int len = 0;
Michal Vasko69730152020-10-09 16:30:07 +0200494
Radek Krejcid972c252018-09-25 13:23:39 +0200495 if (!str) {
496 return len;
497 } else if (!str[0]) {
498 return 1;
499 }
Radek Krejci1e008d22020-08-17 11:37:37 +0200500 for (len = 1; len < LY_VCODE_INSTREXP_MAXLEN && str[len]; ++len) {}
Radek Krejcid972c252018-09-25 13:23:39 +0200501 return len;
502}
503
Jan Kundrátf1960dc2021-12-12 03:12:23 +0100504#ifdef HAVE_MMAP
Radek Krejcif345c012018-09-19 11:12:59 +0200505LY_ERR
Radek Krejci86d106e2018-10-18 09:53:19 +0200506ly_mmap(struct ly_ctx *ctx, int fd, size_t *length, void **addr)
Michal Vasko841d1a92018-09-07 15:40:31 +0200507{
Radek Krejci86d106e2018-10-18 09:53:19 +0200508 struct stat sb;
509 long pagesize;
510 size_t m;
Michal Vasko841d1a92018-09-07 15:40:31 +0200511
Radek Krejci86d106e2018-10-18 09:53:19 +0200512 assert(length);
513 assert(addr);
514 assert(fd >= 0);
Michal Vasko841d1a92018-09-07 15:40:31 +0200515
Radek Krejci86d106e2018-10-18 09:53:19 +0200516 if (fstat(fd, &sb) == -1) {
517 LOGERR(ctx, LY_ESYS, "Failed to stat the file descriptor (%s) for the mmap().", strerror(errno));
518 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200519 }
Radek Krejci86d106e2018-10-18 09:53:19 +0200520 if (!S_ISREG(sb.st_mode)) {
521 LOGERR(ctx, LY_EINVAL, "File to mmap() is not a regular file.");
522 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200523 }
Radek Krejci86d106e2018-10-18 09:53:19 +0200524 if (!sb.st_size) {
525 *addr = NULL;
526 return LY_SUCCESS;
527 }
528 pagesize = sysconf(_SC_PAGESIZE);
529
530 m = sb.st_size % pagesize;
Michal Vasko69730152020-10-09 16:30:07 +0200531 if (m && (pagesize - m >= 1)) {
Radek Krejci86d106e2018-10-18 09:53:19 +0200532 /* there will be enough space (at least 1 byte) after the file content mapping to provide zeroed NULL-termination byte */
533 *length = sb.st_size + 1;
534 *addr = mmap(NULL, *length, PROT_READ, MAP_PRIVATE, fd, 0);
535 } else {
536 /* there will not be enough bytes after the file content mapping for the additional bytes and some of them
537 * would overflow into another page that would not be zerroed and any access into it would generate SIGBUS.
538 * Therefore we have to do the following hack with double mapping. First, the required number of bytes
539 * (including the additinal bytes) is required as anonymous and thus they will be really provided (actually more
540 * because of using whole pages) and also initialized by zeros. Then, the file is mapped to the same address
541 * where the anonymous mapping starts. */
542 *length = sb.st_size + pagesize;
543 *addr = mmap(NULL, *length, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
544 *addr = mmap(*addr, sb.st_size, PROT_READ, MAP_PRIVATE | MAP_FIXED, fd, 0);
545 }
546 if (*addr == MAP_FAILED) {
547 LOGERR(ctx, LY_ESYS, "mmap() failed (%s).", strerror(errno));
548 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200549 }
550
Radek Krejcif345c012018-09-19 11:12:59 +0200551 return LY_SUCCESS;
Radek Krejci86d106e2018-10-18 09:53:19 +0200552}
Michal Vasko841d1a92018-09-07 15:40:31 +0200553
Radek Krejci86d106e2018-10-18 09:53:19 +0200554LY_ERR
555ly_munmap(void *addr, size_t length)
556{
557 if (munmap(addr, length)) {
558 return LY_ESYS;
559 }
560 return LY_SUCCESS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200561}
Radek Krejci4f28eda2018-11-12 11:46:16 +0100562
Jan Kundrátf1960dc2021-12-12 03:12:23 +0100563#else
564
565LY_ERR
566ly_mmap(struct ly_ctx *ctx, int fd, size_t *length, void **addr)
567{
568 struct stat sb;
569 size_t m;
570
571 assert(length);
572 assert(addr);
573 assert(fd >= 0);
574
Jan Kundrátd31adc12022-07-07 21:36:15 +0200575#if _WIN32
576 if (_setmode(fd, _O_BINARY) == -1) {
577 LOGERR(ctx, LY_ESYS, "Failed to switch the file descriptor to binary mode.", strerror(errno));
578 return LY_ESYS;
579 }
580#endif
581
Jan Kundrátf1960dc2021-12-12 03:12:23 +0100582 if (fstat(fd, &sb) == -1) {
583 LOGERR(ctx, LY_ESYS, "Failed to stat the file descriptor (%s) for the mmap().", strerror(errno));
584 return LY_ESYS;
585 }
586 if (!S_ISREG(sb.st_mode)) {
587 LOGERR(ctx, LY_EINVAL, "File to mmap() is not a regular file.");
588 return LY_ESYS;
589 }
590 if (!sb.st_size) {
591 *addr = NULL;
592 return LY_SUCCESS;
593 }
594 /* On Windows, the mman-win32 mmap() emulation uses CreateFileMapping and MapViewOfFile, and these functions
595 * do not allow mapping more than "length of file" bytes for PROT_READ. Remapping existing mappings is not allowed, either.
596 * At that point the path of least resistance is just reading the file in as-is. */
597 m = sb.st_size + 1;
598 char *buf = calloc(m, 1);
599
600 if (!buf) {
601 LOGERR(ctx, LY_ESYS, "ly_mmap: malloc() failed (%s).", strerror(errno));
602 }
603 *addr = buf;
604 *length = m;
605
606 lseek(fd, 0, SEEK_SET);
607 ssize_t to_read = m - 1;
608
609 while (to_read > 0) {
610 ssize_t n = read(fd, buf, to_read);
Michal Vasko2bf4af42023-01-04 12:08:38 +0100611
Jan Kundrátf1960dc2021-12-12 03:12:23 +0100612 if (n == 0) {
613 return LY_SUCCESS;
614 } else if (n < 0) {
615 if (errno == EINTR) {
616 continue; // can I get this on Windows?
617 }
618 LOGERR(ctx, LY_ESYS, "ly_mmap: read() failed (%s).", strerror(errno));
619 }
620 to_read -= n;
621 buf += n;
622 }
623 return LY_SUCCESS;
624}
625
626LY_ERR
627ly_munmap(void *addr, size_t length)
628{
629 (void)length;
630 free(addr);
631 return LY_SUCCESS;
632}
633
634#endif
635
Radek Krejci4f28eda2018-11-12 11:46:16 +0100636LY_ERR
Radek Krejci4546aa62019-07-15 16:53:32 +0200637ly_strcat(char **dest, const char *format, ...)
638{
639 va_list fp;
640 char *addition = NULL;
641 size_t len;
642
643 va_start(fp, format);
644 len = vasprintf(&addition, format, fp);
645 len += (*dest ? strlen(*dest) : 0) + 1;
646
647 if (*dest) {
648 *dest = ly_realloc(*dest, len);
649 if (!*dest) {
Radek Krejci1cd812f2020-12-01 12:17:53 +0100650 va_end(fp);
Radek Krejci4546aa62019-07-15 16:53:32 +0200651 return LY_EMEM;
652 }
653 *dest = strcat(*dest, addition);
654 free(addition);
655 } else {
656 *dest = addition;
657 }
658
659 va_end(fp);
660 return LY_SUCCESS;
661}
662
663LY_ERR
Radek Krejci249973a2019-06-10 10:50:54 +0200664ly_parse_int(const char *val_str, size_t val_len, int64_t min, int64_t max, int base, int64_t *ret)
Radek Krejci4f28eda2018-11-12 11:46:16 +0100665{
Michal Vaskob4d40d62021-05-04 11:42:44 +0200666 LY_ERR rc = LY_SUCCESS;
667 char *ptr, *str;
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200668 int64_t i;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100669
Radek Krejci249973a2019-06-10 10:50:54 +0200670 LY_CHECK_ARG_RET(NULL, val_str, val_str[0], val_len, LY_EINVAL);
Radek Krejci4f28eda2018-11-12 11:46:16 +0100671
Michal Vaskob4d40d62021-05-04 11:42:44 +0200672 /* duplicate the value */
673 str = strndup(val_str, val_len);
674 LY_CHECK_RET(!str, LY_EMEM);
Radek Krejci4f28eda2018-11-12 11:46:16 +0100675
Michal Vaskob4d40d62021-05-04 11:42:44 +0200676 /* parse the value to avoid accessing following bytes */
677 errno = 0;
678 i = strtoll(str, &ptr, base);
679 if (errno || (ptr == str)) {
680 /* invalid string */
681 rc = LY_EVALID;
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200682 } else if ((i < min) || (i > max)) {
Michal Vaskob4d40d62021-05-04 11:42:44 +0200683 /* invalid number */
684 rc = LY_EDENIED;
685 } else if (*ptr) {
686 while (isspace(*ptr)) {
687 ++ptr;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100688 }
Michal Vaskob4d40d62021-05-04 11:42:44 +0200689 if (*ptr) {
690 /* invalid characters after some number */
691 rc = LY_EVALID;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100692 }
693 }
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200694
Michal Vaskob4d40d62021-05-04 11:42:44 +0200695 /* cleanup */
696 free(str);
697 if (!rc) {
698 *ret = i;
699 }
700 return rc;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100701}
702
703LY_ERR
Radek Krejci249973a2019-06-10 10:50:54 +0200704ly_parse_uint(const char *val_str, size_t val_len, uint64_t max, int base, uint64_t *ret)
Radek Krejci4f28eda2018-11-12 11:46:16 +0100705{
Michal Vaskob4d40d62021-05-04 11:42:44 +0200706 LY_ERR rc = LY_SUCCESS;
707 char *ptr, *str;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100708 uint64_t u;
709
Michal Vaskob4d40d62021-05-04 11:42:44 +0200710 LY_CHECK_ARG_RET(NULL, val_str, val_str[0], val_len, LY_EINVAL);
Radek Krejci4f28eda2018-11-12 11:46:16 +0100711
Michal Vaskob4d40d62021-05-04 11:42:44 +0200712 /* duplicate the value to avoid accessing following bytes */
713 str = strndup(val_str, val_len);
714 LY_CHECK_RET(!str, LY_EMEM);
715
716 /* parse the value */
Radek Krejci4f28eda2018-11-12 11:46:16 +0100717 errno = 0;
Michal Vaskob4d40d62021-05-04 11:42:44 +0200718 u = strtoull(str, &ptr, base);
719 if (errno || (ptr == str)) {
720 /* invalid string */
721 rc = LY_EVALID;
722 } else if ((u > max) || (u && (str[0] == '-'))) {
723 /* invalid number */
724 rc = LY_EDENIED;
725 } else if (*ptr) {
726 while (isspace(*ptr)) {
727 ++ptr;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100728 }
Michal Vaskob4d40d62021-05-04 11:42:44 +0200729 if (*ptr) {
730 /* invalid characters after some number */
731 rc = LY_EVALID;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100732 }
733 }
734
Michal Vaskob4d40d62021-05-04 11:42:44 +0200735 /* cleanup */
736 free(str);
737 if (!rc) {
738 *ret = u;
739 }
740 return rc;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100741}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200742
743/**
744 * @brief Parse an identifier.
745 *
746 * ;; An identifier MUST NOT start with (('X'|'x') ('M'|'m') ('L'|'l'))
747 * identifier = (ALPHA / "_")
748 * *(ALPHA / DIGIT / "_" / "-" / ".")
749 *
750 * @param[in,out] id Identifier to parse. When returned, it points to the first character which is not part of the identifier.
751 * @return LY_ERR value: LY_SUCCESS or LY_EINVAL in case of invalid starting character.
752 */
753static LY_ERR
754lys_parse_id(const char **id)
755{
756 assert(id && *id);
757
758 if (!is_yangidentstartchar(**id)) {
759 return LY_EINVAL;
760 }
761 ++(*id);
762
763 while (is_yangidentchar(**id)) {
764 ++(*id);
765 }
766 return LY_SUCCESS;
767}
768
769LY_ERR
770ly_parse_nodeid(const char **id, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
771{
772 assert(id && *id);
773 assert(prefix && prefix_len);
774 assert(name && name_len);
775
776 *prefix = *id;
777 *prefix_len = 0;
778 *name = NULL;
779 *name_len = 0;
780
781 LY_CHECK_RET(lys_parse_id(id));
782 if (**id == ':') {
783 /* there is prefix */
784 *prefix_len = *id - *prefix;
785 ++(*id);
786 *name = *id;
787
788 LY_CHECK_RET(lys_parse_id(id));
789 *name_len = *id - *name;
790 } else {
791 /* there is no prefix, so what we have as prefix now is actually the name */
792 *name = *prefix;
793 *name_len = *id - *name;
794 *prefix = NULL;
795 }
796
797 return LY_SUCCESS;
798}
799
800LY_ERR
Radek Krejci084289f2019-07-09 17:35:30 +0200801ly_parse_instance_predicate(const char **pred, size_t limit, LYD_FORMAT format,
Radek Krejci0f969882020-08-21 16:56:47 +0200802 const char **prefix, size_t *prefix_len, const char **id, size_t *id_len, const char **value, size_t *value_len,
803 const char **errmsg)
Radek Krejcib4a4a272019-06-10 12:44:52 +0200804{
805 LY_ERR ret = LY_EVALID;
806 const char *in = *pred;
807 size_t offset = 1;
Radek Krejci857189e2020-09-01 13:26:36 +0200808 uint8_t expr = 0; /* 0 - position predicate; 1 - leaf-list-predicate; 2 - key-predicate */
Radek Krejcib4a4a272019-06-10 12:44:52 +0200809 char quot;
810
Radek Krejci4607f542020-12-01 12:18:49 +0100811 assert(in[0] == '[');
Radek Krejcib4a4a272019-06-10 12:44:52 +0200812
813 *prefix = *id = *value = NULL;
814 *prefix_len = *id_len = *value_len = 0;
815
816 /* leading *WSP */
Michal Vaskod989ba02020-08-24 10:59:24 +0200817 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200818
819 if (isdigit(in[offset])) {
820 /* pos: "[" *WSP positive-integer-value *WSP "]" */
821 if (in[offset] == '0') {
822 /* zero */
823 *errmsg = "The position predicate cannot be zero.";
824 goto error;
825 }
826
827 /* positive-integer-value */
Radek Krejci10bfdf82019-06-10 14:08:13 +0200828 *value = &in[offset++];
Michal Vaskod989ba02020-08-24 10:59:24 +0200829 for ( ; isdigit(in[offset]); offset++) {}
Radek Krejci10bfdf82019-06-10 14:08:13 +0200830 *value_len = &in[offset] - *value;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200831
832 } else if (in[offset] == '.') {
833 /* leaf-list-predicate: "[" *WSP "." *WSP "=" *WSP quoted-string *WSP "]" */
834 *id = &in[offset];
835 *id_len = 1;
836 offset++;
837 expr = 1;
Radek Krejci10bfdf82019-06-10 14:08:13 +0200838 } else if (in[offset] == '-') {
839 /* typically negative value */
840 *errmsg = "Invalid instance predicate format (negative position or invalid node-identifier).";
841 goto error;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200842 } else {
843 /* key-predicate: "[" *WSP node-identifier *WSP "=" *WSP quoted-string *WSP "]" */
844 in = &in[offset];
845 if (ly_parse_nodeid(&in, prefix, prefix_len, id, id_len)) {
846 *errmsg = "Invalid node-identifier.";
847 goto error;
848 }
Michal Vasko69730152020-10-09 16:30:07 +0200849 if ((format == LYD_XML) && !(*prefix)) {
Radek Krejci084289f2019-07-09 17:35:30 +0200850 /* all node names MUST be qualified with explicit namespace prefix */
851 *errmsg = "Missing prefix of a node name.";
852 goto error;
853 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200854 offset = in - *pred;
855 in = *pred;
Radek Krejci10bfdf82019-06-10 14:08:13 +0200856 expr = 2;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200857 }
858
859 if (expr) {
860 /* *WSP "=" *WSP quoted-string *WSP "]" */
Michal Vaskod989ba02020-08-24 10:59:24 +0200861 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200862
863 if (in[offset] != '=') {
Radek Krejci10bfdf82019-06-10 14:08:13 +0200864 if (expr == 1) {
865 *errmsg = "Unexpected character instead of \'=\' in leaf-list-predicate.";
866 } else { /* 2 */
867 *errmsg = "Unexpected character instead of \'=\' in key-predicate.";
868 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200869 goto error;
870 }
871 offset++;
Michal Vaskod989ba02020-08-24 10:59:24 +0200872 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200873
874 /* quoted-string */
875 quot = in[offset++];
Michal Vasko69730152020-10-09 16:30:07 +0200876 if ((quot != '\'') && (quot != '\"')) {
Radek Krejcib4a4a272019-06-10 12:44:52 +0200877 *errmsg = "String value is not quoted.";
878 goto error;
879 }
880 *value = &in[offset];
Michal Vaskod989ba02020-08-24 10:59:24 +0200881 for ( ; offset < limit && (in[offset] != quot || (offset && in[offset - 1] == '\\')); offset++) {}
Radek Krejci10bfdf82019-06-10 14:08:13 +0200882 if (in[offset] == quot) {
883 *value_len = &in[offset] - *value;
884 offset++;
885 } else {
886 *errmsg = "Value is not terminated quoted-string.";
887 goto error;
888 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200889 }
890
891 /* *WSP "]" */
Michal Vaskod989ba02020-08-24 10:59:24 +0200892 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200893 if (in[offset] != ']') {
Radek Krejci10bfdf82019-06-10 14:08:13 +0200894 if (expr == 0) {
895 *errmsg = "Predicate (pos) is not terminated by \']\' character.";
896 } else if (expr == 1) {
897 *errmsg = "Predicate (leaf-list-predicate) is not terminated by \']\' character.";
898 } else { /* 2 */
899 *errmsg = "Predicate (key-predicate) is not terminated by \']\' character.";
900 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200901 goto error;
902 }
Radek Krejci10bfdf82019-06-10 14:08:13 +0200903 offset++;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200904
Radek Krejci10bfdf82019-06-10 14:08:13 +0200905 if (offset <= limit) {
906 *pred = &in[offset];
Radek Krejcib4a4a272019-06-10 12:44:52 +0200907 return LY_SUCCESS;
908 }
909
910 /* we read after the limit */
911 *errmsg = "Predicate is incomplete.";
912 *prefix = *id = *value = NULL;
913 *prefix_len = *id_len = *value_len = 0;
914 offset = limit;
915 ret = LY_EINVAL;
916
917error:
918 *pred = &in[offset];
919 return ret;
920}