blob: 41e527024ac41874cf9c65fef8f775884d95a4b4 [file] [log] [blame]
Michal Vasko1324b6c2018-09-07 11:16:23 +02001/**
2 * @file common.c
3 * @author Michal Vasko <mvasko@cesnet.cz>
4 * @brief common internal definitions for libyang
5 *
6 * Copyright (c) 2018 CESNET, z.s.p.o.
7 *
8 * This source code is licensed under BSD 3-Clause License (the "License").
9 * You may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * https://opensource.org/licenses/BSD-3-Clause
13 */
Radek Krejcib7db73a2018-10-24 14:18:40 +020014
Radek Krejci535ea9f2020-05-29 16:01:05 +020015#define _GNU_SOURCE
16
Radek Krejcib7db73a2018-10-24 14:18:40 +020017#include "common.h"
Michal Vasko1324b6c2018-09-07 11:16:23 +020018
Radek Krejci86d106e2018-10-18 09:53:19 +020019#include <assert.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020020#include <ctype.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020021#include <errno.h>
Michal Vasko15dc9fa2021-05-03 14:33:05 +020022#include <inttypes.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020023#include <stdarg.h>
Radek Krejci535ea9f2020-05-29 16:01:05 +020024#include <stdio.h>
Radek Krejci4546aa62019-07-15 16:53:32 +020025#include <stdlib.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020026#include <string.h>
Jan Kundrátf1960dc2021-12-12 03:12:23 +010027#ifndef _WIN32
Radek Krejci86d106e2018-10-18 09:53:19 +020028#include <sys/mman.h>
Jan Kundrátf1960dc2021-12-12 03:12:23 +010029#endif
Radek Krejci86d106e2018-10-18 09:53:19 +020030#include <sys/stat.h>
Radek Krejci86d106e2018-10-18 09:53:19 +020031#include <unistd.h>
Michal Vasko841d1a92018-09-07 15:40:31 +020032
Radek Krejciaa45bda2020-07-20 07:43:38 +020033#include "compat.h"
Radek Krejcib4a4a272019-06-10 12:44:52 +020034#include "tree_schema_internal.h"
aPiecek704f8e92021-08-25 13:35:05 +020035#include "xml.h"
Michal Vasko1324b6c2018-09-07 11:16:23 +020036
/**
 * @brief realloc() wrapper which releases the original block when the resize fails.
 *
 * @param[in] ptr Memory block to resize, passed directly to realloc().
 * @param[in] size New size in bytes.
 * @return Pointer returned by realloc(); NULL on failure, in which case @p ptr has been freed.
 */
void *
ly_realloc(void *ptr, size_t size)
{
    void *resized = realloc(ptr, size);

    if (resized) {
        return resized;
    }

    /* the resize failed, do not leave the old block behind */
    free(ptr);
    return NULL;
}
Michal Vasko841d1a92018-09-07 15:40:31 +020049
/**
 * @brief Find the first occurrence of a character in the first @p len bytes of a buffer.
 *
 * The scan is bounded only by @p len, a NUL byte does not stop it.
 *
 * @param[in] s Buffer to search.
 * @param[in] c Character to look for (compared after conversion to char).
 * @param[in] len Number of bytes of @p s to examine.
 * @return Pointer to the matching byte, NULL if none of the @p len bytes matches.
 */
char *
ly_strnchr(const char *s, int c, size_t len)
{
    const char wanted = (char)c;
    size_t i;

    for (i = 0; i < len; ++i) {
        if (s[i] == wanted) {
            return (char *)&s[i];
        }
    }
    return NULL;
}
56
/**
 * @brief Compare a NUL-terminated reference string with a string given by its length.
 *
 * @param[in] refstr NUL-terminated reference string.
 * @param[in] str String to compare, only the first @p str_len bytes are used.
 * @param[in] str_len Number of bytes of @p str to compare.
 * @return 0 if the first @p str_len bytes are equal and @p refstr ends right after them,
 * the non-zero strncmp() result if the compared bytes differ,
 * 1 if the compared bytes are equal but @p refstr continues.
 */
int
ly_strncmp(const char *refstr, const char *str, size_t str_len)
{
    int diff = strncmp(refstr, str, str_len);

    if (diff) {
        return diff;
    }

    /* the compared part is the same, the reference string must end exactly there */
    return (refstr[str_len] == '\0') ? 0 : 1;
}
68
Michal Vasko15dc9fa2021-05-03 14:33:05 +020069LY_ERR
70ly_strntou8(const char *nptr, size_t len, uint8_t *ret)
71{
72 uint8_t num = 0, dig, dec_pow;
73
74 if (len > 3) {
75 /* overflow for sure */
76 return LY_EDENIED;
77 }
78
79 dec_pow = 1;
80 for ( ; len && isdigit(nptr[len - 1]); --len) {
81 dig = nptr[len - 1] - 48;
82
83 if (LY_OVERFLOW_MUL(UINT8_MAX, dig, dec_pow)) {
84 return LY_EDENIED;
85 }
86 dig *= dec_pow;
87
88 if (LY_OVERFLOW_ADD(UINT8_MAX, num, dig)) {
89 return LY_EDENIED;
90 }
91 num += dig;
92
93 dec_pow *= 10;
94 }
95
96 if (len) {
97 return LY_EVALID;
98 }
99 *ret = num;
100 return LY_SUCCESS;
101}
102
aPieceke3f828d2021-05-10 15:34:41 +0200103LY_ERR
104ly_value_prefix_next(const char *str_begin, const char *str_end, uint32_t *len, ly_bool *is_prefix, const char **str_next)
aPiecekf102d4d2021-03-30 12:18:38 +0200105{
106 const char *stop, *prefix;
aPieceke3f828d2021-05-10 15:34:41 +0200107 size_t bytes_read;
aPiecekf102d4d2021-03-30 12:18:38 +0200108 uint32_t c;
109 ly_bool prefix_found;
aPieceke3f828d2021-05-10 15:34:41 +0200110 LY_ERR ret = LY_SUCCESS;
aPiecekf102d4d2021-03-30 12:18:38 +0200111
aPieceke3f828d2021-05-10 15:34:41 +0200112 assert(len && is_prefix && str_next);
aPiecekf102d4d2021-03-30 12:18:38 +0200113
114#define IS_AT_END(PTR, STR_END) (STR_END ? PTR == STR_END : !(*PTR))
115
116 *str_next = NULL;
117 *is_prefix = 0;
aPieceke3f828d2021-05-10 15:34:41 +0200118 *len = 0;
aPiecekf102d4d2021-03-30 12:18:38 +0200119
120 if (!str_begin || !(*str_begin) || (str_begin == str_end)) {
121 return ret;
122 }
123
124 stop = str_begin;
125 prefix = NULL;
126 prefix_found = 0;
127
128 do {
129 /* look for the beginning of the YANG value */
aPieceke3f828d2021-05-10 15:34:41 +0200130 do {
131 LY_CHECK_RET(ly_getutf8(&stop, &c, &bytes_read));
132 } while (!is_xmlqnamestartchar(c) && !IS_AT_END(stop, str_end));
aPiecekf102d4d2021-03-30 12:18:38 +0200133
134 if (IS_AT_END(stop, str_end)) {
135 break;
136 }
137
138 /* maybe the prefix was found */
aPieceke3f828d2021-05-10 15:34:41 +0200139 prefix = stop - bytes_read;
aPiecekf102d4d2021-03-30 12:18:38 +0200140
141 /* look for the the end of the prefix */
aPieceke3f828d2021-05-10 15:34:41 +0200142 do {
143 LY_CHECK_RET(ly_getutf8(&stop, &c, &bytes_read));
144 } while (is_xmlqnamechar(c) && !IS_AT_END(stop, str_end));
aPiecekf102d4d2021-03-30 12:18:38 +0200145
146 prefix_found = c == ':' ? 1 : 0;
147
148 /* if it wasn't the prefix, keep looking */
149 } while (!IS_AT_END(stop, str_end) && !prefix_found);
150
151 if ((str_begin == prefix) && prefix_found) {
152 /* prefix found at the beginning of the input string */
153 *is_prefix = 1;
154 *str_next = IS_AT_END(stop, str_end) ? NULL : stop;
aPieceke3f828d2021-05-10 15:34:41 +0200155 *len = (stop - bytes_read) - str_begin;
aPiecekf102d4d2021-03-30 12:18:38 +0200156 } else if ((str_begin != prefix) && (prefix_found)) {
157 /* there is a some string before prefix */
158 *str_next = prefix;
aPieceke3f828d2021-05-10 15:34:41 +0200159 *len = prefix - str_begin;
aPiecekf102d4d2021-03-30 12:18:38 +0200160 } else {
161 /* no prefix found */
aPieceke3f828d2021-05-10 15:34:41 +0200162 *len = stop - str_begin;
aPiecekf102d4d2021-03-30 12:18:38 +0200163 }
164
165#undef IS_AT_END
166
167 return ret;
168}
169
Radek Krejcib416be62018-10-01 14:51:45 +0200170LY_ERR
Michal Vaskob36053d2020-03-26 15:49:30 +0100171ly_getutf8(const char **input, uint32_t *utf8_char, size_t *bytes_read)
Radek Krejcib416be62018-10-01 14:51:45 +0200172{
Radek Krejci1deb5be2020-08-26 16:43:36 +0200173 uint32_t c, aux;
174 size_t len;
Radek Krejcib416be62018-10-01 14:51:45 +0200175
Radek Krejcicc6a45c2019-05-13 10:16:14 +0200176 if (bytes_read) {
177 (*bytes_read) = 0;
178 }
179
Radek Krejcib416be62018-10-01 14:51:45 +0200180 c = (*input)[0];
181 LY_CHECK_RET(!c, LY_EINVAL);
182
183 if (!(c & 0x80)) {
184 /* one byte character */
185 len = 1;
186
Michal Vasko69730152020-10-09 16:30:07 +0200187 if ((c < 0x20) && (c != 0x9) && (c != 0xa) && (c != 0xd)) {
Radek Krejcib416be62018-10-01 14:51:45 +0200188 return LY_EINVAL;
189 }
190 } else if ((c & 0xe0) == 0xc0) {
191 /* two bytes character */
192 len = 2;
193
194 aux = (*input)[1];
195 if ((aux & 0xc0) != 0x80) {
196 return LY_EINVAL;
197 }
198 c = ((c & 0x1f) << 6) | (aux & 0x3f);
199
200 if (c < 0x80) {
201 return LY_EINVAL;
202 }
203 } else if ((c & 0xf0) == 0xe0) {
204 /* three bytes character */
205 len = 3;
206
207 c &= 0x0f;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200208 for (uint64_t i = 1; i <= 2; i++) {
Radek Krejcib416be62018-10-01 14:51:45 +0200209 aux = (*input)[i];
210 if ((aux & 0xc0) != 0x80) {
211 return LY_EINVAL;
212 }
213
214 c = (c << 6) | (aux & 0x3f);
215 }
216
Michal Vasko69730152020-10-09 16:30:07 +0200217 if ((c < 0x800) || ((c > 0xd7ff) && (c < 0xe000)) || (c > 0xfffd)) {
Radek Krejcib416be62018-10-01 14:51:45 +0200218 return LY_EINVAL;
219 }
220 } else if ((c & 0xf8) == 0xf0) {
221 /* four bytes character */
222 len = 4;
223
224 c &= 0x07;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200225 for (uint64_t i = 1; i <= 3; i++) {
Radek Krejcib416be62018-10-01 14:51:45 +0200226 aux = (*input)[i];
227 if ((aux & 0xc0) != 0x80) {
228 return LY_EINVAL;
229 }
230
231 c = (c << 6) | (aux & 0x3f);
232 }
233
Michal Vasko69730152020-10-09 16:30:07 +0200234 if ((c < 0x1000) || (c > 0x10ffff)) {
Radek Krejcib416be62018-10-01 14:51:45 +0200235 return LY_EINVAL;
236 }
237 } else {
238 return LY_EINVAL;
239 }
240
241 (*utf8_char) = c;
242 (*input) += len;
243 if (bytes_read) {
244 (*bytes_read) = len;
245 }
246 return LY_SUCCESS;
247}
248
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200249LY_ERR
250ly_pututf8(char *dst, uint32_t value, size_t *bytes_written)
251{
252 if (value < 0x80) {
253 /* one byte character */
Michal Vasko69730152020-10-09 16:30:07 +0200254 if ((value < 0x20) &&
255 (value != 0x09) &&
256 (value != 0x0a) &&
257 (value != 0x0d)) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200258 return LY_EINVAL;
259 }
260
261 dst[0] = value;
262 (*bytes_written) = 1;
263 } else if (value < 0x800) {
264 /* two bytes character */
265 dst[0] = 0xc0 | (value >> 6);
266 dst[1] = 0x80 | (value & 0x3f);
267 (*bytes_written) = 2;
268 } else if (value < 0xfffe) {
269 /* three bytes character */
270 if (((value & 0xf800) == 0xd800) ||
Michal Vasko69730152020-10-09 16:30:07 +0200271 ((value >= 0xfdd0) && (value <= 0xfdef))) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200272 /* exclude surrogate blocks %xD800-DFFF */
273 /* exclude noncharacters %xFDD0-FDEF */
274 return LY_EINVAL;
275 }
276
277 dst[0] = 0xe0 | (value >> 12);
278 dst[1] = 0x80 | ((value >> 6) & 0x3f);
279 dst[2] = 0x80 | (value & 0x3f);
280
281 (*bytes_written) = 3;
282 } else if (value < 0x10fffe) {
283 if ((value & 0xffe) == 0xffe) {
284 /* exclude noncharacters %xFFFE-FFFF, %x1FFFE-1FFFF, %x2FFFE-2FFFF, %x3FFFE-3FFFF, %x4FFFE-4FFFF,
285 * %x5FFFE-5FFFF, %x6FFFE-6FFFF, %x7FFFE-7FFFF, %x8FFFE-8FFFF, %x9FFFE-9FFFF, %xAFFFE-AFFFF,
286 * %xBFFFE-BFFFF, %xCFFFE-CFFFF, %xDFFFE-DFFFF, %xEFFFE-EFFFF, %xFFFFE-FFFFF, %x10FFFE-10FFFF */
287 return LY_EINVAL;
288 }
289 /* four bytes character */
290 dst[0] = 0xf0 | (value >> 18);
291 dst[1] = 0x80 | ((value >> 12) & 0x3f);
292 dst[2] = 0x80 | ((value >> 6) & 0x3f);
293 dst[3] = 0x80 | (value & 0x3f);
294
295 (*bytes_written) = 4;
296 } else {
297 return LY_EINVAL;
298 }
299 return LY_SUCCESS;
300}
301
/**
 * @brief Static table of the UTF8 characters lengths according to their first byte.
 */
static const unsigned char utf8_char_length_table[] = {
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
};

/**
 * @brief Count the UTF-8 characters in a string.
 *
 * The character boundaries are derived only from the first byte of each character, the sequences
 * are not validated. Counting stops at the NUL terminator or after @p bytes bytes, whichever comes first.
 *
 * @param[in] str String to examine.
 * @param[in] bytes Maximum number of bytes of @p str to process.
 * @return Number of characters found.
 */
size_t
ly_utf8len(const char *str, size_t bytes)
{
    size_t count = 0, pos = 0, step;

    while ((pos < bytes) && str[pos]) {
        step = utf8_char_length_table[(unsigned char)str[pos]];
        ++count;

        /* move byte by byte, a truncated sequence must not carry us over the terminator */
        while (step && (pos < bytes) && str[pos]) {
            ++pos;
            --step;
        }
    }
    return count;
}
336
Radek Krejcid972c252018-09-25 13:23:39 +0200337size_t
338LY_VCODE_INSTREXP_len(const char *str)
339{
340 size_t len = 0;
Michal Vasko69730152020-10-09 16:30:07 +0200341
Radek Krejcid972c252018-09-25 13:23:39 +0200342 if (!str) {
343 return len;
344 } else if (!str[0]) {
345 return 1;
346 }
Radek Krejci1e008d22020-08-17 11:37:37 +0200347 for (len = 1; len < LY_VCODE_INSTREXP_MAXLEN && str[len]; ++len) {}
Radek Krejcid972c252018-09-25 13:23:39 +0200348 return len;
349}
350
Jan Kundrátf1960dc2021-12-12 03:12:23 +0100351#ifdef HAVE_MMAP
Radek Krejcif345c012018-09-19 11:12:59 +0200352LY_ERR
Radek Krejci86d106e2018-10-18 09:53:19 +0200353ly_mmap(struct ly_ctx *ctx, int fd, size_t *length, void **addr)
Michal Vasko841d1a92018-09-07 15:40:31 +0200354{
Radek Krejci86d106e2018-10-18 09:53:19 +0200355 struct stat sb;
356 long pagesize;
357 size_t m;
Michal Vasko841d1a92018-09-07 15:40:31 +0200358
Radek Krejci86d106e2018-10-18 09:53:19 +0200359 assert(length);
360 assert(addr);
361 assert(fd >= 0);
Michal Vasko841d1a92018-09-07 15:40:31 +0200362
Radek Krejci86d106e2018-10-18 09:53:19 +0200363 if (fstat(fd, &sb) == -1) {
364 LOGERR(ctx, LY_ESYS, "Failed to stat the file descriptor (%s) for the mmap().", strerror(errno));
365 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200366 }
Radek Krejci86d106e2018-10-18 09:53:19 +0200367 if (!S_ISREG(sb.st_mode)) {
368 LOGERR(ctx, LY_EINVAL, "File to mmap() is not a regular file.");
369 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200370 }
Radek Krejci86d106e2018-10-18 09:53:19 +0200371 if (!sb.st_size) {
372 *addr = NULL;
373 return LY_SUCCESS;
374 }
375 pagesize = sysconf(_SC_PAGESIZE);
376
377 m = sb.st_size % pagesize;
Michal Vasko69730152020-10-09 16:30:07 +0200378 if (m && (pagesize - m >= 1)) {
Radek Krejci86d106e2018-10-18 09:53:19 +0200379 /* there will be enough space (at least 1 byte) after the file content mapping to provide zeroed NULL-termination byte */
380 *length = sb.st_size + 1;
381 *addr = mmap(NULL, *length, PROT_READ, MAP_PRIVATE, fd, 0);
382 } else {
383 /* there will not be enough bytes after the file content mapping for the additional bytes and some of them
384 * would overflow into another page that would not be zerroed and any access into it would generate SIGBUS.
385 * Therefore we have to do the following hack with double mapping. First, the required number of bytes
386 * (including the additinal bytes) is required as anonymous and thus they will be really provided (actually more
387 * because of using whole pages) and also initialized by zeros. Then, the file is mapped to the same address
388 * where the anonymous mapping starts. */
389 *length = sb.st_size + pagesize;
390 *addr = mmap(NULL, *length, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
391 *addr = mmap(*addr, sb.st_size, PROT_READ, MAP_PRIVATE | MAP_FIXED, fd, 0);
392 }
393 if (*addr == MAP_FAILED) {
394 LOGERR(ctx, LY_ESYS, "mmap() failed (%s).", strerror(errno));
395 return LY_ESYS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200396 }
397
Radek Krejcif345c012018-09-19 11:12:59 +0200398 return LY_SUCCESS;
Radek Krejci86d106e2018-10-18 09:53:19 +0200399}
Michal Vasko841d1a92018-09-07 15:40:31 +0200400
Radek Krejci86d106e2018-10-18 09:53:19 +0200401LY_ERR
402ly_munmap(void *addr, size_t length)
403{
404 if (munmap(addr, length)) {
405 return LY_ESYS;
406 }
407 return LY_SUCCESS;
Michal Vasko841d1a92018-09-07 15:40:31 +0200408}
Radek Krejci4f28eda2018-11-12 11:46:16 +0100409
Jan Kundrátf1960dc2021-12-12 03:12:23 +0100410#else
411
412LY_ERR
413ly_mmap(struct ly_ctx *ctx, int fd, size_t *length, void **addr)
414{
415 struct stat sb;
416 size_t m;
417
418 assert(length);
419 assert(addr);
420 assert(fd >= 0);
421
422 if (fstat(fd, &sb) == -1) {
423 LOGERR(ctx, LY_ESYS, "Failed to stat the file descriptor (%s) for the mmap().", strerror(errno));
424 return LY_ESYS;
425 }
426 if (!S_ISREG(sb.st_mode)) {
427 LOGERR(ctx, LY_EINVAL, "File to mmap() is not a regular file.");
428 return LY_ESYS;
429 }
430 if (!sb.st_size) {
431 *addr = NULL;
432 return LY_SUCCESS;
433 }
434 /* On Windows, the mman-win32 mmap() emulation uses CreateFileMapping and MapViewOfFile, and these functions
435 * do not allow mapping more than "length of file" bytes for PROT_READ. Remapping existing mappings is not allowed, either.
436 * At that point the path of least resistance is just reading the file in as-is. */
437 m = sb.st_size + 1;
438 char *buf = calloc(m, 1);
439
440 if (!buf) {
441 LOGERR(ctx, LY_ESYS, "ly_mmap: malloc() failed (%s).", strerror(errno));
442 }
443 *addr = buf;
444 *length = m;
445
446 lseek(fd, 0, SEEK_SET);
447 ssize_t to_read = m - 1;
448
449 while (to_read > 0) {
450 ssize_t n = read(fd, buf, to_read);
451 if (n == 0) {
452 return LY_SUCCESS;
453 } else if (n < 0) {
454 if (errno == EINTR) {
455 continue; // can I get this on Windows?
456 }
457 LOGERR(ctx, LY_ESYS, "ly_mmap: read() failed (%s).", strerror(errno));
458 }
459 to_read -= n;
460 buf += n;
461 }
462 return LY_SUCCESS;
463}
464
465LY_ERR
466ly_munmap(void *addr, size_t length)
467{
468 (void)length;
469 free(addr);
470 return LY_SUCCESS;
471}
472
473#endif
474
Radek Krejci4f28eda2018-11-12 11:46:16 +0100475LY_ERR
Radek Krejci4546aa62019-07-15 16:53:32 +0200476ly_strcat(char **dest, const char *format, ...)
477{
478 va_list fp;
479 char *addition = NULL;
480 size_t len;
481
482 va_start(fp, format);
483 len = vasprintf(&addition, format, fp);
484 len += (*dest ? strlen(*dest) : 0) + 1;
485
486 if (*dest) {
487 *dest = ly_realloc(*dest, len);
488 if (!*dest) {
Radek Krejci1cd812f2020-12-01 12:17:53 +0100489 va_end(fp);
Radek Krejci4546aa62019-07-15 16:53:32 +0200490 return LY_EMEM;
491 }
492 *dest = strcat(*dest, addition);
493 free(addition);
494 } else {
495 *dest = addition;
496 }
497
498 va_end(fp);
499 return LY_SUCCESS;
500}
501
502LY_ERR
Radek Krejci249973a2019-06-10 10:50:54 +0200503ly_parse_int(const char *val_str, size_t val_len, int64_t min, int64_t max, int base, int64_t *ret)
Radek Krejci4f28eda2018-11-12 11:46:16 +0100504{
Michal Vaskob4d40d62021-05-04 11:42:44 +0200505 LY_ERR rc = LY_SUCCESS;
506 char *ptr, *str;
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200507 int64_t i;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100508
Radek Krejci249973a2019-06-10 10:50:54 +0200509 LY_CHECK_ARG_RET(NULL, val_str, val_str[0], val_len, LY_EINVAL);
Radek Krejci4f28eda2018-11-12 11:46:16 +0100510
Michal Vaskob4d40d62021-05-04 11:42:44 +0200511 /* duplicate the value */
512 str = strndup(val_str, val_len);
513 LY_CHECK_RET(!str, LY_EMEM);
Radek Krejci4f28eda2018-11-12 11:46:16 +0100514
Michal Vaskob4d40d62021-05-04 11:42:44 +0200515 /* parse the value to avoid accessing following bytes */
516 errno = 0;
517 i = strtoll(str, &ptr, base);
518 if (errno || (ptr == str)) {
519 /* invalid string */
520 rc = LY_EVALID;
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200521 } else if ((i < min) || (i > max)) {
Michal Vaskob4d40d62021-05-04 11:42:44 +0200522 /* invalid number */
523 rc = LY_EDENIED;
524 } else if (*ptr) {
525 while (isspace(*ptr)) {
526 ++ptr;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100527 }
Michal Vaskob4d40d62021-05-04 11:42:44 +0200528 if (*ptr) {
529 /* invalid characters after some number */
530 rc = LY_EVALID;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100531 }
532 }
Radek Krejci9ea8ca12019-06-10 13:11:55 +0200533
Michal Vaskob4d40d62021-05-04 11:42:44 +0200534 /* cleanup */
535 free(str);
536 if (!rc) {
537 *ret = i;
538 }
539 return rc;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100540}
541
542LY_ERR
Radek Krejci249973a2019-06-10 10:50:54 +0200543ly_parse_uint(const char *val_str, size_t val_len, uint64_t max, int base, uint64_t *ret)
Radek Krejci4f28eda2018-11-12 11:46:16 +0100544{
Michal Vaskob4d40d62021-05-04 11:42:44 +0200545 LY_ERR rc = LY_SUCCESS;
546 char *ptr, *str;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100547 uint64_t u;
548
Michal Vaskob4d40d62021-05-04 11:42:44 +0200549 LY_CHECK_ARG_RET(NULL, val_str, val_str[0], val_len, LY_EINVAL);
Radek Krejci4f28eda2018-11-12 11:46:16 +0100550
Michal Vaskob4d40d62021-05-04 11:42:44 +0200551 /* duplicate the value to avoid accessing following bytes */
552 str = strndup(val_str, val_len);
553 LY_CHECK_RET(!str, LY_EMEM);
554
555 /* parse the value */
Radek Krejci4f28eda2018-11-12 11:46:16 +0100556 errno = 0;
Michal Vaskob4d40d62021-05-04 11:42:44 +0200557 u = strtoull(str, &ptr, base);
558 if (errno || (ptr == str)) {
559 /* invalid string */
560 rc = LY_EVALID;
561 } else if ((u > max) || (u && (str[0] == '-'))) {
562 /* invalid number */
563 rc = LY_EDENIED;
564 } else if (*ptr) {
565 while (isspace(*ptr)) {
566 ++ptr;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100567 }
Michal Vaskob4d40d62021-05-04 11:42:44 +0200568 if (*ptr) {
569 /* invalid characters after some number */
570 rc = LY_EVALID;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100571 }
572 }
573
Michal Vaskob4d40d62021-05-04 11:42:44 +0200574 /* cleanup */
575 free(str);
576 if (!rc) {
577 *ret = u;
578 }
579 return rc;
Radek Krejci4f28eda2018-11-12 11:46:16 +0100580}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200581
582/**
583 * @brief Parse an identifier.
584 *
585 * ;; An identifier MUST NOT start with (('X'|'x') ('M'|'m') ('L'|'l'))
586 * identifier = (ALPHA / "_")
587 * *(ALPHA / DIGIT / "_" / "-" / ".")
588 *
589 * @param[in,out] id Identifier to parse. When returned, it points to the first character which is not part of the identifier.
590 * @return LY_ERR value: LY_SUCCESS or LY_EINVAL in case of invalid starting character.
591 */
592static LY_ERR
593lys_parse_id(const char **id)
594{
595 assert(id && *id);
596
597 if (!is_yangidentstartchar(**id)) {
598 return LY_EINVAL;
599 }
600 ++(*id);
601
602 while (is_yangidentchar(**id)) {
603 ++(*id);
604 }
605 return LY_SUCCESS;
606}
607
608LY_ERR
609ly_parse_nodeid(const char **id, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
610{
611 assert(id && *id);
612 assert(prefix && prefix_len);
613 assert(name && name_len);
614
615 *prefix = *id;
616 *prefix_len = 0;
617 *name = NULL;
618 *name_len = 0;
619
620 LY_CHECK_RET(lys_parse_id(id));
621 if (**id == ':') {
622 /* there is prefix */
623 *prefix_len = *id - *prefix;
624 ++(*id);
625 *name = *id;
626
627 LY_CHECK_RET(lys_parse_id(id));
628 *name_len = *id - *name;
629 } else {
630 /* there is no prefix, so what we have as prefix now is actually the name */
631 *name = *prefix;
632 *name_len = *id - *name;
633 *prefix = NULL;
634 }
635
636 return LY_SUCCESS;
637}
638
639LY_ERR
Radek Krejci084289f2019-07-09 17:35:30 +0200640ly_parse_instance_predicate(const char **pred, size_t limit, LYD_FORMAT format,
Radek Krejci0f969882020-08-21 16:56:47 +0200641 const char **prefix, size_t *prefix_len, const char **id, size_t *id_len, const char **value, size_t *value_len,
642 const char **errmsg)
Radek Krejcib4a4a272019-06-10 12:44:52 +0200643{
644 LY_ERR ret = LY_EVALID;
645 const char *in = *pred;
646 size_t offset = 1;
Radek Krejci857189e2020-09-01 13:26:36 +0200647 uint8_t expr = 0; /* 0 - position predicate; 1 - leaf-list-predicate; 2 - key-predicate */
Radek Krejcib4a4a272019-06-10 12:44:52 +0200648 char quot;
649
Radek Krejci4607f542020-12-01 12:18:49 +0100650 assert(in[0] == '[');
Radek Krejcib4a4a272019-06-10 12:44:52 +0200651
652 *prefix = *id = *value = NULL;
653 *prefix_len = *id_len = *value_len = 0;
654
655 /* leading *WSP */
Michal Vaskod989ba02020-08-24 10:59:24 +0200656 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200657
658 if (isdigit(in[offset])) {
659 /* pos: "[" *WSP positive-integer-value *WSP "]" */
660 if (in[offset] == '0') {
661 /* zero */
662 *errmsg = "The position predicate cannot be zero.";
663 goto error;
664 }
665
666 /* positive-integer-value */
Radek Krejci10bfdf82019-06-10 14:08:13 +0200667 *value = &in[offset++];
Michal Vaskod989ba02020-08-24 10:59:24 +0200668 for ( ; isdigit(in[offset]); offset++) {}
Radek Krejci10bfdf82019-06-10 14:08:13 +0200669 *value_len = &in[offset] - *value;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200670
671 } else if (in[offset] == '.') {
672 /* leaf-list-predicate: "[" *WSP "." *WSP "=" *WSP quoted-string *WSP "]" */
673 *id = &in[offset];
674 *id_len = 1;
675 offset++;
676 expr = 1;
Radek Krejci10bfdf82019-06-10 14:08:13 +0200677 } else if (in[offset] == '-') {
678 /* typically negative value */
679 *errmsg = "Invalid instance predicate format (negative position or invalid node-identifier).";
680 goto error;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200681 } else {
682 /* key-predicate: "[" *WSP node-identifier *WSP "=" *WSP quoted-string *WSP "]" */
683 in = &in[offset];
684 if (ly_parse_nodeid(&in, prefix, prefix_len, id, id_len)) {
685 *errmsg = "Invalid node-identifier.";
686 goto error;
687 }
Michal Vasko69730152020-10-09 16:30:07 +0200688 if ((format == LYD_XML) && !(*prefix)) {
Radek Krejci084289f2019-07-09 17:35:30 +0200689 /* all node names MUST be qualified with explicit namespace prefix */
690 *errmsg = "Missing prefix of a node name.";
691 goto error;
692 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200693 offset = in - *pred;
694 in = *pred;
Radek Krejci10bfdf82019-06-10 14:08:13 +0200695 expr = 2;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200696 }
697
698 if (expr) {
699 /* *WSP "=" *WSP quoted-string *WSP "]" */
Michal Vaskod989ba02020-08-24 10:59:24 +0200700 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200701
702 if (in[offset] != '=') {
Radek Krejci10bfdf82019-06-10 14:08:13 +0200703 if (expr == 1) {
704 *errmsg = "Unexpected character instead of \'=\' in leaf-list-predicate.";
705 } else { /* 2 */
706 *errmsg = "Unexpected character instead of \'=\' in key-predicate.";
707 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200708 goto error;
709 }
710 offset++;
Michal Vaskod989ba02020-08-24 10:59:24 +0200711 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200712
713 /* quoted-string */
714 quot = in[offset++];
Michal Vasko69730152020-10-09 16:30:07 +0200715 if ((quot != '\'') && (quot != '\"')) {
Radek Krejcib4a4a272019-06-10 12:44:52 +0200716 *errmsg = "String value is not quoted.";
717 goto error;
718 }
719 *value = &in[offset];
Michal Vaskod989ba02020-08-24 10:59:24 +0200720 for ( ; offset < limit && (in[offset] != quot || (offset && in[offset - 1] == '\\')); offset++) {}
Radek Krejci10bfdf82019-06-10 14:08:13 +0200721 if (in[offset] == quot) {
722 *value_len = &in[offset] - *value;
723 offset++;
724 } else {
725 *errmsg = "Value is not terminated quoted-string.";
726 goto error;
727 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200728 }
729
730 /* *WSP "]" */
Michal Vaskod989ba02020-08-24 10:59:24 +0200731 for ( ; isspace(in[offset]); offset++) {}
Radek Krejcib4a4a272019-06-10 12:44:52 +0200732 if (in[offset] != ']') {
Radek Krejci10bfdf82019-06-10 14:08:13 +0200733 if (expr == 0) {
734 *errmsg = "Predicate (pos) is not terminated by \']\' character.";
735 } else if (expr == 1) {
736 *errmsg = "Predicate (leaf-list-predicate) is not terminated by \']\' character.";
737 } else { /* 2 */
738 *errmsg = "Predicate (key-predicate) is not terminated by \']\' character.";
739 }
Radek Krejcib4a4a272019-06-10 12:44:52 +0200740 goto error;
741 }
Radek Krejci10bfdf82019-06-10 14:08:13 +0200742 offset++;
Radek Krejcib4a4a272019-06-10 12:44:52 +0200743
Radek Krejci10bfdf82019-06-10 14:08:13 +0200744 if (offset <= limit) {
745 *pred = &in[offset];
Radek Krejcib4a4a272019-06-10 12:44:52 +0200746 return LY_SUCCESS;
747 }
748
749 /* we read after the limit */
750 *errmsg = "Predicate is incomplete.";
751 *prefix = *id = *value = NULL;
752 *prefix_len = *id_len = *value_len = 0;
753 offset = limit;
754 ret = LY_EINVAL;
755
756error:
757 *pred = &in[offset];
758 return ret;
759}