blob: 4613d4984daf2128bbe1733053cc4c31446478e0 [file] [log] [blame]
Radek Krejci50f0c6b2020-06-18 16:31:48 +02001/**
2 * @file json.c
3 * @author Radek Krejci <rkrejci@cesnet.cz>
4 * @brief Generic JSON format parser for libyang
5 *
6 * Copyright (c) 2020 CESNET, z.s.p.o.
7 *
8 * This source code is licensed under BSD 3-Clause License (the "License").
9 * You may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * https://opensource.org/licenses/BSD-3-Clause
13 */
14
15#include <assert.h>
16#include <ctype.h>
17#include <errno.h>
Radek Krejci50f0c6b2020-06-18 16:31:48 +020018#include <stdlib.h>
Radek Krejci47fab892020-11-05 17:02:41 +010019#include <string.h>
Radek Krejci50f0c6b2020-06-18 16:31:48 +020020#include <sys/types.h>
21
22#include "common.h"
Michal Vaskoafac7822020-10-20 14:22:26 +020023#include "in_internal.h"
Radek Krejci47fab892020-11-05 17:02:41 +010024#include "json.h"
Radek Krejci50f0c6b2020-06-18 16:31:48 +020025
/**
 * @brief Push the parser status @p STATUS on top of the status stack of @p CTX.
 *
 * The result of ly_set_add() is handed to LY_CHECK_RET(). Both macro arguments
 * are parenthesized so that any expression can be passed safely.
 */
#define JSON_PUSH_STATUS_RET(CTX, STATUS) \
    LY_CHECK_RET(ly_set_add(&(CTX)->status, (void *)(STATUS), 1, NULL))
Radek Krejci50f0c6b2020-06-18 16:31:48 +020028
/**
 * @brief Remove the top-most parser status from the status stack of @p CTX.
 *
 * The stack is asserted to be non-empty. Note that the macro expands to two
 * statements (including the trailing semicolon), so it must not be used as
 * the sole body of an unbraced if/else/loop.
 */
#define JSON_POP_STATUS_RET(CTX) \
    assert((CTX)->status.count); (CTX)->status.count--;
31
Michal Vasko22df3f02020-08-24 13:29:22 +020032const char *
Radek Krejci50f0c6b2020-06-18 16:31:48 +020033lyjson_token2str(enum LYJSON_PARSER_STATUS status)
34{
35 switch (status) {
36 case LYJSON_ERROR:
37 return "error";
38 case LYJSON_ROOT:
39 return "document root";
40 case LYJSON_FALSE:
41 return "false";
42 case LYJSON_TRUE:
43 return "true";
44 case LYJSON_NULL:
45 return "null";
46 case LYJSON_OBJECT:
47 return "object";
48 case LYJSON_OBJECT_CLOSED:
49 return "object closed";
50 case LYJSON_OBJECT_EMPTY:
51 return "empty object";
52 case LYJSON_ARRAY:
53 return "array";
54 case LYJSON_ARRAY_CLOSED:
55 return "array closed";
56 case LYJSON_ARRAY_EMPTY:
57 return "empty array";
58 case LYJSON_NUMBER:
59 return "number";
60 case LYJSON_STRING:
61 return "string";
62 case LYJSON_END:
63 return "end of input";
64 }
65
66 return "";
67}
68
69static LY_ERR
70skip_ws(struct lyjson_ctx *jsonctx)
71{
72 /* skip leading whitespaces */
73 while (*jsonctx->in->current != '\0' && is_jsonws(*jsonctx->in->current)) {
Radek Krejcidd713ce2021-01-04 23:12:12 +010074 if (*jsonctx->in->current == '\n') {
75 LY_IN_NEW_LINE(jsonctx->in);
76 }
Radek Krejci50f0c6b2020-06-18 16:31:48 +020077 ly_in_skip(jsonctx->in, 1);
78 }
79 if (*jsonctx->in->current == '\0') {
80 JSON_PUSH_STATUS_RET(jsonctx, LYJSON_END);
81 }
82
83 return LY_SUCCESS;
84}
85
86/*
87 * @brief Set value corresponding to the current context's status
88 */
89static void
Radek Krejci857189e2020-09-01 13:26:36 +020090lyjson_ctx_set_value(struct lyjson_ctx *jsonctx, const char *value, size_t value_len, ly_bool dynamic)
Radek Krejci50f0c6b2020-06-18 16:31:48 +020091{
92 assert(jsonctx);
93
Juraj Vijtiukec285cd2021-01-14 11:41:20 +010094 if (jsonctx->dynamic) {
Michal Vasko22df3f02020-08-24 13:29:22 +020095 free((char *)jsonctx->value);
Radek Krejci50f0c6b2020-06-18 16:31:48 +020096 }
97 jsonctx->value = value;
98 jsonctx->value_len = value_len;
99 jsonctx->dynamic = dynamic;
100}
101
102static LY_ERR
103lyjson_check_next(struct lyjson_ctx *jsonctx)
104{
105 if (jsonctx->status.count == 1) {
106 /* top level value (JSON-text), ws expected */
Michal Vasko69730152020-10-09 16:30:07 +0200107 if ((*jsonctx->in->current == '\0') || is_jsonws(*jsonctx->in->current)) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200108 return LY_SUCCESS;
109 }
110 } else if (lyjson_ctx_status(jsonctx, 1) == LYJSON_OBJECT) {
111 LY_CHECK_RET(skip_ws(jsonctx));
Michal Vasko69730152020-10-09 16:30:07 +0200112 if ((*jsonctx->in->current == ',') || (*jsonctx->in->current == '}')) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200113 return LY_SUCCESS;
114 }
115 } else if (lyjson_ctx_status(jsonctx, 1) == LYJSON_ARRAY) {
116 LY_CHECK_RET(skip_ws(jsonctx));
Michal Vasko69730152020-10-09 16:30:07 +0200117 if ((*jsonctx->in->current == ',') || (*jsonctx->in->current == ']')) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200118 return LY_SUCCESS;
119 }
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200120 }
121
Radek Krejcie7010dc2021-03-04 15:54:24 +0100122 LOGVAL(jsonctx->ctx, LYVE_SYNTAX, "Unexpected character \"%c\" after JSON %s.",
123 *jsonctx->in->current, lyjson_token2str(lyjson_ctx_status(jsonctx, 0)));
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200124 return LY_EVALID;
125}
126
127/**
128 * Input is expected to start after the opening quotation-mark.
129 * When succeeds, input is moved after the closing quotation-mark.
130 */
131static LY_ERR
132lyjson_string_(struct lyjson_ctx *jsonctx)
133{
134#define BUFSIZE 24
135#define BUFSIZE_STEP 128
136
137 const char *in = jsonctx->in->current, *start;
138 char *buf = NULL;
139 size_t offset; /* read offset in input buffer */
140 size_t len; /* length of the output string (write offset in output buffer) */
141 size_t size = 0; /* size of the output buffer */
142 size_t u;
143 uint64_t start_line;
144
145 assert(jsonctx);
146
147 /* init */
148 start = in;
Radek Krejcid54412f2020-12-17 20:25:35 +0100149 start_line = jsonctx->in->line;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200150 offset = len = 0;
151
152 /* parse */
153 while (in[offset]) {
154 if (in[offset] == '\\') {
155 /* escape sequence */
Michal Vasko2be1d762021-03-11 16:53:15 +0100156 const char *slash = &in[offset];
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200157 uint32_t value;
158 uint8_t i = 1;
159
160 if (!buf) {
161 /* prepare output buffer */
162 buf = malloc(BUFSIZE);
163 LY_CHECK_ERR_RET(!buf, LOGMEM(jsonctx->ctx), LY_EMEM);
164 size = BUFSIZE;
165 }
166
167 /* allocate enough for the offset and next character,
168 * we will need 4 bytes at most since we support only the predefined
169 * (one-char) entities and character references */
170 if (len + offset + 4 >= size) {
Juraj Vijtiukd746a352021-01-15 11:33:33 +0100171 size_t increment;
Radek Krejcidf549132021-01-21 10:32:32 +0100172 for (increment = BUFSIZE_STEP; len + offset + 4 >= size + increment; increment += BUFSIZE_STEP) {}
Juraj Vijtiukd746a352021-01-15 11:33:33 +0100173 buf = ly_realloc(buf, size + increment);
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200174 LY_CHECK_ERR_RET(!buf, LOGMEM(jsonctx->ctx), LY_EMEM);
175 size += BUFSIZE_STEP;
176 }
177
178 if (offset) {
179 /* store what we have so far */
180 memcpy(&buf[len], in, offset);
181 len += offset;
182 in += offset;
183 offset = 0;
184 }
185
186 switch (in[++offset]) {
187 case '"':
188 /* quotation mark */
189 value = 0x22;
190 break;
191 case '\\':
192 /* reverse solidus */
193 value = 0x5c;
194 break;
195 case '/':
196 /* solidus */
197 value = 0x2f;
198 break;
199 case 'b':
200 /* backspace */
201 value = 0x08;
202 break;
203 case 'f':
204 /* form feed */
205 value = 0x0c;
206 break;
207 case 'n':
208 /* line feed */
209 value = 0x0a;
210 break;
211 case 'r':
212 /* carriage return */
213 value = 0x0d;
214 break;
215 case 't':
216 /* tab */
217 value = 0x09;
218 break;
219 case 'u':
220 /* Basic Multilingual Plane character \uXXXX */
221 offset++;
222 for (value = i = 0; i < 4; i++) {
Juraj Vijtiuk2b94e4b2020-11-16 23:52:07 +0100223 if (!in[offset + i]) {
Michal Vasko2be1d762021-03-11 16:53:15 +0100224 LOGVAL(jsonctx->ctx, LYVE_SYNTAX, "Invalid basic multilingual plane character \"%s\".", slash);
Juraj Vijtiuk2b94e4b2020-11-16 23:52:07 +0100225 goto error;
226 } else if (isdigit(in[offset + i])) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200227 u = (in[offset + i] - '0');
228 } else if (in[offset + i] > 'F') {
Radek Krejcif13b87b2020-12-01 22:02:17 +0100229 u = LY_BASE_DEC + (in[offset + i] - 'a');
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200230 } else {
Radek Krejcif13b87b2020-12-01 22:02:17 +0100231 u = LY_BASE_DEC + (in[offset + i] - 'A');
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200232 }
Radek Krejcif13b87b2020-12-01 22:02:17 +0100233 value = (LY_BASE_HEX * value) + u;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200234 }
235 break;
236 default:
237 /* invalid escape sequence */
Radek Krejci2efc45b2020-12-22 16:25:44 +0100238 LOGVAL(jsonctx->ctx, LYVE_SYNTAX, "Invalid character escape sequence \\%c.", in[offset]);
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200239 goto error;
240
241 }
242
243 offset += i; /* add read escaped characters */
244 LY_CHECK_ERR_GOTO(ly_pututf8(&buf[len], value, &u),
Radek Krejci2efc45b2020-12-22 16:25:44 +0100245 LOGVAL(jsonctx->ctx, LYVE_SYNTAX, "Invalid character reference \"%.*s\" (0x%08x).",
Michal Vasko2be1d762021-03-11 16:53:15 +0100246 (int)(&in[offset] - slash), slash, value),
Michal Vasko69730152020-10-09 16:30:07 +0200247 error);
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200248 len += u; /* update number of bytes in buffer */
249 in += offset; /* move the input by the processed bytes stored in the buffer ... */
250 offset = 0; /* ... and reset the offset index for future moving data into buffer */
251
252 } else if (in[offset] == '"') {
253 /* end of string */
254 if (buf) {
255 /* realloc exact size string */
256 buf = ly_realloc(buf, len + offset + 1);
257 LY_CHECK_ERR_RET(!buf, LOGMEM(jsonctx->ctx), LY_EMEM);
258 size = len + offset + 1;
Michal Vasko08e9b112021-06-11 15:41:17 +0200259 if (offset) {
260 memcpy(&buf[len], in, offset);
261 }
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200262
263 /* set terminating NULL byte */
264 buf[len + offset] = '\0';
265 }
266 len += offset;
267 ++offset;
268 in += offset;
269 goto success;
270 } else {
271 /* get it as UTF-8 character for check */
272 const char *c = &in[offset];
273 uint32_t code = 0;
274 size_t code_len = 0;
275
276 LY_CHECK_ERR_GOTO(ly_getutf8(&c, &code, &code_len),
Radek Krejci2efc45b2020-12-22 16:25:44 +0100277 LOGVAL(jsonctx->ctx, LY_VCODE_INCHAR, in[offset]), error);
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200278
279 LY_CHECK_ERR_GOTO(!is_jsonstrchar(code),
Radek Krejci2efc45b2020-12-22 16:25:44 +0100280 LOGVAL(jsonctx->ctx, LYVE_SYNTAX, "Invalid character in JSON string \"%.*s\" (0x%08x).",
Radek Krejci422afb12021-03-04 16:38:16 +0100281 (int)(&in[offset] - start + code_len), start, code),
Michal Vasko69730152020-10-09 16:30:07 +0200282 error);
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200283
284 /* character is ok, continue */
285 offset += code_len;
286 }
287 }
288
289 /* EOF reached before endchar */
Radek Krejci2efc45b2020-12-22 16:25:44 +0100290 LOGVAL(jsonctx->ctx, LY_VCODE_EOF);
291 LOGVAL_LINE(jsonctx->ctx, start_line, LYVE_SYNTAX, "Missing quotation-mark at the end of a JSON string.");
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200292
293error:
294 free(buf);
295 return LY_EVALID;
296
297success:
Radek Krejcid54412f2020-12-17 20:25:35 +0100298 jsonctx->in->current = in;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200299 if (buf) {
300 lyjson_ctx_set_value(jsonctx, buf, len, 1);
301 } else {
302 lyjson_ctx_set_value(jsonctx, start, len, 0);
303 }
304
305 return LY_SUCCESS;
306
307#undef BUFSIZE
308#undef BUFSIZE_STEP
309}
310
311/*
312 *
313 * Wrapper around lyjson_string_() adding LYJSON_STRING status into context to allow using lyjson_string_() for parsing object's name.
314 */
315static LY_ERR
316lyjson_string(struct lyjson_ctx *jsonctx)
317{
318 LY_CHECK_RET(lyjson_string_(jsonctx));
319
320 JSON_PUSH_STATUS_RET(jsonctx, LYJSON_STRING);
321 LY_CHECK_RET(lyjson_check_next(jsonctx));
322
323 return LY_SUCCESS;
324}
325
aPieceke87c0a12021-05-13 15:43:26 +0200326/**
aPiecek76034c32021-06-08 15:03:11 +0200327 * @brief Calculate how many @p c characters there are in a row.
328 *
329 * @param[in] str Count from this position.
330 * @param[in] end Position after the last checked character.
331 * @param[in] c Checked character.
332 * @param[in] backwards Set to 1, if to proceed from end-1 to str.
333 * @return Number of characters in a row.
334 */
335static uint32_t
336lyjson_count_in_row(const char *str, const char *end, char c, ly_bool backwards)
337{
338 uint32_t cnt;
339
340 assert(str && end);
341
342 if (str >= end) {
343 return 0;
344 }
345
346 if (!backwards) {
347 for (cnt = 0; (str != end) && (*str == c); ++str, ++cnt) {}
348 } else {
349 --end;
350 --str;
351 for (cnt = 0; (str != end) && (*end == c); --end, ++cnt) {}
352 }
353
354 return cnt;
355}
356
357/**
358 * @brief Check if the number can be shortened to zero.
359 *
aPiecek76034c32021-06-08 15:03:11 +0200360 * @param[in] in Start of input string;
361 * @param[in] end End of input string;
362 * @return 1 if number is zero, otherwise 0.
363 */
364static ly_bool
365lyjson_number_is_zero(const char *in, const char *end)
366{
aPiecek28e101a2021-06-10 09:09:31 +0200367 assert(in < end);
aPiecek76034c32021-06-08 15:03:11 +0200368
369 if ((in[0] == '-') || (in[0] == '+')) {
370 in++;
aPiecek28e101a2021-06-10 09:09:31 +0200371 assert(in < end);
aPiecek76034c32021-06-08 15:03:11 +0200372 }
373 if ((in[0] == '0') && (in[1] == '.')) {
374 in += 2;
aPiecek28e101a2021-06-10 09:09:31 +0200375 if (!(in < end)) {
376 return 1;
377 }
aPiecek76034c32021-06-08 15:03:11 +0200378 }
379
380 return lyjson_count_in_row(in, end, '0', 0) == end - in;
381}
382
383/**
aPieceke87c0a12021-05-13 15:43:26 +0200384 * @brief Allocate buffer for number in string format.
385 *
386 * @param[in] jsonctx JSON context.
387 * @param[in] num_len Required space in bytes for a number.
388 * Terminating null byte is added by default.
389 * @param[out] buffer Output allocated buffer.
390 * @return LY_ERR value.
391 */
392static LY_ERR
aPiecek76034c32021-06-08 15:03:11 +0200393lyjson_get_buffer_for_number(const struct ly_ctx *ctx, uint32_t num_len, char **buffer)
aPieceke87c0a12021-05-13 15:43:26 +0200394{
395 *buffer = NULL;
396
aPiecek76034c32021-06-08 15:03:11 +0200397 LY_CHECK_ERR_RET((num_len + 1) > LY_NUMBER_MAXLEN, LOGVAL(ctx, LYVE_SEMANTICS,
aPieceke87c0a12021-05-13 15:43:26 +0200398 "Number encoded as a string exceeded the LY_NUMBER_MAXLEN limit."), LY_EVALID);
399
aPiecek76034c32021-06-08 15:03:11 +0200400 /* allocate buffer for the result (add NULL-byte) */
aPieceke87c0a12021-05-13 15:43:26 +0200401 *buffer = malloc(num_len + 1);
aPiecek76034c32021-06-08 15:03:11 +0200402 LY_CHECK_ERR_RET(!(*buffer), LOGMEM(ctx), LY_EMEM);
403 return LY_SUCCESS;
404}
405
/**
 * @brief Copy the 'numeric part' (@p num) without its decimal point
 * (@p dec_point) and put a new decimal point at @p dp_position of the
 * output, provided that position is reached while copying.
 *
 * @param[in] num Begin of the 'numeric part'.
 * @param[in] num_len Length of the 'numeric part'.
 * @param[in] dec_point Pointer to the old decimal point.
 * If it has a NULL value, it is ignored.
 * @param[in] dp_position Position of the new decimal point.
 * If it has a negative value, it is ignored.
 * @param[out] dst Memory into which the copied result is written.
 * @return Number of characters written to the @p dst.
 */
static uint32_t
lyjson_exp_number_copy_num_part(const char *num, uint32_t num_len,
        char *dec_point, int32_t dp_position, char *dst)
{
    int32_t old_dp_idx;
    int32_t src, written = 0;

    assert(num && dst);

    /* index of the old decimal point, an unreachable one if there is none */
    if (dec_point) {
        old_dp_idx = dec_point - num;
    } else {
        old_dp_idx = INT32_MAX;
    }
    assert((old_dp_idx >= 0) && (old_dp_idx != dp_position));

    for (src = 0; (uint32_t)src < num_len; ++src) {
        if (src == old_dp_idx) {
            /* the old decimal point is dropped */
            continue;
        }
        if (written == dp_position) {
            /* the new decimal point goes right before this character */
            dst[written++] = '.';
        }
        dst[written++] = num[src];
    }

    return written;
}
445
446/**
447 * @brief Convert JSON number with exponent into the representation
448 * used by YANG.
449 *
450 * The input numeric string must be syntactically valid. Also, before
451 * calling this function, checks should be performed using the
452 * ::lyjson_number_is_zero().
453 *
454 * @param[in] ctx Context for the error message.
455 * @param[in] in Beginning of the string containing the number.
456 * @param[in] exponent Pointer to the letter E/e.
457 * @param[in] total_len Total size of the input number.
458 * @param[out] res Conversion result.
459 * @param[out] res_len Length of the result.
460 * @return LY_ERR value.
461 */
462static LY_ERR
463lyjson_exp_number(const struct ly_ctx *ctx, const char *in, const char *exponent,
464 size_t total_len, char **res, size_t *res_len)
465{
466
467#define MAYBE_WRITE_MINUS(ARRAY, INDEX, FLAG) \
468 if (FLAG) { \
469 ARRAY[INDEX++] = '-'; \
470 }
471
472/* Length of leading zero followed by the decimal point. */
473#define LEADING_ZERO 1
474
475/* Flags for the ::lyjson_count_in_row() */
476#define FORWARD 0
477#define BACKWARD 1
478
479 /* Buffer where the result is stored. */
480 char *buf;
481 /* Size without space for terminating NULL-byte. */
482 uint32_t buf_len;
483 /* Index to buf. */
484 uint32_t i = 0;
485 /* A 'numeric part' doesn't contain a minus sign or an leading zero.
486 * For example, in 0.45, there is the leading zero.
487 */
488 const char *num;
489 /* Length of the 'numeric part' ends before E/e. */
490 uint32_t num_len;
491 /* Position of decimal point in the num. */
492 char *dec_point;
493 /* Final position of decimal point in the buf. */
494 int32_t dp_position;
495 /* Exponent as integer. */
496 long int e_val;
497 /* Byte for the decimal point. */
498 int8_t dot;
499 /* Required additional byte for the minus sign. */
500 uint8_t minus;
501 /* The number of zeros. */
502 long zeros;
503 /* If the number starts with leading zero followed by the decimal point. */
504 ly_bool leading_zero;
505
506 assert(ctx && in && exponent && res && res_len && (total_len > 2));
507 assert((in < exponent) && ((*exponent == 'e') || (*exponent == 'E')));
508
509 /* Convert exponent. */
510 errno = 0;
511 e_val = strtol(exponent + 1, NULL, LY_BASE_DEC);
512 if (errno) {
513 LOGVAL(ctx, LYVE_SEMANTICS,
514 "Exponent out-of-bounds in a JSON Number value (%.*s).",
515 total_len, in);
516 return LY_EVALID;
517 }
518
519 minus = in[0] == '-';
520 if (in[minus] == '0') {
521 assert(in[minus + 1] == '.');
522 leading_zero = 1;
523 /* The leading zero has been found, it will be skipped. */
524 num = &in[minus + 1];
525 } else {
526 leading_zero = 0;
527 /* Set to the first number. */
528 num = &in[minus];
529 }
530 num_len = exponent - num;
531
532 /* Find the location of the decimal points. */
533 dec_point = ly_strnchr(num, '.', num_len);
534 dp_position = dec_point ?
535 dec_point - num + e_val :
536 num_len + e_val;
537
538 /* Remove zeros after the decimal point from the end of
539 * the 'numeric part' because these are useless.
540 * (For example, in 40.001000 these are the last 3).
541 */
542 num_len -= dp_position > 0 ?
543 lyjson_count_in_row(num + dp_position - 1, exponent, '0', BACKWARD) :
544 lyjson_count_in_row(num, exponent, '0', BACKWARD);
545
546 /* Decide what to do with the dot from the 'numeric part'. */
547 if (dec_point && ((int32_t)(num_len - 1) == dp_position)) {
548 /* Decimal point in the last place is useless. */
549 dot = -1;
550 } else if (dec_point) {
551 /* Decimal point is shifted. */
552 dot = 0;
553 } else {
554 /* Additional byte for the decimal point is requred. */
555 dot = 1;
556 }
557
558 /* Final composition of the result. */
559 if (dp_position <= 0) {
560 /* Adding decimal point before the integer with adding additional zero(s). */
561
562 zeros = labs(dp_position);
563 buf_len = minus + LEADING_ZERO + dot + zeros + num_len;
564 LY_CHECK_RET(lyjson_get_buffer_for_number(ctx, buf_len, &buf));
565 MAYBE_WRITE_MINUS(buf, i, minus);
566 buf[i++] = '0';
567 buf[i++] = '.';
568 memset(buf + i, '0', zeros);
569 i += zeros;
570 dp_position = -1;
571 lyjson_exp_number_copy_num_part(num, num_len, dec_point, dp_position, buf + i);
572 } else if (leading_zero && (dp_position < (ssize_t)num_len)) {
573 /* Insert decimal point between the integer's digits. */
574
575 /* Set a new range of 'numeric part'. Old decimal point is skipped. */
576 num++;
577 num_len--;
578 dp_position--;
579 /* Get the number of useless zeros between the old
580 * and new decimal point. For example, in the number 0.005E1,
581 * there is one useless zero.
582 */
583 zeros = lyjson_count_in_row(num, num + dp_position + 1, '0', FORWARD);
584 /* If the new decimal point will be in the place of the first non-zero subnumber. */
585 if (zeros == (dp_position + 1)) {
586 /* keep one zero as leading zero */
587 zeros--;
588 /* new decimal point will be behind the leading zero */
589 dp_position = 1;
590 dot = 1;
591 } else {
592 dot = 0;
593 }
594 buf_len = minus + dot + (num_len - zeros);
595 LY_CHECK_RET(lyjson_get_buffer_for_number(ctx, buf_len, &buf));
596 MAYBE_WRITE_MINUS(buf, i, minus);
597 /* Skip useless zeros and copy. */
598 lyjson_exp_number_copy_num_part(num + zeros, num_len - zeros, NULL, dp_position, buf + i);
599 } else if (dp_position < (ssize_t)num_len) {
600 /* Insert decimal point between the integer's digits. */
601
602 buf_len = minus + dot + num_len;
603 LY_CHECK_RET(lyjson_get_buffer_for_number(ctx, buf_len, &buf));
604 MAYBE_WRITE_MINUS(buf, i, minus);
605 lyjson_exp_number_copy_num_part(num, num_len, dec_point, dp_position, buf + i);
606 } else if (leading_zero) {
607 /* Adding decimal point after the decimal value make the integer result. */
608
609 /* Set a new range of 'numeric part'. Old decimal point is skipped. */
610 num++;
611 num_len--;
612 /* Get the number of useless zeros. */
613 zeros = lyjson_count_in_row(num, num + num_len, '0', FORWARD);
614 buf_len = minus + dp_position - zeros;
615 LY_CHECK_RET(lyjson_get_buffer_for_number(ctx, buf_len, &buf));
616 MAYBE_WRITE_MINUS(buf, i, minus);
617 /* Skip useless zeros and copy. */
618 i += lyjson_exp_number_copy_num_part(num + zeros, num_len - zeros, NULL, dp_position, buf + i);
619 /* Add multiples of ten behind the 'numeric part'. */
620 memset(buf + i, '0', buf_len - i);
621 } else {
622 /* Adding decimal point after the decimal value make the integer result. */
623
624 buf_len = minus + dp_position;
625 LY_CHECK_RET(lyjson_get_buffer_for_number(ctx, buf_len, &buf));
626 MAYBE_WRITE_MINUS(buf, i, minus);
627 i += lyjson_exp_number_copy_num_part(num, num_len, dec_point, dp_position, buf + i);
628 /* Add multiples of ten behind the 'numeric part'. */
629 memset(buf + i, '0', buf_len - i);
630 }
631
632 buf[buf_len] = '\0';
633 *res = buf;
634 *res_len = buf_len;
635
636#undef MAYBE_WRITE_MINUS
637#undef LEADING_ZERO
638#undef FORWARD
639#undef BACKWARD
640
aPieceke87c0a12021-05-13 15:43:26 +0200641 return LY_SUCCESS;
642}
643
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200644static LY_ERR
645lyjson_number(struct lyjson_ctx *jsonctx)
646{
aPiecek76034c32021-06-08 15:03:11 +0200647 size_t offset = 0, num_len;
648 const char *in = jsonctx->in->current, *exponent = NULL;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200649 uint8_t minus = 0;
aPiecek76034c32021-06-08 15:03:11 +0200650 char *num;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200651
652 if (in[offset] == '-') {
653 ++offset;
654 minus = 1;
655 }
656
657 if (in[offset] == '0') {
658 ++offset;
659 } else if (isdigit(in[offset])) {
660 ++offset;
661 while (isdigit(in[offset])) {
662 ++offset;
663 }
664 } else {
665invalid_character:
666 if (in[offset]) {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100667 LOGVAL(jsonctx->ctx, LYVE_SYNTAX, "Invalid character in JSON Number value (\"%c\").", in[offset]);
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200668 } else {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100669 LOGVAL(jsonctx->ctx, LY_VCODE_EOF);
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200670 }
671 return LY_EVALID;
672 }
673
674 if (in[offset] == '.') {
675 ++offset;
676 if (!isdigit(in[offset])) {
677 goto invalid_character;
678 }
679 while (isdigit(in[offset])) {
680 ++offset;
681 }
682 }
683
684 if ((in[offset] == 'e') || (in[offset] == 'E')) {
aPiecek76034c32021-06-08 15:03:11 +0200685 exponent = &in[offset];
686 ++offset;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200687 if ((in[offset] == '+') || (in[offset] == '-')) {
688 ++offset;
689 }
690 if (!isdigit(in[offset])) {
691 goto invalid_character;
692 }
693 while (isdigit(in[offset])) {
694 ++offset;
695 }
696 }
697
aPiecek76034c32021-06-08 15:03:11 +0200698 if (lyjson_number_is_zero(in, exponent ? exponent : &in[offset])) {
699 lyjson_ctx_set_value(jsonctx, in, minus + 1, 0);
700 } else if (exponent && lyjson_number_is_zero(exponent + 1, &in[offset])) {
701 lyjson_ctx_set_value(jsonctx, in, exponent - in, 0);
702 } else if (exponent) {
703 LY_CHECK_RET(lyjson_exp_number(jsonctx->ctx, in, exponent, offset, &num, &num_len));
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200704 lyjson_ctx_set_value(jsonctx, num, num_len, 1);
705 } else {
aPiecek5b6dd182021-06-10 09:11:58 +0200706 if (offset > LY_NUMBER_MAXLEN) {
707 LOGVAL(jsonctx->ctx, LYVE_SEMANTICS,
708 "Number encoded as a string exceeded the LY_NUMBER_MAXLEN limit.");
709 return LY_EVALID;
710 }
aPiecek76034c32021-06-08 15:03:11 +0200711 lyjson_ctx_set_value(jsonctx, in, offset, 0);
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200712 }
713 ly_in_skip(jsonctx->in, offset);
714
715 JSON_PUSH_STATUS_RET(jsonctx, LYJSON_NUMBER);
716 LY_CHECK_RET(lyjson_check_next(jsonctx));
717
718 return LY_SUCCESS;
719}
720
721static LY_ERR
722lyjson_object_name(struct lyjson_ctx *jsonctx)
723{
724 if (*jsonctx->in->current != '"') {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100725 LOGVAL(jsonctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(jsonctx->in->current),
Michal Vasko69730152020-10-09 16:30:07 +0200726 jsonctx->in->current, "a JSON object's member");
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200727 return LY_EVALID;
728 }
729 ly_in_skip(jsonctx->in, 1);
730
731 LY_CHECK_RET(lyjson_string_(jsonctx));
732 LY_CHECK_RET(skip_ws(jsonctx));
Michal Vasko08dc70b2020-10-07 13:58:47 +0200733 if (*jsonctx->in->current != ':') {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100734 LOGVAL(jsonctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(jsonctx->in->current), jsonctx->in->current,
735 "a JSON object's name-separator ':'");
Michal Vasko08dc70b2020-10-07 13:58:47 +0200736 return LY_EVALID;
737 }
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200738 ly_in_skip(jsonctx->in, 1);
739 LY_CHECK_RET(skip_ws(jsonctx));
740
741 return LY_SUCCESS;
742}
743
744static LY_ERR
745lyjson_object(struct lyjson_ctx *jsonctx)
746{
747 LY_CHECK_RET(skip_ws(jsonctx));
748
749 if (*jsonctx->in->current == '}') {
aPiecek93582ed2021-05-25 14:49:06 +0200750 assert(jsonctx->depth);
751 jsonctx->depth--;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200752 /* empty object */
753 ly_in_skip(jsonctx->in, 1);
754 lyjson_ctx_set_value(jsonctx, NULL, 0, 0);
755 JSON_PUSH_STATUS_RET(jsonctx, LYJSON_OBJECT_EMPTY);
756 return LY_SUCCESS;
757 }
758
759 LY_CHECK_RET(lyjson_object_name(jsonctx));
760
761 /* output data are set by lyjson_string_() */
762 JSON_PUSH_STATUS_RET(jsonctx, LYJSON_OBJECT);
763
764 return LY_SUCCESS;
765}
766
767/*
768 * @brief Process JSON array envelope
769 *
770 *
771 *
772 * @param[in] jsonctx JSON parser context
773 * @return LY_SUCCESS or LY_EMEM
774 */
775static LY_ERR
776lyjson_array(struct lyjson_ctx *jsonctx)
777{
778 LY_CHECK_RET(skip_ws(jsonctx));
779
780 if (*jsonctx->in->current == ']') {
781 /* empty array */
782 ly_in_skip(jsonctx->in, 1);
783 JSON_PUSH_STATUS_RET(jsonctx, LYJSON_ARRAY_EMPTY);
784 } else {
785 JSON_PUSH_STATUS_RET(jsonctx, LYJSON_ARRAY);
786 }
787
788 /* erase previous values, array has no value on its own */
789 lyjson_ctx_set_value(jsonctx, NULL, 0, 0);
790
791 return LY_SUCCESS;
792}
793
794static LY_ERR
795lyjson_value(struct lyjson_ctx *jsonctx)
796{
Michal Vasko69730152020-10-09 16:30:07 +0200797 if (jsonctx->status.count && (lyjson_ctx_status(jsonctx, 0) == LYJSON_END)) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200798 return LY_SUCCESS;
799 }
800
Radek Krejcif13b87b2020-12-01 22:02:17 +0100801 if ((*jsonctx->in->current == 'f') && !strncmp(jsonctx->in->current, "false", ly_strlen_const("false"))) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200802 /* false */
Radek Krejcif13b87b2020-12-01 22:02:17 +0100803 lyjson_ctx_set_value(jsonctx, jsonctx->in->current, ly_strlen_const("false"), 0);
804 ly_in_skip(jsonctx->in, ly_strlen_const("false"));
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200805 JSON_PUSH_STATUS_RET(jsonctx, LYJSON_FALSE);
806 LY_CHECK_RET(lyjson_check_next(jsonctx));
807
Radek Krejcif13b87b2020-12-01 22:02:17 +0100808 } else if ((*jsonctx->in->current == 't') && !strncmp(jsonctx->in->current, "true", ly_strlen_const("true"))) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200809 /* true */
Radek Krejcif13b87b2020-12-01 22:02:17 +0100810 lyjson_ctx_set_value(jsonctx, jsonctx->in->current, ly_strlen_const("true"), 0);
811 ly_in_skip(jsonctx->in, ly_strlen_const("true"));
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200812 JSON_PUSH_STATUS_RET(jsonctx, LYJSON_TRUE);
813 LY_CHECK_RET(lyjson_check_next(jsonctx));
814
Radek Krejcif13b87b2020-12-01 22:02:17 +0100815 } else if ((*jsonctx->in->current == 'n') && !strncmp(jsonctx->in->current, "null", ly_strlen_const("null"))) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200816 /* none */
Radek Krejci201963a2020-12-03 11:43:40 +0100817 lyjson_ctx_set_value(jsonctx, "", 0, 0);
Radek Krejcif13b87b2020-12-01 22:02:17 +0100818 ly_in_skip(jsonctx->in, ly_strlen_const("null"));
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200819 JSON_PUSH_STATUS_RET(jsonctx, LYJSON_NULL);
820 LY_CHECK_RET(lyjson_check_next(jsonctx));
821
822 } else if (*jsonctx->in->current == '"') {
823 /* string */
824 ly_in_skip(jsonctx->in, 1);
825 LY_CHECK_RET(lyjson_string(jsonctx));
826
827 } else if (*jsonctx->in->current == '[') {
828 /* array */
829 ly_in_skip(jsonctx->in, 1);
830 LY_CHECK_RET(lyjson_array(jsonctx));
831
832 } else if (*jsonctx->in->current == '{') {
aPiecek93582ed2021-05-25 14:49:06 +0200833 jsonctx->depth++;
834 if (jsonctx->depth > LY_MAX_BLOCK_DEPTH) {
835 LOGERR(jsonctx->ctx, LY_EINVAL,
836 "The maximum number of block nestings has been exceeded.");
837 return LY_EINVAL;
838 }
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200839 /* object */
840 ly_in_skip(jsonctx->in, 1);
841 LY_CHECK_RET(lyjson_object(jsonctx));
842
Michal Vasko69730152020-10-09 16:30:07 +0200843 } else if ((*jsonctx->in->current == '-') || ((*jsonctx->in->current >= '0') && (*jsonctx->in->current <= '9'))) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200844 /* number */
845 LY_CHECK_RET(lyjson_number(jsonctx));
846
847 } else {
848 /* unexpected value */
Radek Krejci2efc45b2020-12-22 16:25:44 +0100849 LOGVAL(jsonctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(jsonctx->in->current),
Michal Vasko69730152020-10-09 16:30:07 +0200850 jsonctx->in->current, "a JSON value");
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200851 return LY_EVALID;
852 }
853
854 return LY_SUCCESS;
855}
856
857LY_ERR
858lyjson_ctx_new(const struct ly_ctx *ctx, struct ly_in *in, struct lyjson_ctx **jsonctx_p)
859{
860 LY_ERR ret = LY_SUCCESS;
861 struct lyjson_ctx *jsonctx;
862
863 assert(ctx);
864 assert(in);
865 assert(jsonctx_p);
866
867 /* new context */
868 jsonctx = calloc(1, sizeof *jsonctx);
869 LY_CHECK_ERR_RET(!jsonctx, LOGMEM(ctx), LY_EMEM);
870 jsonctx->ctx = ctx;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200871 jsonctx->in = in;
872
Radek Krejciddace2c2021-01-08 11:30:56 +0100873 LOG_LOCINIT(NULL, NULL, NULL, in);
Radek Krejci2efc45b2020-12-22 16:25:44 +0100874
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200875 /* parse JSON value, if any */
876 LY_CHECK_GOTO(ret = skip_ws(jsonctx), cleanup);
877 if (lyjson_ctx_status(jsonctx, 0) == LYJSON_END) {
878 /* empty data input */
879 goto cleanup;
880 }
881
882 ret = lyjson_value(jsonctx);
883
Michal Vasko69730152020-10-09 16:30:07 +0200884 if ((jsonctx->status.count > 1) && (lyjson_ctx_status(jsonctx, 0) == LYJSON_END)) {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100885 LOGVAL(jsonctx->ctx, LY_VCODE_EOF);
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200886 ret = LY_EVALID;
887 }
888
889cleanup:
890 if (ret) {
891 lyjson_ctx_free(jsonctx);
892 } else {
893 *jsonctx_p = jsonctx;
894 }
895 return ret;
896}
897
898void
899lyjson_ctx_backup(struct lyjson_ctx *jsonctx)
900{
901 if (jsonctx->backup.dynamic) {
902 free((char *)jsonctx->backup.value);
903 }
904 jsonctx->backup.status = lyjson_ctx_status(jsonctx, 0);
905 jsonctx->backup.status_count = jsonctx->status.count;
906 jsonctx->backup.value = jsonctx->value;
907 jsonctx->backup.value_len = jsonctx->value_len;
908 jsonctx->backup.input = jsonctx->in->current;
909 jsonctx->backup.dynamic = jsonctx->dynamic;
aPiecek93582ed2021-05-25 14:49:06 +0200910 jsonctx->backup.depth = jsonctx->depth;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200911 jsonctx->dynamic = 0;
912}
913
914void
915lyjson_ctx_restore(struct lyjson_ctx *jsonctx)
916{
917 if (jsonctx->dynamic) {
918 free((char *)jsonctx->value);
919 }
920 jsonctx->status.count = jsonctx->backup.status_count;
Michal Vasko22df3f02020-08-24 13:29:22 +0200921 jsonctx->status.objs[jsonctx->backup.status_count - 1] = (void *)jsonctx->backup.status;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200922 jsonctx->value = jsonctx->backup.value;
923 jsonctx->value_len = jsonctx->backup.value_len;
924 jsonctx->in->current = jsonctx->backup.input;
925 jsonctx->dynamic = jsonctx->backup.dynamic;
aPiecek93582ed2021-05-25 14:49:06 +0200926 jsonctx->depth = jsonctx->backup.depth;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200927 jsonctx->backup.dynamic = 0;
928}
929
930LY_ERR
931lyjson_ctx_next(struct lyjson_ctx *jsonctx, enum LYJSON_PARSER_STATUS *status)
932{
933 LY_ERR ret = LY_SUCCESS;
Radek Krejci857189e2020-09-01 13:26:36 +0200934 ly_bool toplevel = 0;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200935 enum LYJSON_PARSER_STATUS prev;
936
937 assert(jsonctx);
938
939 prev = lyjson_ctx_status(jsonctx, 0);
940
Michal Vasko69730152020-10-09 16:30:07 +0200941 if ((prev == LYJSON_OBJECT) || (prev == LYJSON_ARRAY)) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200942 /* get value for the object's member OR the first value in the array */
943 ret = lyjson_value(jsonctx);
944 goto result;
945 } else {
946 /* the previous token is closed and should be completely processed */
947 JSON_POP_STATUS_RET(jsonctx);
948 prev = lyjson_ctx_status(jsonctx, 0);
949 }
950
951 if (!jsonctx->status.count) {
952 /* we are done with the top level value */
953 toplevel = 1;
954 }
955 LY_CHECK_RET(skip_ws(jsonctx));
956 if (toplevel && !jsonctx->status.count) {
957 /* EOF expected, but there are some data after the top level token */
Radek Krejci2efc45b2020-12-22 16:25:44 +0100958 LOGVAL(jsonctx->ctx, LYVE_SYNTAX, "Expecting end-of-input, but some data follows the top level JSON value.");
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200959 return LY_EVALID;
960 }
961
962 if (toplevel) {
963 /* we are done */
964 return LY_SUCCESS;
965 }
966
967 /* continue with the next token */
968 assert(prev == LYJSON_OBJECT || prev == LYJSON_ARRAY);
969
970 if (*jsonctx->in->current == ',') {
971 /* sibling item in the ... */
972 ly_in_skip(jsonctx->in, 1);
973 LY_CHECK_RET(skip_ws(jsonctx));
974
975 if (prev == LYJSON_OBJECT) {
976 /* ... object - get another object's member */
977 ret = lyjson_object_name(jsonctx);
978 } else { /* LYJSON_ARRAY */
979 /* ... array - get another complete value */
980 ret = lyjson_value(jsonctx);
981 }
Michal Vasko69730152020-10-09 16:30:07 +0200982 } else if (((prev == LYJSON_OBJECT) && (*jsonctx->in->current == '}')) || ((prev == LYJSON_ARRAY) && (*jsonctx->in->current == ']'))) {
aPiecek93582ed2021-05-25 14:49:06 +0200983 if (*jsonctx->in->current == '}') {
984 assert(jsonctx->depth);
985 jsonctx->depth--;
986 }
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200987 ly_in_skip(jsonctx->in, 1);
988 JSON_POP_STATUS_RET(jsonctx);
989 JSON_PUSH_STATUS_RET(jsonctx, prev + 1);
990 } else {
991 /* unexpected value */
Radek Krejci2efc45b2020-12-22 16:25:44 +0100992 LOGVAL(jsonctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(jsonctx->in->current), jsonctx->in->current,
993 prev == LYJSON_ARRAY ? "another JSON value in array" : "another JSON object's member");
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200994 return LY_EVALID;
995 }
996
997result:
Michal Vasko69730152020-10-09 16:30:07 +0200998 if ((ret == LY_SUCCESS) && (jsonctx->status.count > 1) && (lyjson_ctx_status(jsonctx, 0) == LYJSON_END)) {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100999 LOGVAL(jsonctx->ctx, LY_VCODE_EOF);
Radek Krejci50f0c6b2020-06-18 16:31:48 +02001000 ret = LY_EVALID;
1001 }
1002
Michal Vasko69730152020-10-09 16:30:07 +02001003 if ((ret == LY_SUCCESS) && status) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +02001004 *status = lyjson_ctx_status(jsonctx, 0);
1005 }
1006
1007 return ret;
1008}
1009
1010enum LYJSON_PARSER_STATUS
1011lyjson_ctx_status(struct lyjson_ctx *jsonctx, uint32_t index)
1012{
1013 assert(jsonctx);
1014
1015 if (jsonctx->status.count < index) {
1016 return LYJSON_ERROR;
1017 } else if (jsonctx->status.count == index) {
1018 return LYJSON_ROOT;
1019 } else {
Michal Vasko27915722020-08-31 14:54:42 +02001020 return (enum LYJSON_PARSER_STATUS)(uintptr_t)jsonctx->status.objs[jsonctx->status.count - (index + 1)];
Radek Krejci50f0c6b2020-06-18 16:31:48 +02001021 }
1022}
1023
1024void
1025lyjson_ctx_free(struct lyjson_ctx *jsonctx)
1026{
1027 if (!jsonctx) {
1028 return;
1029 }
1030
Radek Krejciddace2c2021-01-08 11:30:56 +01001031 LOG_LOCBACK(0, 0, 0, 1);
Radek Krejci2efc45b2020-12-22 16:25:44 +01001032
Radek Krejci50f0c6b2020-06-18 16:31:48 +02001033 if (jsonctx->dynamic) {
Michal Vasko22df3f02020-08-24 13:29:22 +02001034 free((char *)jsonctx->value);
Radek Krejci50f0c6b2020-06-18 16:31:48 +02001035 }
1036 if (jsonctx->backup.dynamic) {
1037 free((char *)jsonctx->backup.value);
1038 }
1039
1040 ly_set_erase(&jsonctx->status, NULL);
1041
1042 free(jsonctx);
1043}