blob: 6ae4484ba8a1333b219c2716640c38a6f261523a [file] [log] [blame]
/**
 * @file json.c
 * @author Radek Krejci <rkrejci@cesnet.cz>
 * @brief Generic JSON format parser for libyang
 *
 * Copyright (c) 2020 CESNET, z.s.p.o.
 *
 * This source code is licensed under BSD 3-Clause License (the "License").
 * You may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://opensource.org/licenses/BSD-3-Clause
 */

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

#include "common.h"
#include "in_internal.h"
#include "json.h"

/**
 * @brief Push @p STATUS on top of the parser status stack of @p CTX.
 *
 * The result of ly_set_add() is handed to LY_CHECK_RET, so this macro may only
 * be used inside functions returning LY_ERR.
 */
#define JSON_PUSH_STATUS_RET(CTX, STATUS) \
    LY_CHECK_RET(ly_set_add(&(CTX)->status, (void *)(STATUS), 1, NULL))

/**
 * @brief Pop the top item from the parser status stack of @p CTX.
 *
 * Wrapped in do-while(0) so that the macro expands to exactly one statement
 * and stays correct inside unbraced if/else bodies.
 */
#define JSON_POP_STATUS_RET(CTX) \
    do { \
        assert((CTX)->status.count); \
        (CTX)->status.count--; \
    } while (0)

Michal Vasko22df3f02020-08-24 13:29:22 +020032const char *
Radek Krejci50f0c6b2020-06-18 16:31:48 +020033lyjson_token2str(enum LYJSON_PARSER_STATUS status)
34{
35 switch (status) {
36 case LYJSON_ERROR:
37 return "error";
38 case LYJSON_ROOT:
39 return "document root";
40 case LYJSON_FALSE:
41 return "false";
42 case LYJSON_TRUE:
43 return "true";
44 case LYJSON_NULL:
45 return "null";
46 case LYJSON_OBJECT:
47 return "object";
48 case LYJSON_OBJECT_CLOSED:
49 return "object closed";
50 case LYJSON_OBJECT_EMPTY:
51 return "empty object";
52 case LYJSON_ARRAY:
53 return "array";
54 case LYJSON_ARRAY_CLOSED:
55 return "array closed";
56 case LYJSON_ARRAY_EMPTY:
57 return "empty array";
58 case LYJSON_NUMBER:
59 return "number";
60 case LYJSON_STRING:
61 return "string";
62 case LYJSON_END:
63 return "end of input";
64 }
65
66 return "";
67}
68
69static LY_ERR
70skip_ws(struct lyjson_ctx *jsonctx)
71{
72 /* skip leading whitespaces */
73 while (*jsonctx->in->current != '\0' && is_jsonws(*jsonctx->in->current)) {
Radek Krejcidd713ce2021-01-04 23:12:12 +010074 if (*jsonctx->in->current == '\n') {
75 LY_IN_NEW_LINE(jsonctx->in);
76 }
Radek Krejci50f0c6b2020-06-18 16:31:48 +020077 ly_in_skip(jsonctx->in, 1);
78 }
79 if (*jsonctx->in->current == '\0') {
80 JSON_PUSH_STATUS_RET(jsonctx, LYJSON_END);
81 }
82
83 return LY_SUCCESS;
84}
85
86/*
87 * @brief Set value corresponding to the current context's status
88 */
89static void
Radek Krejci857189e2020-09-01 13:26:36 +020090lyjson_ctx_set_value(struct lyjson_ctx *jsonctx, const char *value, size_t value_len, ly_bool dynamic)
Radek Krejci50f0c6b2020-06-18 16:31:48 +020091{
92 assert(jsonctx);
93
Juraj Vijtiukec285cd2021-01-14 11:41:20 +010094 if (jsonctx->dynamic) {
Michal Vasko22df3f02020-08-24 13:29:22 +020095 free((char *)jsonctx->value);
Radek Krejci50f0c6b2020-06-18 16:31:48 +020096 }
97 jsonctx->value = value;
98 jsonctx->value_len = value_len;
99 jsonctx->dynamic = dynamic;
100}
101
102static LY_ERR
103lyjson_check_next(struct lyjson_ctx *jsonctx)
104{
105 if (jsonctx->status.count == 1) {
106 /* top level value (JSON-text), ws expected */
Michal Vasko69730152020-10-09 16:30:07 +0200107 if ((*jsonctx->in->current == '\0') || is_jsonws(*jsonctx->in->current)) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200108 return LY_SUCCESS;
109 }
110 } else if (lyjson_ctx_status(jsonctx, 1) == LYJSON_OBJECT) {
111 LY_CHECK_RET(skip_ws(jsonctx));
Michal Vasko69730152020-10-09 16:30:07 +0200112 if ((*jsonctx->in->current == ',') || (*jsonctx->in->current == '}')) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200113 return LY_SUCCESS;
114 }
115 } else if (lyjson_ctx_status(jsonctx, 1) == LYJSON_ARRAY) {
116 LY_CHECK_RET(skip_ws(jsonctx));
Michal Vasko69730152020-10-09 16:30:07 +0200117 if ((*jsonctx->in->current == ',') || (*jsonctx->in->current == ']')) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200118 return LY_SUCCESS;
119 }
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200120 }
121
Radek Krejcie7010dc2021-03-04 15:54:24 +0100122 LOGVAL(jsonctx->ctx, LYVE_SYNTAX, "Unexpected character \"%c\" after JSON %s.",
123 *jsonctx->in->current, lyjson_token2str(lyjson_ctx_status(jsonctx, 0)));
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200124 return LY_EVALID;
125}
126
127/**
128 * Input is expected to start after the opening quotation-mark.
129 * When succeeds, input is moved after the closing quotation-mark.
130 */
131static LY_ERR
132lyjson_string_(struct lyjson_ctx *jsonctx)
133{
134#define BUFSIZE 24
135#define BUFSIZE_STEP 128
136
137 const char *in = jsonctx->in->current, *start;
138 char *buf = NULL;
139 size_t offset; /* read offset in input buffer */
140 size_t len; /* length of the output string (write offset in output buffer) */
141 size_t size = 0; /* size of the output buffer */
142 size_t u;
143 uint64_t start_line;
144
145 assert(jsonctx);
146
147 /* init */
148 start = in;
Radek Krejcid54412f2020-12-17 20:25:35 +0100149 start_line = jsonctx->in->line;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200150 offset = len = 0;
151
152 /* parse */
153 while (in[offset]) {
154 if (in[offset] == '\\') {
155 /* escape sequence */
Michal Vasko2be1d762021-03-11 16:53:15 +0100156 const char *slash = &in[offset];
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200157 uint32_t value;
158 uint8_t i = 1;
159
160 if (!buf) {
161 /* prepare output buffer */
162 buf = malloc(BUFSIZE);
163 LY_CHECK_ERR_RET(!buf, LOGMEM(jsonctx->ctx), LY_EMEM);
164 size = BUFSIZE;
165 }
166
167 /* allocate enough for the offset and next character,
168 * we will need 4 bytes at most since we support only the predefined
169 * (one-char) entities and character references */
170 if (len + offset + 4 >= size) {
Juraj Vijtiukd746a352021-01-15 11:33:33 +0100171 size_t increment;
Radek Krejcidf549132021-01-21 10:32:32 +0100172 for (increment = BUFSIZE_STEP; len + offset + 4 >= size + increment; increment += BUFSIZE_STEP) {}
Juraj Vijtiukd746a352021-01-15 11:33:33 +0100173 buf = ly_realloc(buf, size + increment);
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200174 LY_CHECK_ERR_RET(!buf, LOGMEM(jsonctx->ctx), LY_EMEM);
175 size += BUFSIZE_STEP;
176 }
177
178 if (offset) {
179 /* store what we have so far */
180 memcpy(&buf[len], in, offset);
181 len += offset;
182 in += offset;
183 offset = 0;
184 }
185
186 switch (in[++offset]) {
187 case '"':
188 /* quotation mark */
189 value = 0x22;
190 break;
191 case '\\':
192 /* reverse solidus */
193 value = 0x5c;
194 break;
195 case '/':
196 /* solidus */
197 value = 0x2f;
198 break;
199 case 'b':
200 /* backspace */
201 value = 0x08;
202 break;
203 case 'f':
204 /* form feed */
205 value = 0x0c;
206 break;
207 case 'n':
208 /* line feed */
209 value = 0x0a;
210 break;
211 case 'r':
212 /* carriage return */
213 value = 0x0d;
214 break;
215 case 't':
216 /* tab */
217 value = 0x09;
218 break;
219 case 'u':
220 /* Basic Multilingual Plane character \uXXXX */
221 offset++;
222 for (value = i = 0; i < 4; i++) {
Juraj Vijtiuk2b94e4b2020-11-16 23:52:07 +0100223 if (!in[offset + i]) {
Michal Vasko2be1d762021-03-11 16:53:15 +0100224 LOGVAL(jsonctx->ctx, LYVE_SYNTAX, "Invalid basic multilingual plane character \"%s\".", slash);
Juraj Vijtiuk2b94e4b2020-11-16 23:52:07 +0100225 goto error;
226 } else if (isdigit(in[offset + i])) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200227 u = (in[offset + i] - '0');
228 } else if (in[offset + i] > 'F') {
Radek Krejcif13b87b2020-12-01 22:02:17 +0100229 u = LY_BASE_DEC + (in[offset + i] - 'a');
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200230 } else {
Radek Krejcif13b87b2020-12-01 22:02:17 +0100231 u = LY_BASE_DEC + (in[offset + i] - 'A');
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200232 }
Radek Krejcif13b87b2020-12-01 22:02:17 +0100233 value = (LY_BASE_HEX * value) + u;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200234 }
235 break;
236 default:
237 /* invalid escape sequence */
Radek Krejci2efc45b2020-12-22 16:25:44 +0100238 LOGVAL(jsonctx->ctx, LYVE_SYNTAX, "Invalid character escape sequence \\%c.", in[offset]);
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200239 goto error;
240
241 }
242
243 offset += i; /* add read escaped characters */
244 LY_CHECK_ERR_GOTO(ly_pututf8(&buf[len], value, &u),
Radek Krejci2efc45b2020-12-22 16:25:44 +0100245 LOGVAL(jsonctx->ctx, LYVE_SYNTAX, "Invalid character reference \"%.*s\" (0x%08x).",
Michal Vasko2be1d762021-03-11 16:53:15 +0100246 (int)(&in[offset] - slash), slash, value),
Michal Vasko69730152020-10-09 16:30:07 +0200247 error);
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200248 len += u; /* update number of bytes in buffer */
249 in += offset; /* move the input by the processed bytes stored in the buffer ... */
250 offset = 0; /* ... and reset the offset index for future moving data into buffer */
251
252 } else if (in[offset] == '"') {
253 /* end of string */
254 if (buf) {
255 /* realloc exact size string */
256 buf = ly_realloc(buf, len + offset + 1);
257 LY_CHECK_ERR_RET(!buf, LOGMEM(jsonctx->ctx), LY_EMEM);
258 size = len + offset + 1;
259 memcpy(&buf[len], in, offset);
260
261 /* set terminating NULL byte */
262 buf[len + offset] = '\0';
263 }
264 len += offset;
265 ++offset;
266 in += offset;
267 goto success;
268 } else {
269 /* get it as UTF-8 character for check */
270 const char *c = &in[offset];
271 uint32_t code = 0;
272 size_t code_len = 0;
273
274 LY_CHECK_ERR_GOTO(ly_getutf8(&c, &code, &code_len),
Radek Krejci2efc45b2020-12-22 16:25:44 +0100275 LOGVAL(jsonctx->ctx, LY_VCODE_INCHAR, in[offset]), error);
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200276
277 LY_CHECK_ERR_GOTO(!is_jsonstrchar(code),
Radek Krejci2efc45b2020-12-22 16:25:44 +0100278 LOGVAL(jsonctx->ctx, LYVE_SYNTAX, "Invalid character in JSON string \"%.*s\" (0x%08x).",
Radek Krejci422afb12021-03-04 16:38:16 +0100279 (int)(&in[offset] - start + code_len), start, code),
Michal Vasko69730152020-10-09 16:30:07 +0200280 error);
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200281
282 /* character is ok, continue */
283 offset += code_len;
284 }
285 }
286
287 /* EOF reached before endchar */
Radek Krejci2efc45b2020-12-22 16:25:44 +0100288 LOGVAL(jsonctx->ctx, LY_VCODE_EOF);
289 LOGVAL_LINE(jsonctx->ctx, start_line, LYVE_SYNTAX, "Missing quotation-mark at the end of a JSON string.");
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200290
291error:
292 free(buf);
293 return LY_EVALID;
294
295success:
Radek Krejcid54412f2020-12-17 20:25:35 +0100296 jsonctx->in->current = in;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200297 if (buf) {
298 lyjson_ctx_set_value(jsonctx, buf, len, 1);
299 } else {
300 lyjson_ctx_set_value(jsonctx, start, len, 0);
301 }
302
303 return LY_SUCCESS;
304
305#undef BUFSIZE
306#undef BUFSIZE_STEP
307}
308
309/*
310 *
311 * Wrapper around lyjson_string_() adding LYJSON_STRING status into context to allow using lyjson_string_() for parsing object's name.
312 */
313static LY_ERR
314lyjson_string(struct lyjson_ctx *jsonctx)
315{
316 LY_CHECK_RET(lyjson_string_(jsonctx));
317
318 JSON_PUSH_STATUS_RET(jsonctx, LYJSON_STRING);
319 LY_CHECK_RET(lyjson_check_next(jsonctx));
320
321 return LY_SUCCESS;
322}
323
aPieceke87c0a12021-05-13 15:43:26 +0200324/**
aPiecek76034c32021-06-08 15:03:11 +0200325 * @brief Calculate how many @p c characters there are in a row.
326 *
327 * @param[in] str Count from this position.
328 * @param[in] end Position after the last checked character.
329 * @param[in] c Checked character.
330 * @param[in] backwards Set to 1, if to proceed from end-1 to str.
331 * @return Number of characters in a row.
332 */
333static uint32_t
334lyjson_count_in_row(const char *str, const char *end, char c, ly_bool backwards)
335{
336 uint32_t cnt;
337
338 assert(str && end);
339
340 if (str >= end) {
341 return 0;
342 }
343
344 if (!backwards) {
345 for (cnt = 0; (str != end) && (*str == c); ++str, ++cnt) {}
346 } else {
347 --end;
348 --str;
349 for (cnt = 0; (str != end) && (*end == c); --end, ++cnt) {}
350 }
351
352 return cnt;
353}
354
355/**
356 * @brief Check if the number can be shortened to zero.
357 *
358 * The input number must be syntactically valid.
359 *
360 * @param[in] in Start of input string;
361 * @param[in] end End of input string;
362 * @return 1 if number is zero, otherwise 0.
363 */
364static ly_bool
365lyjson_number_is_zero(const char *in, const char *end)
366{
367 assert(end >= in);
368
369 if ((in[0] == '-') || (in[0] == '+')) {
370 in++;
371 }
372 if ((in[0] == '0') && (in[1] == '.')) {
373 in += 2;
374 }
375
376 return lyjson_count_in_row(in, end, '0', 0) == end - in;
377}
378
379/**
aPieceke87c0a12021-05-13 15:43:26 +0200380 * @brief Allocate buffer for number in string format.
381 *
382 * @param[in] jsonctx JSON context.
383 * @param[in] num_len Required space in bytes for a number.
384 * Terminating null byte is added by default.
385 * @param[out] buffer Output allocated buffer.
386 * @return LY_ERR value.
387 */
388static LY_ERR
aPiecek76034c32021-06-08 15:03:11 +0200389lyjson_get_buffer_for_number(const struct ly_ctx *ctx, uint32_t num_len, char **buffer)
aPieceke87c0a12021-05-13 15:43:26 +0200390{
391 *buffer = NULL;
392
aPiecek76034c32021-06-08 15:03:11 +0200393 LY_CHECK_ERR_RET((num_len + 1) > LY_NUMBER_MAXLEN, LOGVAL(ctx, LYVE_SEMANTICS,
aPieceke87c0a12021-05-13 15:43:26 +0200394 "Number encoded as a string exceeded the LY_NUMBER_MAXLEN limit."), LY_EVALID);
395
aPiecek76034c32021-06-08 15:03:11 +0200396 /* allocate buffer for the result (add NULL-byte) */
aPieceke87c0a12021-05-13 15:43:26 +0200397 *buffer = malloc(num_len + 1);
aPiecek76034c32021-06-08 15:03:11 +0200398 LY_CHECK_ERR_RET(!(*buffer), LOGMEM(ctx), LY_EMEM);
399 return LY_SUCCESS;
400}
401
/**
 * @brief Copy the 'numeric part' (@p num) except its decimal point
 * (@p dec_point) and insert the new decimal point (@p dp_position)
 * only if it is to be placed in the 'numeric part' range (@p num).
 *
 * @param[in] num Begin of the 'numeric part'.
 * @param[in] num_len Length of the 'numeric part'.
 * @param[in] dec_point Pointer to the old decimal point.
 * If it has a NULL value, it is ignored.
 * @param[in] dp_position Position of the new decimal point.
 * If it has a negative value, it is ignored.
 * @param[out] dst Memory into which the copied result is written.
 * No terminating NULL byte is written.
 * @return Number of characters written to the @p dst.
 */
static uint32_t
lyjson_exp_number_copy_num_part(const char *num, uint32_t num_len,
        char *dec_point, int32_t dp_position, char *dst)
{
    int32_t dec_point_idx;
    int32_t n, d;

    assert(num && dst);

    /* INT32_MAX is used as "no old decimal point" index */
    dec_point_idx = dec_point ? (int32_t)(dec_point - num) : INT32_MAX;
    assert((dec_point_idx >= 0) && (dec_point_idx != dp_position));

    for (n = 0, d = 0; (uint32_t)n < num_len; n++) {
        if (n == dec_point_idx) {
            /* the old decimal point is dropped */
            continue;
        }
        if (d == dp_position) {
            /* the new decimal point goes right before this digit */
            dst[d++] = '.';
        }
        dst[d++] = num[n];
    }

    return d;
}

442/**
443 * @brief Convert JSON number with exponent into the representation
444 * used by YANG.
445 *
446 * The input numeric string must be syntactically valid. Also, before
447 * calling this function, checks should be performed using the
448 * ::lyjson_number_is_zero().
449 *
450 * @param[in] ctx Context for the error message.
451 * @param[in] in Beginning of the string containing the number.
452 * @param[in] exponent Pointer to the letter E/e.
453 * @param[in] total_len Total size of the input number.
454 * @param[out] res Conversion result.
455 * @param[out] res_len Length of the result.
456 * @return LY_ERR value.
457 */
458static LY_ERR
459lyjson_exp_number(const struct ly_ctx *ctx, const char *in, const char *exponent,
460 size_t total_len, char **res, size_t *res_len)
461{
462
463#define MAYBE_WRITE_MINUS(ARRAY, INDEX, FLAG) \
464 if (FLAG) { \
465 ARRAY[INDEX++] = '-'; \
466 }
467
468/* Length of leading zero followed by the decimal point. */
469#define LEADING_ZERO 1
470
471/* Flags for the ::lyjson_count_in_row() */
472#define FORWARD 0
473#define BACKWARD 1
474
475 /* Buffer where the result is stored. */
476 char *buf;
477 /* Size without space for terminating NULL-byte. */
478 uint32_t buf_len;
479 /* Index to buf. */
480 uint32_t i = 0;
481 /* A 'numeric part' doesn't contain a minus sign or an leading zero.
482 * For example, in 0.45, there is the leading zero.
483 */
484 const char *num;
485 /* Length of the 'numeric part' ends before E/e. */
486 uint32_t num_len;
487 /* Position of decimal point in the num. */
488 char *dec_point;
489 /* Final position of decimal point in the buf. */
490 int32_t dp_position;
491 /* Exponent as integer. */
492 long int e_val;
493 /* Byte for the decimal point. */
494 int8_t dot;
495 /* Required additional byte for the minus sign. */
496 uint8_t minus;
497 /* The number of zeros. */
498 long zeros;
499 /* If the number starts with leading zero followed by the decimal point. */
500 ly_bool leading_zero;
501
502 assert(ctx && in && exponent && res && res_len && (total_len > 2));
503 assert((in < exponent) && ((*exponent == 'e') || (*exponent == 'E')));
504
505 /* Convert exponent. */
506 errno = 0;
507 e_val = strtol(exponent + 1, NULL, LY_BASE_DEC);
508 if (errno) {
509 LOGVAL(ctx, LYVE_SEMANTICS,
510 "Exponent out-of-bounds in a JSON Number value (%.*s).",
511 total_len, in);
512 return LY_EVALID;
513 }
514
515 minus = in[0] == '-';
516 if (in[minus] == '0') {
517 assert(in[minus + 1] == '.');
518 leading_zero = 1;
519 /* The leading zero has been found, it will be skipped. */
520 num = &in[minus + 1];
521 } else {
522 leading_zero = 0;
523 /* Set to the first number. */
524 num = &in[minus];
525 }
526 num_len = exponent - num;
527
528 /* Find the location of the decimal points. */
529 dec_point = ly_strnchr(num, '.', num_len);
530 dp_position = dec_point ?
531 dec_point - num + e_val :
532 num_len + e_val;
533
534 /* Remove zeros after the decimal point from the end of
535 * the 'numeric part' because these are useless.
536 * (For example, in 40.001000 these are the last 3).
537 */
538 num_len -= dp_position > 0 ?
539 lyjson_count_in_row(num + dp_position - 1, exponent, '0', BACKWARD) :
540 lyjson_count_in_row(num, exponent, '0', BACKWARD);
541
542 /* Decide what to do with the dot from the 'numeric part'. */
543 if (dec_point && ((int32_t)(num_len - 1) == dp_position)) {
544 /* Decimal point in the last place is useless. */
545 dot = -1;
546 } else if (dec_point) {
547 /* Decimal point is shifted. */
548 dot = 0;
549 } else {
550 /* Additional byte for the decimal point is requred. */
551 dot = 1;
552 }
553
554 /* Final composition of the result. */
555 if (dp_position <= 0) {
556 /* Adding decimal point before the integer with adding additional zero(s). */
557
558 zeros = labs(dp_position);
559 buf_len = minus + LEADING_ZERO + dot + zeros + num_len;
560 LY_CHECK_RET(lyjson_get_buffer_for_number(ctx, buf_len, &buf));
561 MAYBE_WRITE_MINUS(buf, i, minus);
562 buf[i++] = '0';
563 buf[i++] = '.';
564 memset(buf + i, '0', zeros);
565 i += zeros;
566 dp_position = -1;
567 lyjson_exp_number_copy_num_part(num, num_len, dec_point, dp_position, buf + i);
568 } else if (leading_zero && (dp_position < (ssize_t)num_len)) {
569 /* Insert decimal point between the integer's digits. */
570
571 /* Set a new range of 'numeric part'. Old decimal point is skipped. */
572 num++;
573 num_len--;
574 dp_position--;
575 /* Get the number of useless zeros between the old
576 * and new decimal point. For example, in the number 0.005E1,
577 * there is one useless zero.
578 */
579 zeros = lyjson_count_in_row(num, num + dp_position + 1, '0', FORWARD);
580 /* If the new decimal point will be in the place of the first non-zero subnumber. */
581 if (zeros == (dp_position + 1)) {
582 /* keep one zero as leading zero */
583 zeros--;
584 /* new decimal point will be behind the leading zero */
585 dp_position = 1;
586 dot = 1;
587 } else {
588 dot = 0;
589 }
590 buf_len = minus + dot + (num_len - zeros);
591 LY_CHECK_RET(lyjson_get_buffer_for_number(ctx, buf_len, &buf));
592 MAYBE_WRITE_MINUS(buf, i, minus);
593 /* Skip useless zeros and copy. */
594 lyjson_exp_number_copy_num_part(num + zeros, num_len - zeros, NULL, dp_position, buf + i);
595 } else if (dp_position < (ssize_t)num_len) {
596 /* Insert decimal point between the integer's digits. */
597
598 buf_len = minus + dot + num_len;
599 LY_CHECK_RET(lyjson_get_buffer_for_number(ctx, buf_len, &buf));
600 MAYBE_WRITE_MINUS(buf, i, minus);
601 lyjson_exp_number_copy_num_part(num, num_len, dec_point, dp_position, buf + i);
602 } else if (leading_zero) {
603 /* Adding decimal point after the decimal value make the integer result. */
604
605 /* Set a new range of 'numeric part'. Old decimal point is skipped. */
606 num++;
607 num_len--;
608 /* Get the number of useless zeros. */
609 zeros = lyjson_count_in_row(num, num + num_len, '0', FORWARD);
610 buf_len = minus + dp_position - zeros;
611 LY_CHECK_RET(lyjson_get_buffer_for_number(ctx, buf_len, &buf));
612 MAYBE_WRITE_MINUS(buf, i, minus);
613 /* Skip useless zeros and copy. */
614 i += lyjson_exp_number_copy_num_part(num + zeros, num_len - zeros, NULL, dp_position, buf + i);
615 /* Add multiples of ten behind the 'numeric part'. */
616 memset(buf + i, '0', buf_len - i);
617 } else {
618 /* Adding decimal point after the decimal value make the integer result. */
619
620 buf_len = minus + dp_position;
621 LY_CHECK_RET(lyjson_get_buffer_for_number(ctx, buf_len, &buf));
622 MAYBE_WRITE_MINUS(buf, i, minus);
623 i += lyjson_exp_number_copy_num_part(num, num_len, dec_point, dp_position, buf + i);
624 /* Add multiples of ten behind the 'numeric part'. */
625 memset(buf + i, '0', buf_len - i);
626 }
627
628 buf[buf_len] = '\0';
629 *res = buf;
630 *res_len = buf_len;
631
632#undef MAYBE_WRITE_MINUS
633#undef LEADING_ZERO
634#undef FORWARD
635#undef BACKWARD
636
aPieceke87c0a12021-05-13 15:43:26 +0200637 return LY_SUCCESS;
638}
639
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200640static LY_ERR
641lyjson_number(struct lyjson_ctx *jsonctx)
642{
aPiecek76034c32021-06-08 15:03:11 +0200643 size_t offset = 0, num_len;
644 const char *in = jsonctx->in->current, *exponent = NULL;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200645 uint8_t minus = 0;
aPiecek76034c32021-06-08 15:03:11 +0200646 char *num;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200647
648 if (in[offset] == '-') {
649 ++offset;
650 minus = 1;
651 }
652
653 if (in[offset] == '0') {
654 ++offset;
655 } else if (isdigit(in[offset])) {
656 ++offset;
657 while (isdigit(in[offset])) {
658 ++offset;
659 }
660 } else {
661invalid_character:
662 if (in[offset]) {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100663 LOGVAL(jsonctx->ctx, LYVE_SYNTAX, "Invalid character in JSON Number value (\"%c\").", in[offset]);
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200664 } else {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100665 LOGVAL(jsonctx->ctx, LY_VCODE_EOF);
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200666 }
667 return LY_EVALID;
668 }
669
670 if (in[offset] == '.') {
671 ++offset;
672 if (!isdigit(in[offset])) {
673 goto invalid_character;
674 }
675 while (isdigit(in[offset])) {
676 ++offset;
677 }
678 }
679
680 if ((in[offset] == 'e') || (in[offset] == 'E')) {
aPiecek76034c32021-06-08 15:03:11 +0200681 exponent = &in[offset];
682 ++offset;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200683 if ((in[offset] == '+') || (in[offset] == '-')) {
684 ++offset;
685 }
686 if (!isdigit(in[offset])) {
687 goto invalid_character;
688 }
689 while (isdigit(in[offset])) {
690 ++offset;
691 }
692 }
693
aPiecek76034c32021-06-08 15:03:11 +0200694 if (lyjson_number_is_zero(in, exponent ? exponent : &in[offset])) {
695 lyjson_ctx_set_value(jsonctx, in, minus + 1, 0);
696 } else if (exponent && lyjson_number_is_zero(exponent + 1, &in[offset])) {
697 lyjson_ctx_set_value(jsonctx, in, exponent - in, 0);
698 } else if (exponent) {
699 LY_CHECK_RET(lyjson_exp_number(jsonctx->ctx, in, exponent, offset, &num, &num_len));
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200700 lyjson_ctx_set_value(jsonctx, num, num_len, 1);
701 } else {
702 /* store the number */
aPiecek76034c32021-06-08 15:03:11 +0200703 lyjson_ctx_set_value(jsonctx, in, offset, 0);
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200704 }
705 ly_in_skip(jsonctx->in, offset);
706
707 JSON_PUSH_STATUS_RET(jsonctx, LYJSON_NUMBER);
708 LY_CHECK_RET(lyjson_check_next(jsonctx));
709
710 return LY_SUCCESS;
711}
712
713static LY_ERR
714lyjson_object_name(struct lyjson_ctx *jsonctx)
715{
716 if (*jsonctx->in->current != '"') {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100717 LOGVAL(jsonctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(jsonctx->in->current),
Michal Vasko69730152020-10-09 16:30:07 +0200718 jsonctx->in->current, "a JSON object's member");
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200719 return LY_EVALID;
720 }
721 ly_in_skip(jsonctx->in, 1);
722
723 LY_CHECK_RET(lyjson_string_(jsonctx));
724 LY_CHECK_RET(skip_ws(jsonctx));
Michal Vasko08dc70b2020-10-07 13:58:47 +0200725 if (*jsonctx->in->current != ':') {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100726 LOGVAL(jsonctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(jsonctx->in->current), jsonctx->in->current,
727 "a JSON object's name-separator ':'");
Michal Vasko08dc70b2020-10-07 13:58:47 +0200728 return LY_EVALID;
729 }
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200730 ly_in_skip(jsonctx->in, 1);
731 LY_CHECK_RET(skip_ws(jsonctx));
732
733 return LY_SUCCESS;
734}
735
736static LY_ERR
737lyjson_object(struct lyjson_ctx *jsonctx)
738{
739 LY_CHECK_RET(skip_ws(jsonctx));
740
741 if (*jsonctx->in->current == '}') {
aPiecek93582ed2021-05-25 14:49:06 +0200742 assert(jsonctx->depth);
743 jsonctx->depth--;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200744 /* empty object */
745 ly_in_skip(jsonctx->in, 1);
746 lyjson_ctx_set_value(jsonctx, NULL, 0, 0);
747 JSON_PUSH_STATUS_RET(jsonctx, LYJSON_OBJECT_EMPTY);
748 return LY_SUCCESS;
749 }
750
751 LY_CHECK_RET(lyjson_object_name(jsonctx));
752
753 /* output data are set by lyjson_string_() */
754 JSON_PUSH_STATUS_RET(jsonctx, LYJSON_OBJECT);
755
756 return LY_SUCCESS;
757}
758
759/*
760 * @brief Process JSON array envelope
761 *
762 *
763 *
764 * @param[in] jsonctx JSON parser context
765 * @return LY_SUCCESS or LY_EMEM
766 */
767static LY_ERR
768lyjson_array(struct lyjson_ctx *jsonctx)
769{
770 LY_CHECK_RET(skip_ws(jsonctx));
771
772 if (*jsonctx->in->current == ']') {
773 /* empty array */
774 ly_in_skip(jsonctx->in, 1);
775 JSON_PUSH_STATUS_RET(jsonctx, LYJSON_ARRAY_EMPTY);
776 } else {
777 JSON_PUSH_STATUS_RET(jsonctx, LYJSON_ARRAY);
778 }
779
780 /* erase previous values, array has no value on its own */
781 lyjson_ctx_set_value(jsonctx, NULL, 0, 0);
782
783 return LY_SUCCESS;
784}
785
786static LY_ERR
787lyjson_value(struct lyjson_ctx *jsonctx)
788{
Michal Vasko69730152020-10-09 16:30:07 +0200789 if (jsonctx->status.count && (lyjson_ctx_status(jsonctx, 0) == LYJSON_END)) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200790 return LY_SUCCESS;
791 }
792
Radek Krejcif13b87b2020-12-01 22:02:17 +0100793 if ((*jsonctx->in->current == 'f') && !strncmp(jsonctx->in->current, "false", ly_strlen_const("false"))) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200794 /* false */
Radek Krejcif13b87b2020-12-01 22:02:17 +0100795 lyjson_ctx_set_value(jsonctx, jsonctx->in->current, ly_strlen_const("false"), 0);
796 ly_in_skip(jsonctx->in, ly_strlen_const("false"));
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200797 JSON_PUSH_STATUS_RET(jsonctx, LYJSON_FALSE);
798 LY_CHECK_RET(lyjson_check_next(jsonctx));
799
Radek Krejcif13b87b2020-12-01 22:02:17 +0100800 } else if ((*jsonctx->in->current == 't') && !strncmp(jsonctx->in->current, "true", ly_strlen_const("true"))) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200801 /* true */
Radek Krejcif13b87b2020-12-01 22:02:17 +0100802 lyjson_ctx_set_value(jsonctx, jsonctx->in->current, ly_strlen_const("true"), 0);
803 ly_in_skip(jsonctx->in, ly_strlen_const("true"));
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200804 JSON_PUSH_STATUS_RET(jsonctx, LYJSON_TRUE);
805 LY_CHECK_RET(lyjson_check_next(jsonctx));
806
Radek Krejcif13b87b2020-12-01 22:02:17 +0100807 } else if ((*jsonctx->in->current == 'n') && !strncmp(jsonctx->in->current, "null", ly_strlen_const("null"))) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200808 /* none */
Radek Krejci201963a2020-12-03 11:43:40 +0100809 lyjson_ctx_set_value(jsonctx, "", 0, 0);
Radek Krejcif13b87b2020-12-01 22:02:17 +0100810 ly_in_skip(jsonctx->in, ly_strlen_const("null"));
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200811 JSON_PUSH_STATUS_RET(jsonctx, LYJSON_NULL);
812 LY_CHECK_RET(lyjson_check_next(jsonctx));
813
814 } else if (*jsonctx->in->current == '"') {
815 /* string */
816 ly_in_skip(jsonctx->in, 1);
817 LY_CHECK_RET(lyjson_string(jsonctx));
818
819 } else if (*jsonctx->in->current == '[') {
820 /* array */
821 ly_in_skip(jsonctx->in, 1);
822 LY_CHECK_RET(lyjson_array(jsonctx));
823
824 } else if (*jsonctx->in->current == '{') {
aPiecek93582ed2021-05-25 14:49:06 +0200825 jsonctx->depth++;
826 if (jsonctx->depth > LY_MAX_BLOCK_DEPTH) {
827 LOGERR(jsonctx->ctx, LY_EINVAL,
828 "The maximum number of block nestings has been exceeded.");
829 return LY_EINVAL;
830 }
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200831 /* object */
832 ly_in_skip(jsonctx->in, 1);
833 LY_CHECK_RET(lyjson_object(jsonctx));
834
Michal Vasko69730152020-10-09 16:30:07 +0200835 } else if ((*jsonctx->in->current == '-') || ((*jsonctx->in->current >= '0') && (*jsonctx->in->current <= '9'))) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200836 /* number */
837 LY_CHECK_RET(lyjson_number(jsonctx));
838
839 } else {
840 /* unexpected value */
Radek Krejci2efc45b2020-12-22 16:25:44 +0100841 LOGVAL(jsonctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(jsonctx->in->current),
Michal Vasko69730152020-10-09 16:30:07 +0200842 jsonctx->in->current, "a JSON value");
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200843 return LY_EVALID;
844 }
845
846 return LY_SUCCESS;
847}
848
849LY_ERR
850lyjson_ctx_new(const struct ly_ctx *ctx, struct ly_in *in, struct lyjson_ctx **jsonctx_p)
851{
852 LY_ERR ret = LY_SUCCESS;
853 struct lyjson_ctx *jsonctx;
854
855 assert(ctx);
856 assert(in);
857 assert(jsonctx_p);
858
859 /* new context */
860 jsonctx = calloc(1, sizeof *jsonctx);
861 LY_CHECK_ERR_RET(!jsonctx, LOGMEM(ctx), LY_EMEM);
862 jsonctx->ctx = ctx;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200863 jsonctx->in = in;
864
Radek Krejciddace2c2021-01-08 11:30:56 +0100865 LOG_LOCINIT(NULL, NULL, NULL, in);
Radek Krejci2efc45b2020-12-22 16:25:44 +0100866
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200867 /* parse JSON value, if any */
868 LY_CHECK_GOTO(ret = skip_ws(jsonctx), cleanup);
869 if (lyjson_ctx_status(jsonctx, 0) == LYJSON_END) {
870 /* empty data input */
871 goto cleanup;
872 }
873
874 ret = lyjson_value(jsonctx);
875
Michal Vasko69730152020-10-09 16:30:07 +0200876 if ((jsonctx->status.count > 1) && (lyjson_ctx_status(jsonctx, 0) == LYJSON_END)) {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100877 LOGVAL(jsonctx->ctx, LY_VCODE_EOF);
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200878 ret = LY_EVALID;
879 }
880
881cleanup:
882 if (ret) {
883 lyjson_ctx_free(jsonctx);
884 } else {
885 *jsonctx_p = jsonctx;
886 }
887 return ret;
888}
889
890void
891lyjson_ctx_backup(struct lyjson_ctx *jsonctx)
892{
893 if (jsonctx->backup.dynamic) {
894 free((char *)jsonctx->backup.value);
895 }
896 jsonctx->backup.status = lyjson_ctx_status(jsonctx, 0);
897 jsonctx->backup.status_count = jsonctx->status.count;
898 jsonctx->backup.value = jsonctx->value;
899 jsonctx->backup.value_len = jsonctx->value_len;
900 jsonctx->backup.input = jsonctx->in->current;
901 jsonctx->backup.dynamic = jsonctx->dynamic;
aPiecek93582ed2021-05-25 14:49:06 +0200902 jsonctx->backup.depth = jsonctx->depth;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200903 jsonctx->dynamic = 0;
904}
905
906void
907lyjson_ctx_restore(struct lyjson_ctx *jsonctx)
908{
909 if (jsonctx->dynamic) {
910 free((char *)jsonctx->value);
911 }
912 jsonctx->status.count = jsonctx->backup.status_count;
Michal Vasko22df3f02020-08-24 13:29:22 +0200913 jsonctx->status.objs[jsonctx->backup.status_count - 1] = (void *)jsonctx->backup.status;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200914 jsonctx->value = jsonctx->backup.value;
915 jsonctx->value_len = jsonctx->backup.value_len;
916 jsonctx->in->current = jsonctx->backup.input;
917 jsonctx->dynamic = jsonctx->backup.dynamic;
aPiecek93582ed2021-05-25 14:49:06 +0200918 jsonctx->depth = jsonctx->backup.depth;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200919 jsonctx->backup.dynamic = 0;
920}
921
922LY_ERR
923lyjson_ctx_next(struct lyjson_ctx *jsonctx, enum LYJSON_PARSER_STATUS *status)
924{
925 LY_ERR ret = LY_SUCCESS;
Radek Krejci857189e2020-09-01 13:26:36 +0200926 ly_bool toplevel = 0;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200927 enum LYJSON_PARSER_STATUS prev;
928
929 assert(jsonctx);
930
931 prev = lyjson_ctx_status(jsonctx, 0);
932
Michal Vasko69730152020-10-09 16:30:07 +0200933 if ((prev == LYJSON_OBJECT) || (prev == LYJSON_ARRAY)) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200934 /* get value for the object's member OR the first value in the array */
935 ret = lyjson_value(jsonctx);
936 goto result;
937 } else {
938 /* the previous token is closed and should be completely processed */
939 JSON_POP_STATUS_RET(jsonctx);
940 prev = lyjson_ctx_status(jsonctx, 0);
941 }
942
943 if (!jsonctx->status.count) {
944 /* we are done with the top level value */
945 toplevel = 1;
946 }
947 LY_CHECK_RET(skip_ws(jsonctx));
948 if (toplevel && !jsonctx->status.count) {
949 /* EOF expected, but there are some data after the top level token */
Radek Krejci2efc45b2020-12-22 16:25:44 +0100950 LOGVAL(jsonctx->ctx, LYVE_SYNTAX, "Expecting end-of-input, but some data follows the top level JSON value.");
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200951 return LY_EVALID;
952 }
953
954 if (toplevel) {
955 /* we are done */
956 return LY_SUCCESS;
957 }
958
959 /* continue with the next token */
960 assert(prev == LYJSON_OBJECT || prev == LYJSON_ARRAY);
961
962 if (*jsonctx->in->current == ',') {
963 /* sibling item in the ... */
964 ly_in_skip(jsonctx->in, 1);
965 LY_CHECK_RET(skip_ws(jsonctx));
966
967 if (prev == LYJSON_OBJECT) {
968 /* ... object - get another object's member */
969 ret = lyjson_object_name(jsonctx);
970 } else { /* LYJSON_ARRAY */
971 /* ... array - get another complete value */
972 ret = lyjson_value(jsonctx);
973 }
Michal Vasko69730152020-10-09 16:30:07 +0200974 } else if (((prev == LYJSON_OBJECT) && (*jsonctx->in->current == '}')) || ((prev == LYJSON_ARRAY) && (*jsonctx->in->current == ']'))) {
aPiecek93582ed2021-05-25 14:49:06 +0200975 if (*jsonctx->in->current == '}') {
976 assert(jsonctx->depth);
977 jsonctx->depth--;
978 }
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200979 ly_in_skip(jsonctx->in, 1);
980 JSON_POP_STATUS_RET(jsonctx);
981 JSON_PUSH_STATUS_RET(jsonctx, prev + 1);
982 } else {
983 /* unexpected value */
Radek Krejci2efc45b2020-12-22 16:25:44 +0100984 LOGVAL(jsonctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(jsonctx->in->current), jsonctx->in->current,
985 prev == LYJSON_ARRAY ? "another JSON value in array" : "another JSON object's member");
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200986 return LY_EVALID;
987 }
988
989result:
Michal Vasko69730152020-10-09 16:30:07 +0200990 if ((ret == LY_SUCCESS) && (jsonctx->status.count > 1) && (lyjson_ctx_status(jsonctx, 0) == LYJSON_END)) {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100991 LOGVAL(jsonctx->ctx, LY_VCODE_EOF);
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200992 ret = LY_EVALID;
993 }
994
Michal Vasko69730152020-10-09 16:30:07 +0200995 if ((ret == LY_SUCCESS) && status) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200996 *status = lyjson_ctx_status(jsonctx, 0);
997 }
998
999 return ret;
1000}
1001
1002enum LYJSON_PARSER_STATUS
1003lyjson_ctx_status(struct lyjson_ctx *jsonctx, uint32_t index)
1004{
1005 assert(jsonctx);
1006
1007 if (jsonctx->status.count < index) {
1008 return LYJSON_ERROR;
1009 } else if (jsonctx->status.count == index) {
1010 return LYJSON_ROOT;
1011 } else {
Michal Vasko27915722020-08-31 14:54:42 +02001012 return (enum LYJSON_PARSER_STATUS)(uintptr_t)jsonctx->status.objs[jsonctx->status.count - (index + 1)];
Radek Krejci50f0c6b2020-06-18 16:31:48 +02001013 }
1014}
1015
1016void
1017lyjson_ctx_free(struct lyjson_ctx *jsonctx)
1018{
1019 if (!jsonctx) {
1020 return;
1021 }
1022
Radek Krejciddace2c2021-01-08 11:30:56 +01001023 LOG_LOCBACK(0, 0, 0, 1);
Radek Krejci2efc45b2020-12-22 16:25:44 +01001024
Radek Krejci50f0c6b2020-06-18 16:31:48 +02001025 if (jsonctx->dynamic) {
Michal Vasko22df3f02020-08-24 13:29:22 +02001026 free((char *)jsonctx->value);
Radek Krejci50f0c6b2020-06-18 16:31:48 +02001027 }
1028 if (jsonctx->backup.dynamic) {
1029 free((char *)jsonctx->backup.value);
1030 }
1031
1032 ly_set_erase(&jsonctx->status, NULL);
1033
1034 free(jsonctx);
1035}