blob: c5c3f32c3ea27061c6cb9e208ffbb6c2f80f2dbd [file] [log] [blame]
Radek Krejci50f0c6b2020-06-18 16:31:48 +02001/**
2 * @file json.c
3 * @author Radek Krejci <rkrejci@cesnet.cz>
4 * @brief Generic JSON format parser for libyang
5 *
6 * Copyright (c) 2020 CESNET, z.s.p.o.
7 *
8 * This source code is licensed under BSD 3-Clause License (the "License").
9 * You may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * https://opensource.org/licenses/BSD-3-Clause
13 */
14
15#include <assert.h>
16#include <ctype.h>
17#include <errno.h>
Radek Krejci50f0c6b2020-06-18 16:31:48 +020018#include <stdlib.h>
Radek Krejci47fab892020-11-05 17:02:41 +010019#include <string.h>
Radek Krejci50f0c6b2020-06-18 16:31:48 +020020#include <sys/types.h>
21
22#include "common.h"
Michal Vaskoafac7822020-10-20 14:22:26 +020023#include "in_internal.h"
Radek Krejci47fab892020-11-05 17:02:41 +010024#include "json.h"
Radek Krejci50f0c6b2020-06-18 16:31:48 +020025
/**
 * @brief Push @p STATUS onto the status stack of the JSON context @p CTX.
 *
 * The status is stored as a pointer-sized item of the context's status set.
 * The expansion is wrapped in LY_CHECK_RET, so a failing ly_set_add() makes
 * the enclosing function return that error.
 */
#define JSON_PUSH_STATUS_RET(CTX, STATUS) \
    LY_CHECK_RET(ly_set_add(&(CTX)->status, (void *)(uintptr_t)(STATUS), 1, NULL))

/**
 * @brief Drop the most recently pushed status from the status stack of @p CTX.
 *
 * Wrapped in do-while(0) so the macro behaves as a single statement (e.g. in
 * an unbraced if); it has to be followed by a semicolon.
 */
#define JSON_POP_STATUS_RET(CTX) \
    do { \
        assert((CTX)->status.count); \
        (CTX)->status.count--; \
    } while (0)
31
Michal Vasko22df3f02020-08-24 13:29:22 +020032const char *
Radek Krejci50f0c6b2020-06-18 16:31:48 +020033lyjson_token2str(enum LYJSON_PARSER_STATUS status)
34{
35 switch (status) {
36 case LYJSON_ERROR:
37 return "error";
38 case LYJSON_ROOT:
39 return "document root";
40 case LYJSON_FALSE:
41 return "false";
42 case LYJSON_TRUE:
43 return "true";
44 case LYJSON_NULL:
45 return "null";
46 case LYJSON_OBJECT:
47 return "object";
48 case LYJSON_OBJECT_CLOSED:
49 return "object closed";
50 case LYJSON_OBJECT_EMPTY:
51 return "empty object";
52 case LYJSON_ARRAY:
53 return "array";
54 case LYJSON_ARRAY_CLOSED:
55 return "array closed";
56 case LYJSON_ARRAY_EMPTY:
57 return "empty array";
58 case LYJSON_NUMBER:
59 return "number";
60 case LYJSON_STRING:
61 return "string";
62 case LYJSON_END:
63 return "end of input";
64 }
65
66 return "";
67}
68
69static LY_ERR
70skip_ws(struct lyjson_ctx *jsonctx)
71{
72 /* skip leading whitespaces */
73 while (*jsonctx->in->current != '\0' && is_jsonws(*jsonctx->in->current)) {
Radek Krejcidd713ce2021-01-04 23:12:12 +010074 if (*jsonctx->in->current == '\n') {
75 LY_IN_NEW_LINE(jsonctx->in);
76 }
Radek Krejci50f0c6b2020-06-18 16:31:48 +020077 ly_in_skip(jsonctx->in, 1);
78 }
79 if (*jsonctx->in->current == '\0') {
80 JSON_PUSH_STATUS_RET(jsonctx, LYJSON_END);
81 }
82
83 return LY_SUCCESS;
84}
85
86/*
87 * @brief Set value corresponding to the current context's status
88 */
89static void
Radek Krejci857189e2020-09-01 13:26:36 +020090lyjson_ctx_set_value(struct lyjson_ctx *jsonctx, const char *value, size_t value_len, ly_bool dynamic)
Radek Krejci50f0c6b2020-06-18 16:31:48 +020091{
92 assert(jsonctx);
93
Juraj Vijtiukec285cd2021-01-14 11:41:20 +010094 if (jsonctx->dynamic) {
Michal Vasko22df3f02020-08-24 13:29:22 +020095 free((char *)jsonctx->value);
Radek Krejci50f0c6b2020-06-18 16:31:48 +020096 }
97 jsonctx->value = value;
98 jsonctx->value_len = value_len;
99 jsonctx->dynamic = dynamic;
100}
101
102static LY_ERR
103lyjson_check_next(struct lyjson_ctx *jsonctx)
104{
105 if (jsonctx->status.count == 1) {
106 /* top level value (JSON-text), ws expected */
Michal Vasko69730152020-10-09 16:30:07 +0200107 if ((*jsonctx->in->current == '\0') || is_jsonws(*jsonctx->in->current)) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200108 return LY_SUCCESS;
109 }
110 } else if (lyjson_ctx_status(jsonctx, 1) == LYJSON_OBJECT) {
111 LY_CHECK_RET(skip_ws(jsonctx));
Michal Vasko69730152020-10-09 16:30:07 +0200112 if ((*jsonctx->in->current == ',') || (*jsonctx->in->current == '}')) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200113 return LY_SUCCESS;
114 }
115 } else if (lyjson_ctx_status(jsonctx, 1) == LYJSON_ARRAY) {
116 LY_CHECK_RET(skip_ws(jsonctx));
Michal Vasko69730152020-10-09 16:30:07 +0200117 if ((*jsonctx->in->current == ',') || (*jsonctx->in->current == ']')) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200118 return LY_SUCCESS;
119 }
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200120 }
121
Radek Krejcie7010dc2021-03-04 15:54:24 +0100122 LOGVAL(jsonctx->ctx, LYVE_SYNTAX, "Unexpected character \"%c\" after JSON %s.",
123 *jsonctx->in->current, lyjson_token2str(lyjson_ctx_status(jsonctx, 0)));
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200124 return LY_EVALID;
125}
126
127/**
128 * Input is expected to start after the opening quotation-mark.
129 * When succeeds, input is moved after the closing quotation-mark.
130 */
131static LY_ERR
132lyjson_string_(struct lyjson_ctx *jsonctx)
133{
134#define BUFSIZE 24
135#define BUFSIZE_STEP 128
136
137 const char *in = jsonctx->in->current, *start;
138 char *buf = NULL;
139 size_t offset; /* read offset in input buffer */
140 size_t len; /* length of the output string (write offset in output buffer) */
141 size_t size = 0; /* size of the output buffer */
142 size_t u;
143 uint64_t start_line;
144
145 assert(jsonctx);
146
147 /* init */
148 start = in;
Radek Krejcid54412f2020-12-17 20:25:35 +0100149 start_line = jsonctx->in->line;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200150 offset = len = 0;
151
152 /* parse */
153 while (in[offset]) {
154 if (in[offset] == '\\') {
155 /* escape sequence */
Michal Vasko2be1d762021-03-11 16:53:15 +0100156 const char *slash = &in[offset];
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200157 uint32_t value;
158 uint8_t i = 1;
159
160 if (!buf) {
161 /* prepare output buffer */
162 buf = malloc(BUFSIZE);
163 LY_CHECK_ERR_RET(!buf, LOGMEM(jsonctx->ctx), LY_EMEM);
164 size = BUFSIZE;
165 }
166
167 /* allocate enough for the offset and next character,
168 * we will need 4 bytes at most since we support only the predefined
169 * (one-char) entities and character references */
170 if (len + offset + 4 >= size) {
Juraj Vijtiukd746a352021-01-15 11:33:33 +0100171 size_t increment;
Radek Krejcidf549132021-01-21 10:32:32 +0100172 for (increment = BUFSIZE_STEP; len + offset + 4 >= size + increment; increment += BUFSIZE_STEP) {}
Juraj Vijtiukd746a352021-01-15 11:33:33 +0100173 buf = ly_realloc(buf, size + increment);
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200174 LY_CHECK_ERR_RET(!buf, LOGMEM(jsonctx->ctx), LY_EMEM);
175 size += BUFSIZE_STEP;
176 }
177
178 if (offset) {
179 /* store what we have so far */
180 memcpy(&buf[len], in, offset);
181 len += offset;
182 in += offset;
183 offset = 0;
184 }
185
186 switch (in[++offset]) {
187 case '"':
188 /* quotation mark */
189 value = 0x22;
190 break;
191 case '\\':
192 /* reverse solidus */
193 value = 0x5c;
194 break;
195 case '/':
196 /* solidus */
197 value = 0x2f;
198 break;
199 case 'b':
200 /* backspace */
201 value = 0x08;
202 break;
203 case 'f':
204 /* form feed */
205 value = 0x0c;
206 break;
207 case 'n':
208 /* line feed */
209 value = 0x0a;
210 break;
211 case 'r':
212 /* carriage return */
213 value = 0x0d;
214 break;
215 case 't':
216 /* tab */
217 value = 0x09;
218 break;
219 case 'u':
220 /* Basic Multilingual Plane character \uXXXX */
221 offset++;
222 for (value = i = 0; i < 4; i++) {
Juraj Vijtiuk2b94e4b2020-11-16 23:52:07 +0100223 if (!in[offset + i]) {
Michal Vasko2be1d762021-03-11 16:53:15 +0100224 LOGVAL(jsonctx->ctx, LYVE_SYNTAX, "Invalid basic multilingual plane character \"%s\".", slash);
Juraj Vijtiuk2b94e4b2020-11-16 23:52:07 +0100225 goto error;
226 } else if (isdigit(in[offset + i])) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200227 u = (in[offset + i] - '0');
228 } else if (in[offset + i] > 'F') {
Radek Krejcif13b87b2020-12-01 22:02:17 +0100229 u = LY_BASE_DEC + (in[offset + i] - 'a');
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200230 } else {
Radek Krejcif13b87b2020-12-01 22:02:17 +0100231 u = LY_BASE_DEC + (in[offset + i] - 'A');
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200232 }
Radek Krejcif13b87b2020-12-01 22:02:17 +0100233 value = (LY_BASE_HEX * value) + u;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200234 }
235 break;
236 default:
237 /* invalid escape sequence */
Radek Krejci2efc45b2020-12-22 16:25:44 +0100238 LOGVAL(jsonctx->ctx, LYVE_SYNTAX, "Invalid character escape sequence \\%c.", in[offset]);
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200239 goto error;
240
241 }
242
243 offset += i; /* add read escaped characters */
244 LY_CHECK_ERR_GOTO(ly_pututf8(&buf[len], value, &u),
Radek Krejci2efc45b2020-12-22 16:25:44 +0100245 LOGVAL(jsonctx->ctx, LYVE_SYNTAX, "Invalid character reference \"%.*s\" (0x%08x).",
Michal Vasko2be1d762021-03-11 16:53:15 +0100246 (int)(&in[offset] - slash), slash, value),
Michal Vasko69730152020-10-09 16:30:07 +0200247 error);
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200248 len += u; /* update number of bytes in buffer */
249 in += offset; /* move the input by the processed bytes stored in the buffer ... */
250 offset = 0; /* ... and reset the offset index for future moving data into buffer */
251
252 } else if (in[offset] == '"') {
253 /* end of string */
254 if (buf) {
255 /* realloc exact size string */
256 buf = ly_realloc(buf, len + offset + 1);
257 LY_CHECK_ERR_RET(!buf, LOGMEM(jsonctx->ctx), LY_EMEM);
258 size = len + offset + 1;
259 memcpy(&buf[len], in, offset);
260
261 /* set terminating NULL byte */
262 buf[len + offset] = '\0';
263 }
264 len += offset;
265 ++offset;
266 in += offset;
267 goto success;
268 } else {
269 /* get it as UTF-8 character for check */
270 const char *c = &in[offset];
271 uint32_t code = 0;
272 size_t code_len = 0;
273
274 LY_CHECK_ERR_GOTO(ly_getutf8(&c, &code, &code_len),
Radek Krejci2efc45b2020-12-22 16:25:44 +0100275 LOGVAL(jsonctx->ctx, LY_VCODE_INCHAR, in[offset]), error);
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200276
277 LY_CHECK_ERR_GOTO(!is_jsonstrchar(code),
Radek Krejci2efc45b2020-12-22 16:25:44 +0100278 LOGVAL(jsonctx->ctx, LYVE_SYNTAX, "Invalid character in JSON string \"%.*s\" (0x%08x).",
Radek Krejci422afb12021-03-04 16:38:16 +0100279 (int)(&in[offset] - start + code_len), start, code),
Michal Vasko69730152020-10-09 16:30:07 +0200280 error);
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200281
282 /* character is ok, continue */
283 offset += code_len;
284 }
285 }
286
287 /* EOF reached before endchar */
Radek Krejci2efc45b2020-12-22 16:25:44 +0100288 LOGVAL(jsonctx->ctx, LY_VCODE_EOF);
289 LOGVAL_LINE(jsonctx->ctx, start_line, LYVE_SYNTAX, "Missing quotation-mark at the end of a JSON string.");
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200290
291error:
292 free(buf);
293 return LY_EVALID;
294
295success:
Radek Krejcid54412f2020-12-17 20:25:35 +0100296 jsonctx->in->current = in;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200297 if (buf) {
298 lyjson_ctx_set_value(jsonctx, buf, len, 1);
299 } else {
300 lyjson_ctx_set_value(jsonctx, start, len, 0);
301 }
302
303 return LY_SUCCESS;
304
305#undef BUFSIZE
306#undef BUFSIZE_STEP
307}
308
309/*
310 *
311 * Wrapper around lyjson_string_() adding LYJSON_STRING status into context to allow using lyjson_string_() for parsing object's name.
312 */
313static LY_ERR
314lyjson_string(struct lyjson_ctx *jsonctx)
315{
316 LY_CHECK_RET(lyjson_string_(jsonctx));
317
318 JSON_PUSH_STATUS_RET(jsonctx, LYJSON_STRING);
319 LY_CHECK_RET(lyjson_check_next(jsonctx));
320
321 return LY_SUCCESS;
322}
323
/**
 * @brief Calculate how many @p c characters there are in a row.
 *
 * @param[in] str Count from this position.
 * @param[in] end Position after the last checked character.
 * @param[in] c Checked character.
 * @param[in] backwards Set to 1, if to proceed from end-1 to str.
 * @return Number of characters in a row, 0 when the range is empty.
 */
static uint32_t
lyjson_count_in_row(const char *str, const char *end, char c, ly_bool backwards)
{
    uint32_t cnt = 0;

    assert(str && end);

    if (str >= end) {
        return 0;
    }

    if (!backwards) {
        for ( ; (str != end) && (*str == c); ++str) {
            ++cnt;
        }
    } else {
        /* keep @p end as a one-past pointer and look at end[-1], so that no
         * pointer before @p str is ever formed */
        for ( ; (end != str) && (end[-1] == c); --end) {
            ++cnt;
        }
    }

    return cnt;
}
354
355/**
356 * @brief Check if the number can be shortened to zero.
357 *
aPiecek76034c32021-06-08 15:03:11 +0200358 * @param[in] in Start of input string;
359 * @param[in] end End of input string;
360 * @return 1 if number is zero, otherwise 0.
361 */
362static ly_bool
363lyjson_number_is_zero(const char *in, const char *end)
364{
aPiecek28e101a2021-06-10 09:09:31 +0200365 assert(in < end);
aPiecek76034c32021-06-08 15:03:11 +0200366
367 if ((in[0] == '-') || (in[0] == '+')) {
368 in++;
aPiecek28e101a2021-06-10 09:09:31 +0200369 assert(in < end);
aPiecek76034c32021-06-08 15:03:11 +0200370 }
371 if ((in[0] == '0') && (in[1] == '.')) {
372 in += 2;
aPiecek28e101a2021-06-10 09:09:31 +0200373 if (!(in < end)) {
374 return 1;
375 }
aPiecek76034c32021-06-08 15:03:11 +0200376 }
377
378 return lyjson_count_in_row(in, end, '0', 0) == end - in;
379}
380
381/**
aPieceke87c0a12021-05-13 15:43:26 +0200382 * @brief Allocate buffer for number in string format.
383 *
384 * @param[in] jsonctx JSON context.
385 * @param[in] num_len Required space in bytes for a number.
386 * Terminating null byte is added by default.
387 * @param[out] buffer Output allocated buffer.
388 * @return LY_ERR value.
389 */
390static LY_ERR
aPiecek76034c32021-06-08 15:03:11 +0200391lyjson_get_buffer_for_number(const struct ly_ctx *ctx, uint32_t num_len, char **buffer)
aPieceke87c0a12021-05-13 15:43:26 +0200392{
393 *buffer = NULL;
394
aPiecek76034c32021-06-08 15:03:11 +0200395 LY_CHECK_ERR_RET((num_len + 1) > LY_NUMBER_MAXLEN, LOGVAL(ctx, LYVE_SEMANTICS,
aPieceke87c0a12021-05-13 15:43:26 +0200396 "Number encoded as a string exceeded the LY_NUMBER_MAXLEN limit."), LY_EVALID);
397
aPiecek76034c32021-06-08 15:03:11 +0200398 /* allocate buffer for the result (add NULL-byte) */
aPieceke87c0a12021-05-13 15:43:26 +0200399 *buffer = malloc(num_len + 1);
aPiecek76034c32021-06-08 15:03:11 +0200400 LY_CHECK_ERR_RET(!(*buffer), LOGMEM(ctx), LY_EMEM);
401 return LY_SUCCESS;
402}
403
/**
 * @brief Copy the 'numeric part' (@p num) without its decimal point
 * (@p dec_point), inserting a new decimal point at @p dp_position if that
 * output index is reached while copying.
 *
 * @param[in] num Begin of the 'numeric part'.
 * @param[in] num_len Length of the 'numeric part'.
 * @param[in] dec_point Pointer to the old decimal point inside @p num.
 * If it has a NULL value, it is ignored.
 * @param[in] dp_position Index in @p dst where the new decimal point belongs.
 * If it has a negative value, it is ignored.
 * @param[out] dst Memory into which the copied result is written. No
 * terminating null byte is added.
 * @return Number of characters written to the @p dst.
 */
static uint32_t
lyjson_exp_number_copy_num_part(const char *num, uint32_t num_len,
        char *dec_point, int32_t dp_position, char *dst)
{
    int32_t skip_idx;
    int32_t src = 0, out = 0;

    assert(num && dst);

    /* index of the character that must not be copied */
    skip_idx = dec_point ? dec_point - num : INT32_MAX;
    assert((skip_idx >= 0) && (skip_idx != dp_position));

    while ((uint32_t)src < num_len) {
        if (src != skip_idx) {
            if (out == dp_position) {
                /* the new decimal point precedes this character */
                dst[out++] = '.';
            }
            dst[out++] = num[src];
        }
        ++src;
    }

    return out;
}
443
444/**
445 * @brief Convert JSON number with exponent into the representation
446 * used by YANG.
447 *
448 * The input numeric string must be syntactically valid. Also, before
449 * calling this function, checks should be performed using the
450 * ::lyjson_number_is_zero().
451 *
452 * @param[in] ctx Context for the error message.
453 * @param[in] in Beginning of the string containing the number.
454 * @param[in] exponent Pointer to the letter E/e.
455 * @param[in] total_len Total size of the input number.
456 * @param[out] res Conversion result.
457 * @param[out] res_len Length of the result.
458 * @return LY_ERR value.
459 */
460static LY_ERR
461lyjson_exp_number(const struct ly_ctx *ctx, const char *in, const char *exponent,
462 size_t total_len, char **res, size_t *res_len)
463{
464
465#define MAYBE_WRITE_MINUS(ARRAY, INDEX, FLAG) \
466 if (FLAG) { \
467 ARRAY[INDEX++] = '-'; \
468 }
469
470/* Length of leading zero followed by the decimal point. */
471#define LEADING_ZERO 1
472
473/* Flags for the ::lyjson_count_in_row() */
474#define FORWARD 0
475#define BACKWARD 1
476
477 /* Buffer where the result is stored. */
478 char *buf;
479 /* Size without space for terminating NULL-byte. */
480 uint32_t buf_len;
481 /* Index to buf. */
482 uint32_t i = 0;
483 /* A 'numeric part' doesn't contain a minus sign or an leading zero.
484 * For example, in 0.45, there is the leading zero.
485 */
486 const char *num;
487 /* Length of the 'numeric part' ends before E/e. */
488 uint32_t num_len;
489 /* Position of decimal point in the num. */
490 char *dec_point;
491 /* Final position of decimal point in the buf. */
492 int32_t dp_position;
493 /* Exponent as integer. */
494 long int e_val;
495 /* Byte for the decimal point. */
496 int8_t dot;
497 /* Required additional byte for the minus sign. */
498 uint8_t minus;
499 /* The number of zeros. */
500 long zeros;
501 /* If the number starts with leading zero followed by the decimal point. */
502 ly_bool leading_zero;
503
504 assert(ctx && in && exponent && res && res_len && (total_len > 2));
505 assert((in < exponent) && ((*exponent == 'e') || (*exponent == 'E')));
506
507 /* Convert exponent. */
508 errno = 0;
509 e_val = strtol(exponent + 1, NULL, LY_BASE_DEC);
510 if (errno) {
511 LOGVAL(ctx, LYVE_SEMANTICS,
512 "Exponent out-of-bounds in a JSON Number value (%.*s).",
513 total_len, in);
514 return LY_EVALID;
515 }
516
517 minus = in[0] == '-';
518 if (in[minus] == '0') {
519 assert(in[minus + 1] == '.');
520 leading_zero = 1;
521 /* The leading zero has been found, it will be skipped. */
522 num = &in[minus + 1];
523 } else {
524 leading_zero = 0;
525 /* Set to the first number. */
526 num = &in[minus];
527 }
528 num_len = exponent - num;
529
530 /* Find the location of the decimal points. */
531 dec_point = ly_strnchr(num, '.', num_len);
532 dp_position = dec_point ?
533 dec_point - num + e_val :
534 num_len + e_val;
535
536 /* Remove zeros after the decimal point from the end of
537 * the 'numeric part' because these are useless.
538 * (For example, in 40.001000 these are the last 3).
539 */
540 num_len -= dp_position > 0 ?
541 lyjson_count_in_row(num + dp_position - 1, exponent, '0', BACKWARD) :
542 lyjson_count_in_row(num, exponent, '0', BACKWARD);
543
544 /* Decide what to do with the dot from the 'numeric part'. */
545 if (dec_point && ((int32_t)(num_len - 1) == dp_position)) {
546 /* Decimal point in the last place is useless. */
547 dot = -1;
548 } else if (dec_point) {
549 /* Decimal point is shifted. */
550 dot = 0;
551 } else {
552 /* Additional byte for the decimal point is requred. */
553 dot = 1;
554 }
555
556 /* Final composition of the result. */
557 if (dp_position <= 0) {
558 /* Adding decimal point before the integer with adding additional zero(s). */
559
560 zeros = labs(dp_position);
561 buf_len = minus + LEADING_ZERO + dot + zeros + num_len;
562 LY_CHECK_RET(lyjson_get_buffer_for_number(ctx, buf_len, &buf));
563 MAYBE_WRITE_MINUS(buf, i, minus);
564 buf[i++] = '0';
565 buf[i++] = '.';
566 memset(buf + i, '0', zeros);
567 i += zeros;
568 dp_position = -1;
569 lyjson_exp_number_copy_num_part(num, num_len, dec_point, dp_position, buf + i);
570 } else if (leading_zero && (dp_position < (ssize_t)num_len)) {
571 /* Insert decimal point between the integer's digits. */
572
573 /* Set a new range of 'numeric part'. Old decimal point is skipped. */
574 num++;
575 num_len--;
576 dp_position--;
577 /* Get the number of useless zeros between the old
578 * and new decimal point. For example, in the number 0.005E1,
579 * there is one useless zero.
580 */
581 zeros = lyjson_count_in_row(num, num + dp_position + 1, '0', FORWARD);
582 /* If the new decimal point will be in the place of the first non-zero subnumber. */
583 if (zeros == (dp_position + 1)) {
584 /* keep one zero as leading zero */
585 zeros--;
586 /* new decimal point will be behind the leading zero */
587 dp_position = 1;
588 dot = 1;
589 } else {
590 dot = 0;
591 }
592 buf_len = minus + dot + (num_len - zeros);
593 LY_CHECK_RET(lyjson_get_buffer_for_number(ctx, buf_len, &buf));
594 MAYBE_WRITE_MINUS(buf, i, minus);
595 /* Skip useless zeros and copy. */
596 lyjson_exp_number_copy_num_part(num + zeros, num_len - zeros, NULL, dp_position, buf + i);
597 } else if (dp_position < (ssize_t)num_len) {
598 /* Insert decimal point between the integer's digits. */
599
600 buf_len = minus + dot + num_len;
601 LY_CHECK_RET(lyjson_get_buffer_for_number(ctx, buf_len, &buf));
602 MAYBE_WRITE_MINUS(buf, i, minus);
603 lyjson_exp_number_copy_num_part(num, num_len, dec_point, dp_position, buf + i);
604 } else if (leading_zero) {
605 /* Adding decimal point after the decimal value make the integer result. */
606
607 /* Set a new range of 'numeric part'. Old decimal point is skipped. */
608 num++;
609 num_len--;
610 /* Get the number of useless zeros. */
611 zeros = lyjson_count_in_row(num, num + num_len, '0', FORWARD);
612 buf_len = minus + dp_position - zeros;
613 LY_CHECK_RET(lyjson_get_buffer_for_number(ctx, buf_len, &buf));
614 MAYBE_WRITE_MINUS(buf, i, minus);
615 /* Skip useless zeros and copy. */
616 i += lyjson_exp_number_copy_num_part(num + zeros, num_len - zeros, NULL, dp_position, buf + i);
617 /* Add multiples of ten behind the 'numeric part'. */
618 memset(buf + i, '0', buf_len - i);
619 } else {
620 /* Adding decimal point after the decimal value make the integer result. */
621
622 buf_len = minus + dp_position;
623 LY_CHECK_RET(lyjson_get_buffer_for_number(ctx, buf_len, &buf));
624 MAYBE_WRITE_MINUS(buf, i, minus);
625 i += lyjson_exp_number_copy_num_part(num, num_len, dec_point, dp_position, buf + i);
626 /* Add multiples of ten behind the 'numeric part'. */
627 memset(buf + i, '0', buf_len - i);
628 }
629
630 buf[buf_len] = '\0';
631 *res = buf;
632 *res_len = buf_len;
633
634#undef MAYBE_WRITE_MINUS
635#undef LEADING_ZERO
636#undef FORWARD
637#undef BACKWARD
638
aPieceke87c0a12021-05-13 15:43:26 +0200639 return LY_SUCCESS;
640}
641
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200642static LY_ERR
643lyjson_number(struct lyjson_ctx *jsonctx)
644{
aPiecek76034c32021-06-08 15:03:11 +0200645 size_t offset = 0, num_len;
646 const char *in = jsonctx->in->current, *exponent = NULL;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200647 uint8_t minus = 0;
aPiecek76034c32021-06-08 15:03:11 +0200648 char *num;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200649
650 if (in[offset] == '-') {
651 ++offset;
652 minus = 1;
653 }
654
655 if (in[offset] == '0') {
656 ++offset;
657 } else if (isdigit(in[offset])) {
658 ++offset;
659 while (isdigit(in[offset])) {
660 ++offset;
661 }
662 } else {
663invalid_character:
664 if (in[offset]) {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100665 LOGVAL(jsonctx->ctx, LYVE_SYNTAX, "Invalid character in JSON Number value (\"%c\").", in[offset]);
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200666 } else {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100667 LOGVAL(jsonctx->ctx, LY_VCODE_EOF);
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200668 }
669 return LY_EVALID;
670 }
671
672 if (in[offset] == '.') {
673 ++offset;
674 if (!isdigit(in[offset])) {
675 goto invalid_character;
676 }
677 while (isdigit(in[offset])) {
678 ++offset;
679 }
680 }
681
682 if ((in[offset] == 'e') || (in[offset] == 'E')) {
aPiecek76034c32021-06-08 15:03:11 +0200683 exponent = &in[offset];
684 ++offset;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200685 if ((in[offset] == '+') || (in[offset] == '-')) {
686 ++offset;
687 }
688 if (!isdigit(in[offset])) {
689 goto invalid_character;
690 }
691 while (isdigit(in[offset])) {
692 ++offset;
693 }
694 }
695
aPiecek76034c32021-06-08 15:03:11 +0200696 if (lyjson_number_is_zero(in, exponent ? exponent : &in[offset])) {
697 lyjson_ctx_set_value(jsonctx, in, minus + 1, 0);
698 } else if (exponent && lyjson_number_is_zero(exponent + 1, &in[offset])) {
699 lyjson_ctx_set_value(jsonctx, in, exponent - in, 0);
700 } else if (exponent) {
701 LY_CHECK_RET(lyjson_exp_number(jsonctx->ctx, in, exponent, offset, &num, &num_len));
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200702 lyjson_ctx_set_value(jsonctx, num, num_len, 1);
703 } else {
aPiecek5b6dd182021-06-10 09:11:58 +0200704 if (offset > LY_NUMBER_MAXLEN) {
705 LOGVAL(jsonctx->ctx, LYVE_SEMANTICS,
706 "Number encoded as a string exceeded the LY_NUMBER_MAXLEN limit.");
707 return LY_EVALID;
708 }
aPiecek76034c32021-06-08 15:03:11 +0200709 lyjson_ctx_set_value(jsonctx, in, offset, 0);
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200710 }
711 ly_in_skip(jsonctx->in, offset);
712
713 JSON_PUSH_STATUS_RET(jsonctx, LYJSON_NUMBER);
714 LY_CHECK_RET(lyjson_check_next(jsonctx));
715
716 return LY_SUCCESS;
717}
718
719static LY_ERR
720lyjson_object_name(struct lyjson_ctx *jsonctx)
721{
722 if (*jsonctx->in->current != '"') {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100723 LOGVAL(jsonctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(jsonctx->in->current),
Michal Vasko69730152020-10-09 16:30:07 +0200724 jsonctx->in->current, "a JSON object's member");
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200725 return LY_EVALID;
726 }
727 ly_in_skip(jsonctx->in, 1);
728
729 LY_CHECK_RET(lyjson_string_(jsonctx));
730 LY_CHECK_RET(skip_ws(jsonctx));
Michal Vasko08dc70b2020-10-07 13:58:47 +0200731 if (*jsonctx->in->current != ':') {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100732 LOGVAL(jsonctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(jsonctx->in->current), jsonctx->in->current,
733 "a JSON object's name-separator ':'");
Michal Vasko08dc70b2020-10-07 13:58:47 +0200734 return LY_EVALID;
735 }
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200736 ly_in_skip(jsonctx->in, 1);
737 LY_CHECK_RET(skip_ws(jsonctx));
738
739 return LY_SUCCESS;
740}
741
742static LY_ERR
743lyjson_object(struct lyjson_ctx *jsonctx)
744{
745 LY_CHECK_RET(skip_ws(jsonctx));
746
747 if (*jsonctx->in->current == '}') {
aPiecek93582ed2021-05-25 14:49:06 +0200748 assert(jsonctx->depth);
749 jsonctx->depth--;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200750 /* empty object */
751 ly_in_skip(jsonctx->in, 1);
752 lyjson_ctx_set_value(jsonctx, NULL, 0, 0);
753 JSON_PUSH_STATUS_RET(jsonctx, LYJSON_OBJECT_EMPTY);
754 return LY_SUCCESS;
755 }
756
757 LY_CHECK_RET(lyjson_object_name(jsonctx));
758
759 /* output data are set by lyjson_string_() */
760 JSON_PUSH_STATUS_RET(jsonctx, LYJSON_OBJECT);
761
762 return LY_SUCCESS;
763}
764
765/*
766 * @brief Process JSON array envelope
767 *
768 *
769 *
770 * @param[in] jsonctx JSON parser context
771 * @return LY_SUCCESS or LY_EMEM
772 */
773static LY_ERR
774lyjson_array(struct lyjson_ctx *jsonctx)
775{
776 LY_CHECK_RET(skip_ws(jsonctx));
777
778 if (*jsonctx->in->current == ']') {
779 /* empty array */
780 ly_in_skip(jsonctx->in, 1);
781 JSON_PUSH_STATUS_RET(jsonctx, LYJSON_ARRAY_EMPTY);
782 } else {
783 JSON_PUSH_STATUS_RET(jsonctx, LYJSON_ARRAY);
784 }
785
786 /* erase previous values, array has no value on its own */
787 lyjson_ctx_set_value(jsonctx, NULL, 0, 0);
788
789 return LY_SUCCESS;
790}
791
792static LY_ERR
793lyjson_value(struct lyjson_ctx *jsonctx)
794{
Michal Vasko69730152020-10-09 16:30:07 +0200795 if (jsonctx->status.count && (lyjson_ctx_status(jsonctx, 0) == LYJSON_END)) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200796 return LY_SUCCESS;
797 }
798
Radek Krejcif13b87b2020-12-01 22:02:17 +0100799 if ((*jsonctx->in->current == 'f') && !strncmp(jsonctx->in->current, "false", ly_strlen_const("false"))) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200800 /* false */
Radek Krejcif13b87b2020-12-01 22:02:17 +0100801 lyjson_ctx_set_value(jsonctx, jsonctx->in->current, ly_strlen_const("false"), 0);
802 ly_in_skip(jsonctx->in, ly_strlen_const("false"));
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200803 JSON_PUSH_STATUS_RET(jsonctx, LYJSON_FALSE);
804 LY_CHECK_RET(lyjson_check_next(jsonctx));
805
Radek Krejcif13b87b2020-12-01 22:02:17 +0100806 } else if ((*jsonctx->in->current == 't') && !strncmp(jsonctx->in->current, "true", ly_strlen_const("true"))) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200807 /* true */
Radek Krejcif13b87b2020-12-01 22:02:17 +0100808 lyjson_ctx_set_value(jsonctx, jsonctx->in->current, ly_strlen_const("true"), 0);
809 ly_in_skip(jsonctx->in, ly_strlen_const("true"));
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200810 JSON_PUSH_STATUS_RET(jsonctx, LYJSON_TRUE);
811 LY_CHECK_RET(lyjson_check_next(jsonctx));
812
Radek Krejcif13b87b2020-12-01 22:02:17 +0100813 } else if ((*jsonctx->in->current == 'n') && !strncmp(jsonctx->in->current, "null", ly_strlen_const("null"))) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200814 /* none */
Radek Krejci201963a2020-12-03 11:43:40 +0100815 lyjson_ctx_set_value(jsonctx, "", 0, 0);
Radek Krejcif13b87b2020-12-01 22:02:17 +0100816 ly_in_skip(jsonctx->in, ly_strlen_const("null"));
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200817 JSON_PUSH_STATUS_RET(jsonctx, LYJSON_NULL);
818 LY_CHECK_RET(lyjson_check_next(jsonctx));
819
820 } else if (*jsonctx->in->current == '"') {
821 /* string */
822 ly_in_skip(jsonctx->in, 1);
823 LY_CHECK_RET(lyjson_string(jsonctx));
824
825 } else if (*jsonctx->in->current == '[') {
826 /* array */
827 ly_in_skip(jsonctx->in, 1);
828 LY_CHECK_RET(lyjson_array(jsonctx));
829
830 } else if (*jsonctx->in->current == '{') {
aPiecek93582ed2021-05-25 14:49:06 +0200831 jsonctx->depth++;
832 if (jsonctx->depth > LY_MAX_BLOCK_DEPTH) {
833 LOGERR(jsonctx->ctx, LY_EINVAL,
834 "The maximum number of block nestings has been exceeded.");
835 return LY_EINVAL;
836 }
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200837 /* object */
838 ly_in_skip(jsonctx->in, 1);
839 LY_CHECK_RET(lyjson_object(jsonctx));
840
Michal Vasko69730152020-10-09 16:30:07 +0200841 } else if ((*jsonctx->in->current == '-') || ((*jsonctx->in->current >= '0') && (*jsonctx->in->current <= '9'))) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200842 /* number */
843 LY_CHECK_RET(lyjson_number(jsonctx));
844
845 } else {
846 /* unexpected value */
Radek Krejci2efc45b2020-12-22 16:25:44 +0100847 LOGVAL(jsonctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(jsonctx->in->current),
Michal Vasko69730152020-10-09 16:30:07 +0200848 jsonctx->in->current, "a JSON value");
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200849 return LY_EVALID;
850 }
851
852 return LY_SUCCESS;
853}
854
855LY_ERR
856lyjson_ctx_new(const struct ly_ctx *ctx, struct ly_in *in, struct lyjson_ctx **jsonctx_p)
857{
858 LY_ERR ret = LY_SUCCESS;
859 struct lyjson_ctx *jsonctx;
860
861 assert(ctx);
862 assert(in);
863 assert(jsonctx_p);
864
865 /* new context */
866 jsonctx = calloc(1, sizeof *jsonctx);
867 LY_CHECK_ERR_RET(!jsonctx, LOGMEM(ctx), LY_EMEM);
868 jsonctx->ctx = ctx;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200869 jsonctx->in = in;
870
Radek Krejciddace2c2021-01-08 11:30:56 +0100871 LOG_LOCINIT(NULL, NULL, NULL, in);
Radek Krejci2efc45b2020-12-22 16:25:44 +0100872
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200873 /* parse JSON value, if any */
874 LY_CHECK_GOTO(ret = skip_ws(jsonctx), cleanup);
875 if (lyjson_ctx_status(jsonctx, 0) == LYJSON_END) {
876 /* empty data input */
877 goto cleanup;
878 }
879
880 ret = lyjson_value(jsonctx);
881
Michal Vasko69730152020-10-09 16:30:07 +0200882 if ((jsonctx->status.count > 1) && (lyjson_ctx_status(jsonctx, 0) == LYJSON_END)) {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100883 LOGVAL(jsonctx->ctx, LY_VCODE_EOF);
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200884 ret = LY_EVALID;
885 }
886
887cleanup:
888 if (ret) {
889 lyjson_ctx_free(jsonctx);
890 } else {
891 *jsonctx_p = jsonctx;
892 }
893 return ret;
894}
895
896void
897lyjson_ctx_backup(struct lyjson_ctx *jsonctx)
898{
899 if (jsonctx->backup.dynamic) {
900 free((char *)jsonctx->backup.value);
901 }
902 jsonctx->backup.status = lyjson_ctx_status(jsonctx, 0);
903 jsonctx->backup.status_count = jsonctx->status.count;
904 jsonctx->backup.value = jsonctx->value;
905 jsonctx->backup.value_len = jsonctx->value_len;
906 jsonctx->backup.input = jsonctx->in->current;
907 jsonctx->backup.dynamic = jsonctx->dynamic;
aPiecek93582ed2021-05-25 14:49:06 +0200908 jsonctx->backup.depth = jsonctx->depth;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200909 jsonctx->dynamic = 0;
910}
911
912void
913lyjson_ctx_restore(struct lyjson_ctx *jsonctx)
914{
915 if (jsonctx->dynamic) {
916 free((char *)jsonctx->value);
917 }
918 jsonctx->status.count = jsonctx->backup.status_count;
Michal Vasko22df3f02020-08-24 13:29:22 +0200919 jsonctx->status.objs[jsonctx->backup.status_count - 1] = (void *)jsonctx->backup.status;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200920 jsonctx->value = jsonctx->backup.value;
921 jsonctx->value_len = jsonctx->backup.value_len;
922 jsonctx->in->current = jsonctx->backup.input;
923 jsonctx->dynamic = jsonctx->backup.dynamic;
aPiecek93582ed2021-05-25 14:49:06 +0200924 jsonctx->depth = jsonctx->backup.depth;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200925 jsonctx->backup.dynamic = 0;
926}
927
928LY_ERR
929lyjson_ctx_next(struct lyjson_ctx *jsonctx, enum LYJSON_PARSER_STATUS *status)
930{
931 LY_ERR ret = LY_SUCCESS;
Radek Krejci857189e2020-09-01 13:26:36 +0200932 ly_bool toplevel = 0;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200933 enum LYJSON_PARSER_STATUS prev;
934
935 assert(jsonctx);
936
937 prev = lyjson_ctx_status(jsonctx, 0);
938
Michal Vasko69730152020-10-09 16:30:07 +0200939 if ((prev == LYJSON_OBJECT) || (prev == LYJSON_ARRAY)) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200940 /* get value for the object's member OR the first value in the array */
941 ret = lyjson_value(jsonctx);
942 goto result;
943 } else {
944 /* the previous token is closed and should be completely processed */
945 JSON_POP_STATUS_RET(jsonctx);
946 prev = lyjson_ctx_status(jsonctx, 0);
947 }
948
949 if (!jsonctx->status.count) {
950 /* we are done with the top level value */
951 toplevel = 1;
952 }
953 LY_CHECK_RET(skip_ws(jsonctx));
954 if (toplevel && !jsonctx->status.count) {
955 /* EOF expected, but there are some data after the top level token */
Radek Krejci2efc45b2020-12-22 16:25:44 +0100956 LOGVAL(jsonctx->ctx, LYVE_SYNTAX, "Expecting end-of-input, but some data follows the top level JSON value.");
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200957 return LY_EVALID;
958 }
959
960 if (toplevel) {
961 /* we are done */
962 return LY_SUCCESS;
963 }
964
965 /* continue with the next token */
966 assert(prev == LYJSON_OBJECT || prev == LYJSON_ARRAY);
967
968 if (*jsonctx->in->current == ',') {
969 /* sibling item in the ... */
970 ly_in_skip(jsonctx->in, 1);
971 LY_CHECK_RET(skip_ws(jsonctx));
972
973 if (prev == LYJSON_OBJECT) {
974 /* ... object - get another object's member */
975 ret = lyjson_object_name(jsonctx);
976 } else { /* LYJSON_ARRAY */
977 /* ... array - get another complete value */
978 ret = lyjson_value(jsonctx);
979 }
Michal Vasko69730152020-10-09 16:30:07 +0200980 } else if (((prev == LYJSON_OBJECT) && (*jsonctx->in->current == '}')) || ((prev == LYJSON_ARRAY) && (*jsonctx->in->current == ']'))) {
aPiecek93582ed2021-05-25 14:49:06 +0200981 if (*jsonctx->in->current == '}') {
982 assert(jsonctx->depth);
983 jsonctx->depth--;
984 }
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200985 ly_in_skip(jsonctx->in, 1);
986 JSON_POP_STATUS_RET(jsonctx);
987 JSON_PUSH_STATUS_RET(jsonctx, prev + 1);
988 } else {
989 /* unexpected value */
Radek Krejci2efc45b2020-12-22 16:25:44 +0100990 LOGVAL(jsonctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(jsonctx->in->current), jsonctx->in->current,
991 prev == LYJSON_ARRAY ? "another JSON value in array" : "another JSON object's member");
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200992 return LY_EVALID;
993 }
994
995result:
Michal Vasko69730152020-10-09 16:30:07 +0200996 if ((ret == LY_SUCCESS) && (jsonctx->status.count > 1) && (lyjson_ctx_status(jsonctx, 0) == LYJSON_END)) {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100997 LOGVAL(jsonctx->ctx, LY_VCODE_EOF);
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200998 ret = LY_EVALID;
999 }
1000
Michal Vasko69730152020-10-09 16:30:07 +02001001 if ((ret == LY_SUCCESS) && status) {
Radek Krejci50f0c6b2020-06-18 16:31:48 +02001002 *status = lyjson_ctx_status(jsonctx, 0);
1003 }
1004
1005 return ret;
1006}
1007
1008enum LYJSON_PARSER_STATUS
1009lyjson_ctx_status(struct lyjson_ctx *jsonctx, uint32_t index)
1010{
1011 assert(jsonctx);
1012
1013 if (jsonctx->status.count < index) {
1014 return LYJSON_ERROR;
1015 } else if (jsonctx->status.count == index) {
1016 return LYJSON_ROOT;
1017 } else {
Michal Vasko27915722020-08-31 14:54:42 +02001018 return (enum LYJSON_PARSER_STATUS)(uintptr_t)jsonctx->status.objs[jsonctx->status.count - (index + 1)];
Radek Krejci50f0c6b2020-06-18 16:31:48 +02001019 }
1020}
1021
1022void
1023lyjson_ctx_free(struct lyjson_ctx *jsonctx)
1024{
1025 if (!jsonctx) {
1026 return;
1027 }
1028
Radek Krejciddace2c2021-01-08 11:30:56 +01001029 LOG_LOCBACK(0, 0, 0, 1);
Radek Krejci2efc45b2020-12-22 16:25:44 +01001030
Radek Krejci50f0c6b2020-06-18 16:31:48 +02001031 if (jsonctx->dynamic) {
Michal Vasko22df3f02020-08-24 13:29:22 +02001032 free((char *)jsonctx->value);
Radek Krejci50f0c6b2020-06-18 16:31:48 +02001033 }
1034 if (jsonctx->backup.dynamic) {
1035 free((char *)jsonctx->backup.value);
1036 }
1037
1038 ly_set_erase(&jsonctx->status, NULL);
1039
1040 free(jsonctx);
1041}