/**
 * @file parser_json.c
 * @author Radek Krejci <rkrejci@cesnet.cz>
 * @brief JSON data parser for libyang
 *
 * Copyright (c) 2015 CESNET, z.s.p.o.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of the Company nor the names of its contributors
 *    may be used to endorse or promote products derived from this
 *    software without specific prior written permission.
 */
21
22#include <assert.h>
23#include <ctype.h>
24#include <limits.h>
25#include <stdlib.h>
26#include <string.h>
27
28#include "libyang.h"
29#include "common.h"
30#include "context.h"
31#include "parser.h"
32#include "printer.h"
33#include "tree_internal.h"
34#include "validation.h"
35
#ifndef NDEBUG
/*
 * Line counter handed to the LOGVAL() and validation calls in this file;
 * lyd_parse_json() resets it to 0 before parsing starts. It only exists in
 * builds without NDEBUG.
 * NOTE(review): presumably advanced by COUNTLINE() - confirm in common.h.
 */
static unsigned int lineno;
#endif
39
/**
 * @brief Check whether a character is one of the four whitespace characters
 * recognized by this parser: space, horizontal tab, line feed and carriage
 * return. Unlike isspace(), vertical tab and form feed are not accepted.
 *
 * @param[in] c Character to test.
 * @return 1 if \p c is whitespace, 0 otherwise.
 */
static int
lyjson_isspace(int c)
{
    /* 0x20 space, 0x09 horizontal tab, 0x0a line feed, 0x0d carriage return */
    if (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d) {
        return 1;
    }

    return 0;
}
53
/**
 * @brief Count the whitespace characters at the beginning of a string.
 *
 * COUNTLINE() is invoked for every skipped character.
 * NOTE(review): presumably this keeps the line counter up to date - confirm.
 *
 * @param[in] data NUL-terminated input.
 * @return Number of leading whitespace characters (as defined by
 * lyjson_isspace()) in \p data.
 */
static unsigned int
skip_ws(const char *data)
{
    unsigned int skipped;

    for (skipped = 0; data[skipped] && lyjson_isspace(data[skipped]); skipped++) {
        COUNTLINE(data[skipped]);
    }

    return skipped;
}
67
/**
 * @brief Print a text as a JSON string, i.e. enclosed in quotation marks and
 * with the characters that must not appear literally escaped.
 *
 * Control characters (below 0x20) are printed as \\uXXXX; quotation mark,
 * reverse solidus and solidus are printed with a preceding reverse solidus.
 * All other bytes, including the bytes of multi-byte UTF-8 sequences, are
 * written unchanged.
 *
 * @param[in] out Output handler passed through to ly_write()/ly_print().
 * @param[in] text NUL-terminated text to print, may be NULL.
 * @return 0 if \p text is NULL (nothing is printed), otherwise the character
 * count accumulated from the ly_print() return values and the single
 * characters written, plus 2 for the enclosing quotation marks.
 */
int
lyjson_dump_string(struct lyout *out, const char *text)
{
    unsigned int i, n;
    unsigned char byte;

    if (!text) {
        return 0;
    }

    ly_write(out, "\"", 1);
    for (i = n = 0; text[i]; i++) {
        /* compare as unsigned so that UTF-8 bytes >= 0x80 are not mistaken
         * for control characters where plain char is signed */
        byte = (unsigned char)text[i];

        if (byte < 0x20) {
            /* control character */
            n += ly_print(out, "\\u%.4X", byte);
        } else {
            switch (byte) {
            case '"':
                n += ly_print(out, "\\\"");
                break;
            case '\\':
                n += ly_print(out, "\\\\");
                break;
            case '/':
                n += ly_print(out, "\\/");
                break;
            default:
                ly_write(out, &text[i], 1);
                n++;
            }
        }
    }
    ly_write(out, "\"", 1);

    return n + 2;
}
102
103
104static char *
105lyjson_parse_text(const char *data, unsigned int *len)
106{
107#define BUFSIZE 1024
108
109 char buf[BUFSIZE];
110 char *result = NULL, *aux;
111 int o, size;
112 unsigned int r, i;
113 int32_t value;
114
115 for (*len = o = 0; data[*len] && data[*len] != '"'; o++) {
116 if (o > BUFSIZE - 3) {
117 /* add buffer into the result */
118 if (result) {
119 size = size + o;
120 aux = realloc(result, size + 1);
121 result = aux;
122 } else {
123 size = o;
124 result = malloc((size + 1) * sizeof *result);
125 }
126 memcpy(&result[size - o], buf, o);
127
128 /* write again into the beginning of the buffer */
129 o = 0;
130 }
131
132 if (data[*len] == '\\') {
133 /* parse escape sequence */
134 (*len)++;
135 i = 1;
136 switch (data[(*len)]) {
137 case '"':
138 /* quotation mark */
139 value = 0x22;
140 break;
141 case '\\':
142 /* reverse solidus */
143 value = 0x5c;
144 break;
145 case '/':
146 /* solidus */
147 value = 0x2f;
148 break;
149 case 'b':
150 /* backspace */
151 value = 0x08;
152 break;
153 case 'f':
154 /* form feed */
155 value = 0x0c;
156 break;
157 case 'n':
158 /* line feed */
159 value = 0x0a;
160 break;
161 case 'r':
162 /* carriage return */
163 value = 0x0d;
164 break;
165 case 't':
166 /* tab */
167 value = 0x09;
168 break;
169 case 'u':
170 /* Basic Multilingual Plane character \uXXXX */
171 (*len)++;
172 for (value = i = 0; i < 4; i++) {
173 if (isdigit(data[(*len) + i])) {
174 r = (data[(*len) + i] - '0');
175 } else if (data[(*len) + i] > 'F') {
176 r = 10 + (data[(*len) + i] - 'a');
177 } else {
178 r = 10 + (data[(*len) + i] - 'A');
179 }
180 value = (16 * value) + r;
181 }
182 break;
183 default:
184 /* invalid escape sequence */
185 LOGVAL(LYE_XML_INVAL, lineno, "character escape sequence");
186 goto error;
187
188 }
189 r = pututf8(&buf[o], value, lineno);
190 if (!r) {
191 LOGVAL(LYE_XML_INVAL, lineno, "character UTF8 character");
192 goto error;
193 }
194 o += r - 1; /* o is ++ in for loop */
195 (*len) += i; /* number of read characters */
196 } else if (data[*len] < 0x20 || data[*len] == 0x5c) {
197 /* control characters must be escaped */
198 LOGVAL(LYE_XML_INVAL, lineno, "control character (unescaped)");
199 goto error;
200 } else {
201 /* unescaped character */
202 buf[o] = data[*len];
203 COUNTLINE(buf[o]);
204 (*len)++;
205 }
206 }
207
208#undef BUFSIZE
209
210 if (o) {
211 if (result) {
212 size = size + o;
213 aux = realloc(result, size + 1);
214 result = aux;
215 } else {
216 size = o;
217 result = malloc((size + 1) * sizeof *result);
218 }
219 memcpy(&result[size - o], buf, o);
220 }
221 if (result) {
222 result[size] = '\0';
223 } else {
224 size = 0;
225 result = strdup("");
226 }
227
228 return result;
229
230error:
231 free(result);
232 return NULL;
233}
234
235static unsigned int
236lyjson_parse_number(const char *data)
237{
238 unsigned int len;
239 unsigned int i = 0;
240
241 for (len = 0;
242 data[len] && data[len] != ',' && data[len] != ']' && data[len] != '}' && !lyjson_isspace(data[len]);
243 len++) {
244
245 switch(data[len]) {
246 case '0':
247 if (!i && data[len + 1] != ';' && !lyjson_isspace(data[len + 1])) {
248 /* leading 0 is not allowed */
249 LOGVAL(LYE_XML_INVAL, lineno, "JSON number (leading zero)");
250 return 0;
251 }
252 /* no break */
253 case '1':
254 case '2':
255 case '3':
256 case '4':
257 case '5':
258 case '6':
259 case '7':
260 case '8':
261 case '9':
262 i = 1;
263 /* no break */
264 case 0x2d: /* minus */
265 /* ok */
266 break;
267 default:
268 LOGVAL(LYE_XML_INVAL, lineno, "character in JSON Number value");
269 return 0;
270 }
271 }
272
273 return len;
274}
275
276static unsigned int
277lyjson_parse_boolean(const char *data)
278{
279 unsigned int len;
280
281 if (!strncmp(data, "false", 5)) {
282 len = 5;
283 } else if (!strncmp(data, "true", 4)) {
284 len = 4;
285 }
286
287 if (data[len] && data[len] != ',' && data[len] != ']' && data[len] != '}' && !lyjson_isspace(data[len])) {
288 LOGVAL(LYE_XML_INVAL, lineno, "JSON literal value (expected true or false)");
289 return 0;
290 }
291
292 return len;
293}
294
295static unsigned int
296json_get_anyxml(struct lyd_node_anyxml *axml, const char *data)
297{
298 unsigned int len = 0;
299 char stop, start;
300 int level = 0;
301
302 switch (data[len]) {
303 case '"':
304 start = 0;
305 stop = '"';
306 level = 1;
307 break;
308 case '[':
309 start = '[';
310 stop = ']';
311 break;
312 case '{':
313 start = '{';
314 stop = '}';
315 break;
316 default:
317 /* number or one of literals */
318 while (!isspace(data[len])) {
319 len++;
320 }
321 axml->value = NULL; /* TODO ??? */
322 return len;
323 }
324
325 while (data[len]) {
326 if (start && data[len] == start) {
327 if (!len || data[len - 1] != '\\') {
328 level++;
329 }
330 } else if (data[len] == stop) {
331 if (!len || data[len - 1] != '\\') {
332 level--;
333 }
334 }
335 stop = len;
336 len++;
337 if (!level) {
338 /* we are done */
339 axml->value = NULL; /* TODO ??? */
340 return len;
341 }
342 }
343
344 return 0;
345}
346
347static unsigned int
Radek Krejci23238922015-10-27 17:13:34 +0100348json_get_value(struct lyd_node_leaf_list *leaf, const char *data, int options, struct unres_data *unres)
Radek Krejci5449d472015-10-26 14:35:56 +0100349{
350 struct lyd_node_leaf_list *new, *diter;
351 struct lys_type *stype, *type;
352 struct ly_ctx *ctx;
353 unsigned int len = 0, r;
Radek Krejci23238922015-10-27 17:13:34 +0100354 int found, resolve;
Radek Krejci5449d472015-10-26 14:35:56 +0100355 char *str;
356
357 assert(leaf && data && unres);
358 ctx = leaf->schema->module->ctx;
359
Radek Krejci23238922015-10-27 17:13:34 +0100360 if (options & (LYD_OPT_FILTER | LYD_OPT_EDIT)) {
361 resolve = 0;
362 } else {
363 resolve = 1;
364 }
365
Radek Krejci5449d472015-10-26 14:35:56 +0100366 stype = &((struct lys_node_leaf *)leaf->schema)->type;
Radek Krejci23238922015-10-27 17:13:34 +0100367
368 if (options & LYD_OPT_FILTER) {
369 /* no value in filter (selection) node is accepted in this case */
370 if (!strncmp(&data[len], "null", 4)) {
371 leaf->value_type = stype->base;
372 len +=4;
373 goto end;
374 }
375 }
376
Radek Krejci5449d472015-10-26 14:35:56 +0100377 if (leaf->schema->nodetype == LYS_LEAFLIST) {
378 /* expecting begin-array */
379 if (data[len++] != '[') {
380 LOGVAL(LYE_XML_INVAL, lineno, "JSON data (expected begin-array)");
381 return 0;
382 }
383
384repeat:
385 len += skip_ws(&data[len]);
386 }
387
388 /* will be changed in case of union */
389 leaf->value_type = stype->base;
390
391 if (data[len] == '"') {
392 /* string representations */
393 if (data[len++] != '"') {
394 LOGVAL(LYE_XML_INVAL, lineno, "JSON data (missing quotation-mark at the beginning of string)");
395 return 0;
396 }
397 str = lyjson_parse_text(&data[len], &r);
Radek Krejci23238922015-10-27 17:13:34 +0100398 if (!str) {
Radek Krejci5449d472015-10-26 14:35:56 +0100399 return 0;
400 }
401 leaf->value_str = lydict_insert_zc(ctx, str);
402 if (data[len + r] != '"') {
403 LOGVAL(LYE_XML_INVAL, lineno, "JSON data (missing quotation-mark at the end of string)");
404 return 0;
405 }
406 len += r + 1;
407 } else if (data[len] == '-' || isdigit(data[len])) {
408 /* numeric type */
409 r = lyjson_parse_number(&data[len]);
410 if (!r) {
411 return 0;
412 }
413 leaf->value_str = lydict_insert(ctx, &data[len], r);
414 len += r;
415 } else if (data[len] == 'f' || data[len] == 't') {
416 /* boolean */
417 r = lyjson_parse_boolean(&data[len]);
418 if (!r) {
419 return 0;
420 }
421 leaf->value_str = lydict_insert(ctx, &data[len], r);
422 len += r;
423 } else if (!strncmp(&data[len], "[null]", 6)) {
424 /* empty */
425 leaf->value_str = NULL;
426 len += 6;
427 } else {
428 /* error */
429 LOGVAL(LYE_XML_INVAL, lineno, "JSON data (unexpected value)");
430 return 0;
431 }
432
433 if (stype->base == LY_TYPE_UNION) {
434 found = 0;
435 type = lyp_get_next_union_type(stype, NULL, &found);
436 while (type) {
437 leaf->value_type = type->base;
438
Radek Krejci23238922015-10-27 17:13:34 +0100439 if (!lyp_parse_value(leaf, type, resolve, unres, UINT_MAX)) {
Radek Krejci5449d472015-10-26 14:35:56 +0100440 break;
441 }
442
443 found = 0;
444 type = lyp_get_next_union_type(stype, type, &found);
445 }
446
447 if (!type) {
448 LOGVAL(LYE_INVAL, lineno, (leaf->value_str ? leaf->value_str : ""), leaf->schema->name);
Radek Krejci23238922015-10-27 17:13:34 +0100449 return 0;
Radek Krejci5449d472015-10-26 14:35:56 +0100450 }
Radek Krejci23238922015-10-27 17:13:34 +0100451 } else if (lyp_parse_value(leaf, stype, resolve, unres, lineno)) {
452 ly_errno = LY_EVALID;
453 return 0;
Radek Krejci5449d472015-10-26 14:35:56 +0100454 }
455
456 if (leaf->schema->nodetype == LYS_LEAFLIST) {
457 /* repeat until end-array */
458 len += skip_ws(&data[len]);
459 if (data[len] == ',') {
460 /* another instance of the leaf-list */
461 new = calloc(1, sizeof(struct lyd_node_leaf_list));
462 new->parent = leaf->parent;
463 new->prev = (struct lyd_node *)leaf;
464 leaf->next = (struct lyd_node *)new;
465
466 /* fix the "last" pointer */
467 for (diter = leaf; diter->prev != (struct lyd_node *)leaf; diter = (struct lyd_node_leaf_list *)diter->prev);
468 diter->prev = (struct lyd_node *)new;
469
470 new->schema = leaf->schema;
471
472 /* repeat value parsing */
473 leaf = new;
474 len++;
475 goto repeat;
476 } else if (data[len] == ']') {
477 len++;
478 len += skip_ws(&data[len]);
479 } else {
480 /* something unexpected */
481 LOGVAL(LYE_XML_INVAL, lineno, "JSON data (expecting value-separator or end-array)");
482 return 0;
483 }
484 }
485
Radek Krejci23238922015-10-27 17:13:34 +0100486end:
Radek Krejci5449d472015-10-26 14:35:56 +0100487 len += skip_ws(&data[len]);
488 return len;
489}
490
491static unsigned int
492json_parse_data(struct ly_ctx *ctx, const char *data, struct lyd_node **parent, struct lyd_node *prev,
493 int options, struct unres_data *unres)
494{
495 unsigned int len = 0;
496 unsigned int r;
497 int flag_object = 0;
498 char *name, *prefix = NULL, *str;
499 struct lys_module *module = NULL;
500 struct lys_node *schema = NULL;
501 struct lyd_node *result = NULL, *new, *list, *diter = NULL;
502
503 /* skip leading whitespaces */
504 len += skip_ws(&data[len]);
505
506 /* skip top-level { if any */
507 if (data[len] == '{') {
508 flag_object = 1;
509 len++;
510 len += skip_ws(&data[len]);
511 }
512
513 /* each YANG data node representation starts with string (node identifier) */
514 if (data[len] != '"') {
515 goto error;
516 }
517 len++;
518
Radek Krejci23238922015-10-27 17:13:34 +0100519 str = lyjson_parse_text(&data[len], &r);
Radek Krejci5449d472015-10-26 14:35:56 +0100520 if (!r) {
521 goto error;
522 } else if (data[len + r] != '"') {
523 LOGVAL(LYE_XML_INVAL, lineno, "JSON data (missing quotation-mark at the end of string)");
524 goto error;
525 }
Radek Krejci23238922015-10-27 17:13:34 +0100526 if ((name = strchr(str, ':'))) {
527 *name = '\0';
528 name++;
529 prefix = str;
530 } else {
531 name = str;
Radek Krejci5449d472015-10-26 14:35:56 +0100532 }
533
534 /* TODO - process attributes (@) */
535
536 /* prepare data for parsing node content */
537 len += r + 1;
538 len += skip_ws(&data[len]);
539 if (data[len] != ':') {
540 LOGVAL(LYE_XML_INVAL, lineno, "JSON data (missing name-separator)");
541 goto error;
542 }
543 len++;
544 len += skip_ws(&data[len]);
545
546 /* find schema node */
547 if (!(*parent)) {
548 /* starting in root */
549 /* get the proper schema */
550 module = ly_ctx_get_module(ctx, prefix, NULL);
551 if (module) {
552 /* get the proper schema node */
553 LY_TREE_FOR(module->data, schema) {
554 if (!strcmp(schema->name, name)) {
555 break;
556 }
557 }
558 }
559 } else {
560 /* parsing some internal node, we start with parent's schema pointer */
561 if (prefix) {
562 /* get the proper schema */
563 module = ly_ctx_get_module(ctx, prefix, NULL);
564 if (!module) {
565 LOGVAL(LYE_INELEM, lineno, name);
566 free(prefix);
567 return 0;
568 }
569 }
570 while ((schema = lys_getnext(schema, (*parent)->schema, module, 0))) {
571 if (!strcmp(schema->name, name)) {
572 break;
573 }
574 }
575 }
576 if (!schema) {
577 LOGVAL(LYE_INELEM, lineno, name);
Radek Krejci23238922015-10-27 17:13:34 +0100578 free(str);
Radek Krejci5449d472015-10-26 14:35:56 +0100579 return 0;
580 }
Radek Krejci23238922015-10-27 17:13:34 +0100581 free(str);
Radek Krejci5449d472015-10-26 14:35:56 +0100582
583 switch (schema->nodetype) {
584 case LYS_CONTAINER:
585 case LYS_LIST:
586 case LYS_NOTIF:
587 case LYS_RPC:
588 result = calloc(1, sizeof *result);
589 break;
590 case LYS_LEAF:
591 case LYS_LEAFLIST:
592 result = calloc(1, sizeof(struct lyd_node_leaf_list));
593 break;
594 case LYS_ANYXML:
595 result = calloc(1, sizeof(struct lyd_node_anyxml));
596 break;
597 default:
598 LOGINT;
599 return 0;
600 }
601 result->parent = *parent;
602 if (*parent && !(*parent)->child) {
603 (*parent)->child = result;
604 }
605 if (prev) {
606 result->prev = prev;
607 prev->next = result;
608
609 /* fix the "last" pointer */
610 for (diter = prev; diter->prev != prev; diter = diter->prev);
611 diter->prev = result;
612 } else {
613 result->prev = result;
614 }
615 result->schema = schema;
616
Radek Krejci23238922015-10-27 17:13:34 +0100617 if (lyv_data_context(result, options, lineno, unres)) {
Radek Krejci5449d472015-10-26 14:35:56 +0100618 goto error;
619 }
620
621 /* type specific processing */
622 if (schema->nodetype & (LYS_LEAF | LYS_LEAFLIST)) {
623 /* type detection and assigning the value */
Radek Krejci23238922015-10-27 17:13:34 +0100624 r = json_get_value((struct lyd_node_leaf_list *)result, &data[len], options, unres);
Radek Krejci5449d472015-10-26 14:35:56 +0100625 if (!r) {
626 goto error;
627 }
628 len += r;
629 len += skip_ws(&data[len]);
630 } else if (schema->nodetype == LYS_ANYXML) {
631 r = json_get_anyxml((struct lyd_node_anyxml *)result, &data[len]);
632 if (!r) {
633 goto error;
634 }
635 len += r;
636 len += skip_ws(&data[len]);
637 } else if (schema->nodetype == LYS_CONTAINER) {
638 if (data[len] != '{') {
639 LOGVAL(LYE_XML_INVAL, lineno, "JSON data (missing begin-object)");
640 goto error;
641 }
642 len++;
643 len += skip_ws(&data[len]);
644
645 if (data[len] != '}') {
646 /* non-empty container */
647 r = json_parse_data(ctx, &data[len], &result, NULL, options, unres);
648 if (!r) {
649 goto error;
650 }
651 len += r;
652 }
653
654 if (data[len] != '}') {
655 LOGVAL(LYE_XML_INVAL, lineno, "JSON data (missing end-object)");
656 goto error;
657 }
658 len++;
659 len += skip_ws(&data[len]);
660
661 } else if (schema->nodetype == LYS_LIST) {
662 if (data[len] != '[') {
663 LOGVAL(LYE_XML_INVAL, lineno, "JSON data (missing begin-array)");
664 goto error;
665 }
666
667 list = result;
668 do {
669 len++;
670 len += skip_ws(&data[len]);
Radek Krejci23238922015-10-27 17:13:34 +0100671
672 if (options & LYD_OPT_FILTER) {
673 /* filter selection node ? */
674 if (data[len] == ']') {
675 break;
676 } else if (!strcmp(&data[len], "null")) {
677 len += 4;
678 len += skip_ws(&data[len]);
679 break;
680 }
681 }
682
Radek Krejci5449d472015-10-26 14:35:56 +0100683 r = json_parse_data(ctx, &data[len], &list, NULL, options, unres);
684 if (!r) {
685 goto error;
686 }
687 len += r;
688 len += skip_ws(&data[len]);
689
690 if (data[len] == ',') {
691 /* another instance of the list */
692 new = calloc(1, sizeof *new);
693 new->parent = list->parent;
694 new->prev = list;
695 list->next = new;
696
697 /* fix the "last" pointer */
698 for (diter = list; diter->prev != list; diter = diter->prev);
699 diter->prev = new;
700
701 new->schema = list->schema;
702
703 /* various validation checks */
704 ly_errno = 0;
Radek Krejci23238922015-10-27 17:13:34 +0100705 if (lyv_data_content(list, options, lineno, unres)) {
Radek Krejci5449d472015-10-26 14:35:56 +0100706 if (ly_errno) {
707 goto error;
708 }
709 }
710 list = new;
711 }
712 } while (data[len] == ',');
713 result = list;
714
715 if (data[len] != ']') {
716 LOGVAL(LYE_XML_INVAL, lineno, "JSON data (missing end-array)");
717 goto error;
718 }
719 len++;
720 len += skip_ws(&data[len]);
721 }
722
723 /* various validation checks */
724 ly_errno = 0;
Radek Krejci23238922015-10-27 17:13:34 +0100725 if (lyv_data_content(result, options, lineno, unres)) {
Radek Krejci5449d472015-10-26 14:35:56 +0100726 if (ly_errno) {
727 goto error;
728 }
729 }
730
731 /* process siblings */
732 if (data[len] && data[len] == ',') {
733 /* have siblings */
734 len++;
735 len += skip_ws(&data[len]);
736
737 r = json_parse_data(ctx, &data[len], parent, result, options, unres);
738 if (!r) {
739 goto error;
740 }
741
742 len += r;
743 } else {
744 len += skip_ws(&data[len]);
745 }
746
747 if (flag_object) {
748 if (data[len] != '}') {
749 /* expecting end-object */
750 LOGVAL(LYE_XML_INVAL, lineno, "JSON data (missing end-object)");
751 goto error;
752 }
753 len++;
754 len += skip_ws(&data[len]);
755 }
756
757
758 if (!(*parent)) {
759 *parent = result;
760 }
761 return len;
762
763error:
764 lyd_free(result);
765 return 0;
766}
767
768struct lyd_node *
769lyd_parse_json(struct ly_ctx *ctx, const char *data, int options)
770{
771 struct lyd_node *result, *next, *iter;
772 struct unres_data *unres = NULL;
773
774 if (!ctx || !data) {
775 LOGERR(LY_EINVAL, "%s: Invalid parameter.", __func__);
776 return NULL;
777 }
778
779 unres = calloc(1, sizeof *unres);
780
781#ifndef NDEBUG
782 lineno = 0;
783#endif
784 ly_errno = 0;
785 result = NULL;
786 json_parse_data(ctx, data, &result, NULL, options, unres);
787
788 /* check leafrefs and/or instids if any */
789 if (result && resolve_unres_data(unres)) {
790 /* leafref & instid checking failed */
791 LY_TREE_FOR_SAFE(result, next, iter)
792 {
793 lyd_free(iter);
794 }
795 result = NULL;
796 }
797
798 free(unres->node);
799 free(unres->type);
800#ifndef NDEBUG
801 free(unres->line);
802#endif
803 free(unres);
804
805 return result;
806}