blob: 019bb66ff5c775559d8dff615eb69318e13b3a07 [file] [log] [blame]
Radek Krejci54ea8de2015-04-09 18:02:56 +02001/**
2 * @file xml.c
3 * @author Radek Krejci <rkrejci@cesnet.cz>
4 * @brief XML parser implementation for libyang
5 *
6 * Copyright (c) 2015 CESNET, z.s.p.o.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of the Company nor the names of its contributors
18 * may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
20 */
21
Radek Krejci02117302015-04-13 16:32:44 +020022
Radek Krejci709fee62015-04-15 13:56:19 +020023#include <ctype.h>
24#include <stdint.h>
Radek Krejcif0023a92015-04-20 20:51:39 +020025#include <stdio.h>
Radek Krejci02117302015-04-13 16:32:44 +020026#include <stdlib.h>
27#include <string.h>
Radek Krejci54ea8de2015-04-09 18:02:56 +020028#include <unistd.h>
29
Radek Krejci06a704e2015-04-22 14:50:49 +020030#include "common.h"
31#include "dict.h"
Radek Krejcida04f4a2015-05-21 12:54:09 +020032#include "tree.h"
Radek Krejci54ea8de2015-04-09 18:02:56 +020033#include "xml.h"
34
/*
 * Test whether character c is XML white space: #x20 | #x9 | #xA | #xD.
 *
 * The argument is parenthesized so that an expression can be passed safely.
 * It is still evaluated several times, so it must not have side effects.
 */
#define is_xmlws(c) ((c) == 0x20 || (c) == 0x9 || (c) == 0xa || (c) == 0xd)
Radek Krejci54ea8de2015-04-09 18:02:56 +020039
/*
 * Test whether the code point c may start an XML name (NameStartChar).
 *
 * The argument is parenthesized so that an expression can be passed safely.
 * It is still evaluated several times, so it must not have side effects.
 */
#define is_xmlnamestartchar(c) (((c) >= 'a' && (c) <= 'z') || (c) == '_' || \
        ((c) >= 'A' && (c) <= 'Z') || (c) == ':' || \
        ((c) >= 0x370 && (c) <= 0x1fff && (c) != 0x37e) || \
        ((c) >= 0xc0 && (c) <= 0x2ff && (c) != 0xd7 && (c) != 0xf7) || \
        (c) == 0x200c || (c) == 0x200d || \
        ((c) >= 0x2070 && (c) <= 0x218f) || \
        ((c) >= 0x2c00 && (c) <= 0x2fef) || ((c) >= 0x3001 && (c) <= 0xd7ff) || \
        ((c) >= 0xf900 && (c) <= 0xfdcf) || ((c) >= 0xfdf0 && (c) <= 0xfffd) || \
        ((c) >= 0x10000 && (c) <= 0xeffff))
48
/*
 * Test whether the code point c may appear inside an XML name (NameChar):
 * every NameStartChar plus '-', '.', digits, #xB7, [#x300-#x36F] and
 * [#x203F-#x2040].
 *
 * The argument is parenthesized so that an expression can be passed safely.
 * It is still evaluated several times, so it must not have side effects.
 */
#define is_xmlnamechar(c) (((c) >= 'a' && (c) <= 'z') || (c) == '_' || (c) == '-' || \
        ((c) >= 'A' && (c) <= 'Z') || ((c) >= '0' && (c) <= '9') || (c) == ':' || \
        (c) == '.' || (c) == 0xb7 || \
        ((c) >= 0x370 && (c) <= 0x1fff && (c) != 0x37e) || \
        ((c) >= 0xc0 && (c) <= 0x2ff && (c) != 0xd7 && (c) != 0xf7) || \
        (c) == 0x200c || (c) == 0x200d || ((c) >= 0x300 && (c) <= 0x36f) || \
        ((c) >= 0x2070 && (c) <= 0x218f) || ((c) >= 0x203f && (c) <= 0x2040) || \
        ((c) >= 0x2c00 && (c) <= 0x2fef) || ((c) >= 0x3001 && (c) <= 0xd7ff) || \
        ((c) >= 0xf900 && (c) <= 0xfdcf) || ((c) >= 0xfdf0 && (c) <= 0xfffd) || \
        ((c) >= 0x10000 && (c) <= 0xeffff))
58
/*
 * Advance pointer p past any XML white space it currently points at.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * (safe in unbraced if/else); use it as "ign_xmlws(p);".
 * p is evaluated several times, so it must be a plain lvalue.
 */
#define ign_xmlws(p) do { while (is_xmlws(*(p))) { (p)++; } } while (0)
60
Radek Krejcie1f13912015-05-26 15:17:38 +020061void lyxml_unlink_attr(struct lyxml_attr *attr)
Radek Krejci54ea8de2015-04-09 18:02:56 +020062{
Radek Krejcie1f13912015-05-26 15:17:38 +020063 struct lyxml_attr *prev;
Radek Krejci02117302015-04-13 16:32:44 +020064
65 if (!attr) {
Radek Krejcie1f13912015-05-26 15:17:38 +020066 return;
Radek Krejci02117302015-04-13 16:32:44 +020067 }
68
69 if (!attr->parent) {
Radek Krejcie1f13912015-05-26 15:17:38 +020070 /* hmm, something is probably wrong */
71 attr->next = NULL;
72 return;
Radek Krejci02117302015-04-13 16:32:44 +020073 }
74
Radek Krejcie1f13912015-05-26 15:17:38 +020075 prev = attr->parent->attr;
76 if (prev == attr) {
77 /* unlinking the first attribute -> update the element's pointer */
Radek Krejci02117302015-04-13 16:32:44 +020078 attr->parent->attr = attr->next;
79 } else {
Radek Krejcie1f13912015-05-26 15:17:38 +020080 while(prev && prev->next != attr) {
81 prev = prev->next;
Radek Krejci02117302015-04-13 16:32:44 +020082 }
83
Radek Krejcie1f13912015-05-26 15:17:38 +020084 if (!prev) {
85 /* something is probably broken */
86 attr->parent = NULL;
87 attr->next = NULL;
88 return;
Radek Krejci02117302015-04-13 16:32:44 +020089 }
90
Radek Krejcie1f13912015-05-26 15:17:38 +020091 /* fix the previous's attribute pointer to next in the list */
92 prev->next = attr->next;
Radek Krejci02117302015-04-13 16:32:44 +020093 }
94
95 attr->parent = NULL;
96 attr->next = NULL;
97
Radek Krejcie1f13912015-05-26 15:17:38 +020098 return;
Radek Krejci02117302015-04-13 16:32:44 +020099}
100
Radek Krejcie1f13912015-05-26 15:17:38 +0200101void lyxml_unlink_elem(struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200102{
Radek Krejcie1f13912015-05-26 15:17:38 +0200103 struct lyxml_elem *parent, *first;
Radek Krejci02117302015-04-13 16:32:44 +0200104
105 if (!elem) {
Radek Krejcie1f13912015-05-26 15:17:38 +0200106 return;
Radek Krejci02117302015-04-13 16:32:44 +0200107 }
108
Radek Krejcie1f13912015-05-26 15:17:38 +0200109 /* store pointers to important nodes */
110 parent = elem->parent;
111
112 /* unlink from parent */
113 if (parent) {
114 if (parent->child == elem) {
115 /* we unlink the first child */
116 /* update the parent's link */
117 parent->child = elem->next;
118 }
119 /* forget about the parent */
120 elem->parent = NULL;
Radek Krejci02117302015-04-13 16:32:44 +0200121 }
122
Radek Krejcie1f13912015-05-26 15:17:38 +0200123 /* unlink from siblings */
124 if (elem->prev == elem) {
Radek Krejcida04f4a2015-05-21 12:54:09 +0200125 /* there are no more siblings */
Radek Krejcie1f13912015-05-26 15:17:38 +0200126 return;
Radek Krejcida04f4a2015-05-21 12:54:09 +0200127 }
Radek Krejcida04f4a2015-05-21 12:54:09 +0200128 if (elem->next) {
129 elem->next->prev = elem->prev;
130 } else {
Radek Krejcie1f13912015-05-26 15:17:38 +0200131 /* unlinking the last element */
132 if (parent) {
133 first = parent->child;
134 } else {
135 first = elem;
136 while (elem->prev->next) {
137 first = elem->prev;
138 }
139 }
140 first->prev = elem->prev;
Radek Krejcida04f4a2015-05-21 12:54:09 +0200141 }
Radek Krejcie1f13912015-05-26 15:17:38 +0200142 if (elem->prev->next) {
Radek Krejcida04f4a2015-05-21 12:54:09 +0200143 elem->prev->next = elem->next;
144 }
145
Radek Krejci02117302015-04-13 16:32:44 +0200146 /* clean up the unlinked element */
Radek Krejcida04f4a2015-05-21 12:54:09 +0200147 elem->next = NULL;
148 elem->prev = elem;
Radek Krejci02117302015-04-13 16:32:44 +0200149}
150
Radek Krejcida04f4a2015-05-21 12:54:09 +0200151void lyxml_free_attr(struct ly_ctx *ctx, struct lyxml_attr *attr)
Radek Krejci02117302015-04-13 16:32:44 +0200152{
153 if (!attr) {
154 return;
155 }
156
157 lyxml_unlink_attr(attr);
Radek Krejcida04f4a2015-05-21 12:54:09 +0200158 lydict_remove(ctx, attr->name);
159 lydict_remove(ctx, attr->value);
Radek Krejci02117302015-04-13 16:32:44 +0200160 free(attr);
161}
162
Radek Krejcida04f4a2015-05-21 12:54:09 +0200163void lyxml_free_attrs(struct ly_ctx *ctx, struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200164{
165 struct lyxml_attr *a, *next;
166 if (!elem || !elem->attr) {
167 return;
168 }
169
170 a = elem->attr;
171 do {
172 next = a->next;
173
Radek Krejcida04f4a2015-05-21 12:54:09 +0200174 lydict_remove(ctx, a->name);
175 lydict_remove(ctx, a->value);
Radek Krejci02117302015-04-13 16:32:44 +0200176 free(a);
177
178 a = next;
179 } while (a);
180}
181
Radek Krejcida04f4a2015-05-21 12:54:09 +0200182static void lyxml_free_elem_(struct ly_ctx *ctx, struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200183{
184 struct lyxml_elem *e, *next;
185
186 if (!elem) {
187 return;
188 }
189
Radek Krejcida04f4a2015-05-21 12:54:09 +0200190 lyxml_free_attrs(ctx, elem);
191 LY_TREE_FOR_SAFE(elem->child, next, e) {
192 lyxml_free_elem_(ctx, e);
Radek Krejci02117302015-04-13 16:32:44 +0200193 }
Radek Krejcida04f4a2015-05-21 12:54:09 +0200194 lydict_remove(ctx, elem->name);
195 lydict_remove(ctx, elem->content);
Radek Krejci02117302015-04-13 16:32:44 +0200196 free(elem);
197}
198
/**
 * @brief Unlink an element from its tree and free it with its whole subtree.
 *
 * @param[in] ctx Context handed down to the freeing routines.
 * @param[in] elem Element to free; NULL is silently ignored.
 */
void lyxml_free_elem(struct ly_ctx *ctx, struct lyxml_elem *elem)
{
    if (elem) {
        /* detach first so that parent and siblings stay consistent */
        lyxml_unlink_elem(elem);
        lyxml_free_elem_(ctx, elem);
    }
}
208
Radek Krejcida04f4a2015-05-21 12:54:09 +0200209int lyxml_add_attr(struct lyxml_elem *parent, struct lyxml_attr *attr)
Radek Krejci02117302015-04-13 16:32:44 +0200210{
211 struct lyxml_attr *a;
212
213 if (!parent || !attr) {
214 LY_ERR(LY_EINVAL, NULL);
215 return EXIT_FAILURE;
216 }
217
218 /* (re)link attribute to parent */
219 if (attr->parent) {
220 lyxml_unlink_attr(attr);
221 }
222 attr->parent = parent;
223
224 /* link parent to attribute */
225 if (parent->attr) {
226 for (a = parent->attr; a->next; a = a->next);
227 a->next = attr;
228 } else {
229 parent->attr = attr;
230 }
231
232 return EXIT_SUCCESS;
233}
234
Radek Krejcida04f4a2015-05-21 12:54:09 +0200235const char *lyxml_get_attr(struct lyxml_elem *elem, const char *name,
236 const char *ns)
237{
238 struct lyxml_attr *a;
239
240 if (!elem || !name) {
241 LY_ERR(LY_EINVAL, NULL);
242 return NULL;
243 }
244
245 for (a = elem->attr; a; a = a->next) {
246 if (a->type != LYXML_ATTR_STD) {
247 continue;
248 }
249
250 if (!strcmp(name, a->name)) {
251 if ((!ns && !a->ns)
252 || (ns && a->ns && !strcmp(ns, a->ns->value))) {
253 return a->value;
254 }
255 }
256 }
257
258 return NULL;
259}
260
261int lyxml_add_child(struct lyxml_elem *parent, struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200262{
263 struct lyxml_elem *e;
264
265 if (!parent || !elem) {
266 LY_ERR(LY_EINVAL, NULL);
267 return EXIT_FAILURE;
268 }
269
270 /* (re)link element to parent */
271 if (elem->parent) {
272 lyxml_unlink_elem(elem);
273 }
274 elem->parent = parent;
275
276 /* link parent to element */
277 if (parent->child) {
278 e = parent->child;
279 elem->prev = e->prev;
Radek Krejcida04f4a2015-05-21 12:54:09 +0200280 elem->next = NULL;
Radek Krejci02117302015-04-13 16:32:44 +0200281 elem->prev->next = elem;
Radek Krejcida04f4a2015-05-21 12:54:09 +0200282 e->prev = elem;
Radek Krejci02117302015-04-13 16:32:44 +0200283 } else {
284 parent->child = elem;
Radek Krejci02117302015-04-13 16:32:44 +0200285 elem->prev = elem;
Radek Krejcida04f4a2015-05-21 12:54:09 +0200286 elem->next = NULL;
Radek Krejci02117302015-04-13 16:32:44 +0200287 }
288
289 return EXIT_SUCCESS;
290}
291
292/**
293 * @brief Get the first UTF-8 character value (4bytes) from buffer
294 * @param[in] buf pointr to the current position in input buffer
295 * @param[out] read Number of processed bytes in buf (length of UTF-8
296 * character).
297 * @return UTF-8 value as 4 byte number. 0 means error, only UTF-8 characters
298 * valid for XML are returned, so:
299 * #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
300 * = any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
301 *
302 * UTF-8 mapping:
303 * 00000000 -- 0000007F: 0xxxxxxx
304 * 00000080 -- 000007FF: 110xxxxx 10xxxxxx
305 * 00000800 -- 0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
306 * 00010000 -- 001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
307 *
308 */
309static int getutf8(const char *buf, unsigned int *read)
310{
311 int c, aux;
312 int i;
313
314 /* check input variable */
315 if (!buf || !read) {
316 LY_ERR(LY_EINVAL, NULL);
317 return 0;
318 }
319 c = buf[0];
320 *read = 0;
321
322 /* buf is NULL terminated string, so 0 means EOF */
323 if (!c) {
324 LY_ERR(LY_EEOF, NULL);
325 return 0;
326 }
327 *read = 1;
328
329 /* process character byte(s) */
Radek Krejcic1265a62015-05-26 15:46:28 +0200330 if ((c & 0xf8) == 0xf0) {
Radek Krejci02117302015-04-13 16:32:44 +0200331 /* four bytes character */
332 *read = 4;
333
334 c &= 0x07;
335 for (i = 1; i <= 3; i++) {
336 aux = buf[i];
337 if ((aux & 0xc0) != 0x80) {
338 LY_ERR(LY_EINVAL, NULL);
339 return 0;
340 }
341
342 c = (c << 6) | (aux & 0x3f);
343 }
344
345
346 if (c < 0x1000 || c > 0x10ffff) {
347 LY_ERR(LY_EINVAL, NULL);
348 return 0;
349 }
350 } else if ((c & 0xf0) == 0xe0) {
351 /* three bytes character */
352 *read = 3;
353
354 c &= 0x0f;
355 for (i = 1; i <= 2; i++) {
356 aux = buf[i];
357 if ((aux & 0xc0) != 0x80) {
358 LY_ERR(LY_EINVAL, NULL);
359 return 0;
360 }
361
362 c = (c << 6) | (aux & 0x3f);
363 }
364
365
366 if (c < 0x800 || (c > 0xd7ff && c < 0xe000) || c > 0xfffd ) {
367 LY_ERR(LY_EINVAL, NULL);
368 return 0;
369 }
370 } else if ((c & 0xe0) == 0xc0) {
371 /* two bytes character */
372 *read = 2;
373
374 aux = buf[1];
375 if ((aux & 0xc0) != 0x80) {
376 LY_ERR(LY_EINVAL, NULL);
377 return 0;
378 }
379 c = ((c & 0x1f) << 6) | (aux & 0x3f);
380
381 if (c < 0x80) {
382 LY_ERR(LY_EINVAL, NULL);
383 return 0;
384 }
385 } else if (!(c & 0x80)) {
386 /* one byte character */
387 if (c < 0x20 && c != 0x9 && c != 0xa && c != 0xd) {
388 /* invalid character */
389 LY_ERR(LY_EINVAL, NULL);
390 return 0;
391 }
392 } else {
393 /* invalid character */
394 LY_ERR(LY_EINVAL, NULL);
395 return 0;
396 }
397
398 return c;
399}
400
Radek Krejci709fee62015-04-15 13:56:19 +0200401/**
402 * Store UTF-8 character specified as 4byte integer into the dst buffer.
403 * Returns number of written bytes (4 max), expects that dst has enough space.
404 *
405 * UTF-8 mapping:
406 * 00000000 -- 0000007F: 0xxxxxxx
407 * 00000080 -- 000007FF: 110xxxxx 10xxxxxx
408 * 00000800 -- 0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
409 * 00010000 -- 001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
410 *
411 */
412static unsigned int pututf8(char *dst, int32_t value)
413{
414 if (value < 0x80) {
415 /* one byte character */
416 dst[0] = value;
417
418 return 1;
419 } else if (value < 0x800) {
420 /* two bytes character */
421 dst[0] = 0xc0 | (value >> 6);
422 dst[1] = 0x80 | (value & 0x3f);
423
424 return 2;
425 } else if (value < 0x10000) {
426 /* three bytes character */
427 dst[0] = 0xe0 | (value >> 12);
428 dst[1] = 0x80 | ((value >> 6) & 0x3f);
429 dst[2] = 0x80 | (value & 0x3f);
430
431 return 3;
432 } else if (value < 0x200000) {
433 /* four bytes character */
434 dst[0] = 0xf0 | (value >> 18);
435 dst[1] = 0x80 | ((value >> 12) & 0x3f);
436 dst[2] = 0x80 | ((value >> 6) & 0x3f);
437 dst[3] = 0x80 | (value & 0x3f);
438
439 return 4;
440 } else {
441 /* out of range */
442 LY_ERR(LY_EINVAL, NULL);
443 return 0;
444 }
445}
446
Radek Krejci05e37a32015-04-15 14:40:34 +0200447static int parse_ignore(const char *data, const char *endstr,
448 unsigned int *len)
Radek Krejci02117302015-04-13 16:32:44 +0200449{
Radek Krejci05e37a32015-04-15 14:40:34 +0200450 unsigned int slen;
Radek Krejci02117302015-04-13 16:32:44 +0200451 const char *c = data;
452
Radek Krejci05e37a32015-04-15 14:40:34 +0200453 slen = strlen(endstr);
Radek Krejci02117302015-04-13 16:32:44 +0200454
Radek Krejci05e37a32015-04-15 14:40:34 +0200455 while (*c && memcmp(c, endstr, slen)) {
Radek Krejci02117302015-04-13 16:32:44 +0200456 c++;
457 }
458 if (!*c) {
459 LY_ERR(LY_EWELLFORM, "Missing close sequence \"%s\".", endstr);
Radek Krejci05e37a32015-04-15 14:40:34 +0200460 return EXIT_FAILURE;
Radek Krejci02117302015-04-13 16:32:44 +0200461 }
Radek Krejci05e37a32015-04-15 14:40:34 +0200462 c += slen;
Radek Krejci02117302015-04-13 16:32:44 +0200463
Radek Krejci05e37a32015-04-15 14:40:34 +0200464 *len = c - data;
465 return EXIT_SUCCESS;
Radek Krejci02117302015-04-13 16:32:44 +0200466}
467
Radek Krejci521008e2015-04-15 14:41:07 +0200468static char *parse_text(const char *data, char delim, unsigned int *len)
Radek Krejci02117302015-04-13 16:32:44 +0200469{
Radek Krejci709fee62015-04-15 13:56:19 +0200470#define BUFSIZE 1024
Radek Krejci02117302015-04-13 16:32:44 +0200471
Radek Krejci709fee62015-04-15 13:56:19 +0200472 char buf[BUFSIZE];
473 char *result = NULL, *aux;
474 unsigned int r;
475 int o, size = 0;
Radek Krejcia4a84062015-04-16 13:00:10 +0200476 int cdsect = 0;
Radek Krejci709fee62015-04-15 13:56:19 +0200477 int32_t n;
478
Radek Krejcia4a84062015-04-16 13:00:10 +0200479 for (*len = o = 0; cdsect || data[*len] != delim; o++) {
480 if (!data[*len] || (!cdsect && !memcmp(&data[*len], "]]>", 2))) {
Radek Krejci02117302015-04-13 16:32:44 +0200481 LY_ERR(LY_EWELLFORM, "Invalid element content, \"]]>\" found.");
Radek Krejci709fee62015-04-15 13:56:19 +0200482 goto error;
Radek Krejci02117302015-04-13 16:32:44 +0200483 }
Radek Krejci709fee62015-04-15 13:56:19 +0200484
Radek Krejcia4a84062015-04-16 13:00:10 +0200485loop:
486
Radek Krejci709fee62015-04-15 13:56:19 +0200487 if (o > BUFSIZE - 3) {
488 /* add buffer into the result */
489 if (result) {
490 size = size + o;
491 aux = realloc(result, size + 1);
492 result = aux;
493 } else {
494 size = o;
495 result = malloc((size + 1) * sizeof *result);
496 }
497 memcpy(&result[size - o], buf, o);
498
499 /* write again into the beginning of the buffer */
500 o = 0;
501 }
502
Radek Krejcia4a84062015-04-16 13:00:10 +0200503 if (cdsect || !memcmp(&data[*len], "<![CDATA[", 9)) {
504 /* CDSect */
505 if (!cdsect) {
506 cdsect = 1;
507 *len += 9;
508 }
509 if (data[*len] && !memcmp(&data[*len], "]]>", 3)) {
510 *len += 3;
511 cdsect = 0;
512 o--; /* we don't write any data in this iteration */
513 } else {
514 buf[o] = data[*len];
515 (*len)++;
516 }
517 } else if (data[*len] == '&') {
Radek Krejci709fee62015-04-15 13:56:19 +0200518 (*len)++;
519 if (data[*len] != '#') {
520 /* entity reference - only predefined refs are supported */
521 if (!memcmp(&data[*len], "lt;", 3)) {
522 buf[o] = '<';
523 *len += 3;
524 } else if (!memcmp(&data[*len], "gt;", 3)) {
525 buf[o] = '>';
526 *len += 3;
527 } else if (!memcmp(&data[*len], "amp;", 4)) {
528 buf[o] = '&';
529 *len += 4;
530 } else if (!memcmp(&data[*len], "apos;", 5)) {
531 buf[o] = '\'';
532 *len += 5;
533 } else if (!memcmp(&data[*len], "quot;", 5)) {
534 buf[o] = '\"';
535 *len += 5;
536 } else {
537 LY_ERR(LY_EWELLFORM,
538 "Invalid entity reference, only predefined entity references are supported.");
539 goto error;
540 }
541 } else {
542 /* character reference */
543 (*len)++;
544 if (isdigit(data[*len])) {
545 for (n = 0; isdigit(data[*len]); (*len)++) {
546 n = (10 * n) + (data[*len] - '0');
547 }
548 if (data[*len] != ';') {
549 LY_ERR(LY_EWELLFORM,
550 "Invalid character reference, missing semicolon.");
551 goto error;
552 }
553 } else if (data[(*len)++] == 'x' && isxdigit(data[*len])) {
554 for (n = 0; isxdigit(data[*len]); (*len)++) {
555 if (isdigit(data[*len])) {
556 r = (data[*len] - '0');
557 } else if (data[*len] > 'F') {
558 r = 10 + (data[*len] - 'a');
559 } else {
560 r = 10 + (data[*len] - 'A');
561 }
562 n = (16 * n) + r;
563 }
564 } else {
565 LY_ERR(LY_EWELLFORM, "Invalid character reference.");
566 goto error;
567
568 }
569 r = pututf8(&buf[o], n);
570 if (!r) {
571 LY_ERR(LY_EWELLFORM, "Invalid character reference value.");
572 goto error;
573 }
574 o += r - 1; /* o is ++ in for loop */
575 (*len)++;
576 }
577 } else {
578 buf[o] = data[*len];
579 (*len)++;
580 }
Radek Krejci02117302015-04-13 16:32:44 +0200581 }
582
Radek Krejcia4a84062015-04-16 13:00:10 +0200583 if (delim == '<' && !memcmp(&data[*len], "<![CDATA[", 9)) {
584 /* ignore loop's end condition on beginning of CDSect */
585 goto loop;
586 }
587
Radek Krejci709fee62015-04-15 13:56:19 +0200588#undef BUFSIZE
589
590 if (o) {
591 if (result) {
592 size = size + o;
Radek Krejci9c16b332015-04-20 17:37:54 +0200593 aux = realloc(result, size + 1);
Radek Krejci709fee62015-04-15 13:56:19 +0200594 result = aux;
595 } else {
596 size = o;
Radek Krejci9c16b332015-04-20 17:37:54 +0200597 result = malloc((size + 1) * sizeof *result);
Radek Krejci709fee62015-04-15 13:56:19 +0200598 }
599 memcpy(&result[size - o], buf, o);
600 }
Radek Krejci674e1f82015-04-21 14:12:19 +0200601 if (result) {
602 result[size] = '\0';
603 }
Radek Krejci02117302015-04-13 16:32:44 +0200604
Radek Krejci02117302015-04-13 16:32:44 +0200605 return result;
Radek Krejci709fee62015-04-15 13:56:19 +0200606
607error:
608 free(result);
609 return NULL;
Radek Krejci02117302015-04-13 16:32:44 +0200610}
611
Radek Krejci674e1f82015-04-21 14:12:19 +0200612static struct lyxml_ns *get_ns(struct lyxml_elem *elem, const char *prefix)
Radek Krejci02117302015-04-13 16:32:44 +0200613{
Radek Krejci674e1f82015-04-21 14:12:19 +0200614 struct lyxml_attr *attr;
615 int len;
616
617 if (!elem) {
618 return NULL;
619 }
620
621 if (!prefix) {
622 len = 0;
623 } else {
624 len = strlen(prefix);
625 }
626
627 for (attr = elem->attr; attr; attr = attr->next) {
628 if (attr->type != LYXML_ATTR_NS) {
629 continue;
630 }
631 if (!attr->name) {
632 if (!len) {
633 /* default namespace found */
634 if (!attr->value) {
635 /* empty default namespace -> no default namespace */
636 return NULL;
637 }
638 return (struct lyxml_ns *)attr;
639 }
640 } else if (len && !memcmp(attr->name, prefix, len)) {
641 /* prefix found */
642 return (struct lyxml_ns *)attr;
643 }
644 }
645
646 /* go recursively */
647 return get_ns(elem->parent, prefix);
648}
649
Radek Krejcida04f4a2015-05-21 12:54:09 +0200650static struct lyxml_attr *parse_attr(struct ly_ctx *ctx, const char *data,
651 unsigned int *len, struct lyxml_elem *elem)
Radek Krejci674e1f82015-04-21 14:12:19 +0200652{
653 const char *c = data, *start, *delim;
654 char prefix[32];
Radek Krejci02117302015-04-13 16:32:44 +0200655 int uc;
656 struct lyxml_attr *attr = NULL;
657 unsigned int size;
658
Radek Krejci4ea08382015-04-21 09:41:40 +0200659
Radek Krejci674e1f82015-04-21 14:12:19 +0200660 /* check if it is attribute or namespace */
661 if (!memcmp(c, "xmlns", 5)) {
662 /* namespace */
663 attr = calloc(1, sizeof(struct lyxml_ns));
664 attr->type = LYXML_ATTR_NS;
665 c += 5;
666 if (*c != ':') {
667 /* default namespace, prefix will be empty */
668 goto equal;
669 }
670 c++; /* go after ':' to the prefix value */
671 } else {
672 /* attribute */
Radek Krejcibcf46db2015-05-26 15:40:56 +0200673 attr = calloc(1, sizeof *attr);
Radek Krejci674e1f82015-04-21 14:12:19 +0200674 attr->type = LYXML_ATTR_STD;
675 }
Radek Krejci4ea08382015-04-21 09:41:40 +0200676
Radek Krejci02117302015-04-13 16:32:44 +0200677 /* process name part of the attribute */
Radek Krejci674e1f82015-04-21 14:12:19 +0200678 start = c;
Radek Krejci02117302015-04-13 16:32:44 +0200679 uc = getutf8(c, &size);
680 if (!is_xmlnamestartchar(uc)) {
681 LY_ERR(LY_EWELLFORM, "Invalid NameStartChar of the attribute");
Radek Krejci674e1f82015-04-21 14:12:19 +0200682 free(attr);
Radek Krejci02117302015-04-13 16:32:44 +0200683 return NULL;
684 }
685 c += size;
686 uc = getutf8(c, &size);
687 while (is_xmlnamechar(uc)) {
Radek Krejci674e1f82015-04-21 14:12:19 +0200688 if (attr->type == LYXML_ATTR_STD && *c == ':') {
689 /* attribute in a namespace */
690 start = c + 1;
691
692 /* look for the prefix in namespaces */
693 memcpy(prefix, data, c - data);
694 prefix[c - data] = '\0';
695 attr->ns = get_ns(elem, prefix);
696 }
Radek Krejci02117302015-04-13 16:32:44 +0200697 c += size;
698 uc = getutf8(c, &size);
699 }
700
Radek Krejci02117302015-04-13 16:32:44 +0200701 /* store the name */
Radek Krejci674e1f82015-04-21 14:12:19 +0200702 size = c - start;
Radek Krejcida04f4a2015-05-21 12:54:09 +0200703 attr->name = lydict_insert(ctx, start, size);
Radek Krejci02117302015-04-13 16:32:44 +0200704
Radek Krejci674e1f82015-04-21 14:12:19 +0200705
706equal:
Radek Krejci02117302015-04-13 16:32:44 +0200707 /* check Eq mark that can be surrounded by whitespaces */
708 ign_xmlws(c);
709 if (*c != '=') {
710 LY_ERR(LY_EWELLFORM, "Invalid attribute definition, \"=\" expected.");
711 goto error;
712 }
713 c++;
714 ign_xmlws(c);
Radek Krejci02117302015-04-13 16:32:44 +0200715
716 /* process value part of the attribute */
717 if (!*c || (*c != '"' && *c != '\'')) {
718 LY_ERR(LY_EWELLFORM, "Invalid attribute value, \" or \' expected.");
719 goto error;
720 }
721 delim = c;
Radek Krejcida04f4a2015-05-21 12:54:09 +0200722 attr->value = lydict_insert_zc(ctx, parse_text(++c, *delim, &size));
Radek Krejci521008e2015-04-15 14:41:07 +0200723 if (ly_errno) {
Radek Krejci02117302015-04-13 16:32:44 +0200724 goto error;
725 }
726
Radek Krejci521008e2015-04-15 14:41:07 +0200727 *len = c + size + 1 - data; /* +1 is delimiter size */
Radek Krejci02117302015-04-13 16:32:44 +0200728 return attr;
729
730error:
Radek Krejcida04f4a2015-05-21 12:54:09 +0200731 lyxml_free_attr(ctx, attr);
Radek Krejci54ea8de2015-04-09 18:02:56 +0200732 return NULL;
733}
734
Radek Krejcida04f4a2015-05-21 12:54:09 +0200735static struct lyxml_elem *parse_elem(struct ly_ctx *ctx, const char *data,
736 unsigned int *len,
737 struct lyxml_elem *parent)
Radek Krejci54ea8de2015-04-09 18:02:56 +0200738{
Radek Krejci674e1f82015-04-21 14:12:19 +0200739 const char *c = data, *start, *e;
Radek Krejci02117302015-04-13 16:32:44 +0200740 const char *lws; /* leading white space for handling mixed content */
741 int uc;
742 char *str;
Radek Krejci674e1f82015-04-21 14:12:19 +0200743 char prefix[32] = {0};
744 unsigned int prefix_len = 0;
Radek Krejci39ebd8e2015-05-26 15:49:54 +0200745 struct lyxml_elem *elem = NULL, *child;
Radek Krejci02117302015-04-13 16:32:44 +0200746 struct lyxml_attr *attr;
Radek Krejci05e37a32015-04-15 14:40:34 +0200747 unsigned int size;
Radek Krejci674e1f82015-04-21 14:12:19 +0200748 int nons_flag = 0, closed_flag = 0;
Radek Krejci02117302015-04-13 16:32:44 +0200749
750 *len = 0;
751
752 if (*c != '<') {
753 return NULL;
754 }
755
756 /* locate element name */
757 c++;
758 e = c;
759
Radek Krejci05e37a32015-04-15 14:40:34 +0200760 uc = getutf8(e, &size);
Radek Krejci02117302015-04-13 16:32:44 +0200761 if (!is_xmlnamestartchar(uc)) {
762 LY_ERR(LY_EWELLFORM, "Invalid NameStartChar of the attribute");
763 return NULL;
764 }
Radek Krejci05e37a32015-04-15 14:40:34 +0200765 e += size;
766 uc = getutf8(e, &size);
Radek Krejci02117302015-04-13 16:32:44 +0200767 while (is_xmlnamechar(uc)) {
Radek Krejci674e1f82015-04-21 14:12:19 +0200768 if (*e == ':') {
769 if (prefix_len) {
770 LY_ERR(LY_EWELLFORM, "Multiple colons in element name.");
771 goto error;
772 }
773 /* element in a namespace */
774 start = e + 1;
775
776 /* look for the prefix in namespaces */
777 memcpy(prefix, c, prefix_len = e - c);
778 prefix[prefix_len] = '\0';
779 c = start;
780 }
Radek Krejci05e37a32015-04-15 14:40:34 +0200781 e += size;
782 uc = getutf8(e, &size);
Radek Krejci02117302015-04-13 16:32:44 +0200783 }
784 if (!*e) {
785 LY_ERR(LY_EWELLFORM, "Unexpected end of input data.");
786 return NULL;
787 }
788
789 /* allocate element structure */
790 elem = calloc(1, sizeof *elem);
Radek Krejcida04f4a2015-05-21 12:54:09 +0200791 elem->next = NULL;
Radek Krejci02117302015-04-13 16:32:44 +0200792 elem->prev = elem;
Radek Krejci674e1f82015-04-21 14:12:19 +0200793 if (parent) {
794 lyxml_add_child(parent, elem);
795 }
Radek Krejci02117302015-04-13 16:32:44 +0200796
797 /* store the name into the element structure */
Radek Krejcida04f4a2015-05-21 12:54:09 +0200798 elem->name = lydict_insert(ctx, c, e - c);
Radek Krejci02117302015-04-13 16:32:44 +0200799 c = e;
800
801process:
Radek Krejci709fee62015-04-15 13:56:19 +0200802 ly_errno = 0;
Radek Krejci02117302015-04-13 16:32:44 +0200803 ign_xmlws(c);
804 if (!memcmp("/>", c, 2)) {
805 /* we are done, it was EmptyElemTag */
806 c += 2;
Radek Krejci674e1f82015-04-21 14:12:19 +0200807 closed_flag = 1;
Radek Krejci02117302015-04-13 16:32:44 +0200808 } else if (*c == '>') {
809 /* process element content */
810 c++;
811 lws = NULL;
812
813 while (*c) {
814 if (!memcmp(c, "</", 2)) {
Radek Krejci674e1f82015-04-21 14:12:19 +0200815 if (lws && !elem->child) {
Radek Krejci02117302015-04-13 16:32:44 +0200816 /* leading white spaces were actually content */
817 goto store_content;
818 }
819
820 /* Etag */
821 c += 2;
822 /* get name and check it */
823 e = c;
Radek Krejci05e37a32015-04-15 14:40:34 +0200824 uc = getutf8(e, &size);
Radek Krejci02117302015-04-13 16:32:44 +0200825 if (!is_xmlnamestartchar(uc)) {
826 LY_ERR(LY_EWELLFORM,
827 "Invalid NameStartChar of the attribute");
828 goto error;
829 }
Radek Krejci05e37a32015-04-15 14:40:34 +0200830 e += size;
831 uc = getutf8(e, &size);
Radek Krejci02117302015-04-13 16:32:44 +0200832 while (is_xmlnamechar(uc)) {
Radek Krejci674e1f82015-04-21 14:12:19 +0200833 if (*e == ':') {
834 /* element in a namespace */
835 start = e + 1;
836
837 /* look for the prefix in namespaces */
838 if (memcmp(prefix, c, e - c)) {
839 LY_ERR(LY_EWELLFORM,
840 "Mixed opening (%s) and closing element tag - different namespaces",
841 elem->name);
842 }
843 c = start;
844 }
Radek Krejci05e37a32015-04-15 14:40:34 +0200845 e += size;
846 uc = getutf8(e, &size);
Radek Krejci02117302015-04-13 16:32:44 +0200847 }
848 if (!*e) {
849 LY_ERR(LY_EWELLFORM, "Unexpected end of input data.");
850 goto error;
851 }
852
853 /* check that it corresponds to opening tag */
Radek Krejci05e37a32015-04-15 14:40:34 +0200854 size = e - c;
855 str = malloc((size + 1) * sizeof *str);
Radek Krejci02117302015-04-13 16:32:44 +0200856 memcpy(str, c, e - c);
857 str[e - c] = '\0';
Radek Krejci05e37a32015-04-15 14:40:34 +0200858 if (size != strlen(elem->name) ||
859 memcmp(str, elem->name, size)) {
Radek Krejci02117302015-04-13 16:32:44 +0200860 LY_ERR(LY_EWELLFORM,
861 "Mixed opening (%s) and closing (%s) element tag",
Radek Krejci674e1f82015-04-21 14:12:19 +0200862 elem->name, str);
Radek Krejci6f0c6f92015-05-25 15:01:15 +0200863 free(str);
Radek Krejci02117302015-04-13 16:32:44 +0200864 goto error;
865 }
866 free(str);
867 c = e;
868
869 ign_xmlws(c);
870 if (*c != '>') {
871 LY_ERR(LY_EWELLFORM,
872 "Close element tag \"%s\" contain additional data.",
873 elem->name);
874 goto error;
875 }
876 c++;
Radek Krejci674e1f82015-04-21 14:12:19 +0200877 closed_flag = 1;
Radek Krejci02117302015-04-13 16:32:44 +0200878 break;
879
880 } else if (!memcmp(c, "<?", 2)) {
881 if (lws) {
882 /* leading white spaces were only formatting */
883 lws = NULL;
884 }
885 /* PI - ignore it */
886 c += 2;
Radek Krejci05e37a32015-04-15 14:40:34 +0200887 if (parse_ignore(c, "?>", &size)) {
Radek Krejci02117302015-04-13 16:32:44 +0200888 goto error;
889 }
890 c += size;
891 } else if (!memcmp(c, "<!--", 4)) {
892 if (lws) {
893 /* leading white spaces were only formatting */
894 lws = NULL;
895 }
896 /* Comment - ignore it */
897 c += 4;
Radek Krejci05e37a32015-04-15 14:40:34 +0200898 if (parse_ignore(c, "-->", &size)) {
Radek Krejci02117302015-04-13 16:32:44 +0200899 goto error;
900 }
901 c += size;
902 } else if (!memcmp(c, "<![CDATA[", 9)) {
903 /* CDSect */
Radek Krejcia4a84062015-04-16 13:00:10 +0200904 goto store_content;
Radek Krejci02117302015-04-13 16:32:44 +0200905 } else if (*c == '<') {
906 if (lws) {
Radek Krejcif0023a92015-04-20 20:51:39 +0200907 if (elem->flags & LYXML_ELEM_MIXED) {
908 /* we have a mixed content */
909 goto store_content;
910 } else {
911 /* leading white spaces were only formatting */
912 lws = NULL;
913 }
Radek Krejci02117302015-04-13 16:32:44 +0200914 }
915 if (elem->content) {
916 /* we have a mixed content */
917 child = calloc(1, sizeof *child);
918 child->content = elem->content;
919 elem->content = NULL;
920 lyxml_add_child(elem, child);
Radek Krejcif0023a92015-04-20 20:51:39 +0200921 elem->flags |= LYXML_ELEM_MIXED;
Radek Krejci02117302015-04-13 16:32:44 +0200922 }
Radek Krejcida04f4a2015-05-21 12:54:09 +0200923 child = parse_elem(ctx, c, &size, elem);
Radek Krejci02117302015-04-13 16:32:44 +0200924 if (!child) {
925 LY_ERR(LY_EWELLFORM, "Unexpected end of input data.");
926 goto error;
927 }
Radek Krejci02117302015-04-13 16:32:44 +0200928 c += size; /* move after processed child element */
929 } else if (is_xmlws(*c)) {
930 lws = c;
931 ign_xmlws(c);
932 } else {
933store_content:
934 /* store text content */
935 if (lws) {
936 /* process content including the leading white spaces */
937 c = lws;
938 lws = NULL;
939 }
Radek Krejcida04f4a2015-05-21 12:54:09 +0200940 elem->content = lydict_insert_zc(ctx, parse_text(c, '<', &size));
Radek Krejci521008e2015-04-15 14:41:07 +0200941 if (ly_errno) {
Radek Krejci709fee62015-04-15 13:56:19 +0200942 goto error;
943 }
Radek Krejci02117302015-04-13 16:32:44 +0200944 c += size; /* move after processed text content */
945
946 if (elem->child) {
947 /* we have a mixed content */
948 child = calloc(1, sizeof *child);
949 child->content = elem->content;
950 elem->content = NULL;
951 lyxml_add_child(elem, child);
Radek Krejcif0023a92015-04-20 20:51:39 +0200952 elem->flags |= LYXML_ELEM_MIXED;
Radek Krejci02117302015-04-13 16:32:44 +0200953 }
954 }
955 }
956 } else {
957 /* process attribute */
Radek Krejcida04f4a2015-05-21 12:54:09 +0200958 attr = parse_attr(ctx, c, &size, elem);
Radek Krejci02117302015-04-13 16:32:44 +0200959 if (!attr) {
960 LY_ERR(LY_EWELLFORM, "Unexpected end of input data.");
961 goto error;
962 }
963 lyxml_add_attr(elem, attr);
964 c += size; /* move after processed attribute */
965
Radek Krejci674e1f82015-04-21 14:12:19 +0200966 /* check namespace */
967 if (attr->type == LYXML_ATTR_NS) {
968 if (!prefix[0] && !attr->name) {
969 if (attr->value) {
970 /* default prefix */
971 elem->ns = (struct lyxml_ns *)attr;
972 } else {
973 /* xmlns="" -> no namespace */
974 nons_flag = 1;
975 }
976 } else if (prefix[0] && attr->name &&
977 !memcmp(attr->name, prefix, prefix_len + 1)) {
978 /* matching namespace with prefix */
979 elem->ns = (struct lyxml_ns *)attr;
980 }
981 }
982
Radek Krejci02117302015-04-13 16:32:44 +0200983 /* go back to finish element processing */
984 goto process;
985 }
986
987 *len = c - data;
988
Radek Krejci674e1f82015-04-21 14:12:19 +0200989 if (!closed_flag) {
990 LY_ERR(LY_EWELLFORM, "Missing closing element tag (%s).", elem->name);
991 goto error;
992 }
993
994 if (!nons_flag && parent) {
995 elem->ns = get_ns(parent, prefix_len ? prefix : NULL);
996 }
997
Radek Krejci02117302015-04-13 16:32:44 +0200998 return elem;
999
1000error:
Radek Krejcida04f4a2015-05-21 12:54:09 +02001001 lyxml_free_elem(ctx, elem);
Radek Krejci02117302015-04-13 16:32:44 +02001002
Radek Krejci54ea8de2015-04-09 18:02:56 +02001003 return NULL;
1004}
1005
Radek Krejcida04f4a2015-05-21 12:54:09 +02001006struct lyxml_elem *lyxml_read(struct ly_ctx *ctx, const char *data,
1007 int UNUSED(options))
Radek Krejci54ea8de2015-04-09 18:02:56 +02001008{
Radek Krejci02117302015-04-13 16:32:44 +02001009 const char *c = data;
Radek Krejci05e37a32015-04-15 14:40:34 +02001010 unsigned int len;
Radek Krejci02117302015-04-13 16:32:44 +02001011 struct lyxml_elem *root = NULL;
1012
Radek Krejcida04f4a2015-05-21 12:54:09 +02001013 if (!data || !ctx) {
Radek Krejci02117302015-04-13 16:32:44 +02001014 ly_errno = LY_EINVAL;
1015 return NULL;
1016 }
1017
1018 /* process document */
1019 while (*c) {
1020 if (is_xmlws(*c)) {
1021 /* skip whitespaces */
1022 c++;
1023 } else if (!memcmp(c, "<?", 2)) {
1024 /* XMLDecl or PI - ignore it */
1025 c += 2;
Radek Krejci05e37a32015-04-15 14:40:34 +02001026 if (parse_ignore(c, "?>", &len)) {
Radek Krejci02117302015-04-13 16:32:44 +02001027 LY_ERR(LY_EWELLFORM, "Missing close sequence \"?>\".");
1028 return NULL;
1029 }
1030 c += len;
1031 } else if (!memcmp(c, "<!--", 4)) {
1032 /* Comment - ignore it */
1033 c += 2;
Radek Krejci05e37a32015-04-15 14:40:34 +02001034 if (parse_ignore(c, "-->", &len)) {
Radek Krejci02117302015-04-13 16:32:44 +02001035 LY_ERR(LY_EWELLFORM, "Missing close sequence \"-->\".");
1036 return NULL;
1037 }
1038 c += len;
1039 } else if (!memcmp(c, "<!", 2)) {
1040 /* DOCTYPE */
1041 /* TODO - standalone ignore counting < and > */
1042 LY_ERR(LY_EINVAL, "DOCTYPE not implemented.");
1043 return NULL;
1044 } else if (*c == '<') {
1045 /* element - process it in next loop to strictly follow XML
1046 * format
1047 */
1048 break;
1049 }
1050 }
1051
Radek Krejcida04f4a2015-05-21 12:54:09 +02001052 root = parse_elem(ctx, c, &len, NULL);
Radek Krejci02117302015-04-13 16:32:44 +02001053 if (!root) {
1054 return NULL;
1055 }
1056 c += len;
1057
1058 /* ignore the rest of document where can be comments, PIs and whitespaces,
1059 * note that we are not detecting syntax errors in these parts
1060 */
1061 ign_xmlws(c);
1062 if (*c) {
1063 LY_WRN("There are some not parsed data:\n%s", c);
1064 }
1065
1066 return root;
1067}
1068
Radek Krejcida04f4a2015-05-21 12:54:09 +02001069struct lyxml_elem *lyxml_read_fd(struct ly_ctx *ctx, int fd,
1070 int UNUSED(options))
Radek Krejci02117302015-04-13 16:32:44 +02001071{
Radek Krejcida04f4a2015-05-21 12:54:09 +02001072 if (fd == -1 || !ctx) {
Radek Krejci02117302015-04-13 16:32:44 +02001073 ly_errno = LY_EINVAL;
1074 return NULL;
1075 }
1076
Radek Krejci54ea8de2015-04-09 18:02:56 +02001077 return NULL;
1078}
1079
Radek Krejcida04f4a2015-05-21 12:54:09 +02001080struct lyxml_elem *lyxml_read_file(struct ly_ctx *ctx, const char *filename,
1081 int UNUSED(options))
Radek Krejci54ea8de2015-04-09 18:02:56 +02001082{
Radek Krejcida04f4a2015-05-21 12:54:09 +02001083 if (!filename || !ctx) {
Radek Krejci02117302015-04-13 16:32:44 +02001084 LY_ERR(LY_EINVAL, NULL);
1085 return NULL;
1086 }
Radek Krejci54ea8de2015-04-09 18:02:56 +02001087
Radek Krejci02117302015-04-13 16:32:44 +02001088 return NULL;
Radek Krejci54ea8de2015-04-09 18:02:56 +02001089}
Radek Krejci02117302015-04-13 16:32:44 +02001090
/**
 * @brief Print text into the stream with XML markup characters escaped.
 *
 * '&', '<' and '>' are replaced by "&amp;", "&lt;" and "&gt;", every other
 * byte is written unchanged. Return values of the stdio calls are not checked
 * for errors.
 *
 * @param[in] f Output stream.
 * @param[in] text NUL-terminated text to print, NULL is handled as an empty
 * string.
 * @return Number of bytes printed.
 */
static int dump_text(FILE *f, const char *text)
{
    const char *p;
    int n = 0;

    if (!text) {
        return 0;
    }

    for (p = text; *p; p++) {
        switch (*p) {
        case '&':
            n += fprintf(f, "&amp;");
            break;
        case '<':
            n += fprintf(f, "&lt;");
            break;
        case '>':
            /* not needed, just for readability */
            n += fprintf(f, "&gt;");
            break;
        default:
            fputc(*p, f);
            n++;
            break;
        }
    }

    return n;
}
1115
1116static int dump_elem(FILE *f, struct lyxml_elem *e, int level)
1117{
1118 int size = 0;
1119 struct lyxml_attr *a;
1120 struct lyxml_elem *child;
Radek Krejci674e1f82015-04-21 14:12:19 +02001121 const char *delim, *delim_outer;
1122 int indent;
Radek Krejcif0023a92015-04-20 20:51:39 +02001123
1124 if (!e->name) {
1125 /* mixed content */
1126 if (e->content) {
1127 return dump_text(f, e->content);
1128 } else {
1129 return 0;
1130 }
1131 }
1132
Radek Krejci674e1f82015-04-21 14:12:19 +02001133 delim = delim_outer = "\n";
1134 indent = 2 * level;
1135 if ((e->flags & LYXML_ELEM_MIXED) || (e->parent && (e->parent->flags & LYXML_ELEM_MIXED))) {
1136 delim = "";
1137 }
1138 if (e->parent && (e->parent->flags & LYXML_ELEM_MIXED)) {
1139 delim_outer = "";
1140 indent = 0;
Radek Krejcif0023a92015-04-20 20:51:39 +02001141 }
1142
Radek Krejci674e1f82015-04-21 14:12:19 +02001143 /* opening tag */
1144 if (e->ns && e->ns->prefix) {
1145 size += fprintf(f, "%*s<%s:%s", indent, "", e->ns->prefix, e->name);
1146 } else {
1147 size += fprintf(f, "%*s<%s", indent, "", e->name);
Radek Krejcif0023a92015-04-20 20:51:39 +02001148 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001149
1150 /* attributes */
1151 for (a = e->attr; a; a = a->next) {
1152 if (a->type == LYXML_ATTR_NS) {
1153 if (a->name) {
1154 size += fprintf(f, " xmlns:%s=\"%s\"", a->name,
1155 a->value ? a->value : "");
1156 } else {
1157 size += fprintf(f, " xmlns=\"%s\"", a->value ? a->value : "");
1158 }
1159 } else if (a->ns && a->ns->prefix) {
1160 size += fprintf(f, " %s:%s=\"%s\"", a->ns->prefix, a->name,
1161 a->value);
1162 } else {
1163 size += fprintf(f, " %s=\"%s\"", a->name, a->value);
1164 }
1165 }
1166
Radek Krejcif0023a92015-04-20 20:51:39 +02001167 if (!e->child && !e->content) {
Radek Krejci674e1f82015-04-21 14:12:19 +02001168 size += fprintf(f, "/>%s", delim);
Radek Krejcif0023a92015-04-20 20:51:39 +02001169 return size;
1170 } else if (e->content) {
1171 fputc('>', f);
1172 size++;
1173
1174 size += dump_text(f, e->content);
1175
Radek Krejci674e1f82015-04-21 14:12:19 +02001176
1177 if (e->ns && e->ns->prefix) {
1178 size += fprintf(f, "</%s:%s>%s", e->ns->prefix, e->name, delim);
1179 } else {
1180 size += fprintf(f, "</%s>%s", e->name, delim);
1181 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001182 return size;
1183 } else {
Radek Krejci674e1f82015-04-21 14:12:19 +02001184 size += fprintf(f, ">%s", delim);
Radek Krejcif0023a92015-04-20 20:51:39 +02001185 }
1186
1187 /* go recursively */
Radek Krejcida04f4a2015-05-21 12:54:09 +02001188 LY_TREE_FOR(e->child, child) {
Radek Krejcif0023a92015-04-20 20:51:39 +02001189 size += dump_elem(f, child, level + 1);
Radek Krejcida04f4a2015-05-21 12:54:09 +02001190 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001191
Radek Krejci674e1f82015-04-21 14:12:19 +02001192 /* closing tag */
1193 if (e->ns && e->ns->prefix) {
1194 size += fprintf(f, "%*s</%s:%s>%s", indent, "", e->ns->prefix, e->name,
1195 delim_outer);
Radek Krejcif0023a92015-04-20 20:51:39 +02001196 } else {
Radek Krejci674e1f82015-04-21 14:12:19 +02001197 size += fprintf(f, "%*s</%s>%s", indent, "", e->name, delim_outer);
Radek Krejcif0023a92015-04-20 20:51:39 +02001198 }
1199
1200 return size;
1201}
1202
/**
 * @brief Print an XML tree into the stream.
 *
 * @param[in] stream Output stream.
 * @param[in] elem Root of the (sub)tree to print, nothing is printed when
 * NULL.
 * @param[in] options Currently unused.
 * @return Value returned by dump_elem() for the tree, 0 when elem is NULL.
 */
int lyxml_dump(FILE *stream, struct lyxml_elem *elem, int UNUSED(options))
{
    /* the root element is printed on nesting level 0 */
    return elem ? dump_elem(stream, elem, 0) : 0;
}