blob: e17bd878e615a7ef71f8edac8ee32d6e0a4ef913 [file] [log] [blame]
Radek Krejci54ea8de2015-04-09 18:02:56 +02001/**
2 * @file xml.c
3 * @author Radek Krejci <rkrejci@cesnet.cz>
4 * @brief XML parser implementation for libyang
5 *
6 * Copyright (c) 2015 CESNET, z.s.p.o.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of the Company nor the names of its contributors
18 * may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
20 */
21
Radek Krejci02117302015-04-13 16:32:44 +020022
Radek Krejci709fee62015-04-15 13:56:19 +020023#include <ctype.h>
24#include <stdint.h>
Radek Krejcif0023a92015-04-20 20:51:39 +020025#include <stdio.h>
Radek Krejci02117302015-04-13 16:32:44 +020026#include <stdlib.h>
27#include <string.h>
Radek Krejci54ea8de2015-04-09 18:02:56 +020028#include <unistd.h>
29
Radek Krejci06a704e2015-04-22 14:50:49 +020030#include "common.h"
31#include "dict.h"
Radek Krejcida04f4a2015-05-21 12:54:09 +020032#include "tree.h"
Radek Krejci54ea8de2015-04-09 18:02:56 +020033#include "xml.h"
34
#ifndef NDEBUG
/* Line counters; they exist only when assertions/debugging are enabled. */
unsigned int lineno;
unsigned int lws_lineno;

/* Bump lineno when the inspected character is a line feed (#xA). */
#define COUNTLINE(c) if ((c) == 0xa) {lineno++;}
#else
/* Line counting expands to nothing when NDEBUG is defined. */
#define COUNTLINE(C)
#endif
41
/*
 * Macro to test if character is #x20 | #x9 | #xA | #xD (whitespace).
 *
 * The argument is fully parenthesized so that expressions such as
 * is_xmlws(x & 0xff) are parsed as intended. It is evaluated several times,
 * therefore it must not have side effects.
 */
#define is_xmlws(c) ((c) == 0x20 || (c) == 0x9 || (c) == 0xa || (c) == 0xd)
Radek Krejci54ea8de2015-04-09 18:02:56 +020046
/*
 * Macro to test if a code point may start an XML name (NameStartChar).
 *
 * The argument is fully parenthesized; it is evaluated several times, so it
 * must not have side effects.
 */
#define is_xmlnamestartchar(c) (((c) >= 'a' && (c) <= 'z') || (c) == '_' || \
        ((c) >= 'A' && (c) <= 'Z') || (c) == ':' || \
        ((c) >= 0x370 && (c) <= 0x1fff && (c) != 0x37e) || \
        ((c) >= 0xc0 && (c) <= 0x2ff && (c) != 0xd7 && (c) != 0xf7) || (c) == 0x200c || \
        (c) == 0x200d || ((c) >= 0x2070 && (c) <= 0x218f) || \
        ((c) >= 0x2c00 && (c) <= 0x2fef) || ((c) >= 0x3001 && (c) <= 0xd7ff) || \
        ((c) >= 0xf900 && (c) <= 0xfdcf) || ((c) >= 0xfdf0 && (c) <= 0xfffd) || \
        ((c) >= 0x10000 && (c) <= 0xeffff))
55
/*
 * Macro to test if a code point may appear inside an XML name (NameChar).
 *
 * NameChar adds '-', '.', digits, #xB7, [#x300-#x36F] and [#x203F-#x2040]
 * to the NameStartChar set. The argument is fully parenthesized; it is
 * evaluated several times, so it must not have side effects.
 */
#define is_xmlnamechar(c) (((c) >= 'a' && (c) <= 'z') || (c) == '_' || (c) == '-' || \
        ((c) >= 'A' && (c) <= 'Z') || ((c) >= '0' && (c) <= '9') || (c) == ':' || \
        (c) == '.' || (c) == 0xb7 || ((c) >= 0x370 && (c) <= 0x1fff && (c) != 0x37e) || \
        ((c) >= 0xc0 && (c) <= 0x2ff && (c) != 0xd7 && (c) != 0xf7) || (c) == 0x200c || \
        (c) == 0x200d || ((c) >= 0x300 && (c) <= 0x36f) || \
        ((c) >= 0x2070 && (c) <= 0x218f) || ((c) >= 0x203f && (c) <= 0x2040) || \
        ((c) >= 0x2c00 && (c) <= 0x2fef) || ((c) >= 0x3001 && (c) <= 0xd7ff) || \
        ((c) >= 0xf900 && (c) <= 0xfdcf) || ((c) >= 0xfdf0 && (c) <= 0xfffd) || \
        ((c) >= 0x10000 && (c) <= 0xeffff))
65
/*
 * Move pointer p forward over a run of XML white space; every skipped
 * character is handed to COUNTLINE first.
 */
#define ign_xmlws(p) \
    for (; is_xmlws(*p); p++) { \
        COUNTLINE(*p); \
    }
Radek Krejci02117302015-04-13 16:32:44 +020071
Radek Krejcie1f13912015-05-26 15:17:38 +020072void lyxml_unlink_attr(struct lyxml_attr *attr)
Radek Krejci54ea8de2015-04-09 18:02:56 +020073{
Radek Krejcie1f13912015-05-26 15:17:38 +020074 struct lyxml_attr *prev;
Radek Krejci02117302015-04-13 16:32:44 +020075
76 if (!attr) {
Radek Krejcie1f13912015-05-26 15:17:38 +020077 return;
Radek Krejci02117302015-04-13 16:32:44 +020078 }
79
80 if (!attr->parent) {
Radek Krejcie1f13912015-05-26 15:17:38 +020081 /* hmm, something is probably wrong */
82 attr->next = NULL;
83 return;
Radek Krejci02117302015-04-13 16:32:44 +020084 }
85
Radek Krejcie1f13912015-05-26 15:17:38 +020086 prev = attr->parent->attr;
87 if (prev == attr) {
88 /* unlinking the first attribute -> update the element's pointer */
Radek Krejci02117302015-04-13 16:32:44 +020089 attr->parent->attr = attr->next;
90 } else {
Radek Krejcie1f13912015-05-26 15:17:38 +020091 while(prev && prev->next != attr) {
92 prev = prev->next;
Radek Krejci02117302015-04-13 16:32:44 +020093 }
94
Radek Krejcie1f13912015-05-26 15:17:38 +020095 if (!prev) {
96 /* something is probably broken */
97 attr->parent = NULL;
98 attr->next = NULL;
99 return;
Radek Krejci02117302015-04-13 16:32:44 +0200100 }
101
Radek Krejcie1f13912015-05-26 15:17:38 +0200102 /* fix the previous's attribute pointer to next in the list */
103 prev->next = attr->next;
Radek Krejci02117302015-04-13 16:32:44 +0200104 }
105
106 attr->parent = NULL;
107 attr->next = NULL;
108
Radek Krejcie1f13912015-05-26 15:17:38 +0200109 return;
Radek Krejci02117302015-04-13 16:32:44 +0200110}
111
Radek Krejcie1f13912015-05-26 15:17:38 +0200112void lyxml_unlink_elem(struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200113{
Radek Krejcie1f13912015-05-26 15:17:38 +0200114 struct lyxml_elem *parent, *first;
Radek Krejci02117302015-04-13 16:32:44 +0200115
116 if (!elem) {
Radek Krejcie1f13912015-05-26 15:17:38 +0200117 return;
Radek Krejci02117302015-04-13 16:32:44 +0200118 }
119
Radek Krejcie1f13912015-05-26 15:17:38 +0200120 /* store pointers to important nodes */
121 parent = elem->parent;
122
123 /* unlink from parent */
124 if (parent) {
125 if (parent->child == elem) {
126 /* we unlink the first child */
127 /* update the parent's link */
128 parent->child = elem->next;
129 }
130 /* forget about the parent */
131 elem->parent = NULL;
Radek Krejci02117302015-04-13 16:32:44 +0200132 }
133
Radek Krejcie1f13912015-05-26 15:17:38 +0200134 /* unlink from siblings */
135 if (elem->prev == elem) {
Radek Krejcida04f4a2015-05-21 12:54:09 +0200136 /* there are no more siblings */
Radek Krejcie1f13912015-05-26 15:17:38 +0200137 return;
Radek Krejcida04f4a2015-05-21 12:54:09 +0200138 }
Radek Krejcida04f4a2015-05-21 12:54:09 +0200139 if (elem->next) {
140 elem->next->prev = elem->prev;
141 } else {
Radek Krejcie1f13912015-05-26 15:17:38 +0200142 /* unlinking the last element */
143 if (parent) {
144 first = parent->child;
145 } else {
146 first = elem;
147 while (elem->prev->next) {
148 first = elem->prev;
149 }
150 }
151 first->prev = elem->prev;
Radek Krejcida04f4a2015-05-21 12:54:09 +0200152 }
Radek Krejcie1f13912015-05-26 15:17:38 +0200153 if (elem->prev->next) {
Radek Krejcida04f4a2015-05-21 12:54:09 +0200154 elem->prev->next = elem->next;
155 }
156
Radek Krejci02117302015-04-13 16:32:44 +0200157 /* clean up the unlinked element */
Radek Krejcida04f4a2015-05-21 12:54:09 +0200158 elem->next = NULL;
159 elem->prev = elem;
Radek Krejci02117302015-04-13 16:32:44 +0200160}
161
Radek Krejcida04f4a2015-05-21 12:54:09 +0200162void lyxml_free_attr(struct ly_ctx *ctx, struct lyxml_attr *attr)
Radek Krejci02117302015-04-13 16:32:44 +0200163{
164 if (!attr) {
165 return;
166 }
167
168 lyxml_unlink_attr(attr);
Radek Krejcida04f4a2015-05-21 12:54:09 +0200169 lydict_remove(ctx, attr->name);
170 lydict_remove(ctx, attr->value);
Radek Krejci02117302015-04-13 16:32:44 +0200171 free(attr);
172}
173
Radek Krejcida04f4a2015-05-21 12:54:09 +0200174void lyxml_free_attrs(struct ly_ctx *ctx, struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200175{
176 struct lyxml_attr *a, *next;
177 if (!elem || !elem->attr) {
178 return;
179 }
180
181 a = elem->attr;
182 do {
183 next = a->next;
184
Radek Krejcida04f4a2015-05-21 12:54:09 +0200185 lydict_remove(ctx, a->name);
186 lydict_remove(ctx, a->value);
Radek Krejci02117302015-04-13 16:32:44 +0200187 free(a);
188
189 a = next;
190 } while (a);
191}
192
Radek Krejcida04f4a2015-05-21 12:54:09 +0200193static void lyxml_free_elem_(struct ly_ctx *ctx, struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200194{
195 struct lyxml_elem *e, *next;
196
197 if (!elem) {
198 return;
199 }
200
Radek Krejcida04f4a2015-05-21 12:54:09 +0200201 lyxml_free_attrs(ctx, elem);
202 LY_TREE_FOR_SAFE(elem->child, next, e) {
203 lyxml_free_elem_(ctx, e);
Radek Krejci02117302015-04-13 16:32:44 +0200204 }
Radek Krejcida04f4a2015-05-21 12:54:09 +0200205 lydict_remove(ctx, elem->name);
206 lydict_remove(ctx, elem->content);
Radek Krejci02117302015-04-13 16:32:44 +0200207 free(elem);
208}
209
/*
 * Unlink elem from its tree and free it together with its whole subtree.
 * A NULL elem is ignored.
 */
void lyxml_free_elem(struct ly_ctx *ctx, struct lyxml_elem *elem)
{
    if (elem) {
        /* detach first so that parent and siblings stay consistent */
        lyxml_unlink_elem(elem);
        lyxml_free_elem_(ctx, elem);
    }
}
219
Radek Krejcida04f4a2015-05-21 12:54:09 +0200220int lyxml_add_attr(struct lyxml_elem *parent, struct lyxml_attr *attr)
Radek Krejci02117302015-04-13 16:32:44 +0200221{
222 struct lyxml_attr *a;
223
224 if (!parent || !attr) {
Radek Krejci3045cf32015-05-28 10:58:52 +0200225 LOGERR(LY_EINVAL, NULL);
Radek Krejci02117302015-04-13 16:32:44 +0200226 return EXIT_FAILURE;
227 }
228
229 /* (re)link attribute to parent */
230 if (attr->parent) {
231 lyxml_unlink_attr(attr);
232 }
233 attr->parent = parent;
234
235 /* link parent to attribute */
236 if (parent->attr) {
237 for (a = parent->attr; a->next; a = a->next);
238 a->next = attr;
239 } else {
240 parent->attr = attr;
241 }
242
243 return EXIT_SUCCESS;
244}
245
Radek Krejcida04f4a2015-05-21 12:54:09 +0200246const char *lyxml_get_attr(struct lyxml_elem *elem, const char *name,
247 const char *ns)
248{
249 struct lyxml_attr *a;
250
251 if (!elem || !name) {
Radek Krejci3045cf32015-05-28 10:58:52 +0200252 LOGERR(LY_EINVAL, NULL);
Radek Krejcida04f4a2015-05-21 12:54:09 +0200253 return NULL;
254 }
255
256 for (a = elem->attr; a; a = a->next) {
257 if (a->type != LYXML_ATTR_STD) {
258 continue;
259 }
260
261 if (!strcmp(name, a->name)) {
262 if ((!ns && !a->ns)
263 || (ns && a->ns && !strcmp(ns, a->ns->value))) {
264 return a->value;
265 }
266 }
267 }
268
269 return NULL;
270}
271
272int lyxml_add_child(struct lyxml_elem *parent, struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200273{
274 struct lyxml_elem *e;
275
276 if (!parent || !elem) {
Radek Krejci3045cf32015-05-28 10:58:52 +0200277 LOGERR(LY_EINVAL, NULL);
Radek Krejci02117302015-04-13 16:32:44 +0200278 return EXIT_FAILURE;
279 }
280
281 /* (re)link element to parent */
282 if (elem->parent) {
283 lyxml_unlink_elem(elem);
284 }
285 elem->parent = parent;
286
287 /* link parent to element */
288 if (parent->child) {
289 e = parent->child;
290 elem->prev = e->prev;
Radek Krejcida04f4a2015-05-21 12:54:09 +0200291 elem->next = NULL;
Radek Krejci02117302015-04-13 16:32:44 +0200292 elem->prev->next = elem;
Radek Krejcida04f4a2015-05-21 12:54:09 +0200293 e->prev = elem;
Radek Krejci02117302015-04-13 16:32:44 +0200294 } else {
295 parent->child = elem;
Radek Krejci02117302015-04-13 16:32:44 +0200296 elem->prev = elem;
Radek Krejcida04f4a2015-05-21 12:54:09 +0200297 elem->next = NULL;
Radek Krejci02117302015-04-13 16:32:44 +0200298 }
299
300 return EXIT_SUCCESS;
301}
302
303/**
304 * @brief Get the first UTF-8 character value (4bytes) from buffer
305 * @param[in] buf pointr to the current position in input buffer
306 * @param[out] read Number of processed bytes in buf (length of UTF-8
307 * character).
308 * @return UTF-8 value as 4 byte number. 0 means error, only UTF-8 characters
309 * valid for XML are returned, so:
310 * #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
311 * = any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
312 *
313 * UTF-8 mapping:
314 * 00000000 -- 0000007F: 0xxxxxxx
315 * 00000080 -- 000007FF: 110xxxxx 10xxxxxx
316 * 00000800 -- 0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
317 * 00010000 -- 001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
318 *
319 */
320static int getutf8(const char *buf, unsigned int *read)
321{
322 int c, aux;
323 int i;
324
325 /* check input variable */
326 if (!buf || !read) {
Radek Krejci3045cf32015-05-28 10:58:52 +0200327 LOGERR(LY_EINVAL, NULL);
Radek Krejci02117302015-04-13 16:32:44 +0200328 return 0;
329 }
330 c = buf[0];
331 *read = 0;
332
333 /* buf is NULL terminated string, so 0 means EOF */
334 if (!c) {
Radek Krejci3045cf32015-05-28 10:58:52 +0200335 LOGERR(LY_EEOF, NULL);
Radek Krejci02117302015-04-13 16:32:44 +0200336 return 0;
337 }
338 *read = 1;
339
340 /* process character byte(s) */
Radek Krejcic1265a62015-05-26 15:46:28 +0200341 if ((c & 0xf8) == 0xf0) {
Radek Krejci02117302015-04-13 16:32:44 +0200342 /* four bytes character */
343 *read = 4;
344
345 c &= 0x07;
346 for (i = 1; i <= 3; i++) {
347 aux = buf[i];
348 if ((aux & 0xc0) != 0x80) {
Radek Krejci3045cf32015-05-28 10:58:52 +0200349 LOGERR(LY_EINVAL, NULL);
Radek Krejci02117302015-04-13 16:32:44 +0200350 return 0;
351 }
352
353 c = (c << 6) | (aux & 0x3f);
354 }
355
356
357 if (c < 0x1000 || c > 0x10ffff) {
Radek Krejci3045cf32015-05-28 10:58:52 +0200358 LOGERR(LY_EINVAL, NULL);
Radek Krejci02117302015-04-13 16:32:44 +0200359 return 0;
360 }
361 } else if ((c & 0xf0) == 0xe0) {
362 /* three bytes character */
363 *read = 3;
364
365 c &= 0x0f;
366 for (i = 1; i <= 2; i++) {
367 aux = buf[i];
368 if ((aux & 0xc0) != 0x80) {
Radek Krejci3045cf32015-05-28 10:58:52 +0200369 LOGERR(LY_EINVAL, NULL);
Radek Krejci02117302015-04-13 16:32:44 +0200370 return 0;
371 }
372
373 c = (c << 6) | (aux & 0x3f);
374 }
375
376
377 if (c < 0x800 || (c > 0xd7ff && c < 0xe000) || c > 0xfffd ) {
Radek Krejci3045cf32015-05-28 10:58:52 +0200378 LOGERR(LY_EINVAL, NULL);
Radek Krejci02117302015-04-13 16:32:44 +0200379 return 0;
380 }
381 } else if ((c & 0xe0) == 0xc0) {
382 /* two bytes character */
383 *read = 2;
384
385 aux = buf[1];
386 if ((aux & 0xc0) != 0x80) {
Radek Krejci3045cf32015-05-28 10:58:52 +0200387 LOGERR(LY_EINVAL, NULL);
Radek Krejci02117302015-04-13 16:32:44 +0200388 return 0;
389 }
390 c = ((c & 0x1f) << 6) | (aux & 0x3f);
391
392 if (c < 0x80) {
Radek Krejci3045cf32015-05-28 10:58:52 +0200393 LOGERR(LY_EINVAL, NULL);
Radek Krejci02117302015-04-13 16:32:44 +0200394 return 0;
395 }
396 } else if (!(c & 0x80)) {
397 /* one byte character */
398 if (c < 0x20 && c != 0x9 && c != 0xa && c != 0xd) {
399 /* invalid character */
Radek Krejci3045cf32015-05-28 10:58:52 +0200400 LOGERR(LY_EINVAL, NULL);
Radek Krejci02117302015-04-13 16:32:44 +0200401 return 0;
402 }
403 } else {
404 /* invalid character */
Radek Krejci3045cf32015-05-28 10:58:52 +0200405 LOGERR(LY_EINVAL, NULL);
Radek Krejci02117302015-04-13 16:32:44 +0200406 return 0;
407 }
408
409 return c;
410}
411
Radek Krejci709fee62015-04-15 13:56:19 +0200412/**
413 * Store UTF-8 character specified as 4byte integer into the dst buffer.
414 * Returns number of written bytes (4 max), expects that dst has enough space.
415 *
416 * UTF-8 mapping:
417 * 00000000 -- 0000007F: 0xxxxxxx
418 * 00000080 -- 000007FF: 110xxxxx 10xxxxxx
419 * 00000800 -- 0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
420 * 00010000 -- 001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
421 *
422 */
423static unsigned int pututf8(char *dst, int32_t value)
424{
425 if (value < 0x80) {
426 /* one byte character */
427 dst[0] = value;
428
429 return 1;
430 } else if (value < 0x800) {
431 /* two bytes character */
432 dst[0] = 0xc0 | (value >> 6);
433 dst[1] = 0x80 | (value & 0x3f);
434
435 return 2;
436 } else if (value < 0x10000) {
437 /* three bytes character */
438 dst[0] = 0xe0 | (value >> 12);
439 dst[1] = 0x80 | ((value >> 6) & 0x3f);
440 dst[2] = 0x80 | (value & 0x3f);
441
442 return 3;
443 } else if (value < 0x200000) {
444 /* four bytes character */
445 dst[0] = 0xf0 | (value >> 18);
446 dst[1] = 0x80 | ((value >> 12) & 0x3f);
447 dst[2] = 0x80 | ((value >> 6) & 0x3f);
448 dst[3] = 0x80 | (value & 0x3f);
449
450 return 4;
451 } else {
452 /* out of range */
Radek Krejci3045cf32015-05-28 10:58:52 +0200453 LOGERR(LY_EINVAL, NULL);
Radek Krejci709fee62015-04-15 13:56:19 +0200454 return 0;
455 }
456}
457
Radek Krejci05e37a32015-04-15 14:40:34 +0200458static int parse_ignore(const char *data, const char *endstr,
459 unsigned int *len)
Radek Krejci02117302015-04-13 16:32:44 +0200460{
Radek Krejci05e37a32015-04-15 14:40:34 +0200461 unsigned int slen;
Radek Krejci02117302015-04-13 16:32:44 +0200462 const char *c = data;
463
Radek Krejci05e37a32015-04-15 14:40:34 +0200464 slen = strlen(endstr);
Radek Krejci02117302015-04-13 16:32:44 +0200465
Radek Krejci05e37a32015-04-15 14:40:34 +0200466 while (*c && memcmp(c, endstr, slen)) {
Radek Krejci3045cf32015-05-28 10:58:52 +0200467 COUNTLINE(*c);
Radek Krejci02117302015-04-13 16:32:44 +0200468 c++;
469 }
470 if (!*c) {
Radek Krejci3045cf32015-05-28 10:58:52 +0200471 LOGERR(LY_EWELLFORM, "Missing close sequence \"%s\".", endstr);
Radek Krejci05e37a32015-04-15 14:40:34 +0200472 return EXIT_FAILURE;
Radek Krejci02117302015-04-13 16:32:44 +0200473 }
Radek Krejci05e37a32015-04-15 14:40:34 +0200474 c += slen;
Radek Krejci02117302015-04-13 16:32:44 +0200475
Radek Krejci05e37a32015-04-15 14:40:34 +0200476 *len = c - data;
477 return EXIT_SUCCESS;
Radek Krejci02117302015-04-13 16:32:44 +0200478}
479
Radek Krejci521008e2015-04-15 14:41:07 +0200480static char *parse_text(const char *data, char delim, unsigned int *len)
Radek Krejci02117302015-04-13 16:32:44 +0200481{
Radek Krejci709fee62015-04-15 13:56:19 +0200482#define BUFSIZE 1024
Radek Krejci02117302015-04-13 16:32:44 +0200483
Radek Krejci709fee62015-04-15 13:56:19 +0200484 char buf[BUFSIZE];
485 char *result = NULL, *aux;
486 unsigned int r;
487 int o, size = 0;
Radek Krejcia4a84062015-04-16 13:00:10 +0200488 int cdsect = 0;
Radek Krejci709fee62015-04-15 13:56:19 +0200489 int32_t n;
490
Radek Krejcia4a84062015-04-16 13:00:10 +0200491 for (*len = o = 0; cdsect || data[*len] != delim; o++) {
492 if (!data[*len] || (!cdsect && !memcmp(&data[*len], "]]>", 2))) {
Radek Krejci3045cf32015-05-28 10:58:52 +0200493 LOGERR(LY_EWELLFORM, "Invalid element content, \"]]>\" found.");
Radek Krejci709fee62015-04-15 13:56:19 +0200494 goto error;
Radek Krejci02117302015-04-13 16:32:44 +0200495 }
Radek Krejci709fee62015-04-15 13:56:19 +0200496
Radek Krejcia4a84062015-04-16 13:00:10 +0200497loop:
498
Radek Krejci709fee62015-04-15 13:56:19 +0200499 if (o > BUFSIZE - 3) {
500 /* add buffer into the result */
501 if (result) {
502 size = size + o;
503 aux = realloc(result, size + 1);
504 result = aux;
505 } else {
506 size = o;
507 result = malloc((size + 1) * sizeof *result);
508 }
509 memcpy(&result[size - o], buf, o);
510
511 /* write again into the beginning of the buffer */
512 o = 0;
513 }
514
Radek Krejcia4a84062015-04-16 13:00:10 +0200515 if (cdsect || !memcmp(&data[*len], "<![CDATA[", 9)) {
516 /* CDSect */
517 if (!cdsect) {
518 cdsect = 1;
519 *len += 9;
520 }
521 if (data[*len] && !memcmp(&data[*len], "]]>", 3)) {
522 *len += 3;
523 cdsect = 0;
524 o--; /* we don't write any data in this iteration */
525 } else {
526 buf[o] = data[*len];
527 (*len)++;
528 }
529 } else if (data[*len] == '&') {
Radek Krejci709fee62015-04-15 13:56:19 +0200530 (*len)++;
531 if (data[*len] != '#') {
532 /* entity reference - only predefined refs are supported */
533 if (!memcmp(&data[*len], "lt;", 3)) {
534 buf[o] = '<';
535 *len += 3;
536 } else if (!memcmp(&data[*len], "gt;", 3)) {
537 buf[o] = '>';
538 *len += 3;
539 } else if (!memcmp(&data[*len], "amp;", 4)) {
540 buf[o] = '&';
541 *len += 4;
542 } else if (!memcmp(&data[*len], "apos;", 5)) {
543 buf[o] = '\'';
544 *len += 5;
545 } else if (!memcmp(&data[*len], "quot;", 5)) {
546 buf[o] = '\"';
547 *len += 5;
548 } else {
Radek Krejci3045cf32015-05-28 10:58:52 +0200549 LOGERR(LY_EWELLFORM,
Radek Krejci709fee62015-04-15 13:56:19 +0200550 "Invalid entity reference, only predefined entity references are supported.");
551 goto error;
552 }
553 } else {
554 /* character reference */
555 (*len)++;
556 if (isdigit(data[*len])) {
557 for (n = 0; isdigit(data[*len]); (*len)++) {
558 n = (10 * n) + (data[*len] - '0');
559 }
560 if (data[*len] != ';') {
Radek Krejci3045cf32015-05-28 10:58:52 +0200561 LOGERR(LY_EWELLFORM,
Radek Krejci709fee62015-04-15 13:56:19 +0200562 "Invalid character reference, missing semicolon.");
563 goto error;
564 }
565 } else if (data[(*len)++] == 'x' && isxdigit(data[*len])) {
566 for (n = 0; isxdigit(data[*len]); (*len)++) {
567 if (isdigit(data[*len])) {
568 r = (data[*len] - '0');
569 } else if (data[*len] > 'F') {
570 r = 10 + (data[*len] - 'a');
571 } else {
572 r = 10 + (data[*len] - 'A');
573 }
574 n = (16 * n) + r;
575 }
576 } else {
Radek Krejci3045cf32015-05-28 10:58:52 +0200577 LOGERR(LY_EWELLFORM, "Invalid character reference.");
Radek Krejci709fee62015-04-15 13:56:19 +0200578 goto error;
579
580 }
581 r = pututf8(&buf[o], n);
582 if (!r) {
Radek Krejci3045cf32015-05-28 10:58:52 +0200583 LOGERR(LY_EWELLFORM, "Invalid character reference value.");
Radek Krejci709fee62015-04-15 13:56:19 +0200584 goto error;
585 }
586 o += r - 1; /* o is ++ in for loop */
587 (*len)++;
588 }
589 } else {
590 buf[o] = data[*len];
Radek Krejci3045cf32015-05-28 10:58:52 +0200591 COUNTLINE(buf[o]);
Radek Krejci709fee62015-04-15 13:56:19 +0200592 (*len)++;
593 }
Radek Krejci02117302015-04-13 16:32:44 +0200594 }
595
Radek Krejcia4a84062015-04-16 13:00:10 +0200596 if (delim == '<' && !memcmp(&data[*len], "<![CDATA[", 9)) {
597 /* ignore loop's end condition on beginning of CDSect */
598 goto loop;
599 }
600
Radek Krejci709fee62015-04-15 13:56:19 +0200601#undef BUFSIZE
602
603 if (o) {
604 if (result) {
605 size = size + o;
Radek Krejci9c16b332015-04-20 17:37:54 +0200606 aux = realloc(result, size + 1);
Radek Krejci709fee62015-04-15 13:56:19 +0200607 result = aux;
608 } else {
609 size = o;
Radek Krejci9c16b332015-04-20 17:37:54 +0200610 result = malloc((size + 1) * sizeof *result);
Radek Krejci709fee62015-04-15 13:56:19 +0200611 }
612 memcpy(&result[size - o], buf, o);
613 }
Radek Krejci674e1f82015-04-21 14:12:19 +0200614 if (result) {
615 result[size] = '\0';
616 }
Radek Krejci02117302015-04-13 16:32:44 +0200617
Radek Krejci02117302015-04-13 16:32:44 +0200618 return result;
Radek Krejci709fee62015-04-15 13:56:19 +0200619
620error:
621 free(result);
622 return NULL;
Radek Krejci02117302015-04-13 16:32:44 +0200623}
624
Radek Krejci674e1f82015-04-21 14:12:19 +0200625static struct lyxml_ns *get_ns(struct lyxml_elem *elem, const char *prefix)
Radek Krejci02117302015-04-13 16:32:44 +0200626{
Radek Krejci674e1f82015-04-21 14:12:19 +0200627 struct lyxml_attr *attr;
628 int len;
629
630 if (!elem) {
631 return NULL;
632 }
633
634 if (!prefix) {
635 len = 0;
636 } else {
637 len = strlen(prefix);
638 }
639
640 for (attr = elem->attr; attr; attr = attr->next) {
641 if (attr->type != LYXML_ATTR_NS) {
642 continue;
643 }
644 if (!attr->name) {
645 if (!len) {
646 /* default namespace found */
647 if (!attr->value) {
648 /* empty default namespace -> no default namespace */
649 return NULL;
650 }
651 return (struct lyxml_ns *)attr;
652 }
653 } else if (len && !memcmp(attr->name, prefix, len)) {
654 /* prefix found */
655 return (struct lyxml_ns *)attr;
656 }
657 }
658
659 /* go recursively */
660 return get_ns(elem->parent, prefix);
661}
662
Radek Krejcida04f4a2015-05-21 12:54:09 +0200663static struct lyxml_attr *parse_attr(struct ly_ctx *ctx, const char *data,
664 unsigned int *len, struct lyxml_elem *elem)
Radek Krejci674e1f82015-04-21 14:12:19 +0200665{
666 const char *c = data, *start, *delim;
667 char prefix[32];
Radek Krejci02117302015-04-13 16:32:44 +0200668 int uc;
669 struct lyxml_attr *attr = NULL;
670 unsigned int size;
671
Radek Krejci4ea08382015-04-21 09:41:40 +0200672
Radek Krejci674e1f82015-04-21 14:12:19 +0200673 /* check if it is attribute or namespace */
674 if (!memcmp(c, "xmlns", 5)) {
675 /* namespace */
676 attr = calloc(1, sizeof(struct lyxml_ns));
677 attr->type = LYXML_ATTR_NS;
678 c += 5;
679 if (*c != ':') {
680 /* default namespace, prefix will be empty */
681 goto equal;
682 }
683 c++; /* go after ':' to the prefix value */
684 } else {
685 /* attribute */
Radek Krejcibcf46db2015-05-26 15:40:56 +0200686 attr = calloc(1, sizeof *attr);
Radek Krejci674e1f82015-04-21 14:12:19 +0200687 attr->type = LYXML_ATTR_STD;
688 }
Radek Krejci4ea08382015-04-21 09:41:40 +0200689
Radek Krejci02117302015-04-13 16:32:44 +0200690 /* process name part of the attribute */
Radek Krejci674e1f82015-04-21 14:12:19 +0200691 start = c;
Radek Krejci02117302015-04-13 16:32:44 +0200692 uc = getutf8(c, &size);
693 if (!is_xmlnamestartchar(uc)) {
Radek Krejci3045cf32015-05-28 10:58:52 +0200694 LOGERR(LY_EWELLFORM, "Invalid NameStartChar of the attribute");
Radek Krejci674e1f82015-04-21 14:12:19 +0200695 free(attr);
Radek Krejci02117302015-04-13 16:32:44 +0200696 return NULL;
697 }
698 c += size;
699 uc = getutf8(c, &size);
700 while (is_xmlnamechar(uc)) {
Radek Krejci674e1f82015-04-21 14:12:19 +0200701 if (attr->type == LYXML_ATTR_STD && *c == ':') {
702 /* attribute in a namespace */
703 start = c + 1;
704
705 /* look for the prefix in namespaces */
706 memcpy(prefix, data, c - data);
707 prefix[c - data] = '\0';
708 attr->ns = get_ns(elem, prefix);
709 }
Radek Krejci02117302015-04-13 16:32:44 +0200710 c += size;
711 uc = getutf8(c, &size);
712 }
713
Radek Krejci02117302015-04-13 16:32:44 +0200714 /* store the name */
Radek Krejci674e1f82015-04-21 14:12:19 +0200715 size = c - start;
Radek Krejcida04f4a2015-05-21 12:54:09 +0200716 attr->name = lydict_insert(ctx, start, size);
Radek Krejci02117302015-04-13 16:32:44 +0200717
Radek Krejci674e1f82015-04-21 14:12:19 +0200718
719equal:
Radek Krejci02117302015-04-13 16:32:44 +0200720 /* check Eq mark that can be surrounded by whitespaces */
721 ign_xmlws(c);
722 if (*c != '=') {
Radek Krejci3045cf32015-05-28 10:58:52 +0200723 LOGERR(LY_EWELLFORM, "Invalid attribute definition, \"=\" expected.");
Radek Krejci02117302015-04-13 16:32:44 +0200724 goto error;
725 }
726 c++;
727 ign_xmlws(c);
Radek Krejci02117302015-04-13 16:32:44 +0200728
729 /* process value part of the attribute */
730 if (!*c || (*c != '"' && *c != '\'')) {
Radek Krejci3045cf32015-05-28 10:58:52 +0200731 LOGERR(LY_EWELLFORM, "Invalid attribute value, \" or \' expected.");
Radek Krejci02117302015-04-13 16:32:44 +0200732 goto error;
733 }
734 delim = c;
Radek Krejcida04f4a2015-05-21 12:54:09 +0200735 attr->value = lydict_insert_zc(ctx, parse_text(++c, *delim, &size));
Radek Krejci521008e2015-04-15 14:41:07 +0200736 if (ly_errno) {
Radek Krejci02117302015-04-13 16:32:44 +0200737 goto error;
738 }
739
Radek Krejci521008e2015-04-15 14:41:07 +0200740 *len = c + size + 1 - data; /* +1 is delimiter size */
Radek Krejci02117302015-04-13 16:32:44 +0200741 return attr;
742
743error:
Radek Krejcida04f4a2015-05-21 12:54:09 +0200744 lyxml_free_attr(ctx, attr);
Radek Krejci54ea8de2015-04-09 18:02:56 +0200745 return NULL;
746}
747
Radek Krejcida04f4a2015-05-21 12:54:09 +0200748static struct lyxml_elem *parse_elem(struct ly_ctx *ctx, const char *data,
749 unsigned int *len,
750 struct lyxml_elem *parent)
Radek Krejci54ea8de2015-04-09 18:02:56 +0200751{
Radek Krejci674e1f82015-04-21 14:12:19 +0200752 const char *c = data, *start, *e;
Radek Krejci02117302015-04-13 16:32:44 +0200753 const char *lws; /* leading white space for handling mixed content */
754 int uc;
755 char *str;
Radek Krejci674e1f82015-04-21 14:12:19 +0200756 char prefix[32] = {0};
757 unsigned int prefix_len = 0;
Radek Krejci39ebd8e2015-05-26 15:49:54 +0200758 struct lyxml_elem *elem = NULL, *child;
Radek Krejci02117302015-04-13 16:32:44 +0200759 struct lyxml_attr *attr;
Radek Krejci05e37a32015-04-15 14:40:34 +0200760 unsigned int size;
Radek Krejci674e1f82015-04-21 14:12:19 +0200761 int nons_flag = 0, closed_flag = 0;
Radek Krejci02117302015-04-13 16:32:44 +0200762
763 *len = 0;
764
765 if (*c != '<') {
766 return NULL;
767 }
768
769 /* locate element name */
770 c++;
771 e = c;
772
Radek Krejci05e37a32015-04-15 14:40:34 +0200773 uc = getutf8(e, &size);
Radek Krejci02117302015-04-13 16:32:44 +0200774 if (!is_xmlnamestartchar(uc)) {
Radek Krejci3045cf32015-05-28 10:58:52 +0200775 LOGERR(LY_EWELLFORM, "Invalid NameStartChar of the attribute");
Radek Krejci02117302015-04-13 16:32:44 +0200776 return NULL;
777 }
Radek Krejci05e37a32015-04-15 14:40:34 +0200778 e += size;
779 uc = getutf8(e, &size);
Radek Krejci02117302015-04-13 16:32:44 +0200780 while (is_xmlnamechar(uc)) {
Radek Krejci674e1f82015-04-21 14:12:19 +0200781 if (*e == ':') {
782 if (prefix_len) {
Radek Krejci3045cf32015-05-28 10:58:52 +0200783 LOGERR(LY_EWELLFORM, "Multiple colons in element name.");
Radek Krejci674e1f82015-04-21 14:12:19 +0200784 goto error;
785 }
786 /* element in a namespace */
787 start = e + 1;
788
789 /* look for the prefix in namespaces */
790 memcpy(prefix, c, prefix_len = e - c);
791 prefix[prefix_len] = '\0';
792 c = start;
793 }
Radek Krejci05e37a32015-04-15 14:40:34 +0200794 e += size;
795 uc = getutf8(e, &size);
Radek Krejci02117302015-04-13 16:32:44 +0200796 }
797 if (!*e) {
Radek Krejci3045cf32015-05-28 10:58:52 +0200798 LOGERR(LY_EWELLFORM, "Unexpected end of input data.");
Radek Krejci02117302015-04-13 16:32:44 +0200799 return NULL;
800 }
801
802 /* allocate element structure */
803 elem = calloc(1, sizeof *elem);
Radek Krejci3045cf32015-05-28 10:58:52 +0200804#ifndef NDEBUG
805 elem->line = lineno;
806#endif
Radek Krejcida04f4a2015-05-21 12:54:09 +0200807 elem->next = NULL;
Radek Krejci02117302015-04-13 16:32:44 +0200808 elem->prev = elem;
Radek Krejci674e1f82015-04-21 14:12:19 +0200809 if (parent) {
810 lyxml_add_child(parent, elem);
811 }
Radek Krejci02117302015-04-13 16:32:44 +0200812
813 /* store the name into the element structure */
Radek Krejcida04f4a2015-05-21 12:54:09 +0200814 elem->name = lydict_insert(ctx, c, e - c);
Radek Krejci02117302015-04-13 16:32:44 +0200815 c = e;
816
817process:
Radek Krejci709fee62015-04-15 13:56:19 +0200818 ly_errno = 0;
Radek Krejci02117302015-04-13 16:32:44 +0200819 ign_xmlws(c);
820 if (!memcmp("/>", c, 2)) {
821 /* we are done, it was EmptyElemTag */
822 c += 2;
Radek Krejci674e1f82015-04-21 14:12:19 +0200823 closed_flag = 1;
Radek Krejci02117302015-04-13 16:32:44 +0200824 } else if (*c == '>') {
825 /* process element content */
826 c++;
827 lws = NULL;
828
829 while (*c) {
830 if (!memcmp(c, "</", 2)) {
Radek Krejci674e1f82015-04-21 14:12:19 +0200831 if (lws && !elem->child) {
Radek Krejci02117302015-04-13 16:32:44 +0200832 /* leading white spaces were actually content */
833 goto store_content;
834 }
835
836 /* Etag */
837 c += 2;
838 /* get name and check it */
839 e = c;
Radek Krejci05e37a32015-04-15 14:40:34 +0200840 uc = getutf8(e, &size);
Radek Krejci02117302015-04-13 16:32:44 +0200841 if (!is_xmlnamestartchar(uc)) {
Radek Krejci3045cf32015-05-28 10:58:52 +0200842 LOGERR(LY_EWELLFORM,
Radek Krejci02117302015-04-13 16:32:44 +0200843 "Invalid NameStartChar of the attribute");
844 goto error;
845 }
Radek Krejci05e37a32015-04-15 14:40:34 +0200846 e += size;
847 uc = getutf8(e, &size);
Radek Krejci02117302015-04-13 16:32:44 +0200848 while (is_xmlnamechar(uc)) {
Radek Krejci674e1f82015-04-21 14:12:19 +0200849 if (*e == ':') {
850 /* element in a namespace */
851 start = e + 1;
852
853 /* look for the prefix in namespaces */
854 if (memcmp(prefix, c, e - c)) {
Radek Krejci3045cf32015-05-28 10:58:52 +0200855 LOGERR(LY_EWELLFORM,
Radek Krejci674e1f82015-04-21 14:12:19 +0200856 "Mixed opening (%s) and closing element tag - different namespaces",
857 elem->name);
858 }
859 c = start;
860 }
Radek Krejci05e37a32015-04-15 14:40:34 +0200861 e += size;
862 uc = getutf8(e, &size);
Radek Krejci02117302015-04-13 16:32:44 +0200863 }
864 if (!*e) {
Radek Krejci3045cf32015-05-28 10:58:52 +0200865 LOGERR(LY_EWELLFORM, "Unexpected end of input data.");
Radek Krejci02117302015-04-13 16:32:44 +0200866 goto error;
867 }
868
869 /* check that it corresponds to opening tag */
Radek Krejci05e37a32015-04-15 14:40:34 +0200870 size = e - c;
871 str = malloc((size + 1) * sizeof *str);
Radek Krejci02117302015-04-13 16:32:44 +0200872 memcpy(str, c, e - c);
873 str[e - c] = '\0';
Radek Krejci05e37a32015-04-15 14:40:34 +0200874 if (size != strlen(elem->name) ||
875 memcmp(str, elem->name, size)) {
Radek Krejci3045cf32015-05-28 10:58:52 +0200876 LOGERR(LY_EWELLFORM,
Radek Krejci02117302015-04-13 16:32:44 +0200877 "Mixed opening (%s) and closing (%s) element tag",
Radek Krejci674e1f82015-04-21 14:12:19 +0200878 elem->name, str);
Radek Krejci6f0c6f92015-05-25 15:01:15 +0200879 free(str);
Radek Krejci02117302015-04-13 16:32:44 +0200880 goto error;
881 }
882 free(str);
883 c = e;
884
885 ign_xmlws(c);
886 if (*c != '>') {
Radek Krejci3045cf32015-05-28 10:58:52 +0200887 LOGERR(LY_EWELLFORM,
Radek Krejci02117302015-04-13 16:32:44 +0200888 "Close element tag \"%s\" contain additional data.",
889 elem->name);
890 goto error;
891 }
892 c++;
Radek Krejci674e1f82015-04-21 14:12:19 +0200893 closed_flag = 1;
Radek Krejci02117302015-04-13 16:32:44 +0200894 break;
895
896 } else if (!memcmp(c, "<?", 2)) {
897 if (lws) {
898 /* leading white spaces were only formatting */
899 lws = NULL;
900 }
901 /* PI - ignore it */
902 c += 2;
Radek Krejci05e37a32015-04-15 14:40:34 +0200903 if (parse_ignore(c, "?>", &size)) {
Radek Krejci02117302015-04-13 16:32:44 +0200904 goto error;
905 }
906 c += size;
907 } else if (!memcmp(c, "<!--", 4)) {
908 if (lws) {
909 /* leading white spaces were only formatting */
910 lws = NULL;
911 }
912 /* Comment - ignore it */
913 c += 4;
Radek Krejci05e37a32015-04-15 14:40:34 +0200914 if (parse_ignore(c, "-->", &size)) {
Radek Krejci02117302015-04-13 16:32:44 +0200915 goto error;
916 }
917 c += size;
918 } else if (!memcmp(c, "<![CDATA[", 9)) {
919 /* CDSect */
Radek Krejcia4a84062015-04-16 13:00:10 +0200920 goto store_content;
Radek Krejci02117302015-04-13 16:32:44 +0200921 } else if (*c == '<') {
922 if (lws) {
Radek Krejcif0023a92015-04-20 20:51:39 +0200923 if (elem->flags & LYXML_ELEM_MIXED) {
924 /* we have a mixed content */
925 goto store_content;
926 } else {
927 /* leading white spaces were only formatting */
928 lws = NULL;
929 }
Radek Krejci02117302015-04-13 16:32:44 +0200930 }
931 if (elem->content) {
932 /* we have a mixed content */
933 child = calloc(1, sizeof *child);
934 child->content = elem->content;
935 elem->content = NULL;
936 lyxml_add_child(elem, child);
Radek Krejcif0023a92015-04-20 20:51:39 +0200937 elem->flags |= LYXML_ELEM_MIXED;
Radek Krejci02117302015-04-13 16:32:44 +0200938 }
Radek Krejcida04f4a2015-05-21 12:54:09 +0200939 child = parse_elem(ctx, c, &size, elem);
Radek Krejci02117302015-04-13 16:32:44 +0200940 if (!child) {
Radek Krejci3045cf32015-05-28 10:58:52 +0200941 LOGERR(LY_EWELLFORM, "Unexpected end of input data.");
Radek Krejci02117302015-04-13 16:32:44 +0200942 goto error;
943 }
Radek Krejci02117302015-04-13 16:32:44 +0200944 c += size; /* move after processed child element */
945 } else if (is_xmlws(*c)) {
946 lws = c;
Radek Krejci3045cf32015-05-28 10:58:52 +0200947#ifndef NDEBUG
948 lws_lineno = lineno;
949#endif
Radek Krejci02117302015-04-13 16:32:44 +0200950 ign_xmlws(c);
951 } else {
952store_content:
953 /* store text content */
954 if (lws) {
955 /* process content including the leading white spaces */
956 c = lws;
Radek Krejci3045cf32015-05-28 10:58:52 +0200957#ifndef NDEBUG
958 lineno = lws_lineno;
959#endif
Radek Krejci02117302015-04-13 16:32:44 +0200960 lws = NULL;
961 }
Radek Krejcida04f4a2015-05-21 12:54:09 +0200962 elem->content = lydict_insert_zc(ctx, parse_text(c, '<', &size));
Radek Krejci521008e2015-04-15 14:41:07 +0200963 if (ly_errno) {
Radek Krejci709fee62015-04-15 13:56:19 +0200964 goto error;
965 }
Radek Krejci02117302015-04-13 16:32:44 +0200966 c += size; /* move after processed text content */
967
968 if (elem->child) {
969 /* we have a mixed content */
970 child = calloc(1, sizeof *child);
971 child->content = elem->content;
972 elem->content = NULL;
973 lyxml_add_child(elem, child);
Radek Krejcif0023a92015-04-20 20:51:39 +0200974 elem->flags |= LYXML_ELEM_MIXED;
Radek Krejci02117302015-04-13 16:32:44 +0200975 }
976 }
977 }
978 } else {
979 /* process attribute */
Radek Krejcida04f4a2015-05-21 12:54:09 +0200980 attr = parse_attr(ctx, c, &size, elem);
Radek Krejci02117302015-04-13 16:32:44 +0200981 if (!attr) {
Radek Krejci3045cf32015-05-28 10:58:52 +0200982 LOGERR(LY_EWELLFORM, "Unexpected end of input data.");
Radek Krejci02117302015-04-13 16:32:44 +0200983 goto error;
984 }
985 lyxml_add_attr(elem, attr);
986 c += size; /* move after processed attribute */
987
Radek Krejci674e1f82015-04-21 14:12:19 +0200988 /* check namespace */
989 if (attr->type == LYXML_ATTR_NS) {
990 if (!prefix[0] && !attr->name) {
991 if (attr->value) {
992 /* default prefix */
993 elem->ns = (struct lyxml_ns *)attr;
994 } else {
995 /* xmlns="" -> no namespace */
996 nons_flag = 1;
997 }
998 } else if (prefix[0] && attr->name &&
999 !memcmp(attr->name, prefix, prefix_len + 1)) {
1000 /* matching namespace with prefix */
1001 elem->ns = (struct lyxml_ns *)attr;
1002 }
1003 }
1004
Radek Krejci02117302015-04-13 16:32:44 +02001005 /* go back to finish element processing */
1006 goto process;
1007 }
1008
1009 *len = c - data;
1010
Radek Krejci674e1f82015-04-21 14:12:19 +02001011 if (!closed_flag) {
Radek Krejci3045cf32015-05-28 10:58:52 +02001012 LOGERR(LY_EWELLFORM, "Missing closing element tag (%s).", elem->name);
Radek Krejci674e1f82015-04-21 14:12:19 +02001013 goto error;
1014 }
1015
1016 if (!nons_flag && parent) {
1017 elem->ns = get_ns(parent, prefix_len ? prefix : NULL);
1018 }
1019
Radek Krejci02117302015-04-13 16:32:44 +02001020 return elem;
1021
1022error:
Radek Krejcida04f4a2015-05-21 12:54:09 +02001023 lyxml_free_elem(ctx, elem);
Radek Krejci02117302015-04-13 16:32:44 +02001024
Radek Krejci54ea8de2015-04-09 18:02:56 +02001025 return NULL;
1026}
1027
Radek Krejcida04f4a2015-05-21 12:54:09 +02001028struct lyxml_elem *lyxml_read(struct ly_ctx *ctx, const char *data,
1029 int UNUSED(options))
Radek Krejci54ea8de2015-04-09 18:02:56 +02001030{
Radek Krejci02117302015-04-13 16:32:44 +02001031 const char *c = data;
Radek Krejci05e37a32015-04-15 14:40:34 +02001032 unsigned int len;
Radek Krejci02117302015-04-13 16:32:44 +02001033 struct lyxml_elem *root = NULL;
1034
Radek Krejcida04f4a2015-05-21 12:54:09 +02001035 if (!data || !ctx) {
Radek Krejci02117302015-04-13 16:32:44 +02001036 ly_errno = LY_EINVAL;
1037 return NULL;
1038 }
1039
Radek Krejci3045cf32015-05-28 10:58:52 +02001040#ifndef NDEBUG
1041 /* TODO: threads support */
1042 lineno = 1;
1043#endif
1044
Radek Krejci02117302015-04-13 16:32:44 +02001045 /* process document */
1046 while (*c) {
1047 if (is_xmlws(*c)) {
1048 /* skip whitespaces */
Radek Krejci3045cf32015-05-28 10:58:52 +02001049 ign_xmlws(c);
Radek Krejci02117302015-04-13 16:32:44 +02001050 } else if (!memcmp(c, "<?", 2)) {
1051 /* XMLDecl or PI - ignore it */
1052 c += 2;
Radek Krejci05e37a32015-04-15 14:40:34 +02001053 if (parse_ignore(c, "?>", &len)) {
Radek Krejci3045cf32015-05-28 10:58:52 +02001054 LOGERR(LY_EWELLFORM, "Missing close sequence \"?>\".");
Radek Krejci02117302015-04-13 16:32:44 +02001055 return NULL;
1056 }
1057 c += len;
1058 } else if (!memcmp(c, "<!--", 4)) {
1059 /* Comment - ignore it */
1060 c += 2;
Radek Krejci05e37a32015-04-15 14:40:34 +02001061 if (parse_ignore(c, "-->", &len)) {
Radek Krejci3045cf32015-05-28 10:58:52 +02001062 LOGERR(LY_EWELLFORM, "Missing close sequence \"-->\".");
Radek Krejci02117302015-04-13 16:32:44 +02001063 return NULL;
1064 }
1065 c += len;
1066 } else if (!memcmp(c, "<!", 2)) {
1067 /* DOCTYPE */
1068 /* TODO - standalone ignore counting < and > */
Radek Krejci3045cf32015-05-28 10:58:52 +02001069 LOGERR(LY_EINVAL, "DOCTYPE not implemented.");
Radek Krejci02117302015-04-13 16:32:44 +02001070 return NULL;
1071 } else if (*c == '<') {
1072 /* element - process it in next loop to strictly follow XML
1073 * format
1074 */
1075 break;
1076 }
1077 }
1078
Radek Krejcida04f4a2015-05-21 12:54:09 +02001079 root = parse_elem(ctx, c, &len, NULL);
Radek Krejci02117302015-04-13 16:32:44 +02001080 if (!root) {
1081 return NULL;
1082 }
1083 c += len;
1084
1085 /* ignore the rest of document where can be comments, PIs and whitespaces,
1086 * note that we are not detecting syntax errors in these parts
1087 */
1088 ign_xmlws(c);
1089 if (*c) {
Radek Krejci3045cf32015-05-28 10:58:52 +02001090 LOGWRN("There are some not parsed data:\n%s", c);
Radek Krejci02117302015-04-13 16:32:44 +02001091 }
1092
1093 return root;
1094}
1095
Radek Krejcida04f4a2015-05-21 12:54:09 +02001096struct lyxml_elem *lyxml_read_fd(struct ly_ctx *ctx, int fd,
1097 int UNUSED(options))
Radek Krejci02117302015-04-13 16:32:44 +02001098{
Radek Krejcida04f4a2015-05-21 12:54:09 +02001099 if (fd == -1 || !ctx) {
Radek Krejci02117302015-04-13 16:32:44 +02001100 ly_errno = LY_EINVAL;
1101 return NULL;
1102 }
1103
Radek Krejci54ea8de2015-04-09 18:02:56 +02001104 return NULL;
1105}
1106
Radek Krejcida04f4a2015-05-21 12:54:09 +02001107struct lyxml_elem *lyxml_read_file(struct ly_ctx *ctx, const char *filename,
1108 int UNUSED(options))
Radek Krejci54ea8de2015-04-09 18:02:56 +02001109{
Radek Krejcida04f4a2015-05-21 12:54:09 +02001110 if (!filename || !ctx) {
Radek Krejci3045cf32015-05-28 10:58:52 +02001111 LOGERR(LY_EINVAL, NULL);
Radek Krejci02117302015-04-13 16:32:44 +02001112 return NULL;
1113 }
Radek Krejci54ea8de2015-04-09 18:02:56 +02001114
Radek Krejci02117302015-04-13 16:32:44 +02001115 return NULL;
Radek Krejci54ea8de2015-04-09 18:02:56 +02001116}
Radek Krejci02117302015-04-13 16:32:44 +02001117
/**
 * @brief Print a text into the output and escape XML special characters.
 *
 * The characters '&', '<' and '>' are replaced by "&amp;", "&lt;" and
 * "&gt;", all the other bytes are printed as they are.
 *
 * @param[in] f Output stream.
 * @param[in] text NUL-terminated text to print, it is not modified.
 * @return Number of characters the escaped form of the text consists of.
 * Results of the output functions are not checked, so the same number is
 * returned even if a write fails.
 */
static int dump_text(FILE *f, const char *text)
{
    const char *p;
    int n = 0;

    for (p = text; *p; p++) {
        switch (*p) {
        case '&':
            fputs("&amp;", f);
            n += 5;
            break;
        case '<':
            fputs("&lt;", f);
            n += 4;
            break;
        case '>':
            /* not needed, just for readability */
            fputs("&gt;", f);
            n += 4;
            break;
        default:
            fputc(*p, f);
            n++;
            break;
        }
    }

    return n;
}
1142
1143static int dump_elem(FILE *f, struct lyxml_elem *e, int level)
1144{
1145 int size = 0;
1146 struct lyxml_attr *a;
1147 struct lyxml_elem *child;
Radek Krejci674e1f82015-04-21 14:12:19 +02001148 const char *delim, *delim_outer;
1149 int indent;
Radek Krejcif0023a92015-04-20 20:51:39 +02001150
1151 if (!e->name) {
1152 /* mixed content */
1153 if (e->content) {
1154 return dump_text(f, e->content);
1155 } else {
1156 return 0;
1157 }
1158 }
1159
Radek Krejci674e1f82015-04-21 14:12:19 +02001160 delim = delim_outer = "\n";
1161 indent = 2 * level;
1162 if ((e->flags & LYXML_ELEM_MIXED) || (e->parent && (e->parent->flags & LYXML_ELEM_MIXED))) {
1163 delim = "";
1164 }
1165 if (e->parent && (e->parent->flags & LYXML_ELEM_MIXED)) {
1166 delim_outer = "";
1167 indent = 0;
Radek Krejcif0023a92015-04-20 20:51:39 +02001168 }
1169
Radek Krejci674e1f82015-04-21 14:12:19 +02001170 /* opening tag */
1171 if (e->ns && e->ns->prefix) {
1172 size += fprintf(f, "%*s<%s:%s", indent, "", e->ns->prefix, e->name);
1173 } else {
1174 size += fprintf(f, "%*s<%s", indent, "", e->name);
Radek Krejcif0023a92015-04-20 20:51:39 +02001175 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001176
1177 /* attributes */
1178 for (a = e->attr; a; a = a->next) {
1179 if (a->type == LYXML_ATTR_NS) {
1180 if (a->name) {
1181 size += fprintf(f, " xmlns:%s=\"%s\"", a->name,
1182 a->value ? a->value : "");
1183 } else {
1184 size += fprintf(f, " xmlns=\"%s\"", a->value ? a->value : "");
1185 }
1186 } else if (a->ns && a->ns->prefix) {
1187 size += fprintf(f, " %s:%s=\"%s\"", a->ns->prefix, a->name,
1188 a->value);
1189 } else {
1190 size += fprintf(f, " %s=\"%s\"", a->name, a->value);
1191 }
1192 }
1193
Radek Krejcif0023a92015-04-20 20:51:39 +02001194 if (!e->child && !e->content) {
Radek Krejci674e1f82015-04-21 14:12:19 +02001195 size += fprintf(f, "/>%s", delim);
Radek Krejcif0023a92015-04-20 20:51:39 +02001196 return size;
1197 } else if (e->content) {
1198 fputc('>', f);
1199 size++;
1200
1201 size += dump_text(f, e->content);
1202
Radek Krejci674e1f82015-04-21 14:12:19 +02001203
1204 if (e->ns && e->ns->prefix) {
1205 size += fprintf(f, "</%s:%s>%s", e->ns->prefix, e->name, delim);
1206 } else {
1207 size += fprintf(f, "</%s>%s", e->name, delim);
1208 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001209 return size;
1210 } else {
Radek Krejci674e1f82015-04-21 14:12:19 +02001211 size += fprintf(f, ">%s", delim);
Radek Krejcif0023a92015-04-20 20:51:39 +02001212 }
1213
1214 /* go recursively */
Radek Krejcida04f4a2015-05-21 12:54:09 +02001215 LY_TREE_FOR(e->child, child) {
Radek Krejcif0023a92015-04-20 20:51:39 +02001216 size += dump_elem(f, child, level + 1);
Radek Krejcida04f4a2015-05-21 12:54:09 +02001217 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001218
Radek Krejci674e1f82015-04-21 14:12:19 +02001219 /* closing tag */
1220 if (e->ns && e->ns->prefix) {
1221 size += fprintf(f, "%*s</%s:%s>%s", indent, "", e->ns->prefix, e->name,
1222 delim_outer);
Radek Krejcif0023a92015-04-20 20:51:39 +02001223 } else {
Radek Krejci674e1f82015-04-21 14:12:19 +02001224 size += fprintf(f, "%*s</%s>%s", indent, "", e->name, delim_outer);
Radek Krejcif0023a92015-04-20 20:51:39 +02001225 }
1226
1227 return size;
1228}
1229
/**
 * @brief Print an XML tree into the given stream.
 *
 * @param[in] stream Output stream handed over to dump_elem().
 * @param[in] elem Root of the (sub)tree to print, NULL prints nothing.
 * @param[in] options Currently unused.
 * @return Value returned by dump_elem() for the tree, 0 if elem is NULL.
 */
int lyxml_dump(FILE *stream, struct lyxml_elem *elem, int UNUSED(options))
{
    /* the top-level element is printed without any indentation (level 0) */
    return elem ? dump_elem(stream, elem, 0) : 0;
}