/**
 * @file xml.c
 * @author Radek Krejci <rkrejci@cesnet.cz>
 * @brief XML parser implementation for libyang
 *
 * Copyright (c) 2015 CESNET, z.s.p.o.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of the Company nor the names of its contributors
 *    may be used to endorse or promote products derived from this
 *    software without specific prior written permission.
 */
21
Radek Krejci812b10a2015-05-28 16:48:25 +020022#include <assert.h>
Radek Krejci709fee62015-04-15 13:56:19 +020023#include <ctype.h>
24#include <stdint.h>
Radek Krejcif0023a92015-04-20 20:51:39 +020025#include <stdio.h>
Radek Krejci02117302015-04-13 16:32:44 +020026#include <stdlib.h>
27#include <string.h>
Radek Krejci54ea8de2015-04-09 18:02:56 +020028#include <unistd.h>
29
Radek Krejci06a704e2015-04-22 14:50:49 +020030#include "common.h"
31#include "dict.h"
Radek Krejcida04f4a2015-05-21 12:54:09 +020032#include "tree.h"
Radek Krejci54ea8de2015-04-09 18:02:56 +020033#include "xml.h"
34
/*
 * Line counting is only available in debug builds: COUNTLINE() bumps the
 * global lineno on every line feed. With NDEBUG defined, lineno is the
 * constant 0 and COUNTLINE() expands to nothing.
 */
#ifndef NDEBUG
unsigned int lineno, lws_lineno;
#define COUNTLINE(c) if ((c) == 0xa) {lineno++;}
#else
#define lineno 0
#define COUNTLINE(C)
#endif

/*
 * Macro to test if character is #x20 | #x9 | #xA | #xD (whitespace)
 */
#define is_xmlws(c) ((c) == 0x20 || (c) == 0x9 || (c) == 0xa || (c) == 0xd)

/*
 * Macro to test if the code point can start an XML name (NameStartChar).
 * The argument is evaluated several times, do not pass expressions with
 * side effects.
 */
#define is_xmlnamestartchar(c) (((c) >= 'a' && (c) <= 'z') || (c) == '_' || \
        ((c) >= 'A' && (c) <= 'Z') || (c) == ':' || \
        ((c) >= 0x370 && (c) <= 0x1fff && (c) != 0x37e) || \
        ((c) >= 0xc0 && (c) <= 0x2ff && (c) != 0xd7 && (c) != 0xf7) || (c) == 0x200c || \
        (c) == 0x200d || ((c) >= 0x2070 && (c) <= 0x218f) || \
        ((c) >= 0x2c00 && (c) <= 0x2fef) || ((c) >= 0x3001 && (c) <= 0xd7ff) || \
        ((c) >= 0xf900 && (c) <= 0xfdcf) || ((c) >= 0xfdf0 && (c) <= 0xfffd) || \
        ((c) >= 0x10000 && (c) <= 0xeffff))

/*
 * Macro to test if the code point can appear inside an XML name (NameChar).
 * In addition to NameStartChar it accepts '-', '.', digits, #xB7,
 * [#x300-#x36F] and [#x203F-#x2040]. The argument is evaluated several times.
 */
#define is_xmlnamechar(c) (((c) >= 'a' && (c) <= 'z') || (c) == '_' || (c) == '-' || \
        ((c) >= 'A' && (c) <= 'Z') || ((c) >= '0' && (c) <= '9') || (c) == ':' || \
        (c) == '.' || (c) == 0xb7 || ((c) >= 0x370 && (c) <= 0x1fff && (c) != 0x37e) || \
        ((c) >= 0xc0 && (c) <= 0x2ff && (c) != 0xd7 && (c) != 0xf7) || (c) == 0x200c || \
        (c) == 0x200d || ((c) >= 0x300 && (c) <= 0x36f) || \
        ((c) >= 0x2070 && (c) <= 0x218f) || ((c) >= 0x203f && (c) <= 0x2040) || \
        ((c) >= 0x2c00 && (c) <= 0x2fef) || ((c) >= 0x3001 && (c) <= 0xd7ff) || \
        ((c) >= 0xf900 && (c) <= 0xfdcf) || ((c) >= 0xfdf0 && (c) <= 0xfffd) || \
        ((c) >= 0x10000 && (c) <= 0xeffff))

/*
 * Move the pointer p behind all the leading XML whitespaces, counting the
 * skipped lines on the way.
 */
#define ign_xmlws(p) \
    while (is_xmlws(*(p))) { \
        COUNTLINE(*(p)); \
        (p)++; \
    }
Radek Krejci02117302015-04-13 16:32:44 +020072
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020073void
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020074lyxml_unlink_elem(struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +020075{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020076 struct lyxml_elem *parent, *first;
Radek Krejci02117302015-04-13 16:32:44 +020077
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020078 if (!elem) {
79 return;
80 }
Radek Krejci02117302015-04-13 16:32:44 +020081
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020082 /* store pointers to important nodes */
83 parent = elem->parent;
Radek Krejcie1f13912015-05-26 15:17:38 +020084
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020085 /* unlink from parent */
86 if (parent) {
87 if (parent->child == elem) {
88 /* we unlink the first child */
89 /* update the parent's link */
90 parent->child = elem->next;
91 }
92 /* forget about the parent */
93 elem->parent = NULL;
94 }
Radek Krejci02117302015-04-13 16:32:44 +020095
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020096 /* unlink from siblings */
97 if (elem->prev == elem) {
98 /* there are no more siblings */
99 return;
100 }
101 if (elem->next) {
102 elem->next->prev = elem->prev;
103 } else {
104 /* unlinking the last element */
105 if (parent) {
106 first = parent->child;
107 } else {
108 first = elem;
109 while (elem->prev->next) {
110 first = elem->prev;
111 }
112 }
113 first->prev = elem->prev;
114 }
115 if (elem->prev->next) {
116 elem->prev->next = elem->next;
117 }
Radek Krejcida04f4a2015-05-21 12:54:09 +0200118
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200119 /* clean up the unlinked element */
120 elem->next = NULL;
121 elem->prev = elem;
Radek Krejci02117302015-04-13 16:32:44 +0200122}
123
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200124void
Radek Krejci00249f22015-07-07 13:43:28 +0200125lyxml_free_attr(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_attr *attr)
Radek Krejci02117302015-04-13 16:32:44 +0200126{
Radek Krejci00249f22015-07-07 13:43:28 +0200127 struct lyxml_attr *aiter, *aprev;
128
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200129 if (!attr) {
130 return;
131 }
Radek Krejci02117302015-04-13 16:32:44 +0200132
Radek Krejci00249f22015-07-07 13:43:28 +0200133 if (parent) {
134 /* unlink attribute from the parent's list of attributes */
135 aprev = NULL;
136 for (aiter = parent->attr; aiter; aiter = aiter->next) {
137 if (aiter == attr) {
138 break;
139 }
140 aprev = aiter;
141 }
142 if (!aiter) {
143 /* attribute to remove not found */
144 return;
145 }
146
147 if (!aprev) {
148 /* attribute is first in parent's list of attributes */
149 parent->attr = attr->next;
150 } else {
151 /* reconnect previous attribute to the next */
152 aprev->next = attr->next;
153 }
154 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200155 lydict_remove(ctx, attr->name);
156 lydict_remove(ctx, attr->value);
157 free(attr);
Radek Krejci02117302015-04-13 16:32:44 +0200158}
159
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200160void
161lyxml_free_attrs(struct ly_ctx *ctx, struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200162{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200163 struct lyxml_attr *a, *next;
164 if (!elem || !elem->attr) {
165 return;
166 }
Radek Krejci02117302015-04-13 16:32:44 +0200167
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200168 a = elem->attr;
169 do {
170 next = a->next;
Radek Krejci02117302015-04-13 16:32:44 +0200171
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200172 lydict_remove(ctx, a->name);
173 lydict_remove(ctx, a->value);
174 free(a);
Radek Krejci02117302015-04-13 16:32:44 +0200175
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200176 a = next;
177 } while (a);
Radek Krejci02117302015-04-13 16:32:44 +0200178}
179
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200180static void
181lyxml_free_elem_(struct ly_ctx *ctx, struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200182{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200183 struct lyxml_elem *e, *next;
Radek Krejci02117302015-04-13 16:32:44 +0200184
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200185 if (!elem) {
186 return;
187 }
Radek Krejci02117302015-04-13 16:32:44 +0200188
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200189 lyxml_free_attrs(ctx, elem);
190 LY_TREE_FOR_SAFE(elem->child, next, e) {
191 lyxml_free_elem_(ctx, e);
192 }
193 lydict_remove(ctx, elem->name);
194 lydict_remove(ctx, elem->content);
195 free(elem);
Radek Krejci02117302015-04-13 16:32:44 +0200196}
197
/**
 * @brief Unlink the element from the tree it belongs to and free it together
 * with its attributes and children.
 *
 * @param[in] ctx Context handed over to the internal freeing routine.
 * @param[in] elem Element to free, NULL is silently ignored.
 */
void
lyxml_free_elem(struct ly_ctx *ctx, struct lyxml_elem *elem)
{
    if (elem) {
        /* detach first so that no parent/sibling keeps pointing to freed memory */
        lyxml_unlink_elem(elem);
        lyxml_free_elem_(ctx, elem);
    }
}
208
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200209const char *
210lyxml_get_attr(struct lyxml_elem *elem, const char *name, const char *ns)
Radek Krejcida04f4a2015-05-21 12:54:09 +0200211{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200212 struct lyxml_attr *a;
Radek Krejcida04f4a2015-05-21 12:54:09 +0200213
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200214 assert(elem);
215 assert(name);
Radek Krejcida04f4a2015-05-21 12:54:09 +0200216
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200217 for (a = elem->attr; a; a = a->next) {
218 if (a->type != LYXML_ATTR_STD) {
219 continue;
220 }
Radek Krejcida04f4a2015-05-21 12:54:09 +0200221
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200222 if (!strcmp(name, a->name)) {
223 if ((!ns && !a->ns) || (ns && a->ns && !strcmp(ns, a->ns->value))) {
224 return a->value;
225 }
226 }
227 }
Radek Krejcida04f4a2015-05-21 12:54:09 +0200228
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200229 return NULL;
Radek Krejcida04f4a2015-05-21 12:54:09 +0200230}
231
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200232int
233lyxml_add_child(struct lyxml_elem *parent, struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200234{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200235 struct lyxml_elem *e;
Radek Krejci02117302015-04-13 16:32:44 +0200236
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200237 assert(parent);
238 assert(elem);
Radek Krejci02117302015-04-13 16:32:44 +0200239
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200240 /* (re)link element to parent */
241 if (elem->parent) {
242 lyxml_unlink_elem(elem);
243 }
244 elem->parent = parent;
Radek Krejci02117302015-04-13 16:32:44 +0200245
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200246 /* link parent to element */
247 if (parent->child) {
248 e = parent->child;
249 elem->prev = e->prev;
250 elem->next = NULL;
251 elem->prev->next = elem;
252 e->prev = elem;
253 } else {
254 parent->child = elem;
255 elem->prev = elem;
256 elem->next = NULL;
257 }
Radek Krejci02117302015-04-13 16:32:44 +0200258
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200259 return EXIT_SUCCESS;
Radek Krejci02117302015-04-13 16:32:44 +0200260}
261
262/**
263 * @brief Get the first UTF-8 character value (4bytes) from buffer
264 * @param[in] buf pointr to the current position in input buffer
265 * @param[out] read Number of processed bytes in buf (length of UTF-8
266 * character).
267 * @return UTF-8 value as 4 byte number. 0 means error, only UTF-8 characters
268 * valid for XML are returned, so:
269 * #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
270 * = any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
271 *
272 * UTF-8 mapping:
273 * 00000000 -- 0000007F: 0xxxxxxx
274 * 00000080 -- 000007FF: 110xxxxx 10xxxxxx
275 * 00000800 -- 0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
276 * 00010000 -- 001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
277 *
278 */
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200279static int
280getutf8(const char *buf, unsigned int *read)
Radek Krejci02117302015-04-13 16:32:44 +0200281{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200282 int c, aux;
283 int i;
Radek Krejci02117302015-04-13 16:32:44 +0200284
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200285 c = buf[0];
286 *read = 0;
Radek Krejci02117302015-04-13 16:32:44 +0200287
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200288 /* buf is NULL terminated string, so 0 means EOF */
289 if (!c) {
290 LOGVAL(VE_EOF, lineno);
291 return 0;
292 }
293 *read = 1;
Radek Krejci02117302015-04-13 16:32:44 +0200294
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200295 /* process character byte(s) */
296 if ((c & 0xf8) == 0xf0) {
297 /* four bytes character */
298 *read = 4;
Radek Krejci02117302015-04-13 16:32:44 +0200299
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200300 c &= 0x07;
301 for (i = 1; i <= 3; i++) {
302 aux = buf[i];
303 if ((aux & 0xc0) != 0x80) {
304 LOGVAL(VE_XML_INVAL, lineno, "input character");
305 return 0;
306 }
Radek Krejci02117302015-04-13 16:32:44 +0200307
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200308 c = (c << 6) | (aux & 0x3f);
309 }
Radek Krejci02117302015-04-13 16:32:44 +0200310
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200311 if (c < 0x1000 || c > 0x10ffff) {
312 LOGVAL(VE_XML_INVAL, lineno, "input character");
313 return 0;
314 }
315 } else if ((c & 0xf0) == 0xe0) {
316 /* three bytes character */
317 *read = 3;
Radek Krejci02117302015-04-13 16:32:44 +0200318
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200319 c &= 0x0f;
320 for (i = 1; i <= 2; i++) {
321 aux = buf[i];
322 if ((aux & 0xc0) != 0x80) {
323 LOGVAL(VE_XML_INVAL, lineno, "input character");
324 return 0;
325 }
Radek Krejci02117302015-04-13 16:32:44 +0200326
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200327 c = (c << 6) | (aux & 0x3f);
328 }
Radek Krejci02117302015-04-13 16:32:44 +0200329
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200330 if (c < 0x800 || (c > 0xd7ff && c < 0xe000) || c > 0xfffd) {
331 LOGVAL(VE_XML_INVAL, lineno, "input character");
332 return 0;
333 }
334 } else if ((c & 0xe0) == 0xc0) {
335 /* two bytes character */
336 *read = 2;
Radek Krejci02117302015-04-13 16:32:44 +0200337
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200338 aux = buf[1];
339 if ((aux & 0xc0) != 0x80) {
340 LOGVAL(VE_XML_INVAL, lineno, "input character");
341 return 0;
342 }
343 c = ((c & 0x1f) << 6) | (aux & 0x3f);
Radek Krejci02117302015-04-13 16:32:44 +0200344
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200345 if (c < 0x80) {
346 LOGVAL(VE_XML_INVAL, lineno, "input character");
347 return 0;
348 }
349 } else if (!(c & 0x80)) {
350 /* one byte character */
351 if (c < 0x20 && c != 0x9 && c != 0xa && c != 0xd) {
352 /* invalid character */
353 LOGVAL(VE_XML_INVAL, lineno, "input character");
354 return 0;
355 }
356 } else {
357 /* invalid character */
358 LOGVAL(VE_XML_INVAL, lineno, "input character");
359 return 0;
360 }
Radek Krejci02117302015-04-13 16:32:44 +0200361
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200362 return c;
Radek Krejci02117302015-04-13 16:32:44 +0200363}
364
Radek Krejci709fee62015-04-15 13:56:19 +0200365/**
366 * Store UTF-8 character specified as 4byte integer into the dst buffer.
367 * Returns number of written bytes (4 max), expects that dst has enough space.
368 *
369 * UTF-8 mapping:
370 * 00000000 -- 0000007F: 0xxxxxxx
371 * 00000080 -- 000007FF: 110xxxxx 10xxxxxx
372 * 00000800 -- 0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
373 * 00010000 -- 001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
374 *
375 */
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200376static unsigned int
377pututf8(char *dst, int32_t value)
Radek Krejci709fee62015-04-15 13:56:19 +0200378{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200379 if (value < 0x80) {
380 /* one byte character */
381 dst[0] = value;
Radek Krejci709fee62015-04-15 13:56:19 +0200382
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200383 return 1;
384 } else if (value < 0x800) {
385 /* two bytes character */
386 dst[0] = 0xc0 | (value >> 6);
387 dst[1] = 0x80 | (value & 0x3f);
Radek Krejci709fee62015-04-15 13:56:19 +0200388
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200389 return 2;
390 } else if (value < 0x10000) {
391 /* three bytes character */
392 dst[0] = 0xe0 | (value >> 12);
393 dst[1] = 0x80 | ((value >> 6) & 0x3f);
394 dst[2] = 0x80 | (value & 0x3f);
Radek Krejci709fee62015-04-15 13:56:19 +0200395
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200396 return 3;
397 } else if (value < 0x200000) {
398 /* four bytes character */
399 dst[0] = 0xf0 | (value >> 18);
400 dst[1] = 0x80 | ((value >> 12) & 0x3f);
401 dst[2] = 0x80 | ((value >> 6) & 0x3f);
402 dst[3] = 0x80 | (value & 0x3f);
Radek Krejci709fee62015-04-15 13:56:19 +0200403
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200404 return 4;
405 } else {
406 /* out of range */
407 LOGVAL(VE_SPEC, lineno, "Invalid UTF-8 value 0x%08x", value);
408 return 0;
409 }
Radek Krejci709fee62015-04-15 13:56:19 +0200410}
411
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200412static int
413parse_ignore(const char *data, const char *endstr, unsigned int *len)
Radek Krejci02117302015-04-13 16:32:44 +0200414{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200415 unsigned int slen;
416 const char *c = data;
Radek Krejci02117302015-04-13 16:32:44 +0200417
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200418 slen = strlen(endstr);
Radek Krejci02117302015-04-13 16:32:44 +0200419
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200420 while (*c && memcmp(c, endstr, slen)) {
421 COUNTLINE(*c);
422 c++;
423 }
424 if (!*c) {
425 LOGVAL(VE_XML_MISS, lineno, "closing sequence", endstr);
426 return EXIT_FAILURE;
427 }
428 c += slen;
Radek Krejci02117302015-04-13 16:32:44 +0200429
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200430 *len = c - data;
431 return EXIT_SUCCESS;
Radek Krejci02117302015-04-13 16:32:44 +0200432}
433
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200434static char *
435parse_text(const char *data, char delim, unsigned int *len)
Radek Krejci02117302015-04-13 16:32:44 +0200436{
Radek Krejci709fee62015-04-15 13:56:19 +0200437#define BUFSIZE 1024
Radek Krejci02117302015-04-13 16:32:44 +0200438
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200439 char buf[BUFSIZE];
440 char *result = NULL, *aux;
441 unsigned int r;
442 int o, size = 0;
443 int cdsect = 0;
444 int32_t n;
Radek Krejci709fee62015-04-15 13:56:19 +0200445
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200446 for (*len = o = 0; cdsect || data[*len] != delim; o++) {
447 if (!data[*len] || (!cdsect && !memcmp(&data[*len], "]]>", 2))) {
448 LOGVAL(VE_XML_INVAL, lineno, "element content, \"]]>\" found");
449 goto error;
450 }
Radek Krejci709fee62015-04-15 13:56:19 +0200451
Radek Krejcia4a84062015-04-16 13:00:10 +0200452loop:
453
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200454 if (o > BUFSIZE - 3) {
455 /* add buffer into the result */
456 if (result) {
457 size = size + o;
458 aux = realloc(result, size + 1);
459 result = aux;
460 } else {
461 size = o;
462 result = malloc((size + 1) * sizeof *result);
463 }
464 memcpy(&result[size - o], buf, o);
Radek Krejci709fee62015-04-15 13:56:19 +0200465
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200466 /* write again into the beginning of the buffer */
467 o = 0;
468 }
Radek Krejci709fee62015-04-15 13:56:19 +0200469
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200470 if (cdsect || !memcmp(&data[*len], "<![CDATA[", 9)) {
471 /* CDSect */
472 if (!cdsect) {
473 cdsect = 1;
474 *len += 9;
475 }
476 if (data[*len] && !memcmp(&data[*len], "]]>", 3)) {
477 *len += 3;
478 cdsect = 0;
479 o--; /* we don't write any data in this iteration */
480 } else {
481 buf[o] = data[*len];
482 (*len)++;
483 }
484 } else if (data[*len] == '&') {
485 (*len)++;
486 if (data[*len] != '#') {
487 /* entity reference - only predefined refs are supported */
488 if (!memcmp(&data[*len], "lt;", 3)) {
489 buf[o] = '<';
490 *len += 3;
491 } else if (!memcmp(&data[*len], "gt;", 3)) {
492 buf[o] = '>';
493 *len += 3;
494 } else if (!memcmp(&data[*len], "amp;", 4)) {
495 buf[o] = '&';
496 *len += 4;
497 } else if (!memcmp(&data[*len], "apos;", 5)) {
498 buf[o] = '\'';
499 *len += 5;
500 } else if (!memcmp(&data[*len], "quot;", 5)) {
501 buf[o] = '\"';
502 *len += 5;
503 } else {
504 LOGVAL(VE_XML_INVAL, lineno, "entity reference (only predefined references are supported)");
505 goto error;
506 }
507 } else {
508 /* character reference */
509 (*len)++;
510 if (isdigit(data[*len])) {
511 for (n = 0; isdigit(data[*len]); (*len)++) {
512 n = (10 * n) + (data[*len] - '0');
513 }
514 if (data[*len] != ';') {
515 LOGVAL(VE_XML_INVAL, lineno, "character reference, missing semicolon");
516 goto error;
517 }
518 } else if (data[(*len)++] == 'x' && isxdigit(data[*len])) {
519 for (n = 0; isxdigit(data[*len]); (*len)++) {
520 if (isdigit(data[*len])) {
521 r = (data[*len] - '0');
522 } else if (data[*len] > 'F') {
523 r = 10 + (data[*len] - 'a');
524 } else {
525 r = 10 + (data[*len] - 'A');
526 }
527 n = (16 * n) + r;
528 }
529 } else {
530 LOGVAL(VE_XML_INVAL, lineno, "character reference");
531 goto error;
Radek Krejci709fee62015-04-15 13:56:19 +0200532
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200533 }
534 r = pututf8(&buf[o], n);
535 if (!r) {
536 LOGVAL(VE_XML_INVAL, lineno, "character reference value");
537 goto error;
538 }
539 o += r - 1; /* o is ++ in for loop */
540 (*len)++;
541 }
542 } else {
543 buf[o] = data[*len];
544 COUNTLINE(buf[o]);
545 (*len)++;
546 }
547 }
Radek Krejci02117302015-04-13 16:32:44 +0200548
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200549 if (delim == '<' && !memcmp(&data[*len], "<![CDATA[", 9)) {
550 /* ignore loop's end condition on beginning of CDSect */
551 goto loop;
552 }
Radek Krejci709fee62015-04-15 13:56:19 +0200553#undef BUFSIZE
554
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200555 if (o) {
556 if (result) {
557 size = size + o;
558 aux = realloc(result, size + 1);
559 result = aux;
560 } else {
561 size = o;
562 result = malloc((size + 1) * sizeof *result);
563 }
564 memcpy(&result[size - o], buf, o);
565 }
566 if (result) {
567 result[size] = '\0';
568 }
Radek Krejci02117302015-04-13 16:32:44 +0200569
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200570 return result;
Radek Krejci709fee62015-04-15 13:56:19 +0200571
572error:
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200573 free(result);
574 return NULL;
Radek Krejci02117302015-04-13 16:32:44 +0200575}
576
Radek Krejci4476d412015-07-10 15:35:01 +0200577struct lyxml_ns *
578lyxml_get_ns(struct lyxml_elem *elem, const char *prefix)
Radek Krejci02117302015-04-13 16:32:44 +0200579{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200580 struct lyxml_attr *attr;
581 int len;
Radek Krejci674e1f82015-04-21 14:12:19 +0200582
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200583 if (!elem) {
584 return NULL;
585 }
Radek Krejci674e1f82015-04-21 14:12:19 +0200586
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200587 if (!prefix) {
588 len = 0;
589 } else {
590 len = strlen(prefix);
591 }
Radek Krejci674e1f82015-04-21 14:12:19 +0200592
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200593 for (attr = elem->attr; attr; attr = attr->next) {
594 if (attr->type != LYXML_ATTR_NS) {
595 continue;
596 }
597 if (!attr->name) {
598 if (!len) {
599 /* default namespace found */
600 if (!attr->value) {
601 /* empty default namespace -> no default namespace */
602 return NULL;
603 }
604 return (struct lyxml_ns *)attr;
605 }
606 } else if (len && !memcmp(attr->name, prefix, len)) {
607 /* prefix found */
608 return (struct lyxml_ns *)attr;
609 }
610 }
Radek Krejci674e1f82015-04-21 14:12:19 +0200611
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200612 /* go recursively */
Radek Krejci4476d412015-07-10 15:35:01 +0200613 return lyxml_get_ns(elem->parent, prefix);
Radek Krejci674e1f82015-04-21 14:12:19 +0200614}
615
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200616struct lyxml_attr *
Radek Krejci00249f22015-07-07 13:43:28 +0200617lyxml_dup_attr(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_attr *attr)
Radek Krejci4bd12912015-06-10 14:35:52 +0200618{
Radek Krejci00249f22015-07-07 13:43:28 +0200619 struct lyxml_attr *result, *a;
Radek Krejci4bd12912015-06-10 14:35:52 +0200620
Radek Krejci00249f22015-07-07 13:43:28 +0200621 if (!attr || !parent) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200622 return NULL;
623 }
Radek Krejci4bd12912015-06-10 14:35:52 +0200624
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200625 if (attr->type == LYXML_ATTR_NS) {
626 /* this is correct, despite that all attributes seems like a standard
627 * attributes (struct lyxml_attr), some of them can be namespace
628 * definitions (and in that case they are struct lyxml_ns).
629 */
630 result = (struct lyxml_attr *)calloc(1, sizeof (struct lyxml_ns));
631 } else {
632 result = calloc(1, sizeof (struct lyxml_attr));
633 }
634 result->value = lydict_insert(ctx, attr->value, 0);
635 result->name = lydict_insert(ctx, attr->name, 0);
636 result->type = attr->type;
Radek Krejci4bd12912015-06-10 14:35:52 +0200637
Radek Krejci00249f22015-07-07 13:43:28 +0200638 /* set namespace in case of standard attributes */
639 if (result->type == LYXML_ATTR_STD && attr->ns) {
Radek Krejci4476d412015-07-10 15:35:01 +0200640 result->ns = lyxml_get_ns(parent, attr->ns->prefix);
Radek Krejci00249f22015-07-07 13:43:28 +0200641 }
642
643 /* set parent pointer in case of namespace attribute */
644 if (result->type == LYXML_ATTR_NS) {
645 ((struct lyxml_ns *)result)->parent = parent;
646 }
647
648 /* put attribute into the parent's attributes list */
649 if (parent->attr) {
650 /* go to the end of the list */
651 for (a = parent->attr; a->next; a = a->next);
652 /* and append new attribute */
653 a->next = result;
654 } else {
655 /* add the first attribute in the list */
656 parent->attr = result;
657 }
658
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200659 return result;
Radek Krejci4bd12912015-06-10 14:35:52 +0200660}
661
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200662struct lyxml_elem *
663lyxml_dup_elem(struct ly_ctx *ctx, struct lyxml_elem *elem, struct lyxml_elem *parent, int recursive)
Radek Krejci4bd12912015-06-10 14:35:52 +0200664{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200665 struct lyxml_elem *result, *child;
Radek Krejci00249f22015-07-07 13:43:28 +0200666 struct lyxml_attr *attr;
Radek Krejci4bd12912015-06-10 14:35:52 +0200667
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200668 if (!elem) {
669 return NULL;
670 }
Radek Krejci4bd12912015-06-10 14:35:52 +0200671
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200672 result = calloc(1, sizeof *result);
673 result->content = lydict_insert(ctx, elem->content, 0);
674 result->name = lydict_insert(ctx, elem->name, 0);
675 result->flags = elem->flags;
676 result->line = elem->line;
677 result->prev = result;
Radek Krejci4bd12912015-06-10 14:35:52 +0200678
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200679 if (parent) {
680 lyxml_add_child(parent, result);
681 }
Radek Krejci4bd12912015-06-10 14:35:52 +0200682
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200683 /* namespace */
684 if (elem->ns) {
Radek Krejci4476d412015-07-10 15:35:01 +0200685 result->ns = lyxml_get_ns(result, elem->ns->prefix);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200686 }
Radek Krejci4bd12912015-06-10 14:35:52 +0200687
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200688 /* duplicate attributes */
689 for (attr = elem->attr; attr; attr = attr->next) {
Radek Krejci00249f22015-07-07 13:43:28 +0200690 lyxml_dup_attr(ctx, result, attr);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200691 }
Radek Krejci4bd12912015-06-10 14:35:52 +0200692
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200693 if (!recursive) {
694 return result;
695 }
Radek Krejci4bd12912015-06-10 14:35:52 +0200696
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200697 /* duplicate children */
698 LY_TREE_FOR(elem->child, child) {
699 lyxml_dup_elem(ctx, child, result, 1);
700 }
Radek Krejci4bd12912015-06-10 14:35:52 +0200701
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200702 return result;
Radek Krejci4bd12912015-06-10 14:35:52 +0200703}
704
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200705static struct lyxml_attr *
Radek Krejci00249f22015-07-07 13:43:28 +0200706parse_attr(struct ly_ctx *ctx, const char *data, unsigned int *len, struct lyxml_elem *parent)
Radek Krejci674e1f82015-04-21 14:12:19 +0200707{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200708 const char *c = data, *start, *delim;
709 char prefix[32];
710 int uc;
Radek Krejci00249f22015-07-07 13:43:28 +0200711 struct lyxml_attr *attr = NULL, *a;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200712 unsigned int size;
Radek Krejci02117302015-04-13 16:32:44 +0200713
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200714 /* check if it is attribute or namespace */
715 if (!memcmp(c, "xmlns", 5)) {
716 /* namespace */
717 attr = calloc(1, sizeof (struct lyxml_ns));
718 attr->type = LYXML_ATTR_NS;
Radek Krejci00249f22015-07-07 13:43:28 +0200719 ((struct lyxml_ns *)attr)->parent = parent;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200720 c += 5;
721 if (*c != ':') {
722 /* default namespace, prefix will be empty */
723 goto equal;
724 }
725 c++; /* go after ':' to the prefix value */
726 } else {
727 /* attribute */
728 attr = calloc(1, sizeof *attr);
729 attr->type = LYXML_ATTR_STD;
730 }
Radek Krejci4ea08382015-04-21 09:41:40 +0200731
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200732 /* process name part of the attribute */
733 start = c;
734 uc = getutf8(c, &size);
735 if (!is_xmlnamestartchar(uc)) {
736 LOGVAL(VE_XML_INVAL, lineno, "NameStartChar of the attribute");
737 free(attr);
738 return NULL;
739 }
740 c += size;
741 uc = getutf8(c, &size);
742 while (is_xmlnamechar(uc)) {
743 if (attr->type == LYXML_ATTR_STD && *c == ':') {
744 /* attribute in a namespace */
745 start = c + 1;
Radek Krejci4ea08382015-04-21 09:41:40 +0200746
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200747 /* look for the prefix in namespaces */
748 memcpy(prefix, data, c - data);
749 prefix[c - data] = '\0';
Radek Krejci4476d412015-07-10 15:35:01 +0200750 attr->ns = lyxml_get_ns(parent, prefix);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200751 }
752 c += size;
753 uc = getutf8(c, &size);
754 }
Radek Krejci674e1f82015-04-21 14:12:19 +0200755
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200756 /* store the name */
757 size = c - start;
758 attr->name = lydict_insert(ctx, start, size);
Radek Krejci674e1f82015-04-21 14:12:19 +0200759
760equal:
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200761 /* check Eq mark that can be surrounded by whitespaces */
762 ign_xmlws(c);
763 if (*c != '=') {
764 LOGVAL(VE_XML_INVAL, lineno, "attribute definition, \"=\" expected");
765 goto error;
766 }
767 c++;
768 ign_xmlws(c);
Radek Krejci02117302015-04-13 16:32:44 +0200769
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200770 /* process value part of the attribute */
771 if (!*c || (*c != '"' && *c != '\'')) {
772 LOGVAL(VE_XML_INVAL, lineno, "attribute value, \" or \' expected");
773 goto error;
774 }
775 delim = c;
776 attr->value = lydict_insert_zc(ctx, parse_text(++c, *delim, &size));
777 if (ly_errno) {
778 goto error;
779 }
Radek Krejci02117302015-04-13 16:32:44 +0200780
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200781 *len = c + size + 1 - data; /* +1 is delimiter size */
Radek Krejci00249f22015-07-07 13:43:28 +0200782
783 /* put attribute into the parent's attributes list */
784 if (parent->attr) {
785 /* go to the end of the list */
786 for (a = parent->attr; a->next; a = a->next);
787 /* and append new attribute */
788 a->next = attr;
789 } else {
790 /* add the first attribute in the list */
791 parent->attr = attr;
792 }
793
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200794 return attr;
Radek Krejci02117302015-04-13 16:32:44 +0200795
796error:
Radek Krejci00249f22015-07-07 13:43:28 +0200797 lyxml_free_attr(ctx, NULL, attr);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200798 return NULL;
Radek Krejci54ea8de2015-04-09 18:02:56 +0200799}
800
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200801static struct lyxml_elem *
802parse_elem(struct ly_ctx *ctx, const char *data, unsigned int *len, struct lyxml_elem *parent)
Radek Krejci54ea8de2015-04-09 18:02:56 +0200803{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200804 const char *c = data, *start, *e;
805 const char *lws; /* leading white space for handling mixed content */
806 int uc;
807 char *str;
808 char prefix[32] = { 0 };
809 unsigned int prefix_len = 0;
810 struct lyxml_elem *elem = NULL, *child;
811 struct lyxml_attr *attr;
812 unsigned int size;
813 int nons_flag = 0, closed_flag = 0;
Radek Krejci02117302015-04-13 16:32:44 +0200814
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200815 *len = 0;
Radek Krejci02117302015-04-13 16:32:44 +0200816
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200817 if (*c != '<') {
818 return NULL;
819 }
Radek Krejci02117302015-04-13 16:32:44 +0200820
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200821 /* locate element name */
822 c++;
823 e = c;
Radek Krejci02117302015-04-13 16:32:44 +0200824
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200825 uc = getutf8(e, &size);
826 if (!is_xmlnamestartchar(uc)) {
827 LOGVAL(VE_XML_INVAL, lineno, "NameStartChar of the element");
828 return NULL;
829 }
830 e += size;
831 uc = getutf8(e, &size);
832 while (is_xmlnamechar(uc)) {
833 if (*e == ':') {
834 if (prefix_len) {
835 LOGVAL(VE_XML_INVAL, lineno, "element name, multiple colons found");
836 goto error;
837 }
838 /* element in a namespace */
839 start = e + 1;
Radek Krejci674e1f82015-04-21 14:12:19 +0200840
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200841 /* look for the prefix in namespaces */
842 memcpy(prefix, c, prefix_len = e - c);
843 prefix[prefix_len] = '\0';
844 c = start;
845 }
846 e += size;
847 uc = getutf8(e, &size);
848 }
849 if (!*e) {
850 LOGVAL(VE_EOF, lineno);
851 return NULL;
852 }
Radek Krejci02117302015-04-13 16:32:44 +0200853
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200854 /* allocate element structure */
855 elem = calloc(1, sizeof *elem);
Radek Krejci3045cf32015-05-28 10:58:52 +0200856#ifndef NDEBUG
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200857 elem->line = lineno;
Radek Krejci3045cf32015-05-28 10:58:52 +0200858#endif
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200859 elem->next = NULL;
860 elem->prev = elem;
861 if (parent) {
862 lyxml_add_child(parent, elem);
863 }
Radek Krejci02117302015-04-13 16:32:44 +0200864
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200865 /* store the name into the element structure */
866 elem->name = lydict_insert(ctx, c, e - c);
867 c = e;
Radek Krejci02117302015-04-13 16:32:44 +0200868
869process:
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200870 ly_errno = 0;
871 ign_xmlws(c);
872 if (!memcmp("/>", c, 2)) {
873 /* we are done, it was EmptyElemTag */
874 c += 2;
875 closed_flag = 1;
876 } else if (*c == '>') {
877 /* process element content */
878 c++;
879 lws = NULL;
Radek Krejci02117302015-04-13 16:32:44 +0200880
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200881 while (*c) {
882 if (!memcmp(c, "</", 2)) {
883 if (lws && !elem->child) {
884 /* leading white spaces were actually content */
885 goto store_content;
886 }
Radek Krejci02117302015-04-13 16:32:44 +0200887
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200888 /* Etag */
889 c += 2;
890 /* get name and check it */
891 e = c;
892 uc = getutf8(e, &size);
893 if (!is_xmlnamestartchar(uc)) {
894 LOGVAL(VE_XML_INVAL, lineno, "NameStartChar of the attribute");
895 goto error;
896 }
897 e += size;
898 uc = getutf8(e, &size);
899 while (is_xmlnamechar(uc)) {
900 if (*e == ':') {
901 /* element in a namespace */
902 start = e + 1;
Radek Krejci674e1f82015-04-21 14:12:19 +0200903
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200904 /* look for the prefix in namespaces */
905 if (memcmp(prefix, c, e - c)) {
906 LOGVAL(VE_SPEC, lineno,
907 "Mixed opening (%s) and closing element tags (different namespaces).", elem->name);
908 goto error;
909 }
910 c = start;
911 }
912 e += size;
913 uc = getutf8(e, &size);
914 }
915 if (!*e) {
916 LOGVAL(VE_EOF, lineno);
917 goto error;
918 }
Radek Krejci02117302015-04-13 16:32:44 +0200919
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200920 /* check that it corresponds to opening tag */
921 size = e - c;
922 str = malloc((size + 1) * sizeof *str);
923 memcpy(str, c, e - c);
924 str[e - c] = '\0';
925 if (size != strlen(elem->name) || memcmp(str, elem->name, size)) {
926 LOGVAL(VE_SPEC, lineno, "Mixed opening (%s) and closing (%s) element tags.", elem->name, str);
927 free(str);
928 goto error;
929 }
930 free(str);
931 c = e;
Radek Krejci02117302015-04-13 16:32:44 +0200932
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200933 ign_xmlws(c);
934 if (*c != '>') {
935 LOGVAL(VE_SPEC, lineno, "Close element tag \"%s\" contain additional data.", elem->name);
936 goto error;
937 }
938 c++;
939 closed_flag = 1;
940 break;
Radek Krejci02117302015-04-13 16:32:44 +0200941
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200942 } else if (!memcmp(c, "<?", 2)) {
943 if (lws) {
944 /* leading white spaces were only formatting */
945 lws = NULL;
946 }
947 /* PI - ignore it */
948 c += 2;
949 if (parse_ignore(c, "?>", &size)) {
950 goto error;
951 }
952 c += size;
953 } else if (!memcmp(c, "<!--", 4)) {
954 if (lws) {
955 /* leading white spaces were only formatting */
956 lws = NULL;
957 }
958 /* Comment - ignore it */
959 c += 4;
960 if (parse_ignore(c, "-->", &size)) {
961 goto error;
962 }
963 c += size;
964 } else if (!memcmp(c, "<![CDATA[", 9)) {
965 /* CDSect */
966 goto store_content;
967 } else if (*c == '<') {
968 if (lws) {
969 if (elem->flags & LYXML_ELEM_MIXED) {
970 /* we have a mixed content */
971 goto store_content;
972 } else {
973 /* leading white spaces were only formatting */
974 lws = NULL;
975 }
976 }
977 if (elem->content) {
978 /* we have a mixed content */
979 child = calloc(1, sizeof *child);
980 child->content = elem->content;
981 elem->content = NULL;
982 lyxml_add_child(elem, child);
983 elem->flags |= LYXML_ELEM_MIXED;
984 }
985 child = parse_elem(ctx, c, &size, elem);
986 if (!child) {
987 goto error;
988 }
989 c += size; /* move after processed child element */
990 } else if (is_xmlws(*c)) {
991 lws = c;
Radek Krejci3045cf32015-05-28 10:58:52 +0200992#ifndef NDEBUG
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200993 lws_lineno = lineno;
Radek Krejci3045cf32015-05-28 10:58:52 +0200994#endif
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200995 ign_xmlws(c);
996 } else {
Radek Krejci02117302015-04-13 16:32:44 +0200997store_content:
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200998 /* store text content */
999 if (lws) {
1000 /* process content including the leading white spaces */
1001 c = lws;
Radek Krejci3045cf32015-05-28 10:58:52 +02001002#ifndef NDEBUG
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001003 lineno = lws_lineno;
Radek Krejci3045cf32015-05-28 10:58:52 +02001004#endif
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001005 lws = NULL;
1006 }
1007 elem->content = lydict_insert_zc(ctx, parse_text(c, '<', &size));
1008 if (ly_errno) {
1009 goto error;
1010 }
1011 c += size; /* move after processed text content */
Radek Krejci02117302015-04-13 16:32:44 +02001012
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001013 if (elem->child) {
1014 /* we have a mixed content */
1015 child = calloc(1, sizeof *child);
1016 child->content = elem->content;
1017 elem->content = NULL;
1018 lyxml_add_child(elem, child);
1019 elem->flags |= LYXML_ELEM_MIXED;
1020 }
1021 }
1022 }
1023 } else {
1024 /* process attribute */
1025 attr = parse_attr(ctx, c, &size, elem);
1026 if (!attr) {
1027 goto error;
1028 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001029 c += size; /* move after processed attribute */
Radek Krejci02117302015-04-13 16:32:44 +02001030
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001031 /* check namespace */
1032 if (attr->type == LYXML_ATTR_NS) {
1033 if (!prefix[0] && !attr->name) {
1034 if (attr->value) {
1035 /* default prefix */
1036 elem->ns = (struct lyxml_ns *)attr;
1037 } else {
1038 /* xmlns="" -> no namespace */
1039 nons_flag = 1;
1040 }
1041 } else if (prefix[0] && attr->name && !memcmp(attr->name, prefix, prefix_len + 1)) {
1042 /* matching namespace with prefix */
1043 elem->ns = (struct lyxml_ns *)attr;
1044 }
1045 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001046
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001047 /* go back to finish element processing */
1048 goto process;
1049 }
Radek Krejci02117302015-04-13 16:32:44 +02001050
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001051 *len = c - data;
Radek Krejci02117302015-04-13 16:32:44 +02001052
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001053 if (!closed_flag) {
1054 LOGVAL(VE_XML_MISS, lineno, "closing element tag", elem->name);
1055 goto error;
1056 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001057
Radek Krejci78a230a2015-07-07 17:04:40 +02001058 if (!elem->ns && !nons_flag && parent) {
Radek Krejci4476d412015-07-10 15:35:01 +02001059 elem->ns = lyxml_get_ns(parent, prefix_len ? prefix : NULL);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001060 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001061
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001062 return elem;
Radek Krejci02117302015-04-13 16:32:44 +02001063
1064error:
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001065 lyxml_free_elem(ctx, elem);
Radek Krejci02117302015-04-13 16:32:44 +02001066
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001067 return NULL;
Radek Krejci54ea8de2015-04-09 18:02:56 +02001068}
1069
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001070struct lyxml_elem *
1071lyxml_read(struct ly_ctx *ctx, const char *data, int UNUSED(options))
Radek Krejci54ea8de2015-04-09 18:02:56 +02001072{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001073 const char *c = data;
1074 unsigned int len;
1075 struct lyxml_elem *root = NULL;
Radek Krejci02117302015-04-13 16:32:44 +02001076
Radek Krejci3045cf32015-05-28 10:58:52 +02001077#ifndef NDEBUG
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001078 /* TODO: threads support */
1079 lineno = 1;
Radek Krejci3045cf32015-05-28 10:58:52 +02001080#endif
1081
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001082 /* process document */
1083 while (*c) {
1084 if (is_xmlws(*c)) {
1085 /* skip whitespaces */
1086 ign_xmlws(c);
1087 } else if (!memcmp(c, "<?", 2)) {
1088 /* XMLDecl or PI - ignore it */
1089 c += 2;
1090 if (parse_ignore(c, "?>", &len)) {
1091 LOGVAL(VE_XML_MISS, lineno, "close sequence", "?>");
1092 return NULL;
1093 }
1094 c += len;
1095 } else if (!memcmp(c, "<!--", 4)) {
1096 /* Comment - ignore it */
1097 c += 2;
1098 if (parse_ignore(c, "-->", &len)) {
1099 LOGVAL(VE_XML_MISS, lineno, "close sequence", "-->");
1100 return NULL;
1101 }
1102 c += len;
1103 } else if (!memcmp(c, "<!", 2)) {
1104 /* DOCTYPE */
1105 /* TODO - standalone ignore counting < and > */
1106 LOGERR(LY_EINVAL, "DOCTYPE not supported in XML documents.");
1107 return NULL;
1108 } else if (*c == '<') {
1109 /* element - process it in next loop to strictly follow XML
1110 * format
1111 */
1112 break;
1113 }
1114 }
Radek Krejci02117302015-04-13 16:32:44 +02001115
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001116 root = parse_elem(ctx, c, &len, NULL);
1117 if (!root) {
1118 return NULL;
1119 }
1120 c += len;
Radek Krejci02117302015-04-13 16:32:44 +02001121
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001122 /* ignore the rest of document where can be comments, PIs and whitespaces,
1123 * note that we are not detecting syntax errors in these parts
1124 */
1125 ign_xmlws(c);
1126 if (*c) {
1127 LOGWRN("There are some not parsed data:\n%s", c);
1128 }
Radek Krejci02117302015-04-13 16:32:44 +02001129
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001130 return root;
Radek Krejci02117302015-04-13 16:32:44 +02001131}
1132
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001133struct lyxml_elem *
1134lyxml_read_fd(struct ly_ctx *ctx, int fd, int UNUSED(options))
Radek Krejci02117302015-04-13 16:32:44 +02001135{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001136 if (fd == -1 || !ctx) {
1137 LOGERR(LY_EINVAL, "%s: Invalid parameter.", __func__);
1138 return NULL;
1139 }
Radek Krejci02117302015-04-13 16:32:44 +02001140
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001141 LOGERR(LY_EINT, "%s function is not implemented", __func__);
1142 return NULL;
Radek Krejci54ea8de2015-04-09 18:02:56 +02001143}
1144
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001145struct lyxml_elem *
1146lyxml_read_file(struct ly_ctx *ctx, const char *filename, int UNUSED(options))
Radek Krejci54ea8de2015-04-09 18:02:56 +02001147{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001148 if (!filename || !ctx) {
1149 LOGERR(LY_EINVAL, "%s: Invalid parameter.", __func__);
1150 return NULL;
1151 }
Radek Krejci54ea8de2015-04-09 18:02:56 +02001152
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001153 LOGERR(LY_EINT, "%s function is not implemented", __func__);
1154 return NULL;
Radek Krejci54ea8de2015-04-09 18:02:56 +02001155}
Radek Krejci02117302015-04-13 16:32:44 +02001156
/**
 * @brief Write text content to @p f, replacing '&', '<' and '>' with the
 * corresponding predefined entity references.
 *
 * @param[in] f Output stream.
 * @param[in] text NUL-terminated text; NULL is treated as an empty string.
 * @return Number of characters successfully written. Output that the stream
 * refused is not counted, so a failing stream never makes the result
 * negative or wrap around.
 */
static int
dump_text(FILE * f, const char *text)
{
    int written = 0;
    int rc;

    if (!text) {
        return 0;
    }

    for (; *text; text++) {
        switch (*text) {
        case '&':
            rc = fprintf(f, "&amp;");
            break;
        case '<':
            rc = fprintf(f, "&lt;");
            break;
        case '>':
            /* not needed, just for readability */
            rc = fprintf(f, "&gt;");
            break;
        default:
            rc = (fputc((unsigned char)*text, f) == EOF) ? -1 : 1;
            break;
        }

        /* fprintf() reports failure with a negative value, skip those */
        if (rc > 0) {
            written += rc;
        }
    }

    return written;
}
1182
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001183static int
1184dump_elem(FILE * f, struct lyxml_elem *e, int level)
Radek Krejcif0023a92015-04-20 20:51:39 +02001185{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001186 int size = 0;
1187 struct lyxml_attr *a;
1188 struct lyxml_elem *child;
1189 const char *delim, *delim_outer;
1190 int indent;
Radek Krejcif0023a92015-04-20 20:51:39 +02001191
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001192 if (!e->name) {
1193 /* mixed content */
1194 if (e->content) {
1195 return dump_text(f, e->content);
1196 } else {
1197 return 0;
1198 }
1199 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001200
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001201 delim = delim_outer = "\n";
1202 indent = 2 * level;
1203 if ((e->flags & LYXML_ELEM_MIXED) || (e->parent && (e->parent->flags & LYXML_ELEM_MIXED))) {
1204 delim = "";
1205 }
1206 if (e->parent && (e->parent->flags & LYXML_ELEM_MIXED)) {
1207 delim_outer = "";
1208 indent = 0;
1209 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001210
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001211 /* opening tag */
1212 if (e->ns && e->ns->prefix) {
1213 size += fprintf(f, "%*s<%s:%s", indent, "", e->ns->prefix, e->name);
1214 } else {
1215 size += fprintf(f, "%*s<%s", indent, "", e->name);
1216 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001217
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001218 /* attributes */
1219 for (a = e->attr; a; a = a->next) {
1220 if (a->type == LYXML_ATTR_NS) {
1221 if (a->name) {
1222 size += fprintf(f, " xmlns:%s=\"%s\"", a->name, a->value ? a->value : "");
1223 } else {
1224 size += fprintf(f, " xmlns=\"%s\"", a->value ? a->value : "");
1225 }
1226 } else if (a->ns && a->ns->prefix) {
1227 size += fprintf(f, " %s:%s=\"%s\"", a->ns->prefix, a->name, a->value);
1228 } else {
1229 size += fprintf(f, " %s=\"%s\"", a->name, a->value);
1230 }
1231 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001232
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001233 if (!e->child && !e->content) {
1234 size += fprintf(f, "/>%s", delim);
1235 return size;
1236 } else if (e->content) {
1237 fputc('>', f);
1238 size++;
Radek Krejcif0023a92015-04-20 20:51:39 +02001239
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001240 size += dump_text(f, e->content);
Radek Krejcif0023a92015-04-20 20:51:39 +02001241
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001242 if (e->ns && e->ns->prefix) {
1243 size += fprintf(f, "</%s:%s>%s", e->ns->prefix, e->name, delim);
1244 } else {
1245 size += fprintf(f, "</%s>%s", e->name, delim);
1246 }
1247 return size;
1248 } else {
1249 size += fprintf(f, ">%s", delim);
1250 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001251
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001252 /* go recursively */
1253 LY_TREE_FOR(e->child, child) {
1254 size += dump_elem(f, child, level + 1);
1255 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001256
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001257 /* closing tag */
1258 if (e->ns && e->ns->prefix) {
1259 size += fprintf(f, "%*s</%s:%s>%s", indent, "", e->ns->prefix, e->name, delim_outer);
1260 } else {
1261 size += fprintf(f, "%*s</%s>%s", indent, "", e->name, delim_outer);
1262 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001263
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001264 return size;
Radek Krejcif0023a92015-04-20 20:51:39 +02001265}
1266
/**
 * @brief Print the XML subtree rooted at @p elem to @p stream.
 *
 * @param[in] stream Output stream.
 * @param[in] elem Root of the subtree to print; NULL prints nothing.
 * @param[in] options Currently unused.
 * @return Value returned by dump_elem() for the subtree, 0 when @p elem
 * is NULL.
 */
int
lyxml_dump(FILE * stream, struct lyxml_elem *elem, int UNUSED(options))
{
    /* printing starts at nesting level 0, i.e. without indentation */
    return elem ? dump_elem(stream, elem, 0) : 0;
}