blob: 2d8f78ceea72d75e9ad2e28062e190f9376f4f42 [file] [log] [blame]
/**
 * @file xml.c
 * @author Radek Krejci <rkrejci@cesnet.cz>
 * @brief XML parser implementation for libyang
 *
 * Copyright (c) 2015 CESNET, z.s.p.o.
 *
 * This source code is licensed under BSD 3-Clause License (the "License").
 * You may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://opensource.org/licenses/BSD-3-Clause
 */
14
Radek Krejci812b10a2015-05-28 16:48:25 +020015#include <assert.h>
Radek Krejci563427e2016-02-08 16:26:34 +010016#include <errno.h>
Radek Krejci709fee62015-04-15 13:56:19 +020017#include <ctype.h>
18#include <stdint.h>
Radek Krejcif0023a92015-04-20 20:51:39 +020019#include <stdio.h>
Radek Krejci02117302015-04-13 16:32:44 +020020#include <stdlib.h>
21#include <string.h>
Radek Krejci54ea8de2015-04-09 18:02:56 +020022#include <unistd.h>
Radek Krejci563427e2016-02-08 16:26:34 +010023#include <pthread.h>
Pavol Vicanb2570c12015-11-12 13:50:20 +010024#include <sys/stat.h>
25#include <sys/mman.h>
Radek Krejci563427e2016-02-08 16:26:34 +010026#include <sys/syscall.h>
Pavol Vicanb2570c12015-11-12 13:50:20 +010027#include <fcntl.h>
Radek Krejci54ea8de2015-04-09 18:02:56 +020028
Radek Krejci06a704e2015-04-22 14:50:49 +020029#include "common.h"
Radek Krejci41912fe2015-10-22 10:22:12 +020030#include "dict_private.h"
Radek Krejci5248f132015-10-09 10:34:25 +020031#include "printer.h"
Radek Krejci5449d472015-10-26 14:35:56 +010032#include "parser.h"
Michal Vasko2d162e12015-09-24 14:33:29 +020033#include "tree_schema.h"
Michal Vaskofc5744d2015-10-22 12:09:34 +020034#include "xml_internal.h"
Radek Krejci54ea8de2015-04-09 18:02:56 +020035
/*
 * Advance the pointer p past every leading character that is_xmlws() accepts.
 * p is evaluated repeatedly, so it must be a modifiable lvalue without side
 * effects; it is parenthesized so that expressions such as *pp expand correctly.
 */
#define ign_xmlws(p)         \
    while (is_xmlws(*(p))) { \
        (p)++;               \
    }
Radek Krejci02117302015-04-13 16:32:44 +020040
Michal Vasko88c29542015-11-27 14:57:53 +010041static struct lyxml_attr *lyxml_dup_attr(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_attr *attr);
42
Michal Vasko1e62a092015-12-01 12:27:20 +010043API const struct lyxml_ns *
44lyxml_get_ns(const struct lyxml_elem *elem, const char *prefix)
Michal Vaskof8879c22015-08-21 09:07:36 +020045{
46 struct lyxml_attr *attr;
47 int len;
48
49 if (!elem) {
50 return NULL;
51 }
52
53 if (!prefix) {
54 len = 0;
55 } else {
56 len = strlen(prefix) + 1;
57 }
58
59 for (attr = elem->attr; attr; attr = attr->next) {
60 if (attr->type != LYXML_ATTR_NS) {
61 continue;
62 }
63 if (!attr->name) {
64 if (!len) {
65 /* default namespace found */
66 if (!attr->value) {
67 /* empty default namespace -> no default namespace */
68 return NULL;
69 }
70 return (struct lyxml_ns *)attr;
71 }
72 } else if (len && !memcmp(attr->name, prefix, len)) {
73 /* prefix found */
74 return (struct lyxml_ns *)attr;
75 }
76 }
77
78 /* go recursively */
79 return lyxml_get_ns(elem->parent, prefix);
80}
81
Michal Vasko88c29542015-11-27 14:57:53 +010082static void
83lyxml_correct_attr_ns(struct ly_ctx *ctx, struct lyxml_attr *attr, struct lyxml_elem *attr_parent, int copy_ns)
84{
85 const struct lyxml_ns *tmp_ns;
Michal Vaskof6109112015-12-03 14:00:42 +010086 struct lyxml_elem *ns_root, *attr_root;
Michal Vasko88c29542015-11-27 14:57:53 +010087
88 if ((attr->type != LYXML_ATTR_NS) && attr->ns) {
Michal Vaskof6109112015-12-03 14:00:42 +010089 /* find the root of attr */
90 for (attr_root = attr_parent; attr_root->parent; attr_root = attr_root->parent);
Michal Vasko88c29542015-11-27 14:57:53 +010091
92 /* find the root of attr NS */
93 for (ns_root = attr->ns->parent; ns_root->parent; ns_root = ns_root->parent);
94
Michal Vaskof6109112015-12-03 14:00:42 +010095 /* attr NS is defined outside attr parent subtree */
96 if (ns_root != attr_root) {
Michal Vasko88c29542015-11-27 14:57:53 +010097 if (copy_ns) {
98 tmp_ns = attr->ns;
99 /* we may have already copied the NS over? */
Radek Krejci66aca402016-05-24 15:23:02 +0200100 attr->ns = lyxml_get_ns(attr_parent, tmp_ns->prefix);
Michal Vasko88c29542015-11-27 14:57:53 +0100101
102 /* we haven't copied it over, copy it now */
103 if (!attr->ns) {
Michal Vaskof6109112015-12-03 14:00:42 +0100104 attr->ns = (struct lyxml_ns *)lyxml_dup_attr(ctx, attr_parent, (struct lyxml_attr *)tmp_ns);
Michal Vasko88c29542015-11-27 14:57:53 +0100105 }
106 } else {
107 attr->ns = NULL;
108 }
109 }
110 }
111}
112
113static struct lyxml_attr *
Michal Vaskof8879c22015-08-21 09:07:36 +0200114lyxml_dup_attr(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_attr *attr)
115{
116 struct lyxml_attr *result, *a;
117
118 if (!attr || !parent) {
119 return NULL;
120 }
121
122 if (attr->type == LYXML_ATTR_NS) {
123 /* this is correct, despite that all attributes seems like a standard
124 * attributes (struct lyxml_attr), some of them can be namespace
125 * definitions (and in that case they are struct lyxml_ns).
126 */
127 result = (struct lyxml_attr *)calloc(1, sizeof (struct lyxml_ns));
128 } else {
129 result = calloc(1, sizeof (struct lyxml_attr));
130 }
Michal Vasko253035f2015-12-17 16:58:13 +0100131 if (!result) {
132 LOGMEM;
133 return NULL;
134 }
Michal Vaskof8879c22015-08-21 09:07:36 +0200135 result->value = lydict_insert(ctx, attr->value, 0);
136 result->name = lydict_insert(ctx, attr->name, 0);
137 result->type = attr->type;
138
139 /* set namespace in case of standard attributes */
140 if (result->type == LYXML_ATTR_STD && attr->ns) {
Michal Vasko88c29542015-11-27 14:57:53 +0100141 result->ns = attr->ns;
142 lyxml_correct_attr_ns(ctx, result, parent, 1);
Michal Vaskof8879c22015-08-21 09:07:36 +0200143 }
144
145 /* set parent pointer in case of namespace attribute */
146 if (result->type == LYXML_ATTR_NS) {
147 ((struct lyxml_ns *)result)->parent = parent;
148 }
149
150 /* put attribute into the parent's attributes list */
151 if (parent->attr) {
152 /* go to the end of the list */
153 for (a = parent->attr; a->next; a = a->next);
154 /* and append new attribute */
155 a->next = result;
156 } else {
157 /* add the first attribute in the list */
158 parent->attr = result;
159 }
160
161 return result;
162}
163
Michal Vaskof748dbc2016-04-05 11:27:47 +0200164void
Michal Vasko88c29542015-11-27 14:57:53 +0100165lyxml_correct_elem_ns(struct ly_ctx *ctx, struct lyxml_elem *elem, int copy_ns, int correct_attrs)
166{
167 const struct lyxml_ns *tmp_ns;
Radek Krejcid5be5682016-01-14 16:23:22 +0100168 struct lyxml_elem *elem_root, *ns_root, *tmp, *iter;
Michal Vasko88c29542015-11-27 14:57:53 +0100169 struct lyxml_attr *attr;
170
171 /* find the root of elem */
172 for (elem_root = elem; elem_root->parent; elem_root = elem_root->parent);
173
Radek Krejcid5be5682016-01-14 16:23:22 +0100174 LY_TREE_DFS_BEGIN(elem, tmp, iter) {
175 if (iter->ns) {
Michal Vasko88c29542015-11-27 14:57:53 +0100176 /* find the root of elem NS */
Radek Krejcic071c542016-01-27 14:57:51 +0100177 for (ns_root = iter->ns->parent; ns_root; ns_root = ns_root->parent);
Michal Vasko88c29542015-11-27 14:57:53 +0100178
179 /* elem NS is defined outside elem subtree */
180 if (ns_root != elem_root) {
181 if (copy_ns) {
Radek Krejcid5be5682016-01-14 16:23:22 +0100182 tmp_ns = iter->ns;
Michal Vasko88c29542015-11-27 14:57:53 +0100183 /* we may have already copied the NS over? */
Radek Krejcid5be5682016-01-14 16:23:22 +0100184 iter->ns = lyxml_get_ns(iter, tmp_ns->prefix);
Michal Vasko88c29542015-11-27 14:57:53 +0100185
186 /* we haven't copied it over, copy it now */
Radek Krejcid5be5682016-01-14 16:23:22 +0100187 if (!iter->ns) {
188 iter->ns = (struct lyxml_ns *)lyxml_dup_attr(ctx, iter, (struct lyxml_attr *)tmp_ns);
Michal Vasko88c29542015-11-27 14:57:53 +0100189 }
190 } else {
Radek Krejcid5be5682016-01-14 16:23:22 +0100191 iter->ns = NULL;
Michal Vasko88c29542015-11-27 14:57:53 +0100192 }
193 }
194 }
195 if (correct_attrs) {
Radek Krejcid5be5682016-01-14 16:23:22 +0100196 LY_TREE_FOR(iter->attr, attr) {
Michal Vasko88c29542015-11-27 14:57:53 +0100197 lyxml_correct_attr_ns(ctx, attr, elem_root, copy_ns);
198 }
199 }
Radek Krejcid5be5682016-01-14 16:23:22 +0100200 LY_TREE_DFS_END(elem, tmp, iter);
Michal Vasko88c29542015-11-27 14:57:53 +0100201 }
202}
203
Michal Vaskof8879c22015-08-21 09:07:36 +0200204struct lyxml_elem *
205lyxml_dup_elem(struct ly_ctx *ctx, struct lyxml_elem *elem, struct lyxml_elem *parent, int recursive)
206{
207 struct lyxml_elem *result, *child;
208 struct lyxml_attr *attr;
209
210 if (!elem) {
211 return NULL;
212 }
213
214 result = calloc(1, sizeof *result);
Michal Vasko253035f2015-12-17 16:58:13 +0100215 if (!result) {
216 LOGMEM;
217 return NULL;
218 }
Michal Vaskof8879c22015-08-21 09:07:36 +0200219 result->content = lydict_insert(ctx, elem->content, 0);
220 result->name = lydict_insert(ctx, elem->name, 0);
221 result->flags = elem->flags;
Michal Vaskof8879c22015-08-21 09:07:36 +0200222 result->prev = result;
223
224 if (parent) {
225 lyxml_add_child(ctx, parent, result);
226 }
227
Michal Vasko88c29542015-11-27 14:57:53 +0100228 /* keep old namespace for now */
229 result->ns = elem->ns;
230
231 /* correct namespaces */
232 lyxml_correct_elem_ns(ctx, result, 1, 0);
Michal Vaskof8879c22015-08-21 09:07:36 +0200233
234 /* duplicate attributes */
235 for (attr = elem->attr; attr; attr = attr->next) {
236 lyxml_dup_attr(ctx, result, attr);
237 }
238
239 if (!recursive) {
240 return result;
241 }
242
243 /* duplicate children */
244 LY_TREE_FOR(elem->child, child) {
245 lyxml_dup_elem(ctx, child, result, 1);
246 }
247
248 return result;
249}
250
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200251void
Michal Vaskof8879c22015-08-21 09:07:36 +0200252lyxml_unlink_elem(struct ly_ctx *ctx, struct lyxml_elem *elem, int copy_ns)
Radek Krejci02117302015-04-13 16:32:44 +0200253{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200254 struct lyxml_elem *parent, *first;
Radek Krejci02117302015-04-13 16:32:44 +0200255
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200256 if (!elem) {
257 return;
258 }
Radek Krejci02117302015-04-13 16:32:44 +0200259
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200260 /* store pointers to important nodes */
261 parent = elem->parent;
Radek Krejcie1f13912015-05-26 15:17:38 +0200262
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200263 /* unlink from parent */
264 if (parent) {
265 if (parent->child == elem) {
266 /* we unlink the first child */
267 /* update the parent's link */
268 parent->child = elem->next;
269 }
270 /* forget about the parent */
271 elem->parent = NULL;
272 }
Radek Krejci02117302015-04-13 16:32:44 +0200273
Michal Vasko88c29542015-11-27 14:57:53 +0100274 if (copy_ns < 2) {
275 lyxml_correct_elem_ns(ctx, elem, copy_ns, 1);
276 }
277
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200278 /* unlink from siblings */
279 if (elem->prev == elem) {
280 /* there are no more siblings */
281 return;
282 }
283 if (elem->next) {
284 elem->next->prev = elem->prev;
285 } else {
286 /* unlinking the last element */
287 if (parent) {
288 first = parent->child;
289 } else {
290 first = elem;
Radek Krejcie4fffcf2016-02-23 16:06:25 +0100291 while (first->prev->next) {
292 first = first->prev;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200293 }
294 }
295 first->prev = elem->prev;
296 }
297 if (elem->prev->next) {
298 elem->prev->next = elem->next;
299 }
Radek Krejcida04f4a2015-05-21 12:54:09 +0200300
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200301 /* clean up the unlinked element */
302 elem->next = NULL;
303 elem->prev = elem;
Radek Krejci02117302015-04-13 16:32:44 +0200304}
305
Michal Vasko345da0a2015-12-02 10:35:55 +0100306API void
307lyxml_unlink(struct ly_ctx *ctx, struct lyxml_elem *elem)
308{
309 if (!elem) {
310 return;
311 }
312
313 lyxml_unlink_elem(ctx, elem, 1);
314}
315
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200316void
Radek Krejci00249f22015-07-07 13:43:28 +0200317lyxml_free_attr(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_attr *attr)
Radek Krejci02117302015-04-13 16:32:44 +0200318{
Radek Krejci00249f22015-07-07 13:43:28 +0200319 struct lyxml_attr *aiter, *aprev;
320
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200321 if (!attr) {
322 return;
323 }
Radek Krejci02117302015-04-13 16:32:44 +0200324
Radek Krejci00249f22015-07-07 13:43:28 +0200325 if (parent) {
326 /* unlink attribute from the parent's list of attributes */
327 aprev = NULL;
328 for (aiter = parent->attr; aiter; aiter = aiter->next) {
329 if (aiter == attr) {
330 break;
331 }
332 aprev = aiter;
333 }
334 if (!aiter) {
335 /* attribute to remove not found */
336 return;
337 }
338
339 if (!aprev) {
340 /* attribute is first in parent's list of attributes */
341 parent->attr = attr->next;
342 } else {
343 /* reconnect previous attribute to the next */
344 aprev->next = attr->next;
345 }
346 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200347 lydict_remove(ctx, attr->name);
348 lydict_remove(ctx, attr->value);
349 free(attr);
Radek Krejci02117302015-04-13 16:32:44 +0200350}
351
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200352void
353lyxml_free_attrs(struct ly_ctx *ctx, struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200354{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200355 struct lyxml_attr *a, *next;
356 if (!elem || !elem->attr) {
357 return;
358 }
Radek Krejci02117302015-04-13 16:32:44 +0200359
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200360 a = elem->attr;
361 do {
362 next = a->next;
Radek Krejci02117302015-04-13 16:32:44 +0200363
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200364 lydict_remove(ctx, a->name);
365 lydict_remove(ctx, a->value);
366 free(a);
Radek Krejci02117302015-04-13 16:32:44 +0200367
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200368 a = next;
369 } while (a);
Radek Krejci02117302015-04-13 16:32:44 +0200370}
371
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200372static void
Michal Vasko272e42f2015-12-02 12:20:37 +0100373lyxml_free_elem(struct ly_ctx *ctx, struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200374{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200375 struct lyxml_elem *e, *next;
Radek Krejci02117302015-04-13 16:32:44 +0200376
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200377 if (!elem) {
378 return;
379 }
Radek Krejci02117302015-04-13 16:32:44 +0200380
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200381 lyxml_free_attrs(ctx, elem);
382 LY_TREE_FOR_SAFE(elem->child, next, e) {
Michal Vasko272e42f2015-12-02 12:20:37 +0100383 lyxml_free_elem(ctx, e);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200384 }
385 lydict_remove(ctx, elem->name);
386 lydict_remove(ctx, elem->content);
387 free(elem);
Radek Krejci02117302015-04-13 16:32:44 +0200388}
389
Radek Krejcic6704c82015-10-06 11:12:45 +0200390API void
Michal Vasko345da0a2015-12-02 10:35:55 +0100391lyxml_free(struct ly_ctx *ctx, struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200392{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200393 if (!elem) {
394 return;
395 }
Radek Krejci02117302015-04-13 16:32:44 +0200396
Michal Vasko61f7ccb2015-10-23 10:15:08 +0200397 lyxml_unlink_elem(ctx, elem, 2);
Michal Vasko272e42f2015-12-02 12:20:37 +0100398 lyxml_free_elem(ctx, elem);
Radek Krejci02117302015-04-13 16:32:44 +0200399}
400
Radek Krejci8f8db232016-05-23 16:48:21 +0200401API void
402lyxml_free_withsiblings(struct ly_ctx *ctx, struct lyxml_elem *elem)
403{
404 struct lyxml_elem *iter, *aux;
405
406 if (!elem) {
407 return;
408 }
409
410 /* optimization - avoid freeing (unlinking) the last node of the siblings list */
411 /* so, first, free the node's predecessors to the beginning of the list ... */
412 for(iter = elem->prev; iter->next; iter = aux) {
413 aux = iter->prev;
414 lyxml_free(ctx, iter);
415 }
416 /* ... then, the node is the first in the siblings list, so free them all */
417 LY_TREE_FOR_SAFE(elem, aux, iter) {
418 lyxml_free(ctx, iter);
419 }
420}
421
Michal Vasko88c29542015-11-27 14:57:53 +0100422API const char *
Michal Vasko1e62a092015-12-01 12:27:20 +0100423lyxml_get_attr(const struct lyxml_elem *elem, const char *name, const char *ns)
Radek Krejcida04f4a2015-05-21 12:54:09 +0200424{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200425 struct lyxml_attr *a;
Radek Krejcida04f4a2015-05-21 12:54:09 +0200426
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200427 assert(elem);
428 assert(name);
Radek Krejcida04f4a2015-05-21 12:54:09 +0200429
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200430 for (a = elem->attr; a; a = a->next) {
431 if (a->type != LYXML_ATTR_STD) {
432 continue;
433 }
Radek Krejcida04f4a2015-05-21 12:54:09 +0200434
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200435 if (!strcmp(name, a->name)) {
436 if ((!ns && !a->ns) || (ns && a->ns && !strcmp(ns, a->ns->value))) {
437 return a->value;
438 }
439 }
440 }
Radek Krejcida04f4a2015-05-21 12:54:09 +0200441
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200442 return NULL;
Radek Krejcida04f4a2015-05-21 12:54:09 +0200443}
444
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200445int
Michal Vaskof8879c22015-08-21 09:07:36 +0200446lyxml_add_child(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200447{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200448 struct lyxml_elem *e;
Radek Krejci02117302015-04-13 16:32:44 +0200449
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200450 assert(parent);
451 assert(elem);
Radek Krejci02117302015-04-13 16:32:44 +0200452
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200453 /* (re)link element to parent */
454 if (elem->parent) {
Michal Vaskof8879c22015-08-21 09:07:36 +0200455 lyxml_unlink_elem(ctx, elem, 1);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200456 }
457 elem->parent = parent;
Radek Krejci02117302015-04-13 16:32:44 +0200458
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200459 /* link parent to element */
460 if (parent->child) {
461 e = parent->child;
462 elem->prev = e->prev;
463 elem->next = NULL;
464 elem->prev->next = elem;
465 e->prev = elem;
466 } else {
467 parent->child = elem;
468 elem->prev = elem;
469 elem->next = NULL;
470 }
Radek Krejci02117302015-04-13 16:32:44 +0200471
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200472 return EXIT_SUCCESS;
Radek Krejci02117302015-04-13 16:32:44 +0200473}
474
Michal Vasko3b855722015-08-28 16:01:18 +0200475int
Radek Krejci48464ed2016-03-17 15:44:09 +0100476lyxml_getutf8(const char *buf, unsigned int *read)
Radek Krejci02117302015-04-13 16:32:44 +0200477{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200478 int c, aux;
479 int i;
Radek Krejci02117302015-04-13 16:32:44 +0200480
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200481 c = buf[0];
482 *read = 0;
Radek Krejci02117302015-04-13 16:32:44 +0200483
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200484 /* buf is NULL terminated string, so 0 means EOF */
485 if (!c) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100486 LOGVAL(LYE_EOF, LY_VLOG_NONE, NULL);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200487 return 0;
488 }
489 *read = 1;
Radek Krejci02117302015-04-13 16:32:44 +0200490
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200491 /* process character byte(s) */
492 if ((c & 0xf8) == 0xf0) {
493 /* four bytes character */
494 *read = 4;
Radek Krejci02117302015-04-13 16:32:44 +0200495
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200496 c &= 0x07;
497 for (i = 1; i <= 3; i++) {
498 aux = buf[i];
499 if ((aux & 0xc0) != 0x80) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100500 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200501 return 0;
502 }
Radek Krejci02117302015-04-13 16:32:44 +0200503
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200504 c = (c << 6) | (aux & 0x3f);
505 }
Radek Krejci02117302015-04-13 16:32:44 +0200506
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200507 if (c < 0x1000 || c > 0x10ffff) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100508 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200509 return 0;
510 }
511 } else if ((c & 0xf0) == 0xe0) {
512 /* three bytes character */
513 *read = 3;
Radek Krejci02117302015-04-13 16:32:44 +0200514
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200515 c &= 0x0f;
516 for (i = 1; i <= 2; i++) {
517 aux = buf[i];
518 if ((aux & 0xc0) != 0x80) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100519 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200520 return 0;
521 }
Radek Krejci02117302015-04-13 16:32:44 +0200522
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200523 c = (c << 6) | (aux & 0x3f);
524 }
Radek Krejci02117302015-04-13 16:32:44 +0200525
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200526 if (c < 0x800 || (c > 0xd7ff && c < 0xe000) || c > 0xfffd) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100527 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200528 return 0;
529 }
530 } else if ((c & 0xe0) == 0xc0) {
531 /* two bytes character */
532 *read = 2;
Radek Krejci02117302015-04-13 16:32:44 +0200533
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200534 aux = buf[1];
535 if ((aux & 0xc0) != 0x80) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100536 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200537 return 0;
538 }
539 c = ((c & 0x1f) << 6) | (aux & 0x3f);
Radek Krejci02117302015-04-13 16:32:44 +0200540
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200541 if (c < 0x80) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100542 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200543 return 0;
544 }
545 } else if (!(c & 0x80)) {
546 /* one byte character */
547 if (c < 0x20 && c != 0x9 && c != 0xa && c != 0xd) {
548 /* invalid character */
Radek Krejci48464ed2016-03-17 15:44:09 +0100549 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200550 return 0;
551 }
552 } else {
553 /* invalid character */
Radek Krejci48464ed2016-03-17 15:44:09 +0100554 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200555 return 0;
556 }
Radek Krejci02117302015-04-13 16:32:44 +0200557
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200558 return c;
Radek Krejci02117302015-04-13 16:32:44 +0200559}
560
Michal Vasko0d343d12015-08-24 14:57:36 +0200561/* logs directly */
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200562static int
563parse_ignore(const char *data, const char *endstr, unsigned int *len)
Radek Krejci02117302015-04-13 16:32:44 +0200564{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200565 unsigned int slen;
566 const char *c = data;
Radek Krejci02117302015-04-13 16:32:44 +0200567
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200568 slen = strlen(endstr);
Radek Krejci02117302015-04-13 16:32:44 +0200569
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200570 while (*c && memcmp(c, endstr, slen)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200571 c++;
572 }
573 if (!*c) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100574 LOGVAL(LYE_XML_MISS, LY_VLOG_NONE, NULL, "closing sequence", endstr);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200575 return EXIT_FAILURE;
576 }
577 c += slen;
Radek Krejci02117302015-04-13 16:32:44 +0200578
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200579 *len = c - data;
580 return EXIT_SUCCESS;
Radek Krejci02117302015-04-13 16:32:44 +0200581}
582
Michal Vasko0d343d12015-08-24 14:57:36 +0200583/* logs directly */
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200584static char *
585parse_text(const char *data, char delim, unsigned int *len)
Radek Krejci02117302015-04-13 16:32:44 +0200586{
Radek Krejci709fee62015-04-15 13:56:19 +0200587#define BUFSIZE 1024
Radek Krejci02117302015-04-13 16:32:44 +0200588
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200589 char buf[BUFSIZE];
590 char *result = NULL, *aux;
591 unsigned int r;
592 int o, size = 0;
593 int cdsect = 0;
594 int32_t n;
Radek Krejci709fee62015-04-15 13:56:19 +0200595
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200596 for (*len = o = 0; cdsect || data[*len] != delim; o++) {
Radek Krejci62ea46b2016-06-16 11:18:52 +0200597 if (!data[*len] || (!cdsect && !memcmp(&data[*len], "]]>", 3))) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100598 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "element content, \"]]>\" found");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200599 goto error;
600 }
Radek Krejci709fee62015-04-15 13:56:19 +0200601
Radek Krejcia4a84062015-04-16 13:00:10 +0200602loop:
603
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200604 if (o > BUFSIZE - 3) {
605 /* add buffer into the result */
606 if (result) {
607 size = size + o;
Michal Vasko253035f2015-12-17 16:58:13 +0100608 aux = ly_realloc(result, size + 1);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200609 result = aux;
610 } else {
611 size = o;
612 result = malloc((size + 1) * sizeof *result);
613 }
Michal Vasko253035f2015-12-17 16:58:13 +0100614 if (!result) {
615 LOGMEM;
616 return NULL;
617 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200618 memcpy(&result[size - o], buf, o);
Radek Krejci709fee62015-04-15 13:56:19 +0200619
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200620 /* write again into the beginning of the buffer */
621 o = 0;
622 }
Radek Krejci709fee62015-04-15 13:56:19 +0200623
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200624 if (cdsect || !memcmp(&data[*len], "<![CDATA[", 9)) {
625 /* CDSect */
626 if (!cdsect) {
627 cdsect = 1;
628 *len += 9;
629 }
630 if (data[*len] && !memcmp(&data[*len], "]]>", 3)) {
631 *len += 3;
632 cdsect = 0;
633 o--; /* we don't write any data in this iteration */
634 } else {
635 buf[o] = data[*len];
636 (*len)++;
637 }
638 } else if (data[*len] == '&') {
639 (*len)++;
640 if (data[*len] != '#') {
641 /* entity reference - only predefined refs are supported */
642 if (!memcmp(&data[*len], "lt;", 3)) {
643 buf[o] = '<';
644 *len += 3;
645 } else if (!memcmp(&data[*len], "gt;", 3)) {
646 buf[o] = '>';
647 *len += 3;
648 } else if (!memcmp(&data[*len], "amp;", 4)) {
649 buf[o] = '&';
650 *len += 4;
651 } else if (!memcmp(&data[*len], "apos;", 5)) {
652 buf[o] = '\'';
653 *len += 5;
654 } else if (!memcmp(&data[*len], "quot;", 5)) {
655 buf[o] = '\"';
656 *len += 5;
657 } else {
Radek Krejci48464ed2016-03-17 15:44:09 +0100658 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "entity reference (only predefined references are supported)");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200659 goto error;
660 }
661 } else {
662 /* character reference */
663 (*len)++;
664 if (isdigit(data[*len])) {
665 for (n = 0; isdigit(data[*len]); (*len)++) {
666 n = (10 * n) + (data[*len] - '0');
667 }
668 if (data[*len] != ';') {
Radek Krejci48464ed2016-03-17 15:44:09 +0100669 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "character reference, missing semicolon");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200670 goto error;
671 }
672 } else if (data[(*len)++] == 'x' && isxdigit(data[*len])) {
673 for (n = 0; isxdigit(data[*len]); (*len)++) {
674 if (isdigit(data[*len])) {
675 r = (data[*len] - '0');
676 } else if (data[*len] > 'F') {
677 r = 10 + (data[*len] - 'a');
678 } else {
679 r = 10 + (data[*len] - 'A');
680 }
681 n = (16 * n) + r;
682 }
683 } else {
Radek Krejci48464ed2016-03-17 15:44:09 +0100684 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "character reference");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200685 goto error;
Radek Krejci709fee62015-04-15 13:56:19 +0200686
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200687 }
Radek Krejci48464ed2016-03-17 15:44:09 +0100688 r = pututf8(&buf[o], n);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200689 if (!r) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100690 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "character reference value");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200691 goto error;
692 }
693 o += r - 1; /* o is ++ in for loop */
694 (*len)++;
695 }
696 } else {
697 buf[o] = data[*len];
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200698 (*len)++;
699 }
700 }
Radek Krejci02117302015-04-13 16:32:44 +0200701
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200702 if (delim == '<' && !memcmp(&data[*len], "<![CDATA[", 9)) {
703 /* ignore loop's end condition on beginning of CDSect */
704 goto loop;
705 }
Radek Krejci709fee62015-04-15 13:56:19 +0200706#undef BUFSIZE
707
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200708 if (o) {
709 if (result) {
710 size = size + o;
711 aux = realloc(result, size + 1);
712 result = aux;
713 } else {
714 size = o;
715 result = malloc((size + 1) * sizeof *result);
716 }
Michal Vasko253035f2015-12-17 16:58:13 +0100717 if (!result) {
718 LOGMEM;
719 return NULL;
720 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200721 memcpy(&result[size - o], buf, o);
722 }
723 if (result) {
724 result[size] = '\0';
Radek Krejcia5269642015-07-20 19:04:11 +0200725 } else {
726 size = 0;
727 result = strdup("");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200728 }
Radek Krejci02117302015-04-13 16:32:44 +0200729
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200730 return result;
Radek Krejci709fee62015-04-15 13:56:19 +0200731
732error:
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200733 free(result);
734 return NULL;
Radek Krejci02117302015-04-13 16:32:44 +0200735}
736
Michal Vasko0d343d12015-08-24 14:57:36 +0200737/* logs directly */
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200738static struct lyxml_attr *
Radek Krejci00249f22015-07-07 13:43:28 +0200739parse_attr(struct ly_ctx *ctx, const char *data, unsigned int *len, struct lyxml_elem *parent)
Radek Krejci674e1f82015-04-21 14:12:19 +0200740{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200741 const char *c = data, *start, *delim;
742 char prefix[32];
743 int uc;
Radek Krejci00249f22015-07-07 13:43:28 +0200744 struct lyxml_attr *attr = NULL, *a;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200745 unsigned int size;
Radek Krejci02117302015-04-13 16:32:44 +0200746
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200747 /* check if it is attribute or namespace */
748 if (!memcmp(c, "xmlns", 5)) {
749 /* namespace */
750 attr = calloc(1, sizeof (struct lyxml_ns));
Michal Vasko253035f2015-12-17 16:58:13 +0100751 if (!attr) {
752 LOGMEM;
753 return NULL;
754 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200755 attr->type = LYXML_ATTR_NS;
Radek Krejci00249f22015-07-07 13:43:28 +0200756 ((struct lyxml_ns *)attr)->parent = parent;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200757 c += 5;
758 if (*c != ':') {
759 /* default namespace, prefix will be empty */
760 goto equal;
761 }
762 c++; /* go after ':' to the prefix value */
763 } else {
764 /* attribute */
765 attr = calloc(1, sizeof *attr);
Michal Vasko253035f2015-12-17 16:58:13 +0100766 if (!attr) {
767 LOGMEM;
768 return NULL;
769 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200770 attr->type = LYXML_ATTR_STD;
771 }
Radek Krejci4ea08382015-04-21 09:41:40 +0200772
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200773 /* process name part of the attribute */
774 start = c;
Radek Krejci48464ed2016-03-17 15:44:09 +0100775 uc = lyxml_getutf8(c, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200776 if (!is_xmlnamestartchar(uc)) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100777 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "NameStartChar of the attribute");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200778 free(attr);
779 return NULL;
780 }
781 c += size;
Radek Krejci48464ed2016-03-17 15:44:09 +0100782 uc = lyxml_getutf8(c, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200783 while (is_xmlnamechar(uc)) {
784 if (attr->type == LYXML_ATTR_STD && *c == ':') {
785 /* attribute in a namespace */
786 start = c + 1;
Radek Krejci4ea08382015-04-21 09:41:40 +0200787
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200788 /* look for the prefix in namespaces */
789 memcpy(prefix, data, c - data);
790 prefix[c - data] = '\0';
Radek Krejci4476d412015-07-10 15:35:01 +0200791 attr->ns = lyxml_get_ns(parent, prefix);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200792 }
793 c += size;
Radek Krejci48464ed2016-03-17 15:44:09 +0100794 uc = lyxml_getutf8(c, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200795 }
Radek Krejci674e1f82015-04-21 14:12:19 +0200796
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200797 /* store the name */
798 size = c - start;
799 attr->name = lydict_insert(ctx, start, size);
Radek Krejci674e1f82015-04-21 14:12:19 +0200800
801equal:
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200802 /* check Eq mark that can be surrounded by whitespaces */
803 ign_xmlws(c);
804 if (*c != '=') {
Radek Krejci48464ed2016-03-17 15:44:09 +0100805 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "attribute definition, \"=\" expected");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200806 goto error;
807 }
808 c++;
809 ign_xmlws(c);
Radek Krejci02117302015-04-13 16:32:44 +0200810
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200811 /* process value part of the attribute */
812 if (!*c || (*c != '"' && *c != '\'')) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100813 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "attribute value, \" or \' expected");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200814 goto error;
815 }
816 delim = c;
817 attr->value = lydict_insert_zc(ctx, parse_text(++c, *delim, &size));
818 if (ly_errno) {
819 goto error;
820 }
Radek Krejci02117302015-04-13 16:32:44 +0200821
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200822 *len = c + size + 1 - data; /* +1 is delimiter size */
Radek Krejci00249f22015-07-07 13:43:28 +0200823
824 /* put attribute into the parent's attributes list */
825 if (parent->attr) {
826 /* go to the end of the list */
827 for (a = parent->attr; a->next; a = a->next);
828 /* and append new attribute */
829 a->next = attr;
830 } else {
831 /* add the first attribute in the list */
832 parent->attr = attr;
833 }
834
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200835 return attr;
Radek Krejci02117302015-04-13 16:32:44 +0200836
837error:
Radek Krejci00249f22015-07-07 13:43:28 +0200838 lyxml_free_attr(ctx, NULL, attr);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200839 return NULL;
Radek Krejci54ea8de2015-04-09 18:02:56 +0200840}
841
Michal Vasko0d343d12015-08-24 14:57:36 +0200842/* logs directly */
Radek Krejci9a5daea2016-03-02 16:49:40 +0100843struct lyxml_elem *
844lyxml_parse_elem(struct ly_ctx *ctx, const char *data, unsigned int *len, struct lyxml_elem *parent)
Radek Krejci54ea8de2015-04-09 18:02:56 +0200845{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200846 const char *c = data, *start, *e;
847 const char *lws; /* leading white space for handling mixed content */
848 int uc;
849 char *str;
850 char prefix[32] = { 0 };
851 unsigned int prefix_len = 0;
852 struct lyxml_elem *elem = NULL, *child;
853 struct lyxml_attr *attr;
854 unsigned int size;
855 int nons_flag = 0, closed_flag = 0;
Radek Krejci02117302015-04-13 16:32:44 +0200856
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200857 *len = 0;
Radek Krejci02117302015-04-13 16:32:44 +0200858
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200859 if (*c != '<') {
860 return NULL;
861 }
Radek Krejci02117302015-04-13 16:32:44 +0200862
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200863 /* locate element name */
864 c++;
865 e = c;
Radek Krejci02117302015-04-13 16:32:44 +0200866
Radek Krejci48464ed2016-03-17 15:44:09 +0100867 uc = lyxml_getutf8(e, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200868 if (!is_xmlnamestartchar(uc)) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100869 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "NameStartChar of the element");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200870 return NULL;
871 }
872 e += size;
Radek Krejci48464ed2016-03-17 15:44:09 +0100873 uc = lyxml_getutf8(e, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200874 while (is_xmlnamechar(uc)) {
875 if (*e == ':') {
876 if (prefix_len) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100877 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "element name, multiple colons found");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200878 goto error;
879 }
880 /* element in a namespace */
881 start = e + 1;
Radek Krejci674e1f82015-04-21 14:12:19 +0200882
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200883 /* look for the prefix in namespaces */
884 memcpy(prefix, c, prefix_len = e - c);
885 prefix[prefix_len] = '\0';
886 c = start;
887 }
888 e += size;
Radek Krejci48464ed2016-03-17 15:44:09 +0100889 uc = lyxml_getutf8(e, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200890 }
891 if (!*e) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100892 LOGVAL(LYE_EOF, LY_VLOG_NONE, NULL);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200893 return NULL;
894 }
Radek Krejci02117302015-04-13 16:32:44 +0200895
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200896 /* allocate element structure */
897 elem = calloc(1, sizeof *elem);
Michal Vasko253035f2015-12-17 16:58:13 +0100898 if (!elem) {
899 LOGMEM;
900 return NULL;
901 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200902 elem->next = NULL;
903 elem->prev = elem;
904 if (parent) {
Michal Vaskof8879c22015-08-21 09:07:36 +0200905 lyxml_add_child(ctx, parent, elem);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200906 }
Radek Krejci02117302015-04-13 16:32:44 +0200907
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200908 /* store the name into the element structure */
909 elem->name = lydict_insert(ctx, c, e - c);
910 c = e;
Radek Krejci02117302015-04-13 16:32:44 +0200911
912process:
Radek Krejcidb4f0782016-05-03 12:07:16 +0200913 ly_errno = LY_SUCCESS;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200914 ign_xmlws(c);
915 if (!memcmp("/>", c, 2)) {
916 /* we are done, it was EmptyElemTag */
917 c += 2;
Michal Vasko44913842016-04-13 14:20:41 +0200918 elem->content = lydict_insert(ctx, "", 0);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200919 closed_flag = 1;
920 } else if (*c == '>') {
921 /* process element content */
922 c++;
923 lws = NULL;
Radek Krejci02117302015-04-13 16:32:44 +0200924
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200925 while (*c) {
926 if (!memcmp(c, "</", 2)) {
927 if (lws && !elem->child) {
928 /* leading white spaces were actually content */
929 goto store_content;
930 }
Radek Krejci02117302015-04-13 16:32:44 +0200931
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200932 /* Etag */
933 c += 2;
934 /* get name and check it */
935 e = c;
Radek Krejci48464ed2016-03-17 15:44:09 +0100936 uc = lyxml_getutf8(e, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200937 if (!is_xmlnamestartchar(uc)) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100938 LOGVAL(LYE_XML_INVAL, LY_VLOG_XML, elem, "NameStartChar of the element");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200939 goto error;
940 }
941 e += size;
Radek Krejci48464ed2016-03-17 15:44:09 +0100942 uc = lyxml_getutf8(e, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200943 while (is_xmlnamechar(uc)) {
944 if (*e == ':') {
945 /* element in a namespace */
946 start = e + 1;
Radek Krejci674e1f82015-04-21 14:12:19 +0200947
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200948 /* look for the prefix in namespaces */
949 if (memcmp(prefix, c, e - c)) {
Michal Vaskoff9336a2016-05-10 10:48:48 +0200950 LOGVAL(LYE_SPEC, LY_VLOG_XML, elem,
951 "Invalid (different namespaces) opening (%s) and closing element tags.", elem->name);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200952 goto error;
953 }
954 c = start;
955 }
956 e += size;
Radek Krejci48464ed2016-03-17 15:44:09 +0100957 uc = lyxml_getutf8(e, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200958 }
959 if (!*e) {
Radek Krejci3cc10962016-04-13 15:03:27 +0200960 LOGVAL(LYE_EOF, LY_VLOG_NONE, NULL);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200961 goto error;
962 }
Radek Krejci02117302015-04-13 16:32:44 +0200963
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200964 /* check that it corresponds to opening tag */
965 size = e - c;
966 str = malloc((size + 1) * sizeof *str);
Michal Vasko253035f2015-12-17 16:58:13 +0100967 if (!str) {
968 LOGMEM;
969 goto error;
970 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200971 memcpy(str, c, e - c);
972 str[e - c] = '\0';
973 if (size != strlen(elem->name) || memcmp(str, elem->name, size)) {
Michal Vaskoff9336a2016-05-10 10:48:48 +0200974 LOGVAL(LYE_SPEC, LY_VLOG_XML, elem,
975 "Invalid (mixed names) opening (%s) and closing (%s) element tags.", elem->name, str);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200976 free(str);
977 goto error;
978 }
979 free(str);
980 c = e;
Radek Krejci02117302015-04-13 16:32:44 +0200981
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200982 ign_xmlws(c);
983 if (*c != '>') {
Michal Vaskoff9336a2016-05-10 10:48:48 +0200984 LOGVAL(LYE_SPEC, LY_VLOG_XML, elem, "Data after closing element tag \"%s\".", elem->name);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200985 goto error;
986 }
987 c++;
Michal Vaskoe00b7892016-04-14 10:12:18 +0200988 if (!(elem->flags & LYXML_ELEM_MIXED) && !elem->content) {
989 /* there was no content, but we don't want NULL (only if mixed content) */
990 elem->content = lydict_insert(ctx, "", 0);
991 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200992 closed_flag = 1;
993 break;
Radek Krejci02117302015-04-13 16:32:44 +0200994
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200995 } else if (!memcmp(c, "<?", 2)) {
996 if (lws) {
997 /* leading white spaces were only formatting */
998 lws = NULL;
999 }
1000 /* PI - ignore it */
1001 c += 2;
1002 if (parse_ignore(c, "?>", &size)) {
1003 goto error;
1004 }
1005 c += size;
1006 } else if (!memcmp(c, "<!--", 4)) {
1007 if (lws) {
1008 /* leading white spaces were only formatting */
1009 lws = NULL;
1010 }
1011 /* Comment - ignore it */
1012 c += 4;
1013 if (parse_ignore(c, "-->", &size)) {
1014 goto error;
1015 }
1016 c += size;
1017 } else if (!memcmp(c, "<![CDATA[", 9)) {
1018 /* CDSect */
1019 goto store_content;
1020 } else if (*c == '<') {
1021 if (lws) {
1022 if (elem->flags & LYXML_ELEM_MIXED) {
1023 /* we have a mixed content */
1024 goto store_content;
1025 } else {
1026 /* leading white spaces were only formatting */
1027 lws = NULL;
1028 }
1029 }
1030 if (elem->content) {
1031 /* we have a mixed content */
1032 child = calloc(1, sizeof *child);
Michal Vasko253035f2015-12-17 16:58:13 +01001033 if (!child) {
1034 LOGMEM;
1035 goto error;
1036 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001037 child->content = elem->content;
1038 elem->content = NULL;
Michal Vaskof8879c22015-08-21 09:07:36 +02001039 lyxml_add_child(ctx, elem, child);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001040 elem->flags |= LYXML_ELEM_MIXED;
1041 }
Radek Krejci9a5daea2016-03-02 16:49:40 +01001042 child = lyxml_parse_elem(ctx, c, &size, elem);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001043 if (!child) {
1044 goto error;
1045 }
1046 c += size; /* move after processed child element */
1047 } else if (is_xmlws(*c)) {
1048 lws = c;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001049 ign_xmlws(c);
1050 } else {
Radek Krejci02117302015-04-13 16:32:44 +02001051store_content:
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001052 /* store text content */
1053 if (lws) {
1054 /* process content including the leading white spaces */
1055 c = lws;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001056 lws = NULL;
1057 }
1058 elem->content = lydict_insert_zc(ctx, parse_text(c, '<', &size));
1059 if (ly_errno) {
1060 goto error;
1061 }
1062 c += size; /* move after processed text content */
Radek Krejci02117302015-04-13 16:32:44 +02001063
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001064 if (elem->child) {
1065 /* we have a mixed content */
1066 child = calloc(1, sizeof *child);
Michal Vasko253035f2015-12-17 16:58:13 +01001067 if (!child) {
1068 LOGMEM;
1069 goto error;
1070 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001071 child->content = elem->content;
1072 elem->content = NULL;
Michal Vaskof8879c22015-08-21 09:07:36 +02001073 lyxml_add_child(ctx, elem, child);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001074 elem->flags |= LYXML_ELEM_MIXED;
1075 }
1076 }
1077 }
1078 } else {
1079 /* process attribute */
1080 attr = parse_attr(ctx, c, &size, elem);
1081 if (!attr) {
1082 goto error;
1083 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001084 c += size; /* move after processed attribute */
Radek Krejci02117302015-04-13 16:32:44 +02001085
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001086 /* check namespace */
1087 if (attr->type == LYXML_ATTR_NS) {
1088 if (!prefix[0] && !attr->name) {
1089 if (attr->value) {
1090 /* default prefix */
1091 elem->ns = (struct lyxml_ns *)attr;
1092 } else {
1093 /* xmlns="" -> no namespace */
1094 nons_flag = 1;
1095 }
1096 } else if (prefix[0] && attr->name && !memcmp(attr->name, prefix, prefix_len + 1)) {
1097 /* matching namespace with prefix */
1098 elem->ns = (struct lyxml_ns *)attr;
1099 }
1100 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001101
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001102 /* go back to finish element processing */
1103 goto process;
1104 }
Radek Krejci02117302015-04-13 16:32:44 +02001105
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001106 *len = c - data;
Radek Krejci02117302015-04-13 16:32:44 +02001107
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001108 if (!closed_flag) {
Radek Krejci48464ed2016-03-17 15:44:09 +01001109 LOGVAL(LYE_XML_MISS, LY_VLOG_XML, elem, "closing element tag", elem->name);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001110 goto error;
1111 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001112
Radek Krejci78a230a2015-07-07 17:04:40 +02001113 if (!elem->ns && !nons_flag && parent) {
Radek Krejci4476d412015-07-10 15:35:01 +02001114 elem->ns = lyxml_get_ns(parent, prefix_len ? prefix : NULL);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001115 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001116
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001117 return elem;
Radek Krejci02117302015-04-13 16:32:44 +02001118
1119error:
Michal Vasko345da0a2015-12-02 10:35:55 +01001120 lyxml_free(ctx, elem);
Radek Krejci02117302015-04-13 16:32:44 +02001121
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001122 return NULL;
Radek Krejci54ea8de2015-04-09 18:02:56 +02001123}
1124
Michal Vasko0d343d12015-08-24 14:57:36 +02001125/* logs directly */
Radek Krejcic6704c82015-10-06 11:12:45 +02001126API struct lyxml_elem *
Radek Krejci722b0072016-02-01 17:09:45 +01001127lyxml_parse_mem(struct ly_ctx *ctx, const char *data, int options)
Radek Krejci54ea8de2015-04-09 18:02:56 +02001128{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001129 const char *c = data;
1130 unsigned int len;
Radek Krejci851ea662016-01-08 09:30:53 +01001131 struct lyxml_elem *root, *first = NULL, *next;
Radek Krejci02117302015-04-13 16:32:44 +02001132
Radek Krejci2342cf62016-01-29 16:48:23 +01001133 ly_errno = LY_SUCCESS;
1134
Radek Krejci120f6242015-12-17 12:32:56 +01001135repeat:
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001136 /* process document */
1137 while (*c) {
1138 if (is_xmlws(*c)) {
1139 /* skip whitespaces */
1140 ign_xmlws(c);
1141 } else if (!memcmp(c, "<?", 2)) {
1142 /* XMLDecl or PI - ignore it */
1143 c += 2;
1144 if (parse_ignore(c, "?>", &len)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001145 return NULL;
1146 }
1147 c += len;
1148 } else if (!memcmp(c, "<!--", 4)) {
1149 /* Comment - ignore it */
1150 c += 2;
1151 if (parse_ignore(c, "-->", &len)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001152 return NULL;
1153 }
1154 c += len;
1155 } else if (!memcmp(c, "<!", 2)) {
1156 /* DOCTYPE */
1157 /* TODO - standalone ignore counting < and > */
1158 LOGERR(LY_EINVAL, "DOCTYPE not supported in XML documents.");
1159 return NULL;
1160 } else if (*c == '<') {
1161 /* element - process it in next loop to strictly follow XML
1162 * format
1163 */
1164 break;
Michal Vaskoc2e80562015-07-27 11:31:41 +02001165 } else {
Radek Krejci48464ed2016-03-17 15:44:09 +01001166 LOGVAL(LYE_XML_INCHAR, LY_VLOG_NONE, NULL, c);
Michal Vaskoc2e80562015-07-27 11:31:41 +02001167 return NULL;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001168 }
1169 }
Radek Krejci02117302015-04-13 16:32:44 +02001170
Radek Krejci9a5daea2016-03-02 16:49:40 +01001171 root = lyxml_parse_elem(ctx, c, &len, NULL);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001172 if (!root) {
Michal Vaskobc58b4a2016-01-07 14:42:31 +01001173 if (first) {
Radek Krejci851ea662016-01-08 09:30:53 +01001174 LY_TREE_FOR_SAFE(first, next, root) {
Michal Vaskobc58b4a2016-01-07 14:42:31 +01001175 lyxml_free(ctx, root);
1176 }
Radek Krejci120f6242015-12-17 12:32:56 +01001177 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001178 return NULL;
Radek Krejci120f6242015-12-17 12:32:56 +01001179 } else if (!first) {
1180 first = root;
1181 } else {
1182 first->prev->next = root;
1183 root->prev = first->prev;
1184 first->prev = root;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001185 }
1186 c += len;
Radek Krejci02117302015-04-13 16:32:44 +02001187
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001188 /* ignore the rest of document where can be comments, PIs and whitespaces,
1189 * note that we are not detecting syntax errors in these parts
1190 */
1191 ign_xmlws(c);
1192 if (*c) {
Radek Krejci722b0072016-02-01 17:09:45 +01001193 if (options & LYXML_PARSE_MULTIROOT) {
Radek Krejci120f6242015-12-17 12:32:56 +01001194 goto repeat;
1195 } else {
1196 LOGWRN("There are some not parsed data:\n%s", c);
1197 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001198 }
Radek Krejci02117302015-04-13 16:32:44 +02001199
Radek Krejci120f6242015-12-17 12:32:56 +01001200 return first;
Radek Krejci02117302015-04-13 16:32:44 +02001201}
1202
Radek Krejcic6704c82015-10-06 11:12:45 +02001203API struct lyxml_elem *
Radek Krejci722b0072016-02-01 17:09:45 +01001204lyxml_parse_path(struct ly_ctx *ctx, const char *filename, int options)
Radek Krejci54ea8de2015-04-09 18:02:56 +02001205{
Radek Krejci6b3d9262015-12-03 13:45:27 +01001206 struct lyxml_elem *elem = NULL;
Pavol Vicanb2570c12015-11-12 13:50:20 +01001207 struct stat sb;
1208 int fd;
1209 char *addr;
1210
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001211 if (!filename || !ctx) {
1212 LOGERR(LY_EINVAL, "%s: Invalid parameter.", __func__);
1213 return NULL;
1214 }
Radek Krejci54ea8de2015-04-09 18:02:56 +02001215
Pavol Vicanb2570c12015-11-12 13:50:20 +01001216 fd = open(filename, O_RDONLY);
1217 if (fd == -1) {
Radek Krejci6b3d9262015-12-03 13:45:27 +01001218 LOGERR(LY_EINVAL,"Opening file \"%s\" failed.", filename);
Pavol Vicanb2570c12015-11-12 13:50:20 +01001219 return NULL;
1220 }
1221 if (fstat(fd, &sb) == -1) {
1222 LOGERR(LY_EINVAL, "Unable to get file \"%s\" information.\n", filename);
1223 goto error;
1224 }
1225 if (!S_ISREG(sb.st_mode)) {
Radek Krejcib051f722016-02-25 15:12:21 +01001226 LOGERR(LY_EINVAL, "%s: Invalid parameter, input file is not a regular file", __func__);
Pavol Vicanb2570c12015-11-12 13:50:20 +01001227 goto error;
1228 }
Pavol Vicanf7cc2852016-03-22 23:27:35 +01001229 addr = mmap(NULL, sb.st_size + 2, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
Pavol Vicanb2570c12015-11-12 13:50:20 +01001230 if (addr == MAP_FAILED) {
Radek Krejci6b3d9262015-12-03 13:45:27 +01001231 LOGERR(LY_EMEM,"Map file into memory failed (%s()).", __func__);
Pavol Vicanb2570c12015-11-12 13:50:20 +01001232 goto error;
1233 }
Radek Krejci6b3d9262015-12-03 13:45:27 +01001234
Radek Krejci722b0072016-02-01 17:09:45 +01001235 elem = lyxml_parse_mem(ctx, addr, options);
Pavol Vicanf7cc2852016-03-22 23:27:35 +01001236 munmap(addr, sb.st_size +2);
Radek Krejci30793ab2015-12-03 13:45:45 +01001237 close(fd);
Radek Krejci6b3d9262015-12-03 13:45:27 +01001238
Pavol Vicanb2570c12015-11-12 13:50:20 +01001239 return elem;
1240
1241error:
Radek Krejci6b3d9262015-12-03 13:45:27 +01001242 if (fd != -1) {
1243 close(fd);
1244 }
1245
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001246 return NULL;
Radek Krejci54ea8de2015-04-09 18:02:56 +02001247}
Radek Krejci02117302015-04-13 16:32:44 +02001248
/**
 * @brief Print a text into the output, replacing the XML special characters
 * '&', '<', '>' and '"' by their predefined entity references.
 *
 * @param[in] out Output handler to print to.
 * @param[in] text NUL-terminated text to print, NULL prints nothing.
 * @return Sum of the ly_print() results for the escaped characters plus one
 * for every character written as is; 0 when @p text is NULL.
 */
int
lyxml_dump_text(struct lyout *out, const char *text)
{
    const char *p;
    unsigned int count = 0;

    if (!text) {
        return 0;
    }

    for (p = text; *p; p++) {
        if (*p == '&') {
            count += ly_print(out, "&amp;");
        } else if (*p == '<') {
            count += ly_print(out, "&lt;");
        } else if (*p == '>') {
            /* not needed, just for readability */
            count += ly_print(out, "&gt;");
        } else if (*p == '"') {
            count += ly_print(out, "&quot;");
        } else {
            /* ordinary character, copy it as it is */
            ly_write(out, p, 1);
            count++;
        }
    }

    return count;
}
1281
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001282static int
Michal Vasko1e62a092015-12-01 12:27:20 +01001283dump_elem(struct lyout *out, const struct lyxml_elem *e, int level, int options)
Radek Krejcif0023a92015-04-20 20:51:39 +02001284{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001285 int size = 0;
1286 struct lyxml_attr *a;
1287 struct lyxml_elem *child;
1288 const char *delim, *delim_outer;
1289 int indent;
Radek Krejcif0023a92015-04-20 20:51:39 +02001290
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001291 if (!e->name) {
1292 /* mixed content */
1293 if (e->content) {
Michal Vasko5db027d2015-10-09 14:38:50 +02001294 return lyxml_dump_text(out, e->content);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001295 } else {
1296 return 0;
1297 }
1298 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001299
Radek Krejci722b0072016-02-01 17:09:45 +01001300 delim = delim_outer = (options & LYXML_PRINT_FORMAT) ? "\n" : "";
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001301 indent = 2 * level;
1302 if ((e->flags & LYXML_ELEM_MIXED) || (e->parent && (e->parent->flags & LYXML_ELEM_MIXED))) {
1303 delim = "";
1304 }
1305 if (e->parent && (e->parent->flags & LYXML_ELEM_MIXED)) {
1306 delim_outer = "";
1307 indent = 0;
1308 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001309
Radek Krejci722b0072016-02-01 17:09:45 +01001310 if (!(options & (LYXML_PRINT_OPEN | LYXML_PRINT_CLOSE | LYXML_PRINT_ATTRS)) || (options & LYXML_PRINT_OPEN)) {
Radek Krejcic6704c82015-10-06 11:12:45 +02001311 /* opening tag */
1312 if (e->ns && e->ns->prefix) {
Radek Krejci5248f132015-10-09 10:34:25 +02001313 size += ly_print(out, "%*s<%s:%s", indent, "", e->ns->prefix, e->name);
Radek Krejcic6704c82015-10-06 11:12:45 +02001314 } else {
Radek Krejci5248f132015-10-09 10:34:25 +02001315 size += ly_print(out, "%*s<%s", indent, "", e->name);
Radek Krejcic6704c82015-10-06 11:12:45 +02001316 }
Radek Krejci722b0072016-02-01 17:09:45 +01001317 } else if (options & LYXML_PRINT_CLOSE) {
Radek Krejcic6704c82015-10-06 11:12:45 +02001318 indent = 0;
1319 goto close;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001320 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001321
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001322 /* attributes */
1323 for (a = e->attr; a; a = a->next) {
1324 if (a->type == LYXML_ATTR_NS) {
1325 if (a->name) {
Radek Krejci5248f132015-10-09 10:34:25 +02001326 size += ly_print(out, " xmlns:%s=\"%s\"", a->name, a->value ? a->value : "");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001327 } else {
Radek Krejci5248f132015-10-09 10:34:25 +02001328 size += ly_print(out, " xmlns=\"%s\"", a->value ? a->value : "");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001329 }
1330 } else if (a->ns && a->ns->prefix) {
Radek Krejci5248f132015-10-09 10:34:25 +02001331 size += ly_print(out, " %s:%s=\"%s\"", a->ns->prefix, a->name, a->value);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001332 } else {
Radek Krejci5248f132015-10-09 10:34:25 +02001333 size += ly_print(out, " %s=\"%s\"", a->name, a->value);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001334 }
1335 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001336
Radek Krejcic6704c82015-10-06 11:12:45 +02001337 /* apply options */
Radek Krejci722b0072016-02-01 17:09:45 +01001338 if ((options & LYXML_PRINT_CLOSE) && (options & LYXML_PRINT_OPEN)) {
Radek Krejci5248f132015-10-09 10:34:25 +02001339 size += ly_print(out, "/>%s", delim);
Radek Krejcic6704c82015-10-06 11:12:45 +02001340 return size;
Radek Krejci722b0072016-02-01 17:09:45 +01001341 } else if (options & LYXML_PRINT_OPEN) {
Radek Krejci5248f132015-10-09 10:34:25 +02001342 ly_print(out, ">");
Radek Krejcic6704c82015-10-06 11:12:45 +02001343 return ++size;
Radek Krejci722b0072016-02-01 17:09:45 +01001344 } else if (options & LYXML_PRINT_ATTRS) {
Radek Krejcic6704c82015-10-06 11:12:45 +02001345 return size;
1346 }
1347
Michal Vasko3a611612016-04-14 10:12:56 +02001348 if (!e->child && (!e->content || !e->content[0])) {
Radek Krejci5248f132015-10-09 10:34:25 +02001349 size += ly_print(out, "/>%s", delim);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001350 return size;
Michal Vasko3a611612016-04-14 10:12:56 +02001351 } else if (e->content && e->content[0]) {
Radek Krejci5248f132015-10-09 10:34:25 +02001352 ly_print(out, ">");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001353 size++;
Radek Krejcif0023a92015-04-20 20:51:39 +02001354
Michal Vasko5db027d2015-10-09 14:38:50 +02001355 size += lyxml_dump_text(out, e->content);
Radek Krejcif0023a92015-04-20 20:51:39 +02001356
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001357 if (e->ns && e->ns->prefix) {
Radek Krejci5248f132015-10-09 10:34:25 +02001358 size += ly_print(out, "</%s:%s>%s", e->ns->prefix, e->name, delim);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001359 } else {
Radek Krejci5248f132015-10-09 10:34:25 +02001360 size += ly_print(out, "</%s>%s", e->name, delim);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001361 }
1362 return size;
1363 } else {
Radek Krejci5248f132015-10-09 10:34:25 +02001364 size += ly_print(out, ">%s", delim);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001365 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001366
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001367 /* go recursively */
1368 LY_TREE_FOR(e->child, child) {
Radek Krejci722b0072016-02-01 17:09:45 +01001369 if (options & LYXML_PRINT_FORMAT) {
1370 size += dump_elem(out, child, level + 1, LYXML_PRINT_FORMAT);
Pavol Vicanbe7eef52015-10-22 14:07:48 +02001371 } else {
1372 size += dump_elem(out, child, level, 0);
1373 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001374 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001375
Radek Krejcic6704c82015-10-06 11:12:45 +02001376close:
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001377 /* closing tag */
1378 if (e->ns && e->ns->prefix) {
Radek Krejci5248f132015-10-09 10:34:25 +02001379 size += ly_print(out, "%*s</%s:%s>%s", indent, "", e->ns->prefix, e->name, delim_outer);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001380 } else {
Radek Krejci5248f132015-10-09 10:34:25 +02001381 size += ly_print(out, "%*s</%s>%s", indent, "", e->name, delim_outer);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001382 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001383
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001384 return size;
Radek Krejcif0023a92015-04-20 20:51:39 +02001385}
1386
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001387static int
1388dump_siblings(struct lyout *out, const struct lyxml_elem *e, int options)
1389{
1390 const struct lyxml_elem *start, *iter;
1391 int ret = 0;
1392
1393 if (e->parent) {
1394 start = e->parent->child;
1395 } else {
1396 start = e;
1397 while(start->prev && start->prev->next) {
1398 start = start->prev;
1399 }
1400 }
1401
1402 LY_TREE_FOR(start, iter) {
1403 ret += dump_elem(out, iter, 0, options);
1404 }
1405
1406 return ret;
1407}
1408
Radek Krejcic6704c82015-10-06 11:12:45 +02001409API int
Radek Krejci722b0072016-02-01 17:09:45 +01001410lyxml_print_file(FILE *stream, const struct lyxml_elem *elem, int options)
Radek Krejcif0023a92015-04-20 20:51:39 +02001411{
Radek Krejci5248f132015-10-09 10:34:25 +02001412 struct lyout out;
1413
1414 if (!stream || !elem) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001415 return 0;
1416 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001417
Radek Krejci5248f132015-10-09 10:34:25 +02001418 out.type = LYOUT_STREAM;
1419 out.method.f = stream;
1420
Radek Krejci722b0072016-02-01 17:09:45 +01001421 if (options & LYXML_PRINT_SIBLINGS) {
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001422 return dump_siblings(&out, elem, options);
1423 } else {
1424 return dump_elem(&out, elem, 0, options);
1425 }
Radek Krejci5248f132015-10-09 10:34:25 +02001426}
1427
1428API int
Radek Krejci722b0072016-02-01 17:09:45 +01001429lyxml_print_fd(int fd, const struct lyxml_elem *elem, int options)
Radek Krejci5248f132015-10-09 10:34:25 +02001430{
1431 struct lyout out;
1432
1433 if (fd < 0 || !elem) {
1434 return 0;
1435 }
1436
1437 out.type = LYOUT_FD;
1438 out.method.fd = fd;
1439
Radek Krejci722b0072016-02-01 17:09:45 +01001440 if (options & LYXML_PRINT_SIBLINGS) {
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001441 return dump_siblings(&out, elem, options);
1442 } else {
1443 return dump_elem(&out, elem, 0, options);
1444 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001445}
Radek Krejci6140e4e2015-10-09 15:50:55 +02001446
1447API int
Radek Krejci722b0072016-02-01 17:09:45 +01001448lyxml_print_mem(char **strp, const struct lyxml_elem *elem, int options)
Radek Krejci2fa0fc12015-10-14 18:14:29 +02001449{
1450 struct lyout out;
1451 int r;
1452
1453 if (!strp || !elem) {
1454 return 0;
1455 }
1456
1457 out.type = LYOUT_MEMORY;
1458 out.method.mem.buf = NULL;
1459 out.method.mem.len = 0;
1460 out.method.mem.size = 0;
1461
Radek Krejci722b0072016-02-01 17:09:45 +01001462 if (options & LYXML_PRINT_SIBLINGS) {
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001463 r = dump_siblings(&out, elem, options);
1464 } else {
1465 r = dump_elem(&out, elem, 0, options);
1466 }
Radek Krejci2fa0fc12015-10-14 18:14:29 +02001467
1468 *strp = out.method.mem.buf;
1469 return r;
1470}
1471
1472API int
Radek Krejci722b0072016-02-01 17:09:45 +01001473lyxml_print_clb(ssize_t (*writeclb)(void *arg, const void *buf, size_t count), void *arg, const struct lyxml_elem *elem, int options)
Radek Krejci6140e4e2015-10-09 15:50:55 +02001474{
1475 struct lyout out;
1476
1477 if (!writeclb || !elem) {
1478 return 0;
1479 }
1480
1481 out.type = LYOUT_CALLBACK;
Radek Krejci50929eb2015-10-09 18:14:15 +02001482 out.method.clb.f = writeclb;
1483 out.method.clb.arg = arg;
Radek Krejci6140e4e2015-10-09 15:50:55 +02001484
Radek Krejci722b0072016-02-01 17:09:45 +01001485 if (options & LYXML_PRINT_SIBLINGS) {
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001486 return dump_siblings(&out, elem, options);
1487 } else {
1488 return dump_elem(&out, elem, 0, options);
1489 }
Radek Krejci6140e4e2015-10-09 15:50:55 +02001490}