blob: d4c106363f06d5d7f98b3e92bfaeaafb3cb3e579 [file] [log] [blame]
Radek Krejci54ea8de2015-04-09 18:02:56 +02001/**
2 * @file xml.c
3 * @author Radek Krejci <rkrejci@cesnet.cz>
4 * @brief XML parser implementation for libyang
5 *
6 * Copyright (c) 2015 CESNET, z.s.p.o.
7 *
Radek Krejci54f6fb32016-02-24 12:56:39 +01008 * This source code is licensed under BSD 3-Clause License (the "License").
9 * You may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
Michal Vasko8de098c2016-02-26 10:00:25 +010011 *
Radek Krejci54f6fb32016-02-24 12:56:39 +010012 * https://opensource.org/licenses/BSD-3-Clause
Radek Krejci54ea8de2015-04-09 18:02:56 +020013 */
14
Radek Krejci812b10a2015-05-28 16:48:25 +020015#include <assert.h>
Radek Krejci563427e2016-02-08 16:26:34 +010016#include <errno.h>
Radek Krejci709fee62015-04-15 13:56:19 +020017#include <ctype.h>
18#include <stdint.h>
Radek Krejcif0023a92015-04-20 20:51:39 +020019#include <stdio.h>
Radek Krejci02117302015-04-13 16:32:44 +020020#include <stdlib.h>
21#include <string.h>
Radek Krejci54ea8de2015-04-09 18:02:56 +020022#include <unistd.h>
Radek Krejci563427e2016-02-08 16:26:34 +010023#include <pthread.h>
Pavol Vicanb2570c12015-11-12 13:50:20 +010024#include <sys/stat.h>
25#include <sys/mman.h>
Radek Krejci563427e2016-02-08 16:26:34 +010026#include <sys/syscall.h>
Pavol Vicanb2570c12015-11-12 13:50:20 +010027#include <fcntl.h>
Radek Krejci54ea8de2015-04-09 18:02:56 +020028
Radek Krejci06a704e2015-04-22 14:50:49 +020029#include "common.h"
Radek Krejci41912fe2015-10-22 10:22:12 +020030#include "dict_private.h"
Radek Krejci5248f132015-10-09 10:34:25 +020031#include "printer.h"
Radek Krejci5449d472015-10-26 14:35:56 +010032#include "parser.h"
Michal Vasko2d162e12015-09-24 14:33:29 +020033#include "tree_schema.h"
Michal Vaskofc5744d2015-10-22 12:09:34 +020034#include "xml_internal.h"
Radek Krejci54ea8de2015-04-09 18:02:56 +020035
/*
 * Advance the pointer expression p while it points at a character for
 * which is_xmlws() is true. The argument is parenthesized so that the
 * macro also works with non-trivial pointer expressions.
 */
#define ign_xmlws(p) \
    while (is_xmlws(*(p))) { \
        (p)++; \
    }
Radek Krejci02117302015-04-13 16:32:44 +020040
/* forward declaration - the function is used before its definition in this file */
static struct lyxml_attr *lyxml_dup_attr(struct ly_ctx *ctx,
                                         struct lyxml_elem *parent,
                                         struct lyxml_attr *attr);
42
Michal Vasko1e62a092015-12-01 12:27:20 +010043API const struct lyxml_ns *
44lyxml_get_ns(const struct lyxml_elem *elem, const char *prefix)
Michal Vaskof8879c22015-08-21 09:07:36 +020045{
46 struct lyxml_attr *attr;
Michal Vaskof8879c22015-08-21 09:07:36 +020047
48 if (!elem) {
49 return NULL;
50 }
51
Michal Vaskof8879c22015-08-21 09:07:36 +020052 for (attr = elem->attr; attr; attr = attr->next) {
53 if (attr->type != LYXML_ATTR_NS) {
54 continue;
55 }
56 if (!attr->name) {
Radek Krejci13f3f152016-10-03 11:40:13 +020057 if (!prefix) {
Michal Vaskof8879c22015-08-21 09:07:36 +020058 /* default namespace found */
59 if (!attr->value) {
60 /* empty default namespace -> no default namespace */
61 return NULL;
62 }
63 return (struct lyxml_ns *)attr;
64 }
Radek Krejci7d39dae2016-10-03 17:33:01 +020065 } else if (prefix && !strcmp(attr->name, prefix)) {
Michal Vaskof8879c22015-08-21 09:07:36 +020066 /* prefix found */
67 return (struct lyxml_ns *)attr;
68 }
69 }
70
71 /* go recursively */
72 return lyxml_get_ns(elem->parent, prefix);
73}
74
Michal Vasko88c29542015-11-27 14:57:53 +010075static void
76lyxml_correct_attr_ns(struct ly_ctx *ctx, struct lyxml_attr *attr, struct lyxml_elem *attr_parent, int copy_ns)
77{
78 const struct lyxml_ns *tmp_ns;
Michal Vaskof6109112015-12-03 14:00:42 +010079 struct lyxml_elem *ns_root, *attr_root;
Michal Vasko88c29542015-11-27 14:57:53 +010080
81 if ((attr->type != LYXML_ATTR_NS) && attr->ns) {
Michal Vaskof6109112015-12-03 14:00:42 +010082 /* find the root of attr */
83 for (attr_root = attr_parent; attr_root->parent; attr_root = attr_root->parent);
Michal Vasko88c29542015-11-27 14:57:53 +010084
85 /* find the root of attr NS */
86 for (ns_root = attr->ns->parent; ns_root->parent; ns_root = ns_root->parent);
87
Michal Vaskof6109112015-12-03 14:00:42 +010088 /* attr NS is defined outside attr parent subtree */
89 if (ns_root != attr_root) {
Michal Vasko88c29542015-11-27 14:57:53 +010090 if (copy_ns) {
91 tmp_ns = attr->ns;
92 /* we may have already copied the NS over? */
Radek Krejci66aca402016-05-24 15:23:02 +020093 attr->ns = lyxml_get_ns(attr_parent, tmp_ns->prefix);
Michal Vasko88c29542015-11-27 14:57:53 +010094
95 /* we haven't copied it over, copy it now */
96 if (!attr->ns) {
Michal Vaskof6109112015-12-03 14:00:42 +010097 attr->ns = (struct lyxml_ns *)lyxml_dup_attr(ctx, attr_parent, (struct lyxml_attr *)tmp_ns);
Michal Vasko88c29542015-11-27 14:57:53 +010098 }
99 } else {
100 attr->ns = NULL;
101 }
102 }
103 }
104}
105
106static struct lyxml_attr *
Michal Vaskof8879c22015-08-21 09:07:36 +0200107lyxml_dup_attr(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_attr *attr)
108{
109 struct lyxml_attr *result, *a;
110
111 if (!attr || !parent) {
112 return NULL;
113 }
114
115 if (attr->type == LYXML_ATTR_NS) {
116 /* this is correct, despite that all attributes seems like a standard
117 * attributes (struct lyxml_attr), some of them can be namespace
118 * definitions (and in that case they are struct lyxml_ns).
119 */
120 result = (struct lyxml_attr *)calloc(1, sizeof (struct lyxml_ns));
121 } else {
122 result = calloc(1, sizeof (struct lyxml_attr));
123 }
Michal Vasko253035f2015-12-17 16:58:13 +0100124 if (!result) {
125 LOGMEM;
126 return NULL;
127 }
Michal Vaskof8879c22015-08-21 09:07:36 +0200128 result->value = lydict_insert(ctx, attr->value, 0);
129 result->name = lydict_insert(ctx, attr->name, 0);
130 result->type = attr->type;
131
132 /* set namespace in case of standard attributes */
133 if (result->type == LYXML_ATTR_STD && attr->ns) {
Michal Vasko88c29542015-11-27 14:57:53 +0100134 result->ns = attr->ns;
135 lyxml_correct_attr_ns(ctx, result, parent, 1);
Michal Vaskof8879c22015-08-21 09:07:36 +0200136 }
137
138 /* set parent pointer in case of namespace attribute */
139 if (result->type == LYXML_ATTR_NS) {
140 ((struct lyxml_ns *)result)->parent = parent;
141 }
142
143 /* put attribute into the parent's attributes list */
144 if (parent->attr) {
145 /* go to the end of the list */
146 for (a = parent->attr; a->next; a = a->next);
147 /* and append new attribute */
148 a->next = result;
149 } else {
150 /* add the first attribute in the list */
151 parent->attr = result;
152 }
153
154 return result;
155}
156
Michal Vaskof748dbc2016-04-05 11:27:47 +0200157void
Michal Vasko88c29542015-11-27 14:57:53 +0100158lyxml_correct_elem_ns(struct ly_ctx *ctx, struct lyxml_elem *elem, int copy_ns, int correct_attrs)
159{
160 const struct lyxml_ns *tmp_ns;
Radek Krejcid5be5682016-01-14 16:23:22 +0100161 struct lyxml_elem *elem_root, *ns_root, *tmp, *iter;
Michal Vasko88c29542015-11-27 14:57:53 +0100162 struct lyxml_attr *attr;
163
164 /* find the root of elem */
165 for (elem_root = elem; elem_root->parent; elem_root = elem_root->parent);
166
Radek Krejcid5be5682016-01-14 16:23:22 +0100167 LY_TREE_DFS_BEGIN(elem, tmp, iter) {
168 if (iter->ns) {
Michal Vasko88c29542015-11-27 14:57:53 +0100169 /* find the root of elem NS */
Radek Krejcic071c542016-01-27 14:57:51 +0100170 for (ns_root = iter->ns->parent; ns_root; ns_root = ns_root->parent);
Michal Vasko88c29542015-11-27 14:57:53 +0100171
172 /* elem NS is defined outside elem subtree */
173 if (ns_root != elem_root) {
174 if (copy_ns) {
Radek Krejcid5be5682016-01-14 16:23:22 +0100175 tmp_ns = iter->ns;
Michal Vasko88c29542015-11-27 14:57:53 +0100176 /* we may have already copied the NS over? */
Radek Krejcid5be5682016-01-14 16:23:22 +0100177 iter->ns = lyxml_get_ns(iter, tmp_ns->prefix);
Michal Vasko88c29542015-11-27 14:57:53 +0100178
179 /* we haven't copied it over, copy it now */
Radek Krejcid5be5682016-01-14 16:23:22 +0100180 if (!iter->ns) {
181 iter->ns = (struct lyxml_ns *)lyxml_dup_attr(ctx, iter, (struct lyxml_attr *)tmp_ns);
Michal Vasko88c29542015-11-27 14:57:53 +0100182 }
183 } else {
Radek Krejcid5be5682016-01-14 16:23:22 +0100184 iter->ns = NULL;
Michal Vasko88c29542015-11-27 14:57:53 +0100185 }
186 }
187 }
188 if (correct_attrs) {
Radek Krejcid5be5682016-01-14 16:23:22 +0100189 LY_TREE_FOR(iter->attr, attr) {
Michal Vasko88c29542015-11-27 14:57:53 +0100190 lyxml_correct_attr_ns(ctx, attr, elem_root, copy_ns);
191 }
192 }
Radek Krejcid5be5682016-01-14 16:23:22 +0100193 LY_TREE_DFS_END(elem, tmp, iter);
Michal Vasko88c29542015-11-27 14:57:53 +0100194 }
195}
196
Michal Vaskof8879c22015-08-21 09:07:36 +0200197struct lyxml_elem *
198lyxml_dup_elem(struct ly_ctx *ctx, struct lyxml_elem *elem, struct lyxml_elem *parent, int recursive)
199{
200 struct lyxml_elem *result, *child;
201 struct lyxml_attr *attr;
202
203 if (!elem) {
204 return NULL;
205 }
206
207 result = calloc(1, sizeof *result);
Michal Vasko253035f2015-12-17 16:58:13 +0100208 if (!result) {
209 LOGMEM;
210 return NULL;
211 }
Michal Vaskof8879c22015-08-21 09:07:36 +0200212 result->content = lydict_insert(ctx, elem->content, 0);
213 result->name = lydict_insert(ctx, elem->name, 0);
214 result->flags = elem->flags;
Michal Vaskof8879c22015-08-21 09:07:36 +0200215 result->prev = result;
216
217 if (parent) {
218 lyxml_add_child(ctx, parent, result);
219 }
220
Michal Vasko88c29542015-11-27 14:57:53 +0100221 /* keep old namespace for now */
222 result->ns = elem->ns;
223
224 /* correct namespaces */
225 lyxml_correct_elem_ns(ctx, result, 1, 0);
Michal Vaskof8879c22015-08-21 09:07:36 +0200226
227 /* duplicate attributes */
228 for (attr = elem->attr; attr; attr = attr->next) {
229 lyxml_dup_attr(ctx, result, attr);
230 }
231
232 if (!recursive) {
233 return result;
234 }
235
236 /* duplicate children */
237 LY_TREE_FOR(elem->child, child) {
238 lyxml_dup_elem(ctx, child, result, 1);
239 }
240
241 return result;
242}
243
Radek Krejci6879d952017-01-09 12:49:19 +0100244API struct lyxml_elem *
245lyxml_dup(struct ly_ctx *ctx, struct lyxml_elem *root)
246{
247 return lyxml_dup_elem(ctx, root, NULL, 1);
248}
249
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200250void
Michal Vaskof8879c22015-08-21 09:07:36 +0200251lyxml_unlink_elem(struct ly_ctx *ctx, struct lyxml_elem *elem, int copy_ns)
Radek Krejci02117302015-04-13 16:32:44 +0200252{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200253 struct lyxml_elem *parent, *first;
Radek Krejci02117302015-04-13 16:32:44 +0200254
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200255 if (!elem) {
256 return;
257 }
Radek Krejci02117302015-04-13 16:32:44 +0200258
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200259 /* store pointers to important nodes */
260 parent = elem->parent;
Radek Krejcie1f13912015-05-26 15:17:38 +0200261
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200262 /* unlink from parent */
263 if (parent) {
264 if (parent->child == elem) {
265 /* we unlink the first child */
266 /* update the parent's link */
267 parent->child = elem->next;
268 }
269 /* forget about the parent */
270 elem->parent = NULL;
271 }
Radek Krejci02117302015-04-13 16:32:44 +0200272
Michal Vasko88c29542015-11-27 14:57:53 +0100273 if (copy_ns < 2) {
274 lyxml_correct_elem_ns(ctx, elem, copy_ns, 1);
275 }
276
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200277 /* unlink from siblings */
278 if (elem->prev == elem) {
279 /* there are no more siblings */
280 return;
281 }
282 if (elem->next) {
283 elem->next->prev = elem->prev;
284 } else {
285 /* unlinking the last element */
286 if (parent) {
287 first = parent->child;
288 } else {
289 first = elem;
Radek Krejcie4fffcf2016-02-23 16:06:25 +0100290 while (first->prev->next) {
291 first = first->prev;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200292 }
293 }
294 first->prev = elem->prev;
295 }
296 if (elem->prev->next) {
297 elem->prev->next = elem->next;
298 }
Radek Krejcida04f4a2015-05-21 12:54:09 +0200299
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200300 /* clean up the unlinked element */
301 elem->next = NULL;
302 elem->prev = elem;
Radek Krejci02117302015-04-13 16:32:44 +0200303}
304
Michal Vasko345da0a2015-12-02 10:35:55 +0100305API void
306lyxml_unlink(struct ly_ctx *ctx, struct lyxml_elem *elem)
307{
308 if (!elem) {
309 return;
310 }
311
312 lyxml_unlink_elem(ctx, elem, 1);
313}
314
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200315void
Radek Krejci00249f22015-07-07 13:43:28 +0200316lyxml_free_attr(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_attr *attr)
Radek Krejci02117302015-04-13 16:32:44 +0200317{
Radek Krejci00249f22015-07-07 13:43:28 +0200318 struct lyxml_attr *aiter, *aprev;
319
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200320 if (!attr) {
321 return;
322 }
Radek Krejci02117302015-04-13 16:32:44 +0200323
Radek Krejci00249f22015-07-07 13:43:28 +0200324 if (parent) {
325 /* unlink attribute from the parent's list of attributes */
326 aprev = NULL;
327 for (aiter = parent->attr; aiter; aiter = aiter->next) {
328 if (aiter == attr) {
329 break;
330 }
331 aprev = aiter;
332 }
333 if (!aiter) {
334 /* attribute to remove not found */
335 return;
336 }
337
338 if (!aprev) {
339 /* attribute is first in parent's list of attributes */
340 parent->attr = attr->next;
341 } else {
342 /* reconnect previous attribute to the next */
343 aprev->next = attr->next;
344 }
345 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200346 lydict_remove(ctx, attr->name);
347 lydict_remove(ctx, attr->value);
348 free(attr);
Radek Krejci02117302015-04-13 16:32:44 +0200349}
350
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200351void
352lyxml_free_attrs(struct ly_ctx *ctx, struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200353{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200354 struct lyxml_attr *a, *next;
355 if (!elem || !elem->attr) {
356 return;
357 }
Radek Krejci02117302015-04-13 16:32:44 +0200358
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200359 a = elem->attr;
360 do {
361 next = a->next;
Radek Krejci02117302015-04-13 16:32:44 +0200362
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200363 lydict_remove(ctx, a->name);
364 lydict_remove(ctx, a->value);
365 free(a);
Radek Krejci02117302015-04-13 16:32:44 +0200366
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200367 a = next;
368 } while (a);
Radek Krejci02117302015-04-13 16:32:44 +0200369}
370
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200371static void
Michal Vasko272e42f2015-12-02 12:20:37 +0100372lyxml_free_elem(struct ly_ctx *ctx, struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200373{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200374 struct lyxml_elem *e, *next;
Radek Krejci02117302015-04-13 16:32:44 +0200375
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200376 if (!elem) {
377 return;
378 }
Radek Krejci02117302015-04-13 16:32:44 +0200379
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200380 lyxml_free_attrs(ctx, elem);
381 LY_TREE_FOR_SAFE(elem->child, next, e) {
Michal Vasko272e42f2015-12-02 12:20:37 +0100382 lyxml_free_elem(ctx, e);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200383 }
384 lydict_remove(ctx, elem->name);
385 lydict_remove(ctx, elem->content);
386 free(elem);
Radek Krejci02117302015-04-13 16:32:44 +0200387}
388
Radek Krejcic6704c82015-10-06 11:12:45 +0200389API void
Michal Vasko345da0a2015-12-02 10:35:55 +0100390lyxml_free(struct ly_ctx *ctx, struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200391{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200392 if (!elem) {
393 return;
394 }
Radek Krejci02117302015-04-13 16:32:44 +0200395
Michal Vasko61f7ccb2015-10-23 10:15:08 +0200396 lyxml_unlink_elem(ctx, elem, 2);
Michal Vasko272e42f2015-12-02 12:20:37 +0100397 lyxml_free_elem(ctx, elem);
Radek Krejci02117302015-04-13 16:32:44 +0200398}
399
Radek Krejci8f8db232016-05-23 16:48:21 +0200400API void
401lyxml_free_withsiblings(struct ly_ctx *ctx, struct lyxml_elem *elem)
402{
403 struct lyxml_elem *iter, *aux;
404
405 if (!elem) {
406 return;
407 }
408
409 /* optimization - avoid freeing (unlinking) the last node of the siblings list */
410 /* so, first, free the node's predecessors to the beginning of the list ... */
411 for(iter = elem->prev; iter->next; iter = aux) {
412 aux = iter->prev;
413 lyxml_free(ctx, iter);
414 }
415 /* ... then, the node is the first in the siblings list, so free them all */
416 LY_TREE_FOR_SAFE(elem, aux, iter) {
417 lyxml_free(ctx, iter);
418 }
419}
420
Michal Vasko88c29542015-11-27 14:57:53 +0100421API const char *
Michal Vasko1e62a092015-12-01 12:27:20 +0100422lyxml_get_attr(const struct lyxml_elem *elem, const char *name, const char *ns)
Radek Krejcida04f4a2015-05-21 12:54:09 +0200423{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200424 struct lyxml_attr *a;
Radek Krejcida04f4a2015-05-21 12:54:09 +0200425
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200426 assert(elem);
427 assert(name);
Radek Krejcida04f4a2015-05-21 12:54:09 +0200428
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200429 for (a = elem->attr; a; a = a->next) {
430 if (a->type != LYXML_ATTR_STD) {
431 continue;
432 }
Radek Krejcida04f4a2015-05-21 12:54:09 +0200433
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200434 if (!strcmp(name, a->name)) {
435 if ((!ns && !a->ns) || (ns && a->ns && !strcmp(ns, a->ns->value))) {
436 return a->value;
437 }
438 }
439 }
Radek Krejcida04f4a2015-05-21 12:54:09 +0200440
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200441 return NULL;
Radek Krejcida04f4a2015-05-21 12:54:09 +0200442}
443
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200444int
Michal Vaskof8879c22015-08-21 09:07:36 +0200445lyxml_add_child(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200446{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200447 struct lyxml_elem *e;
Radek Krejci02117302015-04-13 16:32:44 +0200448
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200449 assert(parent);
450 assert(elem);
Radek Krejci02117302015-04-13 16:32:44 +0200451
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200452 /* (re)link element to parent */
453 if (elem->parent) {
Michal Vaskof8879c22015-08-21 09:07:36 +0200454 lyxml_unlink_elem(ctx, elem, 1);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200455 }
456 elem->parent = parent;
Radek Krejci02117302015-04-13 16:32:44 +0200457
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200458 /* link parent to element */
459 if (parent->child) {
460 e = parent->child;
461 elem->prev = e->prev;
462 elem->next = NULL;
463 elem->prev->next = elem;
464 e->prev = elem;
465 } else {
466 parent->child = elem;
467 elem->prev = elem;
468 elem->next = NULL;
469 }
Radek Krejci02117302015-04-13 16:32:44 +0200470
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200471 return EXIT_SUCCESS;
Radek Krejci02117302015-04-13 16:32:44 +0200472}
473
Michal Vasko3b855722015-08-28 16:01:18 +0200474int
Radek Krejci48464ed2016-03-17 15:44:09 +0100475lyxml_getutf8(const char *buf, unsigned int *read)
Radek Krejci02117302015-04-13 16:32:44 +0200476{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200477 int c, aux;
478 int i;
Radek Krejci02117302015-04-13 16:32:44 +0200479
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200480 c = buf[0];
481 *read = 0;
Radek Krejci02117302015-04-13 16:32:44 +0200482
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200483 /* buf is NULL terminated string, so 0 means EOF */
484 if (!c) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100485 LOGVAL(LYE_EOF, LY_VLOG_NONE, NULL);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200486 return 0;
487 }
488 *read = 1;
Radek Krejci02117302015-04-13 16:32:44 +0200489
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200490 /* process character byte(s) */
491 if ((c & 0xf8) == 0xf0) {
492 /* four bytes character */
493 *read = 4;
Radek Krejci02117302015-04-13 16:32:44 +0200494
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200495 c &= 0x07;
496 for (i = 1; i <= 3; i++) {
497 aux = buf[i];
498 if ((aux & 0xc0) != 0x80) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100499 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200500 return 0;
501 }
Radek Krejci02117302015-04-13 16:32:44 +0200502
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200503 c = (c << 6) | (aux & 0x3f);
504 }
Radek Krejci02117302015-04-13 16:32:44 +0200505
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200506 if (c < 0x1000 || c > 0x10ffff) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100507 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200508 return 0;
509 }
510 } else if ((c & 0xf0) == 0xe0) {
511 /* three bytes character */
512 *read = 3;
Radek Krejci02117302015-04-13 16:32:44 +0200513
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200514 c &= 0x0f;
515 for (i = 1; i <= 2; i++) {
516 aux = buf[i];
517 if ((aux & 0xc0) != 0x80) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100518 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200519 return 0;
520 }
Radek Krejci02117302015-04-13 16:32:44 +0200521
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200522 c = (c << 6) | (aux & 0x3f);
523 }
Radek Krejci02117302015-04-13 16:32:44 +0200524
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200525 if (c < 0x800 || (c > 0xd7ff && c < 0xe000) || c > 0xfffd) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100526 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200527 return 0;
528 }
529 } else if ((c & 0xe0) == 0xc0) {
530 /* two bytes character */
531 *read = 2;
Radek Krejci02117302015-04-13 16:32:44 +0200532
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200533 aux = buf[1];
534 if ((aux & 0xc0) != 0x80) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100535 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200536 return 0;
537 }
538 c = ((c & 0x1f) << 6) | (aux & 0x3f);
Radek Krejci02117302015-04-13 16:32:44 +0200539
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200540 if (c < 0x80) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100541 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200542 return 0;
543 }
544 } else if (!(c & 0x80)) {
545 /* one byte character */
546 if (c < 0x20 && c != 0x9 && c != 0xa && c != 0xd) {
547 /* invalid character */
Radek Krejci48464ed2016-03-17 15:44:09 +0100548 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200549 return 0;
550 }
551 } else {
552 /* invalid character */
Radek Krejci48464ed2016-03-17 15:44:09 +0100553 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200554 return 0;
555 }
Radek Krejci02117302015-04-13 16:32:44 +0200556
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200557 return c;
Radek Krejci02117302015-04-13 16:32:44 +0200558}
559
Michal Vasko0d343d12015-08-24 14:57:36 +0200560/* logs directly */
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200561static int
562parse_ignore(const char *data, const char *endstr, unsigned int *len)
Radek Krejci02117302015-04-13 16:32:44 +0200563{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200564 unsigned int slen;
565 const char *c = data;
Radek Krejci02117302015-04-13 16:32:44 +0200566
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200567 slen = strlen(endstr);
Radek Krejci02117302015-04-13 16:32:44 +0200568
Radek Krejcifb783942016-10-06 09:49:33 +0200569 while (*c && strncmp(c, endstr, slen)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200570 c++;
571 }
572 if (!*c) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100573 LOGVAL(LYE_XML_MISS, LY_VLOG_NONE, NULL, "closing sequence", endstr);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200574 return EXIT_FAILURE;
575 }
576 c += slen;
Radek Krejci02117302015-04-13 16:32:44 +0200577
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200578 *len = c - data;
579 return EXIT_SUCCESS;
Radek Krejci02117302015-04-13 16:32:44 +0200580}
581
Michal Vasko0d343d12015-08-24 14:57:36 +0200582/* logs directly */
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200583static char *
584parse_text(const char *data, char delim, unsigned int *len)
Radek Krejci02117302015-04-13 16:32:44 +0200585{
Radek Krejci709fee62015-04-15 13:56:19 +0200586#define BUFSIZE 1024
Radek Krejci02117302015-04-13 16:32:44 +0200587
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200588 char buf[BUFSIZE];
589 char *result = NULL, *aux;
590 unsigned int r;
591 int o, size = 0;
592 int cdsect = 0;
593 int32_t n;
Radek Krejci709fee62015-04-15 13:56:19 +0200594
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200595 for (*len = o = 0; cdsect || data[*len] != delim; o++) {
Radek Krejcifb783942016-10-06 09:49:33 +0200596 if (!data[*len] || (!cdsect && !strncmp(&data[*len], "]]>", 3))) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100597 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "element content, \"]]>\" found");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200598 goto error;
599 }
Radek Krejci709fee62015-04-15 13:56:19 +0200600
Radek Krejcia4a84062015-04-16 13:00:10 +0200601loop:
602
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200603 if (o > BUFSIZE - 3) {
604 /* add buffer into the result */
605 if (result) {
606 size = size + o;
Michal Vasko253035f2015-12-17 16:58:13 +0100607 aux = ly_realloc(result, size + 1);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200608 result = aux;
609 } else {
610 size = o;
611 result = malloc((size + 1) * sizeof *result);
612 }
Michal Vasko253035f2015-12-17 16:58:13 +0100613 if (!result) {
614 LOGMEM;
615 return NULL;
616 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200617 memcpy(&result[size - o], buf, o);
Radek Krejci709fee62015-04-15 13:56:19 +0200618
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200619 /* write again into the beginning of the buffer */
620 o = 0;
621 }
Radek Krejci709fee62015-04-15 13:56:19 +0200622
Radek Krejcifb783942016-10-06 09:49:33 +0200623 if (cdsect || !strncmp(&data[*len], "<![CDATA[", 9)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200624 /* CDSect */
625 if (!cdsect) {
626 cdsect = 1;
627 *len += 9;
628 }
Radek Krejcifb783942016-10-06 09:49:33 +0200629 if (data[*len] && !strncmp(&data[*len], "]]>", 3)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200630 *len += 3;
631 cdsect = 0;
632 o--; /* we don't write any data in this iteration */
633 } else {
634 buf[o] = data[*len];
635 (*len)++;
636 }
637 } else if (data[*len] == '&') {
638 (*len)++;
639 if (data[*len] != '#') {
640 /* entity reference - only predefined refs are supported */
Radek Krejcifb783942016-10-06 09:49:33 +0200641 if (!strncmp(&data[*len], "lt;", 3)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200642 buf[o] = '<';
643 *len += 3;
Radek Krejcifb783942016-10-06 09:49:33 +0200644 } else if (!strncmp(&data[*len], "gt;", 3)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200645 buf[o] = '>';
646 *len += 3;
Radek Krejcifb783942016-10-06 09:49:33 +0200647 } else if (!strncmp(&data[*len], "amp;", 4)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200648 buf[o] = '&';
649 *len += 4;
Radek Krejcifb783942016-10-06 09:49:33 +0200650 } else if (!strncmp(&data[*len], "apos;", 5)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200651 buf[o] = '\'';
652 *len += 5;
Radek Krejcifb783942016-10-06 09:49:33 +0200653 } else if (!strncmp(&data[*len], "quot;", 5)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200654 buf[o] = '\"';
655 *len += 5;
656 } else {
Radek Krejci48464ed2016-03-17 15:44:09 +0100657 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "entity reference (only predefined references are supported)");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200658 goto error;
659 }
660 } else {
661 /* character reference */
662 (*len)++;
663 if (isdigit(data[*len])) {
664 for (n = 0; isdigit(data[*len]); (*len)++) {
665 n = (10 * n) + (data[*len] - '0');
666 }
667 if (data[*len] != ';') {
Radek Krejci48464ed2016-03-17 15:44:09 +0100668 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "character reference, missing semicolon");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200669 goto error;
670 }
671 } else if (data[(*len)++] == 'x' && isxdigit(data[*len])) {
672 for (n = 0; isxdigit(data[*len]); (*len)++) {
673 if (isdigit(data[*len])) {
674 r = (data[*len] - '0');
675 } else if (data[*len] > 'F') {
676 r = 10 + (data[*len] - 'a');
677 } else {
678 r = 10 + (data[*len] - 'A');
679 }
680 n = (16 * n) + r;
681 }
682 } else {
Radek Krejci48464ed2016-03-17 15:44:09 +0100683 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "character reference");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200684 goto error;
Radek Krejci709fee62015-04-15 13:56:19 +0200685
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200686 }
Radek Krejci48464ed2016-03-17 15:44:09 +0100687 r = pututf8(&buf[o], n);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200688 if (!r) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100689 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "character reference value");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200690 goto error;
691 }
692 o += r - 1; /* o is ++ in for loop */
693 (*len)++;
694 }
695 } else {
Radek Krejcideee60e2016-09-23 15:21:14 +0200696 r = copyutf8(&buf[o], &data[*len]);
697 if (!r) {
698 goto error;
699 }
700
701 o += r - 1; /* o is ++ in for loop */
702 (*len) = (*len) + r;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200703 }
704 }
Radek Krejci02117302015-04-13 16:32:44 +0200705
Radek Krejcifb783942016-10-06 09:49:33 +0200706 if (delim == '<' && !strncmp(&data[*len], "<![CDATA[", 9)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200707 /* ignore loop's end condition on beginning of CDSect */
708 goto loop;
709 }
Radek Krejci709fee62015-04-15 13:56:19 +0200710#undef BUFSIZE
711
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200712 if (o) {
713 if (result) {
714 size = size + o;
715 aux = realloc(result, size + 1);
716 result = aux;
717 } else {
718 size = o;
719 result = malloc((size + 1) * sizeof *result);
720 }
Michal Vasko253035f2015-12-17 16:58:13 +0100721 if (!result) {
722 LOGMEM;
723 return NULL;
724 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200725 memcpy(&result[size - o], buf, o);
726 }
727 if (result) {
728 result[size] = '\0';
Radek Krejcia5269642015-07-20 19:04:11 +0200729 } else {
730 size = 0;
731 result = strdup("");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200732 }
Radek Krejci02117302015-04-13 16:32:44 +0200733
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200734 return result;
Radek Krejci709fee62015-04-15 13:56:19 +0200735
736error:
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200737 free(result);
738 return NULL;
Radek Krejci02117302015-04-13 16:32:44 +0200739}
740
Michal Vasko0d343d12015-08-24 14:57:36 +0200741/* logs directly */
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200742static struct lyxml_attr *
Radek Krejci00249f22015-07-07 13:43:28 +0200743parse_attr(struct ly_ctx *ctx, const char *data, unsigned int *len, struct lyxml_elem *parent)
Radek Krejci674e1f82015-04-21 14:12:19 +0200744{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200745 const char *c = data, *start, *delim;
746 char prefix[32];
747 int uc;
Radek Krejci00249f22015-07-07 13:43:28 +0200748 struct lyxml_attr *attr = NULL, *a;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200749 unsigned int size;
Radek Krejci02117302015-04-13 16:32:44 +0200750
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200751 /* check if it is attribute or namespace */
Radek Krejcifb783942016-10-06 09:49:33 +0200752 if (!strncmp(c, "xmlns", 5)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200753 /* namespace */
754 attr = calloc(1, sizeof (struct lyxml_ns));
Michal Vasko253035f2015-12-17 16:58:13 +0100755 if (!attr) {
756 LOGMEM;
757 return NULL;
758 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200759 attr->type = LYXML_ATTR_NS;
Radek Krejci00249f22015-07-07 13:43:28 +0200760 ((struct lyxml_ns *)attr)->parent = parent;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200761 c += 5;
762 if (*c != ':') {
763 /* default namespace, prefix will be empty */
764 goto equal;
765 }
766 c++; /* go after ':' to the prefix value */
767 } else {
768 /* attribute */
769 attr = calloc(1, sizeof *attr);
Michal Vasko253035f2015-12-17 16:58:13 +0100770 if (!attr) {
771 LOGMEM;
772 return NULL;
773 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200774 attr->type = LYXML_ATTR_STD;
775 }
Radek Krejci4ea08382015-04-21 09:41:40 +0200776
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200777 /* process name part of the attribute */
778 start = c;
Radek Krejci48464ed2016-03-17 15:44:09 +0100779 uc = lyxml_getutf8(c, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200780 if (!is_xmlnamestartchar(uc)) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100781 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "NameStartChar of the attribute");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200782 free(attr);
783 return NULL;
784 }
785 c += size;
Radek Krejci48464ed2016-03-17 15:44:09 +0100786 uc = lyxml_getutf8(c, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200787 while (is_xmlnamechar(uc)) {
788 if (attr->type == LYXML_ATTR_STD && *c == ':') {
789 /* attribute in a namespace */
790 start = c + 1;
Radek Krejci4ea08382015-04-21 09:41:40 +0200791
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200792 /* look for the prefix in namespaces */
793 memcpy(prefix, data, c - data);
794 prefix[c - data] = '\0';
Radek Krejci4476d412015-07-10 15:35:01 +0200795 attr->ns = lyxml_get_ns(parent, prefix);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200796 }
797 c += size;
Radek Krejci48464ed2016-03-17 15:44:09 +0100798 uc = lyxml_getutf8(c, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200799 }
Radek Krejci674e1f82015-04-21 14:12:19 +0200800
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200801 /* store the name */
802 size = c - start;
803 attr->name = lydict_insert(ctx, start, size);
Radek Krejci674e1f82015-04-21 14:12:19 +0200804
805equal:
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200806 /* check Eq mark that can be surrounded by whitespaces */
807 ign_xmlws(c);
808 if (*c != '=') {
Radek Krejci48464ed2016-03-17 15:44:09 +0100809 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "attribute definition, \"=\" expected");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200810 goto error;
811 }
812 c++;
813 ign_xmlws(c);
Radek Krejci02117302015-04-13 16:32:44 +0200814
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200815 /* process value part of the attribute */
816 if (!*c || (*c != '"' && *c != '\'')) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100817 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "attribute value, \" or \' expected");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200818 goto error;
819 }
820 delim = c;
821 attr->value = lydict_insert_zc(ctx, parse_text(++c, *delim, &size));
822 if (ly_errno) {
823 goto error;
824 }
Radek Krejci02117302015-04-13 16:32:44 +0200825
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200826 *len = c + size + 1 - data; /* +1 is delimiter size */
Radek Krejci00249f22015-07-07 13:43:28 +0200827
828 /* put attribute into the parent's attributes list */
829 if (parent->attr) {
830 /* go to the end of the list */
831 for (a = parent->attr; a->next; a = a->next);
832 /* and append new attribute */
833 a->next = attr;
834 } else {
835 /* add the first attribute in the list */
836 parent->attr = attr;
837 }
838
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200839 return attr;
Radek Krejci02117302015-04-13 16:32:44 +0200840
841error:
Radek Krejci00249f22015-07-07 13:43:28 +0200842 lyxml_free_attr(ctx, NULL, attr);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200843 return NULL;
Radek Krejci54ea8de2015-04-09 18:02:56 +0200844}
845
Michal Vasko0d343d12015-08-24 14:57:36 +0200846/* logs directly */
Radek Krejci9a5daea2016-03-02 16:49:40 +0100847struct lyxml_elem *
848lyxml_parse_elem(struct ly_ctx *ctx, const char *data, unsigned int *len, struct lyxml_elem *parent)
Radek Krejci54ea8de2015-04-09 18:02:56 +0200849{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200850 const char *c = data, *start, *e;
851 const char *lws; /* leading white space for handling mixed content */
852 int uc;
853 char *str;
854 char prefix[32] = { 0 };
855 unsigned int prefix_len = 0;
856 struct lyxml_elem *elem = NULL, *child;
857 struct lyxml_attr *attr;
858 unsigned int size;
859 int nons_flag = 0, closed_flag = 0;
Radek Krejci02117302015-04-13 16:32:44 +0200860
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200861 *len = 0;
Radek Krejci02117302015-04-13 16:32:44 +0200862
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200863 if (*c != '<') {
864 return NULL;
865 }
Radek Krejci02117302015-04-13 16:32:44 +0200866
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200867 /* locate element name */
868 c++;
869 e = c;
Radek Krejci02117302015-04-13 16:32:44 +0200870
Radek Krejci48464ed2016-03-17 15:44:09 +0100871 uc = lyxml_getutf8(e, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200872 if (!is_xmlnamestartchar(uc)) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100873 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "NameStartChar of the element");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200874 return NULL;
875 }
876 e += size;
Radek Krejci48464ed2016-03-17 15:44:09 +0100877 uc = lyxml_getutf8(e, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200878 while (is_xmlnamechar(uc)) {
879 if (*e == ':') {
880 if (prefix_len) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100881 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "element name, multiple colons found");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200882 goto error;
883 }
884 /* element in a namespace */
885 start = e + 1;
Radek Krejci674e1f82015-04-21 14:12:19 +0200886
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200887 /* look for the prefix in namespaces */
888 memcpy(prefix, c, prefix_len = e - c);
889 prefix[prefix_len] = '\0';
890 c = start;
891 }
892 e += size;
Radek Krejci48464ed2016-03-17 15:44:09 +0100893 uc = lyxml_getutf8(e, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200894 }
895 if (!*e) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100896 LOGVAL(LYE_EOF, LY_VLOG_NONE, NULL);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200897 return NULL;
898 }
Radek Krejci02117302015-04-13 16:32:44 +0200899
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200900 /* allocate element structure */
901 elem = calloc(1, sizeof *elem);
Michal Vasko253035f2015-12-17 16:58:13 +0100902 if (!elem) {
903 LOGMEM;
904 return NULL;
905 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200906 elem->next = NULL;
907 elem->prev = elem;
908 if (parent) {
Michal Vaskof8879c22015-08-21 09:07:36 +0200909 lyxml_add_child(ctx, parent, elem);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200910 }
Radek Krejci02117302015-04-13 16:32:44 +0200911
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200912 /* store the name into the element structure */
913 elem->name = lydict_insert(ctx, c, e - c);
914 c = e;
Radek Krejci02117302015-04-13 16:32:44 +0200915
916process:
Radek Krejci00a0e712016-10-26 10:24:46 +0200917 ly_err_clean(1);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200918 ign_xmlws(c);
Radek Krejcifb783942016-10-06 09:49:33 +0200919 if (!strncmp("/>", c, 2)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200920 /* we are done, it was EmptyElemTag */
921 c += 2;
Michal Vasko44913842016-04-13 14:20:41 +0200922 elem->content = lydict_insert(ctx, "", 0);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200923 closed_flag = 1;
924 } else if (*c == '>') {
925 /* process element content */
926 c++;
927 lws = NULL;
Radek Krejci02117302015-04-13 16:32:44 +0200928
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200929 while (*c) {
Radek Krejcifb783942016-10-06 09:49:33 +0200930 if (!strncmp(c, "</", 2)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200931 if (lws && !elem->child) {
932 /* leading white spaces were actually content */
933 goto store_content;
934 }
Radek Krejci02117302015-04-13 16:32:44 +0200935
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200936 /* Etag */
937 c += 2;
938 /* get name and check it */
939 e = c;
Radek Krejci48464ed2016-03-17 15:44:09 +0100940 uc = lyxml_getutf8(e, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200941 if (!is_xmlnamestartchar(uc)) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100942 LOGVAL(LYE_XML_INVAL, LY_VLOG_XML, elem, "NameStartChar of the element");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200943 goto error;
944 }
945 e += size;
Radek Krejci48464ed2016-03-17 15:44:09 +0100946 uc = lyxml_getutf8(e, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200947 while (is_xmlnamechar(uc)) {
948 if (*e == ':') {
949 /* element in a namespace */
950 start = e + 1;
Radek Krejci674e1f82015-04-21 14:12:19 +0200951
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200952 /* look for the prefix in namespaces */
953 if (memcmp(prefix, c, e - c)) {
Michal Vaskoff9336a2016-05-10 10:48:48 +0200954 LOGVAL(LYE_SPEC, LY_VLOG_XML, elem,
955 "Invalid (different namespaces) opening (%s) and closing element tags.", elem->name);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200956 goto error;
957 }
958 c = start;
959 }
960 e += size;
Radek Krejci48464ed2016-03-17 15:44:09 +0100961 uc = lyxml_getutf8(e, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200962 }
963 if (!*e) {
Radek Krejci3cc10962016-04-13 15:03:27 +0200964 LOGVAL(LYE_EOF, LY_VLOG_NONE, NULL);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200965 goto error;
966 }
Radek Krejci02117302015-04-13 16:32:44 +0200967
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200968 /* check that it corresponds to opening tag */
969 size = e - c;
970 str = malloc((size + 1) * sizeof *str);
Michal Vasko253035f2015-12-17 16:58:13 +0100971 if (!str) {
972 LOGMEM;
973 goto error;
974 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200975 memcpy(str, c, e - c);
976 str[e - c] = '\0';
977 if (size != strlen(elem->name) || memcmp(str, elem->name, size)) {
Michal Vaskoff9336a2016-05-10 10:48:48 +0200978 LOGVAL(LYE_SPEC, LY_VLOG_XML, elem,
979 "Invalid (mixed names) opening (%s) and closing (%s) element tags.", elem->name, str);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200980 free(str);
981 goto error;
982 }
983 free(str);
984 c = e;
Radek Krejci02117302015-04-13 16:32:44 +0200985
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200986 ign_xmlws(c);
987 if (*c != '>') {
Michal Vaskoff9336a2016-05-10 10:48:48 +0200988 LOGVAL(LYE_SPEC, LY_VLOG_XML, elem, "Data after closing element tag \"%s\".", elem->name);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200989 goto error;
990 }
991 c++;
Michal Vaskoe00b7892016-04-14 10:12:18 +0200992 if (!(elem->flags & LYXML_ELEM_MIXED) && !elem->content) {
993 /* there was no content, but we don't want NULL (only if mixed content) */
994 elem->content = lydict_insert(ctx, "", 0);
995 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200996 closed_flag = 1;
997 break;
Radek Krejci02117302015-04-13 16:32:44 +0200998
Radek Krejcifb783942016-10-06 09:49:33 +0200999 } else if (!strncmp(c, "<?", 2)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001000 if (lws) {
1001 /* leading white spaces were only formatting */
1002 lws = NULL;
1003 }
1004 /* PI - ignore it */
1005 c += 2;
1006 if (parse_ignore(c, "?>", &size)) {
1007 goto error;
1008 }
1009 c += size;
Radek Krejcifb783942016-10-06 09:49:33 +02001010 } else if (!strncmp(c, "<!--", 4)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001011 if (lws) {
1012 /* leading white spaces were only formatting */
1013 lws = NULL;
1014 }
1015 /* Comment - ignore it */
1016 c += 4;
1017 if (parse_ignore(c, "-->", &size)) {
1018 goto error;
1019 }
1020 c += size;
Radek Krejcifb783942016-10-06 09:49:33 +02001021 } else if (!strncmp(c, "<![CDATA[", 9)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001022 /* CDSect */
1023 goto store_content;
1024 } else if (*c == '<') {
1025 if (lws) {
1026 if (elem->flags & LYXML_ELEM_MIXED) {
1027 /* we have a mixed content */
1028 goto store_content;
1029 } else {
1030 /* leading white spaces were only formatting */
1031 lws = NULL;
1032 }
1033 }
1034 if (elem->content) {
1035 /* we have a mixed content */
1036 child = calloc(1, sizeof *child);
Michal Vasko253035f2015-12-17 16:58:13 +01001037 if (!child) {
1038 LOGMEM;
1039 goto error;
1040 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001041 child->content = elem->content;
1042 elem->content = NULL;
Michal Vaskof8879c22015-08-21 09:07:36 +02001043 lyxml_add_child(ctx, elem, child);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001044 elem->flags |= LYXML_ELEM_MIXED;
1045 }
Radek Krejci9a5daea2016-03-02 16:49:40 +01001046 child = lyxml_parse_elem(ctx, c, &size, elem);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001047 if (!child) {
1048 goto error;
1049 }
1050 c += size; /* move after processed child element */
1051 } else if (is_xmlws(*c)) {
1052 lws = c;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001053 ign_xmlws(c);
1054 } else {
Radek Krejci02117302015-04-13 16:32:44 +02001055store_content:
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001056 /* store text content */
1057 if (lws) {
1058 /* process content including the leading white spaces */
1059 c = lws;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001060 lws = NULL;
1061 }
1062 elem->content = lydict_insert_zc(ctx, parse_text(c, '<', &size));
1063 if (ly_errno) {
1064 goto error;
1065 }
1066 c += size; /* move after processed text content */
Radek Krejci02117302015-04-13 16:32:44 +02001067
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001068 if (elem->child) {
1069 /* we have a mixed content */
1070 child = calloc(1, sizeof *child);
Michal Vasko253035f2015-12-17 16:58:13 +01001071 if (!child) {
1072 LOGMEM;
1073 goto error;
1074 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001075 child->content = elem->content;
1076 elem->content = NULL;
Michal Vaskof8879c22015-08-21 09:07:36 +02001077 lyxml_add_child(ctx, elem, child);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001078 elem->flags |= LYXML_ELEM_MIXED;
1079 }
1080 }
1081 }
1082 } else {
1083 /* process attribute */
1084 attr = parse_attr(ctx, c, &size, elem);
1085 if (!attr) {
1086 goto error;
1087 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001088 c += size; /* move after processed attribute */
Radek Krejci02117302015-04-13 16:32:44 +02001089
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001090 /* check namespace */
1091 if (attr->type == LYXML_ATTR_NS) {
1092 if (!prefix[0] && !attr->name) {
1093 if (attr->value) {
1094 /* default prefix */
1095 elem->ns = (struct lyxml_ns *)attr;
1096 } else {
1097 /* xmlns="" -> no namespace */
1098 nons_flag = 1;
1099 }
Radek Krejcifb783942016-10-06 09:49:33 +02001100 } else if (prefix[0] && attr->name && !strncmp(attr->name, prefix, prefix_len + 1)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001101 /* matching namespace with prefix */
1102 elem->ns = (struct lyxml_ns *)attr;
1103 }
1104 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001105
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001106 /* go back to finish element processing */
1107 goto process;
1108 }
Radek Krejci02117302015-04-13 16:32:44 +02001109
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001110 *len = c - data;
Radek Krejci02117302015-04-13 16:32:44 +02001111
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001112 if (!closed_flag) {
Radek Krejci48464ed2016-03-17 15:44:09 +01001113 LOGVAL(LYE_XML_MISS, LY_VLOG_XML, elem, "closing element tag", elem->name);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001114 goto error;
1115 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001116
Radek Krejci78a230a2015-07-07 17:04:40 +02001117 if (!elem->ns && !nons_flag && parent) {
Radek Krejci4476d412015-07-10 15:35:01 +02001118 elem->ns = lyxml_get_ns(parent, prefix_len ? prefix : NULL);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001119 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001120
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001121 return elem;
Radek Krejci02117302015-04-13 16:32:44 +02001122
1123error:
Michal Vasko345da0a2015-12-02 10:35:55 +01001124 lyxml_free(ctx, elem);
Radek Krejci02117302015-04-13 16:32:44 +02001125
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001126 return NULL;
Radek Krejci54ea8de2015-04-09 18:02:56 +02001127}
1128
Michal Vasko0d343d12015-08-24 14:57:36 +02001129/* logs directly */
Radek Krejcic6704c82015-10-06 11:12:45 +02001130API struct lyxml_elem *
Radek Krejci722b0072016-02-01 17:09:45 +01001131lyxml_parse_mem(struct ly_ctx *ctx, const char *data, int options)
Radek Krejci54ea8de2015-04-09 18:02:56 +02001132{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001133 const char *c = data;
1134 unsigned int len;
Radek Krejci851ea662016-01-08 09:30:53 +01001135 struct lyxml_elem *root, *first = NULL, *next;
Radek Krejci02117302015-04-13 16:32:44 +02001136
Radek Krejci00a0e712016-10-26 10:24:46 +02001137 ly_err_clean(1);
Radek Krejci2342cf62016-01-29 16:48:23 +01001138
Radek Krejci120f6242015-12-17 12:32:56 +01001139repeat:
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001140 /* process document */
Radek Krejcif8ae23e2016-07-26 17:11:17 +02001141 while (1) {
1142 if (!*c) {
1143 /* eof */
1144 return first;
1145 } else if (is_xmlws(*c)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001146 /* skip whitespaces */
1147 ign_xmlws(c);
Radek Krejcifb783942016-10-06 09:49:33 +02001148 } else if (!strncmp(c, "<?", 2)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001149 /* XMLDecl or PI - ignore it */
1150 c += 2;
1151 if (parse_ignore(c, "?>", &len)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001152 return NULL;
1153 }
1154 c += len;
Radek Krejcifb783942016-10-06 09:49:33 +02001155 } else if (!strncmp(c, "<!--", 4)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001156 /* Comment - ignore it */
1157 c += 2;
1158 if (parse_ignore(c, "-->", &len)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001159 return NULL;
1160 }
1161 c += len;
Radek Krejcifb783942016-10-06 09:49:33 +02001162 } else if (!strncmp(c, "<!", 2)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001163 /* DOCTYPE */
1164 /* TODO - standalone ignore counting < and > */
1165 LOGERR(LY_EINVAL, "DOCTYPE not supported in XML documents.");
1166 return NULL;
1167 } else if (*c == '<') {
1168 /* element - process it in next loop to strictly follow XML
1169 * format
1170 */
1171 break;
Michal Vaskoc2e80562015-07-27 11:31:41 +02001172 } else {
Radek Krejci48464ed2016-03-17 15:44:09 +01001173 LOGVAL(LYE_XML_INCHAR, LY_VLOG_NONE, NULL, c);
Michal Vaskoc2e80562015-07-27 11:31:41 +02001174 return NULL;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001175 }
1176 }
Radek Krejci02117302015-04-13 16:32:44 +02001177
Radek Krejci9a5daea2016-03-02 16:49:40 +01001178 root = lyxml_parse_elem(ctx, c, &len, NULL);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001179 if (!root) {
Michal Vaskobc58b4a2016-01-07 14:42:31 +01001180 if (first) {
Radek Krejci851ea662016-01-08 09:30:53 +01001181 LY_TREE_FOR_SAFE(first, next, root) {
Michal Vaskobc58b4a2016-01-07 14:42:31 +01001182 lyxml_free(ctx, root);
1183 }
Radek Krejci120f6242015-12-17 12:32:56 +01001184 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001185 return NULL;
Radek Krejci120f6242015-12-17 12:32:56 +01001186 } else if (!first) {
1187 first = root;
1188 } else {
1189 first->prev->next = root;
1190 root->prev = first->prev;
1191 first->prev = root;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001192 }
1193 c += len;
Radek Krejci02117302015-04-13 16:32:44 +02001194
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001195 /* ignore the rest of document where can be comments, PIs and whitespaces,
1196 * note that we are not detecting syntax errors in these parts
1197 */
1198 ign_xmlws(c);
1199 if (*c) {
Radek Krejci722b0072016-02-01 17:09:45 +01001200 if (options & LYXML_PARSE_MULTIROOT) {
Radek Krejci120f6242015-12-17 12:32:56 +01001201 goto repeat;
1202 } else {
1203 LOGWRN("There are some not parsed data:\n%s", c);
1204 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001205 }
Radek Krejci02117302015-04-13 16:32:44 +02001206
Radek Krejci120f6242015-12-17 12:32:56 +01001207 return first;
Radek Krejci02117302015-04-13 16:32:44 +02001208}
1209
Radek Krejcic6704c82015-10-06 11:12:45 +02001210API struct lyxml_elem *
Radek Krejci722b0072016-02-01 17:09:45 +01001211lyxml_parse_path(struct ly_ctx *ctx, const char *filename, int options)
Radek Krejci54ea8de2015-04-09 18:02:56 +02001212{
Radek Krejci6b3d9262015-12-03 13:45:27 +01001213 struct lyxml_elem *elem = NULL;
Pavol Vicanb2570c12015-11-12 13:50:20 +01001214 struct stat sb;
1215 int fd;
1216 char *addr;
1217
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001218 if (!filename || !ctx) {
1219 LOGERR(LY_EINVAL, "%s: Invalid parameter.", __func__);
1220 return NULL;
1221 }
Radek Krejci54ea8de2015-04-09 18:02:56 +02001222
Pavol Vicanb2570c12015-11-12 13:50:20 +01001223 fd = open(filename, O_RDONLY);
1224 if (fd == -1) {
Radek Krejci6b3d9262015-12-03 13:45:27 +01001225 LOGERR(LY_EINVAL,"Opening file \"%s\" failed.", filename);
Pavol Vicanb2570c12015-11-12 13:50:20 +01001226 return NULL;
1227 }
1228 if (fstat(fd, &sb) == -1) {
1229 LOGERR(LY_EINVAL, "Unable to get file \"%s\" information.\n", filename);
1230 goto error;
1231 }
1232 if (!S_ISREG(sb.st_mode)) {
Radek Krejcib051f722016-02-25 15:12:21 +01001233 LOGERR(LY_EINVAL, "%s: Invalid parameter, input file is not a regular file", __func__);
Pavol Vicanb2570c12015-11-12 13:50:20 +01001234 goto error;
1235 }
Pavol Vicanf7cc2852016-03-22 23:27:35 +01001236 addr = mmap(NULL, sb.st_size + 2, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
Pavol Vicanb2570c12015-11-12 13:50:20 +01001237 if (addr == MAP_FAILED) {
Radek Krejci6b3d9262015-12-03 13:45:27 +01001238 LOGERR(LY_EMEM,"Map file into memory failed (%s()).", __func__);
Pavol Vicanb2570c12015-11-12 13:50:20 +01001239 goto error;
1240 }
Radek Krejci6b3d9262015-12-03 13:45:27 +01001241
Radek Krejci722b0072016-02-01 17:09:45 +01001242 elem = lyxml_parse_mem(ctx, addr, options);
Pavol Vicanf7cc2852016-03-22 23:27:35 +01001243 munmap(addr, sb.st_size +2);
Radek Krejci30793ab2015-12-03 13:45:45 +01001244 close(fd);
Radek Krejci6b3d9262015-12-03 13:45:27 +01001245
Pavol Vicanb2570c12015-11-12 13:50:20 +01001246 return elem;
1247
1248error:
Radek Krejci6b3d9262015-12-03 13:45:27 +01001249 if (fd != -1) {
1250 close(fd);
1251 }
1252
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001253 return NULL;
Radek Krejci54ea8de2015-04-09 18:02:56 +02001254}
Radek Krejci02117302015-04-13 16:32:44 +02001255
/**
 * Print @p text to @p out with the XML special characters '&', '<', '>' and
 * '"' replaced by their entity references.
 *
 * @param out  output handler
 * @param text NUL-terminated text to print, may be NULL
 * @return 0 for NULL @p text; otherwise the sum of the ly_print() results for
 *         the entities plus one per character written unchanged
 */
int
lyxml_dump_text(struct lyout *out, const char *text)
{
    const char *p;
    unsigned int written = 0;

    if (!text) {
        return 0;
    }

    for (p = text; *p; p++) {
        if (*p == '&') {
            written += ly_print(out, "&amp;");
        } else if (*p == '<') {
            written += ly_print(out, "&lt;");
        } else if (*p == '>') {
            /* not needed, just for readability */
            written += ly_print(out, "&gt;");
        } else if (*p == '"') {
            written += ly_print(out, "&quot;");
        } else {
            /* ordinary character, pass it through unchanged */
            ly_write(out, p, 1);
            written++;
        }
    }

    return written;
}
1288
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001289static int
Michal Vaskob2f1db72016-11-16 13:57:35 +01001290dump_elem(struct lyout *out, const struct lyxml_elem *e, int level, int options, int last_elem)
Radek Krejcif0023a92015-04-20 20:51:39 +02001291{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001292 int size = 0;
1293 struct lyxml_attr *a;
1294 struct lyxml_elem *child;
1295 const char *delim, *delim_outer;
1296 int indent;
Radek Krejcif0023a92015-04-20 20:51:39 +02001297
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001298 if (!e->name) {
1299 /* mixed content */
1300 if (e->content) {
Michal Vasko5db027d2015-10-09 14:38:50 +02001301 return lyxml_dump_text(out, e->content);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001302 } else {
1303 return 0;
1304 }
1305 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001306
Radek Krejci722b0072016-02-01 17:09:45 +01001307 delim = delim_outer = (options & LYXML_PRINT_FORMAT) ? "\n" : "";
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001308 indent = 2 * level;
1309 if ((e->flags & LYXML_ELEM_MIXED) || (e->parent && (e->parent->flags & LYXML_ELEM_MIXED))) {
1310 delim = "";
1311 }
1312 if (e->parent && (e->parent->flags & LYXML_ELEM_MIXED)) {
1313 delim_outer = "";
1314 indent = 0;
1315 }
Michal Vaskob2f1db72016-11-16 13:57:35 +01001316 if (last_elem && (options & LYXML_PRINT_NO_LAST_NEWLINE)) {
1317 delim_outer = "";
1318 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001319
Radek Krejci722b0072016-02-01 17:09:45 +01001320 if (!(options & (LYXML_PRINT_OPEN | LYXML_PRINT_CLOSE | LYXML_PRINT_ATTRS)) || (options & LYXML_PRINT_OPEN)) {
Radek Krejcic6704c82015-10-06 11:12:45 +02001321 /* opening tag */
1322 if (e->ns && e->ns->prefix) {
Radek Krejci5248f132015-10-09 10:34:25 +02001323 size += ly_print(out, "%*s<%s:%s", indent, "", e->ns->prefix, e->name);
Radek Krejcic6704c82015-10-06 11:12:45 +02001324 } else {
Radek Krejci5248f132015-10-09 10:34:25 +02001325 size += ly_print(out, "%*s<%s", indent, "", e->name);
Radek Krejcic6704c82015-10-06 11:12:45 +02001326 }
Radek Krejci722b0072016-02-01 17:09:45 +01001327 } else if (options & LYXML_PRINT_CLOSE) {
Radek Krejcic6704c82015-10-06 11:12:45 +02001328 indent = 0;
1329 goto close;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001330 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001331
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001332 /* attributes */
1333 for (a = e->attr; a; a = a->next) {
1334 if (a->type == LYXML_ATTR_NS) {
1335 if (a->name) {
Radek Krejci5248f132015-10-09 10:34:25 +02001336 size += ly_print(out, " xmlns:%s=\"%s\"", a->name, a->value ? a->value : "");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001337 } else {
Radek Krejci5248f132015-10-09 10:34:25 +02001338 size += ly_print(out, " xmlns=\"%s\"", a->value ? a->value : "");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001339 }
1340 } else if (a->ns && a->ns->prefix) {
Radek Krejci5248f132015-10-09 10:34:25 +02001341 size += ly_print(out, " %s:%s=\"%s\"", a->ns->prefix, a->name, a->value);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001342 } else {
Radek Krejci5248f132015-10-09 10:34:25 +02001343 size += ly_print(out, " %s=\"%s\"", a->name, a->value);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001344 }
1345 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001346
Radek Krejcic6704c82015-10-06 11:12:45 +02001347 /* apply options */
Radek Krejci722b0072016-02-01 17:09:45 +01001348 if ((options & LYXML_PRINT_CLOSE) && (options & LYXML_PRINT_OPEN)) {
Radek Krejci5248f132015-10-09 10:34:25 +02001349 size += ly_print(out, "/>%s", delim);
Radek Krejcic6704c82015-10-06 11:12:45 +02001350 return size;
Radek Krejci722b0072016-02-01 17:09:45 +01001351 } else if (options & LYXML_PRINT_OPEN) {
Radek Krejci5248f132015-10-09 10:34:25 +02001352 ly_print(out, ">");
Radek Krejcic6704c82015-10-06 11:12:45 +02001353 return ++size;
Radek Krejci722b0072016-02-01 17:09:45 +01001354 } else if (options & LYXML_PRINT_ATTRS) {
Radek Krejcic6704c82015-10-06 11:12:45 +02001355 return size;
1356 }
1357
Michal Vasko3a611612016-04-14 10:12:56 +02001358 if (!e->child && (!e->content || !e->content[0])) {
Radek Krejci5248f132015-10-09 10:34:25 +02001359 size += ly_print(out, "/>%s", delim);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001360 return size;
Michal Vasko3a611612016-04-14 10:12:56 +02001361 } else if (e->content && e->content[0]) {
Radek Krejci5248f132015-10-09 10:34:25 +02001362 ly_print(out, ">");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001363 size++;
Radek Krejcif0023a92015-04-20 20:51:39 +02001364
Michal Vasko5db027d2015-10-09 14:38:50 +02001365 size += lyxml_dump_text(out, e->content);
Radek Krejcif0023a92015-04-20 20:51:39 +02001366
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001367 if (e->ns && e->ns->prefix) {
Radek Krejci5248f132015-10-09 10:34:25 +02001368 size += ly_print(out, "</%s:%s>%s", e->ns->prefix, e->name, delim);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001369 } else {
Radek Krejci5248f132015-10-09 10:34:25 +02001370 size += ly_print(out, "</%s>%s", e->name, delim);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001371 }
1372 return size;
1373 } else {
Radek Krejci5248f132015-10-09 10:34:25 +02001374 size += ly_print(out, ">%s", delim);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001375 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001376
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001377 /* go recursively */
1378 LY_TREE_FOR(e->child, child) {
Radek Krejci722b0072016-02-01 17:09:45 +01001379 if (options & LYXML_PRINT_FORMAT) {
Michal Vaskob2f1db72016-11-16 13:57:35 +01001380 size += dump_elem(out, child, level + 1, LYXML_PRINT_FORMAT, 0);
Pavol Vicanbe7eef52015-10-22 14:07:48 +02001381 } else {
Michal Vaskob2f1db72016-11-16 13:57:35 +01001382 size += dump_elem(out, child, level, 0, 0);
Pavol Vicanbe7eef52015-10-22 14:07:48 +02001383 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001384 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001385
Radek Krejcic6704c82015-10-06 11:12:45 +02001386close:
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001387 /* closing tag */
1388 if (e->ns && e->ns->prefix) {
Radek Krejci5248f132015-10-09 10:34:25 +02001389 size += ly_print(out, "%*s</%s:%s>%s", indent, "", e->ns->prefix, e->name, delim_outer);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001390 } else {
Radek Krejci5248f132015-10-09 10:34:25 +02001391 size += ly_print(out, "%*s</%s>%s", indent, "", e->name, delim_outer);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001392 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001393
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001394 return size;
Radek Krejcif0023a92015-04-20 20:51:39 +02001395}
1396
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001397static int
1398dump_siblings(struct lyout *out, const struct lyxml_elem *e, int options)
1399{
Michal Vaskob2f1db72016-11-16 13:57:35 +01001400 const struct lyxml_elem *start, *iter, *next;
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001401 int ret = 0;
1402
1403 if (e->parent) {
1404 start = e->parent->child;
1405 } else {
1406 start = e;
1407 while(start->prev && start->prev->next) {
1408 start = start->prev;
1409 }
1410 }
1411
Michal Vaskob2f1db72016-11-16 13:57:35 +01001412 LY_TREE_FOR_SAFE(start, next, iter) {
1413 ret += dump_elem(out, iter, 0, options, (next ? 0 : 1));
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001414 }
1415
1416 return ret;
1417}
1418
Radek Krejcic6704c82015-10-06 11:12:45 +02001419API int
Radek Krejci722b0072016-02-01 17:09:45 +01001420lyxml_print_file(FILE *stream, const struct lyxml_elem *elem, int options)
Radek Krejcif0023a92015-04-20 20:51:39 +02001421{
Radek Krejci5248f132015-10-09 10:34:25 +02001422 struct lyout out;
1423
1424 if (!stream || !elem) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001425 return 0;
1426 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001427
Radek Krejci5248f132015-10-09 10:34:25 +02001428 out.type = LYOUT_STREAM;
1429 out.method.f = stream;
1430
Radek Krejci722b0072016-02-01 17:09:45 +01001431 if (options & LYXML_PRINT_SIBLINGS) {
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001432 return dump_siblings(&out, elem, options);
1433 } else {
Michal Vaskob2f1db72016-11-16 13:57:35 +01001434 return dump_elem(&out, elem, 0, options, 1);
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001435 }
Radek Krejci5248f132015-10-09 10:34:25 +02001436}
1437
1438API int
Radek Krejci722b0072016-02-01 17:09:45 +01001439lyxml_print_fd(int fd, const struct lyxml_elem *elem, int options)
Radek Krejci5248f132015-10-09 10:34:25 +02001440{
1441 struct lyout out;
1442
1443 if (fd < 0 || !elem) {
1444 return 0;
1445 }
1446
1447 out.type = LYOUT_FD;
1448 out.method.fd = fd;
1449
Radek Krejci722b0072016-02-01 17:09:45 +01001450 if (options & LYXML_PRINT_SIBLINGS) {
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001451 return dump_siblings(&out, elem, options);
1452 } else {
Michal Vaskob2f1db72016-11-16 13:57:35 +01001453 return dump_elem(&out, elem, 0, options, 1);
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001454 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001455}
Radek Krejci6140e4e2015-10-09 15:50:55 +02001456
1457API int
Radek Krejci722b0072016-02-01 17:09:45 +01001458lyxml_print_mem(char **strp, const struct lyxml_elem *elem, int options)
Radek Krejci2fa0fc12015-10-14 18:14:29 +02001459{
1460 struct lyout out;
1461 int r;
1462
1463 if (!strp || !elem) {
1464 return 0;
1465 }
1466
1467 out.type = LYOUT_MEMORY;
1468 out.method.mem.buf = NULL;
1469 out.method.mem.len = 0;
1470 out.method.mem.size = 0;
1471
Radek Krejci722b0072016-02-01 17:09:45 +01001472 if (options & LYXML_PRINT_SIBLINGS) {
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001473 r = dump_siblings(&out, elem, options);
1474 } else {
Michal Vaskob2f1db72016-11-16 13:57:35 +01001475 r = dump_elem(&out, elem, 0, options, 1);
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001476 }
Radek Krejci2fa0fc12015-10-14 18:14:29 +02001477
1478 *strp = out.method.mem.buf;
1479 return r;
1480}
1481
1482API int
Radek Krejci722b0072016-02-01 17:09:45 +01001483lyxml_print_clb(ssize_t (*writeclb)(void *arg, const void *buf, size_t count), void *arg, const struct lyxml_elem *elem, int options)
Radek Krejci6140e4e2015-10-09 15:50:55 +02001484{
1485 struct lyout out;
1486
1487 if (!writeclb || !elem) {
1488 return 0;
1489 }
1490
1491 out.type = LYOUT_CALLBACK;
Radek Krejci50929eb2015-10-09 18:14:15 +02001492 out.method.clb.f = writeclb;
1493 out.method.clb.arg = arg;
Radek Krejci6140e4e2015-10-09 15:50:55 +02001494
Radek Krejci722b0072016-02-01 17:09:45 +01001495 if (options & LYXML_PRINT_SIBLINGS) {
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001496 return dump_siblings(&out, elem, options);
1497 } else {
Michal Vaskob2f1db72016-11-16 13:57:35 +01001498 return dump_elem(&out, elem, 0, options, 1);
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001499 }
Radek Krejci6140e4e2015-10-09 15:50:55 +02001500}