blob: 434a703a1fda07bd439dfab8d29b5148466b26d7 [file] [log] [blame]
Radek Krejci54ea8de2015-04-09 18:02:56 +02001/**
2 * @file xml.c
3 * @author Radek Krejci <rkrejci@cesnet.cz>
4 * @brief XML parser implementation for libyang
5 *
6 * Copyright (c) 2015 CESNET, z.s.p.o.
7 *
Radek Krejci54f6fb32016-02-24 12:56:39 +01008 * This source code is licensed under BSD 3-Clause License (the "License").
9 * You may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
Michal Vasko8de098c2016-02-26 10:00:25 +010011 *
Radek Krejci54f6fb32016-02-24 12:56:39 +010012 * https://opensource.org/licenses/BSD-3-Clause
Radek Krejci54ea8de2015-04-09 18:02:56 +020013 */
14
Radek Krejci812b10a2015-05-28 16:48:25 +020015#include <assert.h>
Radek Krejci563427e2016-02-08 16:26:34 +010016#include <errno.h>
Radek Krejci709fee62015-04-15 13:56:19 +020017#include <ctype.h>
18#include <stdint.h>
Radek Krejcif0023a92015-04-20 20:51:39 +020019#include <stdio.h>
Radek Krejci02117302015-04-13 16:32:44 +020020#include <stdlib.h>
21#include <string.h>
Radek Krejci54ea8de2015-04-09 18:02:56 +020022#include <unistd.h>
Radek Krejci563427e2016-02-08 16:26:34 +010023#include <pthread.h>
Pavol Vicanb2570c12015-11-12 13:50:20 +010024#include <sys/stat.h>
25#include <sys/mman.h>
Radek Krejci563427e2016-02-08 16:26:34 +010026#include <sys/syscall.h>
Pavol Vicanb2570c12015-11-12 13:50:20 +010027#include <fcntl.h>
Radek Krejci54ea8de2015-04-09 18:02:56 +020028
Radek Krejci06a704e2015-04-22 14:50:49 +020029#include "common.h"
Radek Krejci41912fe2015-10-22 10:22:12 +020030#include "dict_private.h"
Radek Krejci5248f132015-10-09 10:34:25 +020031#include "printer.h"
Radek Krejci5449d472015-10-26 14:35:56 +010032#include "parser.h"
Michal Vasko2d162e12015-09-24 14:33:29 +020033#include "tree_schema.h"
Michal Vaskofc5744d2015-10-22 12:09:34 +020034#include "xml_internal.h"
Radek Krejci54ea8de2015-04-09 18:02:56 +020035
/*
 * Advance the pointer lvalue given as p past every character for which
 * is_xmlws() is true.
 *
 * The argument is parenthesized in the expansion so that lvalue expressions
 * such as *pp are handled correctly (unparenthesized, "*pp++" would advance
 * pp itself instead of the pointer it refers to).
 */
#define ign_xmlws(p) \
    while (is_xmlws(*(p))) { \
        (p)++; \
    }
Radek Krejci02117302015-04-13 16:32:44 +020040
Michal Vasko88c29542015-11-27 14:57:53 +010041static struct lyxml_attr *lyxml_dup_attr(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_attr *attr);
42
Michal Vasko1e62a092015-12-01 12:27:20 +010043API const struct lyxml_ns *
44lyxml_get_ns(const struct lyxml_elem *elem, const char *prefix)
Michal Vaskof8879c22015-08-21 09:07:36 +020045{
46 struct lyxml_attr *attr;
Michal Vaskof8879c22015-08-21 09:07:36 +020047
48 if (!elem) {
49 return NULL;
50 }
51
Michal Vaskof8879c22015-08-21 09:07:36 +020052 for (attr = elem->attr; attr; attr = attr->next) {
53 if (attr->type != LYXML_ATTR_NS) {
54 continue;
55 }
56 if (!attr->name) {
Radek Krejci13f3f152016-10-03 11:40:13 +020057 if (!prefix) {
Michal Vaskof8879c22015-08-21 09:07:36 +020058 /* default namespace found */
59 if (!attr->value) {
60 /* empty default namespace -> no default namespace */
61 return NULL;
62 }
63 return (struct lyxml_ns *)attr;
64 }
Radek Krejci7d39dae2016-10-03 17:33:01 +020065 } else if (prefix && !strcmp(attr->name, prefix)) {
Michal Vaskof8879c22015-08-21 09:07:36 +020066 /* prefix found */
67 return (struct lyxml_ns *)attr;
68 }
69 }
70
71 /* go recursively */
72 return lyxml_get_ns(elem->parent, prefix);
73}
74
Michal Vasko88c29542015-11-27 14:57:53 +010075static void
76lyxml_correct_attr_ns(struct ly_ctx *ctx, struct lyxml_attr *attr, struct lyxml_elem *attr_parent, int copy_ns)
77{
78 const struct lyxml_ns *tmp_ns;
Michal Vaskof6109112015-12-03 14:00:42 +010079 struct lyxml_elem *ns_root, *attr_root;
Michal Vasko88c29542015-11-27 14:57:53 +010080
81 if ((attr->type != LYXML_ATTR_NS) && attr->ns) {
Michal Vaskof6109112015-12-03 14:00:42 +010082 /* find the root of attr */
83 for (attr_root = attr_parent; attr_root->parent; attr_root = attr_root->parent);
Michal Vasko88c29542015-11-27 14:57:53 +010084
85 /* find the root of attr NS */
86 for (ns_root = attr->ns->parent; ns_root->parent; ns_root = ns_root->parent);
87
Michal Vaskof6109112015-12-03 14:00:42 +010088 /* attr NS is defined outside attr parent subtree */
89 if (ns_root != attr_root) {
Michal Vasko88c29542015-11-27 14:57:53 +010090 if (copy_ns) {
91 tmp_ns = attr->ns;
92 /* we may have already copied the NS over? */
Radek Krejci66aca402016-05-24 15:23:02 +020093 attr->ns = lyxml_get_ns(attr_parent, tmp_ns->prefix);
Michal Vasko88c29542015-11-27 14:57:53 +010094
95 /* we haven't copied it over, copy it now */
96 if (!attr->ns) {
Michal Vaskof6109112015-12-03 14:00:42 +010097 attr->ns = (struct lyxml_ns *)lyxml_dup_attr(ctx, attr_parent, (struct lyxml_attr *)tmp_ns);
Michal Vasko88c29542015-11-27 14:57:53 +010098 }
99 } else {
100 attr->ns = NULL;
101 }
102 }
103 }
104}
105
106static struct lyxml_attr *
Michal Vaskof8879c22015-08-21 09:07:36 +0200107lyxml_dup_attr(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_attr *attr)
108{
109 struct lyxml_attr *result, *a;
110
111 if (!attr || !parent) {
112 return NULL;
113 }
114
115 if (attr->type == LYXML_ATTR_NS) {
116 /* this is correct, despite that all attributes seems like a standard
117 * attributes (struct lyxml_attr), some of them can be namespace
118 * definitions (and in that case they are struct lyxml_ns).
119 */
120 result = (struct lyxml_attr *)calloc(1, sizeof (struct lyxml_ns));
121 } else {
122 result = calloc(1, sizeof (struct lyxml_attr));
123 }
Radek Krejcia8d111f2017-05-31 13:57:37 +0200124 LY_CHECK_ERR_RETURN(!result, LOGMEM, NULL);
125
Michal Vaskof8879c22015-08-21 09:07:36 +0200126 result->value = lydict_insert(ctx, attr->value, 0);
127 result->name = lydict_insert(ctx, attr->name, 0);
128 result->type = attr->type;
129
130 /* set namespace in case of standard attributes */
131 if (result->type == LYXML_ATTR_STD && attr->ns) {
Michal Vasko88c29542015-11-27 14:57:53 +0100132 result->ns = attr->ns;
133 lyxml_correct_attr_ns(ctx, result, parent, 1);
Michal Vaskof8879c22015-08-21 09:07:36 +0200134 }
135
136 /* set parent pointer in case of namespace attribute */
137 if (result->type == LYXML_ATTR_NS) {
138 ((struct lyxml_ns *)result)->parent = parent;
139 }
140
141 /* put attribute into the parent's attributes list */
142 if (parent->attr) {
143 /* go to the end of the list */
144 for (a = parent->attr; a->next; a = a->next);
145 /* and append new attribute */
146 a->next = result;
147 } else {
148 /* add the first attribute in the list */
149 parent->attr = result;
150 }
151
152 return result;
153}
154
Michal Vaskof748dbc2016-04-05 11:27:47 +0200155void
Michal Vasko88c29542015-11-27 14:57:53 +0100156lyxml_correct_elem_ns(struct ly_ctx *ctx, struct lyxml_elem *elem, int copy_ns, int correct_attrs)
157{
158 const struct lyxml_ns *tmp_ns;
Radek Krejcid5be5682016-01-14 16:23:22 +0100159 struct lyxml_elem *elem_root, *ns_root, *tmp, *iter;
Michal Vasko88c29542015-11-27 14:57:53 +0100160 struct lyxml_attr *attr;
161
162 /* find the root of elem */
163 for (elem_root = elem; elem_root->parent; elem_root = elem_root->parent);
164
Radek Krejcid5be5682016-01-14 16:23:22 +0100165 LY_TREE_DFS_BEGIN(elem, tmp, iter) {
166 if (iter->ns) {
Michal Vasko88c29542015-11-27 14:57:53 +0100167 /* find the root of elem NS */
Radek Krejcic071c542016-01-27 14:57:51 +0100168 for (ns_root = iter->ns->parent; ns_root; ns_root = ns_root->parent);
Michal Vasko88c29542015-11-27 14:57:53 +0100169
170 /* elem NS is defined outside elem subtree */
171 if (ns_root != elem_root) {
172 if (copy_ns) {
Radek Krejcid5be5682016-01-14 16:23:22 +0100173 tmp_ns = iter->ns;
Michal Vasko88c29542015-11-27 14:57:53 +0100174 /* we may have already copied the NS over? */
Radek Krejcid5be5682016-01-14 16:23:22 +0100175 iter->ns = lyxml_get_ns(iter, tmp_ns->prefix);
Michal Vasko88c29542015-11-27 14:57:53 +0100176
177 /* we haven't copied it over, copy it now */
Radek Krejcid5be5682016-01-14 16:23:22 +0100178 if (!iter->ns) {
179 iter->ns = (struct lyxml_ns *)lyxml_dup_attr(ctx, iter, (struct lyxml_attr *)tmp_ns);
Michal Vasko88c29542015-11-27 14:57:53 +0100180 }
181 } else {
Radek Krejcid5be5682016-01-14 16:23:22 +0100182 iter->ns = NULL;
Michal Vasko88c29542015-11-27 14:57:53 +0100183 }
184 }
185 }
186 if (correct_attrs) {
Radek Krejcid5be5682016-01-14 16:23:22 +0100187 LY_TREE_FOR(iter->attr, attr) {
Michal Vasko88c29542015-11-27 14:57:53 +0100188 lyxml_correct_attr_ns(ctx, attr, elem_root, copy_ns);
189 }
190 }
Radek Krejcid5be5682016-01-14 16:23:22 +0100191 LY_TREE_DFS_END(elem, tmp, iter);
Michal Vasko88c29542015-11-27 14:57:53 +0100192 }
193}
194
Michal Vaskof8879c22015-08-21 09:07:36 +0200195struct lyxml_elem *
196lyxml_dup_elem(struct ly_ctx *ctx, struct lyxml_elem *elem, struct lyxml_elem *parent, int recursive)
197{
198 struct lyxml_elem *result, *child;
199 struct lyxml_attr *attr;
200
201 if (!elem) {
202 return NULL;
203 }
204
205 result = calloc(1, sizeof *result);
Radek Krejcia8d111f2017-05-31 13:57:37 +0200206 LY_CHECK_ERR_RETURN(!result, LOGMEM, NULL);
Michal Vaskof8879c22015-08-21 09:07:36 +0200207 result->content = lydict_insert(ctx, elem->content, 0);
208 result->name = lydict_insert(ctx, elem->name, 0);
209 result->flags = elem->flags;
Michal Vaskof8879c22015-08-21 09:07:36 +0200210 result->prev = result;
211
212 if (parent) {
213 lyxml_add_child(ctx, parent, result);
214 }
215
Michal Vasko88c29542015-11-27 14:57:53 +0100216 /* keep old namespace for now */
217 result->ns = elem->ns;
218
219 /* correct namespaces */
220 lyxml_correct_elem_ns(ctx, result, 1, 0);
Michal Vaskof8879c22015-08-21 09:07:36 +0200221
222 /* duplicate attributes */
223 for (attr = elem->attr; attr; attr = attr->next) {
224 lyxml_dup_attr(ctx, result, attr);
225 }
226
227 if (!recursive) {
228 return result;
229 }
230
231 /* duplicate children */
232 LY_TREE_FOR(elem->child, child) {
233 lyxml_dup_elem(ctx, child, result, 1);
234 }
235
236 return result;
237}
238
Radek Krejci6879d952017-01-09 12:49:19 +0100239API struct lyxml_elem *
240lyxml_dup(struct ly_ctx *ctx, struct lyxml_elem *root)
241{
242 return lyxml_dup_elem(ctx, root, NULL, 1);
243}
244
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200245void
Michal Vaskof8879c22015-08-21 09:07:36 +0200246lyxml_unlink_elem(struct ly_ctx *ctx, struct lyxml_elem *elem, int copy_ns)
Radek Krejci02117302015-04-13 16:32:44 +0200247{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200248 struct lyxml_elem *parent, *first;
Radek Krejci02117302015-04-13 16:32:44 +0200249
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200250 if (!elem) {
251 return;
252 }
Radek Krejci02117302015-04-13 16:32:44 +0200253
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200254 /* store pointers to important nodes */
255 parent = elem->parent;
Radek Krejcie1f13912015-05-26 15:17:38 +0200256
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200257 /* unlink from parent */
258 if (parent) {
259 if (parent->child == elem) {
260 /* we unlink the first child */
261 /* update the parent's link */
262 parent->child = elem->next;
263 }
264 /* forget about the parent */
265 elem->parent = NULL;
266 }
Radek Krejci02117302015-04-13 16:32:44 +0200267
Michal Vasko88c29542015-11-27 14:57:53 +0100268 if (copy_ns < 2) {
269 lyxml_correct_elem_ns(ctx, elem, copy_ns, 1);
270 }
271
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200272 /* unlink from siblings */
273 if (elem->prev == elem) {
274 /* there are no more siblings */
275 return;
276 }
277 if (elem->next) {
278 elem->next->prev = elem->prev;
279 } else {
280 /* unlinking the last element */
281 if (parent) {
282 first = parent->child;
283 } else {
284 first = elem;
Radek Krejcie4fffcf2016-02-23 16:06:25 +0100285 while (first->prev->next) {
286 first = first->prev;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200287 }
288 }
289 first->prev = elem->prev;
290 }
291 if (elem->prev->next) {
292 elem->prev->next = elem->next;
293 }
Radek Krejcida04f4a2015-05-21 12:54:09 +0200294
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200295 /* clean up the unlinked element */
296 elem->next = NULL;
297 elem->prev = elem;
Radek Krejci02117302015-04-13 16:32:44 +0200298}
299
Michal Vasko345da0a2015-12-02 10:35:55 +0100300API void
301lyxml_unlink(struct ly_ctx *ctx, struct lyxml_elem *elem)
302{
303 if (!elem) {
304 return;
305 }
306
307 lyxml_unlink_elem(ctx, elem, 1);
308}
309
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200310void
Radek Krejci00249f22015-07-07 13:43:28 +0200311lyxml_free_attr(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_attr *attr)
Radek Krejci02117302015-04-13 16:32:44 +0200312{
Radek Krejci00249f22015-07-07 13:43:28 +0200313 struct lyxml_attr *aiter, *aprev;
314
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200315 if (!attr) {
316 return;
317 }
Radek Krejci02117302015-04-13 16:32:44 +0200318
Radek Krejci00249f22015-07-07 13:43:28 +0200319 if (parent) {
320 /* unlink attribute from the parent's list of attributes */
321 aprev = NULL;
322 for (aiter = parent->attr; aiter; aiter = aiter->next) {
323 if (aiter == attr) {
324 break;
325 }
326 aprev = aiter;
327 }
328 if (!aiter) {
329 /* attribute to remove not found */
330 return;
331 }
332
333 if (!aprev) {
334 /* attribute is first in parent's list of attributes */
335 parent->attr = attr->next;
336 } else {
337 /* reconnect previous attribute to the next */
338 aprev->next = attr->next;
339 }
340 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200341 lydict_remove(ctx, attr->name);
342 lydict_remove(ctx, attr->value);
343 free(attr);
Radek Krejci02117302015-04-13 16:32:44 +0200344}
345
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200346void
347lyxml_free_attrs(struct ly_ctx *ctx, struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200348{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200349 struct lyxml_attr *a, *next;
350 if (!elem || !elem->attr) {
351 return;
352 }
Radek Krejci02117302015-04-13 16:32:44 +0200353
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200354 a = elem->attr;
355 do {
356 next = a->next;
Radek Krejci02117302015-04-13 16:32:44 +0200357
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200358 lydict_remove(ctx, a->name);
359 lydict_remove(ctx, a->value);
360 free(a);
Radek Krejci02117302015-04-13 16:32:44 +0200361
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200362 a = next;
363 } while (a);
Radek Krejci02117302015-04-13 16:32:44 +0200364}
365
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200366static void
Michal Vasko272e42f2015-12-02 12:20:37 +0100367lyxml_free_elem(struct ly_ctx *ctx, struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200368{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200369 struct lyxml_elem *e, *next;
Radek Krejci02117302015-04-13 16:32:44 +0200370
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200371 if (!elem) {
372 return;
373 }
Radek Krejci02117302015-04-13 16:32:44 +0200374
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200375 lyxml_free_attrs(ctx, elem);
376 LY_TREE_FOR_SAFE(elem->child, next, e) {
Michal Vasko272e42f2015-12-02 12:20:37 +0100377 lyxml_free_elem(ctx, e);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200378 }
379 lydict_remove(ctx, elem->name);
380 lydict_remove(ctx, elem->content);
381 free(elem);
Radek Krejci02117302015-04-13 16:32:44 +0200382}
383
Radek Krejcic6704c82015-10-06 11:12:45 +0200384API void
Michal Vasko345da0a2015-12-02 10:35:55 +0100385lyxml_free(struct ly_ctx *ctx, struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200386{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200387 if (!elem) {
388 return;
389 }
Radek Krejci02117302015-04-13 16:32:44 +0200390
Michal Vasko61f7ccb2015-10-23 10:15:08 +0200391 lyxml_unlink_elem(ctx, elem, 2);
Michal Vasko272e42f2015-12-02 12:20:37 +0100392 lyxml_free_elem(ctx, elem);
Radek Krejci02117302015-04-13 16:32:44 +0200393}
394
Radek Krejci8f8db232016-05-23 16:48:21 +0200395API void
396lyxml_free_withsiblings(struct ly_ctx *ctx, struct lyxml_elem *elem)
397{
398 struct lyxml_elem *iter, *aux;
399
400 if (!elem) {
401 return;
402 }
403
404 /* optimization - avoid freeing (unlinking) the last node of the siblings list */
405 /* so, first, free the node's predecessors to the beginning of the list ... */
406 for(iter = elem->prev; iter->next; iter = aux) {
407 aux = iter->prev;
408 lyxml_free(ctx, iter);
409 }
410 /* ... then, the node is the first in the siblings list, so free them all */
411 LY_TREE_FOR_SAFE(elem, aux, iter) {
412 lyxml_free(ctx, iter);
413 }
414}
415
Michal Vasko88c29542015-11-27 14:57:53 +0100416API const char *
Michal Vasko1e62a092015-12-01 12:27:20 +0100417lyxml_get_attr(const struct lyxml_elem *elem, const char *name, const char *ns)
Radek Krejcida04f4a2015-05-21 12:54:09 +0200418{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200419 struct lyxml_attr *a;
Radek Krejcida04f4a2015-05-21 12:54:09 +0200420
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200421 assert(elem);
422 assert(name);
Radek Krejcida04f4a2015-05-21 12:54:09 +0200423
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200424 for (a = elem->attr; a; a = a->next) {
425 if (a->type != LYXML_ATTR_STD) {
426 continue;
427 }
Radek Krejcida04f4a2015-05-21 12:54:09 +0200428
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200429 if (!strcmp(name, a->name)) {
430 if ((!ns && !a->ns) || (ns && a->ns && !strcmp(ns, a->ns->value))) {
431 return a->value;
432 }
433 }
434 }
Radek Krejcida04f4a2015-05-21 12:54:09 +0200435
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200436 return NULL;
Radek Krejcida04f4a2015-05-21 12:54:09 +0200437}
438
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200439int
Michal Vaskof8879c22015-08-21 09:07:36 +0200440lyxml_add_child(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200441{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200442 struct lyxml_elem *e;
Radek Krejci02117302015-04-13 16:32:44 +0200443
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200444 assert(parent);
445 assert(elem);
Radek Krejci02117302015-04-13 16:32:44 +0200446
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200447 /* (re)link element to parent */
448 if (elem->parent) {
Michal Vaskof8879c22015-08-21 09:07:36 +0200449 lyxml_unlink_elem(ctx, elem, 1);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200450 }
451 elem->parent = parent;
Radek Krejci02117302015-04-13 16:32:44 +0200452
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200453 /* link parent to element */
454 if (parent->child) {
455 e = parent->child;
456 elem->prev = e->prev;
457 elem->next = NULL;
458 elem->prev->next = elem;
459 e->prev = elem;
460 } else {
461 parent->child = elem;
462 elem->prev = elem;
463 elem->next = NULL;
464 }
Radek Krejci02117302015-04-13 16:32:44 +0200465
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200466 return EXIT_SUCCESS;
Radek Krejci02117302015-04-13 16:32:44 +0200467}
468
Michal Vasko3b855722015-08-28 16:01:18 +0200469int
Radek Krejci48464ed2016-03-17 15:44:09 +0100470lyxml_getutf8(const char *buf, unsigned int *read)
Radek Krejci02117302015-04-13 16:32:44 +0200471{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200472 int c, aux;
473 int i;
Radek Krejci02117302015-04-13 16:32:44 +0200474
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200475 c = buf[0];
476 *read = 0;
Radek Krejci02117302015-04-13 16:32:44 +0200477
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200478 /* buf is NULL terminated string, so 0 means EOF */
479 if (!c) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100480 LOGVAL(LYE_EOF, LY_VLOG_NONE, NULL);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200481 return 0;
482 }
483 *read = 1;
Radek Krejci02117302015-04-13 16:32:44 +0200484
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200485 /* process character byte(s) */
486 if ((c & 0xf8) == 0xf0) {
487 /* four bytes character */
488 *read = 4;
Radek Krejci02117302015-04-13 16:32:44 +0200489
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200490 c &= 0x07;
491 for (i = 1; i <= 3; i++) {
492 aux = buf[i];
493 if ((aux & 0xc0) != 0x80) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100494 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200495 return 0;
496 }
Radek Krejci02117302015-04-13 16:32:44 +0200497
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200498 c = (c << 6) | (aux & 0x3f);
499 }
Radek Krejci02117302015-04-13 16:32:44 +0200500
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200501 if (c < 0x1000 || c > 0x10ffff) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100502 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200503 return 0;
504 }
505 } else if ((c & 0xf0) == 0xe0) {
506 /* three bytes character */
507 *read = 3;
Radek Krejci02117302015-04-13 16:32:44 +0200508
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200509 c &= 0x0f;
510 for (i = 1; i <= 2; i++) {
511 aux = buf[i];
512 if ((aux & 0xc0) != 0x80) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100513 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200514 return 0;
515 }
Radek Krejci02117302015-04-13 16:32:44 +0200516
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200517 c = (c << 6) | (aux & 0x3f);
518 }
Radek Krejci02117302015-04-13 16:32:44 +0200519
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200520 if (c < 0x800 || (c > 0xd7ff && c < 0xe000) || c > 0xfffd) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100521 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200522 return 0;
523 }
524 } else if ((c & 0xe0) == 0xc0) {
525 /* two bytes character */
526 *read = 2;
Radek Krejci02117302015-04-13 16:32:44 +0200527
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200528 aux = buf[1];
529 if ((aux & 0xc0) != 0x80) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100530 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200531 return 0;
532 }
533 c = ((c & 0x1f) << 6) | (aux & 0x3f);
Radek Krejci02117302015-04-13 16:32:44 +0200534
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200535 if (c < 0x80) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100536 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200537 return 0;
538 }
539 } else if (!(c & 0x80)) {
540 /* one byte character */
541 if (c < 0x20 && c != 0x9 && c != 0xa && c != 0xd) {
542 /* invalid character */
Radek Krejci48464ed2016-03-17 15:44:09 +0100543 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200544 return 0;
545 }
546 } else {
547 /* invalid character */
Radek Krejci48464ed2016-03-17 15:44:09 +0100548 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200549 return 0;
550 }
Radek Krejci02117302015-04-13 16:32:44 +0200551
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200552 return c;
Radek Krejci02117302015-04-13 16:32:44 +0200553}
554
Michal Vasko0d343d12015-08-24 14:57:36 +0200555/* logs directly */
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200556static int
557parse_ignore(const char *data, const char *endstr, unsigned int *len)
Radek Krejci02117302015-04-13 16:32:44 +0200558{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200559 unsigned int slen;
560 const char *c = data;
Radek Krejci02117302015-04-13 16:32:44 +0200561
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200562 slen = strlen(endstr);
Radek Krejci02117302015-04-13 16:32:44 +0200563
Radek Krejcifb783942016-10-06 09:49:33 +0200564 while (*c && strncmp(c, endstr, slen)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200565 c++;
566 }
567 if (!*c) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100568 LOGVAL(LYE_XML_MISS, LY_VLOG_NONE, NULL, "closing sequence", endstr);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200569 return EXIT_FAILURE;
570 }
571 c += slen;
Radek Krejci02117302015-04-13 16:32:44 +0200572
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200573 *len = c - data;
574 return EXIT_SUCCESS;
Radek Krejci02117302015-04-13 16:32:44 +0200575}
576
Michal Vasko0d343d12015-08-24 14:57:36 +0200577/* logs directly */
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200578static char *
579parse_text(const char *data, char delim, unsigned int *len)
Radek Krejci02117302015-04-13 16:32:44 +0200580{
Radek Krejci709fee62015-04-15 13:56:19 +0200581#define BUFSIZE 1024
Radek Krejci02117302015-04-13 16:32:44 +0200582
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200583 char buf[BUFSIZE];
584 char *result = NULL, *aux;
585 unsigned int r;
586 int o, size = 0;
587 int cdsect = 0;
588 int32_t n;
Radek Krejci709fee62015-04-15 13:56:19 +0200589
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200590 for (*len = o = 0; cdsect || data[*len] != delim; o++) {
Radek Krejcifb783942016-10-06 09:49:33 +0200591 if (!data[*len] || (!cdsect && !strncmp(&data[*len], "]]>", 3))) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100592 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "element content, \"]]>\" found");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200593 goto error;
594 }
Radek Krejci709fee62015-04-15 13:56:19 +0200595
Radek Krejcia4a84062015-04-16 13:00:10 +0200596loop:
597
Radek Krejcia0802a82017-02-08 12:41:05 +0100598 if (o > BUFSIZE - 4) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200599 /* add buffer into the result */
600 if (result) {
601 size = size + o;
Radek Krejcia8d111f2017-05-31 13:57:37 +0200602 result = ly_realloc(result, size + 1);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200603 } else {
604 size = o;
605 result = malloc((size + 1) * sizeof *result);
606 }
Radek Krejcia8d111f2017-05-31 13:57:37 +0200607 LY_CHECK_ERR_RETURN(!result, LOGMEM, NULL);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200608 memcpy(&result[size - o], buf, o);
Radek Krejci709fee62015-04-15 13:56:19 +0200609
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200610 /* write again into the beginning of the buffer */
611 o = 0;
612 }
Radek Krejci709fee62015-04-15 13:56:19 +0200613
Radek Krejcifb783942016-10-06 09:49:33 +0200614 if (cdsect || !strncmp(&data[*len], "<![CDATA[", 9)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200615 /* CDSect */
616 if (!cdsect) {
617 cdsect = 1;
618 *len += 9;
619 }
Radek Krejcifb783942016-10-06 09:49:33 +0200620 if (data[*len] && !strncmp(&data[*len], "]]>", 3)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200621 *len += 3;
622 cdsect = 0;
623 o--; /* we don't write any data in this iteration */
624 } else {
625 buf[o] = data[*len];
626 (*len)++;
627 }
628 } else if (data[*len] == '&') {
629 (*len)++;
630 if (data[*len] != '#') {
631 /* entity reference - only predefined refs are supported */
Radek Krejcifb783942016-10-06 09:49:33 +0200632 if (!strncmp(&data[*len], "lt;", 3)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200633 buf[o] = '<';
634 *len += 3;
Radek Krejcifb783942016-10-06 09:49:33 +0200635 } else if (!strncmp(&data[*len], "gt;", 3)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200636 buf[o] = '>';
637 *len += 3;
Radek Krejcifb783942016-10-06 09:49:33 +0200638 } else if (!strncmp(&data[*len], "amp;", 4)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200639 buf[o] = '&';
640 *len += 4;
Radek Krejcifb783942016-10-06 09:49:33 +0200641 } else if (!strncmp(&data[*len], "apos;", 5)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200642 buf[o] = '\'';
643 *len += 5;
Radek Krejcifb783942016-10-06 09:49:33 +0200644 } else if (!strncmp(&data[*len], "quot;", 5)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200645 buf[o] = '\"';
646 *len += 5;
647 } else {
Radek Krejci48464ed2016-03-17 15:44:09 +0100648 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "entity reference (only predefined references are supported)");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200649 goto error;
650 }
651 } else {
652 /* character reference */
653 (*len)++;
654 if (isdigit(data[*len])) {
655 for (n = 0; isdigit(data[*len]); (*len)++) {
656 n = (10 * n) + (data[*len] - '0');
657 }
658 if (data[*len] != ';') {
Radek Krejci48464ed2016-03-17 15:44:09 +0100659 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "character reference, missing semicolon");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200660 goto error;
661 }
662 } else if (data[(*len)++] == 'x' && isxdigit(data[*len])) {
663 for (n = 0; isxdigit(data[*len]); (*len)++) {
664 if (isdigit(data[*len])) {
665 r = (data[*len] - '0');
666 } else if (data[*len] > 'F') {
667 r = 10 + (data[*len] - 'a');
668 } else {
669 r = 10 + (data[*len] - 'A');
670 }
671 n = (16 * n) + r;
672 }
673 } else {
Radek Krejci48464ed2016-03-17 15:44:09 +0100674 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "character reference");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200675 goto error;
Radek Krejci709fee62015-04-15 13:56:19 +0200676
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200677 }
Radek Krejci48464ed2016-03-17 15:44:09 +0100678 r = pututf8(&buf[o], n);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200679 if (!r) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100680 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "character reference value");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200681 goto error;
682 }
683 o += r - 1; /* o is ++ in for loop */
684 (*len)++;
685 }
686 } else {
Radek Krejcideee60e2016-09-23 15:21:14 +0200687 r = copyutf8(&buf[o], &data[*len]);
688 if (!r) {
689 goto error;
690 }
691
692 o += r - 1; /* o is ++ in for loop */
693 (*len) = (*len) + r;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200694 }
695 }
Radek Krejci02117302015-04-13 16:32:44 +0200696
Radek Krejcifb783942016-10-06 09:49:33 +0200697 if (delim == '<' && !strncmp(&data[*len], "<![CDATA[", 9)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200698 /* ignore loop's end condition on beginning of CDSect */
699 goto loop;
700 }
Radek Krejci709fee62015-04-15 13:56:19 +0200701#undef BUFSIZE
702
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200703 if (o) {
704 if (result) {
705 size = size + o;
706 aux = realloc(result, size + 1);
707 result = aux;
708 } else {
709 size = o;
710 result = malloc((size + 1) * sizeof *result);
711 }
Radek Krejcia8d111f2017-05-31 13:57:37 +0200712 LY_CHECK_ERR_RETURN(!result, LOGMEM, NULL);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200713 memcpy(&result[size - o], buf, o);
714 }
715 if (result) {
716 result[size] = '\0';
Radek Krejcia5269642015-07-20 19:04:11 +0200717 } else {
718 size = 0;
719 result = strdup("");
Radek Krejcia8d111f2017-05-31 13:57:37 +0200720 LY_CHECK_ERR_RETURN(!result, LOGMEM, NULL)
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200721 }
Radek Krejci02117302015-04-13 16:32:44 +0200722
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200723 return result;
Radek Krejci709fee62015-04-15 13:56:19 +0200724
725error:
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200726 free(result);
727 return NULL;
Radek Krejci02117302015-04-13 16:32:44 +0200728}
729
Michal Vasko0d343d12015-08-24 14:57:36 +0200730/* logs directly */
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200731static struct lyxml_attr *
Radek Krejci00249f22015-07-07 13:43:28 +0200732parse_attr(struct ly_ctx *ctx, const char *data, unsigned int *len, struct lyxml_elem *parent)
Radek Krejci674e1f82015-04-21 14:12:19 +0200733{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200734 const char *c = data, *start, *delim;
Michal Vasko62d5a6b2018-01-03 14:31:39 +0100735 char prefix[32], xml_flag;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200736 int uc;
Radek Krejci00249f22015-07-07 13:43:28 +0200737 struct lyxml_attr *attr = NULL, *a;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200738 unsigned int size;
Radek Krejci02117302015-04-13 16:32:44 +0200739
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200740 /* check if it is attribute or namespace */
Radek Krejcifb783942016-10-06 09:49:33 +0200741 if (!strncmp(c, "xmlns", 5)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200742 /* namespace */
743 attr = calloc(1, sizeof (struct lyxml_ns));
Radek Krejcia8d111f2017-05-31 13:57:37 +0200744 LY_CHECK_ERR_RETURN(!attr, LOGMEM, NULL);
745
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200746 attr->type = LYXML_ATTR_NS;
Radek Krejci00249f22015-07-07 13:43:28 +0200747 ((struct lyxml_ns *)attr)->parent = parent;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200748 c += 5;
749 if (*c != ':') {
750 /* default namespace, prefix will be empty */
751 goto equal;
752 }
753 c++; /* go after ':' to the prefix value */
754 } else {
755 /* attribute */
756 attr = calloc(1, sizeof *attr);
Radek Krejcia8d111f2017-05-31 13:57:37 +0200757 LY_CHECK_ERR_RETURN(!attr, LOGMEM, NULL);
758
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200759 attr->type = LYXML_ATTR_STD;
760 }
Radek Krejci4ea08382015-04-21 09:41:40 +0200761
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200762 /* process name part of the attribute */
763 start = c;
Radek Krejci48464ed2016-03-17 15:44:09 +0100764 uc = lyxml_getutf8(c, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200765 if (!is_xmlnamestartchar(uc)) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100766 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "NameStartChar of the attribute");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200767 free(attr);
768 return NULL;
769 }
Michal Vasko62d5a6b2018-01-03 14:31:39 +0100770 xml_flag = 4;
771 if (*c == 'x') {
772 xml_flag = 1;
773 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200774 c += size;
Radek Krejci48464ed2016-03-17 15:44:09 +0100775 uc = lyxml_getutf8(c, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200776 while (is_xmlnamechar(uc)) {
Michal Vasko62d5a6b2018-01-03 14:31:39 +0100777 if (attr->type == LYXML_ATTR_STD) {
778 if ((*c == ':') && (xml_flag != 3)) {
779 /* attribute in a namespace (but disregard the special "xml" namespace) */
780 start = c + 1;
Radek Krejci4ea08382015-04-21 09:41:40 +0200781
Michal Vasko62d5a6b2018-01-03 14:31:39 +0100782 /* look for the prefix in namespaces */
783 memcpy(prefix, data, c - data);
784 prefix[c - data] = '\0';
785 attr->ns = lyxml_get_ns(parent, prefix);
786 } else if (((*c == 'm') && (xml_flag == 1)) ||
787 ((*c == 'l') && (xml_flag == 2))) {
788 ++xml_flag;
789 } else {
790 xml_flag = 4;
791 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200792 }
793 c += size;
Radek Krejci48464ed2016-03-17 15:44:09 +0100794 uc = lyxml_getutf8(c, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200795 }
Radek Krejci674e1f82015-04-21 14:12:19 +0200796
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200797 /* store the name */
798 size = c - start;
799 attr->name = lydict_insert(ctx, start, size);
Radek Krejci674e1f82015-04-21 14:12:19 +0200800
801equal:
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200802 /* check Eq mark that can be surrounded by whitespaces */
803 ign_xmlws(c);
804 if (*c != '=') {
Radek Krejci48464ed2016-03-17 15:44:09 +0100805 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "attribute definition, \"=\" expected");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200806 goto error;
807 }
808 c++;
809 ign_xmlws(c);
Radek Krejci02117302015-04-13 16:32:44 +0200810
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200811 /* process value part of the attribute */
812 if (!*c || (*c != '"' && *c != '\'')) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100813 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "attribute value, \" or \' expected");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200814 goto error;
815 }
816 delim = c;
817 attr->value = lydict_insert_zc(ctx, parse_text(++c, *delim, &size));
818 if (ly_errno) {
819 goto error;
820 }
Radek Krejci02117302015-04-13 16:32:44 +0200821
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200822 *len = c + size + 1 - data; /* +1 is delimiter size */
Radek Krejci00249f22015-07-07 13:43:28 +0200823
824 /* put attribute into the parent's attributes list */
825 if (parent->attr) {
826 /* go to the end of the list */
827 for (a = parent->attr; a->next; a = a->next);
828 /* and append new attribute */
829 a->next = attr;
830 } else {
831 /* add the first attribute in the list */
832 parent->attr = attr;
833 }
834
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200835 return attr;
Radek Krejci02117302015-04-13 16:32:44 +0200836
837error:
Radek Krejci00249f22015-07-07 13:43:28 +0200838 lyxml_free_attr(ctx, NULL, attr);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200839 return NULL;
Radek Krejci54ea8de2015-04-09 18:02:56 +0200840}
841
Michal Vasko0d343d12015-08-24 14:57:36 +0200842/* logs directly */
Radek Krejci9a5daea2016-03-02 16:49:40 +0100843struct lyxml_elem *
Radek Krejcie1bacd72017-03-01 13:18:46 +0100844lyxml_parse_elem(struct ly_ctx *ctx, const char *data, unsigned int *len, struct lyxml_elem *parent, int options)
Radek Krejci54ea8de2015-04-09 18:02:56 +0200845{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200846 const char *c = data, *start, *e;
847 const char *lws; /* leading white space for handling mixed content */
848 int uc;
849 char *str;
850 char prefix[32] = { 0 };
851 unsigned int prefix_len = 0;
852 struct lyxml_elem *elem = NULL, *child;
853 struct lyxml_attr *attr;
854 unsigned int size;
855 int nons_flag = 0, closed_flag = 0;
Radek Krejci02117302015-04-13 16:32:44 +0200856
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200857 *len = 0;
Radek Krejci02117302015-04-13 16:32:44 +0200858
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200859 if (*c != '<') {
860 return NULL;
861 }
Radek Krejci02117302015-04-13 16:32:44 +0200862
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200863 /* locate element name */
864 c++;
865 e = c;
Radek Krejci02117302015-04-13 16:32:44 +0200866
Radek Krejci48464ed2016-03-17 15:44:09 +0100867 uc = lyxml_getutf8(e, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200868 if (!is_xmlnamestartchar(uc)) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100869 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "NameStartChar of the element");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200870 return NULL;
871 }
872 e += size;
Radek Krejci48464ed2016-03-17 15:44:09 +0100873 uc = lyxml_getutf8(e, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200874 while (is_xmlnamechar(uc)) {
875 if (*e == ':') {
876 if (prefix_len) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100877 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "element name, multiple colons found");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200878 goto error;
879 }
880 /* element in a namespace */
881 start = e + 1;
Radek Krejci674e1f82015-04-21 14:12:19 +0200882
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200883 /* look for the prefix in namespaces */
884 memcpy(prefix, c, prefix_len = e - c);
885 prefix[prefix_len] = '\0';
886 c = start;
887 }
888 e += size;
Radek Krejci48464ed2016-03-17 15:44:09 +0100889 uc = lyxml_getutf8(e, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200890 }
891 if (!*e) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100892 LOGVAL(LYE_EOF, LY_VLOG_NONE, NULL);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200893 return NULL;
894 }
Radek Krejci02117302015-04-13 16:32:44 +0200895
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200896 /* allocate element structure */
897 elem = calloc(1, sizeof *elem);
Radek Krejcia8d111f2017-05-31 13:57:37 +0200898 LY_CHECK_ERR_RETURN(!elem, LOGMEM, NULL);
899
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200900 elem->next = NULL;
901 elem->prev = elem;
902 if (parent) {
Michal Vaskof8879c22015-08-21 09:07:36 +0200903 lyxml_add_child(ctx, parent, elem);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200904 }
Radek Krejci02117302015-04-13 16:32:44 +0200905
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200906 /* store the name into the element structure */
907 elem->name = lydict_insert(ctx, c, e - c);
908 c = e;
Radek Krejci02117302015-04-13 16:32:44 +0200909
910process:
Radek Krejcicf748252017-09-04 11:11:14 +0200911 ly_err_clean(ly_parser_data.ctx, 1);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200912 ign_xmlws(c);
Radek Krejcifb783942016-10-06 09:49:33 +0200913 if (!strncmp("/>", c, 2)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200914 /* we are done, it was EmptyElemTag */
915 c += 2;
Michal Vasko44913842016-04-13 14:20:41 +0200916 elem->content = lydict_insert(ctx, "", 0);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200917 closed_flag = 1;
918 } else if (*c == '>') {
919 /* process element content */
920 c++;
921 lws = NULL;
Radek Krejci02117302015-04-13 16:32:44 +0200922
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200923 while (*c) {
Radek Krejcifb783942016-10-06 09:49:33 +0200924 if (!strncmp(c, "</", 2)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200925 if (lws && !elem->child) {
926 /* leading white spaces were actually content */
927 goto store_content;
928 }
Radek Krejci02117302015-04-13 16:32:44 +0200929
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200930 /* Etag */
931 c += 2;
932 /* get name and check it */
933 e = c;
Radek Krejci48464ed2016-03-17 15:44:09 +0100934 uc = lyxml_getutf8(e, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200935 if (!is_xmlnamestartchar(uc)) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100936 LOGVAL(LYE_XML_INVAL, LY_VLOG_XML, elem, "NameStartChar of the element");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200937 goto error;
938 }
939 e += size;
Radek Krejci48464ed2016-03-17 15:44:09 +0100940 uc = lyxml_getutf8(e, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200941 while (is_xmlnamechar(uc)) {
942 if (*e == ':') {
943 /* element in a namespace */
944 start = e + 1;
Radek Krejci674e1f82015-04-21 14:12:19 +0200945
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200946 /* look for the prefix in namespaces */
947 if (memcmp(prefix, c, e - c)) {
Michal Vaskoff9336a2016-05-10 10:48:48 +0200948 LOGVAL(LYE_SPEC, LY_VLOG_XML, elem,
949 "Invalid (different namespaces) opening (%s) and closing element tags.", elem->name);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200950 goto error;
951 }
952 c = start;
953 }
954 e += size;
Radek Krejci48464ed2016-03-17 15:44:09 +0100955 uc = lyxml_getutf8(e, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200956 }
957 if (!*e) {
Radek Krejci3cc10962016-04-13 15:03:27 +0200958 LOGVAL(LYE_EOF, LY_VLOG_NONE, NULL);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200959 goto error;
960 }
Radek Krejci02117302015-04-13 16:32:44 +0200961
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200962 /* check that it corresponds to opening tag */
963 size = e - c;
964 str = malloc((size + 1) * sizeof *str);
Radek Krejcia8d111f2017-05-31 13:57:37 +0200965 LY_CHECK_ERR_GOTO(!str, LOGMEM, error);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200966 memcpy(str, c, e - c);
967 str[e - c] = '\0';
968 if (size != strlen(elem->name) || memcmp(str, elem->name, size)) {
Michal Vaskoff9336a2016-05-10 10:48:48 +0200969 LOGVAL(LYE_SPEC, LY_VLOG_XML, elem,
970 "Invalid (mixed names) opening (%s) and closing (%s) element tags.", elem->name, str);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200971 free(str);
972 goto error;
973 }
974 free(str);
975 c = e;
Radek Krejci02117302015-04-13 16:32:44 +0200976
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200977 ign_xmlws(c);
978 if (*c != '>') {
Michal Vaskoff9336a2016-05-10 10:48:48 +0200979 LOGVAL(LYE_SPEC, LY_VLOG_XML, elem, "Data after closing element tag \"%s\".", elem->name);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200980 goto error;
981 }
982 c++;
Michal Vaskoe00b7892016-04-14 10:12:18 +0200983 if (!(elem->flags & LYXML_ELEM_MIXED) && !elem->content) {
984 /* there was no content, but we don't want NULL (only if mixed content) */
985 elem->content = lydict_insert(ctx, "", 0);
986 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200987 closed_flag = 1;
988 break;
Radek Krejci02117302015-04-13 16:32:44 +0200989
Radek Krejcifb783942016-10-06 09:49:33 +0200990 } else if (!strncmp(c, "<?", 2)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200991 if (lws) {
992 /* leading white spaces were only formatting */
993 lws = NULL;
994 }
995 /* PI - ignore it */
996 c += 2;
997 if (parse_ignore(c, "?>", &size)) {
998 goto error;
999 }
1000 c += size;
Radek Krejcifb783942016-10-06 09:49:33 +02001001 } else if (!strncmp(c, "<!--", 4)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001002 if (lws) {
1003 /* leading white spaces were only formatting */
1004 lws = NULL;
1005 }
1006 /* Comment - ignore it */
1007 c += 4;
1008 if (parse_ignore(c, "-->", &size)) {
1009 goto error;
1010 }
1011 c += size;
Radek Krejcifb783942016-10-06 09:49:33 +02001012 } else if (!strncmp(c, "<![CDATA[", 9)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001013 /* CDSect */
1014 goto store_content;
1015 } else if (*c == '<') {
1016 if (lws) {
1017 if (elem->flags & LYXML_ELEM_MIXED) {
1018 /* we have a mixed content */
1019 goto store_content;
1020 } else {
1021 /* leading white spaces were only formatting */
1022 lws = NULL;
1023 }
1024 }
1025 if (elem->content) {
1026 /* we have a mixed content */
Radek Krejcie1bacd72017-03-01 13:18:46 +01001027 if (options & LYXML_PARSE_NOMIXEDCONTENT) {
1028 LOGVAL(LYE_XML_INVAL, LY_VLOG_XML, elem, "XML element with mixed content");
1029 goto error;
1030 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001031 child = calloc(1, sizeof *child);
Radek Krejcia8d111f2017-05-31 13:57:37 +02001032 LY_CHECK_ERR_GOTO(!child, LOGMEM, error);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001033 child->content = elem->content;
1034 elem->content = NULL;
Michal Vaskof8879c22015-08-21 09:07:36 +02001035 lyxml_add_child(ctx, elem, child);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001036 elem->flags |= LYXML_ELEM_MIXED;
1037 }
Radek Krejcie1bacd72017-03-01 13:18:46 +01001038 child = lyxml_parse_elem(ctx, c, &size, elem, options);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001039 if (!child) {
1040 goto error;
1041 }
1042 c += size; /* move after processed child element */
1043 } else if (is_xmlws(*c)) {
1044 lws = c;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001045 ign_xmlws(c);
1046 } else {
Radek Krejci02117302015-04-13 16:32:44 +02001047store_content:
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001048 /* store text content */
1049 if (lws) {
1050 /* process content including the leading white spaces */
1051 c = lws;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001052 lws = NULL;
1053 }
1054 elem->content = lydict_insert_zc(ctx, parse_text(c, '<', &size));
1055 if (ly_errno) {
1056 goto error;
1057 }
1058 c += size; /* move after processed text content */
Radek Krejci02117302015-04-13 16:32:44 +02001059
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001060 if (elem->child) {
1061 /* we have a mixed content */
Radek Krejcie1bacd72017-03-01 13:18:46 +01001062 if (options & LYXML_PARSE_NOMIXEDCONTENT) {
1063 LOGVAL(LYE_XML_INVAL, LY_VLOG_XML, elem, "XML element with mixed content");
1064 goto error;
1065 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001066 child = calloc(1, sizeof *child);
Radek Krejcia8d111f2017-05-31 13:57:37 +02001067 LY_CHECK_ERR_GOTO(!child, LOGMEM, error);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001068 child->content = elem->content;
1069 elem->content = NULL;
Michal Vaskof8879c22015-08-21 09:07:36 +02001070 lyxml_add_child(ctx, elem, child);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001071 elem->flags |= LYXML_ELEM_MIXED;
1072 }
1073 }
1074 }
1075 } else {
1076 /* process attribute */
1077 attr = parse_attr(ctx, c, &size, elem);
1078 if (!attr) {
1079 goto error;
1080 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001081 c += size; /* move after processed attribute */
Radek Krejci02117302015-04-13 16:32:44 +02001082
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001083 /* check namespace */
1084 if (attr->type == LYXML_ATTR_NS) {
1085 if (!prefix[0] && !attr->name) {
1086 if (attr->value) {
1087 /* default prefix */
1088 elem->ns = (struct lyxml_ns *)attr;
1089 } else {
1090 /* xmlns="" -> no namespace */
1091 nons_flag = 1;
1092 }
Radek Krejcifb783942016-10-06 09:49:33 +02001093 } else if (prefix[0] && attr->name && !strncmp(attr->name, prefix, prefix_len + 1)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001094 /* matching namespace with prefix */
1095 elem->ns = (struct lyxml_ns *)attr;
1096 }
1097 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001098
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001099 /* go back to finish element processing */
1100 goto process;
1101 }
Radek Krejci02117302015-04-13 16:32:44 +02001102
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001103 *len = c - data;
Radek Krejci02117302015-04-13 16:32:44 +02001104
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001105 if (!closed_flag) {
Radek Krejci48464ed2016-03-17 15:44:09 +01001106 LOGVAL(LYE_XML_MISS, LY_VLOG_XML, elem, "closing element tag", elem->name);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001107 goto error;
1108 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001109
Radek Krejci78a230a2015-07-07 17:04:40 +02001110 if (!elem->ns && !nons_flag && parent) {
Radek Krejci4476d412015-07-10 15:35:01 +02001111 elem->ns = lyxml_get_ns(parent, prefix_len ? prefix : NULL);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001112 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001113
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001114 return elem;
Radek Krejci02117302015-04-13 16:32:44 +02001115
1116error:
Michal Vasko345da0a2015-12-02 10:35:55 +01001117 lyxml_free(ctx, elem);
Radek Krejci02117302015-04-13 16:32:44 +02001118
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001119 return NULL;
Radek Krejci54ea8de2015-04-09 18:02:56 +02001120}
1121
Michal Vasko0d343d12015-08-24 14:57:36 +02001122/* logs directly */
Radek Krejcic6704c82015-10-06 11:12:45 +02001123API struct lyxml_elem *
Radek Krejci722b0072016-02-01 17:09:45 +01001124lyxml_parse_mem(struct ly_ctx *ctx, const char *data, int options)
Radek Krejci54ea8de2015-04-09 18:02:56 +02001125{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001126 const char *c = data;
1127 unsigned int len;
Radek Krejci851ea662016-01-08 09:30:53 +01001128 struct lyxml_elem *root, *first = NULL, *next;
Radek Krejcicf748252017-09-04 11:11:14 +02001129 struct ly_ctx *ctx_prev = ly_parser_data.ctx;
Radek Krejci02117302015-04-13 16:32:44 +02001130
Radek Krejcicf748252017-09-04 11:11:14 +02001131 ly_err_clean(ctx, 1);
Radek Krejci2342cf62016-01-29 16:48:23 +01001132
Radek Krejci19b9b252017-03-17 16:14:09 +01001133 if (!ctx) {
1134 LOGERR(LY_EINVAL, "%s: Invalid parameter.", __func__);
1135 return NULL;
1136 }
1137
Radek Krejcicf748252017-09-04 11:11:14 +02001138 /* set parser context */
1139 ly_parser_data.ctx = ctx;
1140
Radek Krejci120f6242015-12-17 12:32:56 +01001141repeat:
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001142 /* process document */
Radek Krejcif8ae23e2016-07-26 17:11:17 +02001143 while (1) {
1144 if (!*c) {
1145 /* eof */
Radek Krejci17d1ae62017-09-04 11:51:33 +02001146 goto restore;
Radek Krejcif8ae23e2016-07-26 17:11:17 +02001147 } else if (is_xmlws(*c)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001148 /* skip whitespaces */
1149 ign_xmlws(c);
Radek Krejcifb783942016-10-06 09:49:33 +02001150 } else if (!strncmp(c, "<?", 2)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001151 /* XMLDecl or PI - ignore it */
1152 c += 2;
1153 if (parse_ignore(c, "?>", &len)) {
Radek Krejcicf748252017-09-04 11:11:14 +02001154 goto error;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001155 }
1156 c += len;
Radek Krejcifb783942016-10-06 09:49:33 +02001157 } else if (!strncmp(c, "<!--", 4)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001158 /* Comment - ignore it */
1159 c += 2;
1160 if (parse_ignore(c, "-->", &len)) {
Radek Krejcicf748252017-09-04 11:11:14 +02001161 goto error;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001162 }
1163 c += len;
Radek Krejcifb783942016-10-06 09:49:33 +02001164 } else if (!strncmp(c, "<!", 2)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001165 /* DOCTYPE */
1166 /* TODO - standalone ignore counting < and > */
1167 LOGERR(LY_EINVAL, "DOCTYPE not supported in XML documents.");
Radek Krejcicf748252017-09-04 11:11:14 +02001168 goto error;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001169 } else if (*c == '<') {
1170 /* element - process it in next loop to strictly follow XML
1171 * format
1172 */
1173 break;
Michal Vaskoc2e80562015-07-27 11:31:41 +02001174 } else {
Radek Krejci48464ed2016-03-17 15:44:09 +01001175 LOGVAL(LYE_XML_INCHAR, LY_VLOG_NONE, NULL, c);
Radek Krejcicf748252017-09-04 11:11:14 +02001176 goto error;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001177 }
1178 }
Radek Krejci02117302015-04-13 16:32:44 +02001179
Radek Krejcie1bacd72017-03-01 13:18:46 +01001180 root = lyxml_parse_elem(ctx, c, &len, NULL, options);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001181 if (!root) {
Radek Krejcicf748252017-09-04 11:11:14 +02001182 goto error;
Radek Krejci120f6242015-12-17 12:32:56 +01001183 } else if (!first) {
1184 first = root;
1185 } else {
1186 first->prev->next = root;
1187 root->prev = first->prev;
1188 first->prev = root;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001189 }
1190 c += len;
Radek Krejci02117302015-04-13 16:32:44 +02001191
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001192 /* ignore the rest of document where can be comments, PIs and whitespaces,
1193 * note that we are not detecting syntax errors in these parts
1194 */
1195 ign_xmlws(c);
1196 if (*c) {
Radek Krejci722b0072016-02-01 17:09:45 +01001197 if (options & LYXML_PARSE_MULTIROOT) {
Radek Krejci120f6242015-12-17 12:32:56 +01001198 goto repeat;
1199 } else {
1200 LOGWRN("There are some not parsed data:\n%s", c);
1201 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001202 }
Radek Krejci02117302015-04-13 16:32:44 +02001203
Radek Krejci17d1ae62017-09-04 11:51:33 +02001204restore:
Radek Krejcicf748252017-09-04 11:11:14 +02001205 /* reset parser context */
1206 ly_parser_data.ctx = ctx_prev;
1207
Radek Krejci120f6242015-12-17 12:32:56 +01001208 return first;
Radek Krejcicf748252017-09-04 11:11:14 +02001209
1210error:
1211 LY_TREE_FOR_SAFE(first, next, root) {
1212 lyxml_free(ctx, root);
1213 }
1214
1215 /* reset parser context */
1216 ly_parser_data.ctx = ctx_prev;
1217
1218 return NULL;
Radek Krejci02117302015-04-13 16:32:44 +02001219}
1220
Radek Krejcic6704c82015-10-06 11:12:45 +02001221API struct lyxml_elem *
Radek Krejci722b0072016-02-01 17:09:45 +01001222lyxml_parse_path(struct ly_ctx *ctx, const char *filename, int options)
Radek Krejci54ea8de2015-04-09 18:02:56 +02001223{
Radek Krejci6b3d9262015-12-03 13:45:27 +01001224 struct lyxml_elem *elem = NULL;
Radek Krejci0fb11502017-01-31 16:45:42 +01001225 size_t length;
Pavol Vicanb2570c12015-11-12 13:50:20 +01001226 int fd;
1227 char *addr;
1228
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001229 if (!filename || !ctx) {
1230 LOGERR(LY_EINVAL, "%s: Invalid parameter.", __func__);
1231 return NULL;
1232 }
Radek Krejci54ea8de2015-04-09 18:02:56 +02001233
Pavol Vicanb2570c12015-11-12 13:50:20 +01001234 fd = open(filename, O_RDONLY);
1235 if (fd == -1) {
Radek Krejci6b3d9262015-12-03 13:45:27 +01001236 LOGERR(LY_EINVAL,"Opening file \"%s\" failed.", filename);
Pavol Vicanb2570c12015-11-12 13:50:20 +01001237 return NULL;
1238 }
Radek Krejci0fb11502017-01-31 16:45:42 +01001239 addr = lyp_mmap(fd, 0, &length);
Pavol Vicanb2570c12015-11-12 13:50:20 +01001240 if (addr == MAP_FAILED) {
Radek Krejci0fb11502017-01-31 16:45:42 +01001241 LOGERR(LY_ESYS, "Mapping file descriptor into memory failed (%s()).", __func__);
Pavol Vicanb2570c12015-11-12 13:50:20 +01001242 goto error;
Radek Krejci10c216a2017-02-01 10:36:00 +01001243 } else if (!addr) {
1244 /* empty XML file */
1245 goto error;
Pavol Vicanb2570c12015-11-12 13:50:20 +01001246 }
Radek Krejci6b3d9262015-12-03 13:45:27 +01001247
Radek Krejci722b0072016-02-01 17:09:45 +01001248 elem = lyxml_parse_mem(ctx, addr, options);
Radek Krejci0fb11502017-01-31 16:45:42 +01001249 lyp_munmap(addr, length);
Radek Krejci30793ab2015-12-03 13:45:45 +01001250 close(fd);
Radek Krejci6b3d9262015-12-03 13:45:27 +01001251
Pavol Vicanb2570c12015-11-12 13:50:20 +01001252 return elem;
1253
1254error:
Radek Krejci6b3d9262015-12-03 13:45:27 +01001255 if (fd != -1) {
1256 close(fd);
1257 }
1258
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001259 return NULL;
Radek Krejci54ea8de2015-04-09 18:02:56 +02001260}
Radek Krejci02117302015-04-13 16:32:44 +02001261
/**
 * @brief Print text to the output, replacing XML-special characters with
 * entity references.
 *
 * '&', '<', '>' and '"' are written as "&amp;", "&lt;", "&gt;" and "&quot;";
 * every other byte is written unchanged.
 *
 * @param[in] out Output handler passed to ly_print()/ly_write().
 * @param[in] text NUL-terminated text to print; NULL prints nothing.
 * @return Sum of the ly_print() results plus one for each byte written as is.
 */
int
lyxml_dump_text(struct lyout *out, const char *text)
{
    const char *cur;
    unsigned int written = 0;

    if (!text) {
        return 0;
    }

    for (cur = text; *cur; ++cur) {
        switch (*cur) {
        case '&':
            written += ly_print(out, "&amp;");
            break;
        case '<':
            written += ly_print(out, "&lt;");
            break;
        case '>':
            /* escaping '>' is not required, it only helps readability */
            written += ly_print(out, "&gt;");
            break;
        case '"':
            written += ly_print(out, "&quot;");
            break;
        default:
            ly_write(out, cur, 1);
            ++written;
            break;
        }
    }

    return written;
}
1294
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001295static int
Michal Vaskob2f1db72016-11-16 13:57:35 +01001296dump_elem(struct lyout *out, const struct lyxml_elem *e, int level, int options, int last_elem)
Radek Krejcif0023a92015-04-20 20:51:39 +02001297{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001298 int size = 0;
1299 struct lyxml_attr *a;
1300 struct lyxml_elem *child;
1301 const char *delim, *delim_outer;
1302 int indent;
Radek Krejcif0023a92015-04-20 20:51:39 +02001303
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001304 if (!e->name) {
1305 /* mixed content */
1306 if (e->content) {
Michal Vasko5db027d2015-10-09 14:38:50 +02001307 return lyxml_dump_text(out, e->content);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001308 } else {
1309 return 0;
1310 }
1311 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001312
Radek Krejci722b0072016-02-01 17:09:45 +01001313 delim = delim_outer = (options & LYXML_PRINT_FORMAT) ? "\n" : "";
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001314 indent = 2 * level;
1315 if ((e->flags & LYXML_ELEM_MIXED) || (e->parent && (e->parent->flags & LYXML_ELEM_MIXED))) {
1316 delim = "";
1317 }
1318 if (e->parent && (e->parent->flags & LYXML_ELEM_MIXED)) {
1319 delim_outer = "";
1320 indent = 0;
1321 }
Michal Vaskob2f1db72016-11-16 13:57:35 +01001322 if (last_elem && (options & LYXML_PRINT_NO_LAST_NEWLINE)) {
1323 delim_outer = "";
1324 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001325
Radek Krejci722b0072016-02-01 17:09:45 +01001326 if (!(options & (LYXML_PRINT_OPEN | LYXML_PRINT_CLOSE | LYXML_PRINT_ATTRS)) || (options & LYXML_PRINT_OPEN)) {
Radek Krejcic6704c82015-10-06 11:12:45 +02001327 /* opening tag */
1328 if (e->ns && e->ns->prefix) {
Radek Krejci5248f132015-10-09 10:34:25 +02001329 size += ly_print(out, "%*s<%s:%s", indent, "", e->ns->prefix, e->name);
Radek Krejcic6704c82015-10-06 11:12:45 +02001330 } else {
Radek Krejci5248f132015-10-09 10:34:25 +02001331 size += ly_print(out, "%*s<%s", indent, "", e->name);
Radek Krejcic6704c82015-10-06 11:12:45 +02001332 }
Radek Krejci722b0072016-02-01 17:09:45 +01001333 } else if (options & LYXML_PRINT_CLOSE) {
Radek Krejcic6704c82015-10-06 11:12:45 +02001334 indent = 0;
1335 goto close;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001336 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001337
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001338 /* attributes */
1339 for (a = e->attr; a; a = a->next) {
1340 if (a->type == LYXML_ATTR_NS) {
1341 if (a->name) {
Radek Krejci5248f132015-10-09 10:34:25 +02001342 size += ly_print(out, " xmlns:%s=\"%s\"", a->name, a->value ? a->value : "");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001343 } else {
Radek Krejci5248f132015-10-09 10:34:25 +02001344 size += ly_print(out, " xmlns=\"%s\"", a->value ? a->value : "");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001345 }
1346 } else if (a->ns && a->ns->prefix) {
Radek Krejci5248f132015-10-09 10:34:25 +02001347 size += ly_print(out, " %s:%s=\"%s\"", a->ns->prefix, a->name, a->value);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001348 } else {
Radek Krejci5248f132015-10-09 10:34:25 +02001349 size += ly_print(out, " %s=\"%s\"", a->name, a->value);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001350 }
1351 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001352
Radek Krejcic6704c82015-10-06 11:12:45 +02001353 /* apply options */
Radek Krejci722b0072016-02-01 17:09:45 +01001354 if ((options & LYXML_PRINT_CLOSE) && (options & LYXML_PRINT_OPEN)) {
Radek Krejci5248f132015-10-09 10:34:25 +02001355 size += ly_print(out, "/>%s", delim);
Radek Krejcic6704c82015-10-06 11:12:45 +02001356 return size;
Radek Krejci722b0072016-02-01 17:09:45 +01001357 } else if (options & LYXML_PRINT_OPEN) {
Radek Krejci5248f132015-10-09 10:34:25 +02001358 ly_print(out, ">");
Radek Krejcic6704c82015-10-06 11:12:45 +02001359 return ++size;
Radek Krejci722b0072016-02-01 17:09:45 +01001360 } else if (options & LYXML_PRINT_ATTRS) {
Radek Krejcic6704c82015-10-06 11:12:45 +02001361 return size;
1362 }
1363
Michal Vasko3a611612016-04-14 10:12:56 +02001364 if (!e->child && (!e->content || !e->content[0])) {
Radek Krejci5248f132015-10-09 10:34:25 +02001365 size += ly_print(out, "/>%s", delim);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001366 return size;
Michal Vasko3a611612016-04-14 10:12:56 +02001367 } else if (e->content && e->content[0]) {
Radek Krejci5248f132015-10-09 10:34:25 +02001368 ly_print(out, ">");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001369 size++;
Radek Krejcif0023a92015-04-20 20:51:39 +02001370
Michal Vasko5db027d2015-10-09 14:38:50 +02001371 size += lyxml_dump_text(out, e->content);
Radek Krejcif0023a92015-04-20 20:51:39 +02001372
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001373 if (e->ns && e->ns->prefix) {
Radek Krejci5248f132015-10-09 10:34:25 +02001374 size += ly_print(out, "</%s:%s>%s", e->ns->prefix, e->name, delim);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001375 } else {
Radek Krejci5248f132015-10-09 10:34:25 +02001376 size += ly_print(out, "</%s>%s", e->name, delim);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001377 }
1378 return size;
1379 } else {
Radek Krejci5248f132015-10-09 10:34:25 +02001380 size += ly_print(out, ">%s", delim);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001381 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001382
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001383 /* go recursively */
1384 LY_TREE_FOR(e->child, child) {
Radek Krejci722b0072016-02-01 17:09:45 +01001385 if (options & LYXML_PRINT_FORMAT) {
Michal Vaskob2f1db72016-11-16 13:57:35 +01001386 size += dump_elem(out, child, level + 1, LYXML_PRINT_FORMAT, 0);
Pavol Vicanbe7eef52015-10-22 14:07:48 +02001387 } else {
Michal Vaskob2f1db72016-11-16 13:57:35 +01001388 size += dump_elem(out, child, level, 0, 0);
Pavol Vicanbe7eef52015-10-22 14:07:48 +02001389 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001390 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001391
Radek Krejcic6704c82015-10-06 11:12:45 +02001392close:
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001393 /* closing tag */
1394 if (e->ns && e->ns->prefix) {
Radek Krejci5248f132015-10-09 10:34:25 +02001395 size += ly_print(out, "%*s</%s:%s>%s", indent, "", e->ns->prefix, e->name, delim_outer);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001396 } else {
Radek Krejci5248f132015-10-09 10:34:25 +02001397 size += ly_print(out, "%*s</%s>%s", indent, "", e->name, delim_outer);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001398 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001399
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001400 return size;
Radek Krejcif0023a92015-04-20 20:51:39 +02001401}
1402
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001403static int
1404dump_siblings(struct lyout *out, const struct lyxml_elem *e, int options)
1405{
Michal Vaskob2f1db72016-11-16 13:57:35 +01001406 const struct lyxml_elem *start, *iter, *next;
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001407 int ret = 0;
1408
1409 if (e->parent) {
1410 start = e->parent->child;
1411 } else {
1412 start = e;
1413 while(start->prev && start->prev->next) {
1414 start = start->prev;
1415 }
1416 }
1417
Michal Vaskob2f1db72016-11-16 13:57:35 +01001418 LY_TREE_FOR_SAFE(start, next, iter) {
1419 ret += dump_elem(out, iter, 0, options, (next ? 0 : 1));
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001420 }
1421
1422 return ret;
1423}
1424
Radek Krejcic6704c82015-10-06 11:12:45 +02001425API int
Radek Krejci722b0072016-02-01 17:09:45 +01001426lyxml_print_file(FILE *stream, const struct lyxml_elem *elem, int options)
Radek Krejcif0023a92015-04-20 20:51:39 +02001427{
Radek Krejci5248f132015-10-09 10:34:25 +02001428 struct lyout out;
1429
1430 if (!stream || !elem) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001431 return 0;
1432 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001433
Radek Krejci5248f132015-10-09 10:34:25 +02001434 out.type = LYOUT_STREAM;
1435 out.method.f = stream;
1436
Radek Krejci722b0072016-02-01 17:09:45 +01001437 if (options & LYXML_PRINT_SIBLINGS) {
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001438 return dump_siblings(&out, elem, options);
1439 } else {
Michal Vaskob2f1db72016-11-16 13:57:35 +01001440 return dump_elem(&out, elem, 0, options, 1);
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001441 }
Radek Krejci5248f132015-10-09 10:34:25 +02001442}
1443
1444API int
Radek Krejci722b0072016-02-01 17:09:45 +01001445lyxml_print_fd(int fd, const struct lyxml_elem *elem, int options)
Radek Krejci5248f132015-10-09 10:34:25 +02001446{
1447 struct lyout out;
1448
1449 if (fd < 0 || !elem) {
1450 return 0;
1451 }
1452
1453 out.type = LYOUT_FD;
1454 out.method.fd = fd;
1455
Radek Krejci722b0072016-02-01 17:09:45 +01001456 if (options & LYXML_PRINT_SIBLINGS) {
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001457 return dump_siblings(&out, elem, options);
1458 } else {
Michal Vaskob2f1db72016-11-16 13:57:35 +01001459 return dump_elem(&out, elem, 0, options, 1);
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001460 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001461}
Radek Krejci6140e4e2015-10-09 15:50:55 +02001462
1463API int
Radek Krejci722b0072016-02-01 17:09:45 +01001464lyxml_print_mem(char **strp, const struct lyxml_elem *elem, int options)
Radek Krejci2fa0fc12015-10-14 18:14:29 +02001465{
1466 struct lyout out;
1467 int r;
1468
1469 if (!strp || !elem) {
1470 return 0;
1471 }
1472
1473 out.type = LYOUT_MEMORY;
1474 out.method.mem.buf = NULL;
1475 out.method.mem.len = 0;
1476 out.method.mem.size = 0;
1477
Radek Krejci722b0072016-02-01 17:09:45 +01001478 if (options & LYXML_PRINT_SIBLINGS) {
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001479 r = dump_siblings(&out, elem, options);
1480 } else {
Michal Vaskob2f1db72016-11-16 13:57:35 +01001481 r = dump_elem(&out, elem, 0, options, 1);
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001482 }
Radek Krejci2fa0fc12015-10-14 18:14:29 +02001483
1484 *strp = out.method.mem.buf;
1485 return r;
1486}
1487
1488API int
Radek Krejci722b0072016-02-01 17:09:45 +01001489lyxml_print_clb(ssize_t (*writeclb)(void *arg, const void *buf, size_t count), void *arg, const struct lyxml_elem *elem, int options)
Radek Krejci6140e4e2015-10-09 15:50:55 +02001490{
1491 struct lyout out;
1492
1493 if (!writeclb || !elem) {
1494 return 0;
1495 }
1496
1497 out.type = LYOUT_CALLBACK;
Radek Krejci50929eb2015-10-09 18:14:15 +02001498 out.method.clb.f = writeclb;
1499 out.method.clb.arg = arg;
Radek Krejci6140e4e2015-10-09 15:50:55 +02001500
Radek Krejci722b0072016-02-01 17:09:45 +01001501 if (options & LYXML_PRINT_SIBLINGS) {
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001502 return dump_siblings(&out, elem, options);
1503 } else {
Michal Vaskob2f1db72016-11-16 13:57:35 +01001504 return dump_elem(&out, elem, 0, options, 1);
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001505 }
Radek Krejci6140e4e2015-10-09 15:50:55 +02001506}