/**
 * @file xml.c
 * @author Radek Krejci <rkrejci@cesnet.cz>
 * @brief XML parser implementation for libyang
 *
 * Copyright (c) 2015 CESNET, z.s.p.o.
 *
 * This source code is licensed under BSD 3-Clause License (the "License").
 * You may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://opensource.org/licenses/BSD-3-Clause
 */
14
Radek Krejci812b10a2015-05-28 16:48:25 +020015#include <assert.h>
Radek Krejci563427e2016-02-08 16:26:34 +010016#include <errno.h>
Radek Krejci709fee62015-04-15 13:56:19 +020017#include <ctype.h>
18#include <stdint.h>
Radek Krejcif0023a92015-04-20 20:51:39 +020019#include <stdio.h>
Radek Krejci02117302015-04-13 16:32:44 +020020#include <stdlib.h>
21#include <string.h>
Radek Krejci54ea8de2015-04-09 18:02:56 +020022#include <unistd.h>
Radek Krejci563427e2016-02-08 16:26:34 +010023#include <pthread.h>
Pavol Vicanb2570c12015-11-12 13:50:20 +010024#include <sys/stat.h>
25#include <sys/mman.h>
Radek Krejci563427e2016-02-08 16:26:34 +010026#include <sys/syscall.h>
Pavol Vicanb2570c12015-11-12 13:50:20 +010027#include <fcntl.h>
Radek Krejci54ea8de2015-04-09 18:02:56 +020028
Radek Krejci06a704e2015-04-22 14:50:49 +020029#include "common.h"
Radek Krejci41912fe2015-10-22 10:22:12 +020030#include "dict_private.h"
Radek Krejci5248f132015-10-09 10:34:25 +020031#include "printer.h"
Radek Krejci5449d472015-10-26 14:35:56 +010032#include "parser.h"
Michal Vasko2d162e12015-09-24 14:33:29 +020033#include "tree_schema.h"
Michal Vaskofc5744d2015-10-22 12:09:34 +020034#include "xml_internal.h"
Radek Krejci54ea8de2015-04-09 18:02:56 +020035
/*
 * Advance the pointer p past every leading character accepted by is_xmlws().
 * The argument is parenthesized so that any pointer expression can be passed;
 * note that p is evaluated (and modified) repeatedly.
 */
#define ign_xmlws(p)            \
    while (is_xmlws(*(p))) {    \
        (p)++;                  \
    }
Radek Krejci02117302015-04-13 16:32:44 +020040
Michal Vasko88c29542015-11-27 14:57:53 +010041static struct lyxml_attr *lyxml_dup_attr(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_attr *attr);
42
Michal Vasko1e62a092015-12-01 12:27:20 +010043API const struct lyxml_ns *
44lyxml_get_ns(const struct lyxml_elem *elem, const char *prefix)
Michal Vaskof8879c22015-08-21 09:07:36 +020045{
46 struct lyxml_attr *attr;
Michal Vaskof8879c22015-08-21 09:07:36 +020047
48 if (!elem) {
49 return NULL;
50 }
51
Michal Vaskof8879c22015-08-21 09:07:36 +020052 for (attr = elem->attr; attr; attr = attr->next) {
53 if (attr->type != LYXML_ATTR_NS) {
54 continue;
55 }
56 if (!attr->name) {
Radek Krejci13f3f152016-10-03 11:40:13 +020057 if (!prefix) {
Michal Vaskof8879c22015-08-21 09:07:36 +020058 /* default namespace found */
59 if (!attr->value) {
60 /* empty default namespace -> no default namespace */
61 return NULL;
62 }
63 return (struct lyxml_ns *)attr;
64 }
Radek Krejci7d39dae2016-10-03 17:33:01 +020065 } else if (prefix && !strcmp(attr->name, prefix)) {
Michal Vaskof8879c22015-08-21 09:07:36 +020066 /* prefix found */
67 return (struct lyxml_ns *)attr;
68 }
69 }
70
71 /* go recursively */
72 return lyxml_get_ns(elem->parent, prefix);
73}
74
Michal Vasko88c29542015-11-27 14:57:53 +010075static void
76lyxml_correct_attr_ns(struct ly_ctx *ctx, struct lyxml_attr *attr, struct lyxml_elem *attr_parent, int copy_ns)
77{
78 const struct lyxml_ns *tmp_ns;
Michal Vaskof6109112015-12-03 14:00:42 +010079 struct lyxml_elem *ns_root, *attr_root;
Michal Vasko88c29542015-11-27 14:57:53 +010080
81 if ((attr->type != LYXML_ATTR_NS) && attr->ns) {
Michal Vaskof6109112015-12-03 14:00:42 +010082 /* find the root of attr */
83 for (attr_root = attr_parent; attr_root->parent; attr_root = attr_root->parent);
Michal Vasko88c29542015-11-27 14:57:53 +010084
85 /* find the root of attr NS */
86 for (ns_root = attr->ns->parent; ns_root->parent; ns_root = ns_root->parent);
87
Michal Vaskof6109112015-12-03 14:00:42 +010088 /* attr NS is defined outside attr parent subtree */
89 if (ns_root != attr_root) {
Michal Vasko88c29542015-11-27 14:57:53 +010090 if (copy_ns) {
91 tmp_ns = attr->ns;
92 /* we may have already copied the NS over? */
Radek Krejci66aca402016-05-24 15:23:02 +020093 attr->ns = lyxml_get_ns(attr_parent, tmp_ns->prefix);
Michal Vasko88c29542015-11-27 14:57:53 +010094
95 /* we haven't copied it over, copy it now */
96 if (!attr->ns) {
Michal Vaskof6109112015-12-03 14:00:42 +010097 attr->ns = (struct lyxml_ns *)lyxml_dup_attr(ctx, attr_parent, (struct lyxml_attr *)tmp_ns);
Michal Vasko88c29542015-11-27 14:57:53 +010098 }
99 } else {
100 attr->ns = NULL;
101 }
102 }
103 }
104}
105
106static struct lyxml_attr *
Michal Vaskof8879c22015-08-21 09:07:36 +0200107lyxml_dup_attr(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_attr *attr)
108{
109 struct lyxml_attr *result, *a;
110
111 if (!attr || !parent) {
112 return NULL;
113 }
114
115 if (attr->type == LYXML_ATTR_NS) {
116 /* this is correct, despite that all attributes seems like a standard
117 * attributes (struct lyxml_attr), some of them can be namespace
118 * definitions (and in that case they are struct lyxml_ns).
119 */
120 result = (struct lyxml_attr *)calloc(1, sizeof (struct lyxml_ns));
121 } else {
122 result = calloc(1, sizeof (struct lyxml_attr));
123 }
Michal Vasko253035f2015-12-17 16:58:13 +0100124 if (!result) {
125 LOGMEM;
126 return NULL;
127 }
Michal Vaskof8879c22015-08-21 09:07:36 +0200128 result->value = lydict_insert(ctx, attr->value, 0);
129 result->name = lydict_insert(ctx, attr->name, 0);
130 result->type = attr->type;
131
132 /* set namespace in case of standard attributes */
133 if (result->type == LYXML_ATTR_STD && attr->ns) {
Michal Vasko88c29542015-11-27 14:57:53 +0100134 result->ns = attr->ns;
135 lyxml_correct_attr_ns(ctx, result, parent, 1);
Michal Vaskof8879c22015-08-21 09:07:36 +0200136 }
137
138 /* set parent pointer in case of namespace attribute */
139 if (result->type == LYXML_ATTR_NS) {
140 ((struct lyxml_ns *)result)->parent = parent;
141 }
142
143 /* put attribute into the parent's attributes list */
144 if (parent->attr) {
145 /* go to the end of the list */
146 for (a = parent->attr; a->next; a = a->next);
147 /* and append new attribute */
148 a->next = result;
149 } else {
150 /* add the first attribute in the list */
151 parent->attr = result;
152 }
153
154 return result;
155}
156
Michal Vaskof748dbc2016-04-05 11:27:47 +0200157void
Michal Vasko88c29542015-11-27 14:57:53 +0100158lyxml_correct_elem_ns(struct ly_ctx *ctx, struct lyxml_elem *elem, int copy_ns, int correct_attrs)
159{
160 const struct lyxml_ns *tmp_ns;
Radek Krejcid5be5682016-01-14 16:23:22 +0100161 struct lyxml_elem *elem_root, *ns_root, *tmp, *iter;
Michal Vasko88c29542015-11-27 14:57:53 +0100162 struct lyxml_attr *attr;
163
164 /* find the root of elem */
165 for (elem_root = elem; elem_root->parent; elem_root = elem_root->parent);
166
Radek Krejcid5be5682016-01-14 16:23:22 +0100167 LY_TREE_DFS_BEGIN(elem, tmp, iter) {
168 if (iter->ns) {
Michal Vasko88c29542015-11-27 14:57:53 +0100169 /* find the root of elem NS */
Radek Krejcic071c542016-01-27 14:57:51 +0100170 for (ns_root = iter->ns->parent; ns_root; ns_root = ns_root->parent);
Michal Vasko88c29542015-11-27 14:57:53 +0100171
172 /* elem NS is defined outside elem subtree */
173 if (ns_root != elem_root) {
174 if (copy_ns) {
Radek Krejcid5be5682016-01-14 16:23:22 +0100175 tmp_ns = iter->ns;
Michal Vasko88c29542015-11-27 14:57:53 +0100176 /* we may have already copied the NS over? */
Radek Krejcid5be5682016-01-14 16:23:22 +0100177 iter->ns = lyxml_get_ns(iter, tmp_ns->prefix);
Michal Vasko88c29542015-11-27 14:57:53 +0100178
179 /* we haven't copied it over, copy it now */
Radek Krejcid5be5682016-01-14 16:23:22 +0100180 if (!iter->ns) {
181 iter->ns = (struct lyxml_ns *)lyxml_dup_attr(ctx, iter, (struct lyxml_attr *)tmp_ns);
Michal Vasko88c29542015-11-27 14:57:53 +0100182 }
183 } else {
Radek Krejcid5be5682016-01-14 16:23:22 +0100184 iter->ns = NULL;
Michal Vasko88c29542015-11-27 14:57:53 +0100185 }
186 }
187 }
188 if (correct_attrs) {
Radek Krejcid5be5682016-01-14 16:23:22 +0100189 LY_TREE_FOR(iter->attr, attr) {
Michal Vasko88c29542015-11-27 14:57:53 +0100190 lyxml_correct_attr_ns(ctx, attr, elem_root, copy_ns);
191 }
192 }
Radek Krejcid5be5682016-01-14 16:23:22 +0100193 LY_TREE_DFS_END(elem, tmp, iter);
Michal Vasko88c29542015-11-27 14:57:53 +0100194 }
195}
196
Michal Vaskof8879c22015-08-21 09:07:36 +0200197struct lyxml_elem *
198lyxml_dup_elem(struct ly_ctx *ctx, struct lyxml_elem *elem, struct lyxml_elem *parent, int recursive)
199{
200 struct lyxml_elem *result, *child;
201 struct lyxml_attr *attr;
202
203 if (!elem) {
204 return NULL;
205 }
206
207 result = calloc(1, sizeof *result);
Michal Vasko253035f2015-12-17 16:58:13 +0100208 if (!result) {
209 LOGMEM;
210 return NULL;
211 }
Michal Vaskof8879c22015-08-21 09:07:36 +0200212 result->content = lydict_insert(ctx, elem->content, 0);
213 result->name = lydict_insert(ctx, elem->name, 0);
214 result->flags = elem->flags;
Michal Vaskof8879c22015-08-21 09:07:36 +0200215 result->prev = result;
216
217 if (parent) {
218 lyxml_add_child(ctx, parent, result);
219 }
220
Michal Vasko88c29542015-11-27 14:57:53 +0100221 /* keep old namespace for now */
222 result->ns = elem->ns;
223
224 /* correct namespaces */
225 lyxml_correct_elem_ns(ctx, result, 1, 0);
Michal Vaskof8879c22015-08-21 09:07:36 +0200226
227 /* duplicate attributes */
228 for (attr = elem->attr; attr; attr = attr->next) {
229 lyxml_dup_attr(ctx, result, attr);
230 }
231
232 if (!recursive) {
233 return result;
234 }
235
236 /* duplicate children */
237 LY_TREE_FOR(elem->child, child) {
238 lyxml_dup_elem(ctx, child, result, 1);
239 }
240
241 return result;
242}
243
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200244void
Michal Vaskof8879c22015-08-21 09:07:36 +0200245lyxml_unlink_elem(struct ly_ctx *ctx, struct lyxml_elem *elem, int copy_ns)
Radek Krejci02117302015-04-13 16:32:44 +0200246{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200247 struct lyxml_elem *parent, *first;
Radek Krejci02117302015-04-13 16:32:44 +0200248
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200249 if (!elem) {
250 return;
251 }
Radek Krejci02117302015-04-13 16:32:44 +0200252
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200253 /* store pointers to important nodes */
254 parent = elem->parent;
Radek Krejcie1f13912015-05-26 15:17:38 +0200255
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200256 /* unlink from parent */
257 if (parent) {
258 if (parent->child == elem) {
259 /* we unlink the first child */
260 /* update the parent's link */
261 parent->child = elem->next;
262 }
263 /* forget about the parent */
264 elem->parent = NULL;
265 }
Radek Krejci02117302015-04-13 16:32:44 +0200266
Michal Vasko88c29542015-11-27 14:57:53 +0100267 if (copy_ns < 2) {
268 lyxml_correct_elem_ns(ctx, elem, copy_ns, 1);
269 }
270
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200271 /* unlink from siblings */
272 if (elem->prev == elem) {
273 /* there are no more siblings */
274 return;
275 }
276 if (elem->next) {
277 elem->next->prev = elem->prev;
278 } else {
279 /* unlinking the last element */
280 if (parent) {
281 first = parent->child;
282 } else {
283 first = elem;
Radek Krejcie4fffcf2016-02-23 16:06:25 +0100284 while (first->prev->next) {
285 first = first->prev;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200286 }
287 }
288 first->prev = elem->prev;
289 }
290 if (elem->prev->next) {
291 elem->prev->next = elem->next;
292 }
Radek Krejcida04f4a2015-05-21 12:54:09 +0200293
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200294 /* clean up the unlinked element */
295 elem->next = NULL;
296 elem->prev = elem;
Radek Krejci02117302015-04-13 16:32:44 +0200297}
298
Michal Vasko345da0a2015-12-02 10:35:55 +0100299API void
300lyxml_unlink(struct ly_ctx *ctx, struct lyxml_elem *elem)
301{
302 if (!elem) {
303 return;
304 }
305
306 lyxml_unlink_elem(ctx, elem, 1);
307}
308
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200309void
Radek Krejci00249f22015-07-07 13:43:28 +0200310lyxml_free_attr(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_attr *attr)
Radek Krejci02117302015-04-13 16:32:44 +0200311{
Radek Krejci00249f22015-07-07 13:43:28 +0200312 struct lyxml_attr *aiter, *aprev;
313
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200314 if (!attr) {
315 return;
316 }
Radek Krejci02117302015-04-13 16:32:44 +0200317
Radek Krejci00249f22015-07-07 13:43:28 +0200318 if (parent) {
319 /* unlink attribute from the parent's list of attributes */
320 aprev = NULL;
321 for (aiter = parent->attr; aiter; aiter = aiter->next) {
322 if (aiter == attr) {
323 break;
324 }
325 aprev = aiter;
326 }
327 if (!aiter) {
328 /* attribute to remove not found */
329 return;
330 }
331
332 if (!aprev) {
333 /* attribute is first in parent's list of attributes */
334 parent->attr = attr->next;
335 } else {
336 /* reconnect previous attribute to the next */
337 aprev->next = attr->next;
338 }
339 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200340 lydict_remove(ctx, attr->name);
341 lydict_remove(ctx, attr->value);
342 free(attr);
Radek Krejci02117302015-04-13 16:32:44 +0200343}
344
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200345void
346lyxml_free_attrs(struct ly_ctx *ctx, struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200347{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200348 struct lyxml_attr *a, *next;
349 if (!elem || !elem->attr) {
350 return;
351 }
Radek Krejci02117302015-04-13 16:32:44 +0200352
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200353 a = elem->attr;
354 do {
355 next = a->next;
Radek Krejci02117302015-04-13 16:32:44 +0200356
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200357 lydict_remove(ctx, a->name);
358 lydict_remove(ctx, a->value);
359 free(a);
Radek Krejci02117302015-04-13 16:32:44 +0200360
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200361 a = next;
362 } while (a);
Radek Krejci02117302015-04-13 16:32:44 +0200363}
364
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200365static void
Michal Vasko272e42f2015-12-02 12:20:37 +0100366lyxml_free_elem(struct ly_ctx *ctx, struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200367{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200368 struct lyxml_elem *e, *next;
Radek Krejci02117302015-04-13 16:32:44 +0200369
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200370 if (!elem) {
371 return;
372 }
Radek Krejci02117302015-04-13 16:32:44 +0200373
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200374 lyxml_free_attrs(ctx, elem);
375 LY_TREE_FOR_SAFE(elem->child, next, e) {
Michal Vasko272e42f2015-12-02 12:20:37 +0100376 lyxml_free_elem(ctx, e);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200377 }
378 lydict_remove(ctx, elem->name);
379 lydict_remove(ctx, elem->content);
380 free(elem);
Radek Krejci02117302015-04-13 16:32:44 +0200381}
382
Radek Krejcic6704c82015-10-06 11:12:45 +0200383API void
Michal Vasko345da0a2015-12-02 10:35:55 +0100384lyxml_free(struct ly_ctx *ctx, struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200385{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200386 if (!elem) {
387 return;
388 }
Radek Krejci02117302015-04-13 16:32:44 +0200389
Michal Vasko61f7ccb2015-10-23 10:15:08 +0200390 lyxml_unlink_elem(ctx, elem, 2);
Michal Vasko272e42f2015-12-02 12:20:37 +0100391 lyxml_free_elem(ctx, elem);
Radek Krejci02117302015-04-13 16:32:44 +0200392}
393
Radek Krejci8f8db232016-05-23 16:48:21 +0200394API void
395lyxml_free_withsiblings(struct ly_ctx *ctx, struct lyxml_elem *elem)
396{
397 struct lyxml_elem *iter, *aux;
398
399 if (!elem) {
400 return;
401 }
402
403 /* optimization - avoid freeing (unlinking) the last node of the siblings list */
404 /* so, first, free the node's predecessors to the beginning of the list ... */
405 for(iter = elem->prev; iter->next; iter = aux) {
406 aux = iter->prev;
407 lyxml_free(ctx, iter);
408 }
409 /* ... then, the node is the first in the siblings list, so free them all */
410 LY_TREE_FOR_SAFE(elem, aux, iter) {
411 lyxml_free(ctx, iter);
412 }
413}
414
Michal Vasko88c29542015-11-27 14:57:53 +0100415API const char *
Michal Vasko1e62a092015-12-01 12:27:20 +0100416lyxml_get_attr(const struct lyxml_elem *elem, const char *name, const char *ns)
Radek Krejcida04f4a2015-05-21 12:54:09 +0200417{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200418 struct lyxml_attr *a;
Radek Krejcida04f4a2015-05-21 12:54:09 +0200419
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200420 assert(elem);
421 assert(name);
Radek Krejcida04f4a2015-05-21 12:54:09 +0200422
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200423 for (a = elem->attr; a; a = a->next) {
424 if (a->type != LYXML_ATTR_STD) {
425 continue;
426 }
Radek Krejcida04f4a2015-05-21 12:54:09 +0200427
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200428 if (!strcmp(name, a->name)) {
429 if ((!ns && !a->ns) || (ns && a->ns && !strcmp(ns, a->ns->value))) {
430 return a->value;
431 }
432 }
433 }
Radek Krejcida04f4a2015-05-21 12:54:09 +0200434
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200435 return NULL;
Radek Krejcida04f4a2015-05-21 12:54:09 +0200436}
437
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200438int
Michal Vaskof8879c22015-08-21 09:07:36 +0200439lyxml_add_child(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200440{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200441 struct lyxml_elem *e;
Radek Krejci02117302015-04-13 16:32:44 +0200442
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200443 assert(parent);
444 assert(elem);
Radek Krejci02117302015-04-13 16:32:44 +0200445
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200446 /* (re)link element to parent */
447 if (elem->parent) {
Michal Vaskof8879c22015-08-21 09:07:36 +0200448 lyxml_unlink_elem(ctx, elem, 1);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200449 }
450 elem->parent = parent;
Radek Krejci02117302015-04-13 16:32:44 +0200451
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200452 /* link parent to element */
453 if (parent->child) {
454 e = parent->child;
455 elem->prev = e->prev;
456 elem->next = NULL;
457 elem->prev->next = elem;
458 e->prev = elem;
459 } else {
460 parent->child = elem;
461 elem->prev = elem;
462 elem->next = NULL;
463 }
Radek Krejci02117302015-04-13 16:32:44 +0200464
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200465 return EXIT_SUCCESS;
Radek Krejci02117302015-04-13 16:32:44 +0200466}
467
Michal Vasko3b855722015-08-28 16:01:18 +0200468int
Radek Krejci48464ed2016-03-17 15:44:09 +0100469lyxml_getutf8(const char *buf, unsigned int *read)
Radek Krejci02117302015-04-13 16:32:44 +0200470{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200471 int c, aux;
472 int i;
Radek Krejci02117302015-04-13 16:32:44 +0200473
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200474 c = buf[0];
475 *read = 0;
Radek Krejci02117302015-04-13 16:32:44 +0200476
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200477 /* buf is NULL terminated string, so 0 means EOF */
478 if (!c) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100479 LOGVAL(LYE_EOF, LY_VLOG_NONE, NULL);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200480 return 0;
481 }
482 *read = 1;
Radek Krejci02117302015-04-13 16:32:44 +0200483
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200484 /* process character byte(s) */
485 if ((c & 0xf8) == 0xf0) {
486 /* four bytes character */
487 *read = 4;
Radek Krejci02117302015-04-13 16:32:44 +0200488
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200489 c &= 0x07;
490 for (i = 1; i <= 3; i++) {
491 aux = buf[i];
492 if ((aux & 0xc0) != 0x80) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100493 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200494 return 0;
495 }
Radek Krejci02117302015-04-13 16:32:44 +0200496
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200497 c = (c << 6) | (aux & 0x3f);
498 }
Radek Krejci02117302015-04-13 16:32:44 +0200499
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200500 if (c < 0x1000 || c > 0x10ffff) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100501 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200502 return 0;
503 }
504 } else if ((c & 0xf0) == 0xe0) {
505 /* three bytes character */
506 *read = 3;
Radek Krejci02117302015-04-13 16:32:44 +0200507
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200508 c &= 0x0f;
509 for (i = 1; i <= 2; i++) {
510 aux = buf[i];
511 if ((aux & 0xc0) != 0x80) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100512 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200513 return 0;
514 }
Radek Krejci02117302015-04-13 16:32:44 +0200515
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200516 c = (c << 6) | (aux & 0x3f);
517 }
Radek Krejci02117302015-04-13 16:32:44 +0200518
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200519 if (c < 0x800 || (c > 0xd7ff && c < 0xe000) || c > 0xfffd) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100520 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200521 return 0;
522 }
523 } else if ((c & 0xe0) == 0xc0) {
524 /* two bytes character */
525 *read = 2;
Radek Krejci02117302015-04-13 16:32:44 +0200526
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200527 aux = buf[1];
528 if ((aux & 0xc0) != 0x80) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100529 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200530 return 0;
531 }
532 c = ((c & 0x1f) << 6) | (aux & 0x3f);
Radek Krejci02117302015-04-13 16:32:44 +0200533
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200534 if (c < 0x80) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100535 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200536 return 0;
537 }
538 } else if (!(c & 0x80)) {
539 /* one byte character */
540 if (c < 0x20 && c != 0x9 && c != 0xa && c != 0xd) {
541 /* invalid character */
Radek Krejci48464ed2016-03-17 15:44:09 +0100542 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200543 return 0;
544 }
545 } else {
546 /* invalid character */
Radek Krejci48464ed2016-03-17 15:44:09 +0100547 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200548 return 0;
549 }
Radek Krejci02117302015-04-13 16:32:44 +0200550
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200551 return c;
Radek Krejci02117302015-04-13 16:32:44 +0200552}
553
Michal Vasko0d343d12015-08-24 14:57:36 +0200554/* logs directly */
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200555static int
556parse_ignore(const char *data, const char *endstr, unsigned int *len)
Radek Krejci02117302015-04-13 16:32:44 +0200557{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200558 unsigned int slen;
559 const char *c = data;
Radek Krejci02117302015-04-13 16:32:44 +0200560
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200561 slen = strlen(endstr);
Radek Krejci02117302015-04-13 16:32:44 +0200562
Radek Krejcifb783942016-10-06 09:49:33 +0200563 while (*c && strncmp(c, endstr, slen)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200564 c++;
565 }
566 if (!*c) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100567 LOGVAL(LYE_XML_MISS, LY_VLOG_NONE, NULL, "closing sequence", endstr);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200568 return EXIT_FAILURE;
569 }
570 c += slen;
Radek Krejci02117302015-04-13 16:32:44 +0200571
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200572 *len = c - data;
573 return EXIT_SUCCESS;
Radek Krejci02117302015-04-13 16:32:44 +0200574}
575
Michal Vasko0d343d12015-08-24 14:57:36 +0200576/* logs directly */
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200577static char *
578parse_text(const char *data, char delim, unsigned int *len)
Radek Krejci02117302015-04-13 16:32:44 +0200579{
Radek Krejci709fee62015-04-15 13:56:19 +0200580#define BUFSIZE 1024
Radek Krejci02117302015-04-13 16:32:44 +0200581
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200582 char buf[BUFSIZE];
583 char *result = NULL, *aux;
584 unsigned int r;
585 int o, size = 0;
586 int cdsect = 0;
587 int32_t n;
Radek Krejci709fee62015-04-15 13:56:19 +0200588
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200589 for (*len = o = 0; cdsect || data[*len] != delim; o++) {
Radek Krejcifb783942016-10-06 09:49:33 +0200590 if (!data[*len] || (!cdsect && !strncmp(&data[*len], "]]>", 3))) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100591 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "element content, \"]]>\" found");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200592 goto error;
593 }
Radek Krejci709fee62015-04-15 13:56:19 +0200594
Radek Krejcia4a84062015-04-16 13:00:10 +0200595loop:
596
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200597 if (o > BUFSIZE - 3) {
598 /* add buffer into the result */
599 if (result) {
600 size = size + o;
Michal Vasko253035f2015-12-17 16:58:13 +0100601 aux = ly_realloc(result, size + 1);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200602 result = aux;
603 } else {
604 size = o;
605 result = malloc((size + 1) * sizeof *result);
606 }
Michal Vasko253035f2015-12-17 16:58:13 +0100607 if (!result) {
608 LOGMEM;
609 return NULL;
610 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200611 memcpy(&result[size - o], buf, o);
Radek Krejci709fee62015-04-15 13:56:19 +0200612
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200613 /* write again into the beginning of the buffer */
614 o = 0;
615 }
Radek Krejci709fee62015-04-15 13:56:19 +0200616
Radek Krejcifb783942016-10-06 09:49:33 +0200617 if (cdsect || !strncmp(&data[*len], "<![CDATA[", 9)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200618 /* CDSect */
619 if (!cdsect) {
620 cdsect = 1;
621 *len += 9;
622 }
Radek Krejcifb783942016-10-06 09:49:33 +0200623 if (data[*len] && !strncmp(&data[*len], "]]>", 3)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200624 *len += 3;
625 cdsect = 0;
626 o--; /* we don't write any data in this iteration */
627 } else {
628 buf[o] = data[*len];
629 (*len)++;
630 }
631 } else if (data[*len] == '&') {
632 (*len)++;
633 if (data[*len] != '#') {
634 /* entity reference - only predefined refs are supported */
Radek Krejcifb783942016-10-06 09:49:33 +0200635 if (!strncmp(&data[*len], "lt;", 3)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200636 buf[o] = '<';
637 *len += 3;
Radek Krejcifb783942016-10-06 09:49:33 +0200638 } else if (!strncmp(&data[*len], "gt;", 3)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200639 buf[o] = '>';
640 *len += 3;
Radek Krejcifb783942016-10-06 09:49:33 +0200641 } else if (!strncmp(&data[*len], "amp;", 4)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200642 buf[o] = '&';
643 *len += 4;
Radek Krejcifb783942016-10-06 09:49:33 +0200644 } else if (!strncmp(&data[*len], "apos;", 5)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200645 buf[o] = '\'';
646 *len += 5;
Radek Krejcifb783942016-10-06 09:49:33 +0200647 } else if (!strncmp(&data[*len], "quot;", 5)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200648 buf[o] = '\"';
649 *len += 5;
650 } else {
Radek Krejci48464ed2016-03-17 15:44:09 +0100651 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "entity reference (only predefined references are supported)");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200652 goto error;
653 }
654 } else {
655 /* character reference */
656 (*len)++;
657 if (isdigit(data[*len])) {
658 for (n = 0; isdigit(data[*len]); (*len)++) {
659 n = (10 * n) + (data[*len] - '0');
660 }
661 if (data[*len] != ';') {
Radek Krejci48464ed2016-03-17 15:44:09 +0100662 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "character reference, missing semicolon");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200663 goto error;
664 }
665 } else if (data[(*len)++] == 'x' && isxdigit(data[*len])) {
666 for (n = 0; isxdigit(data[*len]); (*len)++) {
667 if (isdigit(data[*len])) {
668 r = (data[*len] - '0');
669 } else if (data[*len] > 'F') {
670 r = 10 + (data[*len] - 'a');
671 } else {
672 r = 10 + (data[*len] - 'A');
673 }
674 n = (16 * n) + r;
675 }
676 } else {
Radek Krejci48464ed2016-03-17 15:44:09 +0100677 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "character reference");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200678 goto error;
Radek Krejci709fee62015-04-15 13:56:19 +0200679
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200680 }
Radek Krejci48464ed2016-03-17 15:44:09 +0100681 r = pututf8(&buf[o], n);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200682 if (!r) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100683 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "character reference value");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200684 goto error;
685 }
686 o += r - 1; /* o is ++ in for loop */
687 (*len)++;
688 }
689 } else {
Radek Krejcideee60e2016-09-23 15:21:14 +0200690 r = copyutf8(&buf[o], &data[*len]);
691 if (!r) {
692 goto error;
693 }
694
695 o += r - 1; /* o is ++ in for loop */
696 (*len) = (*len) + r;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200697 }
698 }
Radek Krejci02117302015-04-13 16:32:44 +0200699
Radek Krejcifb783942016-10-06 09:49:33 +0200700 if (delim == '<' && !strncmp(&data[*len], "<![CDATA[", 9)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200701 /* ignore loop's end condition on beginning of CDSect */
702 goto loop;
703 }
Radek Krejci709fee62015-04-15 13:56:19 +0200704#undef BUFSIZE
705
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200706 if (o) {
707 if (result) {
708 size = size + o;
709 aux = realloc(result, size + 1);
710 result = aux;
711 } else {
712 size = o;
713 result = malloc((size + 1) * sizeof *result);
714 }
Michal Vasko253035f2015-12-17 16:58:13 +0100715 if (!result) {
716 LOGMEM;
717 return NULL;
718 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200719 memcpy(&result[size - o], buf, o);
720 }
721 if (result) {
722 result[size] = '\0';
Radek Krejcia5269642015-07-20 19:04:11 +0200723 } else {
724 size = 0;
725 result = strdup("");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200726 }
Radek Krejci02117302015-04-13 16:32:44 +0200727
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200728 return result;
Radek Krejci709fee62015-04-15 13:56:19 +0200729
730error:
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200731 free(result);
732 return NULL;
Radek Krejci02117302015-04-13 16:32:44 +0200733}
734
Michal Vasko0d343d12015-08-24 14:57:36 +0200735/* logs directly */
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200736static struct lyxml_attr *
Radek Krejci00249f22015-07-07 13:43:28 +0200737parse_attr(struct ly_ctx *ctx, const char *data, unsigned int *len, struct lyxml_elem *parent)
Radek Krejci674e1f82015-04-21 14:12:19 +0200738{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200739 const char *c = data, *start, *delim;
740 char prefix[32];
741 int uc;
Radek Krejci00249f22015-07-07 13:43:28 +0200742 struct lyxml_attr *attr = NULL, *a;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200743 unsigned int size;
Radek Krejci02117302015-04-13 16:32:44 +0200744
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200745 /* check if it is attribute or namespace */
Radek Krejcifb783942016-10-06 09:49:33 +0200746 if (!strncmp(c, "xmlns", 5)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200747 /* namespace */
748 attr = calloc(1, sizeof (struct lyxml_ns));
Michal Vasko253035f2015-12-17 16:58:13 +0100749 if (!attr) {
750 LOGMEM;
751 return NULL;
752 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200753 attr->type = LYXML_ATTR_NS;
Radek Krejci00249f22015-07-07 13:43:28 +0200754 ((struct lyxml_ns *)attr)->parent = parent;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200755 c += 5;
756 if (*c != ':') {
757 /* default namespace, prefix will be empty */
758 goto equal;
759 }
760 c++; /* go after ':' to the prefix value */
761 } else {
762 /* attribute */
763 attr = calloc(1, sizeof *attr);
Michal Vasko253035f2015-12-17 16:58:13 +0100764 if (!attr) {
765 LOGMEM;
766 return NULL;
767 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200768 attr->type = LYXML_ATTR_STD;
769 }
Radek Krejci4ea08382015-04-21 09:41:40 +0200770
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200771 /* process name part of the attribute */
772 start = c;
Radek Krejci48464ed2016-03-17 15:44:09 +0100773 uc = lyxml_getutf8(c, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200774 if (!is_xmlnamestartchar(uc)) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100775 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "NameStartChar of the attribute");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200776 free(attr);
777 return NULL;
778 }
779 c += size;
Radek Krejci48464ed2016-03-17 15:44:09 +0100780 uc = lyxml_getutf8(c, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200781 while (is_xmlnamechar(uc)) {
782 if (attr->type == LYXML_ATTR_STD && *c == ':') {
783 /* attribute in a namespace */
784 start = c + 1;
Radek Krejci4ea08382015-04-21 09:41:40 +0200785
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200786 /* look for the prefix in namespaces */
787 memcpy(prefix, data, c - data);
788 prefix[c - data] = '\0';
Radek Krejci4476d412015-07-10 15:35:01 +0200789 attr->ns = lyxml_get_ns(parent, prefix);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200790 }
791 c += size;
Radek Krejci48464ed2016-03-17 15:44:09 +0100792 uc = lyxml_getutf8(c, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200793 }
Radek Krejci674e1f82015-04-21 14:12:19 +0200794
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200795 /* store the name */
796 size = c - start;
797 attr->name = lydict_insert(ctx, start, size);
Radek Krejci674e1f82015-04-21 14:12:19 +0200798
799equal:
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200800 /* check Eq mark that can be surrounded by whitespaces */
801 ign_xmlws(c);
802 if (*c != '=') {
Radek Krejci48464ed2016-03-17 15:44:09 +0100803 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "attribute definition, \"=\" expected");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200804 goto error;
805 }
806 c++;
807 ign_xmlws(c);
Radek Krejci02117302015-04-13 16:32:44 +0200808
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200809 /* process value part of the attribute */
810 if (!*c || (*c != '"' && *c != '\'')) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100811 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "attribute value, \" or \' expected");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200812 goto error;
813 }
814 delim = c;
815 attr->value = lydict_insert_zc(ctx, parse_text(++c, *delim, &size));
816 if (ly_errno) {
817 goto error;
818 }
Radek Krejci02117302015-04-13 16:32:44 +0200819
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200820 *len = c + size + 1 - data; /* +1 is delimiter size */
Radek Krejci00249f22015-07-07 13:43:28 +0200821
822 /* put attribute into the parent's attributes list */
823 if (parent->attr) {
824 /* go to the end of the list */
825 for (a = parent->attr; a->next; a = a->next);
826 /* and append new attribute */
827 a->next = attr;
828 } else {
829 /* add the first attribute in the list */
830 parent->attr = attr;
831 }
832
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200833 return attr;
Radek Krejci02117302015-04-13 16:32:44 +0200834
835error:
Radek Krejci00249f22015-07-07 13:43:28 +0200836 lyxml_free_attr(ctx, NULL, attr);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200837 return NULL;
Radek Krejci54ea8de2015-04-09 18:02:56 +0200838}
839
Michal Vasko0d343d12015-08-24 14:57:36 +0200840/* logs directly */
Radek Krejci9a5daea2016-03-02 16:49:40 +0100841struct lyxml_elem *
842lyxml_parse_elem(struct ly_ctx *ctx, const char *data, unsigned int *len, struct lyxml_elem *parent)
Radek Krejci54ea8de2015-04-09 18:02:56 +0200843{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200844 const char *c = data, *start, *e;
845 const char *lws; /* leading white space for handling mixed content */
846 int uc;
847 char *str;
848 char prefix[32] = { 0 };
849 unsigned int prefix_len = 0;
850 struct lyxml_elem *elem = NULL, *child;
851 struct lyxml_attr *attr;
852 unsigned int size;
853 int nons_flag = 0, closed_flag = 0;
Radek Krejci02117302015-04-13 16:32:44 +0200854
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200855 *len = 0;
Radek Krejci02117302015-04-13 16:32:44 +0200856
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200857 if (*c != '<') {
858 return NULL;
859 }
Radek Krejci02117302015-04-13 16:32:44 +0200860
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200861 /* locate element name */
862 c++;
863 e = c;
Radek Krejci02117302015-04-13 16:32:44 +0200864
Radek Krejci48464ed2016-03-17 15:44:09 +0100865 uc = lyxml_getutf8(e, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200866 if (!is_xmlnamestartchar(uc)) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100867 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "NameStartChar of the element");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200868 return NULL;
869 }
870 e += size;
Radek Krejci48464ed2016-03-17 15:44:09 +0100871 uc = lyxml_getutf8(e, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200872 while (is_xmlnamechar(uc)) {
873 if (*e == ':') {
874 if (prefix_len) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100875 LOGVAL(LYE_XML_INVAL, LY_VLOG_NONE, NULL, "element name, multiple colons found");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200876 goto error;
877 }
878 /* element in a namespace */
879 start = e + 1;
Radek Krejci674e1f82015-04-21 14:12:19 +0200880
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200881 /* look for the prefix in namespaces */
882 memcpy(prefix, c, prefix_len = e - c);
883 prefix[prefix_len] = '\0';
884 c = start;
885 }
886 e += size;
Radek Krejci48464ed2016-03-17 15:44:09 +0100887 uc = lyxml_getutf8(e, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200888 }
889 if (!*e) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100890 LOGVAL(LYE_EOF, LY_VLOG_NONE, NULL);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200891 return NULL;
892 }
Radek Krejci02117302015-04-13 16:32:44 +0200893
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200894 /* allocate element structure */
895 elem = calloc(1, sizeof *elem);
Michal Vasko253035f2015-12-17 16:58:13 +0100896 if (!elem) {
897 LOGMEM;
898 return NULL;
899 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200900 elem->next = NULL;
901 elem->prev = elem;
902 if (parent) {
Michal Vaskof8879c22015-08-21 09:07:36 +0200903 lyxml_add_child(ctx, parent, elem);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200904 }
Radek Krejci02117302015-04-13 16:32:44 +0200905
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200906 /* store the name into the element structure */
907 elem->name = lydict_insert(ctx, c, e - c);
908 c = e;
Radek Krejci02117302015-04-13 16:32:44 +0200909
910process:
Radek Krejci00a0e712016-10-26 10:24:46 +0200911 ly_err_clean(1);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200912 ign_xmlws(c);
Radek Krejcifb783942016-10-06 09:49:33 +0200913 if (!strncmp("/>", c, 2)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200914 /* we are done, it was EmptyElemTag */
915 c += 2;
Michal Vasko44913842016-04-13 14:20:41 +0200916 elem->content = lydict_insert(ctx, "", 0);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200917 closed_flag = 1;
918 } else if (*c == '>') {
919 /* process element content */
920 c++;
921 lws = NULL;
Radek Krejci02117302015-04-13 16:32:44 +0200922
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200923 while (*c) {
Radek Krejcifb783942016-10-06 09:49:33 +0200924 if (!strncmp(c, "</", 2)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200925 if (lws && !elem->child) {
926 /* leading white spaces were actually content */
927 goto store_content;
928 }
Radek Krejci02117302015-04-13 16:32:44 +0200929
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200930 /* Etag */
931 c += 2;
932 /* get name and check it */
933 e = c;
Radek Krejci48464ed2016-03-17 15:44:09 +0100934 uc = lyxml_getutf8(e, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200935 if (!is_xmlnamestartchar(uc)) {
Radek Krejci48464ed2016-03-17 15:44:09 +0100936 LOGVAL(LYE_XML_INVAL, LY_VLOG_XML, elem, "NameStartChar of the element");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200937 goto error;
938 }
939 e += size;
Radek Krejci48464ed2016-03-17 15:44:09 +0100940 uc = lyxml_getutf8(e, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200941 while (is_xmlnamechar(uc)) {
942 if (*e == ':') {
943 /* element in a namespace */
944 start = e + 1;
Radek Krejci674e1f82015-04-21 14:12:19 +0200945
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200946 /* look for the prefix in namespaces */
947 if (memcmp(prefix, c, e - c)) {
Michal Vaskoff9336a2016-05-10 10:48:48 +0200948 LOGVAL(LYE_SPEC, LY_VLOG_XML, elem,
949 "Invalid (different namespaces) opening (%s) and closing element tags.", elem->name);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200950 goto error;
951 }
952 c = start;
953 }
954 e += size;
Radek Krejci48464ed2016-03-17 15:44:09 +0100955 uc = lyxml_getutf8(e, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200956 }
957 if (!*e) {
Radek Krejci3cc10962016-04-13 15:03:27 +0200958 LOGVAL(LYE_EOF, LY_VLOG_NONE, NULL);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200959 goto error;
960 }
Radek Krejci02117302015-04-13 16:32:44 +0200961
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200962 /* check that it corresponds to opening tag */
963 size = e - c;
964 str = malloc((size + 1) * sizeof *str);
Michal Vasko253035f2015-12-17 16:58:13 +0100965 if (!str) {
966 LOGMEM;
967 goto error;
968 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200969 memcpy(str, c, e - c);
970 str[e - c] = '\0';
971 if (size != strlen(elem->name) || memcmp(str, elem->name, size)) {
Michal Vaskoff9336a2016-05-10 10:48:48 +0200972 LOGVAL(LYE_SPEC, LY_VLOG_XML, elem,
973 "Invalid (mixed names) opening (%s) and closing (%s) element tags.", elem->name, str);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200974 free(str);
975 goto error;
976 }
977 free(str);
978 c = e;
Radek Krejci02117302015-04-13 16:32:44 +0200979
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200980 ign_xmlws(c);
981 if (*c != '>') {
Michal Vaskoff9336a2016-05-10 10:48:48 +0200982 LOGVAL(LYE_SPEC, LY_VLOG_XML, elem, "Data after closing element tag \"%s\".", elem->name);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200983 goto error;
984 }
985 c++;
Michal Vaskoe00b7892016-04-14 10:12:18 +0200986 if (!(elem->flags & LYXML_ELEM_MIXED) && !elem->content) {
987 /* there was no content, but we don't want NULL (only if mixed content) */
988 elem->content = lydict_insert(ctx, "", 0);
989 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200990 closed_flag = 1;
991 break;
Radek Krejci02117302015-04-13 16:32:44 +0200992
Radek Krejcifb783942016-10-06 09:49:33 +0200993 } else if (!strncmp(c, "<?", 2)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200994 if (lws) {
995 /* leading white spaces were only formatting */
996 lws = NULL;
997 }
998 /* PI - ignore it */
999 c += 2;
1000 if (parse_ignore(c, "?>", &size)) {
1001 goto error;
1002 }
1003 c += size;
Radek Krejcifb783942016-10-06 09:49:33 +02001004 } else if (!strncmp(c, "<!--", 4)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001005 if (lws) {
1006 /* leading white spaces were only formatting */
1007 lws = NULL;
1008 }
1009 /* Comment - ignore it */
1010 c += 4;
1011 if (parse_ignore(c, "-->", &size)) {
1012 goto error;
1013 }
1014 c += size;
Radek Krejcifb783942016-10-06 09:49:33 +02001015 } else if (!strncmp(c, "<![CDATA[", 9)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001016 /* CDSect */
1017 goto store_content;
1018 } else if (*c == '<') {
1019 if (lws) {
1020 if (elem->flags & LYXML_ELEM_MIXED) {
1021 /* we have a mixed content */
1022 goto store_content;
1023 } else {
1024 /* leading white spaces were only formatting */
1025 lws = NULL;
1026 }
1027 }
1028 if (elem->content) {
1029 /* we have a mixed content */
1030 child = calloc(1, sizeof *child);
Michal Vasko253035f2015-12-17 16:58:13 +01001031 if (!child) {
1032 LOGMEM;
1033 goto error;
1034 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001035 child->content = elem->content;
1036 elem->content = NULL;
Michal Vaskof8879c22015-08-21 09:07:36 +02001037 lyxml_add_child(ctx, elem, child);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001038 elem->flags |= LYXML_ELEM_MIXED;
1039 }
Radek Krejci9a5daea2016-03-02 16:49:40 +01001040 child = lyxml_parse_elem(ctx, c, &size, elem);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001041 if (!child) {
1042 goto error;
1043 }
1044 c += size; /* move after processed child element */
1045 } else if (is_xmlws(*c)) {
1046 lws = c;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001047 ign_xmlws(c);
1048 } else {
Radek Krejci02117302015-04-13 16:32:44 +02001049store_content:
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001050 /* store text content */
1051 if (lws) {
1052 /* process content including the leading white spaces */
1053 c = lws;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001054 lws = NULL;
1055 }
1056 elem->content = lydict_insert_zc(ctx, parse_text(c, '<', &size));
1057 if (ly_errno) {
1058 goto error;
1059 }
1060 c += size; /* move after processed text content */
Radek Krejci02117302015-04-13 16:32:44 +02001061
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001062 if (elem->child) {
1063 /* we have a mixed content */
1064 child = calloc(1, sizeof *child);
Michal Vasko253035f2015-12-17 16:58:13 +01001065 if (!child) {
1066 LOGMEM;
1067 goto error;
1068 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001069 child->content = elem->content;
1070 elem->content = NULL;
Michal Vaskof8879c22015-08-21 09:07:36 +02001071 lyxml_add_child(ctx, elem, child);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001072 elem->flags |= LYXML_ELEM_MIXED;
1073 }
1074 }
1075 }
1076 } else {
1077 /* process attribute */
1078 attr = parse_attr(ctx, c, &size, elem);
1079 if (!attr) {
1080 goto error;
1081 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001082 c += size; /* move after processed attribute */
Radek Krejci02117302015-04-13 16:32:44 +02001083
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001084 /* check namespace */
1085 if (attr->type == LYXML_ATTR_NS) {
1086 if (!prefix[0] && !attr->name) {
1087 if (attr->value) {
1088 /* default prefix */
1089 elem->ns = (struct lyxml_ns *)attr;
1090 } else {
1091 /* xmlns="" -> no namespace */
1092 nons_flag = 1;
1093 }
Radek Krejcifb783942016-10-06 09:49:33 +02001094 } else if (prefix[0] && attr->name && !strncmp(attr->name, prefix, prefix_len + 1)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001095 /* matching namespace with prefix */
1096 elem->ns = (struct lyxml_ns *)attr;
1097 }
1098 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001099
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001100 /* go back to finish element processing */
1101 goto process;
1102 }
Radek Krejci02117302015-04-13 16:32:44 +02001103
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001104 *len = c - data;
Radek Krejci02117302015-04-13 16:32:44 +02001105
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001106 if (!closed_flag) {
Radek Krejci48464ed2016-03-17 15:44:09 +01001107 LOGVAL(LYE_XML_MISS, LY_VLOG_XML, elem, "closing element tag", elem->name);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001108 goto error;
1109 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001110
Radek Krejci78a230a2015-07-07 17:04:40 +02001111 if (!elem->ns && !nons_flag && parent) {
Radek Krejci4476d412015-07-10 15:35:01 +02001112 elem->ns = lyxml_get_ns(parent, prefix_len ? prefix : NULL);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001113 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001114
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001115 return elem;
Radek Krejci02117302015-04-13 16:32:44 +02001116
1117error:
Michal Vasko345da0a2015-12-02 10:35:55 +01001118 lyxml_free(ctx, elem);
Radek Krejci02117302015-04-13 16:32:44 +02001119
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001120 return NULL;
Radek Krejci54ea8de2015-04-09 18:02:56 +02001121}
1122
Michal Vasko0d343d12015-08-24 14:57:36 +02001123/* logs directly */
Radek Krejcic6704c82015-10-06 11:12:45 +02001124API struct lyxml_elem *
Radek Krejci722b0072016-02-01 17:09:45 +01001125lyxml_parse_mem(struct ly_ctx *ctx, const char *data, int options)
Radek Krejci54ea8de2015-04-09 18:02:56 +02001126{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001127 const char *c = data;
1128 unsigned int len;
Radek Krejci851ea662016-01-08 09:30:53 +01001129 struct lyxml_elem *root, *first = NULL, *next;
Radek Krejci02117302015-04-13 16:32:44 +02001130
Radek Krejci00a0e712016-10-26 10:24:46 +02001131 ly_err_clean(1);
Radek Krejci2342cf62016-01-29 16:48:23 +01001132
Radek Krejci120f6242015-12-17 12:32:56 +01001133repeat:
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001134 /* process document */
Radek Krejcif8ae23e2016-07-26 17:11:17 +02001135 while (1) {
1136 if (!*c) {
1137 /* eof */
1138 return first;
1139 } else if (is_xmlws(*c)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001140 /* skip whitespaces */
1141 ign_xmlws(c);
Radek Krejcifb783942016-10-06 09:49:33 +02001142 } else if (!strncmp(c, "<?", 2)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001143 /* XMLDecl or PI - ignore it */
1144 c += 2;
1145 if (parse_ignore(c, "?>", &len)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001146 return NULL;
1147 }
1148 c += len;
Radek Krejcifb783942016-10-06 09:49:33 +02001149 } else if (!strncmp(c, "<!--", 4)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001150 /* Comment - ignore it */
1151 c += 2;
1152 if (parse_ignore(c, "-->", &len)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001153 return NULL;
1154 }
1155 c += len;
Radek Krejcifb783942016-10-06 09:49:33 +02001156 } else if (!strncmp(c, "<!", 2)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001157 /* DOCTYPE */
1158 /* TODO - standalone ignore counting < and > */
1159 LOGERR(LY_EINVAL, "DOCTYPE not supported in XML documents.");
1160 return NULL;
1161 } else if (*c == '<') {
1162 /* element - process it in next loop to strictly follow XML
1163 * format
1164 */
1165 break;
Michal Vaskoc2e80562015-07-27 11:31:41 +02001166 } else {
Radek Krejci48464ed2016-03-17 15:44:09 +01001167 LOGVAL(LYE_XML_INCHAR, LY_VLOG_NONE, NULL, c);
Michal Vaskoc2e80562015-07-27 11:31:41 +02001168 return NULL;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001169 }
1170 }
Radek Krejci02117302015-04-13 16:32:44 +02001171
Radek Krejci9a5daea2016-03-02 16:49:40 +01001172 root = lyxml_parse_elem(ctx, c, &len, NULL);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001173 if (!root) {
Michal Vaskobc58b4a2016-01-07 14:42:31 +01001174 if (first) {
Radek Krejci851ea662016-01-08 09:30:53 +01001175 LY_TREE_FOR_SAFE(first, next, root) {
Michal Vaskobc58b4a2016-01-07 14:42:31 +01001176 lyxml_free(ctx, root);
1177 }
Radek Krejci120f6242015-12-17 12:32:56 +01001178 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001179 return NULL;
Radek Krejci120f6242015-12-17 12:32:56 +01001180 } else if (!first) {
1181 first = root;
1182 } else {
1183 first->prev->next = root;
1184 root->prev = first->prev;
1185 first->prev = root;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001186 }
1187 c += len;
Radek Krejci02117302015-04-13 16:32:44 +02001188
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001189 /* ignore the rest of document where can be comments, PIs and whitespaces,
1190 * note that we are not detecting syntax errors in these parts
1191 */
1192 ign_xmlws(c);
1193 if (*c) {
Radek Krejci722b0072016-02-01 17:09:45 +01001194 if (options & LYXML_PARSE_MULTIROOT) {
Radek Krejci120f6242015-12-17 12:32:56 +01001195 goto repeat;
1196 } else {
1197 LOGWRN("There are some not parsed data:\n%s", c);
1198 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001199 }
Radek Krejci02117302015-04-13 16:32:44 +02001200
Radek Krejci120f6242015-12-17 12:32:56 +01001201 return first;
Radek Krejci02117302015-04-13 16:32:44 +02001202}
1203
Radek Krejcic6704c82015-10-06 11:12:45 +02001204API struct lyxml_elem *
Radek Krejci722b0072016-02-01 17:09:45 +01001205lyxml_parse_path(struct ly_ctx *ctx, const char *filename, int options)
Radek Krejci54ea8de2015-04-09 18:02:56 +02001206{
Radek Krejci6b3d9262015-12-03 13:45:27 +01001207 struct lyxml_elem *elem = NULL;
Pavol Vicanb2570c12015-11-12 13:50:20 +01001208 struct stat sb;
1209 int fd;
1210 char *addr;
1211
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001212 if (!filename || !ctx) {
1213 LOGERR(LY_EINVAL, "%s: Invalid parameter.", __func__);
1214 return NULL;
1215 }
Radek Krejci54ea8de2015-04-09 18:02:56 +02001216
Pavol Vicanb2570c12015-11-12 13:50:20 +01001217 fd = open(filename, O_RDONLY);
1218 if (fd == -1) {
Radek Krejci6b3d9262015-12-03 13:45:27 +01001219 LOGERR(LY_EINVAL,"Opening file \"%s\" failed.", filename);
Pavol Vicanb2570c12015-11-12 13:50:20 +01001220 return NULL;
1221 }
1222 if (fstat(fd, &sb) == -1) {
1223 LOGERR(LY_EINVAL, "Unable to get file \"%s\" information.\n", filename);
1224 goto error;
1225 }
1226 if (!S_ISREG(sb.st_mode)) {
Radek Krejcib051f722016-02-25 15:12:21 +01001227 LOGERR(LY_EINVAL, "%s: Invalid parameter, input file is not a regular file", __func__);
Pavol Vicanb2570c12015-11-12 13:50:20 +01001228 goto error;
1229 }
Pavol Vicanf7cc2852016-03-22 23:27:35 +01001230 addr = mmap(NULL, sb.st_size + 2, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
Pavol Vicanb2570c12015-11-12 13:50:20 +01001231 if (addr == MAP_FAILED) {
Radek Krejci6b3d9262015-12-03 13:45:27 +01001232 LOGERR(LY_EMEM,"Map file into memory failed (%s()).", __func__);
Pavol Vicanb2570c12015-11-12 13:50:20 +01001233 goto error;
1234 }
Radek Krejci6b3d9262015-12-03 13:45:27 +01001235
Radek Krejci722b0072016-02-01 17:09:45 +01001236 elem = lyxml_parse_mem(ctx, addr, options);
Pavol Vicanf7cc2852016-03-22 23:27:35 +01001237 munmap(addr, sb.st_size +2);
Radek Krejci30793ab2015-12-03 13:45:45 +01001238 close(fd);
Radek Krejci6b3d9262015-12-03 13:45:27 +01001239
Pavol Vicanb2570c12015-11-12 13:50:20 +01001240 return elem;
1241
1242error:
Radek Krejci6b3d9262015-12-03 13:45:27 +01001243 if (fd != -1) {
1244 close(fd);
1245 }
1246
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001247 return NULL;
Radek Krejci54ea8de2015-04-09 18:02:56 +02001248}
Radek Krejci02117302015-04-13 16:32:44 +02001249
/*
 * Print text into out, replacing the characters '&', '<', '>' and '"' by
 * the corresponding XML entities. NULL text prints nothing.
 *
 * Returns the sum of the ly_print() results for the entities plus one for
 * every character written unchanged.
 */
int
lyxml_dump_text(struct lyout *out, const char *text)
{
    const char *p;
    unsigned int written = 0;

    if (!text) {
        return 0;
    }

    for (p = text; *p; ++p) {
        if (*p == '&') {
            written += ly_print(out, "&amp;");
        } else if (*p == '<') {
            written += ly_print(out, "&lt;");
        } else if (*p == '>') {
            /* not needed, just for readability */
            written += ly_print(out, "&gt;");
        } else if (*p == '"') {
            written += ly_print(out, "&quot;");
        } else {
            /* any other character goes out as it is */
            ly_write(out, p, 1);
            written++;
        }
    }

    return written;
}
1282
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001283static int
Michal Vasko1e62a092015-12-01 12:27:20 +01001284dump_elem(struct lyout *out, const struct lyxml_elem *e, int level, int options)
Radek Krejcif0023a92015-04-20 20:51:39 +02001285{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001286 int size = 0;
1287 struct lyxml_attr *a;
1288 struct lyxml_elem *child;
1289 const char *delim, *delim_outer;
1290 int indent;
Radek Krejcif0023a92015-04-20 20:51:39 +02001291
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001292 if (!e->name) {
1293 /* mixed content */
1294 if (e->content) {
Michal Vasko5db027d2015-10-09 14:38:50 +02001295 return lyxml_dump_text(out, e->content);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001296 } else {
1297 return 0;
1298 }
1299 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001300
Radek Krejci722b0072016-02-01 17:09:45 +01001301 delim = delim_outer = (options & LYXML_PRINT_FORMAT) ? "\n" : "";
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001302 indent = 2 * level;
1303 if ((e->flags & LYXML_ELEM_MIXED) || (e->parent && (e->parent->flags & LYXML_ELEM_MIXED))) {
1304 delim = "";
1305 }
1306 if (e->parent && (e->parent->flags & LYXML_ELEM_MIXED)) {
1307 delim_outer = "";
1308 indent = 0;
1309 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001310
Radek Krejci722b0072016-02-01 17:09:45 +01001311 if (!(options & (LYXML_PRINT_OPEN | LYXML_PRINT_CLOSE | LYXML_PRINT_ATTRS)) || (options & LYXML_PRINT_OPEN)) {
Radek Krejcic6704c82015-10-06 11:12:45 +02001312 /* opening tag */
1313 if (e->ns && e->ns->prefix) {
Radek Krejci5248f132015-10-09 10:34:25 +02001314 size += ly_print(out, "%*s<%s:%s", indent, "", e->ns->prefix, e->name);
Radek Krejcic6704c82015-10-06 11:12:45 +02001315 } else {
Radek Krejci5248f132015-10-09 10:34:25 +02001316 size += ly_print(out, "%*s<%s", indent, "", e->name);
Radek Krejcic6704c82015-10-06 11:12:45 +02001317 }
Radek Krejci722b0072016-02-01 17:09:45 +01001318 } else if (options & LYXML_PRINT_CLOSE) {
Radek Krejcic6704c82015-10-06 11:12:45 +02001319 indent = 0;
1320 goto close;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001321 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001322
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001323 /* attributes */
1324 for (a = e->attr; a; a = a->next) {
1325 if (a->type == LYXML_ATTR_NS) {
1326 if (a->name) {
Radek Krejci5248f132015-10-09 10:34:25 +02001327 size += ly_print(out, " xmlns:%s=\"%s\"", a->name, a->value ? a->value : "");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001328 } else {
Radek Krejci5248f132015-10-09 10:34:25 +02001329 size += ly_print(out, " xmlns=\"%s\"", a->value ? a->value : "");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001330 }
1331 } else if (a->ns && a->ns->prefix) {
Radek Krejci5248f132015-10-09 10:34:25 +02001332 size += ly_print(out, " %s:%s=\"%s\"", a->ns->prefix, a->name, a->value);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001333 } else {
Radek Krejci5248f132015-10-09 10:34:25 +02001334 size += ly_print(out, " %s=\"%s\"", a->name, a->value);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001335 }
1336 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001337
Radek Krejcic6704c82015-10-06 11:12:45 +02001338 /* apply options */
Radek Krejci722b0072016-02-01 17:09:45 +01001339 if ((options & LYXML_PRINT_CLOSE) && (options & LYXML_PRINT_OPEN)) {
Radek Krejci5248f132015-10-09 10:34:25 +02001340 size += ly_print(out, "/>%s", delim);
Radek Krejcic6704c82015-10-06 11:12:45 +02001341 return size;
Radek Krejci722b0072016-02-01 17:09:45 +01001342 } else if (options & LYXML_PRINT_OPEN) {
Radek Krejci5248f132015-10-09 10:34:25 +02001343 ly_print(out, ">");
Radek Krejcic6704c82015-10-06 11:12:45 +02001344 return ++size;
Radek Krejci722b0072016-02-01 17:09:45 +01001345 } else if (options & LYXML_PRINT_ATTRS) {
Radek Krejcic6704c82015-10-06 11:12:45 +02001346 return size;
1347 }
1348
Michal Vasko3a611612016-04-14 10:12:56 +02001349 if (!e->child && (!e->content || !e->content[0])) {
Radek Krejci5248f132015-10-09 10:34:25 +02001350 size += ly_print(out, "/>%s", delim);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001351 return size;
Michal Vasko3a611612016-04-14 10:12:56 +02001352 } else if (e->content && e->content[0]) {
Radek Krejci5248f132015-10-09 10:34:25 +02001353 ly_print(out, ">");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001354 size++;
Radek Krejcif0023a92015-04-20 20:51:39 +02001355
Michal Vasko5db027d2015-10-09 14:38:50 +02001356 size += lyxml_dump_text(out, e->content);
Radek Krejcif0023a92015-04-20 20:51:39 +02001357
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001358 if (e->ns && e->ns->prefix) {
Radek Krejci5248f132015-10-09 10:34:25 +02001359 size += ly_print(out, "</%s:%s>%s", e->ns->prefix, e->name, delim);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001360 } else {
Radek Krejci5248f132015-10-09 10:34:25 +02001361 size += ly_print(out, "</%s>%s", e->name, delim);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001362 }
1363 return size;
1364 } else {
Radek Krejci5248f132015-10-09 10:34:25 +02001365 size += ly_print(out, ">%s", delim);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001366 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001367
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001368 /* go recursively */
1369 LY_TREE_FOR(e->child, child) {
Radek Krejci722b0072016-02-01 17:09:45 +01001370 if (options & LYXML_PRINT_FORMAT) {
1371 size += dump_elem(out, child, level + 1, LYXML_PRINT_FORMAT);
Pavol Vicanbe7eef52015-10-22 14:07:48 +02001372 } else {
1373 size += dump_elem(out, child, level, 0);
1374 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001375 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001376
Radek Krejcic6704c82015-10-06 11:12:45 +02001377close:
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001378 /* closing tag */
1379 if (e->ns && e->ns->prefix) {
Radek Krejci5248f132015-10-09 10:34:25 +02001380 size += ly_print(out, "%*s</%s:%s>%s", indent, "", e->ns->prefix, e->name, delim_outer);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001381 } else {
Radek Krejci5248f132015-10-09 10:34:25 +02001382 size += ly_print(out, "%*s</%s>%s", indent, "", e->name, delim_outer);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001383 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001384
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001385 return size;
Radek Krejcif0023a92015-04-20 20:51:39 +02001386}
1387
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001388static int
1389dump_siblings(struct lyout *out, const struct lyxml_elem *e, int options)
1390{
1391 const struct lyxml_elem *start, *iter;
1392 int ret = 0;
1393
1394 if (e->parent) {
1395 start = e->parent->child;
1396 } else {
1397 start = e;
1398 while(start->prev && start->prev->next) {
1399 start = start->prev;
1400 }
1401 }
1402
1403 LY_TREE_FOR(start, iter) {
1404 ret += dump_elem(out, iter, 0, options);
1405 }
1406
1407 return ret;
1408}
1409
Radek Krejcic6704c82015-10-06 11:12:45 +02001410API int
Radek Krejci722b0072016-02-01 17:09:45 +01001411lyxml_print_file(FILE *stream, const struct lyxml_elem *elem, int options)
Radek Krejcif0023a92015-04-20 20:51:39 +02001412{
Radek Krejci5248f132015-10-09 10:34:25 +02001413 struct lyout out;
1414
1415 if (!stream || !elem) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001416 return 0;
1417 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001418
Radek Krejci5248f132015-10-09 10:34:25 +02001419 out.type = LYOUT_STREAM;
1420 out.method.f = stream;
1421
Radek Krejci722b0072016-02-01 17:09:45 +01001422 if (options & LYXML_PRINT_SIBLINGS) {
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001423 return dump_siblings(&out, elem, options);
1424 } else {
1425 return dump_elem(&out, elem, 0, options);
1426 }
Radek Krejci5248f132015-10-09 10:34:25 +02001427}
1428
1429API int
Radek Krejci722b0072016-02-01 17:09:45 +01001430lyxml_print_fd(int fd, const struct lyxml_elem *elem, int options)
Radek Krejci5248f132015-10-09 10:34:25 +02001431{
1432 struct lyout out;
1433
1434 if (fd < 0 || !elem) {
1435 return 0;
1436 }
1437
1438 out.type = LYOUT_FD;
1439 out.method.fd = fd;
1440
Radek Krejci722b0072016-02-01 17:09:45 +01001441 if (options & LYXML_PRINT_SIBLINGS) {
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001442 return dump_siblings(&out, elem, options);
1443 } else {
1444 return dump_elem(&out, elem, 0, options);
1445 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001446}
Radek Krejci6140e4e2015-10-09 15:50:55 +02001447
1448API int
Radek Krejci722b0072016-02-01 17:09:45 +01001449lyxml_print_mem(char **strp, const struct lyxml_elem *elem, int options)
Radek Krejci2fa0fc12015-10-14 18:14:29 +02001450{
1451 struct lyout out;
1452 int r;
1453
1454 if (!strp || !elem) {
1455 return 0;
1456 }
1457
1458 out.type = LYOUT_MEMORY;
1459 out.method.mem.buf = NULL;
1460 out.method.mem.len = 0;
1461 out.method.mem.size = 0;
1462
Radek Krejci722b0072016-02-01 17:09:45 +01001463 if (options & LYXML_PRINT_SIBLINGS) {
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001464 r = dump_siblings(&out, elem, options);
1465 } else {
1466 r = dump_elem(&out, elem, 0, options);
1467 }
Radek Krejci2fa0fc12015-10-14 18:14:29 +02001468
1469 *strp = out.method.mem.buf;
1470 return r;
1471}
1472
1473API int
Radek Krejci722b0072016-02-01 17:09:45 +01001474lyxml_print_clb(ssize_t (*writeclb)(void *arg, const void *buf, size_t count), void *arg, const struct lyxml_elem *elem, int options)
Radek Krejci6140e4e2015-10-09 15:50:55 +02001475{
1476 struct lyout out;
1477
1478 if (!writeclb || !elem) {
1479 return 0;
1480 }
1481
1482 out.type = LYOUT_CALLBACK;
Radek Krejci50929eb2015-10-09 18:14:15 +02001483 out.method.clb.f = writeclb;
1484 out.method.clb.arg = arg;
Radek Krejci6140e4e2015-10-09 15:50:55 +02001485
Radek Krejci722b0072016-02-01 17:09:45 +01001486 if (options & LYXML_PRINT_SIBLINGS) {
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001487 return dump_siblings(&out, elem, options);
1488 } else {
1489 return dump_elem(&out, elem, 0, options);
1490 }
Radek Krejci6140e4e2015-10-09 15:50:55 +02001491}