blob: 1c9408fae0558e14d70e1441a3e3cd40e5632d03 [file] [log] [blame]
/**
 * @file xml.c
 * @author Radek Krejci <rkrejci@cesnet.cz>
 * @brief XML parser implementation for libyang
 *
 * Copyright (c) 2015 CESNET, z.s.p.o.
 *
 * This source code is licensed under BSD 3-Clause License (the "License").
 * You may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://opensource.org/licenses/BSD-3-Clause
 */
14
Radek Krejci812b10a2015-05-28 16:48:25 +020015#include <assert.h>
Radek Krejci563427e2016-02-08 16:26:34 +010016#include <errno.h>
Radek Krejci709fee62015-04-15 13:56:19 +020017#include <ctype.h>
18#include <stdint.h>
Radek Krejcif0023a92015-04-20 20:51:39 +020019#include <stdio.h>
Radek Krejci02117302015-04-13 16:32:44 +020020#include <stdlib.h>
21#include <string.h>
Radek Krejci54ea8de2015-04-09 18:02:56 +020022#include <unistd.h>
Radek Krejci563427e2016-02-08 16:26:34 +010023#include <pthread.h>
Pavol Vicanb2570c12015-11-12 13:50:20 +010024#include <sys/stat.h>
25#include <sys/mman.h>
Radek Krejci563427e2016-02-08 16:26:34 +010026#include <sys/syscall.h>
Pavol Vicanb2570c12015-11-12 13:50:20 +010027#include <fcntl.h>
Radek Krejci54ea8de2015-04-09 18:02:56 +020028
Radek Krejci06a704e2015-04-22 14:50:49 +020029#include "common.h"
Michal Vasko6c810702018-03-14 16:23:21 +010030#include "hash_table.h"
Radek Krejci5248f132015-10-09 10:34:25 +020031#include "printer.h"
Radek Krejci5449d472015-10-26 14:35:56 +010032#include "parser.h"
Michal Vasko2d162e12015-09-24 14:33:29 +020033#include "tree_schema.h"
Michal Vaskofc5744d2015-10-22 12:09:34 +020034#include "xml_internal.h"
Radek Krejci54ea8de2015-04-09 18:02:56 +020035
/*
 * Advance the pointer expression p for as long as is_xmlws() accepts the
 * character it points at. The argument is parenthesized so that any pointer
 * lvalue expression can be passed safely; p is evaluated repeatedly, so it
 * must not have side effects.
 */
#define ign_xmlws(p) \
    while (is_xmlws(*(p))) { \
        (p)++; \
    }
Radek Krejci02117302015-04-13 16:32:44 +020040
Michal Vasko88c29542015-11-27 14:57:53 +010041static struct lyxml_attr *lyxml_dup_attr(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_attr *attr);
42
Michal Vasko1e62a092015-12-01 12:27:20 +010043API const struct lyxml_ns *
44lyxml_get_ns(const struct lyxml_elem *elem, const char *prefix)
Michal Vaskof8879c22015-08-21 09:07:36 +020045{
46 struct lyxml_attr *attr;
Michal Vaskof8879c22015-08-21 09:07:36 +020047
48 if (!elem) {
49 return NULL;
50 }
51
Michal Vaskof8879c22015-08-21 09:07:36 +020052 for (attr = elem->attr; attr; attr = attr->next) {
53 if (attr->type != LYXML_ATTR_NS) {
54 continue;
55 }
56 if (!attr->name) {
Radek Krejci13f3f152016-10-03 11:40:13 +020057 if (!prefix) {
Michal Vaskof8879c22015-08-21 09:07:36 +020058 /* default namespace found */
59 if (!attr->value) {
60 /* empty default namespace -> no default namespace */
61 return NULL;
62 }
63 return (struct lyxml_ns *)attr;
64 }
Radek Krejci7d39dae2016-10-03 17:33:01 +020065 } else if (prefix && !strcmp(attr->name, prefix)) {
Michal Vaskof8879c22015-08-21 09:07:36 +020066 /* prefix found */
67 return (struct lyxml_ns *)attr;
68 }
69 }
70
71 /* go recursively */
72 return lyxml_get_ns(elem->parent, prefix);
73}
74
Michal Vasko88c29542015-11-27 14:57:53 +010075static void
76lyxml_correct_attr_ns(struct ly_ctx *ctx, struct lyxml_attr *attr, struct lyxml_elem *attr_parent, int copy_ns)
77{
78 const struct lyxml_ns *tmp_ns;
Michal Vaskof6109112015-12-03 14:00:42 +010079 struct lyxml_elem *ns_root, *attr_root;
Michal Vasko88c29542015-11-27 14:57:53 +010080
81 if ((attr->type != LYXML_ATTR_NS) && attr->ns) {
Michal Vaskof6109112015-12-03 14:00:42 +010082 /* find the root of attr */
83 for (attr_root = attr_parent; attr_root->parent; attr_root = attr_root->parent);
Michal Vasko88c29542015-11-27 14:57:53 +010084
85 /* find the root of attr NS */
86 for (ns_root = attr->ns->parent; ns_root->parent; ns_root = ns_root->parent);
87
Michal Vaskof6109112015-12-03 14:00:42 +010088 /* attr NS is defined outside attr parent subtree */
89 if (ns_root != attr_root) {
Michal Vasko88c29542015-11-27 14:57:53 +010090 if (copy_ns) {
91 tmp_ns = attr->ns;
92 /* we may have already copied the NS over? */
Radek Krejci66aca402016-05-24 15:23:02 +020093 attr->ns = lyxml_get_ns(attr_parent, tmp_ns->prefix);
Michal Vasko88c29542015-11-27 14:57:53 +010094
95 /* we haven't copied it over, copy it now */
96 if (!attr->ns) {
Michal Vaskof6109112015-12-03 14:00:42 +010097 attr->ns = (struct lyxml_ns *)lyxml_dup_attr(ctx, attr_parent, (struct lyxml_attr *)tmp_ns);
Michal Vasko88c29542015-11-27 14:57:53 +010098 }
99 } else {
100 attr->ns = NULL;
101 }
102 }
103 }
104}
105
106static struct lyxml_attr *
Michal Vaskof8879c22015-08-21 09:07:36 +0200107lyxml_dup_attr(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_attr *attr)
108{
109 struct lyxml_attr *result, *a;
110
111 if (!attr || !parent) {
112 return NULL;
113 }
114
115 if (attr->type == LYXML_ATTR_NS) {
116 /* this is correct, despite that all attributes seems like a standard
117 * attributes (struct lyxml_attr), some of them can be namespace
118 * definitions (and in that case they are struct lyxml_ns).
119 */
120 result = (struct lyxml_attr *)calloc(1, sizeof (struct lyxml_ns));
121 } else {
122 result = calloc(1, sizeof (struct lyxml_attr));
123 }
Michal Vasko53b7da02018-02-13 15:28:42 +0100124 LY_CHECK_ERR_RETURN(!result, LOGMEM(ctx), NULL);
Radek Krejcia8d111f2017-05-31 13:57:37 +0200125
Michal Vaskof8879c22015-08-21 09:07:36 +0200126 result->value = lydict_insert(ctx, attr->value, 0);
127 result->name = lydict_insert(ctx, attr->name, 0);
128 result->type = attr->type;
129
130 /* set namespace in case of standard attributes */
131 if (result->type == LYXML_ATTR_STD && attr->ns) {
Michal Vasko88c29542015-11-27 14:57:53 +0100132 result->ns = attr->ns;
133 lyxml_correct_attr_ns(ctx, result, parent, 1);
Michal Vaskof8879c22015-08-21 09:07:36 +0200134 }
135
136 /* set parent pointer in case of namespace attribute */
137 if (result->type == LYXML_ATTR_NS) {
138 ((struct lyxml_ns *)result)->parent = parent;
139 }
140
141 /* put attribute into the parent's attributes list */
142 if (parent->attr) {
143 /* go to the end of the list */
144 for (a = parent->attr; a->next; a = a->next);
145 /* and append new attribute */
146 a->next = result;
147 } else {
148 /* add the first attribute in the list */
149 parent->attr = result;
150 }
151
152 return result;
153}
154
Michal Vaskof748dbc2016-04-05 11:27:47 +0200155void
Michal Vasko88c29542015-11-27 14:57:53 +0100156lyxml_correct_elem_ns(struct ly_ctx *ctx, struct lyxml_elem *elem, int copy_ns, int correct_attrs)
157{
158 const struct lyxml_ns *tmp_ns;
Radek Krejcid5be5682016-01-14 16:23:22 +0100159 struct lyxml_elem *elem_root, *ns_root, *tmp, *iter;
Michal Vasko88c29542015-11-27 14:57:53 +0100160 struct lyxml_attr *attr;
161
162 /* find the root of elem */
163 for (elem_root = elem; elem_root->parent; elem_root = elem_root->parent);
164
Radek Krejcid5be5682016-01-14 16:23:22 +0100165 LY_TREE_DFS_BEGIN(elem, tmp, iter) {
166 if (iter->ns) {
Michal Vasko88c29542015-11-27 14:57:53 +0100167 /* find the root of elem NS */
Radek Krejcic071c542016-01-27 14:57:51 +0100168 for (ns_root = iter->ns->parent; ns_root; ns_root = ns_root->parent);
Michal Vasko88c29542015-11-27 14:57:53 +0100169
170 /* elem NS is defined outside elem subtree */
171 if (ns_root != elem_root) {
172 if (copy_ns) {
Radek Krejcid5be5682016-01-14 16:23:22 +0100173 tmp_ns = iter->ns;
Michal Vasko88c29542015-11-27 14:57:53 +0100174 /* we may have already copied the NS over? */
Radek Krejcid5be5682016-01-14 16:23:22 +0100175 iter->ns = lyxml_get_ns(iter, tmp_ns->prefix);
Michal Vasko88c29542015-11-27 14:57:53 +0100176
177 /* we haven't copied it over, copy it now */
Radek Krejcid5be5682016-01-14 16:23:22 +0100178 if (!iter->ns) {
179 iter->ns = (struct lyxml_ns *)lyxml_dup_attr(ctx, iter, (struct lyxml_attr *)tmp_ns);
Michal Vasko88c29542015-11-27 14:57:53 +0100180 }
181 } else {
Radek Krejcid5be5682016-01-14 16:23:22 +0100182 iter->ns = NULL;
Michal Vasko88c29542015-11-27 14:57:53 +0100183 }
184 }
185 }
186 if (correct_attrs) {
Radek Krejcid5be5682016-01-14 16:23:22 +0100187 LY_TREE_FOR(iter->attr, attr) {
Michal Vasko88c29542015-11-27 14:57:53 +0100188 lyxml_correct_attr_ns(ctx, attr, elem_root, copy_ns);
189 }
190 }
Radek Krejcid5be5682016-01-14 16:23:22 +0100191 LY_TREE_DFS_END(elem, tmp, iter);
Michal Vasko88c29542015-11-27 14:57:53 +0100192 }
193}
194
Michal Vaskof8879c22015-08-21 09:07:36 +0200195struct lyxml_elem *
Michal Vaskodbc40582019-03-12 10:54:24 +0100196lyxml_dup_elem(struct ly_ctx *ctx, struct lyxml_elem *elem, struct lyxml_elem *parent, int recursive, int with_siblings)
Michal Vaskof8879c22015-08-21 09:07:36 +0200197{
Michal Vaskodbc40582019-03-12 10:54:24 +0100198 struct lyxml_elem *dup, *result = NULL;
Michal Vaskof8879c22015-08-21 09:07:36 +0200199 struct lyxml_attr *attr;
200
201 if (!elem) {
202 return NULL;
203 }
204
Michal Vaskodbc40582019-03-12 10:54:24 +0100205 LY_TREE_FOR(elem, elem) {
206 dup = calloc(1, sizeof *dup);
207 LY_CHECK_ERR_RETURN(!dup, LOGMEM(ctx), NULL);
208 dup->content = lydict_insert(ctx, elem->content, 0);
209 dup->name = lydict_insert(ctx, elem->name, 0);
210 dup->flags = elem->flags;
211 dup->prev = dup;
Michal Vaskof8879c22015-08-21 09:07:36 +0200212
Michal Vaskodbc40582019-03-12 10:54:24 +0100213 if (parent) {
214 lyxml_add_child(ctx, parent, dup);
215 }
Michal Vaskof8879c22015-08-21 09:07:36 +0200216
Michal Vaskodbc40582019-03-12 10:54:24 +0100217 /* keep old namespace for now */
218 dup->ns = elem->ns;
Michal Vasko88c29542015-11-27 14:57:53 +0100219
Michal Vaskodbc40582019-03-12 10:54:24 +0100220 /* duplicate attributes */
221 for (attr = elem->attr; attr; attr = attr->next) {
222 lyxml_dup_attr(ctx, dup, attr);
223 }
Michal Vaskof8879c22015-08-21 09:07:36 +0200224
Michal Vaskodbc40582019-03-12 10:54:24 +0100225 /* correct namespaces */
226 lyxml_correct_elem_ns(ctx, dup, 1, 0);
Michal Vaskoa5c958f2018-12-11 08:31:42 +0100227
Michal Vaskodbc40582019-03-12 10:54:24 +0100228 if (recursive) {
229 /* duplicate children */
230 lyxml_dup_elem(ctx, elem->child, dup, 1, 1);
231 }
Michal Vaskof8879c22015-08-21 09:07:36 +0200232
Michal Vasko85f218c2019-03-13 11:29:50 +0100233 /* set result (first sibling) */
Michal Vaskodbc40582019-03-12 10:54:24 +0100234 if (!result) {
235 result = dup;
Michal Vaskodbc40582019-03-12 10:54:24 +0100236 }
237
238 if (!with_siblings) {
239 break;
240 }
Michal Vaskof8879c22015-08-21 09:07:36 +0200241 }
242
243 return result;
244}
245
Radek Krejci6879d952017-01-09 12:49:19 +0100246API struct lyxml_elem *
247lyxml_dup(struct ly_ctx *ctx, struct lyxml_elem *root)
248{
Michal Vaskodbc40582019-03-12 10:54:24 +0100249 return lyxml_dup_elem(ctx, root, NULL, 1, 0);
Radek Krejci6879d952017-01-09 12:49:19 +0100250}
251
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200252void
Michal Vaskof8879c22015-08-21 09:07:36 +0200253lyxml_unlink_elem(struct ly_ctx *ctx, struct lyxml_elem *elem, int copy_ns)
Radek Krejci02117302015-04-13 16:32:44 +0200254{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200255 struct lyxml_elem *parent, *first;
Radek Krejci02117302015-04-13 16:32:44 +0200256
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200257 if (!elem) {
258 return;
259 }
Radek Krejci02117302015-04-13 16:32:44 +0200260
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200261 /* store pointers to important nodes */
262 parent = elem->parent;
Radek Krejcie1f13912015-05-26 15:17:38 +0200263
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200264 /* unlink from parent */
265 if (parent) {
266 if (parent->child == elem) {
267 /* we unlink the first child */
268 /* update the parent's link */
269 parent->child = elem->next;
270 }
271 /* forget about the parent */
272 elem->parent = NULL;
273 }
Radek Krejci02117302015-04-13 16:32:44 +0200274
Michal Vasko88c29542015-11-27 14:57:53 +0100275 if (copy_ns < 2) {
276 lyxml_correct_elem_ns(ctx, elem, copy_ns, 1);
277 }
278
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200279 /* unlink from siblings */
280 if (elem->prev == elem) {
281 /* there are no more siblings */
282 return;
283 }
284 if (elem->next) {
285 elem->next->prev = elem->prev;
286 } else {
287 /* unlinking the last element */
288 if (parent) {
289 first = parent->child;
290 } else {
291 first = elem;
Radek Krejcie4fffcf2016-02-23 16:06:25 +0100292 while (first->prev->next) {
293 first = first->prev;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200294 }
295 }
296 first->prev = elem->prev;
297 }
298 if (elem->prev->next) {
299 elem->prev->next = elem->next;
300 }
Radek Krejcida04f4a2015-05-21 12:54:09 +0200301
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200302 /* clean up the unlinked element */
303 elem->next = NULL;
304 elem->prev = elem;
Radek Krejci02117302015-04-13 16:32:44 +0200305}
306
Michal Vasko345da0a2015-12-02 10:35:55 +0100307API void
308lyxml_unlink(struct ly_ctx *ctx, struct lyxml_elem *elem)
309{
310 if (!elem) {
311 return;
312 }
313
314 lyxml_unlink_elem(ctx, elem, 1);
315}
316
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200317void
Radek Krejci00249f22015-07-07 13:43:28 +0200318lyxml_free_attr(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_attr *attr)
Radek Krejci02117302015-04-13 16:32:44 +0200319{
Radek Krejci00249f22015-07-07 13:43:28 +0200320 struct lyxml_attr *aiter, *aprev;
321
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200322 if (!attr) {
323 return;
324 }
Radek Krejci02117302015-04-13 16:32:44 +0200325
Radek Krejci00249f22015-07-07 13:43:28 +0200326 if (parent) {
327 /* unlink attribute from the parent's list of attributes */
328 aprev = NULL;
329 for (aiter = parent->attr; aiter; aiter = aiter->next) {
330 if (aiter == attr) {
331 break;
332 }
333 aprev = aiter;
334 }
335 if (!aiter) {
336 /* attribute to remove not found */
337 return;
338 }
339
340 if (!aprev) {
341 /* attribute is first in parent's list of attributes */
342 parent->attr = attr->next;
343 } else {
344 /* reconnect previous attribute to the next */
345 aprev->next = attr->next;
346 }
347 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200348 lydict_remove(ctx, attr->name);
349 lydict_remove(ctx, attr->value);
350 free(attr);
Radek Krejci02117302015-04-13 16:32:44 +0200351}
352
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200353void
354lyxml_free_attrs(struct ly_ctx *ctx, struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200355{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200356 struct lyxml_attr *a, *next;
357 if (!elem || !elem->attr) {
358 return;
359 }
Radek Krejci02117302015-04-13 16:32:44 +0200360
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200361 a = elem->attr;
362 do {
363 next = a->next;
Radek Krejci02117302015-04-13 16:32:44 +0200364
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200365 lydict_remove(ctx, a->name);
366 lydict_remove(ctx, a->value);
367 free(a);
Radek Krejci02117302015-04-13 16:32:44 +0200368
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200369 a = next;
370 } while (a);
Radek Krejci02117302015-04-13 16:32:44 +0200371}
372
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200373static void
Michal Vasko272e42f2015-12-02 12:20:37 +0100374lyxml_free_elem(struct ly_ctx *ctx, struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200375{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200376 struct lyxml_elem *e, *next;
Radek Krejci02117302015-04-13 16:32:44 +0200377
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200378 if (!elem) {
379 return;
380 }
Radek Krejci02117302015-04-13 16:32:44 +0200381
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200382 lyxml_free_attrs(ctx, elem);
383 LY_TREE_FOR_SAFE(elem->child, next, e) {
Michal Vasko272e42f2015-12-02 12:20:37 +0100384 lyxml_free_elem(ctx, e);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200385 }
386 lydict_remove(ctx, elem->name);
387 lydict_remove(ctx, elem->content);
388 free(elem);
Radek Krejci02117302015-04-13 16:32:44 +0200389}
390
Radek Krejcic6704c82015-10-06 11:12:45 +0200391API void
Michal Vasko345da0a2015-12-02 10:35:55 +0100392lyxml_free(struct ly_ctx *ctx, struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200393{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200394 if (!elem) {
395 return;
396 }
Radek Krejci02117302015-04-13 16:32:44 +0200397
Michal Vasko61f7ccb2015-10-23 10:15:08 +0200398 lyxml_unlink_elem(ctx, elem, 2);
Michal Vasko272e42f2015-12-02 12:20:37 +0100399 lyxml_free_elem(ctx, elem);
Radek Krejci02117302015-04-13 16:32:44 +0200400}
401
Radek Krejci8f8db232016-05-23 16:48:21 +0200402API void
403lyxml_free_withsiblings(struct ly_ctx *ctx, struct lyxml_elem *elem)
404{
405 struct lyxml_elem *iter, *aux;
406
407 if (!elem) {
408 return;
409 }
410
411 /* optimization - avoid freeing (unlinking) the last node of the siblings list */
412 /* so, first, free the node's predecessors to the beginning of the list ... */
413 for(iter = elem->prev; iter->next; iter = aux) {
414 aux = iter->prev;
415 lyxml_free(ctx, iter);
416 }
417 /* ... then, the node is the first in the siblings list, so free them all */
418 LY_TREE_FOR_SAFE(elem, aux, iter) {
419 lyxml_free(ctx, iter);
420 }
421}
422
Michal Vasko88c29542015-11-27 14:57:53 +0100423API const char *
Michal Vasko1e62a092015-12-01 12:27:20 +0100424lyxml_get_attr(const struct lyxml_elem *elem, const char *name, const char *ns)
Radek Krejcida04f4a2015-05-21 12:54:09 +0200425{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200426 struct lyxml_attr *a;
Radek Krejcida04f4a2015-05-21 12:54:09 +0200427
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200428 assert(elem);
429 assert(name);
Radek Krejcida04f4a2015-05-21 12:54:09 +0200430
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200431 for (a = elem->attr; a; a = a->next) {
432 if (a->type != LYXML_ATTR_STD) {
433 continue;
434 }
Radek Krejcida04f4a2015-05-21 12:54:09 +0200435
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200436 if (!strcmp(name, a->name)) {
437 if ((!ns && !a->ns) || (ns && a->ns && !strcmp(ns, a->ns->value))) {
438 return a->value;
439 }
440 }
441 }
Radek Krejcida04f4a2015-05-21 12:54:09 +0200442
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200443 return NULL;
Radek Krejcida04f4a2015-05-21 12:54:09 +0200444}
445
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200446int
Michal Vaskof8879c22015-08-21 09:07:36 +0200447lyxml_add_child(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_elem *elem)
Radek Krejci02117302015-04-13 16:32:44 +0200448{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200449 struct lyxml_elem *e;
Radek Krejci02117302015-04-13 16:32:44 +0200450
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200451 assert(parent);
452 assert(elem);
Radek Krejci02117302015-04-13 16:32:44 +0200453
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200454 /* (re)link element to parent */
455 if (elem->parent) {
Michal Vaskof8879c22015-08-21 09:07:36 +0200456 lyxml_unlink_elem(ctx, elem, 1);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200457 }
458 elem->parent = parent;
Radek Krejci02117302015-04-13 16:32:44 +0200459
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200460 /* link parent to element */
461 if (parent->child) {
462 e = parent->child;
463 elem->prev = e->prev;
464 elem->next = NULL;
465 elem->prev->next = elem;
466 e->prev = elem;
467 } else {
468 parent->child = elem;
469 elem->prev = elem;
470 elem->next = NULL;
471 }
Radek Krejci02117302015-04-13 16:32:44 +0200472
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200473 return EXIT_SUCCESS;
Radek Krejci02117302015-04-13 16:32:44 +0200474}
475
Michal Vasko3b855722015-08-28 16:01:18 +0200476int
Michal Vasko53b7da02018-02-13 15:28:42 +0100477lyxml_getutf8(struct ly_ctx *ctx, const char *buf, unsigned int *read)
Radek Krejci02117302015-04-13 16:32:44 +0200478{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200479 int c, aux;
480 int i;
Radek Krejci02117302015-04-13 16:32:44 +0200481
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200482 c = buf[0];
483 *read = 0;
Radek Krejci02117302015-04-13 16:32:44 +0200484
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200485 /* buf is NULL terminated string, so 0 means EOF */
486 if (!c) {
Michal Vasko53b7da02018-02-13 15:28:42 +0100487 LOGVAL(ctx, LYE_EOF, LY_VLOG_NONE, NULL);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200488 return 0;
489 }
490 *read = 1;
Radek Krejci02117302015-04-13 16:32:44 +0200491
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200492 /* process character byte(s) */
493 if ((c & 0xf8) == 0xf0) {
494 /* four bytes character */
495 *read = 4;
Radek Krejci02117302015-04-13 16:32:44 +0200496
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200497 c &= 0x07;
498 for (i = 1; i <= 3; i++) {
499 aux = buf[i];
500 if ((aux & 0xc0) != 0x80) {
Michal Vasko53b7da02018-02-13 15:28:42 +0100501 LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200502 return 0;
503 }
Radek Krejci02117302015-04-13 16:32:44 +0200504
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200505 c = (c << 6) | (aux & 0x3f);
506 }
Radek Krejci02117302015-04-13 16:32:44 +0200507
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200508 if (c < 0x1000 || c > 0x10ffff) {
Michal Vasko53b7da02018-02-13 15:28:42 +0100509 LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200510 return 0;
511 }
512 } else if ((c & 0xf0) == 0xe0) {
513 /* three bytes character */
514 *read = 3;
Radek Krejci02117302015-04-13 16:32:44 +0200515
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200516 c &= 0x0f;
517 for (i = 1; i <= 2; i++) {
518 aux = buf[i];
519 if ((aux & 0xc0) != 0x80) {
Michal Vasko53b7da02018-02-13 15:28:42 +0100520 LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200521 return 0;
522 }
Radek Krejci02117302015-04-13 16:32:44 +0200523
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200524 c = (c << 6) | (aux & 0x3f);
525 }
Radek Krejci02117302015-04-13 16:32:44 +0200526
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200527 if (c < 0x800 || (c > 0xd7ff && c < 0xe000) || c > 0xfffd) {
Michal Vasko53b7da02018-02-13 15:28:42 +0100528 LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200529 return 0;
530 }
531 } else if ((c & 0xe0) == 0xc0) {
532 /* two bytes character */
533 *read = 2;
Radek Krejci02117302015-04-13 16:32:44 +0200534
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200535 aux = buf[1];
536 if ((aux & 0xc0) != 0x80) {
Michal Vasko53b7da02018-02-13 15:28:42 +0100537 LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200538 return 0;
539 }
540 c = ((c & 0x1f) << 6) | (aux & 0x3f);
Radek Krejci02117302015-04-13 16:32:44 +0200541
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200542 if (c < 0x80) {
Michal Vasko53b7da02018-02-13 15:28:42 +0100543 LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200544 return 0;
545 }
546 } else if (!(c & 0x80)) {
547 /* one byte character */
548 if (c < 0x20 && c != 0x9 && c != 0xa && c != 0xd) {
549 /* invalid character */
Michal Vasko53b7da02018-02-13 15:28:42 +0100550 LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200551 return 0;
552 }
553 } else {
554 /* invalid character */
Michal Vasko53b7da02018-02-13 15:28:42 +0100555 LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200556 return 0;
557 }
Radek Krejci02117302015-04-13 16:32:44 +0200558
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200559 return c;
Radek Krejci02117302015-04-13 16:32:44 +0200560}
561
Michal Vasko0d343d12015-08-24 14:57:36 +0200562/* logs directly */
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200563static int
Michal Vasko53b7da02018-02-13 15:28:42 +0100564parse_ignore(struct ly_ctx *ctx, const char *data, const char *endstr, unsigned int *len)
Radek Krejci02117302015-04-13 16:32:44 +0200565{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200566 unsigned int slen;
567 const char *c = data;
Radek Krejci02117302015-04-13 16:32:44 +0200568
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200569 slen = strlen(endstr);
Radek Krejci02117302015-04-13 16:32:44 +0200570
Radek Krejcifb783942016-10-06 09:49:33 +0200571 while (*c && strncmp(c, endstr, slen)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200572 c++;
573 }
574 if (!*c) {
Michal Vasko53b7da02018-02-13 15:28:42 +0100575 LOGVAL(ctx, LYE_XML_MISS, LY_VLOG_NONE, NULL, "closing sequence", endstr);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200576 return EXIT_FAILURE;
577 }
578 c += slen;
Radek Krejci02117302015-04-13 16:32:44 +0200579
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200580 *len = c - data;
581 return EXIT_SUCCESS;
Radek Krejci02117302015-04-13 16:32:44 +0200582}
583
Michal Vasko53b7da02018-02-13 15:28:42 +0100584/* logs directly, fails when return == NULL and *len == 0 */
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200585static char *
Michal Vasko53b7da02018-02-13 15:28:42 +0100586parse_text(struct ly_ctx *ctx, const char *data, char delim, unsigned int *len)
Radek Krejci02117302015-04-13 16:32:44 +0200587{
Radek Krejci709fee62015-04-15 13:56:19 +0200588#define BUFSIZE 1024
Radek Krejci02117302015-04-13 16:32:44 +0200589
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200590 char buf[BUFSIZE];
591 char *result = NULL, *aux;
592 unsigned int r;
593 int o, size = 0;
594 int cdsect = 0;
595 int32_t n;
Radek Krejci709fee62015-04-15 13:56:19 +0200596
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200597 for (*len = o = 0; cdsect || data[*len] != delim; o++) {
Radek Krejcifb783942016-10-06 09:49:33 +0200598 if (!data[*len] || (!cdsect && !strncmp(&data[*len], "]]>", 3))) {
Michal Vasko53b7da02018-02-13 15:28:42 +0100599 LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "element content, \"]]>\" found");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200600 goto error;
601 }
Radek Krejci709fee62015-04-15 13:56:19 +0200602
Radek Krejcia4a84062015-04-16 13:00:10 +0200603loop:
604
Radek Krejcia0802a82017-02-08 12:41:05 +0100605 if (o > BUFSIZE - 4) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200606 /* add buffer into the result */
607 if (result) {
608 size = size + o;
Radek Krejcia8d111f2017-05-31 13:57:37 +0200609 result = ly_realloc(result, size + 1);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200610 } else {
611 size = o;
612 result = malloc((size + 1) * sizeof *result);
613 }
Michal Vasko53b7da02018-02-13 15:28:42 +0100614 LY_CHECK_ERR_RETURN(!result, LOGMEM(ctx), NULL);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200615 memcpy(&result[size - o], buf, o);
Radek Krejci709fee62015-04-15 13:56:19 +0200616
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200617 /* write again into the beginning of the buffer */
618 o = 0;
619 }
Radek Krejci709fee62015-04-15 13:56:19 +0200620
Radek Krejcifb783942016-10-06 09:49:33 +0200621 if (cdsect || !strncmp(&data[*len], "<![CDATA[", 9)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200622 /* CDSect */
623 if (!cdsect) {
624 cdsect = 1;
625 *len += 9;
626 }
Radek Krejcifb783942016-10-06 09:49:33 +0200627 if (data[*len] && !strncmp(&data[*len], "]]>", 3)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200628 *len += 3;
629 cdsect = 0;
630 o--; /* we don't write any data in this iteration */
631 } else {
632 buf[o] = data[*len];
633 (*len)++;
634 }
635 } else if (data[*len] == '&') {
636 (*len)++;
637 if (data[*len] != '#') {
638 /* entity reference - only predefined refs are supported */
Radek Krejcifb783942016-10-06 09:49:33 +0200639 if (!strncmp(&data[*len], "lt;", 3)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200640 buf[o] = '<';
641 *len += 3;
Radek Krejcifb783942016-10-06 09:49:33 +0200642 } else if (!strncmp(&data[*len], "gt;", 3)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200643 buf[o] = '>';
644 *len += 3;
Radek Krejcifb783942016-10-06 09:49:33 +0200645 } else if (!strncmp(&data[*len], "amp;", 4)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200646 buf[o] = '&';
647 *len += 4;
Radek Krejcifb783942016-10-06 09:49:33 +0200648 } else if (!strncmp(&data[*len], "apos;", 5)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200649 buf[o] = '\'';
650 *len += 5;
Radek Krejcifb783942016-10-06 09:49:33 +0200651 } else if (!strncmp(&data[*len], "quot;", 5)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200652 buf[o] = '\"';
653 *len += 5;
654 } else {
Michal Vasko53b7da02018-02-13 15:28:42 +0100655 LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "entity reference (only predefined references are supported)");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200656 goto error;
657 }
658 } else {
659 /* character reference */
660 (*len)++;
661 if (isdigit(data[*len])) {
662 for (n = 0; isdigit(data[*len]); (*len)++) {
663 n = (10 * n) + (data[*len] - '0');
664 }
665 if (data[*len] != ';') {
Michal Vasko53b7da02018-02-13 15:28:42 +0100666 LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "character reference, missing semicolon");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200667 goto error;
668 }
669 } else if (data[(*len)++] == 'x' && isxdigit(data[*len])) {
670 for (n = 0; isxdigit(data[*len]); (*len)++) {
671 if (isdigit(data[*len])) {
672 r = (data[*len] - '0');
673 } else if (data[*len] > 'F') {
674 r = 10 + (data[*len] - 'a');
675 } else {
676 r = 10 + (data[*len] - 'A');
677 }
678 n = (16 * n) + r;
679 }
680 } else {
Michal Vasko53b7da02018-02-13 15:28:42 +0100681 LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "character reference");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200682 goto error;
Radek Krejci709fee62015-04-15 13:56:19 +0200683
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200684 }
Michal Vasko53b7da02018-02-13 15:28:42 +0100685 r = pututf8(ctx, &buf[o], n);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200686 if (!r) {
Michal Vasko53b7da02018-02-13 15:28:42 +0100687 LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "character reference value");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200688 goto error;
689 }
690 o += r - 1; /* o is ++ in for loop */
691 (*len)++;
692 }
693 } else {
Michal Vasko53b7da02018-02-13 15:28:42 +0100694 r = copyutf8(ctx, &buf[o], &data[*len]);
Radek Krejcideee60e2016-09-23 15:21:14 +0200695 if (!r) {
696 goto error;
697 }
698
699 o += r - 1; /* o is ++ in for loop */
700 (*len) = (*len) + r;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200701 }
702 }
Radek Krejci02117302015-04-13 16:32:44 +0200703
Radek Krejcifb783942016-10-06 09:49:33 +0200704 if (delim == '<' && !strncmp(&data[*len], "<![CDATA[", 9)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200705 /* ignore loop's end condition on beginning of CDSect */
706 goto loop;
707 }
Radek Krejci709fee62015-04-15 13:56:19 +0200708#undef BUFSIZE
709
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200710 if (o) {
711 if (result) {
712 size = size + o;
713 aux = realloc(result, size + 1);
714 result = aux;
715 } else {
716 size = o;
717 result = malloc((size + 1) * sizeof *result);
718 }
Michal Vasko53b7da02018-02-13 15:28:42 +0100719 LY_CHECK_ERR_RETURN(!result, LOGMEM(ctx), NULL);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200720 memcpy(&result[size - o], buf, o);
721 }
722 if (result) {
723 result[size] = '\0';
Radek Krejcia5269642015-07-20 19:04:11 +0200724 } else {
725 size = 0;
726 result = strdup("");
Michal Vasko53b7da02018-02-13 15:28:42 +0100727 LY_CHECK_ERR_RETURN(!result, LOGMEM(ctx), NULL)
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200728 }
Radek Krejci02117302015-04-13 16:32:44 +0200729
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200730 return result;
Radek Krejci709fee62015-04-15 13:56:19 +0200731
732error:
Michal Vasko53b7da02018-02-13 15:28:42 +0100733 *len = 0;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200734 free(result);
735 return NULL;
Radek Krejci02117302015-04-13 16:32:44 +0200736}
737
Michal Vasko0d343d12015-08-24 14:57:36 +0200738/* logs directly */
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200739static struct lyxml_attr *
Radek Krejci00249f22015-07-07 13:43:28 +0200740parse_attr(struct ly_ctx *ctx, const char *data, unsigned int *len, struct lyxml_elem *parent)
Radek Krejci674e1f82015-04-21 14:12:19 +0200741{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200742 const char *c = data, *start, *delim;
aweast069a6c02018-05-30 16:44:18 -0500743 char *prefix = NULL, xml_flag, *str;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200744 int uc;
Radek Krejci00249f22015-07-07 13:43:28 +0200745 struct lyxml_attr *attr = NULL, *a;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200746 unsigned int size;
Radek Krejci02117302015-04-13 16:32:44 +0200747
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200748 /* check if it is attribute or namespace */
Radek Krejcifb783942016-10-06 09:49:33 +0200749 if (!strncmp(c, "xmlns", 5)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200750 /* namespace */
751 attr = calloc(1, sizeof (struct lyxml_ns));
Michal Vasko53b7da02018-02-13 15:28:42 +0100752 LY_CHECK_ERR_RETURN(!attr, LOGMEM(ctx), NULL);
Radek Krejcia8d111f2017-05-31 13:57:37 +0200753
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200754 attr->type = LYXML_ATTR_NS;
Radek Krejci00249f22015-07-07 13:43:28 +0200755 ((struct lyxml_ns *)attr)->parent = parent;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200756 c += 5;
757 if (*c != ':') {
758 /* default namespace, prefix will be empty */
759 goto equal;
760 }
761 c++; /* go after ':' to the prefix value */
762 } else {
763 /* attribute */
764 attr = calloc(1, sizeof *attr);
Michal Vasko53b7da02018-02-13 15:28:42 +0100765 LY_CHECK_ERR_RETURN(!attr, LOGMEM(ctx), NULL);
Radek Krejcia8d111f2017-05-31 13:57:37 +0200766
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200767 attr->type = LYXML_ATTR_STD;
768 }
Radek Krejci4ea08382015-04-21 09:41:40 +0200769
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200770 /* process name part of the attribute */
771 start = c;
Michal Vasko53b7da02018-02-13 15:28:42 +0100772 uc = lyxml_getutf8(ctx, c, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200773 if (!is_xmlnamestartchar(uc)) {
Michal Vasko53b7da02018-02-13 15:28:42 +0100774 LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "NameStartChar of the attribute");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200775 free(attr);
776 return NULL;
777 }
Michal Vasko62d5a6b2018-01-03 14:31:39 +0100778 xml_flag = 4;
779 if (*c == 'x') {
780 xml_flag = 1;
781 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200782 c += size;
Michal Vasko53b7da02018-02-13 15:28:42 +0100783 uc = lyxml_getutf8(ctx, c, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200784 while (is_xmlnamechar(uc)) {
Michal Vasko62d5a6b2018-01-03 14:31:39 +0100785 if (attr->type == LYXML_ATTR_STD) {
786 if ((*c == ':') && (xml_flag != 3)) {
787 /* attribute in a namespace (but disregard the special "xml" namespace) */
788 start = c + 1;
Radek Krejci4ea08382015-04-21 09:41:40 +0200789
Michal Vasko62d5a6b2018-01-03 14:31:39 +0100790 /* look for the prefix in namespaces */
aweast069a6c02018-05-30 16:44:18 -0500791 prefix = malloc((c - data + 1) * sizeof *prefix);
792 LY_CHECK_ERR_GOTO(!prefix, LOGMEM(ctx), error);
Michal Vasko62d5a6b2018-01-03 14:31:39 +0100793 memcpy(prefix, data, c - data);
794 prefix[c - data] = '\0';
795 attr->ns = lyxml_get_ns(parent, prefix);
796 } else if (((*c == 'm') && (xml_flag == 1)) ||
797 ((*c == 'l') && (xml_flag == 2))) {
798 ++xml_flag;
799 } else {
800 xml_flag = 4;
801 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200802 }
803 c += size;
Michal Vasko53b7da02018-02-13 15:28:42 +0100804 uc = lyxml_getutf8(ctx, c, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200805 }
Radek Krejci674e1f82015-04-21 14:12:19 +0200806
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200807 /* store the name */
808 size = c - start;
809 attr->name = lydict_insert(ctx, start, size);
Radek Krejci674e1f82015-04-21 14:12:19 +0200810
811equal:
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200812 /* check Eq mark that can be surrounded by whitespaces */
813 ign_xmlws(c);
814 if (*c != '=') {
Michal Vasko53b7da02018-02-13 15:28:42 +0100815 LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "attribute definition, \"=\" expected");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200816 goto error;
817 }
818 c++;
819 ign_xmlws(c);
Radek Krejci02117302015-04-13 16:32:44 +0200820
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200821 /* process value part of the attribute */
822 if (!*c || (*c != '"' && *c != '\'')) {
Michal Vasko53b7da02018-02-13 15:28:42 +0100823 LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "attribute value, \" or \' expected");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200824 goto error;
825 }
826 delim = c;
Michal Vasko53b7da02018-02-13 15:28:42 +0100827 str = parse_text(ctx, ++c, *delim, &size);
828 if (!str && !size) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200829 goto error;
830 }
Michal Vasko53b7da02018-02-13 15:28:42 +0100831 attr->value = lydict_insert_zc(ctx, str);
Radek Krejci02117302015-04-13 16:32:44 +0200832
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200833 *len = c + size + 1 - data; /* +1 is delimiter size */
Radek Krejci00249f22015-07-07 13:43:28 +0200834
835 /* put attribute into the parent's attributes list */
836 if (parent->attr) {
837 /* go to the end of the list */
838 for (a = parent->attr; a->next; a = a->next);
839 /* and append new attribute */
840 a->next = attr;
841 } else {
842 /* add the first attribute in the list */
843 parent->attr = attr;
844 }
845
aweast069a6c02018-05-30 16:44:18 -0500846 free(prefix);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200847 return attr;
Radek Krejci02117302015-04-13 16:32:44 +0200848
849error:
Radek Krejci00249f22015-07-07 13:43:28 +0200850 lyxml_free_attr(ctx, NULL, attr);
aweast069a6c02018-05-30 16:44:18 -0500851 free(prefix);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200852 return NULL;
Radek Krejci54ea8de2015-04-09 18:02:56 +0200853}
854
Michal Vasko0d343d12015-08-24 14:57:36 +0200855/* logs directly */
Radek Krejci9a5daea2016-03-02 16:49:40 +0100856struct lyxml_elem *
Radek Krejcie1bacd72017-03-01 13:18:46 +0100857lyxml_parse_elem(struct ly_ctx *ctx, const char *data, unsigned int *len, struct lyxml_elem *parent, int options)
Radek Krejci54ea8de2015-04-09 18:02:56 +0200858{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200859 const char *c = data, *start, *e;
860 const char *lws; /* leading white space for handling mixed content */
861 int uc;
862 char *str;
aweast069a6c02018-05-30 16:44:18 -0500863 char *prefix = NULL;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200864 unsigned int prefix_len = 0;
865 struct lyxml_elem *elem = NULL, *child;
866 struct lyxml_attr *attr;
867 unsigned int size;
868 int nons_flag = 0, closed_flag = 0;
Radek Krejci02117302015-04-13 16:32:44 +0200869
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200870 *len = 0;
Radek Krejci02117302015-04-13 16:32:44 +0200871
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200872 if (*c != '<') {
873 return NULL;
874 }
Radek Krejci02117302015-04-13 16:32:44 +0200875
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200876 /* locate element name */
877 c++;
878 e = c;
Radek Krejci02117302015-04-13 16:32:44 +0200879
Michal Vasko53b7da02018-02-13 15:28:42 +0100880 uc = lyxml_getutf8(ctx, e, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200881 if (!is_xmlnamestartchar(uc)) {
Michal Vasko53b7da02018-02-13 15:28:42 +0100882 LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "NameStartChar of the element");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200883 return NULL;
884 }
885 e += size;
Michal Vasko53b7da02018-02-13 15:28:42 +0100886 uc = lyxml_getutf8(ctx, e, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200887 while (is_xmlnamechar(uc)) {
888 if (*e == ':') {
889 if (prefix_len) {
Michal Vasko53b7da02018-02-13 15:28:42 +0100890 LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "element name, multiple colons found");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200891 goto error;
892 }
893 /* element in a namespace */
894 start = e + 1;
Radek Krejci674e1f82015-04-21 14:12:19 +0200895
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200896 /* look for the prefix in namespaces */
aweast069a6c02018-05-30 16:44:18 -0500897 prefix_len = e - c;
Michal Vasko5f5096b2018-08-17 10:56:48 +0200898 LY_CHECK_ERR_GOTO(prefix, LOGVAL(ctx, LYE_XML_INCHAR, LY_VLOG_NONE, NULL, e), error);
aweast069a6c02018-05-30 16:44:18 -0500899 prefix = malloc((prefix_len + 1) * sizeof *prefix);
900 LY_CHECK_ERR_GOTO(!prefix, LOGMEM(ctx), error);
901 memcpy(prefix, c, prefix_len);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200902 prefix[prefix_len] = '\0';
903 c = start;
904 }
905 e += size;
Michal Vasko53b7da02018-02-13 15:28:42 +0100906 uc = lyxml_getutf8(ctx, e, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200907 }
908 if (!*e) {
Michal Vasko53b7da02018-02-13 15:28:42 +0100909 LOGVAL(ctx, LYE_EOF, LY_VLOG_NONE, NULL);
aweast069a6c02018-05-30 16:44:18 -0500910 free(prefix);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200911 return NULL;
912 }
Radek Krejci02117302015-04-13 16:32:44 +0200913
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200914 /* allocate element structure */
915 elem = calloc(1, sizeof *elem);
Michal Vasko64e6cf82018-08-17 10:32:23 +0200916 LY_CHECK_ERR_RETURN(!elem, free(prefix); LOGMEM(ctx), NULL);
Radek Krejcia8d111f2017-05-31 13:57:37 +0200917
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200918 elem->next = NULL;
919 elem->prev = elem;
920 if (parent) {
Michal Vaskof8879c22015-08-21 09:07:36 +0200921 lyxml_add_child(ctx, parent, elem);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200922 }
Radek Krejci02117302015-04-13 16:32:44 +0200923
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200924 /* store the name into the element structure */
925 elem->name = lydict_insert(ctx, c, e - c);
926 c = e;
Radek Krejci02117302015-04-13 16:32:44 +0200927
928process:
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200929 ign_xmlws(c);
Radek Krejcifb783942016-10-06 09:49:33 +0200930 if (!strncmp("/>", c, 2)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200931 /* we are done, it was EmptyElemTag */
932 c += 2;
Michal Vasko44913842016-04-13 14:20:41 +0200933 elem->content = lydict_insert(ctx, "", 0);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200934 closed_flag = 1;
935 } else if (*c == '>') {
936 /* process element content */
937 c++;
938 lws = NULL;
Radek Krejci02117302015-04-13 16:32:44 +0200939
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200940 while (*c) {
Radek Krejcifb783942016-10-06 09:49:33 +0200941 if (!strncmp(c, "</", 2)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200942 if (lws && !elem->child) {
943 /* leading white spaces were actually content */
944 goto store_content;
945 }
Radek Krejci02117302015-04-13 16:32:44 +0200946
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200947 /* Etag */
948 c += 2;
949 /* get name and check it */
950 e = c;
Michal Vasko53b7da02018-02-13 15:28:42 +0100951 uc = lyxml_getutf8(ctx, e, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200952 if (!is_xmlnamestartchar(uc)) {
Michal Vasko53b7da02018-02-13 15:28:42 +0100953 LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_XML, elem, "NameStartChar of the element");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200954 goto error;
955 }
956 e += size;
Michal Vasko53b7da02018-02-13 15:28:42 +0100957 uc = lyxml_getutf8(ctx, e, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200958 while (is_xmlnamechar(uc)) {
959 if (*e == ':') {
960 /* element in a namespace */
961 start = e + 1;
Radek Krejci674e1f82015-04-21 14:12:19 +0200962
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200963 /* look for the prefix in namespaces */
aweast069a6c02018-05-30 16:44:18 -0500964 if (!prefix || memcmp(prefix, c, e - c)) {
Michal Vasko53b7da02018-02-13 15:28:42 +0100965 LOGVAL(ctx, LYE_SPEC, LY_VLOG_XML, elem,
Michal Vaskoff9336a2016-05-10 10:48:48 +0200966 "Invalid (different namespaces) opening (%s) and closing element tags.", elem->name);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200967 goto error;
968 }
969 c = start;
970 }
971 e += size;
Michal Vasko53b7da02018-02-13 15:28:42 +0100972 uc = lyxml_getutf8(ctx, e, &size);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200973 }
974 if (!*e) {
Michal Vasko53b7da02018-02-13 15:28:42 +0100975 LOGVAL(ctx, LYE_EOF, LY_VLOG_NONE, NULL);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200976 goto error;
977 }
Radek Krejci02117302015-04-13 16:32:44 +0200978
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200979 /* check that it corresponds to opening tag */
980 size = e - c;
981 str = malloc((size + 1) * sizeof *str);
Michal Vasko53b7da02018-02-13 15:28:42 +0100982 LY_CHECK_ERR_GOTO(!str, LOGMEM(ctx), error);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200983 memcpy(str, c, e - c);
984 str[e - c] = '\0';
985 if (size != strlen(elem->name) || memcmp(str, elem->name, size)) {
Michal Vasko53b7da02018-02-13 15:28:42 +0100986 LOGVAL(ctx, LYE_SPEC, LY_VLOG_XML, elem,
Michal Vaskoff9336a2016-05-10 10:48:48 +0200987 "Invalid (mixed names) opening (%s) and closing (%s) element tags.", elem->name, str);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200988 free(str);
989 goto error;
990 }
991 free(str);
992 c = e;
Radek Krejci02117302015-04-13 16:32:44 +0200993
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200994 ign_xmlws(c);
995 if (*c != '>') {
Michal Vasko53b7da02018-02-13 15:28:42 +0100996 LOGVAL(ctx, LYE_SPEC, LY_VLOG_XML, elem, "Data after closing element tag \"%s\".", elem->name);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200997 goto error;
998 }
999 c++;
Michal Vaskoe00b7892016-04-14 10:12:18 +02001000 if (!(elem->flags & LYXML_ELEM_MIXED) && !elem->content) {
1001 /* there was no content, but we don't want NULL (only if mixed content) */
1002 elem->content = lydict_insert(ctx, "", 0);
1003 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001004 closed_flag = 1;
1005 break;
Radek Krejci02117302015-04-13 16:32:44 +02001006
Radek Krejcifb783942016-10-06 09:49:33 +02001007 } else if (!strncmp(c, "<?", 2)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001008 if (lws) {
1009 /* leading white spaces were only formatting */
1010 lws = NULL;
1011 }
1012 /* PI - ignore it */
1013 c += 2;
Michal Vasko53b7da02018-02-13 15:28:42 +01001014 if (parse_ignore(ctx, c, "?>", &size)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001015 goto error;
1016 }
1017 c += size;
Radek Krejcifb783942016-10-06 09:49:33 +02001018 } else if (!strncmp(c, "<!--", 4)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001019 if (lws) {
1020 /* leading white spaces were only formatting */
1021 lws = NULL;
1022 }
1023 /* Comment - ignore it */
1024 c += 4;
Michal Vasko53b7da02018-02-13 15:28:42 +01001025 if (parse_ignore(ctx, c, "-->", &size)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001026 goto error;
1027 }
1028 c += size;
Radek Krejcifb783942016-10-06 09:49:33 +02001029 } else if (!strncmp(c, "<![CDATA[", 9)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001030 /* CDSect */
1031 goto store_content;
1032 } else if (*c == '<') {
1033 if (lws) {
1034 if (elem->flags & LYXML_ELEM_MIXED) {
1035 /* we have a mixed content */
1036 goto store_content;
1037 } else {
1038 /* leading white spaces were only formatting */
1039 lws = NULL;
1040 }
1041 }
1042 if (elem->content) {
1043 /* we have a mixed content */
Radek Krejcie1bacd72017-03-01 13:18:46 +01001044 if (options & LYXML_PARSE_NOMIXEDCONTENT) {
Michal Vasko53b7da02018-02-13 15:28:42 +01001045 LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_XML, elem, "XML element with mixed content");
Radek Krejcie1bacd72017-03-01 13:18:46 +01001046 goto error;
1047 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001048 child = calloc(1, sizeof *child);
Michal Vasko53b7da02018-02-13 15:28:42 +01001049 LY_CHECK_ERR_GOTO(!child, LOGMEM(ctx), error);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001050 child->content = elem->content;
1051 elem->content = NULL;
Michal Vaskof8879c22015-08-21 09:07:36 +02001052 lyxml_add_child(ctx, elem, child);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001053 elem->flags |= LYXML_ELEM_MIXED;
1054 }
Radek Krejcie1bacd72017-03-01 13:18:46 +01001055 child = lyxml_parse_elem(ctx, c, &size, elem, options);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001056 if (!child) {
1057 goto error;
1058 }
1059 c += size; /* move after processed child element */
1060 } else if (is_xmlws(*c)) {
1061 lws = c;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001062 ign_xmlws(c);
1063 } else {
Radek Krejci02117302015-04-13 16:32:44 +02001064store_content:
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001065 /* store text content */
1066 if (lws) {
1067 /* process content including the leading white spaces */
1068 c = lws;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001069 lws = NULL;
1070 }
Michal Vasko53b7da02018-02-13 15:28:42 +01001071 str = parse_text(ctx, c, '<', &size);
1072 if (!str && !size) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001073 goto error;
1074 }
Michal Vasko53b7da02018-02-13 15:28:42 +01001075 elem->content = lydict_insert_zc(ctx, str);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001076 c += size; /* move after processed text content */
Radek Krejci02117302015-04-13 16:32:44 +02001077
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001078 if (elem->child) {
1079 /* we have a mixed content */
Radek Krejcie1bacd72017-03-01 13:18:46 +01001080 if (options & LYXML_PARSE_NOMIXEDCONTENT) {
Michal Vasko53b7da02018-02-13 15:28:42 +01001081 LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_XML, elem, "XML element with mixed content");
Radek Krejcie1bacd72017-03-01 13:18:46 +01001082 goto error;
1083 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001084 child = calloc(1, sizeof *child);
Michal Vasko53b7da02018-02-13 15:28:42 +01001085 LY_CHECK_ERR_GOTO(!child, LOGMEM(ctx), error);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001086 child->content = elem->content;
1087 elem->content = NULL;
Michal Vaskof8879c22015-08-21 09:07:36 +02001088 lyxml_add_child(ctx, elem, child);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001089 elem->flags |= LYXML_ELEM_MIXED;
1090 }
1091 }
1092 }
1093 } else {
1094 /* process attribute */
1095 attr = parse_attr(ctx, c, &size, elem);
1096 if (!attr) {
1097 goto error;
1098 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001099 c += size; /* move after processed attribute */
Radek Krejci02117302015-04-13 16:32:44 +02001100
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001101 /* check namespace */
1102 if (attr->type == LYXML_ATTR_NS) {
aweast069a6c02018-05-30 16:44:18 -05001103 if ((!prefix || !prefix[0]) && !attr->name) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001104 if (attr->value) {
1105 /* default prefix */
1106 elem->ns = (struct lyxml_ns *)attr;
1107 } else {
1108 /* xmlns="" -> no namespace */
1109 nons_flag = 1;
1110 }
aweast069a6c02018-05-30 16:44:18 -05001111 } else if (prefix && prefix[0] && attr->name && !strncmp(attr->name, prefix, prefix_len + 1)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001112 /* matching namespace with prefix */
1113 elem->ns = (struct lyxml_ns *)attr;
1114 }
1115 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001116
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001117 /* go back to finish element processing */
1118 goto process;
1119 }
Radek Krejci02117302015-04-13 16:32:44 +02001120
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001121 *len = c - data;
Radek Krejci02117302015-04-13 16:32:44 +02001122
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001123 if (!closed_flag) {
Michal Vasko53b7da02018-02-13 15:28:42 +01001124 LOGVAL(ctx, LYE_XML_MISS, LY_VLOG_XML, elem, "closing element tag", elem->name);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001125 goto error;
1126 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001127
Radek Krejci78a230a2015-07-07 17:04:40 +02001128 if (!elem->ns && !nons_flag && parent) {
Radek Krejci4476d412015-07-10 15:35:01 +02001129 elem->ns = lyxml_get_ns(parent, prefix_len ? prefix : NULL);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001130 }
aweast069a6c02018-05-30 16:44:18 -05001131 free(prefix);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001132 return elem;
Radek Krejci02117302015-04-13 16:32:44 +02001133
1134error:
Michal Vasko345da0a2015-12-02 10:35:55 +01001135 lyxml_free(ctx, elem);
aweast069a6c02018-05-30 16:44:18 -05001136 free(prefix);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001137 return NULL;
Radek Krejci54ea8de2015-04-09 18:02:56 +02001138}
1139
Michal Vasko0d343d12015-08-24 14:57:36 +02001140/* logs directly */
Radek Krejcic6704c82015-10-06 11:12:45 +02001141API struct lyxml_elem *
Radek Krejci722b0072016-02-01 17:09:45 +01001142lyxml_parse_mem(struct ly_ctx *ctx, const char *data, int options)
Radek Krejci54ea8de2015-04-09 18:02:56 +02001143{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001144 const char *c = data;
1145 unsigned int len;
Radek Krejci851ea662016-01-08 09:30:53 +01001146 struct lyxml_elem *root, *first = NULL, *next;
Radek Krejci2342cf62016-01-29 16:48:23 +01001147
Radek Krejci19b9b252017-03-17 16:14:09 +01001148 if (!ctx) {
Michal Vasko53b7da02018-02-13 15:28:42 +01001149 LOGARG;
Radek Krejci19b9b252017-03-17 16:14:09 +01001150 return NULL;
1151 }
1152
Radek Krejci120f6242015-12-17 12:32:56 +01001153repeat:
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001154 /* process document */
Radek Krejcif8ae23e2016-07-26 17:11:17 +02001155 while (1) {
1156 if (!*c) {
1157 /* eof */
Michal Vasko53b7da02018-02-13 15:28:42 +01001158 return first;
Radek Krejcif8ae23e2016-07-26 17:11:17 +02001159 } else if (is_xmlws(*c)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001160 /* skip whitespaces */
1161 ign_xmlws(c);
Radek Krejcifb783942016-10-06 09:49:33 +02001162 } else if (!strncmp(c, "<?", 2)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001163 /* XMLDecl or PI - ignore it */
1164 c += 2;
Michal Vasko53b7da02018-02-13 15:28:42 +01001165 if (parse_ignore(ctx, c, "?>", &len)) {
Radek Krejcicf748252017-09-04 11:11:14 +02001166 goto error;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001167 }
1168 c += len;
Radek Krejcifb783942016-10-06 09:49:33 +02001169 } else if (!strncmp(c, "<!--", 4)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001170 /* Comment - ignore it */
1171 c += 2;
Michal Vasko53b7da02018-02-13 15:28:42 +01001172 if (parse_ignore(ctx, c, "-->", &len)) {
Radek Krejcicf748252017-09-04 11:11:14 +02001173 goto error;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001174 }
1175 c += len;
Radek Krejcifb783942016-10-06 09:49:33 +02001176 } else if (!strncmp(c, "<!", 2)) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001177 /* DOCTYPE */
1178 /* TODO - standalone ignore counting < and > */
Michal Vasko53b7da02018-02-13 15:28:42 +01001179 LOGERR(ctx, LY_EINVAL, "DOCTYPE not supported in XML documents.");
Radek Krejcicf748252017-09-04 11:11:14 +02001180 goto error;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001181 } else if (*c == '<') {
1182 /* element - process it in next loop to strictly follow XML
1183 * format
1184 */
1185 break;
Michal Vaskoc2e80562015-07-27 11:31:41 +02001186 } else {
Michal Vasko53b7da02018-02-13 15:28:42 +01001187 LOGVAL(ctx, LYE_XML_INCHAR, LY_VLOG_NONE, NULL, c);
Radek Krejcicf748252017-09-04 11:11:14 +02001188 goto error;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001189 }
1190 }
Radek Krejci02117302015-04-13 16:32:44 +02001191
Radek Krejcie1bacd72017-03-01 13:18:46 +01001192 root = lyxml_parse_elem(ctx, c, &len, NULL, options);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001193 if (!root) {
Radek Krejcicf748252017-09-04 11:11:14 +02001194 goto error;
Radek Krejci120f6242015-12-17 12:32:56 +01001195 } else if (!first) {
1196 first = root;
1197 } else {
1198 first->prev->next = root;
1199 root->prev = first->prev;
1200 first->prev = root;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001201 }
1202 c += len;
Radek Krejci02117302015-04-13 16:32:44 +02001203
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001204 /* ignore the rest of document where can be comments, PIs and whitespaces,
1205 * note that we are not detecting syntax errors in these parts
1206 */
1207 ign_xmlws(c);
1208 if (*c) {
Radek Krejci722b0072016-02-01 17:09:45 +01001209 if (options & LYXML_PARSE_MULTIROOT) {
Radek Krejci120f6242015-12-17 12:32:56 +01001210 goto repeat;
1211 } else {
Michal Vasko53b7da02018-02-13 15:28:42 +01001212 LOGWRN(ctx, "There are some not parsed data:\n%s", c);
Radek Krejci120f6242015-12-17 12:32:56 +01001213 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001214 }
Radek Krejci02117302015-04-13 16:32:44 +02001215
Radek Krejci120f6242015-12-17 12:32:56 +01001216 return first;
Radek Krejcicf748252017-09-04 11:11:14 +02001217
1218error:
1219 LY_TREE_FOR_SAFE(first, next, root) {
1220 lyxml_free(ctx, root);
1221 }
Radek Krejcicf748252017-09-04 11:11:14 +02001222 return NULL;
Radek Krejci02117302015-04-13 16:32:44 +02001223}
1224
Radek Krejcic6704c82015-10-06 11:12:45 +02001225API struct lyxml_elem *
Radek Krejci722b0072016-02-01 17:09:45 +01001226lyxml_parse_path(struct ly_ctx *ctx, const char *filename, int options)
Radek Krejci54ea8de2015-04-09 18:02:56 +02001227{
Radek Krejci6b3d9262015-12-03 13:45:27 +01001228 struct lyxml_elem *elem = NULL;
Radek Krejci0fb11502017-01-31 16:45:42 +01001229 size_t length;
Pavol Vicanb2570c12015-11-12 13:50:20 +01001230 int fd;
1231 char *addr;
1232
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001233 if (!filename || !ctx) {
Michal Vasko53b7da02018-02-13 15:28:42 +01001234 LOGARG;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001235 return NULL;
1236 }
Radek Krejci54ea8de2015-04-09 18:02:56 +02001237
Pavol Vicanb2570c12015-11-12 13:50:20 +01001238 fd = open(filename, O_RDONLY);
1239 if (fd == -1) {
Michal Vasko53b7da02018-02-13 15:28:42 +01001240 LOGERR(ctx, LY_EINVAL,"Opening file \"%s\" failed.", filename);
Pavol Vicanb2570c12015-11-12 13:50:20 +01001241 return NULL;
1242 }
Michal Vasko53b7da02018-02-13 15:28:42 +01001243 if (lyp_mmap(ctx, fd, 0, &length, (void **)&addr)) {
1244 LOGERR(ctx, LY_ESYS, "Mapping file descriptor into memory failed (%s()).", __func__);
Pavol Vicanb2570c12015-11-12 13:50:20 +01001245 goto error;
Radek Krejci10c216a2017-02-01 10:36:00 +01001246 } else if (!addr) {
1247 /* empty XML file */
1248 goto error;
Pavol Vicanb2570c12015-11-12 13:50:20 +01001249 }
Radek Krejci6b3d9262015-12-03 13:45:27 +01001250
Radek Krejci722b0072016-02-01 17:09:45 +01001251 elem = lyxml_parse_mem(ctx, addr, options);
Radek Krejci0fb11502017-01-31 16:45:42 +01001252 lyp_munmap(addr, length);
Radek Krejci30793ab2015-12-03 13:45:45 +01001253 close(fd);
Radek Krejci6b3d9262015-12-03 13:45:27 +01001254
Pavol Vicanb2570c12015-11-12 13:50:20 +01001255 return elem;
1256
1257error:
Radek Krejci6b3d9262015-12-03 13:45:27 +01001258 if (fd != -1) {
1259 close(fd);
1260 }
1261
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001262 return NULL;
Radek Krejci54ea8de2015-04-09 18:02:56 +02001263}
Radek Krejci02117302015-04-13 16:32:44 +02001264
Michal Vasko5db027d2015-10-09 14:38:50 +02001265int
Radek Krejcieb827b72018-02-23 10:05:02 +01001266lyxml_dump_text(struct lyout *out, const char *text, LYXML_DATA_TYPE type)
Radek Krejcif0023a92015-04-20 20:51:39 +02001267{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001268 unsigned int i, n;
Radek Krejcif0023a92015-04-20 20:51:39 +02001269
Michal Vasko5db027d2015-10-09 14:38:50 +02001270 if (!text) {
1271 return 0;
1272 }
1273
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001274 for (i = n = 0; text[i]; i++) {
1275 switch (text[i]) {
1276 case '&':
Radek Krejci5248f132015-10-09 10:34:25 +02001277 n += ly_print(out, "&amp;");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001278 break;
1279 case '<':
Radek Krejci5248f132015-10-09 10:34:25 +02001280 n += ly_print(out, "&lt;");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001281 break;
1282 case '>':
1283 /* not needed, just for readability */
Radek Krejci5248f132015-10-09 10:34:25 +02001284 n += ly_print(out, "&gt;");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001285 break;
Radek Krejci952a7252016-07-16 20:52:43 +02001286 case '"':
Radek Krejcieb827b72018-02-23 10:05:02 +01001287 if (type == LYXML_DATA_ATTR) {
1288 n += ly_print(out, "&quot;");
1289 break;
1290 }
1291 /* falls through */
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001292 default:
Radek Krejci5248f132015-10-09 10:34:25 +02001293 ly_write(out, &text[i], 1);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001294 n++;
1295 }
1296 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001297
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001298 return n;
Radek Krejcif0023a92015-04-20 20:51:39 +02001299}
1300
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001301static int
Michal Vaskob2f1db72016-11-16 13:57:35 +01001302dump_elem(struct lyout *out, const struct lyxml_elem *e, int level, int options, int last_elem)
Radek Krejcif0023a92015-04-20 20:51:39 +02001303{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001304 int size = 0;
1305 struct lyxml_attr *a;
1306 struct lyxml_elem *child;
1307 const char *delim, *delim_outer;
1308 int indent;
Radek Krejcif0023a92015-04-20 20:51:39 +02001309
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001310 if (!e->name) {
1311 /* mixed content */
1312 if (e->content) {
Radek Krejcieb827b72018-02-23 10:05:02 +01001313 return lyxml_dump_text(out, e->content, LYXML_DATA_ELEM);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001314 } else {
1315 return 0;
1316 }
1317 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001318
Radek Krejci722b0072016-02-01 17:09:45 +01001319 delim = delim_outer = (options & LYXML_PRINT_FORMAT) ? "\n" : "";
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001320 indent = 2 * level;
1321 if ((e->flags & LYXML_ELEM_MIXED) || (e->parent && (e->parent->flags & LYXML_ELEM_MIXED))) {
1322 delim = "";
1323 }
1324 if (e->parent && (e->parent->flags & LYXML_ELEM_MIXED)) {
1325 delim_outer = "";
1326 indent = 0;
1327 }
Michal Vaskob2f1db72016-11-16 13:57:35 +01001328 if (last_elem && (options & LYXML_PRINT_NO_LAST_NEWLINE)) {
1329 delim_outer = "";
1330 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001331
Radek Krejci722b0072016-02-01 17:09:45 +01001332 if (!(options & (LYXML_PRINT_OPEN | LYXML_PRINT_CLOSE | LYXML_PRINT_ATTRS)) || (options & LYXML_PRINT_OPEN)) {
Radek Krejcic6704c82015-10-06 11:12:45 +02001333 /* opening tag */
1334 if (e->ns && e->ns->prefix) {
Radek Krejci5248f132015-10-09 10:34:25 +02001335 size += ly_print(out, "%*s<%s:%s", indent, "", e->ns->prefix, e->name);
Radek Krejcic6704c82015-10-06 11:12:45 +02001336 } else {
Radek Krejci5248f132015-10-09 10:34:25 +02001337 size += ly_print(out, "%*s<%s", indent, "", e->name);
Radek Krejcic6704c82015-10-06 11:12:45 +02001338 }
Radek Krejci722b0072016-02-01 17:09:45 +01001339 } else if (options & LYXML_PRINT_CLOSE) {
Radek Krejcic6704c82015-10-06 11:12:45 +02001340 indent = 0;
1341 goto close;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001342 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001343
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001344 /* attributes */
1345 for (a = e->attr; a; a = a->next) {
1346 if (a->type == LYXML_ATTR_NS) {
1347 if (a->name) {
Radek Krejci5248f132015-10-09 10:34:25 +02001348 size += ly_print(out, " xmlns:%s=\"%s\"", a->name, a->value ? a->value : "");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001349 } else {
Radek Krejci5248f132015-10-09 10:34:25 +02001350 size += ly_print(out, " xmlns=\"%s\"", a->value ? a->value : "");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001351 }
1352 } else if (a->ns && a->ns->prefix) {
Radek Krejci5248f132015-10-09 10:34:25 +02001353 size += ly_print(out, " %s:%s=\"%s\"", a->ns->prefix, a->name, a->value);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001354 } else {
Radek Krejci5248f132015-10-09 10:34:25 +02001355 size += ly_print(out, " %s=\"%s\"", a->name, a->value);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001356 }
1357 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001358
Radek Krejcic6704c82015-10-06 11:12:45 +02001359 /* apply options */
Radek Krejci722b0072016-02-01 17:09:45 +01001360 if ((options & LYXML_PRINT_CLOSE) && (options & LYXML_PRINT_OPEN)) {
Radek Krejci5248f132015-10-09 10:34:25 +02001361 size += ly_print(out, "/>%s", delim);
Radek Krejcic6704c82015-10-06 11:12:45 +02001362 return size;
Radek Krejci722b0072016-02-01 17:09:45 +01001363 } else if (options & LYXML_PRINT_OPEN) {
Radek Krejci5248f132015-10-09 10:34:25 +02001364 ly_print(out, ">");
Radek Krejcic6704c82015-10-06 11:12:45 +02001365 return ++size;
Radek Krejci722b0072016-02-01 17:09:45 +01001366 } else if (options & LYXML_PRINT_ATTRS) {
Radek Krejcic6704c82015-10-06 11:12:45 +02001367 return size;
1368 }
1369
Michal Vasko3a611612016-04-14 10:12:56 +02001370 if (!e->child && (!e->content || !e->content[0])) {
Radek Krejci5248f132015-10-09 10:34:25 +02001371 size += ly_print(out, "/>%s", delim);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001372 return size;
Michal Vasko3a611612016-04-14 10:12:56 +02001373 } else if (e->content && e->content[0]) {
Radek Krejci5248f132015-10-09 10:34:25 +02001374 ly_print(out, ">");
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001375 size++;
Radek Krejcif0023a92015-04-20 20:51:39 +02001376
Radek Krejcieb827b72018-02-23 10:05:02 +01001377 size += lyxml_dump_text(out, e->content, LYXML_DATA_ELEM);
Radek Krejcif0023a92015-04-20 20:51:39 +02001378
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001379 if (e->ns && e->ns->prefix) {
Radek Krejci5248f132015-10-09 10:34:25 +02001380 size += ly_print(out, "</%s:%s>%s", e->ns->prefix, e->name, delim);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001381 } else {
Radek Krejci5248f132015-10-09 10:34:25 +02001382 size += ly_print(out, "</%s>%s", e->name, delim);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001383 }
1384 return size;
1385 } else {
Radek Krejci5248f132015-10-09 10:34:25 +02001386 size += ly_print(out, ">%s", delim);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001387 }
Radek Krejci674e1f82015-04-21 14:12:19 +02001388
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001389 /* go recursively */
1390 LY_TREE_FOR(e->child, child) {
Radek Krejci722b0072016-02-01 17:09:45 +01001391 if (options & LYXML_PRINT_FORMAT) {
Michal Vaskob2f1db72016-11-16 13:57:35 +01001392 size += dump_elem(out, child, level + 1, LYXML_PRINT_FORMAT, 0);
Pavol Vicanbe7eef52015-10-22 14:07:48 +02001393 } else {
Michal Vaskob2f1db72016-11-16 13:57:35 +01001394 size += dump_elem(out, child, level, 0, 0);
Pavol Vicanbe7eef52015-10-22 14:07:48 +02001395 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001396 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001397
Radek Krejcic6704c82015-10-06 11:12:45 +02001398close:
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001399 /* closing tag */
1400 if (e->ns && e->ns->prefix) {
Radek Krejci5248f132015-10-09 10:34:25 +02001401 size += ly_print(out, "%*s</%s:%s>%s", indent, "", e->ns->prefix, e->name, delim_outer);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001402 } else {
Radek Krejci5248f132015-10-09 10:34:25 +02001403 size += ly_print(out, "%*s</%s>%s", indent, "", e->name, delim_outer);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001404 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001405
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001406 return size;
Radek Krejcif0023a92015-04-20 20:51:39 +02001407}
1408
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001409static int
1410dump_siblings(struct lyout *out, const struct lyxml_elem *e, int options)
1411{
Michal Vaskob2f1db72016-11-16 13:57:35 +01001412 const struct lyxml_elem *start, *iter, *next;
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001413 int ret = 0;
1414
1415 if (e->parent) {
1416 start = e->parent->child;
1417 } else {
1418 start = e;
1419 while(start->prev && start->prev->next) {
1420 start = start->prev;
1421 }
1422 }
1423
Michal Vaskob2f1db72016-11-16 13:57:35 +01001424 LY_TREE_FOR_SAFE(start, next, iter) {
1425 ret += dump_elem(out, iter, 0, options, (next ? 0 : 1));
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001426 }
1427
1428 return ret;
1429}
1430
Radek Krejcic6704c82015-10-06 11:12:45 +02001431API int
Radek Krejci722b0072016-02-01 17:09:45 +01001432lyxml_print_file(FILE *stream, const struct lyxml_elem *elem, int options)
Radek Krejcif0023a92015-04-20 20:51:39 +02001433{
Radek Krejci5248f132015-10-09 10:34:25 +02001434 struct lyout out;
1435
1436 if (!stream || !elem) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +02001437 return 0;
1438 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001439
Michal Vasko002db142018-07-03 13:52:59 +02001440 memset(&out, 0, sizeof out);
1441
Radek Krejci5248f132015-10-09 10:34:25 +02001442 out.type = LYOUT_STREAM;
1443 out.method.f = stream;
1444
Radek Krejci722b0072016-02-01 17:09:45 +01001445 if (options & LYXML_PRINT_SIBLINGS) {
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001446 return dump_siblings(&out, elem, options);
1447 } else {
Michal Vaskob2f1db72016-11-16 13:57:35 +01001448 return dump_elem(&out, elem, 0, options, 1);
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001449 }
Radek Krejci5248f132015-10-09 10:34:25 +02001450}
1451
1452API int
Radek Krejci722b0072016-02-01 17:09:45 +01001453lyxml_print_fd(int fd, const struct lyxml_elem *elem, int options)
Radek Krejci5248f132015-10-09 10:34:25 +02001454{
1455 struct lyout out;
1456
1457 if (fd < 0 || !elem) {
1458 return 0;
1459 }
1460
Michal Vasko002db142018-07-03 13:52:59 +02001461 memset(&out, 0, sizeof out);
1462
Radek Krejci5248f132015-10-09 10:34:25 +02001463 out.type = LYOUT_FD;
1464 out.method.fd = fd;
1465
Radek Krejci722b0072016-02-01 17:09:45 +01001466 if (options & LYXML_PRINT_SIBLINGS) {
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001467 return dump_siblings(&out, elem, options);
1468 } else {
Michal Vaskob2f1db72016-11-16 13:57:35 +01001469 return dump_elem(&out, elem, 0, options, 1);
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001470 }
Radek Krejcif0023a92015-04-20 20:51:39 +02001471}
Radek Krejci6140e4e2015-10-09 15:50:55 +02001472
1473API int
Radek Krejci722b0072016-02-01 17:09:45 +01001474lyxml_print_mem(char **strp, const struct lyxml_elem *elem, int options)
Radek Krejci2fa0fc12015-10-14 18:14:29 +02001475{
1476 struct lyout out;
1477 int r;
1478
1479 if (!strp || !elem) {
1480 return 0;
1481 }
1482
Michal Vasko002db142018-07-03 13:52:59 +02001483 memset(&out, 0, sizeof out);
1484
Radek Krejci2fa0fc12015-10-14 18:14:29 +02001485 out.type = LYOUT_MEMORY;
Radek Krejci2fa0fc12015-10-14 18:14:29 +02001486
Radek Krejci722b0072016-02-01 17:09:45 +01001487 if (options & LYXML_PRINT_SIBLINGS) {
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001488 r = dump_siblings(&out, elem, options);
1489 } else {
Michal Vaskob2f1db72016-11-16 13:57:35 +01001490 r = dump_elem(&out, elem, 0, options, 1);
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001491 }
Radek Krejci2fa0fc12015-10-14 18:14:29 +02001492
1493 *strp = out.method.mem.buf;
1494 return r;
1495}
1496
1497API int
Radek Krejci722b0072016-02-01 17:09:45 +01001498lyxml_print_clb(ssize_t (*writeclb)(void *arg, const void *buf, size_t count), void *arg, const struct lyxml_elem *elem, int options)
Radek Krejci6140e4e2015-10-09 15:50:55 +02001499{
1500 struct lyout out;
1501
1502 if (!writeclb || !elem) {
1503 return 0;
1504 }
1505
Michal Vasko002db142018-07-03 13:52:59 +02001506 memset(&out, 0, sizeof out);
1507
Radek Krejci6140e4e2015-10-09 15:50:55 +02001508 out.type = LYOUT_CALLBACK;
Radek Krejci50929eb2015-10-09 18:14:15 +02001509 out.method.clb.f = writeclb;
1510 out.method.clb.arg = arg;
Radek Krejci6140e4e2015-10-09 15:50:55 +02001511
Radek Krejci722b0072016-02-01 17:09:45 +01001512 if (options & LYXML_PRINT_SIBLINGS) {
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001513 return dump_siblings(&out, elem, options);
1514 } else {
Michal Vaskob2f1db72016-11-16 13:57:35 +01001515 return dump_elem(&out, elem, 0, options, 1);
Radek Krejci8c56a5a2015-12-16 15:10:28 +01001516 }
Radek Krejci6140e4e2015-10-09 15:50:55 +02001517}