blob: d6f6cd700452dc1a30b5361af71db6224af375e2 [file] [log] [blame]
Radek Krejcid91dbaf2018-09-21 15:51:39 +02001/**
2 * @file xml.c
3 * @author Radek Krejci <rkrejci@cesnet.cz>
Michal Vaskob36053d2020-03-26 15:49:30 +01004 * @author Michal Vasko <mvasko@cesnet.cz>
Radek Krejcid91dbaf2018-09-21 15:51:39 +02005 * @brief Generic XML parser implementation for libyang
6 *
7 * Copyright (c) 2015 - 2018 CESNET, z.s.p.o.
8 *
9 * This source code is licensed under BSD 3-Clause License (the "License").
10 * You may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * https://opensource.org/licenses/BSD-3-Clause
14 */
15
Radek Krejci535ea9f2020-05-29 16:01:05 +020016#define _GNU_SOURCE
17
18#include "xml.h"
Radek Krejci4b74d5e2018-09-26 14:30:55 +020019
Radek Krejcib1890642018-10-03 14:05:40 +020020#include <assert.h>
Radek Krejci7a7fa902018-09-25 17:08:21 +020021#include <ctype.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020022#include <stdint.h>
Radek Krejcie7b95092019-05-15 11:03:07 +020023#include <stdlib.h>
Radek Krejci4b74d5e2018-09-26 14:30:55 +020024#include <string.h>
Radek Krejcica376bd2020-06-11 16:04:06 +020025#include <sys/types.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020026
Radek Krejci535ea9f2020-05-29 16:01:05 +020027#include "common.h"
28#include "dict.h"
29#include "printer.h"
30#include "tree.h"
31#include "tree_data.h"
Radek Krejcid91dbaf2018-09-21 15:51:39 +020032
/* Move the input of lyxml_ctx c forward by s bytes. If the new position is the terminating NUL byte (EOF),
 * an EOF validation error is logged for the current line and LY_CHECK_ERR_RET() leaves the calling function
 * with LY_EVALID. Wrapped in do-while so that the macro behaves as a single statement. */
#define move_input(c,s) \
    do { \
        (c)->input += (s); \
        LY_CHECK_ERR_RET(!(c)->input[0], LOGVAL((c)->ctx, LY_VLOG_LINE, &(c)->line, LY_VCODE_EOF), LY_EVALID); \
    } while (0)
Radek Krejcid91dbaf2018-09-21 15:51:39 +020035
/* Skip XML whitespace at the current input position of lyxml_ctx c,
 * the line counter of c is incremented for every skipped '\n' */
#define ign_xmlws(c) \
    do { \
        while (is_xmlws(*(c)->input)) { \
            if (*(c)->input == '\n') { \
                ++(c)->line; \
            } \
            ++(c)->input; \
        } \
    } while (0)
38
39static LY_ERR lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *value_len, int *ws_only,
40 int *dynamic);
Radek Krejcid91dbaf2018-09-21 15:51:39 +020041
/**
 * @brief Skip the input up to the first occurrence of a delimiter.
 *
 * New lines passed on the way are counted, a character equal to the first delimiter byte is never counted.
 *
 * @param[in] input NUL-terminated string to search in.
 * @param[in] delim Delimiter to find.
 * @param[in] delim_len Number of bytes of @p delim that must match.
 * @param[out] newlines Number of '\n' characters found before the delimiter (or before the end of @p input).
 * @return Pointer to the beginning of the delimiter in @p input, NULL if the input ends without it.
 */
static const char *
ign_todelim(const char *input, const char *delim, size_t delim_len, size_t *newlines)
{
    const char *pos;
    const char *found = NULL;
    size_t skipped_lines = 0;

    for (pos = input; *pos != '\0'; ++pos) {
        if (*pos == *delim) {
            /* candidate, compare the whole delimiter */
            if (!strncmp(pos, delim, delim_len)) {
                found = pos;
                break;
            }
        } else if (*pos == '\n') {
            ++skipped_lines;
        }
    }

    *newlines = skipped_lines;
    return found;
}
76
Radek Krejci4b74d5e2018-09-26 14:30:55 +020077/**
Michal Vaskob36053d2020-03-26 15:49:30 +010078 * @brief Check/Get an XML identifier from the input string.
79 *
80 * The identifier must have at least one valid character complying the name start character constraints.
81 * The identifier is terminated by the first character, which does not comply to the name character constraints.
82 *
83 * See https://www.w3.org/TR/xml-names/#NT-NCName
84 *
85 * @param[in] xmlctx XML context.
86 * @param[out] start Pointer to the start of the identifier.
87 * @param[out] end Pointer ot the end of the identifier.
88 * @return LY_ERR value.
89 */
90static LY_ERR
91lyxml_parse_identifier(struct lyxml_ctx *xmlctx, const char **start, const char **end)
92{
93 const char *s, *in;
94 uint32_t c;
95 size_t parsed;
96 LY_ERR rc;
97
98 in = s = xmlctx->input;
99
100 /* check NameStartChar (minus colon) */
101 LY_CHECK_ERR_RET(ly_getutf8(&in, &c, &parsed),
102 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INCHAR, in[0]),
103 LY_EVALID);
104 LY_CHECK_ERR_RET(!is_xmlqnamestartchar(c),
105 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
106 "Identifier \"%s\" starts with an invalid character.", in - parsed),
107 LY_EVALID);
108
109 /* check rest of the identifier */
110 do {
111 /* move only successfully parsed bytes */
112 xmlctx->input += parsed;
113
114 rc = ly_getutf8(&in, &c, &parsed);
115 LY_CHECK_ERR_RET(rc, LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INCHAR, in[0]), LY_EVALID);
116 } while (is_xmlqnamechar(c));
117
118 *start = s;
119 *end = xmlctx->input;
120 return LY_SUCCESS;
121}
122
123/**
124 * @brief Add namespace definition into XML context.
125 *
126 * Namespaces from a single element are supposed to be added sequentially together (not interleaved by a namespace from other
127 * element). This mimic namespace visibility, since the namespace defined in element E is not visible from its parents or
128 * siblings. On the other hand, namespace from a parent element can be redefined in a child element. This is also reflected
129 * by lyxml_ns_get() which returns the most recent namespace definition for the given prefix.
130 *
131 * When leaving processing of a subtree of some element (after it is removed from xmlctx->elements), caller is supposed to call
132 * lyxml_ns_rm() to remove all the namespaces defined in such an element from the context.
133 *
134 * @param[in] xmlctx XML context to work with.
135 * @param[in] prefix Pointer to the namespace prefix. Can be NULL for default namespace.
136 * @param[in] prefix_len Length of the prefix.
137 * @param[in] uri Namespace URI (value) to store directly. Value is always spent.
138 * @return LY_ERR values.
139 */
140LY_ERR
141lyxml_ns_add(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, char *uri)
142{
143 struct lyxml_ns *ns;
144
145 ns = malloc(sizeof *ns);
146 LY_CHECK_ERR_RET(!ns, LOGMEM(xmlctx->ctx), LY_EMEM);
147
148 /* we need to connect the depth of the element where the namespace is defined with the
149 * namespace record to be able to maintain (remove) the record when the parser leaves
150 * (to its sibling or back to the parent) the element where the namespace was defined */
151 ns->depth = xmlctx->elements.count;
152
153 ns->uri = uri;
154 if (prefix) {
155 ns->prefix = strndup(prefix, prefix_len);
156 LY_CHECK_ERR_RET(!ns->prefix, LOGMEM(xmlctx->ctx); free(ns->uri); free(ns), LY_EMEM);
157 } else {
158 ns->prefix = NULL;
159 }
160
161 LY_CHECK_ERR_RET(ly_set_add(&xmlctx->ns, ns, LY_SET_OPT_USEASLIST) == -1,
162 free(ns->prefix); free(ns->uri); free(ns), LY_EMEM);
163 return LY_SUCCESS;
164}
165
166/**
167 * @brief Remove all the namespaces defined in the element recently closed (removed from the xmlctx->elements).
168 *
169 * @param[in] xmlctx XML context to work with.
170 */
171void
172lyxml_ns_rm(struct lyxml_ctx *xmlctx)
173{
174 unsigned int u;
175
176 for (u = xmlctx->ns.count - 1; u + 1 > 0; --u) {
177 if (((struct lyxml_ns *)xmlctx->ns.objs[u])->depth != xmlctx->elements.count + 1) {
178 /* we are done, the namespaces from a single element are supposed to be together */
179 break;
180 }
181 /* remove the ns structure */
182 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->prefix);
183 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->uri);
184 free(xmlctx->ns.objs[u]);
185 --xmlctx->ns.count;
186 }
187
188 if (!xmlctx->ns.count) {
189 /* cleanup the xmlctx's namespaces storage */
190 ly_set_erase(&xmlctx->ns, NULL);
191 }
192}
193
Michal Vaskob36053d2020-03-26 15:49:30 +0100194const struct lyxml_ns *
195lyxml_ns_get(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len)
196{
197 unsigned int u;
198 struct lyxml_ns *ns;
199
200 for (u = xmlctx->ns.count - 1; u + 1 > 0; --u) {
201 ns = (struct lyxml_ns *)xmlctx->ns.objs[u];
202 if (prefix && prefix_len) {
203 if (ns->prefix && !ly_strncmp(ns->prefix, prefix, prefix_len)) {
204 return ns;
205 }
206 } else if (!ns->prefix) {
207 /* default namespace */
208 return ns;
209 }
210 }
211
212 return NULL;
213}
214
Michal Vasko8cef5232020-06-15 17:59:47 +0200215/**
216 * @brief Skip in the input until EOF or just after the opening tag.
217 * Handles special XML constructs (comment, cdata, doctype).
218 *
219 * @param[in] xmlctx XML context to use.
220 * @return LY_ERR value.
221 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100222static LY_ERR
223lyxml_skip_until_end_or_after_otag(struct lyxml_ctx *xmlctx)
224{
225 const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */
226 const char *in, *endtag, *sectname;
227 size_t endtag_len, newlines;
228
229 while (1) {
230 ign_xmlws(xmlctx);
231
232 if (xmlctx->input[0] == '\0') {
233 /* EOF */
234 if (xmlctx->elements.count) {
235 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
236 return LY_EVALID;
237 }
238 return LY_SUCCESS;
239 } else if (xmlctx->input[0] != '<') {
240 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->input),
241 xmlctx->input, "element tag start ('<')");
242 return LY_EVALID;
243 }
244 move_input(xmlctx, 1);
245
246 if (xmlctx->input[0] == '!') {
247 move_input(xmlctx, 1);
248 /* sections to ignore */
249 if (!strncmp(xmlctx->input, "--", 2)) {
250 /* comment */
251 move_input(xmlctx, 2);
252 sectname = "Comment";
253 endtag = "-->";
254 endtag_len = 3;
255 } else if (!strncmp(xmlctx->input, "[CDATA[", 7)) {
256 /* CDATA section */
257 move_input(xmlctx, 7);
258 sectname = "CData";
259 endtag = "]]>";
260 endtag_len = 3;
261 } else if (!strncmp(xmlctx->input, "DOCTYPE", 7)) {
262 /* Document type declaration - not supported */
263 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_NSUPP, "Document Type Declaration");
264 return LY_EVALID;
265 } else {
266 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Unknown XML section \"%.20s\".", &xmlctx->input[-2]);
267 return LY_EVALID;
268 }
269 in = ign_todelim(xmlctx->input, endtag, endtag_len, &newlines);
270 LY_CHECK_ERR_RET(!in, LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_NTERM, sectname), LY_EVALID);
271 xmlctx->line += newlines;
272 xmlctx->input = in + endtag_len;
273 } else if (xmlctx->input[0] == '?') {
274 in = ign_todelim(xmlctx->input, "?>", 2, &newlines);
275 LY_CHECK_ERR_RET(!in, LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_NTERM, "Declaration"), LY_EVALID);
276 xmlctx->line += newlines;
277 xmlctx->input = in + 2;
278 } else {
279 /* other non-WS character */
280 break;
281 }
282 }
283
284 return LY_SUCCESS;
285}
286
Michal Vasko8cef5232020-06-15 17:59:47 +0200287/**
288 * @brief Parse QName.
289 *
290 * @param[in] xmlctx XML context to use.
291 * @param[out] prefix Parsed prefix, may be NULL.
292 * @param[out] prefix_len Length of @p prefix.
293 * @param[out] name Parsed name.
294 * @param[out] name_len Length of @p name.
295 * @return LY_ERR value.
296 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100297static LY_ERR
298lyxml_parse_qname(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
299{
300 const char *start, *end;
301
302 *prefix = NULL;
303 *prefix_len = 0;
304
305 LY_CHECK_RET(lyxml_parse_identifier(xmlctx, &start, &end));
306 if (end[0] == ':') {
307 /* we have prefixed identifier */
308 *prefix = start;
309 *prefix_len = end - start;
310
311 move_input(xmlctx, 1);
312 LY_CHECK_RET(lyxml_parse_identifier(xmlctx, &start, &end));
313 }
314
315 *name = start;
316 *name_len = end - start;
317 return LY_SUCCESS;
318}
319
320/**
Radek Krejci7a7fa902018-09-25 17:08:21 +0200321 * Store UTF-8 character specified as 4byte integer into the dst buffer.
322 * Returns number of written bytes (4 max), expects that dst has enough space.
323 *
324 * UTF-8 mapping:
325 * 00000000 -- 0000007F: 0xxxxxxx
326 * 00000080 -- 000007FF: 110xxxxx 10xxxxxx
327 * 00000800 -- 0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
328 * 00010000 -- 001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
329 *
330 * Includes checking for valid characters (following RFC 7950, sec 9.4)
331 */
332static LY_ERR
Radek Krejci117d2082018-09-26 10:05:14 +0200333lyxml_pututf8(char *dst, uint32_t value, size_t *bytes_written)
Radek Krejci7a7fa902018-09-25 17:08:21 +0200334{
335 if (value < 0x80) {
336 /* one byte character */
337 if (value < 0x20 &&
338 value != 0x09 &&
339 value != 0x0a &&
340 value != 0x0d) {
341 return LY_EINVAL;
342 }
343
344 dst[0] = value;
345 (*bytes_written) = 1;
346 } else if (value < 0x800) {
347 /* two bytes character */
348 dst[0] = 0xc0 | (value >> 6);
349 dst[1] = 0x80 | (value & 0x3f);
350 (*bytes_written) = 2;
351 } else if (value < 0xfffe) {
352 /* three bytes character */
353 if (((value & 0xf800) == 0xd800) ||
354 (value >= 0xfdd0 && value <= 0xfdef)) {
355 /* exclude surrogate blocks %xD800-DFFF */
356 /* exclude noncharacters %xFDD0-FDEF */
357 return LY_EINVAL;
358 }
359
360 dst[0] = 0xe0 | (value >> 12);
361 dst[1] = 0x80 | ((value >> 6) & 0x3f);
362 dst[2] = 0x80 | (value & 0x3f);
363
364 (*bytes_written) = 3;
365 } else if (value < 0x10fffe) {
366 if ((value & 0xffe) == 0xffe) {
367 /* exclude noncharacters %xFFFE-FFFF, %x1FFFE-1FFFF, %x2FFFE-2FFFF, %x3FFFE-3FFFF, %x4FFFE-4FFFF,
368 * %x5FFFE-5FFFF, %x6FFFE-6FFFF, %x7FFFE-7FFFF, %x8FFFE-8FFFF, %x9FFFE-9FFFF, %xAFFFE-AFFFF,
369 * %xBFFFE-BFFFF, %xCFFFE-CFFFF, %xDFFFE-DFFFF, %xEFFFE-EFFFF, %xFFFFE-FFFFF, %x10FFFE-10FFFF */
370 return LY_EINVAL;
371 }
372 /* four bytes character */
373 dst[0] = 0xf0 | (value >> 18);
374 dst[1] = 0x80 | ((value >> 12) & 0x3f);
375 dst[2] = 0x80 | ((value >> 6) & 0x3f);
376 dst[3] = 0x80 | (value & 0x3f);
377
378 (*bytes_written) = 4;
379 }
380 return LY_SUCCESS;
381}
382
Michal Vasko8cef5232020-06-15 17:59:47 +0200383/**
384 * @brief Parse XML text content (value).
385 *
386 * @param[in] xmlctx XML context to use.
387 * @param[in] endchar Expected character to mark value end.
388 * @param[out] value Parsed value.
389 * @param[out] length Length of @p value.
390 * @param[out] ws_only Whether the value is empty/white-spaces only.
391 * @param[out] dynamic Whether the value was dynamically allocated.
392 * @return LY_ERR value.
393 */
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200394static LY_ERR
Michal Vaskob36053d2020-03-26 15:49:30 +0100395lyxml_parse_value(struct lyxml_ctx *xmlctx, char endchar, char **value, size_t *length, int *ws_only, int *dynamic)
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200396{
Michal Vaskob36053d2020-03-26 15:49:30 +0100397#define BUFSIZE 24
398#define BUFSIZE_STEP 128
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200399
Michal Vaskob36053d2020-03-26 15:49:30 +0100400 const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */
401 const char *in = xmlctx->input, *start;
402 char *buf = NULL;
Radek Krejci4ad42aa2019-07-23 16:55:58 +0200403 size_t offset; /* read offset in input buffer */
404 size_t len; /* length of the output string (write offset in output buffer) */
405 size_t size = 0; /* size of the output buffer */
Radek Krejci7a7fa902018-09-25 17:08:21 +0200406 void *p;
Radek Krejci117d2082018-09-26 10:05:14 +0200407 uint32_t n;
Michal Vaskob36053d2020-03-26 15:49:30 +0100408 size_t u;
409 int ws = 1;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200410
Michal Vaskob36053d2020-03-26 15:49:30 +0100411 assert(xmlctx);
Radek Krejcib1890642018-10-03 14:05:40 +0200412
Radek Krejcid70d1072018-10-09 14:20:47 +0200413 /* init */
Michal Vaskob36053d2020-03-26 15:49:30 +0100414 start = in;
Radek Krejcid70d1072018-10-09 14:20:47 +0200415 offset = len = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200416
417 /* parse */
418 while (in[offset]) {
419 if (in[offset] == '&') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100420 /* non WS */
421 ws = 0;
Radek Krejcid70d1072018-10-09 14:20:47 +0200422
Michal Vaskob36053d2020-03-26 15:49:30 +0100423 if (!buf) {
424 /* prepare output buffer */
425 buf = malloc(BUFSIZE);
426 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
427 size = BUFSIZE;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200428 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100429
430 /* allocate enough for the offset and next character,
431 * we will need 4 bytes at most since we support only the predefined
432 * (one-char) entities and character references */
433 if (len + offset + 4 >= size) {
434 buf = ly_realloc(buf, size + BUFSIZE_STEP);
435 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
436 size += BUFSIZE_STEP;
437 }
438
439 if (offset) {
440 /* store what we have so far */
441 memcpy(&buf[len], in, offset);
442 len += offset;
443 in += offset;
444 offset = 0;
445 }
446
Radek Krejci7a7fa902018-09-25 17:08:21 +0200447 ++offset;
448 if (in[offset] != '#') {
449 /* entity reference - only predefined references are supported */
450 if (!strncmp(&in[offset], "lt;", 3)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100451 buf[len++] = '<';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200452 in += 4; /* &lt; */
453 } else if (!strncmp(&in[offset], "gt;", 3)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100454 buf[len++] = '>';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200455 in += 4; /* &gt; */
456 } else if (!strncmp(&in[offset], "amp;", 4)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100457 buf[len++] = '&';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200458 in += 5; /* &amp; */
459 } else if (!strncmp(&in[offset], "apos;", 5)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100460 buf[len++] = '\'';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200461 in += 6; /* &apos; */
462 } else if (!strncmp(&in[offset], "quot;", 5)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100463 buf[len++] = '\"';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200464 in += 6; /* &quot; */
465 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100466 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
Radek Krejcied6c6ad2018-09-26 09:10:18 +0200467 "Entity reference \"%.*s\" not supported, only predefined references allowed.", 10, &in[offset-1]);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200468 goto error;
469 }
470 offset = 0;
471 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100472 p = (void *)&in[offset - 1];
Radek Krejci7a7fa902018-09-25 17:08:21 +0200473 /* character reference */
474 ++offset;
475 if (isdigit(in[offset])) {
476 for (n = 0; isdigit(in[offset]); offset++) {
477 n = (10 * n) + (in[offset] - '0');
478 }
479 } else if (in[offset] == 'x' && isxdigit(in[offset + 1])) {
480 for (n = 0, ++offset; isxdigit(in[offset]); offset++) {
481 if (isdigit(in[offset])) {
482 u = (in[offset] - '0');
483 } else if (in[offset] > 'F') {
484 u = 10 + (in[offset] - 'a');
485 } else {
486 u = 10 + (in[offset] - 'A');
487 }
488 n = (16 * n) + u;
489 }
490 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100491 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Invalid character reference \"%.*s\".", 12, p);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200492 goto error;
493
494 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100495
Radek Krejci7a7fa902018-09-25 17:08:21 +0200496 LY_CHECK_ERR_GOTO(in[offset] != ';',
Michal Vaskob36053d2020-03-26 15:49:30 +0100497 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP,
Radek Krejci7a7fa902018-09-25 17:08:21 +0200498 LY_VCODE_INSTREXP_len(&in[offset]), &in[offset], ";"),
499 error);
500 ++offset;
Michal Vaskob36053d2020-03-26 15:49:30 +0100501 LY_CHECK_ERR_GOTO(lyxml_pututf8(&buf[len], n, &u),
502 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
503 "Invalid character reference \"%.*s\" (0x%08x).", 12, p, n),
Radek Krejci7a7fa902018-09-25 17:08:21 +0200504 error);
505 len += u;
506 in += offset;
507 offset = 0;
508 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100509 } else if (in[offset] == endchar) {
Radek Krejci7a7fa902018-09-25 17:08:21 +0200510 /* end of string */
Radek Krejcid70d1072018-10-09 14:20:47 +0200511 if (buf) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100512 /* realloc exact size string */
513 buf = ly_realloc(buf, len + offset + 1);
514 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
515 size = len + offset + 1;
Radek Krejcid70d1072018-10-09 14:20:47 +0200516 memcpy(&buf[len], in, offset);
Michal Vaskob36053d2020-03-26 15:49:30 +0100517
518 /* set terminating NULL byte */
519 buf[len + offset] = '\0';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200520 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200521 len += offset;
Michal Vaskob36053d2020-03-26 15:49:30 +0100522 in += offset;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200523 goto success;
524 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100525 if (!is_xmlws(in[offset])) {
526 /* non WS */
527 ws = 0;
528 }
529
Radek Krejci7a7fa902018-09-25 17:08:21 +0200530 /* log lines */
531 if (in[offset] == '\n') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100532 ++xmlctx->line;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200533 }
534
535 /* continue */
536 ++offset;
537 }
538 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100539
540 /* EOF reached before endchar */
541 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
542
Radek Krejci7a7fa902018-09-25 17:08:21 +0200543error:
Michal Vaskob36053d2020-03-26 15:49:30 +0100544 free(buf);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200545 return LY_EVALID;
546
547success:
Radek Krejcid70d1072018-10-09 14:20:47 +0200548 if (buf) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100549 *value = buf;
550 *dynamic = 1;
551 } else {
552 *value = (char *)start;
553 *dynamic = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200554 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100555 *length = len;
556 *ws_only = ws;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200557
Michal Vaskob36053d2020-03-26 15:49:30 +0100558 xmlctx->input = in;
559 return LY_SUCCESS;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200560
561#undef BUFSIZE
562#undef BUFSIZE_STEP
Radek Krejci7a7fa902018-09-25 17:08:21 +0200563}
564
Michal Vasko8cef5232020-06-15 17:59:47 +0200565/**
566 * @brief Parse XML closing element and match it to a stored starting element.
567 *
568 * @param[in] xmlctx XML context to use.
569 * @param[in] prefix Expected closing element prefix.
570 * @param[in] prefix_len Length of @p prefix.
571 * @param[in] name Expected closing element name.
572 * @param[in] name_len Length of @p name.
573 * @param[in] empty Whether we are parsing a special "empty" element (with joined starting and closing tag) with no value.
574 * @return LY_ERR value.
575 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100576static LY_ERR
577lyxml_close_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, const char *name, size_t name_len,
578 int empty)
Radek Krejcid972c252018-09-25 13:23:39 +0200579{
Michal Vaskob36053d2020-03-26 15:49:30 +0100580 struct lyxml_elem *e;
Radek Krejcid972c252018-09-25 13:23:39 +0200581
Michal Vaskob36053d2020-03-26 15:49:30 +0100582 /* match opening and closing element tags */
583 if (!xmlctx->elements.count) {
584 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Stray closing element tag (\"%.*s\").",
585 name_len, name);
586 return LY_EVALID;
587 }
Radek Krejcid972c252018-09-25 13:23:39 +0200588
Michal Vaskob36053d2020-03-26 15:49:30 +0100589 e = (struct lyxml_elem *)xmlctx->elements.objs[xmlctx->elements.count - 1];
590 if ((e->prefix_len != prefix_len) || (e->name_len != name_len)
591 || (prefix_len && strncmp(prefix, e->prefix, e->prefix_len)) || strncmp(name, e->name, e->name_len)) {
592 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
593 "Opening (\"%.*s%s%.*s\") and closing (\"%.*s%s%.*s\") elements tag mismatch.",
594 e->prefix_len, e->prefix ? e->prefix : "", e->prefix ? ":" : "", e->name_len, e->name,
595 prefix_len, prefix ? prefix : "", prefix ? ":" : "", name_len, name);
596 return LY_EVALID;
597 }
Radek Krejcid972c252018-09-25 13:23:39 +0200598
Michal Vaskob36053d2020-03-26 15:49:30 +0100599 /* opening and closing element tags matches, remove record from the opening tags list */
600 ly_set_rm_index(&xmlctx->elements, xmlctx->elements.count - 1, free);
Radek Krejcid972c252018-09-25 13:23:39 +0200601
Michal Vaskob36053d2020-03-26 15:49:30 +0100602 /* remove also the namespaces connected with the element */
603 lyxml_ns_rm(xmlctx);
Radek Krejcid972c252018-09-25 13:23:39 +0200604
Michal Vaskob36053d2020-03-26 15:49:30 +0100605 /* skip WS */
606 ign_xmlws(xmlctx);
Radek Krejcid972c252018-09-25 13:23:39 +0200607
Michal Vaskob36053d2020-03-26 15:49:30 +0100608 /* special "<elem/>" element */
609 if (empty && (xmlctx->input[0] == '/')) {
610 move_input(xmlctx, 1);
611 }
Michal Vasko52927e22020-03-16 17:26:14 +0100612
Michal Vaskob36053d2020-03-26 15:49:30 +0100613 /* parse closing tag */
614 if (xmlctx->input[0] != '>') {
615 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->input),
616 xmlctx->input, "element tag termination ('>')");
617 return LY_EVALID;
618 }
Michal Vasko52927e22020-03-16 17:26:14 +0100619
Michal Vaskob36053d2020-03-26 15:49:30 +0100620 /* move after closing tag without checking for EOF */
621 ++xmlctx->input;
Michal Vasko52927e22020-03-16 17:26:14 +0100622
Radek Krejcid972c252018-09-25 13:23:39 +0200623 return LY_SUCCESS;
624}
625
Michal Vasko8cef5232020-06-15 17:59:47 +0200626/**
627 * @brief Store parsed opening element and parse any included namespaces.
628 *
629 * @param[in] xmlctx XML context to use.
630 * @param[in] prefix Parsed starting element prefix.
631 * @param[in] prefix_len Length of @p prefix.
632 * @param[in] name Parsed starting element name.
633 * @param[in] name_len Length of @p name.
634 * @return LY_ERR value.
635 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100636static LY_ERR
637lyxml_open_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, const char *name, size_t name_len)
Radek Krejcib1890642018-10-03 14:05:40 +0200638{
Michal Vaskob36053d2020-03-26 15:49:30 +0100639 LY_ERR ret = LY_SUCCESS;
640 struct lyxml_elem *e;
641 const char *prev_input;
642 char *value;
643 size_t parsed, value_len;
644 int ws_only, dynamic, is_ns;
645 uint32_t c;
Radek Krejcib1890642018-10-03 14:05:40 +0200646
Michal Vaskob36053d2020-03-26 15:49:30 +0100647 /* store element opening tag information */
648 e = malloc(sizeof *e);
649 LY_CHECK_ERR_RET(!e, LOGMEM(xmlctx->ctx), LY_EMEM);
650 e->name = name;
651 e->prefix = prefix;
652 e->name_len = name_len;
653 e->prefix_len = prefix_len;
654 ly_set_add(&xmlctx->elements, e, LY_SET_OPT_USEASLIST);
655
656 /* skip WS */
657 ign_xmlws(xmlctx);
658
659 /* parse and store all namespaces */
660 prev_input = xmlctx->input;
661 is_ns = 1;
662 while ((xmlctx->input[0] != '\0') && !ly_getutf8(&xmlctx->input, &c, &parsed) && is_xmlqnamestartchar(c)) {
663 xmlctx->input -= parsed;
664
665 /* parse attribute name */
666 LY_CHECK_GOTO(ret = lyxml_parse_qname(xmlctx, &prefix, &prefix_len, &name, &name_len), cleanup);
667
668 /* parse the value */
669 LY_CHECK_GOTO(ret = lyxml_next_attr_content(xmlctx, (const char **)&value, &value_len, &ws_only, &dynamic), cleanup);
670
671 /* store every namespace */
672 if ((prefix && !ly_strncmp("xmlns", prefix, prefix_len)) || (!prefix && !ly_strncmp("xmlns", name, name_len))) {
673 LY_CHECK_GOTO(ret = lyxml_ns_add(xmlctx, prefix ? name : NULL, prefix ? name_len : 0,
674 dynamic ? value : strndup(value, value_len)), cleanup);
675 dynamic = 0;
676 } else {
677 /* not a namespace */
678 is_ns = 0;
679 }
680 if (dynamic) {
681 free(value);
682 }
683
684 /* skip WS */
685 ign_xmlws(xmlctx);
686
687 if (is_ns) {
688 /* we can actually skip all the namespaces as there is no reason to parse them again */
689 prev_input = xmlctx->input;
690 }
Radek Krejcib1890642018-10-03 14:05:40 +0200691 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100692
693cleanup:
694 if (!ret) {
695 xmlctx->input = prev_input;
696 }
697 return ret;
698}
699
Michal Vasko8cef5232020-06-15 17:59:47 +0200700/**
701 * @brief Move parser to the attribute content and parse it.
702 *
703 * @param[in] xmlctx XML context to use.
704 * @param[out] value Parsed attribute value.
705 * @param[out] value_len Length of @p value.
706 * @param[out] ws_only Whether the value is empty/white-spaces only.
707 * @param[out] dynamic Whether the value was dynamically allocated.
708 * @return LY_ERR value.
709 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100710static LY_ERR
711lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *value_len, int *ws_only, int *dynamic)
712{
713 char quot;
714
715 /* skip WS */
716 ign_xmlws(xmlctx);
717
718 /* skip '=' */
719 if (xmlctx->input[0] == '\0') {
720 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
721 return LY_EVALID;
722 } else if (xmlctx->input[0] != '=') {
723 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->input),
724 xmlctx->input, "'='");
725 return LY_EVALID;
726 }
727 move_input(xmlctx, 1);
728
729 /* skip WS */
730 ign_xmlws(xmlctx);
731
732 /* find quotes */
733 if (xmlctx->input[0] == '\0') {
734 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
735 return LY_EVALID;
736 } else if ((xmlctx->input[0] != '\'') && (xmlctx->input[0] != '\"')) {
737 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->input),
738 xmlctx->input, "either single or double quotation mark");
739 return LY_EVALID;
740 }
741
742 /* remember quote */
743 quot = xmlctx->input[0];
744 move_input(xmlctx, 1);
745
746 /* parse attribute value */
747 LY_CHECK_RET(lyxml_parse_value(xmlctx, quot, (char **)value, value_len, ws_only, dynamic));
748
749 /* move after ending quote (without checking for EOF) */
750 ++xmlctx->input;
751
752 return LY_SUCCESS;
753}
754
Michal Vasko8cef5232020-06-15 17:59:47 +0200755/**
756 * @brief Move parser to the next attribute and parse it.
757 *
758 * @param[in] xmlctx XML context to use.
759 * @param[out] prefix Parsed attribute prefix.
760 * @param[out] prefix_len Length of @p prefix.
761 * @param[out] name Parsed attribute name.
762 * @param[out] name_len Length of @p name.
763 * @return LY_ERR value.
764 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100765static LY_ERR
766lyxml_next_attribute(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
767{
768 const char *in;
769 char *value;
770 uint32_t c;
771 size_t parsed, value_len;
772 int ws_only, dynamic;
773
774 /* skip WS */
775 ign_xmlws(xmlctx);
776
777 /* parse only possible attributes */
778 while ((xmlctx->input[0] != '>') && (xmlctx->input[0] != '/')) {
779 in = xmlctx->input;
780 if (in[0] == '\0') {
781 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
782 return LY_EVALID;
783 } else if ((ly_getutf8(&in, &c, &parsed) || !is_xmlqnamestartchar(c))) {
784 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in - parsed), in - parsed,
785 "element tag end ('>' or '/>') or an attribute");
786 return LY_EVALID;
787 }
788
789 /* parse attribute name */
790 LY_CHECK_RET(lyxml_parse_qname(xmlctx, prefix, prefix_len, name, name_len));
791
792 if ((!*prefix || ly_strncmp("xmlns", *prefix, *prefix_len)) && (*prefix || ly_strncmp("xmlns", *name, *name_len))) {
793 /* standard attribute */
794 break;
795 }
796
797 /* namespace, skip it */
798 LY_CHECK_RET(lyxml_next_attr_content(xmlctx, (const char **)&value, &value_len, &ws_only, &dynamic));
799 if (dynamic) {
800 free(value);
801 }
802
803 /* skip WS */
804 ign_xmlws(xmlctx);
805 }
806
807 return LY_SUCCESS;
808}
809
Michal Vasko8cef5232020-06-15 17:59:47 +0200810/**
811 * @brief Move parser to the next element and parse it.
812 *
813 * @param[in] xmlctx XML context to use.
814 * @param[out] prefix Parsed element prefix.
815 * @param[out] prefix_len Length of @p prefix.
816 * @param[out] name Parse element name.
817 * @param[out] name_len Length of @p name.
818 * @return LY_ERR value.
819 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100820static LY_ERR
821lyxml_next_element(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len,
822 int *closing)
823{
824 /* skip WS until EOF or after opening tag '<' */
825 LY_CHECK_RET(lyxml_skip_until_end_or_after_otag(xmlctx));
826 if (xmlctx->input[0] == '\0') {
827 /* set return values */
828 *prefix = *name = NULL;
829 *prefix_len = *name_len = 0;
830 return LY_SUCCESS;
831 }
832
833 if (xmlctx->input[0] == '/') {
834 move_input(xmlctx, 1);
835 *closing = 1;
836 } else {
837 *closing = 0;
838 }
839
840 /* skip WS */
841 ign_xmlws(xmlctx);
842
843 /* parse element name */
844 LY_CHECK_RET(lyxml_parse_qname(xmlctx, prefix, prefix_len, name, name_len));
845
846 return LY_SUCCESS;
847}
848
849LY_ERR
850lyxml_ctx_new(const struct ly_ctx *ctx, const char *input, struct lyxml_ctx **xmlctx_p)
851{
852 LY_ERR ret = LY_SUCCESS;
853 struct lyxml_ctx *xmlctx;
854 int closing;
855
856 /* new context */
857 xmlctx = calloc(1, sizeof *xmlctx);
858 LY_CHECK_ERR_RET(!xmlctx, LOGMEM(ctx), LY_EMEM);
859 xmlctx->ctx = ctx;
860 xmlctx->line = 1;
861 xmlctx->input = input;
862
863 /* parse next element, if any */
864 LY_CHECK_GOTO(ret = lyxml_next_element(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name,
865 &xmlctx->name_len, &closing), cleanup);
866
867 if (xmlctx->input[0] == '\0') {
868 /* update status */
869 xmlctx->status = LYXML_END;
870 } else if (closing) {
871 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Stray closing element tag (\"%.*s\").",
872 xmlctx->name_len, xmlctx->name);
873 ret = LY_EVALID;
874 goto cleanup;
875 } else {
876 /* open an element, also parses all enclosed namespaces */
877 LY_CHECK_GOTO(ret = lyxml_open_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len), cleanup);
878
879 /* update status */
880 xmlctx->status = LYXML_ELEMENT;
881 }
882
883cleanup:
884 if (ret) {
885 lyxml_ctx_free(xmlctx);
886 } else {
887 *xmlctx_p = xmlctx;
888 }
889 return ret;
890}
891
892LY_ERR
893lyxml_ctx_next(struct lyxml_ctx *xmlctx)
894{
895 LY_ERR ret = LY_SUCCESS;
896 int closing;
897 struct lyxml_elem *e;
898
899 /* if the value was not used, free it */
900 if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) {
901 free((char *)xmlctx->value);
902 xmlctx->value = NULL;
903 xmlctx->dynamic = 0;
904 }
905
906 switch (xmlctx->status) {
907 /* content |</elem> */
908 case LYXML_ELEM_CONTENT:
909 /* handle special case when empty content for "<elem/>" was returned */
910 if (xmlctx->input[0] == '/') {
911 assert(xmlctx->elements.count);
912 e = (struct lyxml_elem *)xmlctx->elements.objs[xmlctx->elements.count - 1];
913
914 /* close the element (parses closing tag) */
915 LY_CHECK_GOTO(ret = lyxml_close_element(xmlctx, e->prefix, e->prefix_len, e->name, e->name_len, 1), cleanup);
916
917 /* update status */
918 xmlctx->status = LYXML_ELEM_CLOSE;
919 break;
920 }
921 /* fallthrough */
922
923 /* </elem>| <elem2>* */
924 case LYXML_ELEM_CLOSE:
925 /* parse next element, if any */
926 LY_CHECK_GOTO(ret = lyxml_next_element(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name,
927 &xmlctx->name_len, &closing), cleanup);
928
929 if (xmlctx->input[0] == '\0') {
930 /* update status */
931 xmlctx->status = LYXML_END;
932 } else if (closing) {
933 /* close an element (parses also closing tag) */
934 LY_CHECK_GOTO(ret = lyxml_close_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len, 0), cleanup);
935
936 /* update status */
937 xmlctx->status = LYXML_ELEM_CLOSE;
938 } else {
939 /* open an element, also parses all enclosed namespaces */
940 LY_CHECK_GOTO(ret = lyxml_open_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len), cleanup);
941
942 /* update status */
943 xmlctx->status = LYXML_ELEMENT;
944 }
945 break;
946
947 /* <elem| attr='val'* > content */
948 case LYXML_ELEMENT:
949
950 /* attr='val'| attr='val'* > content */
951 case LYXML_ATTR_CONTENT:
952 /* parse attribute name, if any */
953 LY_CHECK_GOTO(ret = lyxml_next_attribute(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name, &xmlctx->name_len), cleanup);
954
955 if (xmlctx->input[0] == '>') {
956 /* no attributes but a closing tag */
957 move_input(xmlctx, 1);
958
959 /* parse element content */
960 LY_CHECK_GOTO(ret = lyxml_parse_value(xmlctx, '<', (char **)&xmlctx->value, &xmlctx->value_len, &xmlctx->ws_only,
961 &xmlctx->dynamic), cleanup);
962
963 if (!xmlctx->value_len) {
964 /* use empty value, easier to work with */
965 xmlctx->value = "";
966 assert(!xmlctx->dynamic);
967 }
968
969 /* update status */
970 xmlctx->status = LYXML_ELEM_CONTENT;
971 } else if (xmlctx->input[0] == '/') {
972 /* no content but we still return it */
973 xmlctx->value = "";
974 xmlctx->value_len = 0;
975 xmlctx->ws_only = 1;
976 xmlctx->dynamic = 0;
977
978 /* update status */
979 xmlctx->status = LYXML_ELEM_CONTENT;
980 } else {
981 /* update status */
982 xmlctx->status = LYXML_ATTRIBUTE;
983 }
984 break;
985
986 /* attr|='val' */
987 case LYXML_ATTRIBUTE:
988 /* skip formatting and parse value */
989 LY_CHECK_GOTO(ret = lyxml_next_attr_content(xmlctx, &xmlctx->value, &xmlctx->value_len, &xmlctx->ws_only,
990 &xmlctx->dynamic), cleanup);
991
992 /* update status */
993 xmlctx->status = LYXML_ATTR_CONTENT;
994 break;
995
996 /* </elem> |EOF */
997 case LYXML_END:
998 /* nothing to do */
999 break;
1000 }
1001
1002cleanup:
1003 if (ret) {
1004 /* invalidate context */
1005 xmlctx->status = LYXML_END;
1006 }
1007 return ret;
1008}
1009
1010LY_ERR
1011lyxml_ctx_peek(struct lyxml_ctx *xmlctx, enum LYXML_PARSER_STATUS *next)
1012{
1013 LY_ERR ret = LY_SUCCESS;
1014 const char *prefix, *name, *prev_input;
1015 size_t prefix_len, name_len;
1016 int closing;
1017
1018 prev_input = xmlctx->input;
1019
1020 switch (xmlctx->status) {
1021 case LYXML_ELEM_CONTENT:
1022 if (xmlctx->input[0] == '/') {
1023 *next = LYXML_ELEM_CLOSE;
1024 break;
1025 }
1026 /* fallthrough */
1027 case LYXML_ELEM_CLOSE:
1028 /* parse next element, if any */
1029 LY_CHECK_GOTO(ret = lyxml_next_element(xmlctx, &prefix, &prefix_len, &name, &name_len, &closing), cleanup);
1030
1031 if (xmlctx->input[0] == '\0') {
1032 *next = LYXML_END;
1033 } else if (closing) {
1034 *next = LYXML_ELEM_CLOSE;
1035 } else {
1036 *next = LYXML_ELEMENT;
1037 }
1038 break;
1039 case LYXML_ELEMENT:
1040 case LYXML_ATTR_CONTENT:
1041 /* parse attribute name, if any */
1042 LY_CHECK_GOTO(ret = lyxml_next_attribute(xmlctx, &prefix, &prefix_len, &name, &name_len), cleanup);
1043
1044 if ((xmlctx->input[0] == '>') || (xmlctx->input[0] == '/')) {
1045 *next = LYXML_ELEM_CONTENT;
1046 } else {
1047 *next = LYXML_ATTRIBUTE;
1048 }
1049 break;
1050 case LYXML_ATTRIBUTE:
1051 *next = LYXML_ATTR_CONTENT;
1052 break;
1053 case LYXML_END:
1054 *next = LYXML_END;
1055 break;
1056 }
1057
1058cleanup:
1059 xmlctx->input = prev_input;
1060 return ret;
1061}
1062
1063void
1064lyxml_ctx_free(struct lyxml_ctx *xmlctx)
1065{
1066 uint32_t u;
1067
1068 if (!xmlctx) {
1069 return;
1070 }
1071
1072 if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) {
1073 free((char *)xmlctx->value);
1074 }
1075 ly_set_erase(&xmlctx->elements, free);
1076 for (u = xmlctx->ns.count - 1; u + 1 > 0; --u) {
1077 /* remove the ns structure */
1078 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->prefix);
1079 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->uri);
1080 free(xmlctx->ns.objs[u]);
1081 }
1082 ly_set_erase(&xmlctx->ns, NULL);
1083 free(xmlctx);
Radek Krejcib1890642018-10-03 14:05:40 +02001084}
Radek Krejcie7b95092019-05-15 11:03:07 +02001085
1086LY_ERR
Radek Krejci241f6b52020-05-21 18:13:49 +02001087lyxml_dump_text(struct ly_out *out, const char *text, int attribute)
Radek Krejcie7b95092019-05-15 11:03:07 +02001088{
Radek Krejcibaeb8382020-05-27 16:44:53 +02001089 ssize_t ret = LY_SUCCESS;
Radek Krejcie7b95092019-05-15 11:03:07 +02001090 unsigned int u;
1091
1092 if (!text) {
1093 return 0;
1094 }
1095
1096 for (u = 0; text[u]; u++) {
1097 switch (text[u]) {
1098 case '&':
Radek Krejci241f6b52020-05-21 18:13:49 +02001099 ret = ly_print(out, "&amp;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001100 break;
1101 case '<':
Radek Krejci241f6b52020-05-21 18:13:49 +02001102 ret = ly_print(out, "&lt;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001103 break;
1104 case '>':
1105 /* not needed, just for readability */
Radek Krejci241f6b52020-05-21 18:13:49 +02001106 ret = ly_print(out, "&gt;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001107 break;
1108 case '"':
1109 if (attribute) {
Radek Krejci241f6b52020-05-21 18:13:49 +02001110 ret = ly_print(out, "&quot;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001111 break;
1112 }
1113 /* falls through */
1114 default:
Radek Krejcibaeb8382020-05-27 16:44:53 +02001115 ret = ly_write(out, &text[u], 1);
Radek Krejcie7b95092019-05-15 11:03:07 +02001116 }
1117 }
1118
Radek Krejcibaeb8382020-05-27 16:44:53 +02001119 return ret < 0 ? (-1 * ret) : 0;
Radek Krejcie7b95092019-05-15 11:03:07 +02001120}
1121
Michal Vasko52927e22020-03-16 17:26:14 +01001122LY_ERR
Michal Vaskob36053d2020-03-26 15:49:30 +01001123lyxml_get_prefixes(struct lyxml_ctx *xmlctx, const char *value, size_t value_len, struct ly_prefix **val_prefs)
Michal Vasko52927e22020-03-16 17:26:14 +01001124{
1125 LY_ERR ret;
Radek Krejci7eb54ba2020-05-18 16:30:04 +02001126 LY_ARRAY_SIZE_TYPE u;
1127 uint32_t c;
Michal Vasko52927e22020-03-16 17:26:14 +01001128 const struct lyxml_ns *ns;
1129 const char *start, *stop;
1130 struct ly_prefix *prefixes = NULL;
1131 size_t len;
1132
1133 for (stop = start = value; (size_t)(stop - value) < value_len; start = stop) {
1134 size_t bytes;
1135 ly_getutf8(&stop, &c, &bytes);
1136 if (is_xmlqnamestartchar(c)) {
1137 for (ly_getutf8(&stop, &c, &bytes);
1138 is_xmlqnamechar(c) && (size_t)(stop - value) < value_len;
1139 ly_getutf8(&stop, &c, &bytes));
1140 stop = stop - bytes;
1141 if (*stop == ':') {
1142 /* we have a possible prefix */
1143 len = stop - start;
Michal Vaskob36053d2020-03-26 15:49:30 +01001144 ns = lyxml_ns_get(xmlctx, start, len);
Michal Vasko52927e22020-03-16 17:26:14 +01001145 if (ns) {
1146 struct ly_prefix *p = NULL;
1147
1148 /* check whether we do not already have this prefix stored */
1149 LY_ARRAY_FOR(prefixes, u) {
1150 if (!ly_strncmp(prefixes[u].pref, start, len)) {
1151 p = &prefixes[u];
1152 break;
1153 }
1154 }
1155 if (!p) {
Michal Vaskob36053d2020-03-26 15:49:30 +01001156 LY_ARRAY_NEW_GOTO(xmlctx->ctx, prefixes, p, ret, error);
1157 p->pref = lydict_insert(xmlctx->ctx, start, len);
1158 p->ns = lydict_insert(xmlctx->ctx, ns->uri, 0);
Michal Vasko52927e22020-03-16 17:26:14 +01001159 } /* else the prefix already present */
1160 }
1161 }
1162 stop = stop + bytes;
1163 }
1164 }
1165
1166 *val_prefs = prefixes;
1167 return LY_SUCCESS;
1168
1169error:
1170 LY_ARRAY_FOR(prefixes, u) {
Michal Vaskob36053d2020-03-26 15:49:30 +01001171 lydict_remove(xmlctx->ctx, prefixes[u].pref);
Michal Vasko52927e22020-03-16 17:26:14 +01001172 }
1173 LY_ARRAY_FREE(prefixes);
1174 return ret;
1175}
1176
1177LY_ERR
1178lyxml_value_compare(const char *value1, const struct ly_prefix *prefs1, const char *value2, const struct ly_prefix *prefs2)
1179{
1180 const char *ptr1, *ptr2, *ns1, *ns2;
Radek Krejci7eb54ba2020-05-18 16:30:04 +02001181 LY_ARRAY_SIZE_TYPE u1, u2;
Michal Vasko52927e22020-03-16 17:26:14 +01001182 int len;
1183
1184 if (!value1 && !value2) {
1185 return LY_SUCCESS;
1186 }
1187 if ((value1 && !value2) || (!value1 && value2)) {
1188 return LY_ENOT;
1189 }
1190
1191 ptr1 = value1;
1192 ptr2 = value2;
1193 while (ptr1[0] && ptr2[0]) {
1194 if (ptr1[0] != ptr2[0]) {
1195 /* it can be a start of prefix that maps to the same module */
1196 ns1 = ns2 = NULL;
1197 if (prefs1) {
1198 /* find module of the first prefix, if any */
1199 LY_ARRAY_FOR(prefs1, u1) {
1200 len = strlen(prefs1[u1].pref);
1201 if (!strncmp(ptr1, prefs1[u1].pref, len) && (ptr1[len] == ':')) {
1202 ns1 = prefs1[u1].ns;
1203 break;
1204 }
1205 }
1206 }
1207 if (prefs2) {
1208 /* find module of the second prefix, if any */
1209 LY_ARRAY_FOR(prefs2, u2) {
1210 len = strlen(prefs2[u2].pref);
1211 if (!strncmp(ptr2, prefs2[u2].pref, len) && (ptr2[len] == ':')) {
1212 ns2 = prefs2[u2].ns;
1213 break;
1214 }
1215 }
1216 }
1217
1218 if (!ns1 || !ns2 || (ns1 != ns2)) {
1219 /* not a prefix or maps to different namespaces */
1220 break;
1221 }
1222
1223 /* skip prefixes in both values (':' is skipped as iter) */
1224 ptr1 += strlen(prefs1[u1].pref);
1225 ptr2 += strlen(prefs2[u2].pref);
1226 }
1227
1228 ++ptr1;
1229 ++ptr2;
1230 }
1231 if (ptr1[0] || ptr2[0]) {
1232 /* not a match or simply different lengths */
1233 return LY_ENOT;
1234 }
1235
1236 return LY_SUCCESS;
1237}