blob: 004dbb907d5ccf3d5159736d8922396a42ba5b9d [file] [log] [blame]
Radek Krejcid91dbaf2018-09-21 15:51:39 +02001/**
2 * @file xml.c
3 * @author Radek Krejci <rkrejci@cesnet.cz>
Michal Vaskob36053d2020-03-26 15:49:30 +01004 * @author Michal Vasko <mvasko@cesnet.cz>
Radek Krejcid91dbaf2018-09-21 15:51:39 +02005 * @brief Generic XML parser implementation for libyang
6 *
Michal Vaskoda8fbbf2021-06-16 11:44:44 +02007 * Copyright (c) 2015 - 2021 CESNET, z.s.p.o.
Radek Krejcid91dbaf2018-09-21 15:51:39 +02008 *
9 * This source code is licensed under BSD 3-Clause License (the "License").
10 * You may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * https://opensource.org/licenses/BSD-3-Clause
14 */
15
Radek Krejci535ea9f2020-05-29 16:01:05 +020016#define _GNU_SOURCE
17
18#include "xml.h"
Radek Krejci4b74d5e2018-09-26 14:30:55 +020019
Radek Krejcib1890642018-10-03 14:05:40 +020020#include <assert.h>
Radek Krejci7a7fa902018-09-25 17:08:21 +020021#include <ctype.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020022#include <stdint.h>
Radek Krejcie7b95092019-05-15 11:03:07 +020023#include <stdlib.h>
Radek Krejci4b74d5e2018-09-26 14:30:55 +020024#include <string.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020025
Michal Vasko5aa44c02020-06-29 11:47:02 +020026#include "compat.h"
Michal Vaskoafac7822020-10-20 14:22:26 +020027#include "in_internal.h"
Michal Vasko8f702ee2024-02-20 15:44:24 +010028#include "ly_common.h"
Michal Vaskoafac7822020-10-20 14:22:26 +020029#include "out_internal.h"
Radek Krejci535ea9f2020-05-29 16:01:05 +020030#include "tree.h"
Radek Krejci77114102021-03-10 15:21:57 +010031#include "tree_schema_internal.h"
Radek Krejcid91dbaf2018-09-21 15:51:39 +020032
/*
 * Move the input of lyxml_ctx c forward by s bytes. If the new current character is
 * the terminating NUL (EOF), log LY_VCODE_EOF with the libyang context of c and
 * return LY_EVALID via LY_CHECK_ERR_RET.
 *
 * Wrapped in do/while(0) so that the macro is a single statement (safe in unbraced
 * if/else); both arguments are parenthesized.
 */
#define move_input(c, s) \
    do { \
        ly_in_skip((c)->in, (s)); \
        LY_CHECK_ERR_RET(!(c)->in->current[0], LOGVAL((c)->ctx, LY_VCODE_EOF), LY_EVALID); \
    } while (0)
Radek Krejcid91dbaf2018-09-21 15:51:39 +020037
/*
 * Skip all XML whitespace characters at the current position of the input of lyxml_ctx c.
 * LY_IN_NEW_LINE is invoked on the input handler for every '\n' that is skipped.
 *
 * Wrapped in do/while(0) so that the macro is a single statement; c is parenthesized
 * everywhere it is used.
 */
#define ign_xmlws(c) \
    do { \
        while (is_xmlws(*(c)->in->current)) { \
            if (*(c)->in->current == '\n') { \
                LY_IN_NEW_LINE((c)->in); \
            } \
            ly_in_skip((c)->in, 1); \
        } \
    } while (0)
Michal Vaskob36053d2020-03-26 15:49:30 +010046
Radek Krejci857189e2020-09-01 13:26:36 +020047static LY_ERR lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *value_len, ly_bool *ws_only,
48 ly_bool *dynamic);
Radek Krejcid91dbaf2018-09-21 15:51:39 +020049
Radek Krejci4b74d5e2018-09-26 14:30:55 +020050/**
Radek Krejcidd713ce2021-01-04 23:12:12 +010051 * @brief Ignore and skip any characters until the delim of the size delim_len is read, including the delim
Radek Krejci4b74d5e2018-09-26 14:30:55 +020052 *
Radek Krejcidd713ce2021-01-04 23:12:12 +010053 * @param[in] xmlctx XML parser context to provide input handler and libyang context
54 * @param[in] in input handler to read the data, it is updated only in case the section is correctly terminated.
55 * @param[in] delim Delimiter to detect end of the section.
56 * @param[in] delim_len Length of the delimiter string to use.
57 * @param[in] sectname Section name to refer in error message.
Michal Vasko63f3d842020-07-08 10:10:14 +020058 */
Radek Krejcidd713ce2021-01-04 23:12:12 +010059LY_ERR
60skip_section(struct lyxml_ctx *xmlctx, const char *delim, size_t delim_len, const char *sectname)
Radek Krejcid91dbaf2018-09-21 15:51:39 +020061{
62 size_t i;
Radek Krejcidd713ce2021-01-04 23:12:12 +010063 register const char *input, *a, *b;
64 uint64_t parsed = 0, newlines = 0;
Radek Krejcid91dbaf2018-09-21 15:51:39 +020065
Radek Krejcidd713ce2021-01-04 23:12:12 +010066 for (input = xmlctx->in->current; *input; ++input, ++parsed) {
Radek Krejcid91dbaf2018-09-21 15:51:39 +020067 if (*input != *delim) {
Radek Krejcidd713ce2021-01-04 23:12:12 +010068 if (*input == '\n') {
69 ++newlines;
70 }
Radek Krejcid91dbaf2018-09-21 15:51:39 +020071 continue;
72 }
73 a = input;
74 b = delim;
75 for (i = 0; i < delim_len; ++i) {
76 if (*a++ != *b++) {
77 break;
78 }
79 }
80 if (i == delim_len) {
Michal Vasko63f3d842020-07-08 10:10:14 +020081 /* delim found */
Radek Krejcidd713ce2021-01-04 23:12:12 +010082 xmlctx->in->line += newlines;
83 ly_in_skip(xmlctx->in, parsed + delim_len);
84 return LY_SUCCESS;
Radek Krejcid91dbaf2018-09-21 15:51:39 +020085 }
86 }
Michal Vasko63f3d842020-07-08 10:10:14 +020087
Radek Krejcidd713ce2021-01-04 23:12:12 +010088 /* delim not found,
89 * do not update input handler to refer to the beginning of the section in error message */
90 LOGVAL(xmlctx->ctx, LY_VCODE_NTERM, sectname);
91 return LY_EVALID;
Radek Krejcid91dbaf2018-09-21 15:51:39 +020092}
93
Radek Krejci4b74d5e2018-09-26 14:30:55 +020094/**
Michal Vaskob36053d2020-03-26 15:49:30 +010095 * @brief Check/Get an XML identifier from the input string.
96 *
97 * The identifier must have at least one valid character complying the name start character constraints.
98 * The identifier is terminated by the first character, which does not comply to the name character constraints.
99 *
100 * See https://www.w3.org/TR/xml-names/#NT-NCName
101 *
102 * @param[in] xmlctx XML context.
103 * @param[out] start Pointer to the start of the identifier.
104 * @param[out] end Pointer ot the end of the identifier.
105 * @return LY_ERR value.
106 */
107static LY_ERR
108lyxml_parse_identifier(struct lyxml_ctx *xmlctx, const char **start, const char **end)
109{
110 const char *s, *in;
111 uint32_t c;
112 size_t parsed;
113 LY_ERR rc;
114
Michal Vasko63f3d842020-07-08 10:10:14 +0200115 in = s = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100116
117 /* check NameStartChar (minus colon) */
118 LY_CHECK_ERR_RET(ly_getutf8(&in, &c, &parsed),
Radek Krejci2efc45b2020-12-22 16:25:44 +0100119 LOGVAL(xmlctx->ctx, LY_VCODE_INCHAR, in[0]),
Michal Vasko69730152020-10-09 16:30:07 +0200120 LY_EVALID);
Michal Vaskob36053d2020-03-26 15:49:30 +0100121 LY_CHECK_ERR_RET(!is_xmlqnamestartchar(c),
Radek Krejci2efc45b2020-12-22 16:25:44 +0100122 LOGVAL(xmlctx->ctx, LYVE_SYNTAX, "Identifier \"%s\" starts with an invalid character.", in - parsed),
Michal Vasko69730152020-10-09 16:30:07 +0200123 LY_EVALID);
Michal Vaskob36053d2020-03-26 15:49:30 +0100124
125 /* check rest of the identifier */
126 do {
127 /* move only successfully parsed bytes */
Michal Vasko63f3d842020-07-08 10:10:14 +0200128 ly_in_skip(xmlctx->in, parsed);
Michal Vaskob36053d2020-03-26 15:49:30 +0100129
130 rc = ly_getutf8(&in, &c, &parsed);
Radek Krejci2efc45b2020-12-22 16:25:44 +0100131 LY_CHECK_ERR_RET(rc, LOGVAL(xmlctx->ctx, LY_VCODE_INCHAR, in[0]), LY_EVALID);
Michal Vaskob36053d2020-03-26 15:49:30 +0100132 } while (is_xmlqnamechar(c));
133
134 *start = s;
Michal Vasko63f3d842020-07-08 10:10:14 +0200135 *end = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100136 return LY_SUCCESS;
137}
138
139/**
140 * @brief Add namespace definition into XML context.
141 *
142 * Namespaces from a single element are supposed to be added sequentially together (not interleaved by a namespace from other
143 * element). This mimic namespace visibility, since the namespace defined in element E is not visible from its parents or
144 * siblings. On the other hand, namespace from a parent element can be redefined in a child element. This is also reflected
145 * by lyxml_ns_get() which returns the most recent namespace definition for the given prefix.
146 *
147 * When leaving processing of a subtree of some element (after it is removed from xmlctx->elements), caller is supposed to call
148 * lyxml_ns_rm() to remove all the namespaces defined in such an element from the context.
149 *
150 * @param[in] xmlctx XML context to work with.
151 * @param[in] prefix Pointer to the namespace prefix. Can be NULL for default namespace.
152 * @param[in] prefix_len Length of the prefix.
153 * @param[in] uri Namespace URI (value) to store directly. Value is always spent.
154 * @return LY_ERR values.
155 */
156LY_ERR
157lyxml_ns_add(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, char *uri)
158{
Michal Vasko4b3f2952022-08-26 09:24:04 +0200159 LY_ERR rc = LY_SUCCESS;
Michal Vaskob36053d2020-03-26 15:49:30 +0100160 struct lyxml_ns *ns;
Michal Vasko4b3f2952022-08-26 09:24:04 +0200161 uint32_t i;
162
163 /* check for duplicates */
164 if (xmlctx->ns.count) {
165 i = xmlctx->ns.count;
166 do {
167 --i;
168 ns = xmlctx->ns.objs[i];
169 if (ns->depth < xmlctx->elements.count) {
170 /* only namespaces of parents, no need to check further */
171 break;
172 } else if (prefix && ns->prefix && !ly_strncmp(ns->prefix, prefix, prefix_len)) {
173 if (!strcmp(ns->uri, uri)) {
174 /* exact same prefix and namespace, ignore */
175 goto cleanup;
176 }
177
178 LOGVAL(xmlctx->ctx, LYVE_SYNTAX, "Duplicate XML NS prefix \"%s\" used for namespaces \"%s\" and \"%s\".",
179 ns->prefix, ns->uri, uri);
180 rc = LY_EVALID;
181 goto cleanup;
182 } else if (!prefix && !ns->prefix) {
183 if (!strcmp(ns->uri, uri)) {
184 /* exact same default namespace, ignore */
185 goto cleanup;
186 }
187
188 LOGVAL(xmlctx->ctx, LYVE_SYNTAX, "Duplicate default XML namespaces \"%s\" and \"%s\".", ns->uri, uri);
189 rc = LY_EVALID;
190 goto cleanup;
191 }
192 } while (i);
193 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100194
195 ns = malloc(sizeof *ns);
Michal Vasko4b3f2952022-08-26 09:24:04 +0200196 LY_CHECK_ERR_GOTO(!ns, LOGMEM(xmlctx->ctx); rc = LY_EMEM, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100197
198 /* we need to connect the depth of the element where the namespace is defined with the
199 * namespace record to be able to maintain (remove) the record when the parser leaves
200 * (to its sibling or back to the parent) the element where the namespace was defined */
201 ns->depth = xmlctx->elements.count;
202
203 ns->uri = uri;
204 if (prefix) {
205 ns->prefix = strndup(prefix, prefix_len);
Michal Vasko4b3f2952022-08-26 09:24:04 +0200206 LY_CHECK_ERR_GOTO(!ns->prefix, LOGMEM(xmlctx->ctx); free(ns); rc = LY_EMEM, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100207 } else {
208 ns->prefix = NULL;
209 }
210
Michal Vasko4b3f2952022-08-26 09:24:04 +0200211 rc = ly_set_add(&xmlctx->ns, ns, 1, NULL);
212 LY_CHECK_ERR_GOTO(rc, free(ns->prefix); free(ns), cleanup);
Radek Krejciba03a5a2020-08-27 14:40:41 +0200213
Michal Vasko4b3f2952022-08-26 09:24:04 +0200214 /* successfully stored */
215 uri = NULL;
216
217cleanup:
218 free(uri);
219 return rc;
Michal Vaskob36053d2020-03-26 15:49:30 +0100220}
221
Michal Vaskob36053d2020-03-26 15:49:30 +0100222void
223lyxml_ns_rm(struct lyxml_ctx *xmlctx)
224{
Michal Vasko5bb196d2023-08-09 10:41:14 +0200225 struct lyxml_ns *ns;
226 uint32_t u;
227
228 if (!xmlctx->ns.count) {
229 return;
230 }
231
232 u = xmlctx->ns.count;
233 do {
234 --u;
235 ns = (struct lyxml_ns *)xmlctx->ns.objs[u];
236
237 if (ns->depth != xmlctx->elements.count + 1) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100238 /* we are done, the namespaces from a single element are supposed to be together */
239 break;
240 }
Michal Vasko5bb196d2023-08-09 10:41:14 +0200241
Michal Vaskob36053d2020-03-26 15:49:30 +0100242 /* remove the ns structure */
Michal Vasko5bb196d2023-08-09 10:41:14 +0200243 free(ns->prefix);
244 free(ns->uri);
245 free(ns);
Michal Vaskob36053d2020-03-26 15:49:30 +0100246 --xmlctx->ns.count;
Michal Vasko5bb196d2023-08-09 10:41:14 +0200247 } while (u);
Michal Vaskob36053d2020-03-26 15:49:30 +0100248
249 if (!xmlctx->ns.count) {
250 /* cleanup the xmlctx's namespaces storage */
251 ly_set_erase(&xmlctx->ns, NULL);
252 }
253}
254
Michal Vaskob36053d2020-03-26 15:49:30 +0100255const struct lyxml_ns *
Michal Vaskoc8a230d2020-08-14 12:17:10 +0200256lyxml_ns_get(const struct ly_set *ns_set, const char *prefix, size_t prefix_len)
Michal Vaskob36053d2020-03-26 15:49:30 +0100257{
Michal Vaskob36053d2020-03-26 15:49:30 +0100258 struct lyxml_ns *ns;
Michal Vasko5bb196d2023-08-09 10:41:14 +0200259 uint32_t u;
Michal Vaskob36053d2020-03-26 15:49:30 +0100260
Michal Vasko5bb196d2023-08-09 10:41:14 +0200261 if (!ns_set->count) {
262 return NULL;
263 }
264
265 u = ns_set->count;
266 do {
267 --u;
Michal Vaskoc8a230d2020-08-14 12:17:10 +0200268 ns = (struct lyxml_ns *)ns_set->objs[u];
Michal Vasko5bb196d2023-08-09 10:41:14 +0200269
Michal Vaskob36053d2020-03-26 15:49:30 +0100270 if (prefix && prefix_len) {
271 if (ns->prefix && !ly_strncmp(ns->prefix, prefix, prefix_len)) {
272 return ns;
273 }
274 } else if (!ns->prefix) {
275 /* default namespace */
276 return ns;
277 }
Michal Vasko5bb196d2023-08-09 10:41:14 +0200278 } while (u);
Michal Vaskob36053d2020-03-26 15:49:30 +0100279
280 return NULL;
281}
282
Michal Vasko8cef5232020-06-15 17:59:47 +0200283/**
284 * @brief Skip in the input until EOF or just after the opening tag.
285 * Handles special XML constructs (comment, cdata, doctype).
286 *
287 * @param[in] xmlctx XML context to use.
288 * @return LY_ERR value.
289 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100290static LY_ERR
291lyxml_skip_until_end_or_after_otag(struct lyxml_ctx *xmlctx)
292{
293 const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */
Michal Vasko63f3d842020-07-08 10:10:14 +0200294 const char *endtag, *sectname;
Radek Krejcidd713ce2021-01-04 23:12:12 +0100295 size_t endtag_len;
Michal Vaskob36053d2020-03-26 15:49:30 +0100296
297 while (1) {
298 ign_xmlws(xmlctx);
299
Michal Vasko63f3d842020-07-08 10:10:14 +0200300 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100301 /* EOF */
302 if (xmlctx->elements.count) {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100303 LOGVAL(ctx, LY_VCODE_EOF);
Michal Vaskob36053d2020-03-26 15:49:30 +0100304 return LY_EVALID;
305 }
306 return LY_SUCCESS;
Michal Vasko63f3d842020-07-08 10:10:14 +0200307 } else if (xmlctx->in->current[0] != '<') {
Michal Vaskob597eef2022-08-26 09:24:27 +0200308 LOGVAL(ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current), xmlctx->in->current,
309 "element tag start ('<')");
Michal Vaskob36053d2020-03-26 15:49:30 +0100310 return LY_EVALID;
311 }
312 move_input(xmlctx, 1);
313
Michal Vasko63f3d842020-07-08 10:10:14 +0200314 if (xmlctx->in->current[0] == '!') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100315 move_input(xmlctx, 1);
316 /* sections to ignore */
Michal Vasko63f3d842020-07-08 10:10:14 +0200317 if (!strncmp(xmlctx->in->current, "--", 2)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100318 /* comment */
319 move_input(xmlctx, 2);
320 sectname = "Comment";
321 endtag = "-->";
Radek Krejcif13b87b2020-12-01 22:02:17 +0100322 endtag_len = ly_strlen_const("-->");
Radek Krejcif13b87b2020-12-01 22:02:17 +0100323 } else if (!strncmp(xmlctx->in->current, "DOCTYPE", ly_strlen_const("DOCTYPE"))) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100324 /* Document type declaration - not supported */
Michal Vaskob597eef2022-08-26 09:24:27 +0200325 LOGVAL(ctx, LY_VCODE_NSUPP, "Document Type Declaration");
Michal Vaskob36053d2020-03-26 15:49:30 +0100326 return LY_EVALID;
327 } else {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100328 LOGVAL(ctx, LYVE_SYNTAX, "Unknown XML section \"%.20s\".", &xmlctx->in->current[-2]);
Michal Vaskob36053d2020-03-26 15:49:30 +0100329 return LY_EVALID;
330 }
Radek Krejcidd713ce2021-01-04 23:12:12 +0100331 LY_CHECK_RET(skip_section(xmlctx, endtag, endtag_len, sectname));
Michal Vasko63f3d842020-07-08 10:10:14 +0200332 } else if (xmlctx->in->current[0] == '?') {
Radek Krejcidd713ce2021-01-04 23:12:12 +0100333 LY_CHECK_RET(skip_section(xmlctx, "?>", 2, "Declaration"));
Michal Vaskob36053d2020-03-26 15:49:30 +0100334 } else {
335 /* other non-WS character */
336 break;
337 }
338 }
339
340 return LY_SUCCESS;
341}
342
Michal Vasko8cef5232020-06-15 17:59:47 +0200343/**
344 * @brief Parse QName.
345 *
346 * @param[in] xmlctx XML context to use.
347 * @param[out] prefix Parsed prefix, may be NULL.
348 * @param[out] prefix_len Length of @p prefix.
349 * @param[out] name Parsed name.
350 * @param[out] name_len Length of @p name.
351 * @return LY_ERR value.
352 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100353static LY_ERR
354lyxml_parse_qname(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
355{
356 const char *start, *end;
357
358 *prefix = NULL;
359 *prefix_len = 0;
360
361 LY_CHECK_RET(lyxml_parse_identifier(xmlctx, &start, &end));
362 if (end[0] == ':') {
363 /* we have prefixed identifier */
364 *prefix = start;
365 *prefix_len = end - start;
366
367 move_input(xmlctx, 1);
368 LY_CHECK_RET(lyxml_parse_identifier(xmlctx, &start, &end));
369 }
370
371 *name = start;
372 *name_len = end - start;
373 return LY_SUCCESS;
374}
375
376/**
Michal Vasko8cf6f722022-02-18 13:08:23 +0100377 * @brief Prepare buffer for new data.
378 *
379 * @param[in] ctx Context for logging.
380 * @param[in,out] in XML input data.
381 * @param[in,out] offset Current offset in @p in.
382 * @param[in] need_space Needed additional free space that is allocated.
383 * @param[in,out] buf Dynamic buffer.
384 * @param[in,out] len Current @p buf length (used characters).
385 * @param[in,out] size Current @p buf size (allocated characters).
386 * @return LY_ERR value.
387 */
388static LY_ERR
389lyxml_parse_value_use_buf(const struct ly_ctx *ctx, const char **in, size_t *offset, size_t need_space, char **buf,
390 size_t *len, size_t *size)
391{
392#define BUFSIZE 24
393#define BUFSIZE_STEP 128
394
395 if (!*buf) {
396 /* prepare output buffer */
397 *buf = malloc(BUFSIZE);
398 LY_CHECK_ERR_RET(!*buf, LOGMEM(ctx), LY_EMEM);
399 *size = BUFSIZE;
400 }
401
402 /* allocate needed space */
403 while (*len + *offset + need_space >= *size) {
404 *buf = ly_realloc(*buf, *size + BUFSIZE_STEP);
405 LY_CHECK_ERR_RET(!*buf, LOGMEM(ctx), LY_EMEM);
406 *size += BUFSIZE_STEP;
407 }
408
409 if (*offset) {
410 /* store what we have so far */
411 memcpy(&(*buf)[*len], *in, *offset);
412 *len += *offset;
413 *in += *offset;
414 *offset = 0;
415 }
416
417 return LY_SUCCESS;
418
419#undef BUFSIZE
420#undef BUFSIZE_STEP
421}
422
423/**
Michal Vasko8cef5232020-06-15 17:59:47 +0200424 * @brief Parse XML text content (value).
425 *
426 * @param[in] xmlctx XML context to use.
427 * @param[in] endchar Expected character to mark value end.
428 * @param[out] value Parsed value.
429 * @param[out] length Length of @p value.
430 * @param[out] ws_only Whether the value is empty/white-spaces only.
431 * @param[out] dynamic Whether the value was dynamically allocated.
432 * @return LY_ERR value.
433 */
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200434static LY_ERR
Radek Krejci857189e2020-09-01 13:26:36 +0200435lyxml_parse_value(struct lyxml_ctx *xmlctx, char endchar, char **value, size_t *length, ly_bool *ws_only, ly_bool *dynamic)
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200436{
Michal Vaskob36053d2020-03-26 15:49:30 +0100437 const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */
Michal Vasko7b3a00e2023-08-09 11:58:03 +0200438 const char *in = xmlctx->in->current, *start, *in_aux, *p;
Michal Vaskob36053d2020-03-26 15:49:30 +0100439 char *buf = NULL;
Radek Krejci4ad42aa2019-07-23 16:55:58 +0200440 size_t offset; /* read offset in input buffer */
441 size_t len; /* length of the output string (write offset in output buffer) */
442 size_t size = 0; /* size of the output buffer */
Radek Krejci117d2082018-09-26 10:05:14 +0200443 uint32_t n;
Michal Vaskob36053d2020-03-26 15:49:30 +0100444 size_t u;
Radek Krejci857189e2020-09-01 13:26:36 +0200445 ly_bool ws = 1;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200446
Michal Vaskob36053d2020-03-26 15:49:30 +0100447 assert(xmlctx);
Radek Krejcib1890642018-10-03 14:05:40 +0200448
Radek Krejcid70d1072018-10-09 14:20:47 +0200449 /* init */
Michal Vaskob36053d2020-03-26 15:49:30 +0100450 start = in;
Radek Krejcid70d1072018-10-09 14:20:47 +0200451 offset = len = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200452
453 /* parse */
454 while (in[offset]) {
455 if (in[offset] == '&') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100456 /* non WS */
457 ws = 0;
Radek Krejcid70d1072018-10-09 14:20:47 +0200458
Michal Vasko8cf6f722022-02-18 13:08:23 +0100459 /* use buffer and allocate enough for the offset and next character,
Michal Vaskob36053d2020-03-26 15:49:30 +0100460 * we will need 4 bytes at most since we support only the predefined
461 * (one-char) entities and character references */
Michal Vasko8cf6f722022-02-18 13:08:23 +0100462 LY_CHECK_RET(lyxml_parse_value_use_buf(ctx, &in, &offset, 4, &buf, &len, &size));
Michal Vaskob36053d2020-03-26 15:49:30 +0100463
Radek Krejci7a7fa902018-09-25 17:08:21 +0200464 ++offset;
465 if (in[offset] != '#') {
466 /* entity reference - only predefined references are supported */
Radek Krejcif13b87b2020-12-01 22:02:17 +0100467 if (!strncmp(&in[offset], "lt;", ly_strlen_const("lt;"))) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100468 buf[len++] = '<';
Radek Krejcif13b87b2020-12-01 22:02:17 +0100469 in += ly_strlen_const("&lt;");
470 } else if (!strncmp(&in[offset], "gt;", ly_strlen_const("gt;"))) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100471 buf[len++] = '>';
Radek Krejcif13b87b2020-12-01 22:02:17 +0100472 in += ly_strlen_const("&gt;");
473 } else if (!strncmp(&in[offset], "amp;", ly_strlen_const("amp;"))) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100474 buf[len++] = '&';
Radek Krejcif13b87b2020-12-01 22:02:17 +0100475 in += ly_strlen_const("&amp;");
476 } else if (!strncmp(&in[offset], "apos;", ly_strlen_const("apos;"))) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100477 buf[len++] = '\'';
Radek Krejcif13b87b2020-12-01 22:02:17 +0100478 in += ly_strlen_const("&apos;");
479 } else if (!strncmp(&in[offset], "quot;", ly_strlen_const("quot;"))) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100480 buf[len++] = '\"';
Radek Krejcif13b87b2020-12-01 22:02:17 +0100481 in += ly_strlen_const("&quot;");
Radek Krejci7a7fa902018-09-25 17:08:21 +0200482 } else {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100483 LOGVAL(ctx, LYVE_SYNTAX, "Entity reference \"%.*s\" not supported, only predefined references allowed.",
484 10, &in[offset - 1]);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200485 goto error;
486 }
487 offset = 0;
488 } else {
Michal Vasko7b3a00e2023-08-09 11:58:03 +0200489 p = &in[offset - 1];
Radek Krejci7a7fa902018-09-25 17:08:21 +0200490 /* character reference */
491 ++offset;
492 if (isdigit(in[offset])) {
493 for (n = 0; isdigit(in[offset]); offset++) {
Radek Krejcif13b87b2020-12-01 22:02:17 +0100494 n = (LY_BASE_DEC * n) + (in[offset] - '0');
Radek Krejci7a7fa902018-09-25 17:08:21 +0200495 }
Michal Vasko69730152020-10-09 16:30:07 +0200496 } else if ((in[offset] == 'x') && isxdigit(in[offset + 1])) {
Radek Krejci7a7fa902018-09-25 17:08:21 +0200497 for (n = 0, ++offset; isxdigit(in[offset]); offset++) {
498 if (isdigit(in[offset])) {
499 u = (in[offset] - '0');
500 } else if (in[offset] > 'F') {
Radek Krejcif13b87b2020-12-01 22:02:17 +0100501 u = LY_BASE_DEC + (in[offset] - 'a');
Radek Krejci7a7fa902018-09-25 17:08:21 +0200502 } else {
Radek Krejcif13b87b2020-12-01 22:02:17 +0100503 u = LY_BASE_DEC + (in[offset] - 'A');
Radek Krejci7a7fa902018-09-25 17:08:21 +0200504 }
Radek Krejcif13b87b2020-12-01 22:02:17 +0100505 n = (LY_BASE_HEX * n) + u;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200506 }
507 } else {
Michal Vasko7b3a00e2023-08-09 11:58:03 +0200508 LOGVAL(ctx, LYVE_SYNTAX, "Invalid character reference \"%.12s\".", p);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200509 goto error;
510
511 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100512
Michal Vasko8cf6f722022-02-18 13:08:23 +0100513 if (in[offset] != ';') {
514 LOGVAL(ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(&in[offset]), &in[offset], ";");
515 goto error;
516 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200517 ++offset;
Michal Vasko8cf6f722022-02-18 13:08:23 +0100518 if (ly_pututf8(&buf[len], n, &u)) {
Michal Vasko21eaa392024-02-20 15:48:42 +0100519 LOGVAL(ctx, LYVE_SYNTAX, "Invalid character reference \"%.12s\" (0x%08" PRIx32 ").", p, n);
Michal Vasko8cf6f722022-02-18 13:08:23 +0100520 goto error;
521 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200522 len += u;
523 in += offset;
524 offset = 0;
525 }
Michal Vasko8cf6f722022-02-18 13:08:23 +0100526 } else if (!strncmp(in + offset, "<![CDATA[", ly_strlen_const("<![CDATA["))) {
527 /* CDATA, find the end */
528 in_aux = strstr(in + offset + ly_strlen_const("<![CDATA["), "]]>");
529 if (!in_aux) {
530 LOGVAL(xmlctx->ctx, LY_VCODE_NTERM, "CDATA");
531 goto error;
532 }
533 u = in_aux - (in + offset + ly_strlen_const("<![CDATA["));
534
535 /* use buffer, allocate enough for the whole CDATA */
536 LY_CHECK_RET(lyxml_parse_value_use_buf(ctx, &in, &offset, u, &buf, &len, &size));
537
538 /* skip CDATA tag */
539 in += ly_strlen_const("<![CDATA[");
540 assert(!offset);
541
542 /* analyze CDATA for non WS and newline chars */
543 for (n = 0; n < u; ++n) {
544 if (in[n] == '\n') {
545 LY_IN_NEW_LINE(xmlctx->in);
546 } else if (!is_xmlws(in[n])) {
547 ws = 0;
548 }
549 }
550
551 /* copy CDATA */
552 memcpy(buf + len, in, u);
553 len += u;
554
555 /* move input skipping the end tag */
556 in += u + ly_strlen_const("]]>");
Michal Vaskob36053d2020-03-26 15:49:30 +0100557 } else if (in[offset] == endchar) {
Radek Krejci7a7fa902018-09-25 17:08:21 +0200558 /* end of string */
Radek Krejcid70d1072018-10-09 14:20:47 +0200559 if (buf) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100560 /* realloc exact size string */
561 buf = ly_realloc(buf, len + offset + 1);
562 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
563 size = len + offset + 1;
Michal Vasko08e9b112021-06-11 15:41:17 +0200564 if (offset) {
565 memcpy(&buf[len], in, offset);
566 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100567
568 /* set terminating NULL byte */
569 buf[len + offset] = '\0';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200570 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200571 len += offset;
Michal Vaskob36053d2020-03-26 15:49:30 +0100572 in += offset;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200573 goto success;
574 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100575 if (!is_xmlws(in[offset])) {
576 /* non WS */
577 ws = 0;
578 }
579
Radek Krejci7a7fa902018-09-25 17:08:21 +0200580 /* log lines */
581 if (in[offset] == '\n') {
Radek Krejcid54412f2020-12-17 20:25:35 +0100582 LY_IN_NEW_LINE(xmlctx->in);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200583 }
584
585 /* continue */
aPiecekb287b212021-05-04 14:24:25 +0200586 in_aux = &in[offset];
587 LY_CHECK_ERR_GOTO(ly_getutf8(&in_aux, &n, &u),
588 LOGVAL(ctx, LY_VCODE_INCHAR, in[offset]), error);
589 offset += u;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200590 }
591 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100592
593 /* EOF reached before endchar */
Radek Krejci2efc45b2020-12-22 16:25:44 +0100594 LOGVAL(ctx, LY_VCODE_EOF);
Michal Vaskob36053d2020-03-26 15:49:30 +0100595
Radek Krejci7a7fa902018-09-25 17:08:21 +0200596error:
Michal Vaskob36053d2020-03-26 15:49:30 +0100597 free(buf);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200598 return LY_EVALID;
599
600success:
Radek Krejcid70d1072018-10-09 14:20:47 +0200601 if (buf) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100602 *value = buf;
603 *dynamic = 1;
604 } else {
605 *value = (char *)start;
606 *dynamic = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200607 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100608 *length = len;
609 *ws_only = ws;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200610
Radek Krejcid54412f2020-12-17 20:25:35 +0100611 xmlctx->in->current = in;
Michal Vaskob36053d2020-03-26 15:49:30 +0100612 return LY_SUCCESS;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200613}
614
Michal Vasko8cef5232020-06-15 17:59:47 +0200615/**
616 * @brief Parse XML closing element and match it to a stored starting element.
617 *
618 * @param[in] xmlctx XML context to use.
619 * @param[in] prefix Expected closing element prefix.
620 * @param[in] prefix_len Length of @p prefix.
621 * @param[in] name Expected closing element name.
622 * @param[in] name_len Length of @p name.
623 * @param[in] empty Whether we are parsing a special "empty" element (with joined starting and closing tag) with no value.
624 * @return LY_ERR value.
625 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100626static LY_ERR
627lyxml_close_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, const char *name, size_t name_len,
Radek Krejci857189e2020-09-01 13:26:36 +0200628 ly_bool empty)
Radek Krejcid972c252018-09-25 13:23:39 +0200629{
Michal Vaskob36053d2020-03-26 15:49:30 +0100630 struct lyxml_elem *e;
Radek Krejcid972c252018-09-25 13:23:39 +0200631
Michal Vaskob36053d2020-03-26 15:49:30 +0100632 /* match opening and closing element tags */
633 if (!xmlctx->elements.count) {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100634 LOGVAL(xmlctx->ctx, LYVE_SYNTAX, "Stray closing element tag (\"%.*s\").",
Radek Krejci422afb12021-03-04 16:38:16 +0100635 (int)name_len, name);
Michal Vaskob36053d2020-03-26 15:49:30 +0100636 return LY_EVALID;
637 }
Radek Krejcid972c252018-09-25 13:23:39 +0200638
Michal Vaskob36053d2020-03-26 15:49:30 +0100639 e = (struct lyxml_elem *)xmlctx->elements.objs[xmlctx->elements.count - 1];
Michal Vasko69730152020-10-09 16:30:07 +0200640 if ((e->prefix_len != prefix_len) || (e->name_len != name_len) ||
641 (prefix_len && strncmp(prefix, e->prefix, e->prefix_len)) || strncmp(name, e->name, e->name_len)) {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100642 LOGVAL(xmlctx->ctx, LYVE_SYNTAX, "Opening (\"%.*s%s%.*s\") and closing (\"%.*s%s%.*s\") elements tag mismatch.",
Radek Krejci422afb12021-03-04 16:38:16 +0100643 (int)e->prefix_len, e->prefix ? e->prefix : "", e->prefix ? ":" : "", (int)e->name_len, e->name,
644 (int)prefix_len, prefix ? prefix : "", prefix ? ":" : "", (int)name_len, name);
Michal Vaskob36053d2020-03-26 15:49:30 +0100645 return LY_EVALID;
646 }
Radek Krejcid972c252018-09-25 13:23:39 +0200647
Michal Vaskob36053d2020-03-26 15:49:30 +0100648 /* opening and closing element tags matches, remove record from the opening tags list */
649 ly_set_rm_index(&xmlctx->elements, xmlctx->elements.count - 1, free);
Radek Krejcid972c252018-09-25 13:23:39 +0200650
Michal Vaskob36053d2020-03-26 15:49:30 +0100651 /* remove also the namespaces connected with the element */
652 lyxml_ns_rm(xmlctx);
Radek Krejcid972c252018-09-25 13:23:39 +0200653
Michal Vaskob36053d2020-03-26 15:49:30 +0100654 /* skip WS */
655 ign_xmlws(xmlctx);
Radek Krejcid972c252018-09-25 13:23:39 +0200656
Michal Vaskob36053d2020-03-26 15:49:30 +0100657 /* special "<elem/>" element */
Michal Vasko63f3d842020-07-08 10:10:14 +0200658 if (empty && (xmlctx->in->current[0] == '/')) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100659 move_input(xmlctx, 1);
660 }
Michal Vasko52927e22020-03-16 17:26:14 +0100661
Michal Vaskob36053d2020-03-26 15:49:30 +0100662 /* parse closing tag */
Michal Vasko63f3d842020-07-08 10:10:14 +0200663 if (xmlctx->in->current[0] != '>') {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100664 LOGVAL(xmlctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
Michal Vasko69730152020-10-09 16:30:07 +0200665 xmlctx->in->current, "element tag termination ('>')");
Michal Vaskob36053d2020-03-26 15:49:30 +0100666 return LY_EVALID;
667 }
Michal Vasko52927e22020-03-16 17:26:14 +0100668
Michal Vaskob36053d2020-03-26 15:49:30 +0100669 /* move after closing tag without checking for EOF */
Michal Vasko63f3d842020-07-08 10:10:14 +0200670 ly_in_skip(xmlctx->in, 1);
Michal Vasko52927e22020-03-16 17:26:14 +0100671
Radek Krejcid972c252018-09-25 13:23:39 +0200672 return LY_SUCCESS;
673}
674
Michal Vasko8cef5232020-06-15 17:59:47 +0200675/**
676 * @brief Store parsed opening element and parse any included namespaces.
677 *
678 * @param[in] xmlctx XML context to use.
679 * @param[in] prefix Parsed starting element prefix.
680 * @param[in] prefix_len Length of @p prefix.
681 * @param[in] name Parsed starting element name.
682 * @param[in] name_len Length of @p name.
683 * @return LY_ERR value.
684 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100685static LY_ERR
686lyxml_open_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, const char *name, size_t name_len)
Radek Krejcib1890642018-10-03 14:05:40 +0200687{
Michal Vaskob36053d2020-03-26 15:49:30 +0100688 LY_ERR ret = LY_SUCCESS;
689 struct lyxml_elem *e;
690 const char *prev_input;
Michal Vasko4fd91922021-09-15 08:51:06 +0200691 uint64_t prev_line;
Michal Vaskob36053d2020-03-26 15:49:30 +0100692 char *value;
693 size_t parsed, value_len;
Radek Krejci857189e2020-09-01 13:26:36 +0200694 ly_bool ws_only, dynamic, is_ns;
Michal Vaskob36053d2020-03-26 15:49:30 +0100695 uint32_t c;
Radek Krejcib1890642018-10-03 14:05:40 +0200696
Michal Vaskob36053d2020-03-26 15:49:30 +0100697 /* store element opening tag information */
698 e = malloc(sizeof *e);
699 LY_CHECK_ERR_RET(!e, LOGMEM(xmlctx->ctx), LY_EMEM);
700 e->name = name;
701 e->prefix = prefix;
702 e->name_len = name_len;
703 e->prefix_len = prefix_len;
aPiecek93582ed2021-05-25 14:49:06 +0200704
Radek Krejci3d92e442020-10-12 12:48:13 +0200705 LY_CHECK_RET(ly_set_add(&xmlctx->elements, e, 1, NULL));
aPiecek93582ed2021-05-25 14:49:06 +0200706 if (xmlctx->elements.count > LY_MAX_BLOCK_DEPTH) {
Michal Vasko4fd91922021-09-15 08:51:06 +0200707 LOGERR(xmlctx->ctx, LY_EINVAL, "The maximum number of open elements has been exceeded.");
Michal Vaskobf2333e2022-12-05 10:41:40 +0100708 return LY_EINVAL;
aPiecek93582ed2021-05-25 14:49:06 +0200709 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100710
711 /* skip WS */
712 ign_xmlws(xmlctx);
713
714 /* parse and store all namespaces */
Michal Vasko63f3d842020-07-08 10:10:14 +0200715 prev_input = xmlctx->in->current;
Michal Vasko4fd91922021-09-15 08:51:06 +0200716 prev_line = xmlctx->in->line;
Michal Vaskob36053d2020-03-26 15:49:30 +0100717 is_ns = 1;
aPiecek785ad3d2021-05-10 15:51:13 +0200718 while ((xmlctx->in->current[0] != '\0') && !(ret = ly_getutf8(&xmlctx->in->current, &c, &parsed))) {
719 if (!is_xmlqnamestartchar(c)) {
720 break;
721 }
Michal Vasko63f3d842020-07-08 10:10:14 +0200722 xmlctx->in->current -= parsed;
Michal Vaskob36053d2020-03-26 15:49:30 +0100723
724 /* parse attribute name */
725 LY_CHECK_GOTO(ret = lyxml_parse_qname(xmlctx, &prefix, &prefix_len, &name, &name_len), cleanup);
726
727 /* parse the value */
728 LY_CHECK_GOTO(ret = lyxml_next_attr_content(xmlctx, (const char **)&value, &value_len, &ws_only, &dynamic), cleanup);
729
730 /* store every namespace */
731 if ((prefix && !ly_strncmp("xmlns", prefix, prefix_len)) || (!prefix && !ly_strncmp("xmlns", name, name_len))) {
Radek IÅ¡a017270d2021-02-16 10:26:15 +0100732 ret = lyxml_ns_add(xmlctx, prefix ? name : NULL, prefix ? name_len : 0,
733 dynamic ? value : strndup(value, value_len));
Michal Vaskob36053d2020-03-26 15:49:30 +0100734 dynamic = 0;
Radek IÅ¡a017270d2021-02-16 10:26:15 +0100735 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100736 } else {
737 /* not a namespace */
738 is_ns = 0;
739 }
740 if (dynamic) {
741 free(value);
742 }
743
744 /* skip WS */
745 ign_xmlws(xmlctx);
746
747 if (is_ns) {
748 /* we can actually skip all the namespaces as there is no reason to parse them again */
Michal Vasko63f3d842020-07-08 10:10:14 +0200749 prev_input = xmlctx->in->current;
Michal Vasko4fd91922021-09-15 08:51:06 +0200750 prev_line = xmlctx->in->line;
Michal Vaskob36053d2020-03-26 15:49:30 +0100751 }
Radek Krejcib1890642018-10-03 14:05:40 +0200752 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100753
754cleanup:
755 if (!ret) {
Michal Vasko63f3d842020-07-08 10:10:14 +0200756 xmlctx->in->current = prev_input;
Michal Vasko4fd91922021-09-15 08:51:06 +0200757 xmlctx->in->line = prev_line;
Michal Vaskob36053d2020-03-26 15:49:30 +0100758 }
759 return ret;
760}
761
Michal Vasko8cef5232020-06-15 17:59:47 +0200762/**
763 * @brief Move parser to the attribute content and parse it.
764 *
765 * @param[in] xmlctx XML context to use.
766 * @param[out] value Parsed attribute value.
767 * @param[out] value_len Length of @p value.
768 * @param[out] ws_only Whether the value is empty/white-spaces only.
769 * @param[out] dynamic Whether the value was dynamically allocated.
770 * @return LY_ERR value.
771 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100772static LY_ERR
Radek Krejci857189e2020-09-01 13:26:36 +0200773lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *value_len, ly_bool *ws_only, ly_bool *dynamic)
Michal Vaskob36053d2020-03-26 15:49:30 +0100774{
775 char quot;
776
777 /* skip WS */
778 ign_xmlws(xmlctx);
779
780 /* skip '=' */
Michal Vasko63f3d842020-07-08 10:10:14 +0200781 if (xmlctx->in->current[0] == '\0') {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100782 LOGVAL(xmlctx->ctx, LY_VCODE_EOF);
Michal Vaskob36053d2020-03-26 15:49:30 +0100783 return LY_EVALID;
Michal Vasko63f3d842020-07-08 10:10:14 +0200784 } else if (xmlctx->in->current[0] != '=') {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100785 LOGVAL(xmlctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
Michal Vasko69730152020-10-09 16:30:07 +0200786 xmlctx->in->current, "'='");
Michal Vaskob36053d2020-03-26 15:49:30 +0100787 return LY_EVALID;
788 }
789 move_input(xmlctx, 1);
790
791 /* skip WS */
792 ign_xmlws(xmlctx);
793
794 /* find quotes */
Michal Vasko63f3d842020-07-08 10:10:14 +0200795 if (xmlctx->in->current[0] == '\0') {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100796 LOGVAL(xmlctx->ctx, LY_VCODE_EOF);
Michal Vaskob36053d2020-03-26 15:49:30 +0100797 return LY_EVALID;
Michal Vasko63f3d842020-07-08 10:10:14 +0200798 } else if ((xmlctx->in->current[0] != '\'') && (xmlctx->in->current[0] != '\"')) {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100799 LOGVAL(xmlctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
Michal Vasko69730152020-10-09 16:30:07 +0200800 xmlctx->in->current, "either single or double quotation mark");
Michal Vaskob36053d2020-03-26 15:49:30 +0100801 return LY_EVALID;
802 }
803
804 /* remember quote */
Michal Vasko63f3d842020-07-08 10:10:14 +0200805 quot = xmlctx->in->current[0];
Michal Vaskob36053d2020-03-26 15:49:30 +0100806 move_input(xmlctx, 1);
807
808 /* parse attribute value */
809 LY_CHECK_RET(lyxml_parse_value(xmlctx, quot, (char **)value, value_len, ws_only, dynamic));
810
811 /* move after ending quote (without checking for EOF) */
Michal Vasko63f3d842020-07-08 10:10:14 +0200812 ly_in_skip(xmlctx->in, 1);
Michal Vaskob36053d2020-03-26 15:49:30 +0100813
814 return LY_SUCCESS;
815}
816
Michal Vasko8cef5232020-06-15 17:59:47 +0200817/**
818 * @brief Move parser to the next attribute and parse it.
819 *
820 * @param[in] xmlctx XML context to use.
821 * @param[out] prefix Parsed attribute prefix.
822 * @param[out] prefix_len Length of @p prefix.
823 * @param[out] name Parsed attribute name.
824 * @param[out] name_len Length of @p name.
825 * @return LY_ERR value.
826 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100827static LY_ERR
828lyxml_next_attribute(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
829{
830 const char *in;
831 char *value;
832 uint32_t c;
833 size_t parsed, value_len;
Radek Krejci857189e2020-09-01 13:26:36 +0200834 ly_bool ws_only, dynamic;
Michal Vaskob36053d2020-03-26 15:49:30 +0100835
836 /* skip WS */
837 ign_xmlws(xmlctx);
838
839 /* parse only possible attributes */
Michal Vasko63f3d842020-07-08 10:10:14 +0200840 while ((xmlctx->in->current[0] != '>') && (xmlctx->in->current[0] != '/')) {
841 in = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100842 if (in[0] == '\0') {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100843 LOGVAL(xmlctx->ctx, LY_VCODE_EOF);
Michal Vaskob36053d2020-03-26 15:49:30 +0100844 return LY_EVALID;
845 } else if ((ly_getutf8(&in, &c, &parsed) || !is_xmlqnamestartchar(c))) {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100846 LOGVAL(xmlctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in - parsed), in - parsed,
Michal Vasko69730152020-10-09 16:30:07 +0200847 "element tag end ('>' or '/>') or an attribute");
Michal Vaskob36053d2020-03-26 15:49:30 +0100848 return LY_EVALID;
849 }
850
851 /* parse attribute name */
852 LY_CHECK_RET(lyxml_parse_qname(xmlctx, prefix, prefix_len, name, name_len));
853
854 if ((!*prefix || ly_strncmp("xmlns", *prefix, *prefix_len)) && (*prefix || ly_strncmp("xmlns", *name, *name_len))) {
855 /* standard attribute */
856 break;
857 }
858
859 /* namespace, skip it */
860 LY_CHECK_RET(lyxml_next_attr_content(xmlctx, (const char **)&value, &value_len, &ws_only, &dynamic));
861 if (dynamic) {
862 free(value);
863 }
864
865 /* skip WS */
866 ign_xmlws(xmlctx);
867 }
868
869 return LY_SUCCESS;
870}
871
Michal Vasko8cef5232020-06-15 17:59:47 +0200872/**
873 * @brief Move parser to the next element and parse it.
874 *
875 * @param[in] xmlctx XML context to use.
876 * @param[out] prefix Parsed element prefix.
877 * @param[out] prefix_len Length of @p prefix.
878 * @param[out] name Parse element name.
879 * @param[out] name_len Length of @p name.
Radek Krejci1deb5be2020-08-26 16:43:36 +0200880 * @param[out] closing Flag if the element is closing (includes '/').
Michal Vasko8cef5232020-06-15 17:59:47 +0200881 * @return LY_ERR value.
882 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100883static LY_ERR
884lyxml_next_element(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len,
Radek Krejci857189e2020-09-01 13:26:36 +0200885 ly_bool *closing)
Michal Vaskob36053d2020-03-26 15:49:30 +0100886{
887 /* skip WS until EOF or after opening tag '<' */
888 LY_CHECK_RET(lyxml_skip_until_end_or_after_otag(xmlctx));
Michal Vasko63f3d842020-07-08 10:10:14 +0200889 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100890 /* set return values */
891 *prefix = *name = NULL;
892 *prefix_len = *name_len = 0;
893 return LY_SUCCESS;
894 }
895
Michal Vasko63f3d842020-07-08 10:10:14 +0200896 if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100897 move_input(xmlctx, 1);
898 *closing = 1;
899 } else {
900 *closing = 0;
901 }
902
903 /* skip WS */
904 ign_xmlws(xmlctx);
905
906 /* parse element name */
907 LY_CHECK_RET(lyxml_parse_qname(xmlctx, prefix, prefix_len, name, name_len));
908
909 return LY_SUCCESS;
910}
911
912LY_ERR
Michal Vasko63f3d842020-07-08 10:10:14 +0200913lyxml_ctx_new(const struct ly_ctx *ctx, struct ly_in *in, struct lyxml_ctx **xmlctx_p)
Michal Vaskob36053d2020-03-26 15:49:30 +0100914{
915 LY_ERR ret = LY_SUCCESS;
916 struct lyxml_ctx *xmlctx;
Radek Krejci857189e2020-09-01 13:26:36 +0200917 ly_bool closing;
Michal Vaskob36053d2020-03-26 15:49:30 +0100918
919 /* new context */
920 xmlctx = calloc(1, sizeof *xmlctx);
921 LY_CHECK_ERR_RET(!xmlctx, LOGMEM(ctx), LY_EMEM);
922 xmlctx->ctx = ctx;
Michal Vasko63f3d842020-07-08 10:10:14 +0200923 xmlctx->in = in;
Michal Vaskob36053d2020-03-26 15:49:30 +0100924
Michal Vaskof8ebf132022-11-21 14:06:48 +0100925 LOG_LOCSET(NULL, NULL, NULL, in);
Radek Krejci2efc45b2020-12-22 16:25:44 +0100926
Michal Vaskob36053d2020-03-26 15:49:30 +0100927 /* parse next element, if any */
928 LY_CHECK_GOTO(ret = lyxml_next_element(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name,
Michal Vasko69730152020-10-09 16:30:07 +0200929 &xmlctx->name_len, &closing), cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100930
Michal Vasko63f3d842020-07-08 10:10:14 +0200931 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100932 /* update status */
933 xmlctx->status = LYXML_END;
934 } else if (closing) {
Radek Krejci422afb12021-03-04 16:38:16 +0100935 LOGVAL(ctx, LYVE_SYNTAX, "Stray closing element tag (\"%.*s\").", (int)xmlctx->name_len, xmlctx->name);
Michal Vaskob36053d2020-03-26 15:49:30 +0100936 ret = LY_EVALID;
937 goto cleanup;
938 } else {
939 /* open an element, also parses all enclosed namespaces */
940 LY_CHECK_GOTO(ret = lyxml_open_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len), cleanup);
941
942 /* update status */
943 xmlctx->status = LYXML_ELEMENT;
944 }
945
946cleanup:
947 if (ret) {
948 lyxml_ctx_free(xmlctx);
949 } else {
950 *xmlctx_p = xmlctx;
951 }
952 return ret;
953}
954
955LY_ERR
956lyxml_ctx_next(struct lyxml_ctx *xmlctx)
957{
958 LY_ERR ret = LY_SUCCESS;
Radek Krejci857189e2020-09-01 13:26:36 +0200959 ly_bool closing;
Michal Vaskob36053d2020-03-26 15:49:30 +0100960 struct lyxml_elem *e;
961
962 /* if the value was not used, free it */
963 if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) {
964 free((char *)xmlctx->value);
965 xmlctx->value = NULL;
966 xmlctx->dynamic = 0;
967 }
968
969 switch (xmlctx->status) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100970 case LYXML_ELEM_CONTENT:
Radek Krejcif13b87b2020-12-01 22:02:17 +0100971 /* content |</elem> */
972
Michal Vaskob36053d2020-03-26 15:49:30 +0100973 /* handle special case when empty content for "<elem/>" was returned */
Michal Vasko63f3d842020-07-08 10:10:14 +0200974 if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100975 assert(xmlctx->elements.count);
976 e = (struct lyxml_elem *)xmlctx->elements.objs[xmlctx->elements.count - 1];
977
978 /* close the element (parses closing tag) */
Michal Vasko63f3d842020-07-08 10:10:14 +0200979 ret = lyxml_close_element(xmlctx, e->prefix, e->prefix_len, e->name, e->name_len, 1);
980 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100981
982 /* update status */
983 xmlctx->status = LYXML_ELEM_CLOSE;
984 break;
985 }
Radek Krejcif13b87b2020-12-01 22:02:17 +0100986 /* fall through */
Michal Vaskob36053d2020-03-26 15:49:30 +0100987 case LYXML_ELEM_CLOSE:
Radek Krejcif13b87b2020-12-01 22:02:17 +0100988 /* </elem>| <elem2>* */
989
Michal Vaskob36053d2020-03-26 15:49:30 +0100990 /* parse next element, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +0200991 ret = lyxml_next_element(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name, &xmlctx->name_len, &closing);
992 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100993
Michal Vasko63f3d842020-07-08 10:10:14 +0200994 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100995 /* update status */
996 xmlctx->status = LYXML_END;
997 } else if (closing) {
998 /* close an element (parses also closing tag) */
Michal Vasko63f3d842020-07-08 10:10:14 +0200999 ret = lyxml_close_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len, 0);
1000 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +01001001
1002 /* update status */
1003 xmlctx->status = LYXML_ELEM_CLOSE;
1004 } else {
1005 /* open an element, also parses all enclosed namespaces */
Michal Vasko63f3d842020-07-08 10:10:14 +02001006 ret = lyxml_open_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len);
1007 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +01001008
1009 /* update status */
1010 xmlctx->status = LYXML_ELEMENT;
1011 }
1012 break;
1013
Michal Vaskob36053d2020-03-26 15:49:30 +01001014 case LYXML_ELEMENT:
Radek Krejcif13b87b2020-12-01 22:02:17 +01001015 /* <elem| attr='val'* > content */
Michal Vaskob36053d2020-03-26 15:49:30 +01001016 case LYXML_ATTR_CONTENT:
Radek Krejcif13b87b2020-12-01 22:02:17 +01001017 /* attr='val'| attr='val'* > content */
1018
Michal Vaskob36053d2020-03-26 15:49:30 +01001019 /* parse attribute name, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +02001020 ret = lyxml_next_attribute(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name, &xmlctx->name_len);
1021 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +01001022
Michal Vasko63f3d842020-07-08 10:10:14 +02001023 if (xmlctx->in->current[0] == '>') {
Michal Vaskob36053d2020-03-26 15:49:30 +01001024 /* no attributes but a closing tag */
Michal Vasko63f3d842020-07-08 10:10:14 +02001025 ly_in_skip(xmlctx->in, 1);
1026 if (!xmlctx->in->current[0]) {
Radek Krejci2efc45b2020-12-22 16:25:44 +01001027 LOGVAL(xmlctx->ctx, LY_VCODE_EOF);
Michal Vaskof55ae202020-06-30 15:49:36 +02001028 ret = LY_EVALID;
1029 goto cleanup;
1030 }
Michal Vaskob36053d2020-03-26 15:49:30 +01001031
1032 /* parse element content */
Michal Vasko63f3d842020-07-08 10:10:14 +02001033 ret = lyxml_parse_value(xmlctx, '<', (char **)&xmlctx->value, &xmlctx->value_len, &xmlctx->ws_only,
Michal Vasko69730152020-10-09 16:30:07 +02001034 &xmlctx->dynamic);
Michal Vasko63f3d842020-07-08 10:10:14 +02001035 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +01001036
1037 if (!xmlctx->value_len) {
Radek IÅ¡a017270d2021-02-16 10:26:15 +01001038 /* empty value should by alocated staticaly, but check for in any case */
1039 if (xmlctx->dynamic) {
1040 free((char *) xmlctx->value);
1041 }
Michal Vaskob36053d2020-03-26 15:49:30 +01001042 /* use empty value, easier to work with */
1043 xmlctx->value = "";
Radek IÅ¡a017270d2021-02-16 10:26:15 +01001044 xmlctx->dynamic = 0;
Michal Vaskob36053d2020-03-26 15:49:30 +01001045 }
1046
1047 /* update status */
1048 xmlctx->status = LYXML_ELEM_CONTENT;
Michal Vasko63f3d842020-07-08 10:10:14 +02001049 } else if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +01001050 /* no content but we still return it */
1051 xmlctx->value = "";
1052 xmlctx->value_len = 0;
1053 xmlctx->ws_only = 1;
1054 xmlctx->dynamic = 0;
1055
1056 /* update status */
1057 xmlctx->status = LYXML_ELEM_CONTENT;
1058 } else {
1059 /* update status */
1060 xmlctx->status = LYXML_ATTRIBUTE;
1061 }
1062 break;
1063
Michal Vaskob36053d2020-03-26 15:49:30 +01001064 case LYXML_ATTRIBUTE:
Radek Krejcif13b87b2020-12-01 22:02:17 +01001065 /* attr|='val' */
1066
Michal Vaskob36053d2020-03-26 15:49:30 +01001067 /* skip formatting and parse value */
Michal Vasko63f3d842020-07-08 10:10:14 +02001068 ret = lyxml_next_attr_content(xmlctx, &xmlctx->value, &xmlctx->value_len, &xmlctx->ws_only, &xmlctx->dynamic);
1069 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +01001070
1071 /* update status */
1072 xmlctx->status = LYXML_ATTR_CONTENT;
1073 break;
1074
Michal Vaskob36053d2020-03-26 15:49:30 +01001075 case LYXML_END:
Radek Krejcif13b87b2020-12-01 22:02:17 +01001076 /* </elem> |EOF */
Michal Vaskob36053d2020-03-26 15:49:30 +01001077 /* nothing to do */
1078 break;
1079 }
1080
1081cleanup:
1082 if (ret) {
1083 /* invalidate context */
1084 xmlctx->status = LYXML_END;
1085 }
1086 return ret;
1087}
1088
1089LY_ERR
1090lyxml_ctx_peek(struct lyxml_ctx *xmlctx, enum LYXML_PARSER_STATUS *next)
1091{
1092 LY_ERR ret = LY_SUCCESS;
1093 const char *prefix, *name, *prev_input;
1094 size_t prefix_len, name_len;
Radek Krejci857189e2020-09-01 13:26:36 +02001095 ly_bool closing;
Michal Vaskob36053d2020-03-26 15:49:30 +01001096
Michal Vasko63f3d842020-07-08 10:10:14 +02001097 prev_input = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +01001098
1099 switch (xmlctx->status) {
1100 case LYXML_ELEM_CONTENT:
Michal Vasko63f3d842020-07-08 10:10:14 +02001101 if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +01001102 *next = LYXML_ELEM_CLOSE;
1103 break;
1104 }
Radek Krejcif13b87b2020-12-01 22:02:17 +01001105 /* fall through */
Michal Vaskob36053d2020-03-26 15:49:30 +01001106 case LYXML_ELEM_CLOSE:
1107 /* parse next element, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +02001108 ret = lyxml_next_element(xmlctx, &prefix, &prefix_len, &name, &name_len, &closing);
1109 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +01001110
Michal Vasko63f3d842020-07-08 10:10:14 +02001111 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +01001112 *next = LYXML_END;
1113 } else if (closing) {
1114 *next = LYXML_ELEM_CLOSE;
1115 } else {
1116 *next = LYXML_ELEMENT;
1117 }
1118 break;
1119 case LYXML_ELEMENT:
1120 case LYXML_ATTR_CONTENT:
1121 /* parse attribute name, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +02001122 ret = lyxml_next_attribute(xmlctx, &prefix, &prefix_len, &name, &name_len);
1123 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +01001124
Michal Vasko63f3d842020-07-08 10:10:14 +02001125 if ((xmlctx->in->current[0] == '>') || (xmlctx->in->current[0] == '/')) {
Michal Vaskob36053d2020-03-26 15:49:30 +01001126 *next = LYXML_ELEM_CONTENT;
1127 } else {
1128 *next = LYXML_ATTRIBUTE;
1129 }
1130 break;
1131 case LYXML_ATTRIBUTE:
1132 *next = LYXML_ATTR_CONTENT;
1133 break;
1134 case LYXML_END:
1135 *next = LYXML_END;
1136 break;
1137 }
1138
1139cleanup:
Michal Vasko63f3d842020-07-08 10:10:14 +02001140 xmlctx->in->current = prev_input;
Michal Vaskob36053d2020-03-26 15:49:30 +01001141 return ret;
1142}
1143
Michal Vaskoda8fbbf2021-06-16 11:44:44 +02001144/**
1145 * @brief Free all namespaces in XML context.
1146 *
1147 * @param[in] xmlctx XML context to use.
1148 */
1149static void
1150lyxml_ns_rm_all(struct lyxml_ctx *xmlctx)
1151{
1152 struct lyxml_ns *ns;
1153 uint32_t i;
1154
1155 for (i = 0; i < xmlctx->ns.count; ++i) {
1156 ns = xmlctx->ns.objs[i];
1157
1158 free(ns->prefix);
1159 free(ns->uri);
1160 free(ns);
1161 }
1162 ly_set_erase(&xmlctx->ns, NULL);
1163}
1164
Michal Vaskob36053d2020-03-26 15:49:30 +01001165void
1166lyxml_ctx_free(struct lyxml_ctx *xmlctx)
1167{
Michal Vaskob36053d2020-03-26 15:49:30 +01001168 if (!xmlctx) {
1169 return;
1170 }
1171
Radek Krejciddace2c2021-01-08 11:30:56 +01001172 LOG_LOCBACK(0, 0, 0, 1);
Radek Krejci2efc45b2020-12-22 16:25:44 +01001173
Michal Vaskob36053d2020-03-26 15:49:30 +01001174 if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) {
1175 free((char *)xmlctx->value);
1176 }
1177 ly_set_erase(&xmlctx->elements, free);
Michal Vaskoda8fbbf2021-06-16 11:44:44 +02001178 lyxml_ns_rm_all(xmlctx);
Michal Vaskob36053d2020-03-26 15:49:30 +01001179 free(xmlctx);
Radek Krejcib1890642018-10-03 14:05:40 +02001180}
Radek Krejcie7b95092019-05-15 11:03:07 +02001181
Michal Vaskoda8fbbf2021-06-16 11:44:44 +02001182/**
1183 * @brief Duplicate an XML element.
1184 *
1185 * @param[in] elem Element to duplicate.
1186 * @return Element duplicate.
1187 * @return NULL on error.
1188 */
1189static struct lyxml_elem *
1190lyxml_elem_dup(const struct lyxml_elem *elem)
1191{
1192 struct lyxml_elem *dup;
1193
1194 dup = malloc(sizeof *dup);
1195 LY_CHECK_ERR_RET(!dup, LOGMEM(NULL), NULL);
1196
1197 memcpy(dup, elem, sizeof *dup);
1198
1199 return dup;
1200}
1201
1202/**
1203 * @brief Duplicate an XML namespace.
1204 *
1205 * @param[in] ns Namespace to duplicate.
1206 * @return Namespace duplicate.
1207 * @return NULL on error.
1208 */
1209static struct lyxml_ns *
1210lyxml_ns_dup(const struct lyxml_ns *ns)
1211{
1212 struct lyxml_ns *dup;
1213
1214 dup = malloc(sizeof *dup);
1215 LY_CHECK_ERR_RET(!dup, LOGMEM(NULL), NULL);
1216
1217 if (ns->prefix) {
1218 dup->prefix = strdup(ns->prefix);
1219 LY_CHECK_ERR_RET(!dup->prefix, LOGMEM(NULL); free(dup), NULL);
1220 } else {
1221 dup->prefix = NULL;
1222 }
1223 dup->uri = strdup(ns->uri);
1224 LY_CHECK_ERR_RET(!dup->uri, LOGMEM(NULL); free(dup->prefix); free(dup), NULL);
1225 dup->depth = ns->depth;
1226
1227 return dup;
1228}
1229
1230LY_ERR
1231lyxml_ctx_backup(struct lyxml_ctx *xmlctx, struct lyxml_ctx *backup)
1232{
1233 uint32_t i;
1234
1235 /* first make shallow copy */
1236 memcpy(backup, xmlctx, sizeof *backup);
1237
1238 if ((xmlctx->status == LYXML_ELEM_CONTENT) && xmlctx->dynamic) {
1239 /* it was backed up, do not free */
1240 xmlctx->dynamic = 0;
1241 }
1242
Michal Vasko2b979d62022-05-10 09:28:56 +02001243 /* backup in */
1244 backup->b_current = xmlctx->in->current;
1245 backup->b_line = xmlctx->in->line;
Michal Vaskoda8fbbf2021-06-16 11:44:44 +02001246
1247 /* duplicate elements */
1248 backup->elements.objs = malloc(xmlctx->elements.size * sizeof(struct lyxml_elem));
Michal Vasko0c2403d2021-09-15 08:52:18 +02001249 LY_CHECK_ERR_RET(!backup->elements.objs, LOGMEM(xmlctx->ctx), LY_EMEM);
Michal Vaskoda8fbbf2021-06-16 11:44:44 +02001250 for (i = 0; i < xmlctx->elements.count; ++i) {
1251 backup->elements.objs[i] = lyxml_elem_dup(xmlctx->elements.objs[i]);
Michal Vasko0c2403d2021-09-15 08:52:18 +02001252 LY_CHECK_RET(!backup->elements.objs[i], LY_EMEM);
Michal Vaskoda8fbbf2021-06-16 11:44:44 +02001253 }
1254
1255 /* duplicate ns */
1256 backup->ns.objs = malloc(xmlctx->ns.size * sizeof(struct lyxml_ns));
Michal Vasko0c2403d2021-09-15 08:52:18 +02001257 LY_CHECK_ERR_RET(!backup->ns.objs, LOGMEM(xmlctx->ctx), LY_EMEM);
Michal Vaskoda8fbbf2021-06-16 11:44:44 +02001258 for (i = 0; i < xmlctx->ns.count; ++i) {
1259 backup->ns.objs[i] = lyxml_ns_dup(xmlctx->ns.objs[i]);
Michal Vasko0c2403d2021-09-15 08:52:18 +02001260 LY_CHECK_RET(!backup->ns.objs[i], LY_EMEM);
Michal Vaskoda8fbbf2021-06-16 11:44:44 +02001261 }
1262
1263 return LY_SUCCESS;
1264}
1265
1266void
1267lyxml_ctx_restore(struct lyxml_ctx *xmlctx, struct lyxml_ctx *backup)
1268{
1269 if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) {
1270 /* free dynamic value */
1271 free((char *)xmlctx->value);
1272 }
1273
1274 /* free elements */
1275 ly_set_erase(&xmlctx->elements, free);
1276
1277 /* free ns */
1278 lyxml_ns_rm_all(xmlctx);
1279
Michal Vasko2b979d62022-05-10 09:28:56 +02001280 /* restore in */
1281 xmlctx->in->current = backup->b_current;
1282 xmlctx->in->line = backup->b_line;
Michal Vaskoda8fbbf2021-06-16 11:44:44 +02001283 backup->in = xmlctx->in;
1284
1285 /* restore backup */
1286 memcpy(xmlctx, backup, sizeof *xmlctx);
1287}
1288
Radek Krejcie7b95092019-05-15 11:03:07 +02001289LY_ERR
Radek Krejci857189e2020-09-01 13:26:36 +02001290lyxml_dump_text(struct ly_out *out, const char *text, ly_bool attribute)
Radek Krejcie7b95092019-05-15 11:03:07 +02001291{
Michal Vasko5233e962020-08-14 14:26:20 +02001292 LY_ERR ret;
Radek Krejcie7b95092019-05-15 11:03:07 +02001293
1294 if (!text) {
1295 return 0;
1296 }
1297
Radek Krejci1deb5be2020-08-26 16:43:36 +02001298 for (uint64_t u = 0; text[u]; u++) {
Radek Krejcie7b95092019-05-15 11:03:07 +02001299 switch (text[u]) {
1300 case '&':
Michal Vasko5233e962020-08-14 14:26:20 +02001301 ret = ly_print_(out, "&amp;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001302 break;
1303 case '<':
Michal Vasko5233e962020-08-14 14:26:20 +02001304 ret = ly_print_(out, "&lt;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001305 break;
1306 case '>':
1307 /* not needed, just for readability */
Michal Vasko5233e962020-08-14 14:26:20 +02001308 ret = ly_print_(out, "&gt;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001309 break;
1310 case '"':
1311 if (attribute) {
Michal Vasko5233e962020-08-14 14:26:20 +02001312 ret = ly_print_(out, "&quot;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001313 break;
1314 }
Radek Krejcif13b87b2020-12-01 22:02:17 +01001315 /* fall through */
Radek Krejcie7b95092019-05-15 11:03:07 +02001316 default:
Michal Vasko5233e962020-08-14 14:26:20 +02001317 ret = ly_write_(out, &text[u], 1);
1318 break;
Radek Krejcie7b95092019-05-15 11:03:07 +02001319 }
Michal Vasko5233e962020-08-14 14:26:20 +02001320 LY_CHECK_RET(ret);
Radek Krejcie7b95092019-05-15 11:03:07 +02001321 }
1322
Michal Vasko5233e962020-08-14 14:26:20 +02001323 return LY_SUCCESS;
Radek Krejcie7b95092019-05-15 11:03:07 +02001324}
1325
Michal Vasko52927e22020-03-16 17:26:14 +01001326LY_ERR
aPiecek2f63f952021-03-30 12:22:18 +02001327lyxml_value_compare(const struct ly_ctx *ctx1, const char *value1, void *val_prefix_data1,
1328 const struct ly_ctx *ctx2, const char *value2, void *val_prefix_data2)
Michal Vasko52927e22020-03-16 17:26:14 +01001329{
aPiecek2f63f952021-03-30 12:22:18 +02001330 const char *value1_iter, *value2_iter;
1331 const char *value1_next, *value2_next;
1332 uint32_t value1_len, value2_len;
1333 ly_bool is_prefix1, is_prefix2;
Michal Vasko6b5cb2a2020-11-11 19:11:21 +01001334 const struct lys_module *mod1, *mod2;
aPiecek2f63f952021-03-30 12:22:18 +02001335 LY_ERR ret;
Michal Vasko52927e22020-03-16 17:26:14 +01001336
1337 if (!value1 && !value2) {
1338 return LY_SUCCESS;
1339 }
1340 if ((value1 && !value2) || (!value1 && value2)) {
1341 return LY_ENOT;
1342 }
1343
aPiecek2f63f952021-03-30 12:22:18 +02001344 if (!ctx2) {
1345 ctx2 = ctx1;
1346 }
Michal Vasko52927e22020-03-16 17:26:14 +01001347
aPiecek2f63f952021-03-30 12:22:18 +02001348 ret = LY_SUCCESS;
1349 for (value1_iter = value1, value2_iter = value2;
1350 value1_iter && value2_iter;
1351 value1_iter = value1_next, value2_iter = value2_next) {
aPieceke3f828d2021-05-10 15:34:41 +02001352 if ((ret = ly_value_prefix_next(value1_iter, NULL, &value1_len, &is_prefix1, &value1_next))) {
1353 break;
1354 }
1355 if ((ret = ly_value_prefix_next(value2_iter, NULL, &value2_len, &is_prefix2, &value2_next))) {
1356 break;
1357 }
aPiecek2f63f952021-03-30 12:22:18 +02001358
1359 if (is_prefix1 != is_prefix2) {
1360 ret = LY_ENOT;
1361 break;
1362 }
1363
1364 if (!is_prefix1) {
1365 if (value1_len != value2_len) {
1366 ret = LY_ENOT;
1367 break;
1368 }
1369 if (strncmp(value1_iter, value2_iter, value1_len)) {
1370 ret = LY_ENOT;
1371 break;
1372 }
1373 continue;
1374 }
1375
1376 mod1 = mod2 = NULL;
1377 if (val_prefix_data1) {
1378 /* find module of the first prefix, if any */
Radek Krejci8df109d2021-04-23 12:19:08 +02001379 mod1 = ly_resolve_prefix(ctx1, value1_iter, value1_len, LY_VALUE_XML, val_prefix_data1);
aPiecek2f63f952021-03-30 12:22:18 +02001380 }
1381 if (val_prefix_data2) {
Radek Krejci8df109d2021-04-23 12:19:08 +02001382 mod2 = ly_resolve_prefix(ctx2, value2_iter, value2_len, LY_VALUE_XML, val_prefix_data2);
aPiecek2f63f952021-03-30 12:22:18 +02001383 }
1384 if (!mod1 || !mod2) {
1385 /* not a prefix or maps to different namespaces */
1386 ret = LY_ENOT;
1387 break;
1388 }
1389
1390 if (mod1->ctx == mod2->ctx) {
1391 /* same contexts */
1392 if ((mod1->name != mod2->name) || (mod1->revision != mod2->revision)) {
1393 ret = LY_ENOT;
1394 break;
1395 }
1396 } else {
1397 /* different contexts */
1398 if (strcmp(mod1->name, mod2->name)) {
1399 ret = LY_ENOT;
Michal Vasko52927e22020-03-16 17:26:14 +01001400 break;
1401 }
1402
aPiecek2f63f952021-03-30 12:22:18 +02001403 if (mod1->revision || mod2->revision) {
1404 if (!mod1->revision || !mod2->revision) {
1405 ret = LY_ENOT;
1406 break;
1407 }
1408 if (strcmp(mod1->revision, mod2->revision)) {
1409 ret = LY_ENOT;
1410 break;
1411 }
1412 }
Michal Vasko52927e22020-03-16 17:26:14 +01001413 }
Michal Vasko52927e22020-03-16 17:26:14 +01001414 }
1415
aPiecek2f63f952021-03-30 12:22:18 +02001416 if (value1_iter || value2_iter) {
1417 ret = LY_ENOT;
1418 }
1419
1420 return ret;
Michal Vasko52927e22020-03-16 17:26:14 +01001421}