blob: e4bc8bd2d92242a8341b0992ac287c8e5aaf35f9 [file] [log] [blame]
Radek Krejcid91dbaf2018-09-21 15:51:39 +02001/**
2 * @file xml.c
3 * @author Radek Krejci <rkrejci@cesnet.cz>
Michal Vaskob36053d2020-03-26 15:49:30 +01004 * @author Michal Vasko <mvasko@cesnet.cz>
Radek Krejcid91dbaf2018-09-21 15:51:39 +02005 * @brief Generic XML parser implementation for libyang
6 *
7 * Copyright (c) 2015 - 2018 CESNET, z.s.p.o.
8 *
9 * This source code is licensed under BSD 3-Clause License (the "License").
10 * You may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * https://opensource.org/licenses/BSD-3-Clause
14 */
15
Radek Krejci535ea9f2020-05-29 16:01:05 +020016#define _GNU_SOURCE
17
18#include "xml.h"
Radek Krejci4b74d5e2018-09-26 14:30:55 +020019
Radek Krejcib1890642018-10-03 14:05:40 +020020#include <assert.h>
Radek Krejci7a7fa902018-09-25 17:08:21 +020021#include <ctype.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020022#include <stdint.h>
Radek Krejcie7b95092019-05-15 11:03:07 +020023#include <stdlib.h>
Radek Krejci4b74d5e2018-09-26 14:30:55 +020024#include <string.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020025
Radek Krejci535ea9f2020-05-29 16:01:05 +020026#include "common.h"
Michal Vasko5aa44c02020-06-29 11:47:02 +020027#include "compat.h"
Michal Vaskoafac7822020-10-20 14:22:26 +020028#include "in_internal.h"
29#include "out_internal.h"
Radek Krejci535ea9f2020-05-29 16:01:05 +020030#include "tree.h"
Radek Krejci77114102021-03-10 15:21:57 +010031#include "tree_schema_internal.h"
Radek Krejcid91dbaf2018-09-21 15:51:39 +020032
/**
 * Move the input of lyxml_ctx @p c forward by @p s bytes. If the input is then at its
 * terminating NUL byte, LY_VCODE_EOF is logged and LY_CHECK_ERR_RET is invoked with LY_EVALID,
 * i.e. the macro may leave the function it is used in.
 *
 * The body is wrapped in do { } while (0) so that both steps stay together when the macro
 * is used as the single statement of an unbraced if/else.
 */
#define move_input(c, s) \
    do { \
        ly_in_skip((c)->in, (s)); \
        LY_CHECK_ERR_RET(!(c)->in->current[0], LOGVAL((c)->ctx, LY_VCODE_EOF), LY_EVALID); \
    } while (0)
Radek Krejcid91dbaf2018-09-21 15:51:39 +020037
/**
 * Skip all XML whitespace characters at the current position of the input of lyxml_ctx @p c.
 * LY_IN_NEW_LINE() is invoked for every skipped '\n'.
 *
 * The argument is parenthesized on every use and the loop is wrapped in do { } while (0)
 * so the macro behaves as one statement in any context.
 */
#define ign_xmlws(c) \
    do { \
        while (is_xmlws(*(c)->in->current)) { \
            if (*(c)->in->current == '\n') { \
                LY_IN_NEW_LINE((c)->in); \
            } \
            ly_in_skip((c)->in, 1); \
        } \
    } while (0)
Michal Vaskob36053d2020-03-26 15:49:30 +010046
Radek Krejci857189e2020-09-01 13:26:36 +020047static LY_ERR lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *value_len, ly_bool *ws_only,
48 ly_bool *dynamic);
Radek Krejcid91dbaf2018-09-21 15:51:39 +020049
Radek Krejci4b74d5e2018-09-26 14:30:55 +020050/**
Radek Krejcidd713ce2021-01-04 23:12:12 +010051 * @brief Ignore and skip any characters until the delim of the size delim_len is read, including the delim
Radek Krejci4b74d5e2018-09-26 14:30:55 +020052 *
Radek Krejcidd713ce2021-01-04 23:12:12 +010053 * @param[in] xmlctx XML parser context to provide input handler and libyang context
54 * @param[in] in input handler to read the data, it is updated only in case the section is correctly terminated.
55 * @param[in] delim Delimiter to detect end of the section.
56 * @param[in] delim_len Length of the delimiter string to use.
57 * @param[in] sectname Section name to refer in error message.
Michal Vasko63f3d842020-07-08 10:10:14 +020058 */
Radek Krejcidd713ce2021-01-04 23:12:12 +010059LY_ERR
60skip_section(struct lyxml_ctx *xmlctx, const char *delim, size_t delim_len, const char *sectname)
Radek Krejcid91dbaf2018-09-21 15:51:39 +020061{
62 size_t i;
Radek Krejcidd713ce2021-01-04 23:12:12 +010063 register const char *input, *a, *b;
64 uint64_t parsed = 0, newlines = 0;
Radek Krejcid91dbaf2018-09-21 15:51:39 +020065
Radek Krejcidd713ce2021-01-04 23:12:12 +010066 for (input = xmlctx->in->current; *input; ++input, ++parsed) {
Radek Krejcid91dbaf2018-09-21 15:51:39 +020067 if (*input != *delim) {
Radek Krejcidd713ce2021-01-04 23:12:12 +010068 if (*input == '\n') {
69 ++newlines;
70 }
Radek Krejcid91dbaf2018-09-21 15:51:39 +020071 continue;
72 }
73 a = input;
74 b = delim;
75 for (i = 0; i < delim_len; ++i) {
76 if (*a++ != *b++) {
77 break;
78 }
79 }
80 if (i == delim_len) {
Michal Vasko63f3d842020-07-08 10:10:14 +020081 /* delim found */
Radek Krejcidd713ce2021-01-04 23:12:12 +010082 xmlctx->in->line += newlines;
83 ly_in_skip(xmlctx->in, parsed + delim_len);
84 return LY_SUCCESS;
Radek Krejcid91dbaf2018-09-21 15:51:39 +020085 }
86 }
Michal Vasko63f3d842020-07-08 10:10:14 +020087
Radek Krejcidd713ce2021-01-04 23:12:12 +010088 /* delim not found,
89 * do not update input handler to refer to the beginning of the section in error message */
90 LOGVAL(xmlctx->ctx, LY_VCODE_NTERM, sectname);
91 return LY_EVALID;
Radek Krejcid91dbaf2018-09-21 15:51:39 +020092}
93
Radek Krejci4b74d5e2018-09-26 14:30:55 +020094/**
Michal Vaskob36053d2020-03-26 15:49:30 +010095 * @brief Check/Get an XML identifier from the input string.
96 *
97 * The identifier must have at least one valid character complying the name start character constraints.
98 * The identifier is terminated by the first character, which does not comply to the name character constraints.
99 *
100 * See https://www.w3.org/TR/xml-names/#NT-NCName
101 *
102 * @param[in] xmlctx XML context.
103 * @param[out] start Pointer to the start of the identifier.
104 * @param[out] end Pointer ot the end of the identifier.
105 * @return LY_ERR value.
106 */
107static LY_ERR
108lyxml_parse_identifier(struct lyxml_ctx *xmlctx, const char **start, const char **end)
109{
110 const char *s, *in;
111 uint32_t c;
112 size_t parsed;
113 LY_ERR rc;
114
Michal Vasko63f3d842020-07-08 10:10:14 +0200115 in = s = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100116
117 /* check NameStartChar (minus colon) */
118 LY_CHECK_ERR_RET(ly_getutf8(&in, &c, &parsed),
Radek Krejci2efc45b2020-12-22 16:25:44 +0100119 LOGVAL(xmlctx->ctx, LY_VCODE_INCHAR, in[0]),
Michal Vasko69730152020-10-09 16:30:07 +0200120 LY_EVALID);
Michal Vaskob36053d2020-03-26 15:49:30 +0100121 LY_CHECK_ERR_RET(!is_xmlqnamestartchar(c),
Radek Krejci2efc45b2020-12-22 16:25:44 +0100122 LOGVAL(xmlctx->ctx, LYVE_SYNTAX, "Identifier \"%s\" starts with an invalid character.", in - parsed),
Michal Vasko69730152020-10-09 16:30:07 +0200123 LY_EVALID);
Michal Vaskob36053d2020-03-26 15:49:30 +0100124
125 /* check rest of the identifier */
126 do {
127 /* move only successfully parsed bytes */
Michal Vasko63f3d842020-07-08 10:10:14 +0200128 ly_in_skip(xmlctx->in, parsed);
Michal Vaskob36053d2020-03-26 15:49:30 +0100129
130 rc = ly_getutf8(&in, &c, &parsed);
Radek Krejci2efc45b2020-12-22 16:25:44 +0100131 LY_CHECK_ERR_RET(rc, LOGVAL(xmlctx->ctx, LY_VCODE_INCHAR, in[0]), LY_EVALID);
Michal Vaskob36053d2020-03-26 15:49:30 +0100132 } while (is_xmlqnamechar(c));
133
134 *start = s;
Michal Vasko63f3d842020-07-08 10:10:14 +0200135 *end = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100136 return LY_SUCCESS;
137}
138
139/**
140 * @brief Add namespace definition into XML context.
141 *
142 * Namespaces from a single element are supposed to be added sequentially together (not interleaved by a namespace from other
143 * element). This mimic namespace visibility, since the namespace defined in element E is not visible from its parents or
144 * siblings. On the other hand, namespace from a parent element can be redefined in a child element. This is also reflected
145 * by lyxml_ns_get() which returns the most recent namespace definition for the given prefix.
146 *
147 * When leaving processing of a subtree of some element (after it is removed from xmlctx->elements), caller is supposed to call
148 * lyxml_ns_rm() to remove all the namespaces defined in such an element from the context.
149 *
150 * @param[in] xmlctx XML context to work with.
151 * @param[in] prefix Pointer to the namespace prefix. Can be NULL for default namespace.
152 * @param[in] prefix_len Length of the prefix.
153 * @param[in] uri Namespace URI (value) to store directly. Value is always spent.
154 * @return LY_ERR values.
155 */
156LY_ERR
157lyxml_ns_add(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, char *uri)
158{
Radek Krejciba03a5a2020-08-27 14:40:41 +0200159 LY_ERR ret = LY_SUCCESS;
Michal Vaskob36053d2020-03-26 15:49:30 +0100160 struct lyxml_ns *ns;
161
162 ns = malloc(sizeof *ns);
163 LY_CHECK_ERR_RET(!ns, LOGMEM(xmlctx->ctx), LY_EMEM);
164
165 /* we need to connect the depth of the element where the namespace is defined with the
166 * namespace record to be able to maintain (remove) the record when the parser leaves
167 * (to its sibling or back to the parent) the element where the namespace was defined */
168 ns->depth = xmlctx->elements.count;
169
170 ns->uri = uri;
171 if (prefix) {
172 ns->prefix = strndup(prefix, prefix_len);
173 LY_CHECK_ERR_RET(!ns->prefix, LOGMEM(xmlctx->ctx); free(ns->uri); free(ns), LY_EMEM);
174 } else {
175 ns->prefix = NULL;
176 }
177
Radek Krejci3d92e442020-10-12 12:48:13 +0200178 ret = ly_set_add(&xmlctx->ns, ns, 1, NULL);
Radek Krejciba03a5a2020-08-27 14:40:41 +0200179 LY_CHECK_ERR_RET(ret, free(ns->prefix); free(ns->uri); free(ns), ret);
180
Michal Vaskob36053d2020-03-26 15:49:30 +0100181 return LY_SUCCESS;
182}
183
184/**
185 * @brief Remove all the namespaces defined in the element recently closed (removed from the xmlctx->elements).
186 *
187 * @param[in] xmlctx XML context to work with.
188 */
189void
190lyxml_ns_rm(struct lyxml_ctx *xmlctx)
191{
Radek Krejci1deb5be2020-08-26 16:43:36 +0200192 for (uint32_t u = xmlctx->ns.count - 1; u + 1 > 0; --u) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100193 if (((struct lyxml_ns *)xmlctx->ns.objs[u])->depth != xmlctx->elements.count + 1) {
194 /* we are done, the namespaces from a single element are supposed to be together */
195 break;
196 }
197 /* remove the ns structure */
198 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->prefix);
199 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->uri);
200 free(xmlctx->ns.objs[u]);
201 --xmlctx->ns.count;
202 }
203
204 if (!xmlctx->ns.count) {
205 /* cleanup the xmlctx's namespaces storage */
206 ly_set_erase(&xmlctx->ns, NULL);
207 }
208}
209
Michal Vaskob36053d2020-03-26 15:49:30 +0100210const struct lyxml_ns *
Michal Vaskoc8a230d2020-08-14 12:17:10 +0200211lyxml_ns_get(const struct ly_set *ns_set, const char *prefix, size_t prefix_len)
Michal Vaskob36053d2020-03-26 15:49:30 +0100212{
Michal Vaskob36053d2020-03-26 15:49:30 +0100213 struct lyxml_ns *ns;
214
Radek Krejci1deb5be2020-08-26 16:43:36 +0200215 for (uint32_t u = ns_set->count - 1; u + 1 > 0; --u) {
Michal Vaskoc8a230d2020-08-14 12:17:10 +0200216 ns = (struct lyxml_ns *)ns_set->objs[u];
Michal Vaskob36053d2020-03-26 15:49:30 +0100217 if (prefix && prefix_len) {
218 if (ns->prefix && !ly_strncmp(ns->prefix, prefix, prefix_len)) {
219 return ns;
220 }
221 } else if (!ns->prefix) {
222 /* default namespace */
223 return ns;
224 }
225 }
226
227 return NULL;
228}
229
Michal Vasko8cef5232020-06-15 17:59:47 +0200230/**
231 * @brief Skip in the input until EOF or just after the opening tag.
232 * Handles special XML constructs (comment, cdata, doctype).
233 *
234 * @param[in] xmlctx XML context to use.
235 * @return LY_ERR value.
236 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100237static LY_ERR
238lyxml_skip_until_end_or_after_otag(struct lyxml_ctx *xmlctx)
239{
240 const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */
Michal Vasko63f3d842020-07-08 10:10:14 +0200241 const char *endtag, *sectname;
Radek Krejcidd713ce2021-01-04 23:12:12 +0100242 size_t endtag_len;
Michal Vaskob36053d2020-03-26 15:49:30 +0100243
244 while (1) {
245 ign_xmlws(xmlctx);
246
Michal Vasko63f3d842020-07-08 10:10:14 +0200247 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100248 /* EOF */
249 if (xmlctx->elements.count) {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100250 LOGVAL(ctx, LY_VCODE_EOF);
Michal Vaskob36053d2020-03-26 15:49:30 +0100251 return LY_EVALID;
252 }
253 return LY_SUCCESS;
Michal Vasko63f3d842020-07-08 10:10:14 +0200254 } else if (xmlctx->in->current[0] != '<') {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100255 LOGVAL(ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
Michal Vasko69730152020-10-09 16:30:07 +0200256 xmlctx->in->current, "element tag start ('<')");
Michal Vaskob36053d2020-03-26 15:49:30 +0100257 return LY_EVALID;
258 }
259 move_input(xmlctx, 1);
260
Michal Vasko63f3d842020-07-08 10:10:14 +0200261 if (xmlctx->in->current[0] == '!') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100262 move_input(xmlctx, 1);
263 /* sections to ignore */
Michal Vasko63f3d842020-07-08 10:10:14 +0200264 if (!strncmp(xmlctx->in->current, "--", 2)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100265 /* comment */
266 move_input(xmlctx, 2);
267 sectname = "Comment";
268 endtag = "-->";
Radek Krejcif13b87b2020-12-01 22:02:17 +0100269 endtag_len = ly_strlen_const("-->");
270 } else if (!strncmp(xmlctx->in->current, "[CDATA[", ly_strlen_const("[CDATA["))) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100271 /* CDATA section */
Radek Krejcif13b87b2020-12-01 22:02:17 +0100272 move_input(xmlctx, ly_strlen_const("[CDATA["));
Michal Vaskob36053d2020-03-26 15:49:30 +0100273 sectname = "CData";
274 endtag = "]]>";
Radek Krejcif13b87b2020-12-01 22:02:17 +0100275 endtag_len = ly_strlen_const("]]>");
276 } else if (!strncmp(xmlctx->in->current, "DOCTYPE", ly_strlen_const("DOCTYPE"))) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100277 /* Document type declaration - not supported */
Radek Krejci2efc45b2020-12-22 16:25:44 +0100278 LOGVAL(ctx, LY_VCODE_NSUPP, "Document Type Declaration");
Michal Vaskob36053d2020-03-26 15:49:30 +0100279 return LY_EVALID;
280 } else {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100281 LOGVAL(ctx, LYVE_SYNTAX, "Unknown XML section \"%.20s\".", &xmlctx->in->current[-2]);
Michal Vaskob36053d2020-03-26 15:49:30 +0100282 return LY_EVALID;
283 }
Radek Krejcidd713ce2021-01-04 23:12:12 +0100284 LY_CHECK_RET(skip_section(xmlctx, endtag, endtag_len, sectname));
Michal Vasko63f3d842020-07-08 10:10:14 +0200285 } else if (xmlctx->in->current[0] == '?') {
Radek Krejcidd713ce2021-01-04 23:12:12 +0100286 LY_CHECK_RET(skip_section(xmlctx, "?>", 2, "Declaration"));
Michal Vaskob36053d2020-03-26 15:49:30 +0100287 } else {
288 /* other non-WS character */
289 break;
290 }
291 }
292
293 return LY_SUCCESS;
294}
295
Michal Vasko8cef5232020-06-15 17:59:47 +0200296/**
297 * @brief Parse QName.
298 *
299 * @param[in] xmlctx XML context to use.
300 * @param[out] prefix Parsed prefix, may be NULL.
301 * @param[out] prefix_len Length of @p prefix.
302 * @param[out] name Parsed name.
303 * @param[out] name_len Length of @p name.
304 * @return LY_ERR value.
305 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100306static LY_ERR
307lyxml_parse_qname(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
308{
309 const char *start, *end;
310
311 *prefix = NULL;
312 *prefix_len = 0;
313
314 LY_CHECK_RET(lyxml_parse_identifier(xmlctx, &start, &end));
315 if (end[0] == ':') {
316 /* we have prefixed identifier */
317 *prefix = start;
318 *prefix_len = end - start;
319
320 move_input(xmlctx, 1);
321 LY_CHECK_RET(lyxml_parse_identifier(xmlctx, &start, &end));
322 }
323
324 *name = start;
325 *name_len = end - start;
326 return LY_SUCCESS;
327}
328
329/**
Michal Vasko8cef5232020-06-15 17:59:47 +0200330 * @brief Parse XML text content (value).
331 *
332 * @param[in] xmlctx XML context to use.
333 * @param[in] endchar Expected character to mark value end.
334 * @param[out] value Parsed value.
335 * @param[out] length Length of @p value.
336 * @param[out] ws_only Whether the value is empty/white-spaces only.
337 * @param[out] dynamic Whether the value was dynamically allocated.
338 * @return LY_ERR value.
339 */
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200340static LY_ERR
Radek Krejci857189e2020-09-01 13:26:36 +0200341lyxml_parse_value(struct lyxml_ctx *xmlctx, char endchar, char **value, size_t *length, ly_bool *ws_only, ly_bool *dynamic)
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200342{
Michal Vaskob36053d2020-03-26 15:49:30 +0100343#define BUFSIZE 24
344#define BUFSIZE_STEP 128
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200345
Michal Vaskob36053d2020-03-26 15:49:30 +0100346 const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */
aPiecekb287b212021-05-04 14:24:25 +0200347 const char *in = xmlctx->in->current, *start, *in_aux;
Michal Vaskob36053d2020-03-26 15:49:30 +0100348 char *buf = NULL;
Radek Krejci4ad42aa2019-07-23 16:55:58 +0200349 size_t offset; /* read offset in input buffer */
350 size_t len; /* length of the output string (write offset in output buffer) */
351 size_t size = 0; /* size of the output buffer */
Radek Krejci7a7fa902018-09-25 17:08:21 +0200352 void *p;
Radek Krejci117d2082018-09-26 10:05:14 +0200353 uint32_t n;
Michal Vaskob36053d2020-03-26 15:49:30 +0100354 size_t u;
Radek Krejci857189e2020-09-01 13:26:36 +0200355 ly_bool ws = 1;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200356
Michal Vaskob36053d2020-03-26 15:49:30 +0100357 assert(xmlctx);
Radek Krejcib1890642018-10-03 14:05:40 +0200358
Radek Krejcid70d1072018-10-09 14:20:47 +0200359 /* init */
Michal Vaskob36053d2020-03-26 15:49:30 +0100360 start = in;
Radek Krejcid70d1072018-10-09 14:20:47 +0200361 offset = len = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200362
363 /* parse */
364 while (in[offset]) {
365 if (in[offset] == '&') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100366 /* non WS */
367 ws = 0;
Radek Krejcid70d1072018-10-09 14:20:47 +0200368
Michal Vaskob36053d2020-03-26 15:49:30 +0100369 if (!buf) {
370 /* prepare output buffer */
371 buf = malloc(BUFSIZE);
372 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
373 size = BUFSIZE;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200374 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100375
376 /* allocate enough for the offset and next character,
377 * we will need 4 bytes at most since we support only the predefined
378 * (one-char) entities and character references */
Juraj Vijtiukcb017cc2020-07-08 16:19:58 +0200379 while (len + offset + 4 >= size) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100380 buf = ly_realloc(buf, size + BUFSIZE_STEP);
381 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
382 size += BUFSIZE_STEP;
383 }
384
385 if (offset) {
386 /* store what we have so far */
387 memcpy(&buf[len], in, offset);
388 len += offset;
389 in += offset;
390 offset = 0;
391 }
392
Radek Krejci7a7fa902018-09-25 17:08:21 +0200393 ++offset;
394 if (in[offset] != '#') {
395 /* entity reference - only predefined references are supported */
Radek Krejcif13b87b2020-12-01 22:02:17 +0100396 if (!strncmp(&in[offset], "lt;", ly_strlen_const("lt;"))) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100397 buf[len++] = '<';
Radek Krejcif13b87b2020-12-01 22:02:17 +0100398 in += ly_strlen_const("&lt;");
399 } else if (!strncmp(&in[offset], "gt;", ly_strlen_const("gt;"))) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100400 buf[len++] = '>';
Radek Krejcif13b87b2020-12-01 22:02:17 +0100401 in += ly_strlen_const("&gt;");
402 } else if (!strncmp(&in[offset], "amp;", ly_strlen_const("amp;"))) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100403 buf[len++] = '&';
Radek Krejcif13b87b2020-12-01 22:02:17 +0100404 in += ly_strlen_const("&amp;");
405 } else if (!strncmp(&in[offset], "apos;", ly_strlen_const("apos;"))) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100406 buf[len++] = '\'';
Radek Krejcif13b87b2020-12-01 22:02:17 +0100407 in += ly_strlen_const("&apos;");
408 } else if (!strncmp(&in[offset], "quot;", ly_strlen_const("quot;"))) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100409 buf[len++] = '\"';
Radek Krejcif13b87b2020-12-01 22:02:17 +0100410 in += ly_strlen_const("&quot;");
Radek Krejci7a7fa902018-09-25 17:08:21 +0200411 } else {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100412 LOGVAL(ctx, LYVE_SYNTAX, "Entity reference \"%.*s\" not supported, only predefined references allowed.",
413 10, &in[offset - 1]);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200414 goto error;
415 }
416 offset = 0;
417 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100418 p = (void *)&in[offset - 1];
Radek Krejci7a7fa902018-09-25 17:08:21 +0200419 /* character reference */
420 ++offset;
421 if (isdigit(in[offset])) {
422 for (n = 0; isdigit(in[offset]); offset++) {
Radek Krejcif13b87b2020-12-01 22:02:17 +0100423 n = (LY_BASE_DEC * n) + (in[offset] - '0');
Radek Krejci7a7fa902018-09-25 17:08:21 +0200424 }
Michal Vasko69730152020-10-09 16:30:07 +0200425 } else if ((in[offset] == 'x') && isxdigit(in[offset + 1])) {
Radek Krejci7a7fa902018-09-25 17:08:21 +0200426 for (n = 0, ++offset; isxdigit(in[offset]); offset++) {
427 if (isdigit(in[offset])) {
428 u = (in[offset] - '0');
429 } else if (in[offset] > 'F') {
Radek Krejcif13b87b2020-12-01 22:02:17 +0100430 u = LY_BASE_DEC + (in[offset] - 'a');
Radek Krejci7a7fa902018-09-25 17:08:21 +0200431 } else {
Radek Krejcif13b87b2020-12-01 22:02:17 +0100432 u = LY_BASE_DEC + (in[offset] - 'A');
Radek Krejci7a7fa902018-09-25 17:08:21 +0200433 }
Radek Krejcif13b87b2020-12-01 22:02:17 +0100434 n = (LY_BASE_HEX * n) + u;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200435 }
436 } else {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100437 LOGVAL(ctx, LYVE_SYNTAX, "Invalid character reference \"%.*s\".", 12, p);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200438 goto error;
439
440 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100441
Radek Krejci7a7fa902018-09-25 17:08:21 +0200442 LY_CHECK_ERR_GOTO(in[offset] != ';',
Radek Krejci2efc45b2020-12-22 16:25:44 +0100443 LOGVAL(ctx, LY_VCODE_INSTREXP,
Michal Vasko69730152020-10-09 16:30:07 +0200444 LY_VCODE_INSTREXP_len(&in[offset]), &in[offset], ";"),
445 error);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200446 ++offset;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200447 LY_CHECK_ERR_GOTO(ly_pututf8(&buf[len], n, &u),
Radek Krejci2efc45b2020-12-22 16:25:44 +0100448 LOGVAL(ctx, LYVE_SYNTAX, "Invalid character reference \"%.*s\" (0x%08x).", 12, p, n),
Michal Vasko69730152020-10-09 16:30:07 +0200449 error);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200450 len += u;
451 in += offset;
452 offset = 0;
453 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100454 } else if (in[offset] == endchar) {
Radek Krejci7a7fa902018-09-25 17:08:21 +0200455 /* end of string */
Radek Krejcid70d1072018-10-09 14:20:47 +0200456 if (buf) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100457 /* realloc exact size string */
458 buf = ly_realloc(buf, len + offset + 1);
459 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
460 size = len + offset + 1;
Radek Krejcid70d1072018-10-09 14:20:47 +0200461 memcpy(&buf[len], in, offset);
Michal Vaskob36053d2020-03-26 15:49:30 +0100462
463 /* set terminating NULL byte */
464 buf[len + offset] = '\0';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200465 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200466 len += offset;
Michal Vaskob36053d2020-03-26 15:49:30 +0100467 in += offset;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200468 goto success;
469 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100470 if (!is_xmlws(in[offset])) {
471 /* non WS */
472 ws = 0;
473 }
474
Radek Krejci7a7fa902018-09-25 17:08:21 +0200475 /* log lines */
476 if (in[offset] == '\n') {
Radek Krejcid54412f2020-12-17 20:25:35 +0100477 LY_IN_NEW_LINE(xmlctx->in);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200478 }
479
480 /* continue */
aPiecekb287b212021-05-04 14:24:25 +0200481 in_aux = &in[offset];
482 LY_CHECK_ERR_GOTO(ly_getutf8(&in_aux, &n, &u),
483 LOGVAL(ctx, LY_VCODE_INCHAR, in[offset]), error);
484 offset += u;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200485 }
486 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100487
488 /* EOF reached before endchar */
Radek Krejci2efc45b2020-12-22 16:25:44 +0100489 LOGVAL(ctx, LY_VCODE_EOF);
Michal Vaskob36053d2020-03-26 15:49:30 +0100490
Radek Krejci7a7fa902018-09-25 17:08:21 +0200491error:
Michal Vaskob36053d2020-03-26 15:49:30 +0100492 free(buf);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200493 return LY_EVALID;
494
495success:
Radek Krejcid70d1072018-10-09 14:20:47 +0200496 if (buf) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100497 *value = buf;
498 *dynamic = 1;
499 } else {
500 *value = (char *)start;
501 *dynamic = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200502 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100503 *length = len;
504 *ws_only = ws;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200505
Radek Krejcid54412f2020-12-17 20:25:35 +0100506 xmlctx->in->current = in;
Michal Vaskob36053d2020-03-26 15:49:30 +0100507 return LY_SUCCESS;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200508
509#undef BUFSIZE
510#undef BUFSIZE_STEP
Radek Krejci7a7fa902018-09-25 17:08:21 +0200511}
512
Michal Vasko8cef5232020-06-15 17:59:47 +0200513/**
514 * @brief Parse XML closing element and match it to a stored starting element.
515 *
516 * @param[in] xmlctx XML context to use.
517 * @param[in] prefix Expected closing element prefix.
518 * @param[in] prefix_len Length of @p prefix.
519 * @param[in] name Expected closing element name.
520 * @param[in] name_len Length of @p name.
521 * @param[in] empty Whether we are parsing a special "empty" element (with joined starting and closing tag) with no value.
522 * @return LY_ERR value.
523 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100524static LY_ERR
525lyxml_close_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, const char *name, size_t name_len,
Radek Krejci857189e2020-09-01 13:26:36 +0200526 ly_bool empty)
Radek Krejcid972c252018-09-25 13:23:39 +0200527{
Michal Vaskob36053d2020-03-26 15:49:30 +0100528 struct lyxml_elem *e;
Radek Krejcid972c252018-09-25 13:23:39 +0200529
Michal Vaskob36053d2020-03-26 15:49:30 +0100530 /* match opening and closing element tags */
531 if (!xmlctx->elements.count) {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100532 LOGVAL(xmlctx->ctx, LYVE_SYNTAX, "Stray closing element tag (\"%.*s\").",
Radek Krejci422afb12021-03-04 16:38:16 +0100533 (int)name_len, name);
Michal Vaskob36053d2020-03-26 15:49:30 +0100534 return LY_EVALID;
535 }
Radek Krejcid972c252018-09-25 13:23:39 +0200536
Michal Vaskob36053d2020-03-26 15:49:30 +0100537 e = (struct lyxml_elem *)xmlctx->elements.objs[xmlctx->elements.count - 1];
Michal Vasko69730152020-10-09 16:30:07 +0200538 if ((e->prefix_len != prefix_len) || (e->name_len != name_len) ||
539 (prefix_len && strncmp(prefix, e->prefix, e->prefix_len)) || strncmp(name, e->name, e->name_len)) {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100540 LOGVAL(xmlctx->ctx, LYVE_SYNTAX, "Opening (\"%.*s%s%.*s\") and closing (\"%.*s%s%.*s\") elements tag mismatch.",
Radek Krejci422afb12021-03-04 16:38:16 +0100541 (int)e->prefix_len, e->prefix ? e->prefix : "", e->prefix ? ":" : "", (int)e->name_len, e->name,
542 (int)prefix_len, prefix ? prefix : "", prefix ? ":" : "", (int)name_len, name);
Michal Vaskob36053d2020-03-26 15:49:30 +0100543 return LY_EVALID;
544 }
Radek Krejcid972c252018-09-25 13:23:39 +0200545
Michal Vaskob36053d2020-03-26 15:49:30 +0100546 /* opening and closing element tags matches, remove record from the opening tags list */
547 ly_set_rm_index(&xmlctx->elements, xmlctx->elements.count - 1, free);
Radek Krejcid972c252018-09-25 13:23:39 +0200548
Michal Vaskob36053d2020-03-26 15:49:30 +0100549 /* remove also the namespaces connected with the element */
550 lyxml_ns_rm(xmlctx);
Radek Krejcid972c252018-09-25 13:23:39 +0200551
Michal Vaskob36053d2020-03-26 15:49:30 +0100552 /* skip WS */
553 ign_xmlws(xmlctx);
Radek Krejcid972c252018-09-25 13:23:39 +0200554
Michal Vaskob36053d2020-03-26 15:49:30 +0100555 /* special "<elem/>" element */
Michal Vasko63f3d842020-07-08 10:10:14 +0200556 if (empty && (xmlctx->in->current[0] == '/')) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100557 move_input(xmlctx, 1);
558 }
Michal Vasko52927e22020-03-16 17:26:14 +0100559
Michal Vaskob36053d2020-03-26 15:49:30 +0100560 /* parse closing tag */
Michal Vasko63f3d842020-07-08 10:10:14 +0200561 if (xmlctx->in->current[0] != '>') {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100562 LOGVAL(xmlctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
Michal Vasko69730152020-10-09 16:30:07 +0200563 xmlctx->in->current, "element tag termination ('>')");
Michal Vaskob36053d2020-03-26 15:49:30 +0100564 return LY_EVALID;
565 }
Michal Vasko52927e22020-03-16 17:26:14 +0100566
Michal Vaskob36053d2020-03-26 15:49:30 +0100567 /* move after closing tag without checking for EOF */
Michal Vasko63f3d842020-07-08 10:10:14 +0200568 ly_in_skip(xmlctx->in, 1);
Michal Vasko52927e22020-03-16 17:26:14 +0100569
Radek Krejcid972c252018-09-25 13:23:39 +0200570 return LY_SUCCESS;
571}
572
Michal Vasko8cef5232020-06-15 17:59:47 +0200573/**
574 * @brief Store parsed opening element and parse any included namespaces.
575 *
576 * @param[in] xmlctx XML context to use.
577 * @param[in] prefix Parsed starting element prefix.
578 * @param[in] prefix_len Length of @p prefix.
579 * @param[in] name Parsed starting element name.
580 * @param[in] name_len Length of @p name.
581 * @return LY_ERR value.
582 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100583static LY_ERR
584lyxml_open_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, const char *name, size_t name_len)
Radek Krejcib1890642018-10-03 14:05:40 +0200585{
Michal Vaskob36053d2020-03-26 15:49:30 +0100586 LY_ERR ret = LY_SUCCESS;
587 struct lyxml_elem *e;
588 const char *prev_input;
589 char *value;
590 size_t parsed, value_len;
Radek Krejci857189e2020-09-01 13:26:36 +0200591 ly_bool ws_only, dynamic, is_ns;
Michal Vaskob36053d2020-03-26 15:49:30 +0100592 uint32_t c;
Radek Krejcib1890642018-10-03 14:05:40 +0200593
Michal Vaskob36053d2020-03-26 15:49:30 +0100594 /* store element opening tag information */
595 e = malloc(sizeof *e);
596 LY_CHECK_ERR_RET(!e, LOGMEM(xmlctx->ctx), LY_EMEM);
597 e->name = name;
598 e->prefix = prefix;
599 e->name_len = name_len;
600 e->prefix_len = prefix_len;
Radek Krejci3d92e442020-10-12 12:48:13 +0200601 LY_CHECK_RET(ly_set_add(&xmlctx->elements, e, 1, NULL));
Michal Vaskob36053d2020-03-26 15:49:30 +0100602
603 /* skip WS */
604 ign_xmlws(xmlctx);
605
606 /* parse and store all namespaces */
Michal Vasko63f3d842020-07-08 10:10:14 +0200607 prev_input = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100608 is_ns = 1;
Michal Vasko63f3d842020-07-08 10:10:14 +0200609 while ((xmlctx->in->current[0] != '\0') && !ly_getutf8(&xmlctx->in->current, &c, &parsed) && is_xmlqnamestartchar(c)) {
610 xmlctx->in->current -= parsed;
Michal Vaskob36053d2020-03-26 15:49:30 +0100611
612 /* parse attribute name */
613 LY_CHECK_GOTO(ret = lyxml_parse_qname(xmlctx, &prefix, &prefix_len, &name, &name_len), cleanup);
614
615 /* parse the value */
616 LY_CHECK_GOTO(ret = lyxml_next_attr_content(xmlctx, (const char **)&value, &value_len, &ws_only, &dynamic), cleanup);
617
618 /* store every namespace */
619 if ((prefix && !ly_strncmp("xmlns", prefix, prefix_len)) || (!prefix && !ly_strncmp("xmlns", name, name_len))) {
Radek IÅ¡a017270d2021-02-16 10:26:15 +0100620 ret = lyxml_ns_add(xmlctx, prefix ? name : NULL, prefix ? name_len : 0,
621 dynamic ? value : strndup(value, value_len));
Michal Vaskob36053d2020-03-26 15:49:30 +0100622 dynamic = 0;
Radek IÅ¡a017270d2021-02-16 10:26:15 +0100623 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100624 } else {
625 /* not a namespace */
626 is_ns = 0;
627 }
628 if (dynamic) {
629 free(value);
630 }
631
632 /* skip WS */
633 ign_xmlws(xmlctx);
634
635 if (is_ns) {
636 /* we can actually skip all the namespaces as there is no reason to parse them again */
Michal Vasko63f3d842020-07-08 10:10:14 +0200637 prev_input = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100638 }
Radek Krejcib1890642018-10-03 14:05:40 +0200639 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100640
641cleanup:
642 if (!ret) {
Michal Vasko63f3d842020-07-08 10:10:14 +0200643 xmlctx->in->current = prev_input;
Michal Vaskob36053d2020-03-26 15:49:30 +0100644 }
645 return ret;
646}
647
Michal Vasko8cef5232020-06-15 17:59:47 +0200648/**
649 * @brief Move parser to the attribute content and parse it.
650 *
651 * @param[in] xmlctx XML context to use.
652 * @param[out] value Parsed attribute value.
653 * @param[out] value_len Length of @p value.
654 * @param[out] ws_only Whether the value is empty/white-spaces only.
655 * @param[out] dynamic Whether the value was dynamically allocated.
656 * @return LY_ERR value.
657 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100658static LY_ERR
Radek Krejci857189e2020-09-01 13:26:36 +0200659lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *value_len, ly_bool *ws_only, ly_bool *dynamic)
Michal Vaskob36053d2020-03-26 15:49:30 +0100660{
661 char quot;
662
663 /* skip WS */
664 ign_xmlws(xmlctx);
665
666 /* skip '=' */
Michal Vasko63f3d842020-07-08 10:10:14 +0200667 if (xmlctx->in->current[0] == '\0') {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100668 LOGVAL(xmlctx->ctx, LY_VCODE_EOF);
Michal Vaskob36053d2020-03-26 15:49:30 +0100669 return LY_EVALID;
Michal Vasko63f3d842020-07-08 10:10:14 +0200670 } else if (xmlctx->in->current[0] != '=') {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100671 LOGVAL(xmlctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
Michal Vasko69730152020-10-09 16:30:07 +0200672 xmlctx->in->current, "'='");
Michal Vaskob36053d2020-03-26 15:49:30 +0100673 return LY_EVALID;
674 }
675 move_input(xmlctx, 1);
676
677 /* skip WS */
678 ign_xmlws(xmlctx);
679
680 /* find quotes */
Michal Vasko63f3d842020-07-08 10:10:14 +0200681 if (xmlctx->in->current[0] == '\0') {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100682 LOGVAL(xmlctx->ctx, LY_VCODE_EOF);
Michal Vaskob36053d2020-03-26 15:49:30 +0100683 return LY_EVALID;
Michal Vasko63f3d842020-07-08 10:10:14 +0200684 } else if ((xmlctx->in->current[0] != '\'') && (xmlctx->in->current[0] != '\"')) {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100685 LOGVAL(xmlctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
Michal Vasko69730152020-10-09 16:30:07 +0200686 xmlctx->in->current, "either single or double quotation mark");
Michal Vaskob36053d2020-03-26 15:49:30 +0100687 return LY_EVALID;
688 }
689
690 /* remember quote */
Michal Vasko63f3d842020-07-08 10:10:14 +0200691 quot = xmlctx->in->current[0];
Michal Vaskob36053d2020-03-26 15:49:30 +0100692 move_input(xmlctx, 1);
693
694 /* parse attribute value */
695 LY_CHECK_RET(lyxml_parse_value(xmlctx, quot, (char **)value, value_len, ws_only, dynamic));
696
697 /* move after ending quote (without checking for EOF) */
Michal Vasko63f3d842020-07-08 10:10:14 +0200698 ly_in_skip(xmlctx->in, 1);
Michal Vaskob36053d2020-03-26 15:49:30 +0100699
700 return LY_SUCCESS;
701}
702
Michal Vasko8cef5232020-06-15 17:59:47 +0200703/**
704 * @brief Move parser to the next attribute and parse it.
705 *
706 * @param[in] xmlctx XML context to use.
707 * @param[out] prefix Parsed attribute prefix.
708 * @param[out] prefix_len Length of @p prefix.
709 * @param[out] name Parsed attribute name.
710 * @param[out] name_len Length of @p name.
711 * @return LY_ERR value.
712 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100713static LY_ERR
714lyxml_next_attribute(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
715{
716 const char *in;
717 char *value;
718 uint32_t c;
719 size_t parsed, value_len;
Radek Krejci857189e2020-09-01 13:26:36 +0200720 ly_bool ws_only, dynamic;
Michal Vaskob36053d2020-03-26 15:49:30 +0100721
722 /* skip WS */
723 ign_xmlws(xmlctx);
724
725 /* parse only possible attributes */
Michal Vasko63f3d842020-07-08 10:10:14 +0200726 while ((xmlctx->in->current[0] != '>') && (xmlctx->in->current[0] != '/')) {
727 in = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100728 if (in[0] == '\0') {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100729 LOGVAL(xmlctx->ctx, LY_VCODE_EOF);
Michal Vaskob36053d2020-03-26 15:49:30 +0100730 return LY_EVALID;
731 } else if ((ly_getutf8(&in, &c, &parsed) || !is_xmlqnamestartchar(c))) {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100732 LOGVAL(xmlctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in - parsed), in - parsed,
Michal Vasko69730152020-10-09 16:30:07 +0200733 "element tag end ('>' or '/>') or an attribute");
Michal Vaskob36053d2020-03-26 15:49:30 +0100734 return LY_EVALID;
735 }
736
737 /* parse attribute name */
738 LY_CHECK_RET(lyxml_parse_qname(xmlctx, prefix, prefix_len, name, name_len));
739
740 if ((!*prefix || ly_strncmp("xmlns", *prefix, *prefix_len)) && (*prefix || ly_strncmp("xmlns", *name, *name_len))) {
741 /* standard attribute */
742 break;
743 }
744
745 /* namespace, skip it */
746 LY_CHECK_RET(lyxml_next_attr_content(xmlctx, (const char **)&value, &value_len, &ws_only, &dynamic));
747 if (dynamic) {
748 free(value);
749 }
750
751 /* skip WS */
752 ign_xmlws(xmlctx);
753 }
754
755 return LY_SUCCESS;
756}
757
Michal Vasko8cef5232020-06-15 17:59:47 +0200758/**
759 * @brief Move parser to the next element and parse it.
760 *
761 * @param[in] xmlctx XML context to use.
762 * @param[out] prefix Parsed element prefix.
763 * @param[out] prefix_len Length of @p prefix.
764 * @param[out] name Parse element name.
765 * @param[out] name_len Length of @p name.
Radek Krejci1deb5be2020-08-26 16:43:36 +0200766 * @param[out] closing Flag if the element is closing (includes '/').
Michal Vasko8cef5232020-06-15 17:59:47 +0200767 * @return LY_ERR value.
768 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100769static LY_ERR
770lyxml_next_element(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len,
Radek Krejci857189e2020-09-01 13:26:36 +0200771 ly_bool *closing)
Michal Vaskob36053d2020-03-26 15:49:30 +0100772{
773 /* skip WS until EOF or after opening tag '<' */
774 LY_CHECK_RET(lyxml_skip_until_end_or_after_otag(xmlctx));
Michal Vasko63f3d842020-07-08 10:10:14 +0200775 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100776 /* set return values */
777 *prefix = *name = NULL;
778 *prefix_len = *name_len = 0;
779 return LY_SUCCESS;
780 }
781
Michal Vasko63f3d842020-07-08 10:10:14 +0200782 if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100783 move_input(xmlctx, 1);
784 *closing = 1;
785 } else {
786 *closing = 0;
787 }
788
789 /* skip WS */
790 ign_xmlws(xmlctx);
791
792 /* parse element name */
793 LY_CHECK_RET(lyxml_parse_qname(xmlctx, prefix, prefix_len, name, name_len));
794
795 return LY_SUCCESS;
796}
797
798LY_ERR
Michal Vasko63f3d842020-07-08 10:10:14 +0200799lyxml_ctx_new(const struct ly_ctx *ctx, struct ly_in *in, struct lyxml_ctx **xmlctx_p)
Michal Vaskob36053d2020-03-26 15:49:30 +0100800{
801 LY_ERR ret = LY_SUCCESS;
802 struct lyxml_ctx *xmlctx;
Radek Krejci857189e2020-09-01 13:26:36 +0200803 ly_bool closing;
Michal Vaskob36053d2020-03-26 15:49:30 +0100804
805 /* new context */
806 xmlctx = calloc(1, sizeof *xmlctx);
807 LY_CHECK_ERR_RET(!xmlctx, LOGMEM(ctx), LY_EMEM);
808 xmlctx->ctx = ctx;
Michal Vasko63f3d842020-07-08 10:10:14 +0200809 xmlctx->in = in;
Michal Vaskob36053d2020-03-26 15:49:30 +0100810
Radek Krejciddace2c2021-01-08 11:30:56 +0100811 LOG_LOCINIT(NULL, NULL, NULL, in);
Radek Krejci2efc45b2020-12-22 16:25:44 +0100812
Michal Vaskob36053d2020-03-26 15:49:30 +0100813 /* parse next element, if any */
814 LY_CHECK_GOTO(ret = lyxml_next_element(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name,
Michal Vasko69730152020-10-09 16:30:07 +0200815 &xmlctx->name_len, &closing), cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100816
Michal Vasko63f3d842020-07-08 10:10:14 +0200817 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100818 /* update status */
819 xmlctx->status = LYXML_END;
820 } else if (closing) {
Radek Krejci422afb12021-03-04 16:38:16 +0100821 LOGVAL(ctx, LYVE_SYNTAX, "Stray closing element tag (\"%.*s\").", (int)xmlctx->name_len, xmlctx->name);
Michal Vaskob36053d2020-03-26 15:49:30 +0100822 ret = LY_EVALID;
823 goto cleanup;
824 } else {
825 /* open an element, also parses all enclosed namespaces */
826 LY_CHECK_GOTO(ret = lyxml_open_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len), cleanup);
827
828 /* update status */
829 xmlctx->status = LYXML_ELEMENT;
830 }
831
832cleanup:
833 if (ret) {
834 lyxml_ctx_free(xmlctx);
835 } else {
836 *xmlctx_p = xmlctx;
837 }
838 return ret;
839}
840
841LY_ERR
842lyxml_ctx_next(struct lyxml_ctx *xmlctx)
843{
844 LY_ERR ret = LY_SUCCESS;
Radek Krejci857189e2020-09-01 13:26:36 +0200845 ly_bool closing;
Michal Vaskob36053d2020-03-26 15:49:30 +0100846 struct lyxml_elem *e;
847
848 /* if the value was not used, free it */
849 if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) {
850 free((char *)xmlctx->value);
851 xmlctx->value = NULL;
852 xmlctx->dynamic = 0;
853 }
854
855 switch (xmlctx->status) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100856 case LYXML_ELEM_CONTENT:
Radek Krejcif13b87b2020-12-01 22:02:17 +0100857 /* content |</elem> */
858
Michal Vaskob36053d2020-03-26 15:49:30 +0100859 /* handle special case when empty content for "<elem/>" was returned */
Michal Vasko63f3d842020-07-08 10:10:14 +0200860 if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100861 assert(xmlctx->elements.count);
862 e = (struct lyxml_elem *)xmlctx->elements.objs[xmlctx->elements.count - 1];
863
864 /* close the element (parses closing tag) */
Michal Vasko63f3d842020-07-08 10:10:14 +0200865 ret = lyxml_close_element(xmlctx, e->prefix, e->prefix_len, e->name, e->name_len, 1);
866 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100867
868 /* update status */
869 xmlctx->status = LYXML_ELEM_CLOSE;
870 break;
871 }
Radek Krejcif13b87b2020-12-01 22:02:17 +0100872 /* fall through */
Michal Vaskob36053d2020-03-26 15:49:30 +0100873 case LYXML_ELEM_CLOSE:
Radek Krejcif13b87b2020-12-01 22:02:17 +0100874 /* </elem>| <elem2>* */
875
Michal Vaskob36053d2020-03-26 15:49:30 +0100876 /* parse next element, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +0200877 ret = lyxml_next_element(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name, &xmlctx->name_len, &closing);
878 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100879
Michal Vasko63f3d842020-07-08 10:10:14 +0200880 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100881 /* update status */
882 xmlctx->status = LYXML_END;
883 } else if (closing) {
884 /* close an element (parses also closing tag) */
Michal Vasko63f3d842020-07-08 10:10:14 +0200885 ret = lyxml_close_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len, 0);
886 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100887
888 /* update status */
889 xmlctx->status = LYXML_ELEM_CLOSE;
890 } else {
891 /* open an element, also parses all enclosed namespaces */
Michal Vasko63f3d842020-07-08 10:10:14 +0200892 ret = lyxml_open_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len);
893 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100894
895 /* update status */
896 xmlctx->status = LYXML_ELEMENT;
897 }
898 break;
899
Michal Vaskob36053d2020-03-26 15:49:30 +0100900 case LYXML_ELEMENT:
Radek Krejcif13b87b2020-12-01 22:02:17 +0100901 /* <elem| attr='val'* > content */
Michal Vaskob36053d2020-03-26 15:49:30 +0100902 case LYXML_ATTR_CONTENT:
Radek Krejcif13b87b2020-12-01 22:02:17 +0100903 /* attr='val'| attr='val'* > content */
904
Michal Vaskob36053d2020-03-26 15:49:30 +0100905 /* parse attribute name, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +0200906 ret = lyxml_next_attribute(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name, &xmlctx->name_len);
907 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100908
Michal Vasko63f3d842020-07-08 10:10:14 +0200909 if (xmlctx->in->current[0] == '>') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100910 /* no attributes but a closing tag */
Michal Vasko63f3d842020-07-08 10:10:14 +0200911 ly_in_skip(xmlctx->in, 1);
912 if (!xmlctx->in->current[0]) {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100913 LOGVAL(xmlctx->ctx, LY_VCODE_EOF);
Michal Vaskof55ae202020-06-30 15:49:36 +0200914 ret = LY_EVALID;
915 goto cleanup;
916 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100917
918 /* parse element content */
Michal Vasko63f3d842020-07-08 10:10:14 +0200919 ret = lyxml_parse_value(xmlctx, '<', (char **)&xmlctx->value, &xmlctx->value_len, &xmlctx->ws_only,
Michal Vasko69730152020-10-09 16:30:07 +0200920 &xmlctx->dynamic);
Michal Vasko63f3d842020-07-08 10:10:14 +0200921 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100922
923 if (!xmlctx->value_len) {
Radek IÅ¡a017270d2021-02-16 10:26:15 +0100924 /* empty value should by alocated staticaly, but check for in any case */
925 if (xmlctx->dynamic) {
926 free((char *) xmlctx->value);
927 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100928 /* use empty value, easier to work with */
929 xmlctx->value = "";
Radek IÅ¡a017270d2021-02-16 10:26:15 +0100930 xmlctx->dynamic = 0;
Michal Vaskob36053d2020-03-26 15:49:30 +0100931 }
932
933 /* update status */
934 xmlctx->status = LYXML_ELEM_CONTENT;
Michal Vasko63f3d842020-07-08 10:10:14 +0200935 } else if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100936 /* no content but we still return it */
937 xmlctx->value = "";
938 xmlctx->value_len = 0;
939 xmlctx->ws_only = 1;
940 xmlctx->dynamic = 0;
941
942 /* update status */
943 xmlctx->status = LYXML_ELEM_CONTENT;
944 } else {
945 /* update status */
946 xmlctx->status = LYXML_ATTRIBUTE;
947 }
948 break;
949
Michal Vaskob36053d2020-03-26 15:49:30 +0100950 case LYXML_ATTRIBUTE:
Radek Krejcif13b87b2020-12-01 22:02:17 +0100951 /* attr|='val' */
952
Michal Vaskob36053d2020-03-26 15:49:30 +0100953 /* skip formatting and parse value */
Michal Vasko63f3d842020-07-08 10:10:14 +0200954 ret = lyxml_next_attr_content(xmlctx, &xmlctx->value, &xmlctx->value_len, &xmlctx->ws_only, &xmlctx->dynamic);
955 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100956
957 /* update status */
958 xmlctx->status = LYXML_ATTR_CONTENT;
959 break;
960
Michal Vaskob36053d2020-03-26 15:49:30 +0100961 case LYXML_END:
Radek Krejcif13b87b2020-12-01 22:02:17 +0100962 /* </elem> |EOF */
Michal Vaskob36053d2020-03-26 15:49:30 +0100963 /* nothing to do */
964 break;
965 }
966
967cleanup:
968 if (ret) {
969 /* invalidate context */
970 xmlctx->status = LYXML_END;
971 }
972 return ret;
973}
974
975LY_ERR
976lyxml_ctx_peek(struct lyxml_ctx *xmlctx, enum LYXML_PARSER_STATUS *next)
977{
978 LY_ERR ret = LY_SUCCESS;
979 const char *prefix, *name, *prev_input;
980 size_t prefix_len, name_len;
Radek Krejci857189e2020-09-01 13:26:36 +0200981 ly_bool closing;
Michal Vaskob36053d2020-03-26 15:49:30 +0100982
Michal Vasko63f3d842020-07-08 10:10:14 +0200983 prev_input = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100984
985 switch (xmlctx->status) {
986 case LYXML_ELEM_CONTENT:
Michal Vasko63f3d842020-07-08 10:10:14 +0200987 if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100988 *next = LYXML_ELEM_CLOSE;
989 break;
990 }
Radek Krejcif13b87b2020-12-01 22:02:17 +0100991 /* fall through */
Michal Vaskob36053d2020-03-26 15:49:30 +0100992 case LYXML_ELEM_CLOSE:
993 /* parse next element, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +0200994 ret = lyxml_next_element(xmlctx, &prefix, &prefix_len, &name, &name_len, &closing);
995 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100996
Michal Vasko63f3d842020-07-08 10:10:14 +0200997 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100998 *next = LYXML_END;
999 } else if (closing) {
1000 *next = LYXML_ELEM_CLOSE;
1001 } else {
1002 *next = LYXML_ELEMENT;
1003 }
1004 break;
1005 case LYXML_ELEMENT:
1006 case LYXML_ATTR_CONTENT:
1007 /* parse attribute name, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +02001008 ret = lyxml_next_attribute(xmlctx, &prefix, &prefix_len, &name, &name_len);
1009 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +01001010
Michal Vasko63f3d842020-07-08 10:10:14 +02001011 if ((xmlctx->in->current[0] == '>') || (xmlctx->in->current[0] == '/')) {
Michal Vaskob36053d2020-03-26 15:49:30 +01001012 *next = LYXML_ELEM_CONTENT;
1013 } else {
1014 *next = LYXML_ATTRIBUTE;
1015 }
1016 break;
1017 case LYXML_ATTRIBUTE:
1018 *next = LYXML_ATTR_CONTENT;
1019 break;
1020 case LYXML_END:
1021 *next = LYXML_END;
1022 break;
1023 }
1024
1025cleanup:
Michal Vasko63f3d842020-07-08 10:10:14 +02001026 xmlctx->in->current = prev_input;
Michal Vaskob36053d2020-03-26 15:49:30 +01001027 return ret;
1028}
1029
1030void
1031lyxml_ctx_free(struct lyxml_ctx *xmlctx)
1032{
1033 uint32_t u;
1034
1035 if (!xmlctx) {
1036 return;
1037 }
1038
Radek Krejciddace2c2021-01-08 11:30:56 +01001039 LOG_LOCBACK(0, 0, 0, 1);
Radek Krejci2efc45b2020-12-22 16:25:44 +01001040
Michal Vaskob36053d2020-03-26 15:49:30 +01001041 if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) {
1042 free((char *)xmlctx->value);
1043 }
1044 ly_set_erase(&xmlctx->elements, free);
1045 for (u = xmlctx->ns.count - 1; u + 1 > 0; --u) {
1046 /* remove the ns structure */
1047 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->prefix);
1048 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->uri);
1049 free(xmlctx->ns.objs[u]);
1050 }
1051 ly_set_erase(&xmlctx->ns, NULL);
1052 free(xmlctx);
Radek Krejcib1890642018-10-03 14:05:40 +02001053}
Radek Krejcie7b95092019-05-15 11:03:07 +02001054
1055LY_ERR
Radek Krejci857189e2020-09-01 13:26:36 +02001056lyxml_dump_text(struct ly_out *out, const char *text, ly_bool attribute)
Radek Krejcie7b95092019-05-15 11:03:07 +02001057{
Michal Vasko5233e962020-08-14 14:26:20 +02001058 LY_ERR ret;
Radek Krejcie7b95092019-05-15 11:03:07 +02001059
1060 if (!text) {
1061 return 0;
1062 }
1063
Radek Krejci1deb5be2020-08-26 16:43:36 +02001064 for (uint64_t u = 0; text[u]; u++) {
Radek Krejcie7b95092019-05-15 11:03:07 +02001065 switch (text[u]) {
1066 case '&':
Michal Vasko5233e962020-08-14 14:26:20 +02001067 ret = ly_print_(out, "&amp;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001068 break;
1069 case '<':
Michal Vasko5233e962020-08-14 14:26:20 +02001070 ret = ly_print_(out, "&lt;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001071 break;
1072 case '>':
1073 /* not needed, just for readability */
Michal Vasko5233e962020-08-14 14:26:20 +02001074 ret = ly_print_(out, "&gt;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001075 break;
1076 case '"':
1077 if (attribute) {
Michal Vasko5233e962020-08-14 14:26:20 +02001078 ret = ly_print_(out, "&quot;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001079 break;
1080 }
Radek Krejcif13b87b2020-12-01 22:02:17 +01001081 /* fall through */
Radek Krejcie7b95092019-05-15 11:03:07 +02001082 default:
Michal Vasko5233e962020-08-14 14:26:20 +02001083 ret = ly_write_(out, &text[u], 1);
1084 break;
Radek Krejcie7b95092019-05-15 11:03:07 +02001085 }
Michal Vasko5233e962020-08-14 14:26:20 +02001086 LY_CHECK_RET(ret);
Radek Krejcie7b95092019-05-15 11:03:07 +02001087 }
1088
Michal Vasko5233e962020-08-14 14:26:20 +02001089 return LY_SUCCESS;
Radek Krejcie7b95092019-05-15 11:03:07 +02001090}
1091
Michal Vasko52927e22020-03-16 17:26:14 +01001092LY_ERR
aPiecek2f63f952021-03-30 12:22:18 +02001093lyxml_value_compare(const struct ly_ctx *ctx1, const char *value1, void *val_prefix_data1,
1094 const struct ly_ctx *ctx2, const char *value2, void *val_prefix_data2)
Michal Vasko52927e22020-03-16 17:26:14 +01001095{
aPiecek2f63f952021-03-30 12:22:18 +02001096 const char *value1_iter, *value2_iter;
1097 const char *value1_next, *value2_next;
1098 uint32_t value1_len, value2_len;
1099 ly_bool is_prefix1, is_prefix2;
Michal Vasko6b5cb2a2020-11-11 19:11:21 +01001100 const struct lys_module *mod1, *mod2;
aPiecek2f63f952021-03-30 12:22:18 +02001101 LY_ERR ret;
Michal Vasko52927e22020-03-16 17:26:14 +01001102
1103 if (!value1 && !value2) {
1104 return LY_SUCCESS;
1105 }
1106 if ((value1 && !value2) || (!value1 && value2)) {
1107 return LY_ENOT;
1108 }
1109
aPiecek2f63f952021-03-30 12:22:18 +02001110 if (!ctx2) {
1111 ctx2 = ctx1;
1112 }
Michal Vasko52927e22020-03-16 17:26:14 +01001113
aPiecek2f63f952021-03-30 12:22:18 +02001114 ret = LY_SUCCESS;
1115 for (value1_iter = value1, value2_iter = value2;
1116 value1_iter && value2_iter;
1117 value1_iter = value1_next, value2_iter = value2_next) {
1118 value1_len = ly_value_prefix_next(value1_iter, NULL, &is_prefix1, &value1_next);
1119 value2_len = ly_value_prefix_next(value2_iter, NULL, &is_prefix2, &value2_next);
1120
1121 if (is_prefix1 != is_prefix2) {
1122 ret = LY_ENOT;
1123 break;
1124 }
1125
1126 if (!is_prefix1) {
1127 if (value1_len != value2_len) {
1128 ret = LY_ENOT;
1129 break;
1130 }
1131 if (strncmp(value1_iter, value2_iter, value1_len)) {
1132 ret = LY_ENOT;
1133 break;
1134 }
1135 continue;
1136 }
1137
1138 mod1 = mod2 = NULL;
1139 if (val_prefix_data1) {
1140 /* find module of the first prefix, if any */
Radek Krejci8df109d2021-04-23 12:19:08 +02001141 mod1 = ly_resolve_prefix(ctx1, value1_iter, value1_len, LY_VALUE_XML, val_prefix_data1);
aPiecek2f63f952021-03-30 12:22:18 +02001142 }
1143 if (val_prefix_data2) {
Radek Krejci8df109d2021-04-23 12:19:08 +02001144 mod2 = ly_resolve_prefix(ctx2, value2_iter, value2_len, LY_VALUE_XML, val_prefix_data2);
aPiecek2f63f952021-03-30 12:22:18 +02001145 }
1146 if (!mod1 || !mod2) {
1147 /* not a prefix or maps to different namespaces */
1148 ret = LY_ENOT;
1149 break;
1150 }
1151
1152 if (mod1->ctx == mod2->ctx) {
1153 /* same contexts */
1154 if ((mod1->name != mod2->name) || (mod1->revision != mod2->revision)) {
1155 ret = LY_ENOT;
1156 break;
1157 }
1158 } else {
1159 /* different contexts */
1160 if (strcmp(mod1->name, mod2->name)) {
1161 ret = LY_ENOT;
Michal Vasko52927e22020-03-16 17:26:14 +01001162 break;
1163 }
1164
aPiecek2f63f952021-03-30 12:22:18 +02001165 if (mod1->revision || mod2->revision) {
1166 if (!mod1->revision || !mod2->revision) {
1167 ret = LY_ENOT;
1168 break;
1169 }
1170 if (strcmp(mod1->revision, mod2->revision)) {
1171 ret = LY_ENOT;
1172 break;
1173 }
1174 }
Michal Vasko52927e22020-03-16 17:26:14 +01001175 }
Michal Vasko52927e22020-03-16 17:26:14 +01001176 }
1177
aPiecek2f63f952021-03-30 12:22:18 +02001178 if (value1_iter || value2_iter) {
1179 ret = LY_ENOT;
1180 }
1181
1182 return ret;
Michal Vasko52927e22020-03-16 17:26:14 +01001183}