blob: 48a11d64d3e35463a646cede58ce8ecb36a34a5a [file] [log] [blame]
/**
 * @file xml.c
 * @author Radek Krejci <rkrejci@cesnet.cz>
 * @author Michal Vasko <mvasko@cesnet.cz>
 * @brief Generic XML parser implementation for libyang
 *
 * Copyright (c) 2015 - 2018 CESNET, z.s.p.o.
 *
 * This source code is licensed under BSD 3-Clause License (the "License").
 * You may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://opensource.org/licenses/BSD-3-Clause
 */
15
Radek Krejci535ea9f2020-05-29 16:01:05 +020016#define _GNU_SOURCE
17
18#include "xml.h"
Radek Krejci4b74d5e2018-09-26 14:30:55 +020019
Radek Krejcib1890642018-10-03 14:05:40 +020020#include <assert.h>
Radek Krejci7a7fa902018-09-25 17:08:21 +020021#include <ctype.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020022#include <stdint.h>
Radek Krejcie7b95092019-05-15 11:03:07 +020023#include <stdlib.h>
Radek Krejci4b74d5e2018-09-26 14:30:55 +020024#include <string.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020025
Radek Krejci535ea9f2020-05-29 16:01:05 +020026#include "common.h"
Michal Vasko5aa44c02020-06-29 11:47:02 +020027#include "compat.h"
Radek Krejci535ea9f2020-05-29 16:01:05 +020028#include "dict.h"
Michal Vaskoafac7822020-10-20 14:22:26 +020029#include "in_internal.h"
30#include "out_internal.h"
Radek Krejci535ea9f2020-05-29 16:01:05 +020031#include "tree.h"
32#include "tree_data.h"
Radek Krejcid91dbaf2018-09-21 15:51:39 +020033
/*
 * Move the input of lyxml_ctx c forward by s bytes. If the new position is the end of
 * the input, log LY_VCODE_EOF and return LY_EVALID from the calling function.
 *
 * The expansion is a single statement, so it is safe in unbraced if/else bodies;
 * use it only inside functions returning LY_ERR.
 */
#define move_input(c, s) \
    do { \
        ly_in_skip((c)->in, (s)); \
        LY_CHECK_ERR_RET(!(c)->in->current[0], LOGVAL((c)->ctx, LY_VCODE_EOF), LY_EVALID); \
    } while (0)
Radek Krejcid91dbaf2018-09-21 15:51:39 +020038
/*
 * Skip XML whitespace at the current input position of lyxml_ctx c,
 * registering every skipped '\n' as a new input line.
 *
 * The expansion is a single statement and the argument is parenthesized everywhere.
 */
#define ign_xmlws(c) \
    do { \
        while (is_xmlws(*(c)->in->current)) { \
            if (*(c)->in->current == '\n') { \
                LY_IN_NEW_LINE((c)->in); \
            } \
            ly_in_skip((c)->in, 1); \
        } \
    } while (0)
Michal Vaskob36053d2020-03-26 15:49:30 +010047
Radek Krejci857189e2020-09-01 13:26:36 +020048static LY_ERR lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *value_len, ly_bool *ws_only,
49 ly_bool *dynamic);
Radek Krejcid91dbaf2018-09-21 15:51:39 +020050
Radek Krejci4b74d5e2018-09-26 14:30:55 +020051/**
Radek Krejcidd713ce2021-01-04 23:12:12 +010052 * @brief Ignore and skip any characters until the delim of the size delim_len is read, including the delim
Radek Krejci4b74d5e2018-09-26 14:30:55 +020053 *
Radek Krejcidd713ce2021-01-04 23:12:12 +010054 * @param[in] xmlctx XML parser context to provide input handler and libyang context
55 * @param[in] in input handler to read the data, it is updated only in case the section is correctly terminated.
56 * @param[in] delim Delimiter to detect end of the section.
57 * @param[in] delim_len Length of the delimiter string to use.
58 * @param[in] sectname Section name to refer in error message.
Michal Vasko63f3d842020-07-08 10:10:14 +020059 */
Radek Krejcidd713ce2021-01-04 23:12:12 +010060LY_ERR
61skip_section(struct lyxml_ctx *xmlctx, const char *delim, size_t delim_len, const char *sectname)
Radek Krejcid91dbaf2018-09-21 15:51:39 +020062{
63 size_t i;
Radek Krejcidd713ce2021-01-04 23:12:12 +010064 register const char *input, *a, *b;
65 uint64_t parsed = 0, newlines = 0;
Radek Krejcid91dbaf2018-09-21 15:51:39 +020066
Radek Krejcidd713ce2021-01-04 23:12:12 +010067 for (input = xmlctx->in->current; *input; ++input, ++parsed) {
Radek Krejcid91dbaf2018-09-21 15:51:39 +020068 if (*input != *delim) {
Radek Krejcidd713ce2021-01-04 23:12:12 +010069 if (*input == '\n') {
70 ++newlines;
71 }
Radek Krejcid91dbaf2018-09-21 15:51:39 +020072 continue;
73 }
74 a = input;
75 b = delim;
76 for (i = 0; i < delim_len; ++i) {
77 if (*a++ != *b++) {
78 break;
79 }
80 }
81 if (i == delim_len) {
Michal Vasko63f3d842020-07-08 10:10:14 +020082 /* delim found */
Radek Krejcidd713ce2021-01-04 23:12:12 +010083 xmlctx->in->line += newlines;
84 ly_in_skip(xmlctx->in, parsed + delim_len);
85 return LY_SUCCESS;
Radek Krejcid91dbaf2018-09-21 15:51:39 +020086 }
87 }
Michal Vasko63f3d842020-07-08 10:10:14 +020088
Radek Krejcidd713ce2021-01-04 23:12:12 +010089 /* delim not found,
90 * do not update input handler to refer to the beginning of the section in error message */
91 LOGVAL(xmlctx->ctx, LY_VCODE_NTERM, sectname);
92 return LY_EVALID;
Radek Krejcid91dbaf2018-09-21 15:51:39 +020093}
94
Radek Krejci4b74d5e2018-09-26 14:30:55 +020095/**
Michal Vaskob36053d2020-03-26 15:49:30 +010096 * @brief Check/Get an XML identifier from the input string.
97 *
98 * The identifier must have at least one valid character complying the name start character constraints.
99 * The identifier is terminated by the first character, which does not comply to the name character constraints.
100 *
101 * See https://www.w3.org/TR/xml-names/#NT-NCName
102 *
103 * @param[in] xmlctx XML context.
104 * @param[out] start Pointer to the start of the identifier.
105 * @param[out] end Pointer ot the end of the identifier.
106 * @return LY_ERR value.
107 */
108static LY_ERR
109lyxml_parse_identifier(struct lyxml_ctx *xmlctx, const char **start, const char **end)
110{
111 const char *s, *in;
112 uint32_t c;
113 size_t parsed;
114 LY_ERR rc;
115
Michal Vasko63f3d842020-07-08 10:10:14 +0200116 in = s = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100117
118 /* check NameStartChar (minus colon) */
119 LY_CHECK_ERR_RET(ly_getutf8(&in, &c, &parsed),
Radek Krejci2efc45b2020-12-22 16:25:44 +0100120 LOGVAL(xmlctx->ctx, LY_VCODE_INCHAR, in[0]),
Michal Vasko69730152020-10-09 16:30:07 +0200121 LY_EVALID);
Michal Vaskob36053d2020-03-26 15:49:30 +0100122 LY_CHECK_ERR_RET(!is_xmlqnamestartchar(c),
Radek Krejci2efc45b2020-12-22 16:25:44 +0100123 LOGVAL(xmlctx->ctx, LYVE_SYNTAX, "Identifier \"%s\" starts with an invalid character.", in - parsed),
Michal Vasko69730152020-10-09 16:30:07 +0200124 LY_EVALID);
Michal Vaskob36053d2020-03-26 15:49:30 +0100125
126 /* check rest of the identifier */
127 do {
128 /* move only successfully parsed bytes */
Michal Vasko63f3d842020-07-08 10:10:14 +0200129 ly_in_skip(xmlctx->in, parsed);
Michal Vaskob36053d2020-03-26 15:49:30 +0100130
131 rc = ly_getutf8(&in, &c, &parsed);
Radek Krejci2efc45b2020-12-22 16:25:44 +0100132 LY_CHECK_ERR_RET(rc, LOGVAL(xmlctx->ctx, LY_VCODE_INCHAR, in[0]), LY_EVALID);
Michal Vaskob36053d2020-03-26 15:49:30 +0100133 } while (is_xmlqnamechar(c));
134
135 *start = s;
Michal Vasko63f3d842020-07-08 10:10:14 +0200136 *end = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100137 return LY_SUCCESS;
138}
139
140/**
141 * @brief Add namespace definition into XML context.
142 *
143 * Namespaces from a single element are supposed to be added sequentially together (not interleaved by a namespace from other
144 * element). This mimic namespace visibility, since the namespace defined in element E is not visible from its parents or
145 * siblings. On the other hand, namespace from a parent element can be redefined in a child element. This is also reflected
146 * by lyxml_ns_get() which returns the most recent namespace definition for the given prefix.
147 *
148 * When leaving processing of a subtree of some element (after it is removed from xmlctx->elements), caller is supposed to call
149 * lyxml_ns_rm() to remove all the namespaces defined in such an element from the context.
150 *
151 * @param[in] xmlctx XML context to work with.
152 * @param[in] prefix Pointer to the namespace prefix. Can be NULL for default namespace.
153 * @param[in] prefix_len Length of the prefix.
154 * @param[in] uri Namespace URI (value) to store directly. Value is always spent.
155 * @return LY_ERR values.
156 */
157LY_ERR
158lyxml_ns_add(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, char *uri)
159{
Radek Krejciba03a5a2020-08-27 14:40:41 +0200160 LY_ERR ret = LY_SUCCESS;
Michal Vaskob36053d2020-03-26 15:49:30 +0100161 struct lyxml_ns *ns;
162
163 ns = malloc(sizeof *ns);
164 LY_CHECK_ERR_RET(!ns, LOGMEM(xmlctx->ctx), LY_EMEM);
165
166 /* we need to connect the depth of the element where the namespace is defined with the
167 * namespace record to be able to maintain (remove) the record when the parser leaves
168 * (to its sibling or back to the parent) the element where the namespace was defined */
169 ns->depth = xmlctx->elements.count;
170
171 ns->uri = uri;
172 if (prefix) {
173 ns->prefix = strndup(prefix, prefix_len);
174 LY_CHECK_ERR_RET(!ns->prefix, LOGMEM(xmlctx->ctx); free(ns->uri); free(ns), LY_EMEM);
175 } else {
176 ns->prefix = NULL;
177 }
178
Radek Krejci3d92e442020-10-12 12:48:13 +0200179 ret = ly_set_add(&xmlctx->ns, ns, 1, NULL);
Radek Krejciba03a5a2020-08-27 14:40:41 +0200180 LY_CHECK_ERR_RET(ret, free(ns->prefix); free(ns->uri); free(ns), ret);
181
Michal Vaskob36053d2020-03-26 15:49:30 +0100182 return LY_SUCCESS;
183}
184
185/**
186 * @brief Remove all the namespaces defined in the element recently closed (removed from the xmlctx->elements).
187 *
188 * @param[in] xmlctx XML context to work with.
189 */
190void
191lyxml_ns_rm(struct lyxml_ctx *xmlctx)
192{
Radek Krejci1deb5be2020-08-26 16:43:36 +0200193 for (uint32_t u = xmlctx->ns.count - 1; u + 1 > 0; --u) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100194 if (((struct lyxml_ns *)xmlctx->ns.objs[u])->depth != xmlctx->elements.count + 1) {
195 /* we are done, the namespaces from a single element are supposed to be together */
196 break;
197 }
198 /* remove the ns structure */
199 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->prefix);
200 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->uri);
201 free(xmlctx->ns.objs[u]);
202 --xmlctx->ns.count;
203 }
204
205 if (!xmlctx->ns.count) {
206 /* cleanup the xmlctx's namespaces storage */
207 ly_set_erase(&xmlctx->ns, NULL);
208 }
209}
210
Michal Vaskob36053d2020-03-26 15:49:30 +0100211const struct lyxml_ns *
Michal Vaskoc8a230d2020-08-14 12:17:10 +0200212lyxml_ns_get(const struct ly_set *ns_set, const char *prefix, size_t prefix_len)
Michal Vaskob36053d2020-03-26 15:49:30 +0100213{
Michal Vaskob36053d2020-03-26 15:49:30 +0100214 struct lyxml_ns *ns;
215
Radek Krejci1deb5be2020-08-26 16:43:36 +0200216 for (uint32_t u = ns_set->count - 1; u + 1 > 0; --u) {
Michal Vaskoc8a230d2020-08-14 12:17:10 +0200217 ns = (struct lyxml_ns *)ns_set->objs[u];
Michal Vaskob36053d2020-03-26 15:49:30 +0100218 if (prefix && prefix_len) {
219 if (ns->prefix && !ly_strncmp(ns->prefix, prefix, prefix_len)) {
220 return ns;
221 }
222 } else if (!ns->prefix) {
223 /* default namespace */
224 return ns;
225 }
226 }
227
228 return NULL;
229}
230
Michal Vasko8cef5232020-06-15 17:59:47 +0200231/**
232 * @brief Skip in the input until EOF or just after the opening tag.
233 * Handles special XML constructs (comment, cdata, doctype).
234 *
235 * @param[in] xmlctx XML context to use.
236 * @return LY_ERR value.
237 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100238static LY_ERR
239lyxml_skip_until_end_or_after_otag(struct lyxml_ctx *xmlctx)
240{
241 const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */
Michal Vasko63f3d842020-07-08 10:10:14 +0200242 const char *endtag, *sectname;
Radek Krejcidd713ce2021-01-04 23:12:12 +0100243 size_t endtag_len;
Michal Vaskob36053d2020-03-26 15:49:30 +0100244
245 while (1) {
246 ign_xmlws(xmlctx);
247
Michal Vasko63f3d842020-07-08 10:10:14 +0200248 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100249 /* EOF */
250 if (xmlctx->elements.count) {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100251 LOGVAL(ctx, LY_VCODE_EOF);
Michal Vaskob36053d2020-03-26 15:49:30 +0100252 return LY_EVALID;
253 }
254 return LY_SUCCESS;
Michal Vasko63f3d842020-07-08 10:10:14 +0200255 } else if (xmlctx->in->current[0] != '<') {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100256 LOGVAL(ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
Michal Vasko69730152020-10-09 16:30:07 +0200257 xmlctx->in->current, "element tag start ('<')");
Michal Vaskob36053d2020-03-26 15:49:30 +0100258 return LY_EVALID;
259 }
260 move_input(xmlctx, 1);
261
Michal Vasko63f3d842020-07-08 10:10:14 +0200262 if (xmlctx->in->current[0] == '!') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100263 move_input(xmlctx, 1);
264 /* sections to ignore */
Michal Vasko63f3d842020-07-08 10:10:14 +0200265 if (!strncmp(xmlctx->in->current, "--", 2)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100266 /* comment */
267 move_input(xmlctx, 2);
268 sectname = "Comment";
269 endtag = "-->";
Radek Krejcif13b87b2020-12-01 22:02:17 +0100270 endtag_len = ly_strlen_const("-->");
271 } else if (!strncmp(xmlctx->in->current, "[CDATA[", ly_strlen_const("[CDATA["))) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100272 /* CDATA section */
Radek Krejcif13b87b2020-12-01 22:02:17 +0100273 move_input(xmlctx, ly_strlen_const("[CDATA["));
Michal Vaskob36053d2020-03-26 15:49:30 +0100274 sectname = "CData";
275 endtag = "]]>";
Radek Krejcif13b87b2020-12-01 22:02:17 +0100276 endtag_len = ly_strlen_const("]]>");
277 } else if (!strncmp(xmlctx->in->current, "DOCTYPE", ly_strlen_const("DOCTYPE"))) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100278 /* Document type declaration - not supported */
Radek Krejci2efc45b2020-12-22 16:25:44 +0100279 LOGVAL(ctx, LY_VCODE_NSUPP, "Document Type Declaration");
Michal Vaskob36053d2020-03-26 15:49:30 +0100280 return LY_EVALID;
281 } else {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100282 LOGVAL(ctx, LYVE_SYNTAX, "Unknown XML section \"%.20s\".", &xmlctx->in->current[-2]);
Michal Vaskob36053d2020-03-26 15:49:30 +0100283 return LY_EVALID;
284 }
Radek Krejcidd713ce2021-01-04 23:12:12 +0100285 LY_CHECK_RET(skip_section(xmlctx, endtag, endtag_len, sectname));
Michal Vasko63f3d842020-07-08 10:10:14 +0200286 } else if (xmlctx->in->current[0] == '?') {
Radek Krejcidd713ce2021-01-04 23:12:12 +0100287 LY_CHECK_RET(skip_section(xmlctx, "?>", 2, "Declaration"));
Michal Vaskob36053d2020-03-26 15:49:30 +0100288 } else {
289 /* other non-WS character */
290 break;
291 }
292 }
293
294 return LY_SUCCESS;
295}
296
Michal Vasko8cef5232020-06-15 17:59:47 +0200297/**
298 * @brief Parse QName.
299 *
300 * @param[in] xmlctx XML context to use.
301 * @param[out] prefix Parsed prefix, may be NULL.
302 * @param[out] prefix_len Length of @p prefix.
303 * @param[out] name Parsed name.
304 * @param[out] name_len Length of @p name.
305 * @return LY_ERR value.
306 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100307static LY_ERR
308lyxml_parse_qname(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
309{
310 const char *start, *end;
311
312 *prefix = NULL;
313 *prefix_len = 0;
314
315 LY_CHECK_RET(lyxml_parse_identifier(xmlctx, &start, &end));
316 if (end[0] == ':') {
317 /* we have prefixed identifier */
318 *prefix = start;
319 *prefix_len = end - start;
320
321 move_input(xmlctx, 1);
322 LY_CHECK_RET(lyxml_parse_identifier(xmlctx, &start, &end));
323 }
324
325 *name = start;
326 *name_len = end - start;
327 return LY_SUCCESS;
328}
329
330/**
Michal Vasko8cef5232020-06-15 17:59:47 +0200331 * @brief Parse XML text content (value).
332 *
333 * @param[in] xmlctx XML context to use.
334 * @param[in] endchar Expected character to mark value end.
335 * @param[out] value Parsed value.
336 * @param[out] length Length of @p value.
337 * @param[out] ws_only Whether the value is empty/white-spaces only.
338 * @param[out] dynamic Whether the value was dynamically allocated.
339 * @return LY_ERR value.
340 */
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200341static LY_ERR
Radek Krejci857189e2020-09-01 13:26:36 +0200342lyxml_parse_value(struct lyxml_ctx *xmlctx, char endchar, char **value, size_t *length, ly_bool *ws_only, ly_bool *dynamic)
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200343{
Michal Vaskob36053d2020-03-26 15:49:30 +0100344#define BUFSIZE 24
345#define BUFSIZE_STEP 128
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200346
Michal Vaskob36053d2020-03-26 15:49:30 +0100347 const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */
Michal Vasko63f3d842020-07-08 10:10:14 +0200348 const char *in = xmlctx->in->current, *start;
Michal Vaskob36053d2020-03-26 15:49:30 +0100349 char *buf = NULL;
Radek Krejci4ad42aa2019-07-23 16:55:58 +0200350 size_t offset; /* read offset in input buffer */
351 size_t len; /* length of the output string (write offset in output buffer) */
352 size_t size = 0; /* size of the output buffer */
Radek Krejci7a7fa902018-09-25 17:08:21 +0200353 void *p;
Radek Krejci117d2082018-09-26 10:05:14 +0200354 uint32_t n;
Michal Vaskob36053d2020-03-26 15:49:30 +0100355 size_t u;
Radek Krejci857189e2020-09-01 13:26:36 +0200356 ly_bool ws = 1;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200357
Michal Vaskob36053d2020-03-26 15:49:30 +0100358 assert(xmlctx);
Radek Krejcib1890642018-10-03 14:05:40 +0200359
Radek Krejcid70d1072018-10-09 14:20:47 +0200360 /* init */
Michal Vaskob36053d2020-03-26 15:49:30 +0100361 start = in;
Radek Krejcid70d1072018-10-09 14:20:47 +0200362 offset = len = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200363
364 /* parse */
365 while (in[offset]) {
366 if (in[offset] == '&') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100367 /* non WS */
368 ws = 0;
Radek Krejcid70d1072018-10-09 14:20:47 +0200369
Michal Vaskob36053d2020-03-26 15:49:30 +0100370 if (!buf) {
371 /* prepare output buffer */
372 buf = malloc(BUFSIZE);
373 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
374 size = BUFSIZE;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200375 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100376
377 /* allocate enough for the offset and next character,
378 * we will need 4 bytes at most since we support only the predefined
379 * (one-char) entities and character references */
Juraj Vijtiukcb017cc2020-07-08 16:19:58 +0200380 while (len + offset + 4 >= size) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100381 buf = ly_realloc(buf, size + BUFSIZE_STEP);
382 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
383 size += BUFSIZE_STEP;
384 }
385
386 if (offset) {
387 /* store what we have so far */
388 memcpy(&buf[len], in, offset);
389 len += offset;
390 in += offset;
391 offset = 0;
392 }
393
Radek Krejci7a7fa902018-09-25 17:08:21 +0200394 ++offset;
395 if (in[offset] != '#') {
396 /* entity reference - only predefined references are supported */
Radek Krejcif13b87b2020-12-01 22:02:17 +0100397 if (!strncmp(&in[offset], "lt;", ly_strlen_const("lt;"))) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100398 buf[len++] = '<';
Radek Krejcif13b87b2020-12-01 22:02:17 +0100399 in += ly_strlen_const("&lt;");
400 } else if (!strncmp(&in[offset], "gt;", ly_strlen_const("gt;"))) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100401 buf[len++] = '>';
Radek Krejcif13b87b2020-12-01 22:02:17 +0100402 in += ly_strlen_const("&gt;");
403 } else if (!strncmp(&in[offset], "amp;", ly_strlen_const("amp;"))) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100404 buf[len++] = '&';
Radek Krejcif13b87b2020-12-01 22:02:17 +0100405 in += ly_strlen_const("&amp;");
406 } else if (!strncmp(&in[offset], "apos;", ly_strlen_const("apos;"))) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100407 buf[len++] = '\'';
Radek Krejcif13b87b2020-12-01 22:02:17 +0100408 in += ly_strlen_const("&apos;");
409 } else if (!strncmp(&in[offset], "quot;", ly_strlen_const("quot;"))) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100410 buf[len++] = '\"';
Radek Krejcif13b87b2020-12-01 22:02:17 +0100411 in += ly_strlen_const("&quot;");
Radek Krejci7a7fa902018-09-25 17:08:21 +0200412 } else {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100413 LOGVAL(ctx, LYVE_SYNTAX, "Entity reference \"%.*s\" not supported, only predefined references allowed.",
414 10, &in[offset - 1]);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200415 goto error;
416 }
417 offset = 0;
418 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100419 p = (void *)&in[offset - 1];
Radek Krejci7a7fa902018-09-25 17:08:21 +0200420 /* character reference */
421 ++offset;
422 if (isdigit(in[offset])) {
423 for (n = 0; isdigit(in[offset]); offset++) {
Radek Krejcif13b87b2020-12-01 22:02:17 +0100424 n = (LY_BASE_DEC * n) + (in[offset] - '0');
Radek Krejci7a7fa902018-09-25 17:08:21 +0200425 }
Michal Vasko69730152020-10-09 16:30:07 +0200426 } else if ((in[offset] == 'x') && isxdigit(in[offset + 1])) {
Radek Krejci7a7fa902018-09-25 17:08:21 +0200427 for (n = 0, ++offset; isxdigit(in[offset]); offset++) {
428 if (isdigit(in[offset])) {
429 u = (in[offset] - '0');
430 } else if (in[offset] > 'F') {
Radek Krejcif13b87b2020-12-01 22:02:17 +0100431 u = LY_BASE_DEC + (in[offset] - 'a');
Radek Krejci7a7fa902018-09-25 17:08:21 +0200432 } else {
Radek Krejcif13b87b2020-12-01 22:02:17 +0100433 u = LY_BASE_DEC + (in[offset] - 'A');
Radek Krejci7a7fa902018-09-25 17:08:21 +0200434 }
Radek Krejcif13b87b2020-12-01 22:02:17 +0100435 n = (LY_BASE_HEX * n) + u;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200436 }
437 } else {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100438 LOGVAL(ctx, LYVE_SYNTAX, "Invalid character reference \"%.*s\".", 12, p);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200439 goto error;
440
441 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100442
Radek Krejci7a7fa902018-09-25 17:08:21 +0200443 LY_CHECK_ERR_GOTO(in[offset] != ';',
Radek Krejci2efc45b2020-12-22 16:25:44 +0100444 LOGVAL(ctx, LY_VCODE_INSTREXP,
Michal Vasko69730152020-10-09 16:30:07 +0200445 LY_VCODE_INSTREXP_len(&in[offset]), &in[offset], ";"),
446 error);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200447 ++offset;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200448 LY_CHECK_ERR_GOTO(ly_pututf8(&buf[len], n, &u),
Radek Krejci2efc45b2020-12-22 16:25:44 +0100449 LOGVAL(ctx, LYVE_SYNTAX, "Invalid character reference \"%.*s\" (0x%08x).", 12, p, n),
Michal Vasko69730152020-10-09 16:30:07 +0200450 error);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200451 len += u;
452 in += offset;
453 offset = 0;
454 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100455 } else if (in[offset] == endchar) {
Radek Krejci7a7fa902018-09-25 17:08:21 +0200456 /* end of string */
Radek Krejcid70d1072018-10-09 14:20:47 +0200457 if (buf) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100458 /* realloc exact size string */
459 buf = ly_realloc(buf, len + offset + 1);
460 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
461 size = len + offset + 1;
Radek Krejcid70d1072018-10-09 14:20:47 +0200462 memcpy(&buf[len], in, offset);
Michal Vaskob36053d2020-03-26 15:49:30 +0100463
464 /* set terminating NULL byte */
465 buf[len + offset] = '\0';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200466 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200467 len += offset;
Michal Vaskob36053d2020-03-26 15:49:30 +0100468 in += offset;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200469 goto success;
470 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100471 if (!is_xmlws(in[offset])) {
472 /* non WS */
473 ws = 0;
474 }
475
Radek Krejci7a7fa902018-09-25 17:08:21 +0200476 /* log lines */
477 if (in[offset] == '\n') {
Radek Krejcid54412f2020-12-17 20:25:35 +0100478 LY_IN_NEW_LINE(xmlctx->in);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200479 }
480
481 /* continue */
482 ++offset;
483 }
484 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100485
486 /* EOF reached before endchar */
Radek Krejci2efc45b2020-12-22 16:25:44 +0100487 LOGVAL(ctx, LY_VCODE_EOF);
Michal Vaskob36053d2020-03-26 15:49:30 +0100488
Radek Krejci7a7fa902018-09-25 17:08:21 +0200489error:
Michal Vaskob36053d2020-03-26 15:49:30 +0100490 free(buf);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200491 return LY_EVALID;
492
493success:
Radek Krejcid70d1072018-10-09 14:20:47 +0200494 if (buf) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100495 *value = buf;
496 *dynamic = 1;
497 } else {
498 *value = (char *)start;
499 *dynamic = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200500 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100501 *length = len;
502 *ws_only = ws;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200503
Radek Krejcid54412f2020-12-17 20:25:35 +0100504 xmlctx->in->current = in;
Michal Vaskob36053d2020-03-26 15:49:30 +0100505 return LY_SUCCESS;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200506
507#undef BUFSIZE
508#undef BUFSIZE_STEP
Radek Krejci7a7fa902018-09-25 17:08:21 +0200509}
510
Michal Vasko8cef5232020-06-15 17:59:47 +0200511/**
512 * @brief Parse XML closing element and match it to a stored starting element.
513 *
514 * @param[in] xmlctx XML context to use.
515 * @param[in] prefix Expected closing element prefix.
516 * @param[in] prefix_len Length of @p prefix.
517 * @param[in] name Expected closing element name.
518 * @param[in] name_len Length of @p name.
519 * @param[in] empty Whether we are parsing a special "empty" element (with joined starting and closing tag) with no value.
520 * @return LY_ERR value.
521 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100522static LY_ERR
523lyxml_close_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, const char *name, size_t name_len,
Radek Krejci857189e2020-09-01 13:26:36 +0200524 ly_bool empty)
Radek Krejcid972c252018-09-25 13:23:39 +0200525{
Michal Vaskob36053d2020-03-26 15:49:30 +0100526 struct lyxml_elem *e;
Radek Krejcid972c252018-09-25 13:23:39 +0200527
Michal Vaskob36053d2020-03-26 15:49:30 +0100528 /* match opening and closing element tags */
529 if (!xmlctx->elements.count) {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100530 LOGVAL(xmlctx->ctx, LYVE_SYNTAX, "Stray closing element tag (\"%.*s\").",
Radek Krejci422afb12021-03-04 16:38:16 +0100531 (int)name_len, name);
Michal Vaskob36053d2020-03-26 15:49:30 +0100532 return LY_EVALID;
533 }
Radek Krejcid972c252018-09-25 13:23:39 +0200534
Michal Vaskob36053d2020-03-26 15:49:30 +0100535 e = (struct lyxml_elem *)xmlctx->elements.objs[xmlctx->elements.count - 1];
Michal Vasko69730152020-10-09 16:30:07 +0200536 if ((e->prefix_len != prefix_len) || (e->name_len != name_len) ||
537 (prefix_len && strncmp(prefix, e->prefix, e->prefix_len)) || strncmp(name, e->name, e->name_len)) {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100538 LOGVAL(xmlctx->ctx, LYVE_SYNTAX, "Opening (\"%.*s%s%.*s\") and closing (\"%.*s%s%.*s\") elements tag mismatch.",
Radek Krejci422afb12021-03-04 16:38:16 +0100539 (int)e->prefix_len, e->prefix ? e->prefix : "", e->prefix ? ":" : "", (int)e->name_len, e->name,
540 (int)prefix_len, prefix ? prefix : "", prefix ? ":" : "", (int)name_len, name);
Michal Vaskob36053d2020-03-26 15:49:30 +0100541 return LY_EVALID;
542 }
Radek Krejcid972c252018-09-25 13:23:39 +0200543
Michal Vaskob36053d2020-03-26 15:49:30 +0100544 /* opening and closing element tags matches, remove record from the opening tags list */
545 ly_set_rm_index(&xmlctx->elements, xmlctx->elements.count - 1, free);
Radek Krejcid972c252018-09-25 13:23:39 +0200546
Michal Vaskob36053d2020-03-26 15:49:30 +0100547 /* remove also the namespaces connected with the element */
548 lyxml_ns_rm(xmlctx);
Radek Krejcid972c252018-09-25 13:23:39 +0200549
Michal Vaskob36053d2020-03-26 15:49:30 +0100550 /* skip WS */
551 ign_xmlws(xmlctx);
Radek Krejcid972c252018-09-25 13:23:39 +0200552
Michal Vaskob36053d2020-03-26 15:49:30 +0100553 /* special "<elem/>" element */
Michal Vasko63f3d842020-07-08 10:10:14 +0200554 if (empty && (xmlctx->in->current[0] == '/')) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100555 move_input(xmlctx, 1);
556 }
Michal Vasko52927e22020-03-16 17:26:14 +0100557
Michal Vaskob36053d2020-03-26 15:49:30 +0100558 /* parse closing tag */
Michal Vasko63f3d842020-07-08 10:10:14 +0200559 if (xmlctx->in->current[0] != '>') {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100560 LOGVAL(xmlctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
Michal Vasko69730152020-10-09 16:30:07 +0200561 xmlctx->in->current, "element tag termination ('>')");
Michal Vaskob36053d2020-03-26 15:49:30 +0100562 return LY_EVALID;
563 }
Michal Vasko52927e22020-03-16 17:26:14 +0100564
Michal Vaskob36053d2020-03-26 15:49:30 +0100565 /* move after closing tag without checking for EOF */
Michal Vasko63f3d842020-07-08 10:10:14 +0200566 ly_in_skip(xmlctx->in, 1);
Michal Vasko52927e22020-03-16 17:26:14 +0100567
Radek Krejcid972c252018-09-25 13:23:39 +0200568 return LY_SUCCESS;
569}
570
Michal Vasko8cef5232020-06-15 17:59:47 +0200571/**
572 * @brief Store parsed opening element and parse any included namespaces.
573 *
574 * @param[in] xmlctx XML context to use.
575 * @param[in] prefix Parsed starting element prefix.
576 * @param[in] prefix_len Length of @p prefix.
577 * @param[in] name Parsed starting element name.
578 * @param[in] name_len Length of @p name.
579 * @return LY_ERR value.
580 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100581static LY_ERR
582lyxml_open_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, const char *name, size_t name_len)
Radek Krejcib1890642018-10-03 14:05:40 +0200583{
Michal Vaskob36053d2020-03-26 15:49:30 +0100584 LY_ERR ret = LY_SUCCESS;
585 struct lyxml_elem *e;
586 const char *prev_input;
587 char *value;
588 size_t parsed, value_len;
Radek Krejci857189e2020-09-01 13:26:36 +0200589 ly_bool ws_only, dynamic, is_ns;
Michal Vaskob36053d2020-03-26 15:49:30 +0100590 uint32_t c;
Radek Krejcib1890642018-10-03 14:05:40 +0200591
Michal Vaskob36053d2020-03-26 15:49:30 +0100592 /* store element opening tag information */
593 e = malloc(sizeof *e);
594 LY_CHECK_ERR_RET(!e, LOGMEM(xmlctx->ctx), LY_EMEM);
595 e->name = name;
596 e->prefix = prefix;
597 e->name_len = name_len;
598 e->prefix_len = prefix_len;
Radek Krejci3d92e442020-10-12 12:48:13 +0200599 LY_CHECK_RET(ly_set_add(&xmlctx->elements, e, 1, NULL));
Michal Vaskob36053d2020-03-26 15:49:30 +0100600
601 /* skip WS */
602 ign_xmlws(xmlctx);
603
604 /* parse and store all namespaces */
Michal Vasko63f3d842020-07-08 10:10:14 +0200605 prev_input = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100606 is_ns = 1;
Michal Vasko63f3d842020-07-08 10:10:14 +0200607 while ((xmlctx->in->current[0] != '\0') && !ly_getutf8(&xmlctx->in->current, &c, &parsed) && is_xmlqnamestartchar(c)) {
608 xmlctx->in->current -= parsed;
Michal Vaskob36053d2020-03-26 15:49:30 +0100609
610 /* parse attribute name */
611 LY_CHECK_GOTO(ret = lyxml_parse_qname(xmlctx, &prefix, &prefix_len, &name, &name_len), cleanup);
612
613 /* parse the value */
614 LY_CHECK_GOTO(ret = lyxml_next_attr_content(xmlctx, (const char **)&value, &value_len, &ws_only, &dynamic), cleanup);
615
616 /* store every namespace */
617 if ((prefix && !ly_strncmp("xmlns", prefix, prefix_len)) || (!prefix && !ly_strncmp("xmlns", name, name_len))) {
Radek IÅ¡a017270d2021-02-16 10:26:15 +0100618 ret = lyxml_ns_add(xmlctx, prefix ? name : NULL, prefix ? name_len : 0,
619 dynamic ? value : strndup(value, value_len));
Michal Vaskob36053d2020-03-26 15:49:30 +0100620 dynamic = 0;
Radek IÅ¡a017270d2021-02-16 10:26:15 +0100621 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100622 } else {
623 /* not a namespace */
624 is_ns = 0;
625 }
626 if (dynamic) {
627 free(value);
628 }
629
630 /* skip WS */
631 ign_xmlws(xmlctx);
632
633 if (is_ns) {
634 /* we can actually skip all the namespaces as there is no reason to parse them again */
Michal Vasko63f3d842020-07-08 10:10:14 +0200635 prev_input = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100636 }
Radek Krejcib1890642018-10-03 14:05:40 +0200637 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100638
639cleanup:
640 if (!ret) {
Michal Vasko63f3d842020-07-08 10:10:14 +0200641 xmlctx->in->current = prev_input;
Michal Vaskob36053d2020-03-26 15:49:30 +0100642 }
643 return ret;
644}
645
Michal Vasko8cef5232020-06-15 17:59:47 +0200646/**
647 * @brief Move parser to the attribute content and parse it.
648 *
649 * @param[in] xmlctx XML context to use.
650 * @param[out] value Parsed attribute value.
651 * @param[out] value_len Length of @p value.
652 * @param[out] ws_only Whether the value is empty/white-spaces only.
653 * @param[out] dynamic Whether the value was dynamically allocated.
654 * @return LY_ERR value.
655 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100656static LY_ERR
Radek Krejci857189e2020-09-01 13:26:36 +0200657lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *value_len, ly_bool *ws_only, ly_bool *dynamic)
Michal Vaskob36053d2020-03-26 15:49:30 +0100658{
659 char quot;
660
661 /* skip WS */
662 ign_xmlws(xmlctx);
663
664 /* skip '=' */
Michal Vasko63f3d842020-07-08 10:10:14 +0200665 if (xmlctx->in->current[0] == '\0') {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100666 LOGVAL(xmlctx->ctx, LY_VCODE_EOF);
Michal Vaskob36053d2020-03-26 15:49:30 +0100667 return LY_EVALID;
Michal Vasko63f3d842020-07-08 10:10:14 +0200668 } else if (xmlctx->in->current[0] != '=') {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100669 LOGVAL(xmlctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
Michal Vasko69730152020-10-09 16:30:07 +0200670 xmlctx->in->current, "'='");
Michal Vaskob36053d2020-03-26 15:49:30 +0100671 return LY_EVALID;
672 }
673 move_input(xmlctx, 1);
674
675 /* skip WS */
676 ign_xmlws(xmlctx);
677
678 /* find quotes */
Michal Vasko63f3d842020-07-08 10:10:14 +0200679 if (xmlctx->in->current[0] == '\0') {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100680 LOGVAL(xmlctx->ctx, LY_VCODE_EOF);
Michal Vaskob36053d2020-03-26 15:49:30 +0100681 return LY_EVALID;
Michal Vasko63f3d842020-07-08 10:10:14 +0200682 } else if ((xmlctx->in->current[0] != '\'') && (xmlctx->in->current[0] != '\"')) {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100683 LOGVAL(xmlctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
Michal Vasko69730152020-10-09 16:30:07 +0200684 xmlctx->in->current, "either single or double quotation mark");
Michal Vaskob36053d2020-03-26 15:49:30 +0100685 return LY_EVALID;
686 }
687
688 /* remember quote */
Michal Vasko63f3d842020-07-08 10:10:14 +0200689 quot = xmlctx->in->current[0];
Michal Vaskob36053d2020-03-26 15:49:30 +0100690 move_input(xmlctx, 1);
691
692 /* parse attribute value */
693 LY_CHECK_RET(lyxml_parse_value(xmlctx, quot, (char **)value, value_len, ws_only, dynamic));
694
695 /* move after ending quote (without checking for EOF) */
Michal Vasko63f3d842020-07-08 10:10:14 +0200696 ly_in_skip(xmlctx->in, 1);
Michal Vaskob36053d2020-03-26 15:49:30 +0100697
698 return LY_SUCCESS;
699}
700
Michal Vasko8cef5232020-06-15 17:59:47 +0200701/**
702 * @brief Move parser to the next attribute and parse it.
703 *
704 * @param[in] xmlctx XML context to use.
705 * @param[out] prefix Parsed attribute prefix.
706 * @param[out] prefix_len Length of @p prefix.
707 * @param[out] name Parsed attribute name.
708 * @param[out] name_len Length of @p name.
709 * @return LY_ERR value.
710 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100711static LY_ERR
712lyxml_next_attribute(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
713{
714 const char *in;
715 char *value;
716 uint32_t c;
717 size_t parsed, value_len;
Radek Krejci857189e2020-09-01 13:26:36 +0200718 ly_bool ws_only, dynamic;
Michal Vaskob36053d2020-03-26 15:49:30 +0100719
720 /* skip WS */
721 ign_xmlws(xmlctx);
722
723 /* parse only possible attributes */
Michal Vasko63f3d842020-07-08 10:10:14 +0200724 while ((xmlctx->in->current[0] != '>') && (xmlctx->in->current[0] != '/')) {
725 in = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100726 if (in[0] == '\0') {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100727 LOGVAL(xmlctx->ctx, LY_VCODE_EOF);
Michal Vaskob36053d2020-03-26 15:49:30 +0100728 return LY_EVALID;
729 } else if ((ly_getutf8(&in, &c, &parsed) || !is_xmlqnamestartchar(c))) {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100730 LOGVAL(xmlctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in - parsed), in - parsed,
Michal Vasko69730152020-10-09 16:30:07 +0200731 "element tag end ('>' or '/>') or an attribute");
Michal Vaskob36053d2020-03-26 15:49:30 +0100732 return LY_EVALID;
733 }
734
735 /* parse attribute name */
736 LY_CHECK_RET(lyxml_parse_qname(xmlctx, prefix, prefix_len, name, name_len));
737
738 if ((!*prefix || ly_strncmp("xmlns", *prefix, *prefix_len)) && (*prefix || ly_strncmp("xmlns", *name, *name_len))) {
739 /* standard attribute */
740 break;
741 }
742
743 /* namespace, skip it */
744 LY_CHECK_RET(lyxml_next_attr_content(xmlctx, (const char **)&value, &value_len, &ws_only, &dynamic));
745 if (dynamic) {
746 free(value);
747 }
748
749 /* skip WS */
750 ign_xmlws(xmlctx);
751 }
752
753 return LY_SUCCESS;
754}
755
Michal Vasko8cef5232020-06-15 17:59:47 +0200756/**
757 * @brief Move parser to the next element and parse it.
758 *
759 * @param[in] xmlctx XML context to use.
760 * @param[out] prefix Parsed element prefix.
761 * @param[out] prefix_len Length of @p prefix.
762 * @param[out] name Parse element name.
763 * @param[out] name_len Length of @p name.
Radek Krejci1deb5be2020-08-26 16:43:36 +0200764 * @param[out] closing Flag if the element is closing (includes '/').
Michal Vasko8cef5232020-06-15 17:59:47 +0200765 * @return LY_ERR value.
766 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100767static LY_ERR
768lyxml_next_element(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len,
Radek Krejci857189e2020-09-01 13:26:36 +0200769 ly_bool *closing)
Michal Vaskob36053d2020-03-26 15:49:30 +0100770{
771 /* skip WS until EOF or after opening tag '<' */
772 LY_CHECK_RET(lyxml_skip_until_end_or_after_otag(xmlctx));
Michal Vasko63f3d842020-07-08 10:10:14 +0200773 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100774 /* set return values */
775 *prefix = *name = NULL;
776 *prefix_len = *name_len = 0;
777 return LY_SUCCESS;
778 }
779
Michal Vasko63f3d842020-07-08 10:10:14 +0200780 if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100781 move_input(xmlctx, 1);
782 *closing = 1;
783 } else {
784 *closing = 0;
785 }
786
787 /* skip WS */
788 ign_xmlws(xmlctx);
789
790 /* parse element name */
791 LY_CHECK_RET(lyxml_parse_qname(xmlctx, prefix, prefix_len, name, name_len));
792
793 return LY_SUCCESS;
794}
795
796LY_ERR
Michal Vasko63f3d842020-07-08 10:10:14 +0200797lyxml_ctx_new(const struct ly_ctx *ctx, struct ly_in *in, struct lyxml_ctx **xmlctx_p)
Michal Vaskob36053d2020-03-26 15:49:30 +0100798{
799 LY_ERR ret = LY_SUCCESS;
800 struct lyxml_ctx *xmlctx;
Radek Krejci857189e2020-09-01 13:26:36 +0200801 ly_bool closing;
Michal Vaskob36053d2020-03-26 15:49:30 +0100802
803 /* new context */
804 xmlctx = calloc(1, sizeof *xmlctx);
805 LY_CHECK_ERR_RET(!xmlctx, LOGMEM(ctx), LY_EMEM);
806 xmlctx->ctx = ctx;
Michal Vasko63f3d842020-07-08 10:10:14 +0200807 xmlctx->in = in;
Michal Vaskob36053d2020-03-26 15:49:30 +0100808
Radek Krejciddace2c2021-01-08 11:30:56 +0100809 LOG_LOCINIT(NULL, NULL, NULL, in);
Radek Krejci2efc45b2020-12-22 16:25:44 +0100810
Michal Vaskob36053d2020-03-26 15:49:30 +0100811 /* parse next element, if any */
812 LY_CHECK_GOTO(ret = lyxml_next_element(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name,
Michal Vasko69730152020-10-09 16:30:07 +0200813 &xmlctx->name_len, &closing), cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100814
Michal Vasko63f3d842020-07-08 10:10:14 +0200815 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100816 /* update status */
817 xmlctx->status = LYXML_END;
818 } else if (closing) {
Radek Krejci422afb12021-03-04 16:38:16 +0100819 LOGVAL(ctx, LYVE_SYNTAX, "Stray closing element tag (\"%.*s\").", (int)xmlctx->name_len, xmlctx->name);
Michal Vaskob36053d2020-03-26 15:49:30 +0100820 ret = LY_EVALID;
821 goto cleanup;
822 } else {
823 /* open an element, also parses all enclosed namespaces */
824 LY_CHECK_GOTO(ret = lyxml_open_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len), cleanup);
825
826 /* update status */
827 xmlctx->status = LYXML_ELEMENT;
828 }
829
830cleanup:
831 if (ret) {
832 lyxml_ctx_free(xmlctx);
833 } else {
834 *xmlctx_p = xmlctx;
835 }
836 return ret;
837}
838
839LY_ERR
840lyxml_ctx_next(struct lyxml_ctx *xmlctx)
841{
842 LY_ERR ret = LY_SUCCESS;
Radek Krejci857189e2020-09-01 13:26:36 +0200843 ly_bool closing;
Michal Vaskob36053d2020-03-26 15:49:30 +0100844 struct lyxml_elem *e;
845
846 /* if the value was not used, free it */
847 if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) {
848 free((char *)xmlctx->value);
849 xmlctx->value = NULL;
850 xmlctx->dynamic = 0;
851 }
852
853 switch (xmlctx->status) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100854 case LYXML_ELEM_CONTENT:
Radek Krejcif13b87b2020-12-01 22:02:17 +0100855 /* content |</elem> */
856
Michal Vaskob36053d2020-03-26 15:49:30 +0100857 /* handle special case when empty content for "<elem/>" was returned */
Michal Vasko63f3d842020-07-08 10:10:14 +0200858 if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100859 assert(xmlctx->elements.count);
860 e = (struct lyxml_elem *)xmlctx->elements.objs[xmlctx->elements.count - 1];
861
862 /* close the element (parses closing tag) */
Michal Vasko63f3d842020-07-08 10:10:14 +0200863 ret = lyxml_close_element(xmlctx, e->prefix, e->prefix_len, e->name, e->name_len, 1);
864 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100865
866 /* update status */
867 xmlctx->status = LYXML_ELEM_CLOSE;
868 break;
869 }
Radek Krejcif13b87b2020-12-01 22:02:17 +0100870 /* fall through */
Michal Vaskob36053d2020-03-26 15:49:30 +0100871 case LYXML_ELEM_CLOSE:
Radek Krejcif13b87b2020-12-01 22:02:17 +0100872 /* </elem>| <elem2>* */
873
Michal Vaskob36053d2020-03-26 15:49:30 +0100874 /* parse next element, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +0200875 ret = lyxml_next_element(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name, &xmlctx->name_len, &closing);
876 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100877
Michal Vasko63f3d842020-07-08 10:10:14 +0200878 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100879 /* update status */
880 xmlctx->status = LYXML_END;
881 } else if (closing) {
882 /* close an element (parses also closing tag) */
Michal Vasko63f3d842020-07-08 10:10:14 +0200883 ret = lyxml_close_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len, 0);
884 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100885
886 /* update status */
887 xmlctx->status = LYXML_ELEM_CLOSE;
888 } else {
889 /* open an element, also parses all enclosed namespaces */
Michal Vasko63f3d842020-07-08 10:10:14 +0200890 ret = lyxml_open_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len);
891 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100892
893 /* update status */
894 xmlctx->status = LYXML_ELEMENT;
895 }
896 break;
897
Michal Vaskob36053d2020-03-26 15:49:30 +0100898 case LYXML_ELEMENT:
Radek Krejcif13b87b2020-12-01 22:02:17 +0100899 /* <elem| attr='val'* > content */
Michal Vaskob36053d2020-03-26 15:49:30 +0100900 case LYXML_ATTR_CONTENT:
Radek Krejcif13b87b2020-12-01 22:02:17 +0100901 /* attr='val'| attr='val'* > content */
902
Michal Vaskob36053d2020-03-26 15:49:30 +0100903 /* parse attribute name, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +0200904 ret = lyxml_next_attribute(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name, &xmlctx->name_len);
905 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100906
Michal Vasko63f3d842020-07-08 10:10:14 +0200907 if (xmlctx->in->current[0] == '>') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100908 /* no attributes but a closing tag */
Michal Vasko63f3d842020-07-08 10:10:14 +0200909 ly_in_skip(xmlctx->in, 1);
910 if (!xmlctx->in->current[0]) {
Radek Krejci2efc45b2020-12-22 16:25:44 +0100911 LOGVAL(xmlctx->ctx, LY_VCODE_EOF);
Michal Vaskof55ae202020-06-30 15:49:36 +0200912 ret = LY_EVALID;
913 goto cleanup;
914 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100915
916 /* parse element content */
Michal Vasko63f3d842020-07-08 10:10:14 +0200917 ret = lyxml_parse_value(xmlctx, '<', (char **)&xmlctx->value, &xmlctx->value_len, &xmlctx->ws_only,
Michal Vasko69730152020-10-09 16:30:07 +0200918 &xmlctx->dynamic);
Michal Vasko63f3d842020-07-08 10:10:14 +0200919 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100920
921 if (!xmlctx->value_len) {
Radek IÅ¡a017270d2021-02-16 10:26:15 +0100922 /* empty value should by alocated staticaly, but check for in any case */
923 if (xmlctx->dynamic) {
924 free((char *) xmlctx->value);
925 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100926 /* use empty value, easier to work with */
927 xmlctx->value = "";
Radek IÅ¡a017270d2021-02-16 10:26:15 +0100928 xmlctx->dynamic = 0;
Michal Vaskob36053d2020-03-26 15:49:30 +0100929 }
930
931 /* update status */
932 xmlctx->status = LYXML_ELEM_CONTENT;
Michal Vasko63f3d842020-07-08 10:10:14 +0200933 } else if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100934 /* no content but we still return it */
935 xmlctx->value = "";
936 xmlctx->value_len = 0;
937 xmlctx->ws_only = 1;
938 xmlctx->dynamic = 0;
939
940 /* update status */
941 xmlctx->status = LYXML_ELEM_CONTENT;
942 } else {
943 /* update status */
944 xmlctx->status = LYXML_ATTRIBUTE;
945 }
946 break;
947
Michal Vaskob36053d2020-03-26 15:49:30 +0100948 case LYXML_ATTRIBUTE:
Radek Krejcif13b87b2020-12-01 22:02:17 +0100949 /* attr|='val' */
950
Michal Vaskob36053d2020-03-26 15:49:30 +0100951 /* skip formatting and parse value */
Michal Vasko63f3d842020-07-08 10:10:14 +0200952 ret = lyxml_next_attr_content(xmlctx, &xmlctx->value, &xmlctx->value_len, &xmlctx->ws_only, &xmlctx->dynamic);
953 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100954
955 /* update status */
956 xmlctx->status = LYXML_ATTR_CONTENT;
957 break;
958
Michal Vaskob36053d2020-03-26 15:49:30 +0100959 case LYXML_END:
Radek Krejcif13b87b2020-12-01 22:02:17 +0100960 /* </elem> |EOF */
Michal Vaskob36053d2020-03-26 15:49:30 +0100961 /* nothing to do */
962 break;
963 }
964
965cleanup:
966 if (ret) {
967 /* invalidate context */
968 xmlctx->status = LYXML_END;
969 }
970 return ret;
971}
972
973LY_ERR
974lyxml_ctx_peek(struct lyxml_ctx *xmlctx, enum LYXML_PARSER_STATUS *next)
975{
976 LY_ERR ret = LY_SUCCESS;
977 const char *prefix, *name, *prev_input;
978 size_t prefix_len, name_len;
Radek Krejci857189e2020-09-01 13:26:36 +0200979 ly_bool closing;
Michal Vaskob36053d2020-03-26 15:49:30 +0100980
Michal Vasko63f3d842020-07-08 10:10:14 +0200981 prev_input = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100982
983 switch (xmlctx->status) {
984 case LYXML_ELEM_CONTENT:
Michal Vasko63f3d842020-07-08 10:10:14 +0200985 if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100986 *next = LYXML_ELEM_CLOSE;
987 break;
988 }
Radek Krejcif13b87b2020-12-01 22:02:17 +0100989 /* fall through */
Michal Vaskob36053d2020-03-26 15:49:30 +0100990 case LYXML_ELEM_CLOSE:
991 /* parse next element, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +0200992 ret = lyxml_next_element(xmlctx, &prefix, &prefix_len, &name, &name_len, &closing);
993 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100994
Michal Vasko63f3d842020-07-08 10:10:14 +0200995 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100996 *next = LYXML_END;
997 } else if (closing) {
998 *next = LYXML_ELEM_CLOSE;
999 } else {
1000 *next = LYXML_ELEMENT;
1001 }
1002 break;
1003 case LYXML_ELEMENT:
1004 case LYXML_ATTR_CONTENT:
1005 /* parse attribute name, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +02001006 ret = lyxml_next_attribute(xmlctx, &prefix, &prefix_len, &name, &name_len);
1007 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +01001008
Michal Vasko63f3d842020-07-08 10:10:14 +02001009 if ((xmlctx->in->current[0] == '>') || (xmlctx->in->current[0] == '/')) {
Michal Vaskob36053d2020-03-26 15:49:30 +01001010 *next = LYXML_ELEM_CONTENT;
1011 } else {
1012 *next = LYXML_ATTRIBUTE;
1013 }
1014 break;
1015 case LYXML_ATTRIBUTE:
1016 *next = LYXML_ATTR_CONTENT;
1017 break;
1018 case LYXML_END:
1019 *next = LYXML_END;
1020 break;
1021 }
1022
1023cleanup:
Michal Vasko63f3d842020-07-08 10:10:14 +02001024 xmlctx->in->current = prev_input;
Michal Vaskob36053d2020-03-26 15:49:30 +01001025 return ret;
1026}
1027
1028void
1029lyxml_ctx_free(struct lyxml_ctx *xmlctx)
1030{
1031 uint32_t u;
1032
1033 if (!xmlctx) {
1034 return;
1035 }
1036
Radek Krejciddace2c2021-01-08 11:30:56 +01001037 LOG_LOCBACK(0, 0, 0, 1);
Radek Krejci2efc45b2020-12-22 16:25:44 +01001038
Michal Vaskob36053d2020-03-26 15:49:30 +01001039 if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) {
1040 free((char *)xmlctx->value);
1041 }
1042 ly_set_erase(&xmlctx->elements, free);
1043 for (u = xmlctx->ns.count - 1; u + 1 > 0; --u) {
1044 /* remove the ns structure */
1045 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->prefix);
1046 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->uri);
1047 free(xmlctx->ns.objs[u]);
1048 }
1049 ly_set_erase(&xmlctx->ns, NULL);
1050 free(xmlctx);
Radek Krejcib1890642018-10-03 14:05:40 +02001051}
Radek Krejcie7b95092019-05-15 11:03:07 +02001052
1053LY_ERR
Radek Krejci857189e2020-09-01 13:26:36 +02001054lyxml_dump_text(struct ly_out *out, const char *text, ly_bool attribute)
Radek Krejcie7b95092019-05-15 11:03:07 +02001055{
Michal Vasko5233e962020-08-14 14:26:20 +02001056 LY_ERR ret;
Radek Krejcie7b95092019-05-15 11:03:07 +02001057
1058 if (!text) {
1059 return 0;
1060 }
1061
Radek Krejci1deb5be2020-08-26 16:43:36 +02001062 for (uint64_t u = 0; text[u]; u++) {
Radek Krejcie7b95092019-05-15 11:03:07 +02001063 switch (text[u]) {
1064 case '&':
Michal Vasko5233e962020-08-14 14:26:20 +02001065 ret = ly_print_(out, "&amp;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001066 break;
1067 case '<':
Michal Vasko5233e962020-08-14 14:26:20 +02001068 ret = ly_print_(out, "&lt;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001069 break;
1070 case '>':
1071 /* not needed, just for readability */
Michal Vasko5233e962020-08-14 14:26:20 +02001072 ret = ly_print_(out, "&gt;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001073 break;
1074 case '"':
1075 if (attribute) {
Michal Vasko5233e962020-08-14 14:26:20 +02001076 ret = ly_print_(out, "&quot;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001077 break;
1078 }
Radek Krejcif13b87b2020-12-01 22:02:17 +01001079 /* fall through */
Radek Krejcie7b95092019-05-15 11:03:07 +02001080 default:
Michal Vasko5233e962020-08-14 14:26:20 +02001081 ret = ly_write_(out, &text[u], 1);
1082 break;
Radek Krejcie7b95092019-05-15 11:03:07 +02001083 }
Michal Vasko5233e962020-08-14 14:26:20 +02001084 LY_CHECK_RET(ret);
Radek Krejcie7b95092019-05-15 11:03:07 +02001085 }
1086
Michal Vasko5233e962020-08-14 14:26:20 +02001087 return LY_SUCCESS;
Radek Krejcie7b95092019-05-15 11:03:07 +02001088}
1089
Michal Vasko52927e22020-03-16 17:26:14 +01001090LY_ERR
Michal Vasko6b5cb2a2020-11-11 19:11:21 +01001091lyxml_value_compare(const struct ly_ctx *ctx, const char *value1, void *val_prefix_data1, const char *value2,
1092 void *val_prefix_data2)
Michal Vasko52927e22020-03-16 17:26:14 +01001093{
Michal Vasko6b5cb2a2020-11-11 19:11:21 +01001094 const char *ptr1, *ptr2, *end1, *end2;
1095 const struct lys_module *mod1, *mod2;
Michal Vasko52927e22020-03-16 17:26:14 +01001096
1097 if (!value1 && !value2) {
1098 return LY_SUCCESS;
1099 }
1100 if ((value1 && !value2) || (!value1 && value2)) {
1101 return LY_ENOT;
1102 }
1103
1104 ptr1 = value1;
1105 ptr2 = value2;
1106 while (ptr1[0] && ptr2[0]) {
1107 if (ptr1[0] != ptr2[0]) {
1108 /* it can be a start of prefix that maps to the same module */
Michal Vasko6b5cb2a2020-11-11 19:11:21 +01001109 mod1 = mod2 = NULL;
1110 if (val_prefix_data1 && (end1 = strchr(ptr1, ':'))) {
Michal Vasko52927e22020-03-16 17:26:14 +01001111 /* find module of the first prefix, if any */
Michal Vasko6b5cb2a2020-11-11 19:11:21 +01001112 mod1 = ly_resolve_prefix(ctx, ptr1, end1 - ptr1, LY_PREF_XML, val_prefix_data1);
Michal Vasko52927e22020-03-16 17:26:14 +01001113 }
Michal Vasko6b5cb2a2020-11-11 19:11:21 +01001114 if (val_prefix_data2 && (end2 = strchr(ptr2, ':'))) {
Michal Vasko52927e22020-03-16 17:26:14 +01001115 /* find module of the second prefix, if any */
Michal Vasko6b5cb2a2020-11-11 19:11:21 +01001116 mod2 = ly_resolve_prefix(ctx, ptr2, end2 - ptr2, LY_PREF_XML, val_prefix_data2);
Michal Vasko52927e22020-03-16 17:26:14 +01001117 }
1118
Michal Vasko6b5cb2a2020-11-11 19:11:21 +01001119 if (!mod1 || !mod2 || (mod1 != mod2)) {
Michal Vasko52927e22020-03-16 17:26:14 +01001120 /* not a prefix or maps to different namespaces */
1121 break;
1122 }
1123
1124 /* skip prefixes in both values (':' is skipped as iter) */
Michal Vasko6b5cb2a2020-11-11 19:11:21 +01001125 ptr1 = end1;
1126 ptr2 = end2;
Michal Vasko52927e22020-03-16 17:26:14 +01001127 }
1128
1129 ++ptr1;
1130 ++ptr2;
1131 }
1132 if (ptr1[0] || ptr2[0]) {
1133 /* not a match or simply different lengths */
1134 return LY_ENOT;
1135 }
1136
1137 return LY_SUCCESS;
1138}