blob: 6f87b810d5022cc5de642bd98c61f500f5c4d238 [file] [log] [blame]
Radek Krejcid91dbaf2018-09-21 15:51:39 +02001/**
2 * @file xml.c
3 * @author Radek Krejci <rkrejci@cesnet.cz>
Michal Vaskob36053d2020-03-26 15:49:30 +01004 * @author Michal Vasko <mvasko@cesnet.cz>
Radek Krejcid91dbaf2018-09-21 15:51:39 +02005 * @brief Generic XML parser implementation for libyang
6 *
7 * Copyright (c) 2015 - 2018 CESNET, z.s.p.o.
8 *
9 * This source code is licensed under BSD 3-Clause License (the "License").
10 * You may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * https://opensource.org/licenses/BSD-3-Clause
14 */
15
Radek Krejci535ea9f2020-05-29 16:01:05 +020016#define _GNU_SOURCE
17
18#include "xml.h"
Radek Krejci4b74d5e2018-09-26 14:30:55 +020019
Radek Krejcib1890642018-10-03 14:05:40 +020020#include <assert.h>
Radek Krejci7a7fa902018-09-25 17:08:21 +020021#include <ctype.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020022#include <stdint.h>
Radek Krejcie7b95092019-05-15 11:03:07 +020023#include <stdlib.h>
Radek Krejci4b74d5e2018-09-26 14:30:55 +020024#include <string.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020025
Radek Krejci535ea9f2020-05-29 16:01:05 +020026#include "common.h"
Michal Vasko5aa44c02020-06-29 11:47:02 +020027#include "compat.h"
Radek Krejci535ea9f2020-05-29 16:01:05 +020028#include "dict.h"
Michal Vaskoafac7822020-10-20 14:22:26 +020029#include "in_internal.h"
30#include "out_internal.h"
Radek Krejci535ea9f2020-05-29 16:01:05 +020031#include "tree.h"
32#include "tree_data.h"
Radek Krejcid91dbaf2018-09-21 15:51:39 +020033
/*
 * Move the input of the lyxml_ctx c forward by s bytes. If the input then points at
 * the terminating NUL byte, log an unexpected EOF and return LY_EVALID from the
 * CALLING function.
 *
 * Wrapped in do { } while (0) so that the two statements always act as a single one,
 * even as the body of a brace-less if/else.
 */
#define move_input(c, s) \
    do { \
        ly_in_skip((c)->in, (s)); \
        LY_CHECK_ERR_RET(!(c)->in->current[0], LOGVAL((c)->ctx, LY_VLOG_LINE, &(c)->in->line, LY_VCODE_EOF), LY_EVALID); \
    } while (0)

/* Skip all XML whitespace characters at the current position of the input of the lyxml_ctx c */
#define ign_xmlws(c) while (is_xmlws(*(c)->in->current)) {ly_in_skip((c)->in, 1);}
Michal Vaskob36053d2020-03-26 15:49:30 +010039
Radek Krejci857189e2020-09-01 13:26:36 +020040static LY_ERR lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *value_len, ly_bool *ws_only,
41 ly_bool *dynamic);
Radek Krejcid91dbaf2018-09-21 15:51:39 +020042
/**
 * @brief Search the input for the first occurrence of a delimiter.
 *
 * Note the inverted sense of the return value: callers use it directly as an error flag.
 *
 * @param[in] input NUL-terminated string to search.
 * @param[in] delim Delimiter to look for.
 * @param[in] delim_len Length of @p delim in bytes.
 * @param[out] parsed Number of bytes of @p input preceding the delimiter if it was found,
 * otherwise the number of bytes scanned before the terminating NUL byte.
 * @return 0 if the delimiter was found, 1 if the end of @p input was reached without finding it.
 */
static ly_bool
ign_todelim(const char *input, const char *delim, size_t delim_len, size_t *parsed)
{
    const char *pos;

    for (pos = input; *pos; ++pos) {
        /* cheap first-byte test before the full comparison */
        if ((*pos == *delim) && !strncmp(pos, delim, delim_len)) {
            /* delim found */
            *parsed = pos - input;
            return 0;
        }
    }

    /* delim not found */
    *parsed = pos - input;
    return 1;
}
76
Radek Krejci4b74d5e2018-09-26 14:30:55 +020077/**
Michal Vaskob36053d2020-03-26 15:49:30 +010078 * @brief Check/Get an XML identifier from the input string.
79 *
80 * The identifier must have at least one valid character complying the name start character constraints.
81 * The identifier is terminated by the first character, which does not comply to the name character constraints.
82 *
83 * See https://www.w3.org/TR/xml-names/#NT-NCName
84 *
85 * @param[in] xmlctx XML context.
86 * @param[out] start Pointer to the start of the identifier.
87 * @param[out] end Pointer ot the end of the identifier.
88 * @return LY_ERR value.
89 */
90static LY_ERR
91lyxml_parse_identifier(struct lyxml_ctx *xmlctx, const char **start, const char **end)
92{
93 const char *s, *in;
94 uint32_t c;
95 size_t parsed;
96 LY_ERR rc;
97
Michal Vasko63f3d842020-07-08 10:10:14 +020098 in = s = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +010099
100 /* check NameStartChar (minus colon) */
101 LY_CHECK_ERR_RET(ly_getutf8(&in, &c, &parsed),
Radek Krejcid54412f2020-12-17 20:25:35 +0100102 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->in->line, LY_VCODE_INCHAR, in[0]),
Michal Vasko69730152020-10-09 16:30:07 +0200103 LY_EVALID);
Michal Vaskob36053d2020-03-26 15:49:30 +0100104 LY_CHECK_ERR_RET(!is_xmlqnamestartchar(c),
Radek Krejcid54412f2020-12-17 20:25:35 +0100105 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->in->line, LYVE_SYNTAX,
Michal Vasko69730152020-10-09 16:30:07 +0200106 "Identifier \"%s\" starts with an invalid character.", in - parsed),
107 LY_EVALID);
Michal Vaskob36053d2020-03-26 15:49:30 +0100108
109 /* check rest of the identifier */
110 do {
111 /* move only successfully parsed bytes */
Michal Vasko63f3d842020-07-08 10:10:14 +0200112 ly_in_skip(xmlctx->in, parsed);
Michal Vaskob36053d2020-03-26 15:49:30 +0100113
114 rc = ly_getutf8(&in, &c, &parsed);
Radek Krejcid54412f2020-12-17 20:25:35 +0100115 LY_CHECK_ERR_RET(rc, LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->in->line, LY_VCODE_INCHAR, in[0]), LY_EVALID);
Michal Vaskob36053d2020-03-26 15:49:30 +0100116 } while (is_xmlqnamechar(c));
117
118 *start = s;
Michal Vasko63f3d842020-07-08 10:10:14 +0200119 *end = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100120 return LY_SUCCESS;
121}
122
123/**
124 * @brief Add namespace definition into XML context.
125 *
126 * Namespaces from a single element are supposed to be added sequentially together (not interleaved by a namespace from other
127 * element). This mimic namespace visibility, since the namespace defined in element E is not visible from its parents or
128 * siblings. On the other hand, namespace from a parent element can be redefined in a child element. This is also reflected
129 * by lyxml_ns_get() which returns the most recent namespace definition for the given prefix.
130 *
131 * When leaving processing of a subtree of some element (after it is removed from xmlctx->elements), caller is supposed to call
132 * lyxml_ns_rm() to remove all the namespaces defined in such an element from the context.
133 *
134 * @param[in] xmlctx XML context to work with.
135 * @param[in] prefix Pointer to the namespace prefix. Can be NULL for default namespace.
136 * @param[in] prefix_len Length of the prefix.
137 * @param[in] uri Namespace URI (value) to store directly. Value is always spent.
138 * @return LY_ERR values.
139 */
140LY_ERR
141lyxml_ns_add(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, char *uri)
142{
Radek Krejciba03a5a2020-08-27 14:40:41 +0200143 LY_ERR ret = LY_SUCCESS;
Michal Vaskob36053d2020-03-26 15:49:30 +0100144 struct lyxml_ns *ns;
145
146 ns = malloc(sizeof *ns);
147 LY_CHECK_ERR_RET(!ns, LOGMEM(xmlctx->ctx), LY_EMEM);
148
149 /* we need to connect the depth of the element where the namespace is defined with the
150 * namespace record to be able to maintain (remove) the record when the parser leaves
151 * (to its sibling or back to the parent) the element where the namespace was defined */
152 ns->depth = xmlctx->elements.count;
153
154 ns->uri = uri;
155 if (prefix) {
156 ns->prefix = strndup(prefix, prefix_len);
157 LY_CHECK_ERR_RET(!ns->prefix, LOGMEM(xmlctx->ctx); free(ns->uri); free(ns), LY_EMEM);
158 } else {
159 ns->prefix = NULL;
160 }
161
Radek Krejci3d92e442020-10-12 12:48:13 +0200162 ret = ly_set_add(&xmlctx->ns, ns, 1, NULL);
Radek Krejciba03a5a2020-08-27 14:40:41 +0200163 LY_CHECK_ERR_RET(ret, free(ns->prefix); free(ns->uri); free(ns), ret);
164
Michal Vaskob36053d2020-03-26 15:49:30 +0100165 return LY_SUCCESS;
166}
167
168/**
169 * @brief Remove all the namespaces defined in the element recently closed (removed from the xmlctx->elements).
170 *
171 * @param[in] xmlctx XML context to work with.
172 */
173void
174lyxml_ns_rm(struct lyxml_ctx *xmlctx)
175{
Radek Krejci1deb5be2020-08-26 16:43:36 +0200176 for (uint32_t u = xmlctx->ns.count - 1; u + 1 > 0; --u) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100177 if (((struct lyxml_ns *)xmlctx->ns.objs[u])->depth != xmlctx->elements.count + 1) {
178 /* we are done, the namespaces from a single element are supposed to be together */
179 break;
180 }
181 /* remove the ns structure */
182 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->prefix);
183 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->uri);
184 free(xmlctx->ns.objs[u]);
185 --xmlctx->ns.count;
186 }
187
188 if (!xmlctx->ns.count) {
189 /* cleanup the xmlctx's namespaces storage */
190 ly_set_erase(&xmlctx->ns, NULL);
191 }
192}
193
Michal Vaskob36053d2020-03-26 15:49:30 +0100194const struct lyxml_ns *
Michal Vaskoc8a230d2020-08-14 12:17:10 +0200195lyxml_ns_get(const struct ly_set *ns_set, const char *prefix, size_t prefix_len)
Michal Vaskob36053d2020-03-26 15:49:30 +0100196{
Michal Vaskob36053d2020-03-26 15:49:30 +0100197 struct lyxml_ns *ns;
198
Radek Krejci1deb5be2020-08-26 16:43:36 +0200199 for (uint32_t u = ns_set->count - 1; u + 1 > 0; --u) {
Michal Vaskoc8a230d2020-08-14 12:17:10 +0200200 ns = (struct lyxml_ns *)ns_set->objs[u];
Michal Vaskob36053d2020-03-26 15:49:30 +0100201 if (prefix && prefix_len) {
202 if (ns->prefix && !ly_strncmp(ns->prefix, prefix, prefix_len)) {
203 return ns;
204 }
205 } else if (!ns->prefix) {
206 /* default namespace */
207 return ns;
208 }
209 }
210
211 return NULL;
212}
213
Michal Vasko8cef5232020-06-15 17:59:47 +0200214/**
215 * @brief Skip in the input until EOF or just after the opening tag.
216 * Handles special XML constructs (comment, cdata, doctype).
217 *
218 * @param[in] xmlctx XML context to use.
219 * @return LY_ERR value.
220 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100221static LY_ERR
222lyxml_skip_until_end_or_after_otag(struct lyxml_ctx *xmlctx)
223{
224 const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */
Michal Vasko63f3d842020-07-08 10:10:14 +0200225 const char *endtag, *sectname;
Radek Krejcid54412f2020-12-17 20:25:35 +0100226 size_t endtag_len, parsed;
Radek Krejci857189e2020-09-01 13:26:36 +0200227 ly_bool rc;
Michal Vaskob36053d2020-03-26 15:49:30 +0100228
229 while (1) {
230 ign_xmlws(xmlctx);
231
Michal Vasko63f3d842020-07-08 10:10:14 +0200232 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100233 /* EOF */
234 if (xmlctx->elements.count) {
Radek Krejcid54412f2020-12-17 20:25:35 +0100235 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->in->line, LY_VCODE_EOF);
Michal Vaskob36053d2020-03-26 15:49:30 +0100236 return LY_EVALID;
237 }
238 return LY_SUCCESS;
Michal Vasko63f3d842020-07-08 10:10:14 +0200239 } else if (xmlctx->in->current[0] != '<') {
Radek Krejcid54412f2020-12-17 20:25:35 +0100240 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->in->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
Michal Vasko69730152020-10-09 16:30:07 +0200241 xmlctx->in->current, "element tag start ('<')");
Michal Vaskob36053d2020-03-26 15:49:30 +0100242 return LY_EVALID;
243 }
244 move_input(xmlctx, 1);
245
Michal Vasko63f3d842020-07-08 10:10:14 +0200246 if (xmlctx->in->current[0] == '!') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100247 move_input(xmlctx, 1);
248 /* sections to ignore */
Michal Vasko63f3d842020-07-08 10:10:14 +0200249 if (!strncmp(xmlctx->in->current, "--", 2)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100250 /* comment */
251 move_input(xmlctx, 2);
252 sectname = "Comment";
253 endtag = "-->";
Radek Krejcif13b87b2020-12-01 22:02:17 +0100254 endtag_len = ly_strlen_const("-->");
255 } else if (!strncmp(xmlctx->in->current, "[CDATA[", ly_strlen_const("[CDATA["))) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100256 /* CDATA section */
Radek Krejcif13b87b2020-12-01 22:02:17 +0100257 move_input(xmlctx, ly_strlen_const("[CDATA["));
Michal Vaskob36053d2020-03-26 15:49:30 +0100258 sectname = "CData";
259 endtag = "]]>";
Radek Krejcif13b87b2020-12-01 22:02:17 +0100260 endtag_len = ly_strlen_const("]]>");
261 } else if (!strncmp(xmlctx->in->current, "DOCTYPE", ly_strlen_const("DOCTYPE"))) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100262 /* Document type declaration - not supported */
Radek Krejcid54412f2020-12-17 20:25:35 +0100263 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->in->line, LY_VCODE_NSUPP, "Document Type Declaration");
Michal Vaskob36053d2020-03-26 15:49:30 +0100264 return LY_EVALID;
265 } else {
Radek Krejcid54412f2020-12-17 20:25:35 +0100266 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->in->line, LYVE_SYNTAX, "Unknown XML section \"%.20s\".",
Michal Vasko69730152020-10-09 16:30:07 +0200267 &xmlctx->in->current[-2]);
Michal Vaskob36053d2020-03-26 15:49:30 +0100268 return LY_EVALID;
269 }
Radek Krejcid54412f2020-12-17 20:25:35 +0100270 rc = ign_todelim(xmlctx->in->current, endtag, endtag_len, &parsed);
271 LY_CHECK_ERR_RET(rc, LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->in->line, LY_VCODE_NTERM, sectname), LY_EVALID);
Michal Vasko63f3d842020-07-08 10:10:14 +0200272 ly_in_skip(xmlctx->in, parsed + endtag_len);
273 } else if (xmlctx->in->current[0] == '?') {
Radek Krejcid54412f2020-12-17 20:25:35 +0100274 rc = ign_todelim(xmlctx->in->current, "?>", 2, &parsed);
275 LY_CHECK_ERR_RET(rc, LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->in->line, LY_VCODE_NTERM, "Declaration"), LY_EVALID);
Michal Vasko63f3d842020-07-08 10:10:14 +0200276 ly_in_skip(xmlctx->in, parsed + 2);
Michal Vaskob36053d2020-03-26 15:49:30 +0100277 } else {
278 /* other non-WS character */
279 break;
280 }
281 }
282
283 return LY_SUCCESS;
284}
285
Michal Vasko8cef5232020-06-15 17:59:47 +0200286/**
287 * @brief Parse QName.
288 *
289 * @param[in] xmlctx XML context to use.
290 * @param[out] prefix Parsed prefix, may be NULL.
291 * @param[out] prefix_len Length of @p prefix.
292 * @param[out] name Parsed name.
293 * @param[out] name_len Length of @p name.
294 * @return LY_ERR value.
295 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100296static LY_ERR
297lyxml_parse_qname(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
298{
299 const char *start, *end;
300
301 *prefix = NULL;
302 *prefix_len = 0;
303
304 LY_CHECK_RET(lyxml_parse_identifier(xmlctx, &start, &end));
305 if (end[0] == ':') {
306 /* we have prefixed identifier */
307 *prefix = start;
308 *prefix_len = end - start;
309
310 move_input(xmlctx, 1);
311 LY_CHECK_RET(lyxml_parse_identifier(xmlctx, &start, &end));
312 }
313
314 *name = start;
315 *name_len = end - start;
316 return LY_SUCCESS;
317}
318
319/**
Michal Vasko8cef5232020-06-15 17:59:47 +0200320 * @brief Parse XML text content (value).
321 *
322 * @param[in] xmlctx XML context to use.
323 * @param[in] endchar Expected character to mark value end.
324 * @param[out] value Parsed value.
325 * @param[out] length Length of @p value.
326 * @param[out] ws_only Whether the value is empty/white-spaces only.
327 * @param[out] dynamic Whether the value was dynamically allocated.
328 * @return LY_ERR value.
329 */
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200330static LY_ERR
Radek Krejci857189e2020-09-01 13:26:36 +0200331lyxml_parse_value(struct lyxml_ctx *xmlctx, char endchar, char **value, size_t *length, ly_bool *ws_only, ly_bool *dynamic)
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200332{
Michal Vaskob36053d2020-03-26 15:49:30 +0100333#define BUFSIZE 24
334#define BUFSIZE_STEP 128
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200335
Michal Vaskob36053d2020-03-26 15:49:30 +0100336 const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */
Michal Vasko63f3d842020-07-08 10:10:14 +0200337 const char *in = xmlctx->in->current, *start;
Michal Vaskob36053d2020-03-26 15:49:30 +0100338 char *buf = NULL;
Radek Krejci4ad42aa2019-07-23 16:55:58 +0200339 size_t offset; /* read offset in input buffer */
340 size_t len; /* length of the output string (write offset in output buffer) */
341 size_t size = 0; /* size of the output buffer */
Radek Krejci7a7fa902018-09-25 17:08:21 +0200342 void *p;
Radek Krejci117d2082018-09-26 10:05:14 +0200343 uint32_t n;
Michal Vaskob36053d2020-03-26 15:49:30 +0100344 size_t u;
Radek Krejci857189e2020-09-01 13:26:36 +0200345 ly_bool ws = 1;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200346
Michal Vaskob36053d2020-03-26 15:49:30 +0100347 assert(xmlctx);
Radek Krejcib1890642018-10-03 14:05:40 +0200348
Radek Krejcid70d1072018-10-09 14:20:47 +0200349 /* init */
Michal Vaskob36053d2020-03-26 15:49:30 +0100350 start = in;
Radek Krejcid70d1072018-10-09 14:20:47 +0200351 offset = len = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200352
353 /* parse */
354 while (in[offset]) {
355 if (in[offset] == '&') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100356 /* non WS */
357 ws = 0;
Radek Krejcid70d1072018-10-09 14:20:47 +0200358
Michal Vaskob36053d2020-03-26 15:49:30 +0100359 if (!buf) {
360 /* prepare output buffer */
361 buf = malloc(BUFSIZE);
362 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
363 size = BUFSIZE;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200364 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100365
366 /* allocate enough for the offset and next character,
367 * we will need 4 bytes at most since we support only the predefined
368 * (one-char) entities and character references */
Juraj Vijtiukcb017cc2020-07-08 16:19:58 +0200369 while (len + offset + 4 >= size) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100370 buf = ly_realloc(buf, size + BUFSIZE_STEP);
371 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
372 size += BUFSIZE_STEP;
373 }
374
375 if (offset) {
376 /* store what we have so far */
377 memcpy(&buf[len], in, offset);
378 len += offset;
379 in += offset;
380 offset = 0;
381 }
382
Radek Krejci7a7fa902018-09-25 17:08:21 +0200383 ++offset;
384 if (in[offset] != '#') {
385 /* entity reference - only predefined references are supported */
Radek Krejcif13b87b2020-12-01 22:02:17 +0100386 if (!strncmp(&in[offset], "lt;", ly_strlen_const("lt;"))) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100387 buf[len++] = '<';
Radek Krejcif13b87b2020-12-01 22:02:17 +0100388 in += ly_strlen_const("&lt;");
389 } else if (!strncmp(&in[offset], "gt;", ly_strlen_const("gt;"))) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100390 buf[len++] = '>';
Radek Krejcif13b87b2020-12-01 22:02:17 +0100391 in += ly_strlen_const("&gt;");
392 } else if (!strncmp(&in[offset], "amp;", ly_strlen_const("amp;"))) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100393 buf[len++] = '&';
Radek Krejcif13b87b2020-12-01 22:02:17 +0100394 in += ly_strlen_const("&amp;");
395 } else if (!strncmp(&in[offset], "apos;", ly_strlen_const("apos;"))) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100396 buf[len++] = '\'';
Radek Krejcif13b87b2020-12-01 22:02:17 +0100397 in += ly_strlen_const("&apos;");
398 } else if (!strncmp(&in[offset], "quot;", ly_strlen_const("quot;"))) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100399 buf[len++] = '\"';
Radek Krejcif13b87b2020-12-01 22:02:17 +0100400 in += ly_strlen_const("&quot;");
Radek Krejci7a7fa902018-09-25 17:08:21 +0200401 } else {
Radek Krejcid54412f2020-12-17 20:25:35 +0100402 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->in->line, LYVE_SYNTAX,
Michal Vasko69730152020-10-09 16:30:07 +0200403 "Entity reference \"%.*s\" not supported, only predefined references allowed.", 10, &in[offset - 1]);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200404 goto error;
405 }
406 offset = 0;
407 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100408 p = (void *)&in[offset - 1];
Radek Krejci7a7fa902018-09-25 17:08:21 +0200409 /* character reference */
410 ++offset;
411 if (isdigit(in[offset])) {
412 for (n = 0; isdigit(in[offset]); offset++) {
Radek Krejcif13b87b2020-12-01 22:02:17 +0100413 n = (LY_BASE_DEC * n) + (in[offset] - '0');
Radek Krejci7a7fa902018-09-25 17:08:21 +0200414 }
Michal Vasko69730152020-10-09 16:30:07 +0200415 } else if ((in[offset] == 'x') && isxdigit(in[offset + 1])) {
Radek Krejci7a7fa902018-09-25 17:08:21 +0200416 for (n = 0, ++offset; isxdigit(in[offset]); offset++) {
417 if (isdigit(in[offset])) {
418 u = (in[offset] - '0');
419 } else if (in[offset] > 'F') {
Radek Krejcif13b87b2020-12-01 22:02:17 +0100420 u = LY_BASE_DEC + (in[offset] - 'a');
Radek Krejci7a7fa902018-09-25 17:08:21 +0200421 } else {
Radek Krejcif13b87b2020-12-01 22:02:17 +0100422 u = LY_BASE_DEC + (in[offset] - 'A');
Radek Krejci7a7fa902018-09-25 17:08:21 +0200423 }
Radek Krejcif13b87b2020-12-01 22:02:17 +0100424 n = (LY_BASE_HEX * n) + u;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200425 }
426 } else {
Radek Krejcid54412f2020-12-17 20:25:35 +0100427 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->in->line, LYVE_SYNTAX, "Invalid character reference \"%.*s\".", 12, p);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200428 goto error;
429
430 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100431
Radek Krejci7a7fa902018-09-25 17:08:21 +0200432 LY_CHECK_ERR_GOTO(in[offset] != ';',
Radek Krejcid54412f2020-12-17 20:25:35 +0100433 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->in->line, LY_VCODE_INSTREXP,
Michal Vasko69730152020-10-09 16:30:07 +0200434 LY_VCODE_INSTREXP_len(&in[offset]), &in[offset], ";"),
435 error);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200436 ++offset;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200437 LY_CHECK_ERR_GOTO(ly_pututf8(&buf[len], n, &u),
Radek Krejcid54412f2020-12-17 20:25:35 +0100438 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->in->line, LYVE_SYNTAX,
Michal Vasko69730152020-10-09 16:30:07 +0200439 "Invalid character reference \"%.*s\" (0x%08x).", 12, p, n),
440 error);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200441 len += u;
442 in += offset;
443 offset = 0;
444 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100445 } else if (in[offset] == endchar) {
Radek Krejci7a7fa902018-09-25 17:08:21 +0200446 /* end of string */
Radek Krejcid70d1072018-10-09 14:20:47 +0200447 if (buf) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100448 /* realloc exact size string */
449 buf = ly_realloc(buf, len + offset + 1);
450 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
451 size = len + offset + 1;
Radek Krejcid70d1072018-10-09 14:20:47 +0200452 memcpy(&buf[len], in, offset);
Michal Vaskob36053d2020-03-26 15:49:30 +0100453
454 /* set terminating NULL byte */
455 buf[len + offset] = '\0';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200456 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200457 len += offset;
Michal Vaskob36053d2020-03-26 15:49:30 +0100458 in += offset;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200459 goto success;
460 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100461 if (!is_xmlws(in[offset])) {
462 /* non WS */
463 ws = 0;
464 }
465
Radek Krejci7a7fa902018-09-25 17:08:21 +0200466 /* log lines */
467 if (in[offset] == '\n') {
Radek Krejcid54412f2020-12-17 20:25:35 +0100468 LY_IN_NEW_LINE(xmlctx->in);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200469 }
470
471 /* continue */
472 ++offset;
473 }
474 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100475
476 /* EOF reached before endchar */
Radek Krejcid54412f2020-12-17 20:25:35 +0100477 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->in->line, LY_VCODE_EOF);
Michal Vaskob36053d2020-03-26 15:49:30 +0100478
Radek Krejci7a7fa902018-09-25 17:08:21 +0200479error:
Michal Vaskob36053d2020-03-26 15:49:30 +0100480 free(buf);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200481 return LY_EVALID;
482
483success:
Radek Krejcid70d1072018-10-09 14:20:47 +0200484 if (buf) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100485 *value = buf;
486 *dynamic = 1;
487 } else {
488 *value = (char *)start;
489 *dynamic = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200490 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100491 *length = len;
492 *ws_only = ws;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200493
Radek Krejcid54412f2020-12-17 20:25:35 +0100494 xmlctx->in->current = in;
Michal Vaskob36053d2020-03-26 15:49:30 +0100495 return LY_SUCCESS;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200496
497#undef BUFSIZE
498#undef BUFSIZE_STEP
Radek Krejci7a7fa902018-09-25 17:08:21 +0200499}
500
Michal Vasko8cef5232020-06-15 17:59:47 +0200501/**
502 * @brief Parse XML closing element and match it to a stored starting element.
503 *
504 * @param[in] xmlctx XML context to use.
505 * @param[in] prefix Expected closing element prefix.
506 * @param[in] prefix_len Length of @p prefix.
507 * @param[in] name Expected closing element name.
508 * @param[in] name_len Length of @p name.
509 * @param[in] empty Whether we are parsing a special "empty" element (with joined starting and closing tag) with no value.
510 * @return LY_ERR value.
511 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100512static LY_ERR
513lyxml_close_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, const char *name, size_t name_len,
Radek Krejci857189e2020-09-01 13:26:36 +0200514 ly_bool empty)
Radek Krejcid972c252018-09-25 13:23:39 +0200515{
Michal Vaskob36053d2020-03-26 15:49:30 +0100516 struct lyxml_elem *e;
Radek Krejcid972c252018-09-25 13:23:39 +0200517
Michal Vaskob36053d2020-03-26 15:49:30 +0100518 /* match opening and closing element tags */
519 if (!xmlctx->elements.count) {
Radek Krejcid54412f2020-12-17 20:25:35 +0100520 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->in->line, LYVE_SYNTAX, "Stray closing element tag (\"%.*s\").",
Michal Vasko69730152020-10-09 16:30:07 +0200521 name_len, name);
Michal Vaskob36053d2020-03-26 15:49:30 +0100522 return LY_EVALID;
523 }
Radek Krejcid972c252018-09-25 13:23:39 +0200524
Michal Vaskob36053d2020-03-26 15:49:30 +0100525 e = (struct lyxml_elem *)xmlctx->elements.objs[xmlctx->elements.count - 1];
Michal Vasko69730152020-10-09 16:30:07 +0200526 if ((e->prefix_len != prefix_len) || (e->name_len != name_len) ||
527 (prefix_len && strncmp(prefix, e->prefix, e->prefix_len)) || strncmp(name, e->name, e->name_len)) {
Radek Krejcid54412f2020-12-17 20:25:35 +0100528 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->in->line, LYVE_SYNTAX,
Michal Vasko69730152020-10-09 16:30:07 +0200529 "Opening (\"%.*s%s%.*s\") and closing (\"%.*s%s%.*s\") elements tag mismatch.",
530 e->prefix_len, e->prefix ? e->prefix : "", e->prefix ? ":" : "", e->name_len, e->name,
531 prefix_len, prefix ? prefix : "", prefix ? ":" : "", name_len, name);
Michal Vaskob36053d2020-03-26 15:49:30 +0100532 return LY_EVALID;
533 }
Radek Krejcid972c252018-09-25 13:23:39 +0200534
Michal Vaskob36053d2020-03-26 15:49:30 +0100535 /* opening and closing element tags matches, remove record from the opening tags list */
536 ly_set_rm_index(&xmlctx->elements, xmlctx->elements.count - 1, free);
Radek Krejcid972c252018-09-25 13:23:39 +0200537
Michal Vaskob36053d2020-03-26 15:49:30 +0100538 /* remove also the namespaces connected with the element */
539 lyxml_ns_rm(xmlctx);
Radek Krejcid972c252018-09-25 13:23:39 +0200540
Michal Vaskob36053d2020-03-26 15:49:30 +0100541 /* skip WS */
542 ign_xmlws(xmlctx);
Radek Krejcid972c252018-09-25 13:23:39 +0200543
Michal Vaskob36053d2020-03-26 15:49:30 +0100544 /* special "<elem/>" element */
Michal Vasko63f3d842020-07-08 10:10:14 +0200545 if (empty && (xmlctx->in->current[0] == '/')) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100546 move_input(xmlctx, 1);
547 }
Michal Vasko52927e22020-03-16 17:26:14 +0100548
Michal Vaskob36053d2020-03-26 15:49:30 +0100549 /* parse closing tag */
Michal Vasko63f3d842020-07-08 10:10:14 +0200550 if (xmlctx->in->current[0] != '>') {
Radek Krejcid54412f2020-12-17 20:25:35 +0100551 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->in->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
Michal Vasko69730152020-10-09 16:30:07 +0200552 xmlctx->in->current, "element tag termination ('>')");
Michal Vaskob36053d2020-03-26 15:49:30 +0100553 return LY_EVALID;
554 }
Michal Vasko52927e22020-03-16 17:26:14 +0100555
Michal Vaskob36053d2020-03-26 15:49:30 +0100556 /* move after closing tag without checking for EOF */
Michal Vasko63f3d842020-07-08 10:10:14 +0200557 ly_in_skip(xmlctx->in, 1);
Michal Vasko52927e22020-03-16 17:26:14 +0100558
Radek Krejcid972c252018-09-25 13:23:39 +0200559 return LY_SUCCESS;
560}
561
Michal Vasko8cef5232020-06-15 17:59:47 +0200562/**
563 * @brief Store parsed opening element and parse any included namespaces.
564 *
565 * @param[in] xmlctx XML context to use.
566 * @param[in] prefix Parsed starting element prefix.
567 * @param[in] prefix_len Length of @p prefix.
568 * @param[in] name Parsed starting element name.
569 * @param[in] name_len Length of @p name.
570 * @return LY_ERR value.
571 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100572static LY_ERR
573lyxml_open_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, const char *name, size_t name_len)
Radek Krejcib1890642018-10-03 14:05:40 +0200574{
Michal Vaskob36053d2020-03-26 15:49:30 +0100575 LY_ERR ret = LY_SUCCESS;
576 struct lyxml_elem *e;
577 const char *prev_input;
578 char *value;
579 size_t parsed, value_len;
Radek Krejci857189e2020-09-01 13:26:36 +0200580 ly_bool ws_only, dynamic, is_ns;
Michal Vaskob36053d2020-03-26 15:49:30 +0100581 uint32_t c;
Radek Krejcib1890642018-10-03 14:05:40 +0200582
Michal Vaskob36053d2020-03-26 15:49:30 +0100583 /* store element opening tag information */
584 e = malloc(sizeof *e);
585 LY_CHECK_ERR_RET(!e, LOGMEM(xmlctx->ctx), LY_EMEM);
586 e->name = name;
587 e->prefix = prefix;
588 e->name_len = name_len;
589 e->prefix_len = prefix_len;
Radek Krejci3d92e442020-10-12 12:48:13 +0200590 LY_CHECK_RET(ly_set_add(&xmlctx->elements, e, 1, NULL));
Michal Vaskob36053d2020-03-26 15:49:30 +0100591
592 /* skip WS */
593 ign_xmlws(xmlctx);
594
595 /* parse and store all namespaces */
Michal Vasko63f3d842020-07-08 10:10:14 +0200596 prev_input = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100597 is_ns = 1;
Michal Vasko63f3d842020-07-08 10:10:14 +0200598 while ((xmlctx->in->current[0] != '\0') && !ly_getutf8(&xmlctx->in->current, &c, &parsed) && is_xmlqnamestartchar(c)) {
599 xmlctx->in->current -= parsed;
Michal Vaskob36053d2020-03-26 15:49:30 +0100600
601 /* parse attribute name */
602 LY_CHECK_GOTO(ret = lyxml_parse_qname(xmlctx, &prefix, &prefix_len, &name, &name_len), cleanup);
603
604 /* parse the value */
605 LY_CHECK_GOTO(ret = lyxml_next_attr_content(xmlctx, (const char **)&value, &value_len, &ws_only, &dynamic), cleanup);
606
607 /* store every namespace */
608 if ((prefix && !ly_strncmp("xmlns", prefix, prefix_len)) || (!prefix && !ly_strncmp("xmlns", name, name_len))) {
609 LY_CHECK_GOTO(ret = lyxml_ns_add(xmlctx, prefix ? name : NULL, prefix ? name_len : 0,
Michal Vasko69730152020-10-09 16:30:07 +0200610 dynamic ? value : strndup(value, value_len)), cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100611 dynamic = 0;
612 } else {
613 /* not a namespace */
614 is_ns = 0;
615 }
616 if (dynamic) {
617 free(value);
618 }
619
620 /* skip WS */
621 ign_xmlws(xmlctx);
622
623 if (is_ns) {
624 /* we can actually skip all the namespaces as there is no reason to parse them again */
Michal Vasko63f3d842020-07-08 10:10:14 +0200625 prev_input = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100626 }
Radek Krejcib1890642018-10-03 14:05:40 +0200627 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100628
629cleanup:
630 if (!ret) {
Michal Vasko63f3d842020-07-08 10:10:14 +0200631 xmlctx->in->current = prev_input;
Michal Vaskob36053d2020-03-26 15:49:30 +0100632 }
633 return ret;
634}
635
Michal Vasko8cef5232020-06-15 17:59:47 +0200636/**
637 * @brief Move parser to the attribute content and parse it.
638 *
639 * @param[in] xmlctx XML context to use.
640 * @param[out] value Parsed attribute value.
641 * @param[out] value_len Length of @p value.
642 * @param[out] ws_only Whether the value is empty/white-spaces only.
643 * @param[out] dynamic Whether the value was dynamically allocated.
644 * @return LY_ERR value.
645 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100646static LY_ERR
Radek Krejci857189e2020-09-01 13:26:36 +0200647lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *value_len, ly_bool *ws_only, ly_bool *dynamic)
Michal Vaskob36053d2020-03-26 15:49:30 +0100648{
649 char quot;
650
651 /* skip WS */
652 ign_xmlws(xmlctx);
653
654 /* skip '=' */
Michal Vasko63f3d842020-07-08 10:10:14 +0200655 if (xmlctx->in->current[0] == '\0') {
Radek Krejcid54412f2020-12-17 20:25:35 +0100656 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->in->line, LY_VCODE_EOF);
Michal Vaskob36053d2020-03-26 15:49:30 +0100657 return LY_EVALID;
Michal Vasko63f3d842020-07-08 10:10:14 +0200658 } else if (xmlctx->in->current[0] != '=') {
Radek Krejcid54412f2020-12-17 20:25:35 +0100659 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->in->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
Michal Vasko69730152020-10-09 16:30:07 +0200660 xmlctx->in->current, "'='");
Michal Vaskob36053d2020-03-26 15:49:30 +0100661 return LY_EVALID;
662 }
663 move_input(xmlctx, 1);
664
665 /* skip WS */
666 ign_xmlws(xmlctx);
667
668 /* find quotes */
Michal Vasko63f3d842020-07-08 10:10:14 +0200669 if (xmlctx->in->current[0] == '\0') {
Radek Krejcid54412f2020-12-17 20:25:35 +0100670 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->in->line, LY_VCODE_EOF);
Michal Vaskob36053d2020-03-26 15:49:30 +0100671 return LY_EVALID;
Michal Vasko63f3d842020-07-08 10:10:14 +0200672 } else if ((xmlctx->in->current[0] != '\'') && (xmlctx->in->current[0] != '\"')) {
Radek Krejcid54412f2020-12-17 20:25:35 +0100673 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->in->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
Michal Vasko69730152020-10-09 16:30:07 +0200674 xmlctx->in->current, "either single or double quotation mark");
Michal Vaskob36053d2020-03-26 15:49:30 +0100675 return LY_EVALID;
676 }
677
678 /* remember quote */
Michal Vasko63f3d842020-07-08 10:10:14 +0200679 quot = xmlctx->in->current[0];
Michal Vaskob36053d2020-03-26 15:49:30 +0100680 move_input(xmlctx, 1);
681
682 /* parse attribute value */
683 LY_CHECK_RET(lyxml_parse_value(xmlctx, quot, (char **)value, value_len, ws_only, dynamic));
684
685 /* move after ending quote (without checking for EOF) */
Michal Vasko63f3d842020-07-08 10:10:14 +0200686 ly_in_skip(xmlctx->in, 1);
Michal Vaskob36053d2020-03-26 15:49:30 +0100687
688 return LY_SUCCESS;
689}
690
Michal Vasko8cef5232020-06-15 17:59:47 +0200691/**
692 * @brief Move parser to the next attribute and parse it.
693 *
694 * @param[in] xmlctx XML context to use.
695 * @param[out] prefix Parsed attribute prefix.
696 * @param[out] prefix_len Length of @p prefix.
697 * @param[out] name Parsed attribute name.
698 * @param[out] name_len Length of @p name.
699 * @return LY_ERR value.
700 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100701static LY_ERR
702lyxml_next_attribute(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
703{
704 const char *in;
705 char *value;
706 uint32_t c;
707 size_t parsed, value_len;
Radek Krejci857189e2020-09-01 13:26:36 +0200708 ly_bool ws_only, dynamic;
Michal Vaskob36053d2020-03-26 15:49:30 +0100709
710 /* skip WS */
711 ign_xmlws(xmlctx);
712
713 /* parse only possible attributes */
Michal Vasko63f3d842020-07-08 10:10:14 +0200714 while ((xmlctx->in->current[0] != '>') && (xmlctx->in->current[0] != '/')) {
715 in = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100716 if (in[0] == '\0') {
Radek Krejcid54412f2020-12-17 20:25:35 +0100717 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->in->line, LY_VCODE_EOF);
Michal Vaskob36053d2020-03-26 15:49:30 +0100718 return LY_EVALID;
719 } else if ((ly_getutf8(&in, &c, &parsed) || !is_xmlqnamestartchar(c))) {
Radek Krejcid54412f2020-12-17 20:25:35 +0100720 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->in->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in - parsed), in - parsed,
Michal Vasko69730152020-10-09 16:30:07 +0200721 "element tag end ('>' or '/>') or an attribute");
Michal Vaskob36053d2020-03-26 15:49:30 +0100722 return LY_EVALID;
723 }
724
725 /* parse attribute name */
726 LY_CHECK_RET(lyxml_parse_qname(xmlctx, prefix, prefix_len, name, name_len));
727
728 if ((!*prefix || ly_strncmp("xmlns", *prefix, *prefix_len)) && (*prefix || ly_strncmp("xmlns", *name, *name_len))) {
729 /* standard attribute */
730 break;
731 }
732
733 /* namespace, skip it */
734 LY_CHECK_RET(lyxml_next_attr_content(xmlctx, (const char **)&value, &value_len, &ws_only, &dynamic));
735 if (dynamic) {
736 free(value);
737 }
738
739 /* skip WS */
740 ign_xmlws(xmlctx);
741 }
742
743 return LY_SUCCESS;
744}
745
Michal Vasko8cef5232020-06-15 17:59:47 +0200746/**
747 * @brief Move parser to the next element and parse it.
748 *
749 * @param[in] xmlctx XML context to use.
750 * @param[out] prefix Parsed element prefix.
751 * @param[out] prefix_len Length of @p prefix.
752 * @param[out] name Parse element name.
753 * @param[out] name_len Length of @p name.
Radek Krejci1deb5be2020-08-26 16:43:36 +0200754 * @param[out] closing Flag if the element is closing (includes '/').
Michal Vasko8cef5232020-06-15 17:59:47 +0200755 * @return LY_ERR value.
756 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100757static LY_ERR
758lyxml_next_element(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len,
Radek Krejci857189e2020-09-01 13:26:36 +0200759 ly_bool *closing)
Michal Vaskob36053d2020-03-26 15:49:30 +0100760{
761 /* skip WS until EOF or after opening tag '<' */
762 LY_CHECK_RET(lyxml_skip_until_end_or_after_otag(xmlctx));
Michal Vasko63f3d842020-07-08 10:10:14 +0200763 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100764 /* set return values */
765 *prefix = *name = NULL;
766 *prefix_len = *name_len = 0;
767 return LY_SUCCESS;
768 }
769
Michal Vasko63f3d842020-07-08 10:10:14 +0200770 if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100771 move_input(xmlctx, 1);
772 *closing = 1;
773 } else {
774 *closing = 0;
775 }
776
777 /* skip WS */
778 ign_xmlws(xmlctx);
779
780 /* parse element name */
781 LY_CHECK_RET(lyxml_parse_qname(xmlctx, prefix, prefix_len, name, name_len));
782
783 return LY_SUCCESS;
784}
785
786LY_ERR
Michal Vasko63f3d842020-07-08 10:10:14 +0200787lyxml_ctx_new(const struct ly_ctx *ctx, struct ly_in *in, struct lyxml_ctx **xmlctx_p)
Michal Vaskob36053d2020-03-26 15:49:30 +0100788{
789 LY_ERR ret = LY_SUCCESS;
790 struct lyxml_ctx *xmlctx;
Radek Krejci857189e2020-09-01 13:26:36 +0200791 ly_bool closing;
Michal Vaskob36053d2020-03-26 15:49:30 +0100792
793 /* new context */
794 xmlctx = calloc(1, sizeof *xmlctx);
795 LY_CHECK_ERR_RET(!xmlctx, LOGMEM(ctx), LY_EMEM);
796 xmlctx->ctx = ctx;
Michal Vasko63f3d842020-07-08 10:10:14 +0200797 xmlctx->in = in;
Michal Vaskob36053d2020-03-26 15:49:30 +0100798
799 /* parse next element, if any */
800 LY_CHECK_GOTO(ret = lyxml_next_element(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name,
Michal Vasko69730152020-10-09 16:30:07 +0200801 &xmlctx->name_len, &closing), cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100802
Michal Vasko63f3d842020-07-08 10:10:14 +0200803 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100804 /* update status */
805 xmlctx->status = LYXML_END;
806 } else if (closing) {
Radek Krejcid54412f2020-12-17 20:25:35 +0100807 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->in->line, LYVE_SYNTAX, "Stray closing element tag (\"%.*s\").",
Michal Vasko69730152020-10-09 16:30:07 +0200808 xmlctx->name_len, xmlctx->name);
Michal Vaskob36053d2020-03-26 15:49:30 +0100809 ret = LY_EVALID;
810 goto cleanup;
811 } else {
812 /* open an element, also parses all enclosed namespaces */
813 LY_CHECK_GOTO(ret = lyxml_open_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len), cleanup);
814
815 /* update status */
816 xmlctx->status = LYXML_ELEMENT;
817 }
818
819cleanup:
820 if (ret) {
821 lyxml_ctx_free(xmlctx);
822 } else {
823 *xmlctx_p = xmlctx;
824 }
825 return ret;
826}
827
828LY_ERR
829lyxml_ctx_next(struct lyxml_ctx *xmlctx)
830{
831 LY_ERR ret = LY_SUCCESS;
Radek Krejci857189e2020-09-01 13:26:36 +0200832 ly_bool closing;
Michal Vaskob36053d2020-03-26 15:49:30 +0100833 struct lyxml_elem *e;
834
835 /* if the value was not used, free it */
836 if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) {
837 free((char *)xmlctx->value);
838 xmlctx->value = NULL;
839 xmlctx->dynamic = 0;
840 }
841
842 switch (xmlctx->status) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100843 case LYXML_ELEM_CONTENT:
Radek Krejcif13b87b2020-12-01 22:02:17 +0100844 /* content |</elem> */
845
Michal Vaskob36053d2020-03-26 15:49:30 +0100846 /* handle special case when empty content for "<elem/>" was returned */
Michal Vasko63f3d842020-07-08 10:10:14 +0200847 if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100848 assert(xmlctx->elements.count);
849 e = (struct lyxml_elem *)xmlctx->elements.objs[xmlctx->elements.count - 1];
850
851 /* close the element (parses closing tag) */
Michal Vasko63f3d842020-07-08 10:10:14 +0200852 ret = lyxml_close_element(xmlctx, e->prefix, e->prefix_len, e->name, e->name_len, 1);
853 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100854
855 /* update status */
856 xmlctx->status = LYXML_ELEM_CLOSE;
857 break;
858 }
Radek Krejcif13b87b2020-12-01 22:02:17 +0100859 /* fall through */
Michal Vaskob36053d2020-03-26 15:49:30 +0100860 case LYXML_ELEM_CLOSE:
Radek Krejcif13b87b2020-12-01 22:02:17 +0100861 /* </elem>| <elem2>* */
862
Michal Vaskob36053d2020-03-26 15:49:30 +0100863 /* parse next element, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +0200864 ret = lyxml_next_element(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name, &xmlctx->name_len, &closing);
865 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100866
Michal Vasko63f3d842020-07-08 10:10:14 +0200867 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100868 /* update status */
869 xmlctx->status = LYXML_END;
870 } else if (closing) {
871 /* close an element (parses also closing tag) */
Michal Vasko63f3d842020-07-08 10:10:14 +0200872 ret = lyxml_close_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len, 0);
873 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100874
875 /* update status */
876 xmlctx->status = LYXML_ELEM_CLOSE;
877 } else {
878 /* open an element, also parses all enclosed namespaces */
Michal Vasko63f3d842020-07-08 10:10:14 +0200879 ret = lyxml_open_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len);
880 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100881
882 /* update status */
883 xmlctx->status = LYXML_ELEMENT;
884 }
885 break;
886
Michal Vaskob36053d2020-03-26 15:49:30 +0100887 case LYXML_ELEMENT:
Radek Krejcif13b87b2020-12-01 22:02:17 +0100888 /* <elem| attr='val'* > content */
Michal Vaskob36053d2020-03-26 15:49:30 +0100889 case LYXML_ATTR_CONTENT:
Radek Krejcif13b87b2020-12-01 22:02:17 +0100890 /* attr='val'| attr='val'* > content */
891
Michal Vaskob36053d2020-03-26 15:49:30 +0100892 /* parse attribute name, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +0200893 ret = lyxml_next_attribute(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name, &xmlctx->name_len);
894 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100895
Michal Vasko63f3d842020-07-08 10:10:14 +0200896 if (xmlctx->in->current[0] == '>') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100897 /* no attributes but a closing tag */
Michal Vasko63f3d842020-07-08 10:10:14 +0200898 ly_in_skip(xmlctx->in, 1);
899 if (!xmlctx->in->current[0]) {
Radek Krejcid54412f2020-12-17 20:25:35 +0100900 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->in->line, LY_VCODE_EOF);
Michal Vaskof55ae202020-06-30 15:49:36 +0200901 ret = LY_EVALID;
902 goto cleanup;
903 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100904
905 /* parse element content */
Michal Vasko63f3d842020-07-08 10:10:14 +0200906 ret = lyxml_parse_value(xmlctx, '<', (char **)&xmlctx->value, &xmlctx->value_len, &xmlctx->ws_only,
Michal Vasko69730152020-10-09 16:30:07 +0200907 &xmlctx->dynamic);
Michal Vasko63f3d842020-07-08 10:10:14 +0200908 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100909
910 if (!xmlctx->value_len) {
911 /* use empty value, easier to work with */
912 xmlctx->value = "";
913 assert(!xmlctx->dynamic);
914 }
915
916 /* update status */
917 xmlctx->status = LYXML_ELEM_CONTENT;
Michal Vasko63f3d842020-07-08 10:10:14 +0200918 } else if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100919 /* no content but we still return it */
920 xmlctx->value = "";
921 xmlctx->value_len = 0;
922 xmlctx->ws_only = 1;
923 xmlctx->dynamic = 0;
924
925 /* update status */
926 xmlctx->status = LYXML_ELEM_CONTENT;
927 } else {
928 /* update status */
929 xmlctx->status = LYXML_ATTRIBUTE;
930 }
931 break;
932
Michal Vaskob36053d2020-03-26 15:49:30 +0100933 case LYXML_ATTRIBUTE:
Radek Krejcif13b87b2020-12-01 22:02:17 +0100934 /* attr|='val' */
935
Michal Vaskob36053d2020-03-26 15:49:30 +0100936 /* skip formatting and parse value */
Michal Vasko63f3d842020-07-08 10:10:14 +0200937 ret = lyxml_next_attr_content(xmlctx, &xmlctx->value, &xmlctx->value_len, &xmlctx->ws_only, &xmlctx->dynamic);
938 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100939
940 /* update status */
941 xmlctx->status = LYXML_ATTR_CONTENT;
942 break;
943
Michal Vaskob36053d2020-03-26 15:49:30 +0100944 case LYXML_END:
Radek Krejcif13b87b2020-12-01 22:02:17 +0100945 /* </elem> |EOF */
Michal Vaskob36053d2020-03-26 15:49:30 +0100946 /* nothing to do */
947 break;
948 }
949
950cleanup:
951 if (ret) {
952 /* invalidate context */
953 xmlctx->status = LYXML_END;
954 }
955 return ret;
956}
957
958LY_ERR
959lyxml_ctx_peek(struct lyxml_ctx *xmlctx, enum LYXML_PARSER_STATUS *next)
960{
961 LY_ERR ret = LY_SUCCESS;
962 const char *prefix, *name, *prev_input;
963 size_t prefix_len, name_len;
Radek Krejci857189e2020-09-01 13:26:36 +0200964 ly_bool closing;
Michal Vaskob36053d2020-03-26 15:49:30 +0100965
Michal Vasko63f3d842020-07-08 10:10:14 +0200966 prev_input = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100967
968 switch (xmlctx->status) {
969 case LYXML_ELEM_CONTENT:
Michal Vasko63f3d842020-07-08 10:10:14 +0200970 if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100971 *next = LYXML_ELEM_CLOSE;
972 break;
973 }
Radek Krejcif13b87b2020-12-01 22:02:17 +0100974 /* fall through */
Michal Vaskob36053d2020-03-26 15:49:30 +0100975 case LYXML_ELEM_CLOSE:
976 /* parse next element, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +0200977 ret = lyxml_next_element(xmlctx, &prefix, &prefix_len, &name, &name_len, &closing);
978 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100979
Michal Vasko63f3d842020-07-08 10:10:14 +0200980 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100981 *next = LYXML_END;
982 } else if (closing) {
983 *next = LYXML_ELEM_CLOSE;
984 } else {
985 *next = LYXML_ELEMENT;
986 }
987 break;
988 case LYXML_ELEMENT:
989 case LYXML_ATTR_CONTENT:
990 /* parse attribute name, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +0200991 ret = lyxml_next_attribute(xmlctx, &prefix, &prefix_len, &name, &name_len);
992 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100993
Michal Vasko63f3d842020-07-08 10:10:14 +0200994 if ((xmlctx->in->current[0] == '>') || (xmlctx->in->current[0] == '/')) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100995 *next = LYXML_ELEM_CONTENT;
996 } else {
997 *next = LYXML_ATTRIBUTE;
998 }
999 break;
1000 case LYXML_ATTRIBUTE:
1001 *next = LYXML_ATTR_CONTENT;
1002 break;
1003 case LYXML_END:
1004 *next = LYXML_END;
1005 break;
1006 }
1007
1008cleanup:
Michal Vasko63f3d842020-07-08 10:10:14 +02001009 xmlctx->in->current = prev_input;
Michal Vaskob36053d2020-03-26 15:49:30 +01001010 return ret;
1011}
1012
1013void
1014lyxml_ctx_free(struct lyxml_ctx *xmlctx)
1015{
1016 uint32_t u;
1017
1018 if (!xmlctx) {
1019 return;
1020 }
1021
1022 if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) {
1023 free((char *)xmlctx->value);
1024 }
1025 ly_set_erase(&xmlctx->elements, free);
1026 for (u = xmlctx->ns.count - 1; u + 1 > 0; --u) {
1027 /* remove the ns structure */
1028 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->prefix);
1029 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->uri);
1030 free(xmlctx->ns.objs[u]);
1031 }
1032 ly_set_erase(&xmlctx->ns, NULL);
1033 free(xmlctx);
Radek Krejcib1890642018-10-03 14:05:40 +02001034}
Radek Krejcie7b95092019-05-15 11:03:07 +02001035
1036LY_ERR
Radek Krejci857189e2020-09-01 13:26:36 +02001037lyxml_dump_text(struct ly_out *out, const char *text, ly_bool attribute)
Radek Krejcie7b95092019-05-15 11:03:07 +02001038{
Michal Vasko5233e962020-08-14 14:26:20 +02001039 LY_ERR ret;
Radek Krejcie7b95092019-05-15 11:03:07 +02001040
1041 if (!text) {
1042 return 0;
1043 }
1044
Radek Krejci1deb5be2020-08-26 16:43:36 +02001045 for (uint64_t u = 0; text[u]; u++) {
Radek Krejcie7b95092019-05-15 11:03:07 +02001046 switch (text[u]) {
1047 case '&':
Michal Vasko5233e962020-08-14 14:26:20 +02001048 ret = ly_print_(out, "&amp;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001049 break;
1050 case '<':
Michal Vasko5233e962020-08-14 14:26:20 +02001051 ret = ly_print_(out, "&lt;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001052 break;
1053 case '>':
1054 /* not needed, just for readability */
Michal Vasko5233e962020-08-14 14:26:20 +02001055 ret = ly_print_(out, "&gt;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001056 break;
1057 case '"':
1058 if (attribute) {
Michal Vasko5233e962020-08-14 14:26:20 +02001059 ret = ly_print_(out, "&quot;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001060 break;
1061 }
Radek Krejcif13b87b2020-12-01 22:02:17 +01001062 /* fall through */
Radek Krejcie7b95092019-05-15 11:03:07 +02001063 default:
Michal Vasko5233e962020-08-14 14:26:20 +02001064 ret = ly_write_(out, &text[u], 1);
1065 break;
Radek Krejcie7b95092019-05-15 11:03:07 +02001066 }
Michal Vasko5233e962020-08-14 14:26:20 +02001067 LY_CHECK_RET(ret);
Radek Krejcie7b95092019-05-15 11:03:07 +02001068 }
1069
Michal Vasko5233e962020-08-14 14:26:20 +02001070 return LY_SUCCESS;
Radek Krejcie7b95092019-05-15 11:03:07 +02001071}
1072
Michal Vasko52927e22020-03-16 17:26:14 +01001073LY_ERR
Michal Vasko6b5cb2a2020-11-11 19:11:21 +01001074lyxml_value_compare(const struct ly_ctx *ctx, const char *value1, void *val_prefix_data1, const char *value2,
1075 void *val_prefix_data2)
Michal Vasko52927e22020-03-16 17:26:14 +01001076{
Michal Vasko6b5cb2a2020-11-11 19:11:21 +01001077 const char *ptr1, *ptr2, *end1, *end2;
1078 const struct lys_module *mod1, *mod2;
Michal Vasko52927e22020-03-16 17:26:14 +01001079
1080 if (!value1 && !value2) {
1081 return LY_SUCCESS;
1082 }
1083 if ((value1 && !value2) || (!value1 && value2)) {
1084 return LY_ENOT;
1085 }
1086
1087 ptr1 = value1;
1088 ptr2 = value2;
1089 while (ptr1[0] && ptr2[0]) {
1090 if (ptr1[0] != ptr2[0]) {
1091 /* it can be a start of prefix that maps to the same module */
Michal Vasko6b5cb2a2020-11-11 19:11:21 +01001092 mod1 = mod2 = NULL;
1093 if (val_prefix_data1 && (end1 = strchr(ptr1, ':'))) {
Michal Vasko52927e22020-03-16 17:26:14 +01001094 /* find module of the first prefix, if any */
Michal Vasko6b5cb2a2020-11-11 19:11:21 +01001095 mod1 = ly_resolve_prefix(ctx, ptr1, end1 - ptr1, LY_PREF_XML, val_prefix_data1);
Michal Vasko52927e22020-03-16 17:26:14 +01001096 }
Michal Vasko6b5cb2a2020-11-11 19:11:21 +01001097 if (val_prefix_data2 && (end2 = strchr(ptr2, ':'))) {
Michal Vasko52927e22020-03-16 17:26:14 +01001098 /* find module of the second prefix, if any */
Michal Vasko6b5cb2a2020-11-11 19:11:21 +01001099 mod2 = ly_resolve_prefix(ctx, ptr2, end2 - ptr2, LY_PREF_XML, val_prefix_data2);
Michal Vasko52927e22020-03-16 17:26:14 +01001100 }
1101
Michal Vasko6b5cb2a2020-11-11 19:11:21 +01001102 if (!mod1 || !mod2 || (mod1 != mod2)) {
Michal Vasko52927e22020-03-16 17:26:14 +01001103 /* not a prefix or maps to different namespaces */
1104 break;
1105 }
1106
1107 /* skip prefixes in both values (':' is skipped as iter) */
Michal Vasko6b5cb2a2020-11-11 19:11:21 +01001108 ptr1 = end1;
1109 ptr2 = end2;
Michal Vasko52927e22020-03-16 17:26:14 +01001110 }
1111
1112 ++ptr1;
1113 ++ptr2;
1114 }
1115 if (ptr1[0] || ptr2[0]) {
1116 /* not a match or simply different lengths */
1117 return LY_ENOT;
1118 }
1119
1120 return LY_SUCCESS;
1121}