blob: ad4f54e90fe059ad35e697edadb07c03dff5c028 [file] [log] [blame]
Radek Krejcid91dbaf2018-09-21 15:51:39 +02001/**
2 * @file xml.c
3 * @author Radek Krejci <rkrejci@cesnet.cz>
Michal Vaskob36053d2020-03-26 15:49:30 +01004 * @author Michal Vasko <mvasko@cesnet.cz>
Radek Krejcid91dbaf2018-09-21 15:51:39 +02005 * @brief Generic XML parser implementation for libyang
6 *
7 * Copyright (c) 2015 - 2018 CESNET, z.s.p.o.
8 *
9 * This source code is licensed under BSD 3-Clause License (the "License").
10 * You may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * https://opensource.org/licenses/BSD-3-Clause
14 */
15
Radek Krejci535ea9f2020-05-29 16:01:05 +020016#define _GNU_SOURCE
17
18#include "xml.h"
Radek Krejci4b74d5e2018-09-26 14:30:55 +020019
Radek Krejcib1890642018-10-03 14:05:40 +020020#include <assert.h>
Radek Krejci7a7fa902018-09-25 17:08:21 +020021#include <ctype.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020022#include <stdint.h>
Radek Krejcie7b95092019-05-15 11:03:07 +020023#include <stdlib.h>
Radek Krejci4b74d5e2018-09-26 14:30:55 +020024#include <string.h>
Radek Krejcica376bd2020-06-11 16:04:06 +020025#include <sys/types.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020026
Radek Krejci535ea9f2020-05-29 16:01:05 +020027#include "common.h"
Michal Vasko5aa44c02020-06-29 11:47:02 +020028#include "compat.h"
Radek Krejci535ea9f2020-05-29 16:01:05 +020029#include "dict.h"
Michal Vasko63f3d842020-07-08 10:10:14 +020030#include "parser_internal.h"
Michal Vasko5233e962020-08-14 14:26:20 +020031#include "printer_internal.h"
Radek Krejci535ea9f2020-05-29 16:01:05 +020032#include "tree.h"
33#include "tree_data.h"
Radek Krejcid91dbaf2018-09-21 15:51:39 +020034
Michal Vaskob36053d2020-03-26 15:49:30 +010035/* Move input p by s characters, if EOF log with lyxml_ctx c */
Michal Vaskod989ba02020-08-24 10:59:24 +020036#define move_input(c, s) ly_in_skip(c->in, s); LY_CHECK_ERR_RET(!c->in->current[0], LOGVAL(c->ctx, LY_VLOG_LINE, &c->line, LY_VCODE_EOF), LY_EVALID)
Radek Krejcid91dbaf2018-09-21 15:51:39 +020037
Radek Krejcib1890642018-10-03 14:05:40 +020038/* Ignore whitespaces in the input string p */
Michal Vasko63f3d842020-07-08 10:10:14 +020039#define ign_xmlws(c) while (is_xmlws(*(c)->in->current)) {if (*(c)->in->current == '\n') {++c->line;} ly_in_skip(c->in, 1);}
Michal Vaskob36053d2020-03-26 15:49:30 +010040
Radek Krejci1deb5be2020-08-26 16:43:36 +020041static LY_ERR lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *value_len, uint8_t *ws_only,
42 uint8_t *dynamic);
Radek Krejcid91dbaf2018-09-21 15:51:39 +020043
/**
 * @brief Scan the input for the first occurrence of a delimiter.
 *
 * @param[in] input NUL-terminated input to scan.
 * @param[in] delim Delimiter to look for.
 * @param[in] delim_len Number of bytes of @p delim to match.
 * @param[out] newlines Number of new lines passed before the delimiter (or before the end of input).
 * @param[out] parsed Number of bytes preceding the delimiter (or the whole input length if not found).
 * @return 0 if the delimiter was found, 1 if the end of input was reached first.
 */
static uint8_t
ign_todelim(register const char *input, const char *delim, size_t delim_len, size_t *newlines, size_t *parsed)
{
    const char *cur;
    size_t matched;

    *newlines = 0;
    *parsed = 0;

    for (cur = input; *cur; ++cur, ++(*parsed)) {
        if (*cur != *delim) {
            /* cannot be the start of the delimiter, only keep track of lines */
            if (*cur == '\n') {
                ++(*newlines);
            }
            continue;
        }

        /* first byte matches, compare the delimiter byte by byte and stop on the first difference */
        matched = 0;
        while ((matched < delim_len) && (cur[matched] == delim[matched])) {
            ++matched;
        }
        if (matched == delim_len) {
            /* delimiter found */
            return 0;
        }
    }

    /* end of input without the delimiter */
    return 1;
}
81
Radek Krejci4b74d5e2018-09-26 14:30:55 +020082/**
Michal Vaskob36053d2020-03-26 15:49:30 +010083 * @brief Check/Get an XML identifier from the input string.
84 *
85 * The identifier must have at least one valid character complying the name start character constraints.
86 * The identifier is terminated by the first character, which does not comply to the name character constraints.
87 *
88 * See https://www.w3.org/TR/xml-names/#NT-NCName
89 *
90 * @param[in] xmlctx XML context.
91 * @param[out] start Pointer to the start of the identifier.
92 * @param[out] end Pointer ot the end of the identifier.
93 * @return LY_ERR value.
94 */
95static LY_ERR
96lyxml_parse_identifier(struct lyxml_ctx *xmlctx, const char **start, const char **end)
97{
98 const char *s, *in;
99 uint32_t c;
100 size_t parsed;
101 LY_ERR rc;
102
Michal Vasko63f3d842020-07-08 10:10:14 +0200103 in = s = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100104
105 /* check NameStartChar (minus colon) */
106 LY_CHECK_ERR_RET(ly_getutf8(&in, &c, &parsed),
107 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INCHAR, in[0]),
108 LY_EVALID);
109 LY_CHECK_ERR_RET(!is_xmlqnamestartchar(c),
110 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
111 "Identifier \"%s\" starts with an invalid character.", in - parsed),
112 LY_EVALID);
113
114 /* check rest of the identifier */
115 do {
116 /* move only successfully parsed bytes */
Michal Vasko63f3d842020-07-08 10:10:14 +0200117 ly_in_skip(xmlctx->in, parsed);
Michal Vaskob36053d2020-03-26 15:49:30 +0100118
119 rc = ly_getutf8(&in, &c, &parsed);
120 LY_CHECK_ERR_RET(rc, LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INCHAR, in[0]), LY_EVALID);
121 } while (is_xmlqnamechar(c));
122
123 *start = s;
Michal Vasko63f3d842020-07-08 10:10:14 +0200124 *end = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100125 return LY_SUCCESS;
126}
127
128/**
129 * @brief Add namespace definition into XML context.
130 *
131 * Namespaces from a single element are supposed to be added sequentially together (not interleaved by a namespace from other
132 * element). This mimic namespace visibility, since the namespace defined in element E is not visible from its parents or
133 * siblings. On the other hand, namespace from a parent element can be redefined in a child element. This is also reflected
134 * by lyxml_ns_get() which returns the most recent namespace definition for the given prefix.
135 *
136 * When leaving processing of a subtree of some element (after it is removed from xmlctx->elements), caller is supposed to call
137 * lyxml_ns_rm() to remove all the namespaces defined in such an element from the context.
138 *
139 * @param[in] xmlctx XML context to work with.
140 * @param[in] prefix Pointer to the namespace prefix. Can be NULL for default namespace.
141 * @param[in] prefix_len Length of the prefix.
142 * @param[in] uri Namespace URI (value) to store directly. Value is always spent.
143 * @return LY_ERR values.
144 */
145LY_ERR
146lyxml_ns_add(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, char *uri)
147{
148 struct lyxml_ns *ns;
149
150 ns = malloc(sizeof *ns);
151 LY_CHECK_ERR_RET(!ns, LOGMEM(xmlctx->ctx), LY_EMEM);
152
153 /* we need to connect the depth of the element where the namespace is defined with the
154 * namespace record to be able to maintain (remove) the record when the parser leaves
155 * (to its sibling or back to the parent) the element where the namespace was defined */
156 ns->depth = xmlctx->elements.count;
157
158 ns->uri = uri;
159 if (prefix) {
160 ns->prefix = strndup(prefix, prefix_len);
161 LY_CHECK_ERR_RET(!ns->prefix, LOGMEM(xmlctx->ctx); free(ns->uri); free(ns), LY_EMEM);
162 } else {
163 ns->prefix = NULL;
164 }
165
166 LY_CHECK_ERR_RET(ly_set_add(&xmlctx->ns, ns, LY_SET_OPT_USEASLIST) == -1,
167 free(ns->prefix); free(ns->uri); free(ns), LY_EMEM);
168 return LY_SUCCESS;
169}
170
171/**
172 * @brief Remove all the namespaces defined in the element recently closed (removed from the xmlctx->elements).
173 *
174 * @param[in] xmlctx XML context to work with.
175 */
176void
177lyxml_ns_rm(struct lyxml_ctx *xmlctx)
178{
Radek Krejci1deb5be2020-08-26 16:43:36 +0200179 for (uint32_t u = xmlctx->ns.count - 1; u + 1 > 0; --u) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100180 if (((struct lyxml_ns *)xmlctx->ns.objs[u])->depth != xmlctx->elements.count + 1) {
181 /* we are done, the namespaces from a single element are supposed to be together */
182 break;
183 }
184 /* remove the ns structure */
185 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->prefix);
186 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->uri);
187 free(xmlctx->ns.objs[u]);
188 --xmlctx->ns.count;
189 }
190
191 if (!xmlctx->ns.count) {
192 /* cleanup the xmlctx's namespaces storage */
193 ly_set_erase(&xmlctx->ns, NULL);
194 }
195}
196
Michal Vaskob36053d2020-03-26 15:49:30 +0100197const struct lyxml_ns *
Michal Vaskoc8a230d2020-08-14 12:17:10 +0200198lyxml_ns_get(const struct ly_set *ns_set, const char *prefix, size_t prefix_len)
Michal Vaskob36053d2020-03-26 15:49:30 +0100199{
Michal Vaskob36053d2020-03-26 15:49:30 +0100200 struct lyxml_ns *ns;
201
Radek Krejci1deb5be2020-08-26 16:43:36 +0200202 for (uint32_t u = ns_set->count - 1; u + 1 > 0; --u) {
Michal Vaskoc8a230d2020-08-14 12:17:10 +0200203 ns = (struct lyxml_ns *)ns_set->objs[u];
Michal Vaskob36053d2020-03-26 15:49:30 +0100204 if (prefix && prefix_len) {
205 if (ns->prefix && !ly_strncmp(ns->prefix, prefix, prefix_len)) {
206 return ns;
207 }
208 } else if (!ns->prefix) {
209 /* default namespace */
210 return ns;
211 }
212 }
213
214 return NULL;
215}
216
Michal Vasko8cef5232020-06-15 17:59:47 +0200217/**
218 * @brief Skip in the input until EOF or just after the opening tag.
219 * Handles special XML constructs (comment, cdata, doctype).
220 *
221 * @param[in] xmlctx XML context to use.
222 * @return LY_ERR value.
223 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100224static LY_ERR
225lyxml_skip_until_end_or_after_otag(struct lyxml_ctx *xmlctx)
226{
227 const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */
Michal Vasko63f3d842020-07-08 10:10:14 +0200228 const char *endtag, *sectname;
229 size_t endtag_len, newlines, parsed;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200230 uint8_t rc;
Michal Vaskob36053d2020-03-26 15:49:30 +0100231
232 while (1) {
233 ign_xmlws(xmlctx);
234
Michal Vasko63f3d842020-07-08 10:10:14 +0200235 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100236 /* EOF */
237 if (xmlctx->elements.count) {
238 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
239 return LY_EVALID;
240 }
241 return LY_SUCCESS;
Michal Vasko63f3d842020-07-08 10:10:14 +0200242 } else if (xmlctx->in->current[0] != '<') {
243 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
244 xmlctx->in->current, "element tag start ('<')");
Michal Vaskob36053d2020-03-26 15:49:30 +0100245 return LY_EVALID;
246 }
247 move_input(xmlctx, 1);
248
Michal Vasko63f3d842020-07-08 10:10:14 +0200249 if (xmlctx->in->current[0] == '!') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100250 move_input(xmlctx, 1);
251 /* sections to ignore */
Michal Vasko63f3d842020-07-08 10:10:14 +0200252 if (!strncmp(xmlctx->in->current, "--", 2)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100253 /* comment */
254 move_input(xmlctx, 2);
255 sectname = "Comment";
256 endtag = "-->";
257 endtag_len = 3;
Michal Vasko63f3d842020-07-08 10:10:14 +0200258 } else if (!strncmp(xmlctx->in->current, "[CDATA[", 7)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100259 /* CDATA section */
260 move_input(xmlctx, 7);
261 sectname = "CData";
262 endtag = "]]>";
263 endtag_len = 3;
Michal Vasko63f3d842020-07-08 10:10:14 +0200264 } else if (!strncmp(xmlctx->in->current, "DOCTYPE", 7)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100265 /* Document type declaration - not supported */
266 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_NSUPP, "Document Type Declaration");
267 return LY_EVALID;
268 } else {
Michal Vasko63f3d842020-07-08 10:10:14 +0200269 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Unknown XML section \"%.20s\".",
270 &xmlctx->in->current[-2]);
Michal Vaskob36053d2020-03-26 15:49:30 +0100271 return LY_EVALID;
272 }
Michal Vasko63f3d842020-07-08 10:10:14 +0200273 rc = ign_todelim(xmlctx->in->current, endtag, endtag_len, &newlines, &parsed);
274 LY_CHECK_ERR_RET(rc, LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_NTERM, sectname), LY_EVALID);
Michal Vaskob36053d2020-03-26 15:49:30 +0100275 xmlctx->line += newlines;
Michal Vasko63f3d842020-07-08 10:10:14 +0200276 ly_in_skip(xmlctx->in, parsed + endtag_len);
277 } else if (xmlctx->in->current[0] == '?') {
278 rc = ign_todelim(xmlctx->in->current, "?>", 2, &newlines, &parsed);
279 LY_CHECK_ERR_RET(rc, LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_NTERM, "Declaration"), LY_EVALID);
Michal Vaskob36053d2020-03-26 15:49:30 +0100280 xmlctx->line += newlines;
Michal Vasko63f3d842020-07-08 10:10:14 +0200281 ly_in_skip(xmlctx->in, parsed + 2);
Michal Vaskob36053d2020-03-26 15:49:30 +0100282 } else {
283 /* other non-WS character */
284 break;
285 }
286 }
287
288 return LY_SUCCESS;
289}
290
Michal Vasko8cef5232020-06-15 17:59:47 +0200291/**
292 * @brief Parse QName.
293 *
294 * @param[in] xmlctx XML context to use.
295 * @param[out] prefix Parsed prefix, may be NULL.
296 * @param[out] prefix_len Length of @p prefix.
297 * @param[out] name Parsed name.
298 * @param[out] name_len Length of @p name.
299 * @return LY_ERR value.
300 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100301static LY_ERR
302lyxml_parse_qname(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
303{
304 const char *start, *end;
305
306 *prefix = NULL;
307 *prefix_len = 0;
308
309 LY_CHECK_RET(lyxml_parse_identifier(xmlctx, &start, &end));
310 if (end[0] == ':') {
311 /* we have prefixed identifier */
312 *prefix = start;
313 *prefix_len = end - start;
314
315 move_input(xmlctx, 1);
316 LY_CHECK_RET(lyxml_parse_identifier(xmlctx, &start, &end));
317 }
318
319 *name = start;
320 *name_len = end - start;
321 return LY_SUCCESS;
322}
323
324/**
Michal Vasko8cef5232020-06-15 17:59:47 +0200325 * @brief Parse XML text content (value).
326 *
327 * @param[in] xmlctx XML context to use.
328 * @param[in] endchar Expected character to mark value end.
329 * @param[out] value Parsed value.
330 * @param[out] length Length of @p value.
331 * @param[out] ws_only Whether the value is empty/white-spaces only.
332 * @param[out] dynamic Whether the value was dynamically allocated.
333 * @return LY_ERR value.
334 */
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200335static LY_ERR
Radek Krejci1deb5be2020-08-26 16:43:36 +0200336lyxml_parse_value(struct lyxml_ctx *xmlctx, char endchar, char **value, size_t *length, uint8_t *ws_only, uint8_t *dynamic)
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200337{
Michal Vaskob36053d2020-03-26 15:49:30 +0100338#define BUFSIZE 24
339#define BUFSIZE_STEP 128
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200340
Michal Vaskob36053d2020-03-26 15:49:30 +0100341 const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */
Michal Vasko63f3d842020-07-08 10:10:14 +0200342 const char *in = xmlctx->in->current, *start;
Michal Vaskob36053d2020-03-26 15:49:30 +0100343 char *buf = NULL;
Radek Krejci4ad42aa2019-07-23 16:55:58 +0200344 size_t offset; /* read offset in input buffer */
345 size_t len; /* length of the output string (write offset in output buffer) */
346 size_t size = 0; /* size of the output buffer */
Radek Krejci7a7fa902018-09-25 17:08:21 +0200347 void *p;
Radek Krejci117d2082018-09-26 10:05:14 +0200348 uint32_t n;
Michal Vaskob36053d2020-03-26 15:49:30 +0100349 size_t u;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200350 uint8_t ws = 1;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200351
Michal Vaskob36053d2020-03-26 15:49:30 +0100352 assert(xmlctx);
Radek Krejcib1890642018-10-03 14:05:40 +0200353
Radek Krejcid70d1072018-10-09 14:20:47 +0200354 /* init */
Michal Vaskob36053d2020-03-26 15:49:30 +0100355 start = in;
Radek Krejcid70d1072018-10-09 14:20:47 +0200356 offset = len = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200357
358 /* parse */
359 while (in[offset]) {
360 if (in[offset] == '&') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100361 /* non WS */
362 ws = 0;
Radek Krejcid70d1072018-10-09 14:20:47 +0200363
Michal Vaskob36053d2020-03-26 15:49:30 +0100364 if (!buf) {
365 /* prepare output buffer */
366 buf = malloc(BUFSIZE);
367 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
368 size = BUFSIZE;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200369 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100370
371 /* allocate enough for the offset and next character,
372 * we will need 4 bytes at most since we support only the predefined
373 * (one-char) entities and character references */
Juraj Vijtiukcb017cc2020-07-08 16:19:58 +0200374 while (len + offset + 4 >= size) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100375 buf = ly_realloc(buf, size + BUFSIZE_STEP);
376 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
377 size += BUFSIZE_STEP;
378 }
379
380 if (offset) {
381 /* store what we have so far */
382 memcpy(&buf[len], in, offset);
383 len += offset;
384 in += offset;
385 offset = 0;
386 }
387
Radek Krejci7a7fa902018-09-25 17:08:21 +0200388 ++offset;
389 if (in[offset] != '#') {
390 /* entity reference - only predefined references are supported */
391 if (!strncmp(&in[offset], "lt;", 3)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100392 buf[len++] = '<';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200393 in += 4; /* &lt; */
394 } else if (!strncmp(&in[offset], "gt;", 3)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100395 buf[len++] = '>';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200396 in += 4; /* &gt; */
397 } else if (!strncmp(&in[offset], "amp;", 4)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100398 buf[len++] = '&';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200399 in += 5; /* &amp; */
400 } else if (!strncmp(&in[offset], "apos;", 5)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100401 buf[len++] = '\'';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200402 in += 6; /* &apos; */
403 } else if (!strncmp(&in[offset], "quot;", 5)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100404 buf[len++] = '\"';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200405 in += 6; /* &quot; */
406 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100407 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
Michal Vasko44f3d2c2020-08-24 09:49:38 +0200408 "Entity reference \"%.*s\" not supported, only predefined references allowed.", 10, &in[offset - 1]);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200409 goto error;
410 }
411 offset = 0;
412 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100413 p = (void *)&in[offset - 1];
Radek Krejci7a7fa902018-09-25 17:08:21 +0200414 /* character reference */
415 ++offset;
416 if (isdigit(in[offset])) {
417 for (n = 0; isdigit(in[offset]); offset++) {
418 n = (10 * n) + (in[offset] - '0');
419 }
420 } else if (in[offset] == 'x' && isxdigit(in[offset + 1])) {
421 for (n = 0, ++offset; isxdigit(in[offset]); offset++) {
422 if (isdigit(in[offset])) {
423 u = (in[offset] - '0');
424 } else if (in[offset] > 'F') {
425 u = 10 + (in[offset] - 'a');
426 } else {
427 u = 10 + (in[offset] - 'A');
428 }
429 n = (16 * n) + u;
430 }
431 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100432 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Invalid character reference \"%.*s\".", 12, p);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200433 goto error;
434
435 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100436
Radek Krejci7a7fa902018-09-25 17:08:21 +0200437 LY_CHECK_ERR_GOTO(in[offset] != ';',
Michal Vaskob36053d2020-03-26 15:49:30 +0100438 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP,
Radek Krejci7a7fa902018-09-25 17:08:21 +0200439 LY_VCODE_INSTREXP_len(&in[offset]), &in[offset], ";"),
440 error);
441 ++offset;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200442 LY_CHECK_ERR_GOTO(ly_pututf8(&buf[len], n, &u),
Michal Vaskob36053d2020-03-26 15:49:30 +0100443 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
444 "Invalid character reference \"%.*s\" (0x%08x).", 12, p, n),
Radek Krejci7a7fa902018-09-25 17:08:21 +0200445 error);
446 len += u;
447 in += offset;
448 offset = 0;
449 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100450 } else if (in[offset] == endchar) {
Radek Krejci7a7fa902018-09-25 17:08:21 +0200451 /* end of string */
Radek Krejcid70d1072018-10-09 14:20:47 +0200452 if (buf) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100453 /* realloc exact size string */
454 buf = ly_realloc(buf, len + offset + 1);
455 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
456 size = len + offset + 1;
Radek Krejcid70d1072018-10-09 14:20:47 +0200457 memcpy(&buf[len], in, offset);
Michal Vaskob36053d2020-03-26 15:49:30 +0100458
459 /* set terminating NULL byte */
460 buf[len + offset] = '\0';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200461 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200462 len += offset;
Michal Vaskob36053d2020-03-26 15:49:30 +0100463 in += offset;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200464 goto success;
465 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100466 if (!is_xmlws(in[offset])) {
467 /* non WS */
468 ws = 0;
469 }
470
Radek Krejci7a7fa902018-09-25 17:08:21 +0200471 /* log lines */
472 if (in[offset] == '\n') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100473 ++xmlctx->line;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200474 }
475
476 /* continue */
477 ++offset;
478 }
479 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100480
481 /* EOF reached before endchar */
482 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
483
Radek Krejci7a7fa902018-09-25 17:08:21 +0200484error:
Michal Vaskob36053d2020-03-26 15:49:30 +0100485 free(buf);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200486 return LY_EVALID;
487
488success:
Radek Krejcid70d1072018-10-09 14:20:47 +0200489 if (buf) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100490 *value = buf;
491 *dynamic = 1;
492 } else {
493 *value = (char *)start;
494 *dynamic = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200495 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100496 *length = len;
497 *ws_only = ws;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200498
Michal Vasko63f3d842020-07-08 10:10:14 +0200499 ly_in_skip(xmlctx->in, in - xmlctx->in->current);
Michal Vaskob36053d2020-03-26 15:49:30 +0100500 return LY_SUCCESS;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200501
502#undef BUFSIZE
503#undef BUFSIZE_STEP
Radek Krejci7a7fa902018-09-25 17:08:21 +0200504}
505
Michal Vasko8cef5232020-06-15 17:59:47 +0200506/**
507 * @brief Parse XML closing element and match it to a stored starting element.
508 *
509 * @param[in] xmlctx XML context to use.
510 * @param[in] prefix Expected closing element prefix.
511 * @param[in] prefix_len Length of @p prefix.
512 * @param[in] name Expected closing element name.
513 * @param[in] name_len Length of @p name.
514 * @param[in] empty Whether we are parsing a special "empty" element (with joined starting and closing tag) with no value.
515 * @return LY_ERR value.
516 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100517static LY_ERR
518lyxml_close_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, const char *name, size_t name_len,
Radek Krejci1deb5be2020-08-26 16:43:36 +0200519 uint8_t empty)
Radek Krejcid972c252018-09-25 13:23:39 +0200520{
Michal Vaskob36053d2020-03-26 15:49:30 +0100521 struct lyxml_elem *e;
Radek Krejcid972c252018-09-25 13:23:39 +0200522
Michal Vaskob36053d2020-03-26 15:49:30 +0100523 /* match opening and closing element tags */
524 if (!xmlctx->elements.count) {
525 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Stray closing element tag (\"%.*s\").",
526 name_len, name);
527 return LY_EVALID;
528 }
Radek Krejcid972c252018-09-25 13:23:39 +0200529
Michal Vaskob36053d2020-03-26 15:49:30 +0100530 e = (struct lyxml_elem *)xmlctx->elements.objs[xmlctx->elements.count - 1];
531 if ((e->prefix_len != prefix_len) || (e->name_len != name_len)
532 || (prefix_len && strncmp(prefix, e->prefix, e->prefix_len)) || strncmp(name, e->name, e->name_len)) {
533 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
534 "Opening (\"%.*s%s%.*s\") and closing (\"%.*s%s%.*s\") elements tag mismatch.",
535 e->prefix_len, e->prefix ? e->prefix : "", e->prefix ? ":" : "", e->name_len, e->name,
536 prefix_len, prefix ? prefix : "", prefix ? ":" : "", name_len, name);
537 return LY_EVALID;
538 }
Radek Krejcid972c252018-09-25 13:23:39 +0200539
Michal Vaskob36053d2020-03-26 15:49:30 +0100540 /* opening and closing element tags matches, remove record from the opening tags list */
541 ly_set_rm_index(&xmlctx->elements, xmlctx->elements.count - 1, free);
Radek Krejcid972c252018-09-25 13:23:39 +0200542
Michal Vaskob36053d2020-03-26 15:49:30 +0100543 /* remove also the namespaces connected with the element */
544 lyxml_ns_rm(xmlctx);
Radek Krejcid972c252018-09-25 13:23:39 +0200545
Michal Vaskob36053d2020-03-26 15:49:30 +0100546 /* skip WS */
547 ign_xmlws(xmlctx);
Radek Krejcid972c252018-09-25 13:23:39 +0200548
Michal Vaskob36053d2020-03-26 15:49:30 +0100549 /* special "<elem/>" element */
Michal Vasko63f3d842020-07-08 10:10:14 +0200550 if (empty && (xmlctx->in->current[0] == '/')) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100551 move_input(xmlctx, 1);
552 }
Michal Vasko52927e22020-03-16 17:26:14 +0100553
Michal Vaskob36053d2020-03-26 15:49:30 +0100554 /* parse closing tag */
Michal Vasko63f3d842020-07-08 10:10:14 +0200555 if (xmlctx->in->current[0] != '>') {
556 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
557 xmlctx->in->current, "element tag termination ('>')");
Michal Vaskob36053d2020-03-26 15:49:30 +0100558 return LY_EVALID;
559 }
Michal Vasko52927e22020-03-16 17:26:14 +0100560
Michal Vaskob36053d2020-03-26 15:49:30 +0100561 /* move after closing tag without checking for EOF */
Michal Vasko63f3d842020-07-08 10:10:14 +0200562 ly_in_skip(xmlctx->in, 1);
Michal Vasko52927e22020-03-16 17:26:14 +0100563
Radek Krejcid972c252018-09-25 13:23:39 +0200564 return LY_SUCCESS;
565}
566
Michal Vasko8cef5232020-06-15 17:59:47 +0200567/**
568 * @brief Store parsed opening element and parse any included namespaces.
569 *
570 * @param[in] xmlctx XML context to use.
571 * @param[in] prefix Parsed starting element prefix.
572 * @param[in] prefix_len Length of @p prefix.
573 * @param[in] name Parsed starting element name.
574 * @param[in] name_len Length of @p name.
575 * @return LY_ERR value.
576 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100577static LY_ERR
578lyxml_open_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, const char *name, size_t name_len)
Radek Krejcib1890642018-10-03 14:05:40 +0200579{
Michal Vaskob36053d2020-03-26 15:49:30 +0100580 LY_ERR ret = LY_SUCCESS;
581 struct lyxml_elem *e;
582 const char *prev_input;
583 char *value;
584 size_t parsed, value_len;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200585 uint8_t ws_only, dynamic, is_ns;
Michal Vaskob36053d2020-03-26 15:49:30 +0100586 uint32_t c;
Radek Krejcib1890642018-10-03 14:05:40 +0200587
Michal Vaskob36053d2020-03-26 15:49:30 +0100588 /* store element opening tag information */
589 e = malloc(sizeof *e);
590 LY_CHECK_ERR_RET(!e, LOGMEM(xmlctx->ctx), LY_EMEM);
591 e->name = name;
592 e->prefix = prefix;
593 e->name_len = name_len;
594 e->prefix_len = prefix_len;
595 ly_set_add(&xmlctx->elements, e, LY_SET_OPT_USEASLIST);
596
597 /* skip WS */
598 ign_xmlws(xmlctx);
599
600 /* parse and store all namespaces */
Michal Vasko63f3d842020-07-08 10:10:14 +0200601 prev_input = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100602 is_ns = 1;
Michal Vasko63f3d842020-07-08 10:10:14 +0200603 while ((xmlctx->in->current[0] != '\0') && !ly_getutf8(&xmlctx->in->current, &c, &parsed) && is_xmlqnamestartchar(c)) {
604 xmlctx->in->current -= parsed;
Michal Vaskob36053d2020-03-26 15:49:30 +0100605
606 /* parse attribute name */
607 LY_CHECK_GOTO(ret = lyxml_parse_qname(xmlctx, &prefix, &prefix_len, &name, &name_len), cleanup);
608
609 /* parse the value */
610 LY_CHECK_GOTO(ret = lyxml_next_attr_content(xmlctx, (const char **)&value, &value_len, &ws_only, &dynamic), cleanup);
611
612 /* store every namespace */
613 if ((prefix && !ly_strncmp("xmlns", prefix, prefix_len)) || (!prefix && !ly_strncmp("xmlns", name, name_len))) {
614 LY_CHECK_GOTO(ret = lyxml_ns_add(xmlctx, prefix ? name : NULL, prefix ? name_len : 0,
615 dynamic ? value : strndup(value, value_len)), cleanup);
616 dynamic = 0;
617 } else {
618 /* not a namespace */
619 is_ns = 0;
620 }
621 if (dynamic) {
622 free(value);
623 }
624
625 /* skip WS */
626 ign_xmlws(xmlctx);
627
628 if (is_ns) {
629 /* we can actually skip all the namespaces as there is no reason to parse them again */
Michal Vasko63f3d842020-07-08 10:10:14 +0200630 prev_input = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100631 }
Radek Krejcib1890642018-10-03 14:05:40 +0200632 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100633
634cleanup:
635 if (!ret) {
Michal Vasko63f3d842020-07-08 10:10:14 +0200636 xmlctx->in->current = prev_input;
Michal Vaskob36053d2020-03-26 15:49:30 +0100637 }
638 return ret;
639}
640
Michal Vasko8cef5232020-06-15 17:59:47 +0200641/**
642 * @brief Move parser to the attribute content and parse it.
643 *
644 * @param[in] xmlctx XML context to use.
645 * @param[out] value Parsed attribute value.
646 * @param[out] value_len Length of @p value.
647 * @param[out] ws_only Whether the value is empty/white-spaces only.
648 * @param[out] dynamic Whether the value was dynamically allocated.
649 * @return LY_ERR value.
650 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100651static LY_ERR
Radek Krejci1deb5be2020-08-26 16:43:36 +0200652lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *value_len, uint8_t *ws_only, uint8_t *dynamic)
Michal Vaskob36053d2020-03-26 15:49:30 +0100653{
654 char quot;
655
656 /* skip WS */
657 ign_xmlws(xmlctx);
658
659 /* skip '=' */
Michal Vasko63f3d842020-07-08 10:10:14 +0200660 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100661 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
662 return LY_EVALID;
Michal Vasko63f3d842020-07-08 10:10:14 +0200663 } else if (xmlctx->in->current[0] != '=') {
664 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
665 xmlctx->in->current, "'='");
Michal Vaskob36053d2020-03-26 15:49:30 +0100666 return LY_EVALID;
667 }
668 move_input(xmlctx, 1);
669
670 /* skip WS */
671 ign_xmlws(xmlctx);
672
673 /* find quotes */
Michal Vasko63f3d842020-07-08 10:10:14 +0200674 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100675 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
676 return LY_EVALID;
Michal Vasko63f3d842020-07-08 10:10:14 +0200677 } else if ((xmlctx->in->current[0] != '\'') && (xmlctx->in->current[0] != '\"')) {
678 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
679 xmlctx->in->current, "either single or double quotation mark");
Michal Vaskob36053d2020-03-26 15:49:30 +0100680 return LY_EVALID;
681 }
682
683 /* remember quote */
Michal Vasko63f3d842020-07-08 10:10:14 +0200684 quot = xmlctx->in->current[0];
Michal Vaskob36053d2020-03-26 15:49:30 +0100685 move_input(xmlctx, 1);
686
687 /* parse attribute value */
688 LY_CHECK_RET(lyxml_parse_value(xmlctx, quot, (char **)value, value_len, ws_only, dynamic));
689
690 /* move after ending quote (without checking for EOF) */
Michal Vasko63f3d842020-07-08 10:10:14 +0200691 ly_in_skip(xmlctx->in, 1);
Michal Vaskob36053d2020-03-26 15:49:30 +0100692
693 return LY_SUCCESS;
694}
695
Michal Vasko8cef5232020-06-15 17:59:47 +0200696/**
697 * @brief Move parser to the next attribute and parse it.
698 *
699 * @param[in] xmlctx XML context to use.
700 * @param[out] prefix Parsed attribute prefix.
701 * @param[out] prefix_len Length of @p prefix.
702 * @param[out] name Parsed attribute name.
703 * @param[out] name_len Length of @p name.
704 * @return LY_ERR value.
705 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100706static LY_ERR
707lyxml_next_attribute(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
708{
709 const char *in;
710 char *value;
711 uint32_t c;
712 size_t parsed, value_len;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200713 uint8_t ws_only, dynamic;
Michal Vaskob36053d2020-03-26 15:49:30 +0100714
715 /* skip WS */
716 ign_xmlws(xmlctx);
717
718 /* parse only possible attributes */
Michal Vasko63f3d842020-07-08 10:10:14 +0200719 while ((xmlctx->in->current[0] != '>') && (xmlctx->in->current[0] != '/')) {
720 in = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100721 if (in[0] == '\0') {
722 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
723 return LY_EVALID;
724 } else if ((ly_getutf8(&in, &c, &parsed) || !is_xmlqnamestartchar(c))) {
725 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in - parsed), in - parsed,
726 "element tag end ('>' or '/>') or an attribute");
727 return LY_EVALID;
728 }
729
730 /* parse attribute name */
731 LY_CHECK_RET(lyxml_parse_qname(xmlctx, prefix, prefix_len, name, name_len));
732
733 if ((!*prefix || ly_strncmp("xmlns", *prefix, *prefix_len)) && (*prefix || ly_strncmp("xmlns", *name, *name_len))) {
734 /* standard attribute */
735 break;
736 }
737
738 /* namespace, skip it */
739 LY_CHECK_RET(lyxml_next_attr_content(xmlctx, (const char **)&value, &value_len, &ws_only, &dynamic));
740 if (dynamic) {
741 free(value);
742 }
743
744 /* skip WS */
745 ign_xmlws(xmlctx);
746 }
747
748 return LY_SUCCESS;
749}
750
Michal Vasko8cef5232020-06-15 17:59:47 +0200751/**
752 * @brief Move parser to the next element and parse it.
753 *
754 * @param[in] xmlctx XML context to use.
755 * @param[out] prefix Parsed element prefix.
756 * @param[out] prefix_len Length of @p prefix.
757 * @param[out] name Parse element name.
758 * @param[out] name_len Length of @p name.
Radek Krejci1deb5be2020-08-26 16:43:36 +0200759 * @param[out] closing Flag if the element is closing (includes '/').
Michal Vasko8cef5232020-06-15 17:59:47 +0200760 * @return LY_ERR value.
761 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100762static LY_ERR
763lyxml_next_element(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len,
Radek Krejci1deb5be2020-08-26 16:43:36 +0200764 uint8_t *closing)
Michal Vaskob36053d2020-03-26 15:49:30 +0100765{
766 /* skip WS until EOF or after opening tag '<' */
767 LY_CHECK_RET(lyxml_skip_until_end_or_after_otag(xmlctx));
Michal Vasko63f3d842020-07-08 10:10:14 +0200768 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100769 /* set return values */
770 *prefix = *name = NULL;
771 *prefix_len = *name_len = 0;
772 return LY_SUCCESS;
773 }
774
Michal Vasko63f3d842020-07-08 10:10:14 +0200775 if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100776 move_input(xmlctx, 1);
777 *closing = 1;
778 } else {
779 *closing = 0;
780 }
781
782 /* skip WS */
783 ign_xmlws(xmlctx);
784
785 /* parse element name */
786 LY_CHECK_RET(lyxml_parse_qname(xmlctx, prefix, prefix_len, name, name_len));
787
788 return LY_SUCCESS;
789}
790
791LY_ERR
Michal Vasko63f3d842020-07-08 10:10:14 +0200792lyxml_ctx_new(const struct ly_ctx *ctx, struct ly_in *in, struct lyxml_ctx **xmlctx_p)
Michal Vaskob36053d2020-03-26 15:49:30 +0100793{
794 LY_ERR ret = LY_SUCCESS;
795 struct lyxml_ctx *xmlctx;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200796 uint8_t closing;
Michal Vaskob36053d2020-03-26 15:49:30 +0100797
798 /* new context */
799 xmlctx = calloc(1, sizeof *xmlctx);
800 LY_CHECK_ERR_RET(!xmlctx, LOGMEM(ctx), LY_EMEM);
801 xmlctx->ctx = ctx;
802 xmlctx->line = 1;
Michal Vasko63f3d842020-07-08 10:10:14 +0200803 xmlctx->in = in;
Michal Vaskob36053d2020-03-26 15:49:30 +0100804
805 /* parse next element, if any */
806 LY_CHECK_GOTO(ret = lyxml_next_element(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name,
807 &xmlctx->name_len, &closing), cleanup);
808
Michal Vasko63f3d842020-07-08 10:10:14 +0200809 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100810 /* update status */
811 xmlctx->status = LYXML_END;
812 } else if (closing) {
813 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Stray closing element tag (\"%.*s\").",
814 xmlctx->name_len, xmlctx->name);
815 ret = LY_EVALID;
816 goto cleanup;
817 } else {
818 /* open an element, also parses all enclosed namespaces */
819 LY_CHECK_GOTO(ret = lyxml_open_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len), cleanup);
820
821 /* update status */
822 xmlctx->status = LYXML_ELEMENT;
823 }
824
825cleanup:
826 if (ret) {
827 lyxml_ctx_free(xmlctx);
828 } else {
829 *xmlctx_p = xmlctx;
830 }
831 return ret;
832}
833
834LY_ERR
835lyxml_ctx_next(struct lyxml_ctx *xmlctx)
836{
837 LY_ERR ret = LY_SUCCESS;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200838 uint8_t closing;
Michal Vaskob36053d2020-03-26 15:49:30 +0100839 struct lyxml_elem *e;
840
841 /* if the value was not used, free it */
842 if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) {
843 free((char *)xmlctx->value);
844 xmlctx->value = NULL;
845 xmlctx->dynamic = 0;
846 }
847
848 switch (xmlctx->status) {
849 /* content |</elem> */
850 case LYXML_ELEM_CONTENT:
851 /* handle special case when empty content for "<elem/>" was returned */
Michal Vasko63f3d842020-07-08 10:10:14 +0200852 if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100853 assert(xmlctx->elements.count);
854 e = (struct lyxml_elem *)xmlctx->elements.objs[xmlctx->elements.count - 1];
855
856 /* close the element (parses closing tag) */
Michal Vasko63f3d842020-07-08 10:10:14 +0200857 ret = lyxml_close_element(xmlctx, e->prefix, e->prefix_len, e->name, e->name_len, 1);
858 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100859
860 /* update status */
861 xmlctx->status = LYXML_ELEM_CLOSE;
862 break;
863 }
Radek Krejci0f969882020-08-21 16:56:47 +0200864 /* fallthrough */
Michal Vaskob36053d2020-03-26 15:49:30 +0100865
866 /* </elem>| <elem2>* */
867 case LYXML_ELEM_CLOSE:
868 /* parse next element, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +0200869 ret = lyxml_next_element(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name, &xmlctx->name_len, &closing);
870 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100871
Michal Vasko63f3d842020-07-08 10:10:14 +0200872 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100873 /* update status */
874 xmlctx->status = LYXML_END;
875 } else if (closing) {
876 /* close an element (parses also closing tag) */
Michal Vasko63f3d842020-07-08 10:10:14 +0200877 ret = lyxml_close_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len, 0);
878 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100879
880 /* update status */
881 xmlctx->status = LYXML_ELEM_CLOSE;
882 } else {
883 /* open an element, also parses all enclosed namespaces */
Michal Vasko63f3d842020-07-08 10:10:14 +0200884 ret = lyxml_open_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len);
885 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100886
887 /* update status */
888 xmlctx->status = LYXML_ELEMENT;
889 }
890 break;
891
892 /* <elem| attr='val'* > content */
893 case LYXML_ELEMENT:
894
895 /* attr='val'| attr='val'* > content */
896 case LYXML_ATTR_CONTENT:
897 /* parse attribute name, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +0200898 ret = lyxml_next_attribute(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name, &xmlctx->name_len);
899 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100900
Michal Vasko63f3d842020-07-08 10:10:14 +0200901 if (xmlctx->in->current[0] == '>') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100902 /* no attributes but a closing tag */
Michal Vasko63f3d842020-07-08 10:10:14 +0200903 ly_in_skip(xmlctx->in, 1);
904 if (!xmlctx->in->current[0]) {
Michal Vaskof55ae202020-06-30 15:49:36 +0200905 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
906 ret = LY_EVALID;
907 goto cleanup;
908 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100909
910 /* parse element content */
Michal Vasko63f3d842020-07-08 10:10:14 +0200911 ret = lyxml_parse_value(xmlctx, '<', (char **)&xmlctx->value, &xmlctx->value_len, &xmlctx->ws_only,
912 &xmlctx->dynamic);
913 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100914
915 if (!xmlctx->value_len) {
916 /* use empty value, easier to work with */
917 xmlctx->value = "";
918 assert(!xmlctx->dynamic);
919 }
920
921 /* update status */
922 xmlctx->status = LYXML_ELEM_CONTENT;
Michal Vasko63f3d842020-07-08 10:10:14 +0200923 } else if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100924 /* no content but we still return it */
925 xmlctx->value = "";
926 xmlctx->value_len = 0;
927 xmlctx->ws_only = 1;
928 xmlctx->dynamic = 0;
929
930 /* update status */
931 xmlctx->status = LYXML_ELEM_CONTENT;
932 } else {
933 /* update status */
934 xmlctx->status = LYXML_ATTRIBUTE;
935 }
936 break;
937
938 /* attr|='val' */
939 case LYXML_ATTRIBUTE:
940 /* skip formatting and parse value */
Michal Vasko63f3d842020-07-08 10:10:14 +0200941 ret = lyxml_next_attr_content(xmlctx, &xmlctx->value, &xmlctx->value_len, &xmlctx->ws_only, &xmlctx->dynamic);
942 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100943
944 /* update status */
945 xmlctx->status = LYXML_ATTR_CONTENT;
946 break;
947
948 /* </elem> |EOF */
949 case LYXML_END:
950 /* nothing to do */
951 break;
952 }
953
954cleanup:
955 if (ret) {
956 /* invalidate context */
957 xmlctx->status = LYXML_END;
958 }
959 return ret;
960}
961
962LY_ERR
963lyxml_ctx_peek(struct lyxml_ctx *xmlctx, enum LYXML_PARSER_STATUS *next)
964{
965 LY_ERR ret = LY_SUCCESS;
966 const char *prefix, *name, *prev_input;
967 size_t prefix_len, name_len;
Radek Krejci1deb5be2020-08-26 16:43:36 +0200968 uint8_t closing;
Michal Vaskob36053d2020-03-26 15:49:30 +0100969
Michal Vasko63f3d842020-07-08 10:10:14 +0200970 prev_input = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100971
972 switch (xmlctx->status) {
973 case LYXML_ELEM_CONTENT:
Michal Vasko63f3d842020-07-08 10:10:14 +0200974 if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100975 *next = LYXML_ELEM_CLOSE;
976 break;
977 }
Radek Krejci0f969882020-08-21 16:56:47 +0200978 /* fallthrough */
Michal Vaskob36053d2020-03-26 15:49:30 +0100979 case LYXML_ELEM_CLOSE:
980 /* parse next element, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +0200981 ret = lyxml_next_element(xmlctx, &prefix, &prefix_len, &name, &name_len, &closing);
982 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100983
Michal Vasko63f3d842020-07-08 10:10:14 +0200984 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100985 *next = LYXML_END;
986 } else if (closing) {
987 *next = LYXML_ELEM_CLOSE;
988 } else {
989 *next = LYXML_ELEMENT;
990 }
991 break;
992 case LYXML_ELEMENT:
993 case LYXML_ATTR_CONTENT:
994 /* parse attribute name, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +0200995 ret = lyxml_next_attribute(xmlctx, &prefix, &prefix_len, &name, &name_len);
996 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100997
Michal Vasko63f3d842020-07-08 10:10:14 +0200998 if ((xmlctx->in->current[0] == '>') || (xmlctx->in->current[0] == '/')) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100999 *next = LYXML_ELEM_CONTENT;
1000 } else {
1001 *next = LYXML_ATTRIBUTE;
1002 }
1003 break;
1004 case LYXML_ATTRIBUTE:
1005 *next = LYXML_ATTR_CONTENT;
1006 break;
1007 case LYXML_END:
1008 *next = LYXML_END;
1009 break;
1010 }
1011
1012cleanup:
Michal Vasko63f3d842020-07-08 10:10:14 +02001013 xmlctx->in->current = prev_input;
Michal Vaskob36053d2020-03-26 15:49:30 +01001014 return ret;
1015}
1016
1017void
1018lyxml_ctx_free(struct lyxml_ctx *xmlctx)
1019{
1020 uint32_t u;
1021
1022 if (!xmlctx) {
1023 return;
1024 }
1025
1026 if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) {
1027 free((char *)xmlctx->value);
1028 }
1029 ly_set_erase(&xmlctx->elements, free);
1030 for (u = xmlctx->ns.count - 1; u + 1 > 0; --u) {
1031 /* remove the ns structure */
1032 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->prefix);
1033 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->uri);
1034 free(xmlctx->ns.objs[u]);
1035 }
1036 ly_set_erase(&xmlctx->ns, NULL);
1037 free(xmlctx);
Radek Krejcib1890642018-10-03 14:05:40 +02001038}
Radek Krejcie7b95092019-05-15 11:03:07 +02001039
1040LY_ERR
Radek Krejci1deb5be2020-08-26 16:43:36 +02001041lyxml_dump_text(struct ly_out *out, const char *text, uint8_t attribute)
Radek Krejcie7b95092019-05-15 11:03:07 +02001042{
Michal Vasko5233e962020-08-14 14:26:20 +02001043 LY_ERR ret;
Radek Krejcie7b95092019-05-15 11:03:07 +02001044
1045 if (!text) {
1046 return 0;
1047 }
1048
Radek Krejci1deb5be2020-08-26 16:43:36 +02001049 for (uint64_t u = 0; text[u]; u++) {
Radek Krejcie7b95092019-05-15 11:03:07 +02001050 switch (text[u]) {
1051 case '&':
Michal Vasko5233e962020-08-14 14:26:20 +02001052 ret = ly_print_(out, "&amp;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001053 break;
1054 case '<':
Michal Vasko5233e962020-08-14 14:26:20 +02001055 ret = ly_print_(out, "&lt;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001056 break;
1057 case '>':
1058 /* not needed, just for readability */
Michal Vasko5233e962020-08-14 14:26:20 +02001059 ret = ly_print_(out, "&gt;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001060 break;
1061 case '"':
1062 if (attribute) {
Michal Vasko5233e962020-08-14 14:26:20 +02001063 ret = ly_print_(out, "&quot;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001064 break;
1065 }
Radek Krejci0f969882020-08-21 16:56:47 +02001066 /* falls through */
Radek Krejcie7b95092019-05-15 11:03:07 +02001067 default:
Michal Vasko5233e962020-08-14 14:26:20 +02001068 ret = ly_write_(out, &text[u], 1);
1069 break;
Radek Krejcie7b95092019-05-15 11:03:07 +02001070 }
Michal Vasko5233e962020-08-14 14:26:20 +02001071 LY_CHECK_RET(ret);
Radek Krejcie7b95092019-05-15 11:03:07 +02001072 }
1073
Michal Vasko5233e962020-08-14 14:26:20 +02001074 return LY_SUCCESS;
Radek Krejcie7b95092019-05-15 11:03:07 +02001075}
1076
Michal Vasko52927e22020-03-16 17:26:14 +01001077LY_ERR
Michal Vaskob36053d2020-03-26 15:49:30 +01001078lyxml_get_prefixes(struct lyxml_ctx *xmlctx, const char *value, size_t value_len, struct ly_prefix **val_prefs)
Michal Vasko52927e22020-03-16 17:26:14 +01001079{
1080 LY_ERR ret;
Michal Vaskofd69e1d2020-07-03 11:57:17 +02001081 LY_ARRAY_COUNT_TYPE u;
Radek Krejci7eb54ba2020-05-18 16:30:04 +02001082 uint32_t c;
Michal Vasko52927e22020-03-16 17:26:14 +01001083 const struct lyxml_ns *ns;
1084 const char *start, *stop;
1085 struct ly_prefix *prefixes = NULL;
1086 size_t len;
1087
1088 for (stop = start = value; (size_t)(stop - value) < value_len; start = stop) {
1089 size_t bytes;
1090 ly_getutf8(&stop, &c, &bytes);
1091 if (is_xmlqnamestartchar(c)) {
1092 for (ly_getutf8(&stop, &c, &bytes);
1093 is_xmlqnamechar(c) && (size_t)(stop - value) < value_len;
Radek Krejci1e008d22020-08-17 11:37:37 +02001094 ly_getutf8(&stop, &c, &bytes)) {}
Michal Vasko52927e22020-03-16 17:26:14 +01001095 stop = stop - bytes;
1096 if (*stop == ':') {
1097 /* we have a possible prefix */
1098 len = stop - start;
Michal Vaskoc8a230d2020-08-14 12:17:10 +02001099 ns = lyxml_ns_get(&xmlctx->ns, start, len);
Michal Vasko52927e22020-03-16 17:26:14 +01001100 if (ns) {
1101 struct ly_prefix *p = NULL;
1102
1103 /* check whether we do not already have this prefix stored */
1104 LY_ARRAY_FOR(prefixes, u) {
Radek Krejci1798aae2020-07-14 13:26:06 +02001105 if (!ly_strncmp(prefixes[u].id, start, len)) {
Michal Vasko52927e22020-03-16 17:26:14 +01001106 p = &prefixes[u];
1107 break;
1108 }
1109 }
1110 if (!p) {
Michal Vaskob36053d2020-03-26 15:49:30 +01001111 LY_ARRAY_NEW_GOTO(xmlctx->ctx, prefixes, p, ret, error);
Radek Krejci1798aae2020-07-14 13:26:06 +02001112 p->id = lydict_insert(xmlctx->ctx, start, len);
1113 p->module_ns = lydict_insert(xmlctx->ctx, ns->uri, 0);
Michal Vasko52927e22020-03-16 17:26:14 +01001114 } /* else the prefix already present */
1115 }
1116 }
1117 stop = stop + bytes;
1118 }
1119 }
1120
1121 *val_prefs = prefixes;
1122 return LY_SUCCESS;
1123
1124error:
1125 LY_ARRAY_FOR(prefixes, u) {
Radek Krejci1798aae2020-07-14 13:26:06 +02001126 lydict_remove(xmlctx->ctx, prefixes[u].id);
1127 lydict_remove(xmlctx->ctx, prefixes[u].module_ns);
Michal Vasko52927e22020-03-16 17:26:14 +01001128 }
1129 LY_ARRAY_FREE(prefixes);
1130 return ret;
1131}
1132
1133LY_ERR
1134lyxml_value_compare(const char *value1, const struct ly_prefix *prefs1, const char *value2, const struct ly_prefix *prefs2)
1135{
1136 const char *ptr1, *ptr2, *ns1, *ns2;
Michal Vaskofd69e1d2020-07-03 11:57:17 +02001137 LY_ARRAY_COUNT_TYPE u1, u2;
Michal Vasko52927e22020-03-16 17:26:14 +01001138
1139 if (!value1 && !value2) {
1140 return LY_SUCCESS;
1141 }
1142 if ((value1 && !value2) || (!value1 && value2)) {
1143 return LY_ENOT;
1144 }
1145
1146 ptr1 = value1;
1147 ptr2 = value2;
1148 while (ptr1[0] && ptr2[0]) {
1149 if (ptr1[0] != ptr2[0]) {
1150 /* it can be a start of prefix that maps to the same module */
Radek Krejci1deb5be2020-08-26 16:43:36 +02001151 size_t len;
Michal Vasko52927e22020-03-16 17:26:14 +01001152 ns1 = ns2 = NULL;
Michal Vaskoed4fcfe2020-07-08 10:38:56 +02001153 u1 = u2 = 0;
Michal Vasko52927e22020-03-16 17:26:14 +01001154 if (prefs1) {
1155 /* find module of the first prefix, if any */
1156 LY_ARRAY_FOR(prefs1, u1) {
Radek Krejci1798aae2020-07-14 13:26:06 +02001157 len = strlen(prefs1[u1].id);
1158 if (!strncmp(ptr1, prefs1[u1].id, len) && (ptr1[len] == ':')) {
1159 ns1 = prefs1[u1].module_ns;
Michal Vasko52927e22020-03-16 17:26:14 +01001160 break;
1161 }
1162 }
1163 }
1164 if (prefs2) {
1165 /* find module of the second prefix, if any */
1166 LY_ARRAY_FOR(prefs2, u2) {
Radek Krejci1798aae2020-07-14 13:26:06 +02001167 len = strlen(prefs2[u2].id);
1168 if (!strncmp(ptr2, prefs2[u2].id, len) && (ptr2[len] == ':')) {
1169 ns2 = prefs2[u2].module_ns;
Michal Vasko52927e22020-03-16 17:26:14 +01001170 break;
1171 }
1172 }
1173 }
1174
1175 if (!ns1 || !ns2 || (ns1 != ns2)) {
1176 /* not a prefix or maps to different namespaces */
1177 break;
1178 }
1179
1180 /* skip prefixes in both values (':' is skipped as iter) */
Radek Krejci1798aae2020-07-14 13:26:06 +02001181 ptr1 += strlen(prefs1[u1].id);
1182 ptr2 += strlen(prefs2[u2].id);
Michal Vasko52927e22020-03-16 17:26:14 +01001183 }
1184
1185 ++ptr1;
1186 ++ptr2;
1187 }
1188 if (ptr1[0] || ptr2[0]) {
1189 /* not a match or simply different lengths */
1190 return LY_ENOT;
1191 }
1192
1193 return LY_SUCCESS;
1194}