blob: 6f4344f113278898dabaa5b821298b842e6224d2 [file] [log] [blame]
Radek Krejcid91dbaf2018-09-21 15:51:39 +02001/**
2 * @file xml.c
3 * @author Radek Krejci <rkrejci@cesnet.cz>
Michal Vaskob36053d2020-03-26 15:49:30 +01004 * @author Michal Vasko <mvasko@cesnet.cz>
Radek Krejcid91dbaf2018-09-21 15:51:39 +02005 * @brief Generic XML parser implementation for libyang
6 *
7 * Copyright (c) 2015 - 2018 CESNET, z.s.p.o.
8 *
9 * This source code is licensed under BSD 3-Clause License (the "License").
10 * You may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * https://opensource.org/licenses/BSD-3-Clause
14 */
15
Radek Krejci535ea9f2020-05-29 16:01:05 +020016#define _GNU_SOURCE
17
18#include "xml.h"
Radek Krejci4b74d5e2018-09-26 14:30:55 +020019
Radek Krejcib1890642018-10-03 14:05:40 +020020#include <assert.h>
Radek Krejci7a7fa902018-09-25 17:08:21 +020021#include <ctype.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020022#include <stdint.h>
Radek Krejcie7b95092019-05-15 11:03:07 +020023#include <stdlib.h>
Radek Krejci4b74d5e2018-09-26 14:30:55 +020024#include <string.h>
Radek Krejcica376bd2020-06-11 16:04:06 +020025#include <sys/types.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020026
Radek Krejci535ea9f2020-05-29 16:01:05 +020027#include "common.h"
Michal Vasko5aa44c02020-06-29 11:47:02 +020028#include "compat.h"
Radek Krejci535ea9f2020-05-29 16:01:05 +020029#include "dict.h"
Michal Vaskoafac7822020-10-20 14:22:26 +020030#include "in_internal.h"
31#include "out_internal.h"
Radek Krejci535ea9f2020-05-29 16:01:05 +020032#include "tree.h"
33#include "tree_data.h"
Radek Krejcid91dbaf2018-09-21 15:51:39 +020034
Michal Vaskob36053d2020-03-26 15:49:30 +010035/* Move input p by s characters, if EOF log with lyxml_ctx c */
Michal Vaskod989ba02020-08-24 10:59:24 +020036#define move_input(c, s) ly_in_skip(c->in, s); LY_CHECK_ERR_RET(!c->in->current[0], LOGVAL(c->ctx, LY_VLOG_LINE, &c->line, LY_VCODE_EOF), LY_EVALID)
Radek Krejcid91dbaf2018-09-21 15:51:39 +020037
Radek Krejcib1890642018-10-03 14:05:40 +020038/* Ignore whitespaces in the input string p */
Michal Vasko63f3d842020-07-08 10:10:14 +020039#define ign_xmlws(c) while (is_xmlws(*(c)->in->current)) {if (*(c)->in->current == '\n') {++c->line;} ly_in_skip(c->in, 1);}
Michal Vaskob36053d2020-03-26 15:49:30 +010040
Radek Krejci857189e2020-09-01 13:26:36 +020041static LY_ERR lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *value_len, ly_bool *ws_only,
42 ly_bool *dynamic);
Radek Krejcid91dbaf2018-09-21 15:51:39 +020043
/**
 * @brief Scan the input for the first occurrence of a delimiter, counting the new lines passed on the way.
 *
 * Note the inverted return convention: the result is an "error" flag, not a "found" flag.
 *
 * @param[in] input NUL-terminated string to scan.
 * @param[in] delim Delimiter to search for.
 * @param[in] delim_len Length of @p delim in bytes.
 * @param[out] newlines Number of '\n' characters found before the delimiter (or before the end of @p input).
 * @param[out] parsed Number of bytes preceding the delimiter; the whole length of @p input if it was not found.
 * @return 0 if the delimiter was found, 1 if the end of @p input was reached without finding it.
 */
static ly_bool
ign_todelim(const char *input, const char *delim, size_t delim_len, size_t *newlines, size_t *parsed)
{
    *newlines = 0;
    *parsed = 0;

    for ( ; *input; ++input, ++(*parsed)) {
        if (*input != *delim) {
            /* cannot be the start of the delimiter, only keep track of lines */
            if (*input == '\n') {
                ++(*newlines);
            }
            continue;
        }

        /* first byte matches, compare the whole delimiter (never reads past the terminating NUL) */
        if (!strncmp(input, delim, delim_len)) {
            /* delim found */
            return 0;
        }
    }

    /* delim not found */
    return 1;
}
81
Radek Krejci4b74d5e2018-09-26 14:30:55 +020082/**
Michal Vaskob36053d2020-03-26 15:49:30 +010083 * @brief Check/Get an XML identifier from the input string.
84 *
85 * The identifier must have at least one valid character complying the name start character constraints.
86 * The identifier is terminated by the first character, which does not comply to the name character constraints.
87 *
88 * See https://www.w3.org/TR/xml-names/#NT-NCName
89 *
90 * @param[in] xmlctx XML context.
91 * @param[out] start Pointer to the start of the identifier.
92 * @param[out] end Pointer ot the end of the identifier.
93 * @return LY_ERR value.
94 */
95static LY_ERR
96lyxml_parse_identifier(struct lyxml_ctx *xmlctx, const char **start, const char **end)
97{
98 const char *s, *in;
99 uint32_t c;
100 size_t parsed;
101 LY_ERR rc;
102
Michal Vasko63f3d842020-07-08 10:10:14 +0200103 in = s = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100104
105 /* check NameStartChar (minus colon) */
106 LY_CHECK_ERR_RET(ly_getutf8(&in, &c, &parsed),
Michal Vasko69730152020-10-09 16:30:07 +0200107 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INCHAR, in[0]),
108 LY_EVALID);
Michal Vaskob36053d2020-03-26 15:49:30 +0100109 LY_CHECK_ERR_RET(!is_xmlqnamestartchar(c),
Michal Vasko69730152020-10-09 16:30:07 +0200110 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
111 "Identifier \"%s\" starts with an invalid character.", in - parsed),
112 LY_EVALID);
Michal Vaskob36053d2020-03-26 15:49:30 +0100113
114 /* check rest of the identifier */
115 do {
116 /* move only successfully parsed bytes */
Michal Vasko63f3d842020-07-08 10:10:14 +0200117 ly_in_skip(xmlctx->in, parsed);
Michal Vaskob36053d2020-03-26 15:49:30 +0100118
119 rc = ly_getutf8(&in, &c, &parsed);
120 LY_CHECK_ERR_RET(rc, LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INCHAR, in[0]), LY_EVALID);
121 } while (is_xmlqnamechar(c));
122
123 *start = s;
Michal Vasko63f3d842020-07-08 10:10:14 +0200124 *end = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100125 return LY_SUCCESS;
126}
127
128/**
129 * @brief Add namespace definition into XML context.
130 *
131 * Namespaces from a single element are supposed to be added sequentially together (not interleaved by a namespace from other
132 * element). This mimic namespace visibility, since the namespace defined in element E is not visible from its parents or
133 * siblings. On the other hand, namespace from a parent element can be redefined in a child element. This is also reflected
134 * by lyxml_ns_get() which returns the most recent namespace definition for the given prefix.
135 *
136 * When leaving processing of a subtree of some element (after it is removed from xmlctx->elements), caller is supposed to call
137 * lyxml_ns_rm() to remove all the namespaces defined in such an element from the context.
138 *
139 * @param[in] xmlctx XML context to work with.
140 * @param[in] prefix Pointer to the namespace prefix. Can be NULL for default namespace.
141 * @param[in] prefix_len Length of the prefix.
142 * @param[in] uri Namespace URI (value) to store directly. Value is always spent.
143 * @return LY_ERR values.
144 */
145LY_ERR
146lyxml_ns_add(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, char *uri)
147{
Radek Krejciba03a5a2020-08-27 14:40:41 +0200148 LY_ERR ret = LY_SUCCESS;
Michal Vaskob36053d2020-03-26 15:49:30 +0100149 struct lyxml_ns *ns;
150
151 ns = malloc(sizeof *ns);
152 LY_CHECK_ERR_RET(!ns, LOGMEM(xmlctx->ctx), LY_EMEM);
153
154 /* we need to connect the depth of the element where the namespace is defined with the
155 * namespace record to be able to maintain (remove) the record when the parser leaves
156 * (to its sibling or back to the parent) the element where the namespace was defined */
157 ns->depth = xmlctx->elements.count;
158
159 ns->uri = uri;
160 if (prefix) {
161 ns->prefix = strndup(prefix, prefix_len);
162 LY_CHECK_ERR_RET(!ns->prefix, LOGMEM(xmlctx->ctx); free(ns->uri); free(ns), LY_EMEM);
163 } else {
164 ns->prefix = NULL;
165 }
166
Radek Krejci3d92e442020-10-12 12:48:13 +0200167 ret = ly_set_add(&xmlctx->ns, ns, 1, NULL);
Radek Krejciba03a5a2020-08-27 14:40:41 +0200168 LY_CHECK_ERR_RET(ret, free(ns->prefix); free(ns->uri); free(ns), ret);
169
Michal Vaskob36053d2020-03-26 15:49:30 +0100170 return LY_SUCCESS;
171}
172
173/**
174 * @brief Remove all the namespaces defined in the element recently closed (removed from the xmlctx->elements).
175 *
176 * @param[in] xmlctx XML context to work with.
177 */
178void
179lyxml_ns_rm(struct lyxml_ctx *xmlctx)
180{
Radek Krejci1deb5be2020-08-26 16:43:36 +0200181 for (uint32_t u = xmlctx->ns.count - 1; u + 1 > 0; --u) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100182 if (((struct lyxml_ns *)xmlctx->ns.objs[u])->depth != xmlctx->elements.count + 1) {
183 /* we are done, the namespaces from a single element are supposed to be together */
184 break;
185 }
186 /* remove the ns structure */
187 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->prefix);
188 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->uri);
189 free(xmlctx->ns.objs[u]);
190 --xmlctx->ns.count;
191 }
192
193 if (!xmlctx->ns.count) {
194 /* cleanup the xmlctx's namespaces storage */
195 ly_set_erase(&xmlctx->ns, NULL);
196 }
197}
198
Michal Vaskob36053d2020-03-26 15:49:30 +0100199const struct lyxml_ns *
Michal Vaskoc8a230d2020-08-14 12:17:10 +0200200lyxml_ns_get(const struct ly_set *ns_set, const char *prefix, size_t prefix_len)
Michal Vaskob36053d2020-03-26 15:49:30 +0100201{
Michal Vaskob36053d2020-03-26 15:49:30 +0100202 struct lyxml_ns *ns;
203
Radek Krejci1deb5be2020-08-26 16:43:36 +0200204 for (uint32_t u = ns_set->count - 1; u + 1 > 0; --u) {
Michal Vaskoc8a230d2020-08-14 12:17:10 +0200205 ns = (struct lyxml_ns *)ns_set->objs[u];
Michal Vaskob36053d2020-03-26 15:49:30 +0100206 if (prefix && prefix_len) {
207 if (ns->prefix && !ly_strncmp(ns->prefix, prefix, prefix_len)) {
208 return ns;
209 }
210 } else if (!ns->prefix) {
211 /* default namespace */
212 return ns;
213 }
214 }
215
216 return NULL;
217}
218
Michal Vasko8cef5232020-06-15 17:59:47 +0200219/**
220 * @brief Skip in the input until EOF or just after the opening tag.
221 * Handles special XML constructs (comment, cdata, doctype).
222 *
223 * @param[in] xmlctx XML context to use.
224 * @return LY_ERR value.
225 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100226static LY_ERR
227lyxml_skip_until_end_or_after_otag(struct lyxml_ctx *xmlctx)
228{
229 const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */
Michal Vasko63f3d842020-07-08 10:10:14 +0200230 const char *endtag, *sectname;
231 size_t endtag_len, newlines, parsed;
Radek Krejci857189e2020-09-01 13:26:36 +0200232 ly_bool rc;
Michal Vaskob36053d2020-03-26 15:49:30 +0100233
234 while (1) {
235 ign_xmlws(xmlctx);
236
Michal Vasko63f3d842020-07-08 10:10:14 +0200237 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100238 /* EOF */
239 if (xmlctx->elements.count) {
240 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
241 return LY_EVALID;
242 }
243 return LY_SUCCESS;
Michal Vasko63f3d842020-07-08 10:10:14 +0200244 } else if (xmlctx->in->current[0] != '<') {
245 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
Michal Vasko69730152020-10-09 16:30:07 +0200246 xmlctx->in->current, "element tag start ('<')");
Michal Vaskob36053d2020-03-26 15:49:30 +0100247 return LY_EVALID;
248 }
249 move_input(xmlctx, 1);
250
Michal Vasko63f3d842020-07-08 10:10:14 +0200251 if (xmlctx->in->current[0] == '!') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100252 move_input(xmlctx, 1);
253 /* sections to ignore */
Michal Vasko63f3d842020-07-08 10:10:14 +0200254 if (!strncmp(xmlctx->in->current, "--", 2)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100255 /* comment */
256 move_input(xmlctx, 2);
257 sectname = "Comment";
258 endtag = "-->";
259 endtag_len = 3;
Michal Vasko63f3d842020-07-08 10:10:14 +0200260 } else if (!strncmp(xmlctx->in->current, "[CDATA[", 7)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100261 /* CDATA section */
262 move_input(xmlctx, 7);
263 sectname = "CData";
264 endtag = "]]>";
265 endtag_len = 3;
Michal Vasko63f3d842020-07-08 10:10:14 +0200266 } else if (!strncmp(xmlctx->in->current, "DOCTYPE", 7)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100267 /* Document type declaration - not supported */
268 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_NSUPP, "Document Type Declaration");
269 return LY_EVALID;
270 } else {
Michal Vasko63f3d842020-07-08 10:10:14 +0200271 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Unknown XML section \"%.20s\".",
Michal Vasko69730152020-10-09 16:30:07 +0200272 &xmlctx->in->current[-2]);
Michal Vaskob36053d2020-03-26 15:49:30 +0100273 return LY_EVALID;
274 }
Michal Vasko63f3d842020-07-08 10:10:14 +0200275 rc = ign_todelim(xmlctx->in->current, endtag, endtag_len, &newlines, &parsed);
276 LY_CHECK_ERR_RET(rc, LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_NTERM, sectname), LY_EVALID);
Michal Vaskob36053d2020-03-26 15:49:30 +0100277 xmlctx->line += newlines;
Michal Vasko63f3d842020-07-08 10:10:14 +0200278 ly_in_skip(xmlctx->in, parsed + endtag_len);
279 } else if (xmlctx->in->current[0] == '?') {
280 rc = ign_todelim(xmlctx->in->current, "?>", 2, &newlines, &parsed);
281 LY_CHECK_ERR_RET(rc, LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_NTERM, "Declaration"), LY_EVALID);
Michal Vaskob36053d2020-03-26 15:49:30 +0100282 xmlctx->line += newlines;
Michal Vasko63f3d842020-07-08 10:10:14 +0200283 ly_in_skip(xmlctx->in, parsed + 2);
Michal Vaskob36053d2020-03-26 15:49:30 +0100284 } else {
285 /* other non-WS character */
286 break;
287 }
288 }
289
290 return LY_SUCCESS;
291}
292
Michal Vasko8cef5232020-06-15 17:59:47 +0200293/**
294 * @brief Parse QName.
295 *
296 * @param[in] xmlctx XML context to use.
297 * @param[out] prefix Parsed prefix, may be NULL.
298 * @param[out] prefix_len Length of @p prefix.
299 * @param[out] name Parsed name.
300 * @param[out] name_len Length of @p name.
301 * @return LY_ERR value.
302 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100303static LY_ERR
304lyxml_parse_qname(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
305{
306 const char *start, *end;
307
308 *prefix = NULL;
309 *prefix_len = 0;
310
311 LY_CHECK_RET(lyxml_parse_identifier(xmlctx, &start, &end));
312 if (end[0] == ':') {
313 /* we have prefixed identifier */
314 *prefix = start;
315 *prefix_len = end - start;
316
317 move_input(xmlctx, 1);
318 LY_CHECK_RET(lyxml_parse_identifier(xmlctx, &start, &end));
319 }
320
321 *name = start;
322 *name_len = end - start;
323 return LY_SUCCESS;
324}
325
326/**
Michal Vasko8cef5232020-06-15 17:59:47 +0200327 * @brief Parse XML text content (value).
328 *
329 * @param[in] xmlctx XML context to use.
330 * @param[in] endchar Expected character to mark value end.
331 * @param[out] value Parsed value.
332 * @param[out] length Length of @p value.
333 * @param[out] ws_only Whether the value is empty/white-spaces only.
334 * @param[out] dynamic Whether the value was dynamically allocated.
335 * @return LY_ERR value.
336 */
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200337static LY_ERR
Radek Krejci857189e2020-09-01 13:26:36 +0200338lyxml_parse_value(struct lyxml_ctx *xmlctx, char endchar, char **value, size_t *length, ly_bool *ws_only, ly_bool *dynamic)
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200339{
Michal Vaskob36053d2020-03-26 15:49:30 +0100340#define BUFSIZE 24
341#define BUFSIZE_STEP 128
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200342
Michal Vaskob36053d2020-03-26 15:49:30 +0100343 const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */
Michal Vasko63f3d842020-07-08 10:10:14 +0200344 const char *in = xmlctx->in->current, *start;
Michal Vaskob36053d2020-03-26 15:49:30 +0100345 char *buf = NULL;
Radek Krejci4ad42aa2019-07-23 16:55:58 +0200346 size_t offset; /* read offset in input buffer */
347 size_t len; /* length of the output string (write offset in output buffer) */
348 size_t size = 0; /* size of the output buffer */
Radek Krejci7a7fa902018-09-25 17:08:21 +0200349 void *p;
Radek Krejci117d2082018-09-26 10:05:14 +0200350 uint32_t n;
Michal Vaskob36053d2020-03-26 15:49:30 +0100351 size_t u;
Radek Krejci857189e2020-09-01 13:26:36 +0200352 ly_bool ws = 1;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200353
Michal Vaskob36053d2020-03-26 15:49:30 +0100354 assert(xmlctx);
Radek Krejcib1890642018-10-03 14:05:40 +0200355
Radek Krejcid70d1072018-10-09 14:20:47 +0200356 /* init */
Michal Vaskob36053d2020-03-26 15:49:30 +0100357 start = in;
Radek Krejcid70d1072018-10-09 14:20:47 +0200358 offset = len = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200359
360 /* parse */
361 while (in[offset]) {
362 if (in[offset] == '&') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100363 /* non WS */
364 ws = 0;
Radek Krejcid70d1072018-10-09 14:20:47 +0200365
Michal Vaskob36053d2020-03-26 15:49:30 +0100366 if (!buf) {
367 /* prepare output buffer */
368 buf = malloc(BUFSIZE);
369 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
370 size = BUFSIZE;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200371 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100372
373 /* allocate enough for the offset and next character,
374 * we will need 4 bytes at most since we support only the predefined
375 * (one-char) entities and character references */
Juraj Vijtiukcb017cc2020-07-08 16:19:58 +0200376 while (len + offset + 4 >= size) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100377 buf = ly_realloc(buf, size + BUFSIZE_STEP);
378 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
379 size += BUFSIZE_STEP;
380 }
381
382 if (offset) {
383 /* store what we have so far */
384 memcpy(&buf[len], in, offset);
385 len += offset;
386 in += offset;
387 offset = 0;
388 }
389
Radek Krejci7a7fa902018-09-25 17:08:21 +0200390 ++offset;
391 if (in[offset] != '#') {
392 /* entity reference - only predefined references are supported */
393 if (!strncmp(&in[offset], "lt;", 3)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100394 buf[len++] = '<';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200395 in += 4; /* &lt; */
396 } else if (!strncmp(&in[offset], "gt;", 3)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100397 buf[len++] = '>';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200398 in += 4; /* &gt; */
399 } else if (!strncmp(&in[offset], "amp;", 4)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100400 buf[len++] = '&';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200401 in += 5; /* &amp; */
402 } else if (!strncmp(&in[offset], "apos;", 5)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100403 buf[len++] = '\'';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200404 in += 6; /* &apos; */
405 } else if (!strncmp(&in[offset], "quot;", 5)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100406 buf[len++] = '\"';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200407 in += 6; /* &quot; */
408 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100409 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
Michal Vasko69730152020-10-09 16:30:07 +0200410 "Entity reference \"%.*s\" not supported, only predefined references allowed.", 10, &in[offset - 1]);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200411 goto error;
412 }
413 offset = 0;
414 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100415 p = (void *)&in[offset - 1];
Radek Krejci7a7fa902018-09-25 17:08:21 +0200416 /* character reference */
417 ++offset;
418 if (isdigit(in[offset])) {
419 for (n = 0; isdigit(in[offset]); offset++) {
420 n = (10 * n) + (in[offset] - '0');
421 }
Michal Vasko69730152020-10-09 16:30:07 +0200422 } else if ((in[offset] == 'x') && isxdigit(in[offset + 1])) {
Radek Krejci7a7fa902018-09-25 17:08:21 +0200423 for (n = 0, ++offset; isxdigit(in[offset]); offset++) {
424 if (isdigit(in[offset])) {
425 u = (in[offset] - '0');
426 } else if (in[offset] > 'F') {
427 u = 10 + (in[offset] - 'a');
428 } else {
429 u = 10 + (in[offset] - 'A');
430 }
431 n = (16 * n) + u;
432 }
433 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100434 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Invalid character reference \"%.*s\".", 12, p);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200435 goto error;
436
437 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100438
Radek Krejci7a7fa902018-09-25 17:08:21 +0200439 LY_CHECK_ERR_GOTO(in[offset] != ';',
Michal Vasko69730152020-10-09 16:30:07 +0200440 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP,
441 LY_VCODE_INSTREXP_len(&in[offset]), &in[offset], ";"),
442 error);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200443 ++offset;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200444 LY_CHECK_ERR_GOTO(ly_pututf8(&buf[len], n, &u),
Michal Vasko69730152020-10-09 16:30:07 +0200445 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
446 "Invalid character reference \"%.*s\" (0x%08x).", 12, p, n),
447 error);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200448 len += u;
449 in += offset;
450 offset = 0;
451 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100452 } else if (in[offset] == endchar) {
Radek Krejci7a7fa902018-09-25 17:08:21 +0200453 /* end of string */
Radek Krejcid70d1072018-10-09 14:20:47 +0200454 if (buf) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100455 /* realloc exact size string */
456 buf = ly_realloc(buf, len + offset + 1);
457 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
458 size = len + offset + 1;
Radek Krejcid70d1072018-10-09 14:20:47 +0200459 memcpy(&buf[len], in, offset);
Michal Vaskob36053d2020-03-26 15:49:30 +0100460
461 /* set terminating NULL byte */
462 buf[len + offset] = '\0';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200463 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200464 len += offset;
Michal Vaskob36053d2020-03-26 15:49:30 +0100465 in += offset;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200466 goto success;
467 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100468 if (!is_xmlws(in[offset])) {
469 /* non WS */
470 ws = 0;
471 }
472
Radek Krejci7a7fa902018-09-25 17:08:21 +0200473 /* log lines */
474 if (in[offset] == '\n') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100475 ++xmlctx->line;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200476 }
477
478 /* continue */
479 ++offset;
480 }
481 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100482
483 /* EOF reached before endchar */
484 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
485
Radek Krejci7a7fa902018-09-25 17:08:21 +0200486error:
Michal Vaskob36053d2020-03-26 15:49:30 +0100487 free(buf);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200488 return LY_EVALID;
489
490success:
Radek Krejcid70d1072018-10-09 14:20:47 +0200491 if (buf) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100492 *value = buf;
493 *dynamic = 1;
494 } else {
495 *value = (char *)start;
496 *dynamic = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200497 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100498 *length = len;
499 *ws_only = ws;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200500
Michal Vasko63f3d842020-07-08 10:10:14 +0200501 ly_in_skip(xmlctx->in, in - xmlctx->in->current);
Michal Vaskob36053d2020-03-26 15:49:30 +0100502 return LY_SUCCESS;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200503
504#undef BUFSIZE
505#undef BUFSIZE_STEP
Radek Krejci7a7fa902018-09-25 17:08:21 +0200506}
507
Michal Vasko8cef5232020-06-15 17:59:47 +0200508/**
509 * @brief Parse XML closing element and match it to a stored starting element.
510 *
511 * @param[in] xmlctx XML context to use.
512 * @param[in] prefix Expected closing element prefix.
513 * @param[in] prefix_len Length of @p prefix.
514 * @param[in] name Expected closing element name.
515 * @param[in] name_len Length of @p name.
516 * @param[in] empty Whether we are parsing a special "empty" element (with joined starting and closing tag) with no value.
517 * @return LY_ERR value.
518 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100519static LY_ERR
520lyxml_close_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, const char *name, size_t name_len,
Radek Krejci857189e2020-09-01 13:26:36 +0200521 ly_bool empty)
Radek Krejcid972c252018-09-25 13:23:39 +0200522{
Michal Vaskob36053d2020-03-26 15:49:30 +0100523 struct lyxml_elem *e;
Radek Krejcid972c252018-09-25 13:23:39 +0200524
Michal Vaskob36053d2020-03-26 15:49:30 +0100525 /* match opening and closing element tags */
526 if (!xmlctx->elements.count) {
527 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Stray closing element tag (\"%.*s\").",
Michal Vasko69730152020-10-09 16:30:07 +0200528 name_len, name);
Michal Vaskob36053d2020-03-26 15:49:30 +0100529 return LY_EVALID;
530 }
Radek Krejcid972c252018-09-25 13:23:39 +0200531
Michal Vaskob36053d2020-03-26 15:49:30 +0100532 e = (struct lyxml_elem *)xmlctx->elements.objs[xmlctx->elements.count - 1];
Michal Vasko69730152020-10-09 16:30:07 +0200533 if ((e->prefix_len != prefix_len) || (e->name_len != name_len) ||
534 (prefix_len && strncmp(prefix, e->prefix, e->prefix_len)) || strncmp(name, e->name, e->name_len)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100535 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
Michal Vasko69730152020-10-09 16:30:07 +0200536 "Opening (\"%.*s%s%.*s\") and closing (\"%.*s%s%.*s\") elements tag mismatch.",
537 e->prefix_len, e->prefix ? e->prefix : "", e->prefix ? ":" : "", e->name_len, e->name,
538 prefix_len, prefix ? prefix : "", prefix ? ":" : "", name_len, name);
Michal Vaskob36053d2020-03-26 15:49:30 +0100539 return LY_EVALID;
540 }
Radek Krejcid972c252018-09-25 13:23:39 +0200541
Michal Vaskob36053d2020-03-26 15:49:30 +0100542 /* opening and closing element tags matches, remove record from the opening tags list */
543 ly_set_rm_index(&xmlctx->elements, xmlctx->elements.count - 1, free);
Radek Krejcid972c252018-09-25 13:23:39 +0200544
Michal Vaskob36053d2020-03-26 15:49:30 +0100545 /* remove also the namespaces connected with the element */
546 lyxml_ns_rm(xmlctx);
Radek Krejcid972c252018-09-25 13:23:39 +0200547
Michal Vaskob36053d2020-03-26 15:49:30 +0100548 /* skip WS */
549 ign_xmlws(xmlctx);
Radek Krejcid972c252018-09-25 13:23:39 +0200550
Michal Vaskob36053d2020-03-26 15:49:30 +0100551 /* special "<elem/>" element */
Michal Vasko63f3d842020-07-08 10:10:14 +0200552 if (empty && (xmlctx->in->current[0] == '/')) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100553 move_input(xmlctx, 1);
554 }
Michal Vasko52927e22020-03-16 17:26:14 +0100555
Michal Vaskob36053d2020-03-26 15:49:30 +0100556 /* parse closing tag */
Michal Vasko63f3d842020-07-08 10:10:14 +0200557 if (xmlctx->in->current[0] != '>') {
558 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
Michal Vasko69730152020-10-09 16:30:07 +0200559 xmlctx->in->current, "element tag termination ('>')");
Michal Vaskob36053d2020-03-26 15:49:30 +0100560 return LY_EVALID;
561 }
Michal Vasko52927e22020-03-16 17:26:14 +0100562
Michal Vaskob36053d2020-03-26 15:49:30 +0100563 /* move after closing tag without checking for EOF */
Michal Vasko63f3d842020-07-08 10:10:14 +0200564 ly_in_skip(xmlctx->in, 1);
Michal Vasko52927e22020-03-16 17:26:14 +0100565
Radek Krejcid972c252018-09-25 13:23:39 +0200566 return LY_SUCCESS;
567}
568
Michal Vasko8cef5232020-06-15 17:59:47 +0200569/**
570 * @brief Store parsed opening element and parse any included namespaces.
571 *
572 * @param[in] xmlctx XML context to use.
573 * @param[in] prefix Parsed starting element prefix.
574 * @param[in] prefix_len Length of @p prefix.
575 * @param[in] name Parsed starting element name.
576 * @param[in] name_len Length of @p name.
577 * @return LY_ERR value.
578 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100579static LY_ERR
580lyxml_open_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, const char *name, size_t name_len)
Radek Krejcib1890642018-10-03 14:05:40 +0200581{
Michal Vaskob36053d2020-03-26 15:49:30 +0100582 LY_ERR ret = LY_SUCCESS;
583 struct lyxml_elem *e;
584 const char *prev_input;
585 char *value;
586 size_t parsed, value_len;
Radek Krejci857189e2020-09-01 13:26:36 +0200587 ly_bool ws_only, dynamic, is_ns;
Michal Vaskob36053d2020-03-26 15:49:30 +0100588 uint32_t c;
Radek Krejcib1890642018-10-03 14:05:40 +0200589
Michal Vaskob36053d2020-03-26 15:49:30 +0100590 /* store element opening tag information */
591 e = malloc(sizeof *e);
592 LY_CHECK_ERR_RET(!e, LOGMEM(xmlctx->ctx), LY_EMEM);
593 e->name = name;
594 e->prefix = prefix;
595 e->name_len = name_len;
596 e->prefix_len = prefix_len;
Radek Krejci3d92e442020-10-12 12:48:13 +0200597 LY_CHECK_RET(ly_set_add(&xmlctx->elements, e, 1, NULL));
Michal Vaskob36053d2020-03-26 15:49:30 +0100598
599 /* skip WS */
600 ign_xmlws(xmlctx);
601
602 /* parse and store all namespaces */
Michal Vasko63f3d842020-07-08 10:10:14 +0200603 prev_input = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100604 is_ns = 1;
Michal Vasko63f3d842020-07-08 10:10:14 +0200605 while ((xmlctx->in->current[0] != '\0') && !ly_getutf8(&xmlctx->in->current, &c, &parsed) && is_xmlqnamestartchar(c)) {
606 xmlctx->in->current -= parsed;
Michal Vaskob36053d2020-03-26 15:49:30 +0100607
608 /* parse attribute name */
609 LY_CHECK_GOTO(ret = lyxml_parse_qname(xmlctx, &prefix, &prefix_len, &name, &name_len), cleanup);
610
611 /* parse the value */
612 LY_CHECK_GOTO(ret = lyxml_next_attr_content(xmlctx, (const char **)&value, &value_len, &ws_only, &dynamic), cleanup);
613
614 /* store every namespace */
615 if ((prefix && !ly_strncmp("xmlns", prefix, prefix_len)) || (!prefix && !ly_strncmp("xmlns", name, name_len))) {
616 LY_CHECK_GOTO(ret = lyxml_ns_add(xmlctx, prefix ? name : NULL, prefix ? name_len : 0,
Michal Vasko69730152020-10-09 16:30:07 +0200617 dynamic ? value : strndup(value, value_len)), cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100618 dynamic = 0;
619 } else {
620 /* not a namespace */
621 is_ns = 0;
622 }
623 if (dynamic) {
624 free(value);
625 }
626
627 /* skip WS */
628 ign_xmlws(xmlctx);
629
630 if (is_ns) {
631 /* we can actually skip all the namespaces as there is no reason to parse them again */
Michal Vasko63f3d842020-07-08 10:10:14 +0200632 prev_input = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100633 }
Radek Krejcib1890642018-10-03 14:05:40 +0200634 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100635
636cleanup:
637 if (!ret) {
Michal Vasko63f3d842020-07-08 10:10:14 +0200638 xmlctx->in->current = prev_input;
Michal Vaskob36053d2020-03-26 15:49:30 +0100639 }
640 return ret;
641}
642
Michal Vasko8cef5232020-06-15 17:59:47 +0200643/**
644 * @brief Move parser to the attribute content and parse it.
645 *
646 * @param[in] xmlctx XML context to use.
647 * @param[out] value Parsed attribute value.
648 * @param[out] value_len Length of @p value.
649 * @param[out] ws_only Whether the value is empty/white-spaces only.
650 * @param[out] dynamic Whether the value was dynamically allocated.
651 * @return LY_ERR value.
652 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100653static LY_ERR
Radek Krejci857189e2020-09-01 13:26:36 +0200654lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *value_len, ly_bool *ws_only, ly_bool *dynamic)
Michal Vaskob36053d2020-03-26 15:49:30 +0100655{
656 char quot;
657
658 /* skip WS */
659 ign_xmlws(xmlctx);
660
661 /* skip '=' */
Michal Vasko63f3d842020-07-08 10:10:14 +0200662 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100663 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
664 return LY_EVALID;
Michal Vasko63f3d842020-07-08 10:10:14 +0200665 } else if (xmlctx->in->current[0] != '=') {
666 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
Michal Vasko69730152020-10-09 16:30:07 +0200667 xmlctx->in->current, "'='");
Michal Vaskob36053d2020-03-26 15:49:30 +0100668 return LY_EVALID;
669 }
670 move_input(xmlctx, 1);
671
672 /* skip WS */
673 ign_xmlws(xmlctx);
674
675 /* find quotes */
Michal Vasko63f3d842020-07-08 10:10:14 +0200676 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100677 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
678 return LY_EVALID;
Michal Vasko63f3d842020-07-08 10:10:14 +0200679 } else if ((xmlctx->in->current[0] != '\'') && (xmlctx->in->current[0] != '\"')) {
680 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
Michal Vasko69730152020-10-09 16:30:07 +0200681 xmlctx->in->current, "either single or double quotation mark");
Michal Vaskob36053d2020-03-26 15:49:30 +0100682 return LY_EVALID;
683 }
684
685 /* remember quote */
Michal Vasko63f3d842020-07-08 10:10:14 +0200686 quot = xmlctx->in->current[0];
Michal Vaskob36053d2020-03-26 15:49:30 +0100687 move_input(xmlctx, 1);
688
689 /* parse attribute value */
690 LY_CHECK_RET(lyxml_parse_value(xmlctx, quot, (char **)value, value_len, ws_only, dynamic));
691
692 /* move after ending quote (without checking for EOF) */
Michal Vasko63f3d842020-07-08 10:10:14 +0200693 ly_in_skip(xmlctx->in, 1);
Michal Vaskob36053d2020-03-26 15:49:30 +0100694
695 return LY_SUCCESS;
696}
697
Michal Vasko8cef5232020-06-15 17:59:47 +0200698/**
699 * @brief Move parser to the next attribute and parse it.
700 *
701 * @param[in] xmlctx XML context to use.
702 * @param[out] prefix Parsed attribute prefix.
703 * @param[out] prefix_len Length of @p prefix.
704 * @param[out] name Parsed attribute name.
705 * @param[out] name_len Length of @p name.
706 * @return LY_ERR value.
707 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100708static LY_ERR
709lyxml_next_attribute(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
710{
711 const char *in;
712 char *value;
713 uint32_t c;
714 size_t parsed, value_len;
Radek Krejci857189e2020-09-01 13:26:36 +0200715 ly_bool ws_only, dynamic;
Michal Vaskob36053d2020-03-26 15:49:30 +0100716
717 /* skip WS */
718 ign_xmlws(xmlctx);
719
720 /* parse only possible attributes */
Michal Vasko63f3d842020-07-08 10:10:14 +0200721 while ((xmlctx->in->current[0] != '>') && (xmlctx->in->current[0] != '/')) {
722 in = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100723 if (in[0] == '\0') {
724 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
725 return LY_EVALID;
726 } else if ((ly_getutf8(&in, &c, &parsed) || !is_xmlqnamestartchar(c))) {
727 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in - parsed), in - parsed,
Michal Vasko69730152020-10-09 16:30:07 +0200728 "element tag end ('>' or '/>') or an attribute");
Michal Vaskob36053d2020-03-26 15:49:30 +0100729 return LY_EVALID;
730 }
731
732 /* parse attribute name */
733 LY_CHECK_RET(lyxml_parse_qname(xmlctx, prefix, prefix_len, name, name_len));
734
735 if ((!*prefix || ly_strncmp("xmlns", *prefix, *prefix_len)) && (*prefix || ly_strncmp("xmlns", *name, *name_len))) {
736 /* standard attribute */
737 break;
738 }
739
740 /* namespace, skip it */
741 LY_CHECK_RET(lyxml_next_attr_content(xmlctx, (const char **)&value, &value_len, &ws_only, &dynamic));
742 if (dynamic) {
743 free(value);
744 }
745
746 /* skip WS */
747 ign_xmlws(xmlctx);
748 }
749
750 return LY_SUCCESS;
751}
752
Michal Vasko8cef5232020-06-15 17:59:47 +0200753/**
754 * @brief Move parser to the next element and parse it.
755 *
756 * @param[in] xmlctx XML context to use.
757 * @param[out] prefix Parsed element prefix.
758 * @param[out] prefix_len Length of @p prefix.
759 * @param[out] name Parse element name.
760 * @param[out] name_len Length of @p name.
Radek Krejci1deb5be2020-08-26 16:43:36 +0200761 * @param[out] closing Flag if the element is closing (includes '/').
Michal Vasko8cef5232020-06-15 17:59:47 +0200762 * @return LY_ERR value.
763 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100764static LY_ERR
765lyxml_next_element(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len,
Radek Krejci857189e2020-09-01 13:26:36 +0200766 ly_bool *closing)
Michal Vaskob36053d2020-03-26 15:49:30 +0100767{
768 /* skip WS until EOF or after opening tag '<' */
769 LY_CHECK_RET(lyxml_skip_until_end_or_after_otag(xmlctx));
Michal Vasko63f3d842020-07-08 10:10:14 +0200770 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100771 /* set return values */
772 *prefix = *name = NULL;
773 *prefix_len = *name_len = 0;
774 return LY_SUCCESS;
775 }
776
Michal Vasko63f3d842020-07-08 10:10:14 +0200777 if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100778 move_input(xmlctx, 1);
779 *closing = 1;
780 } else {
781 *closing = 0;
782 }
783
784 /* skip WS */
785 ign_xmlws(xmlctx);
786
787 /* parse element name */
788 LY_CHECK_RET(lyxml_parse_qname(xmlctx, prefix, prefix_len, name, name_len));
789
790 return LY_SUCCESS;
791}
792
793LY_ERR
Michal Vasko63f3d842020-07-08 10:10:14 +0200794lyxml_ctx_new(const struct ly_ctx *ctx, struct ly_in *in, struct lyxml_ctx **xmlctx_p)
Michal Vaskob36053d2020-03-26 15:49:30 +0100795{
796 LY_ERR ret = LY_SUCCESS;
797 struct lyxml_ctx *xmlctx;
Radek Krejci857189e2020-09-01 13:26:36 +0200798 ly_bool closing;
Michal Vaskob36053d2020-03-26 15:49:30 +0100799
800 /* new context */
801 xmlctx = calloc(1, sizeof *xmlctx);
802 LY_CHECK_ERR_RET(!xmlctx, LOGMEM(ctx), LY_EMEM);
803 xmlctx->ctx = ctx;
804 xmlctx->line = 1;
Michal Vasko63f3d842020-07-08 10:10:14 +0200805 xmlctx->in = in;
Michal Vaskob36053d2020-03-26 15:49:30 +0100806
807 /* parse next element, if any */
808 LY_CHECK_GOTO(ret = lyxml_next_element(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name,
Michal Vasko69730152020-10-09 16:30:07 +0200809 &xmlctx->name_len, &closing), cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100810
Michal Vasko63f3d842020-07-08 10:10:14 +0200811 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100812 /* update status */
813 xmlctx->status = LYXML_END;
814 } else if (closing) {
815 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Stray closing element tag (\"%.*s\").",
Michal Vasko69730152020-10-09 16:30:07 +0200816 xmlctx->name_len, xmlctx->name);
Michal Vaskob36053d2020-03-26 15:49:30 +0100817 ret = LY_EVALID;
818 goto cleanup;
819 } else {
820 /* open an element, also parses all enclosed namespaces */
821 LY_CHECK_GOTO(ret = lyxml_open_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len), cleanup);
822
823 /* update status */
824 xmlctx->status = LYXML_ELEMENT;
825 }
826
827cleanup:
828 if (ret) {
829 lyxml_ctx_free(xmlctx);
830 } else {
831 *xmlctx_p = xmlctx;
832 }
833 return ret;
834}
835
836LY_ERR
837lyxml_ctx_next(struct lyxml_ctx *xmlctx)
838{
839 LY_ERR ret = LY_SUCCESS;
Radek Krejci857189e2020-09-01 13:26:36 +0200840 ly_bool closing;
Michal Vaskob36053d2020-03-26 15:49:30 +0100841 struct lyxml_elem *e;
842
843 /* if the value was not used, free it */
844 if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) {
845 free((char *)xmlctx->value);
846 xmlctx->value = NULL;
847 xmlctx->dynamic = 0;
848 }
849
850 switch (xmlctx->status) {
851 /* content |</elem> */
852 case LYXML_ELEM_CONTENT:
853 /* handle special case when empty content for "<elem/>" was returned */
Michal Vasko63f3d842020-07-08 10:10:14 +0200854 if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100855 assert(xmlctx->elements.count);
856 e = (struct lyxml_elem *)xmlctx->elements.objs[xmlctx->elements.count - 1];
857
858 /* close the element (parses closing tag) */
Michal Vasko63f3d842020-07-08 10:10:14 +0200859 ret = lyxml_close_element(xmlctx, e->prefix, e->prefix_len, e->name, e->name_len, 1);
860 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100861
862 /* update status */
863 xmlctx->status = LYXML_ELEM_CLOSE;
864 break;
865 }
Radek Krejci0f969882020-08-21 16:56:47 +0200866 /* fallthrough */
Michal Vaskob36053d2020-03-26 15:49:30 +0100867
868 /* </elem>| <elem2>* */
869 case LYXML_ELEM_CLOSE:
870 /* parse next element, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +0200871 ret = lyxml_next_element(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name, &xmlctx->name_len, &closing);
872 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100873
Michal Vasko63f3d842020-07-08 10:10:14 +0200874 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100875 /* update status */
876 xmlctx->status = LYXML_END;
877 } else if (closing) {
878 /* close an element (parses also closing tag) */
Michal Vasko63f3d842020-07-08 10:10:14 +0200879 ret = lyxml_close_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len, 0);
880 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100881
882 /* update status */
883 xmlctx->status = LYXML_ELEM_CLOSE;
884 } else {
885 /* open an element, also parses all enclosed namespaces */
Michal Vasko63f3d842020-07-08 10:10:14 +0200886 ret = lyxml_open_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len);
887 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100888
889 /* update status */
890 xmlctx->status = LYXML_ELEMENT;
891 }
892 break;
893
894 /* <elem| attr='val'* > content */
895 case LYXML_ELEMENT:
896
897 /* attr='val'| attr='val'* > content */
898 case LYXML_ATTR_CONTENT:
899 /* parse attribute name, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +0200900 ret = lyxml_next_attribute(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name, &xmlctx->name_len);
901 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100902
Michal Vasko63f3d842020-07-08 10:10:14 +0200903 if (xmlctx->in->current[0] == '>') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100904 /* no attributes but a closing tag */
Michal Vasko63f3d842020-07-08 10:10:14 +0200905 ly_in_skip(xmlctx->in, 1);
906 if (!xmlctx->in->current[0]) {
Michal Vaskof55ae202020-06-30 15:49:36 +0200907 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
908 ret = LY_EVALID;
909 goto cleanup;
910 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100911
912 /* parse element content */
Michal Vasko63f3d842020-07-08 10:10:14 +0200913 ret = lyxml_parse_value(xmlctx, '<', (char **)&xmlctx->value, &xmlctx->value_len, &xmlctx->ws_only,
Michal Vasko69730152020-10-09 16:30:07 +0200914 &xmlctx->dynamic);
Michal Vasko63f3d842020-07-08 10:10:14 +0200915 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100916
917 if (!xmlctx->value_len) {
918 /* use empty value, easier to work with */
919 xmlctx->value = "";
920 assert(!xmlctx->dynamic);
921 }
922
923 /* update status */
924 xmlctx->status = LYXML_ELEM_CONTENT;
Michal Vasko63f3d842020-07-08 10:10:14 +0200925 } else if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100926 /* no content but we still return it */
927 xmlctx->value = "";
928 xmlctx->value_len = 0;
929 xmlctx->ws_only = 1;
930 xmlctx->dynamic = 0;
931
932 /* update status */
933 xmlctx->status = LYXML_ELEM_CONTENT;
934 } else {
935 /* update status */
936 xmlctx->status = LYXML_ATTRIBUTE;
937 }
938 break;
939
940 /* attr|='val' */
941 case LYXML_ATTRIBUTE:
942 /* skip formatting and parse value */
Michal Vasko63f3d842020-07-08 10:10:14 +0200943 ret = lyxml_next_attr_content(xmlctx, &xmlctx->value, &xmlctx->value_len, &xmlctx->ws_only, &xmlctx->dynamic);
944 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100945
946 /* update status */
947 xmlctx->status = LYXML_ATTR_CONTENT;
948 break;
949
950 /* </elem> |EOF */
951 case LYXML_END:
952 /* nothing to do */
953 break;
954 }
955
956cleanup:
957 if (ret) {
958 /* invalidate context */
959 xmlctx->status = LYXML_END;
960 }
961 return ret;
962}
963
964LY_ERR
965lyxml_ctx_peek(struct lyxml_ctx *xmlctx, enum LYXML_PARSER_STATUS *next)
966{
967 LY_ERR ret = LY_SUCCESS;
968 const char *prefix, *name, *prev_input;
969 size_t prefix_len, name_len;
Radek Krejci857189e2020-09-01 13:26:36 +0200970 ly_bool closing;
Michal Vaskob36053d2020-03-26 15:49:30 +0100971
Michal Vasko63f3d842020-07-08 10:10:14 +0200972 prev_input = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100973
974 switch (xmlctx->status) {
975 case LYXML_ELEM_CONTENT:
Michal Vasko63f3d842020-07-08 10:10:14 +0200976 if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100977 *next = LYXML_ELEM_CLOSE;
978 break;
979 }
Radek Krejci0f969882020-08-21 16:56:47 +0200980 /* fallthrough */
Michal Vaskob36053d2020-03-26 15:49:30 +0100981 case LYXML_ELEM_CLOSE:
982 /* parse next element, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +0200983 ret = lyxml_next_element(xmlctx, &prefix, &prefix_len, &name, &name_len, &closing);
984 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100985
Michal Vasko63f3d842020-07-08 10:10:14 +0200986 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100987 *next = LYXML_END;
988 } else if (closing) {
989 *next = LYXML_ELEM_CLOSE;
990 } else {
991 *next = LYXML_ELEMENT;
992 }
993 break;
994 case LYXML_ELEMENT:
995 case LYXML_ATTR_CONTENT:
996 /* parse attribute name, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +0200997 ret = lyxml_next_attribute(xmlctx, &prefix, &prefix_len, &name, &name_len);
998 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100999
Michal Vasko63f3d842020-07-08 10:10:14 +02001000 if ((xmlctx->in->current[0] == '>') || (xmlctx->in->current[0] == '/')) {
Michal Vaskob36053d2020-03-26 15:49:30 +01001001 *next = LYXML_ELEM_CONTENT;
1002 } else {
1003 *next = LYXML_ATTRIBUTE;
1004 }
1005 break;
1006 case LYXML_ATTRIBUTE:
1007 *next = LYXML_ATTR_CONTENT;
1008 break;
1009 case LYXML_END:
1010 *next = LYXML_END;
1011 break;
1012 }
1013
1014cleanup:
Michal Vasko63f3d842020-07-08 10:10:14 +02001015 xmlctx->in->current = prev_input;
Michal Vaskob36053d2020-03-26 15:49:30 +01001016 return ret;
1017}
1018
1019void
1020lyxml_ctx_free(struct lyxml_ctx *xmlctx)
1021{
1022 uint32_t u;
1023
1024 if (!xmlctx) {
1025 return;
1026 }
1027
1028 if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) {
1029 free((char *)xmlctx->value);
1030 }
1031 ly_set_erase(&xmlctx->elements, free);
1032 for (u = xmlctx->ns.count - 1; u + 1 > 0; --u) {
1033 /* remove the ns structure */
1034 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->prefix);
1035 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->uri);
1036 free(xmlctx->ns.objs[u]);
1037 }
1038 ly_set_erase(&xmlctx->ns, NULL);
1039 free(xmlctx);
Radek Krejcib1890642018-10-03 14:05:40 +02001040}
Radek Krejcie7b95092019-05-15 11:03:07 +02001041
1042LY_ERR
Radek Krejci857189e2020-09-01 13:26:36 +02001043lyxml_dump_text(struct ly_out *out, const char *text, ly_bool attribute)
Radek Krejcie7b95092019-05-15 11:03:07 +02001044{
Michal Vasko5233e962020-08-14 14:26:20 +02001045 LY_ERR ret;
Radek Krejcie7b95092019-05-15 11:03:07 +02001046
1047 if (!text) {
1048 return 0;
1049 }
1050
Radek Krejci1deb5be2020-08-26 16:43:36 +02001051 for (uint64_t u = 0; text[u]; u++) {
Radek Krejcie7b95092019-05-15 11:03:07 +02001052 switch (text[u]) {
1053 case '&':
Michal Vasko5233e962020-08-14 14:26:20 +02001054 ret = ly_print_(out, "&amp;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001055 break;
1056 case '<':
Michal Vasko5233e962020-08-14 14:26:20 +02001057 ret = ly_print_(out, "&lt;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001058 break;
1059 case '>':
1060 /* not needed, just for readability */
Michal Vasko5233e962020-08-14 14:26:20 +02001061 ret = ly_print_(out, "&gt;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001062 break;
1063 case '"':
1064 if (attribute) {
Michal Vasko5233e962020-08-14 14:26:20 +02001065 ret = ly_print_(out, "&quot;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001066 break;
1067 }
Radek Krejci0f969882020-08-21 16:56:47 +02001068 /* falls through */
Radek Krejcie7b95092019-05-15 11:03:07 +02001069 default:
Michal Vasko5233e962020-08-14 14:26:20 +02001070 ret = ly_write_(out, &text[u], 1);
1071 break;
Radek Krejcie7b95092019-05-15 11:03:07 +02001072 }
Michal Vasko5233e962020-08-14 14:26:20 +02001073 LY_CHECK_RET(ret);
Radek Krejcie7b95092019-05-15 11:03:07 +02001074 }
1075
Michal Vasko5233e962020-08-14 14:26:20 +02001076 return LY_SUCCESS;
Radek Krejcie7b95092019-05-15 11:03:07 +02001077}
1078
Michal Vasko52927e22020-03-16 17:26:14 +01001079LY_ERR
Michal Vaskob36053d2020-03-26 15:49:30 +01001080lyxml_get_prefixes(struct lyxml_ctx *xmlctx, const char *value, size_t value_len, struct ly_prefix **val_prefs)
Michal Vasko52927e22020-03-16 17:26:14 +01001081{
1082 LY_ERR ret;
Michal Vaskofd69e1d2020-07-03 11:57:17 +02001083 LY_ARRAY_COUNT_TYPE u;
Radek Krejci7eb54ba2020-05-18 16:30:04 +02001084 uint32_t c;
Michal Vasko52927e22020-03-16 17:26:14 +01001085 const struct lyxml_ns *ns;
1086 const char *start, *stop;
1087 struct ly_prefix *prefixes = NULL;
1088 size_t len;
1089
1090 for (stop = start = value; (size_t)(stop - value) < value_len; start = stop) {
1091 size_t bytes;
1092 ly_getutf8(&stop, &c, &bytes);
1093 if (is_xmlqnamestartchar(c)) {
1094 for (ly_getutf8(&stop, &c, &bytes);
1095 is_xmlqnamechar(c) && (size_t)(stop - value) < value_len;
Radek Krejci1e008d22020-08-17 11:37:37 +02001096 ly_getutf8(&stop, &c, &bytes)) {}
Michal Vasko52927e22020-03-16 17:26:14 +01001097 stop = stop - bytes;
1098 if (*stop == ':') {
1099 /* we have a possible prefix */
1100 len = stop - start;
Michal Vaskoc8a230d2020-08-14 12:17:10 +02001101 ns = lyxml_ns_get(&xmlctx->ns, start, len);
Michal Vasko52927e22020-03-16 17:26:14 +01001102 if (ns) {
1103 struct ly_prefix *p = NULL;
1104
1105 /* check whether we do not already have this prefix stored */
1106 LY_ARRAY_FOR(prefixes, u) {
Radek Krejci1798aae2020-07-14 13:26:06 +02001107 if (!ly_strncmp(prefixes[u].id, start, len)) {
Michal Vasko52927e22020-03-16 17:26:14 +01001108 p = &prefixes[u];
1109 break;
1110 }
1111 }
1112 if (!p) {
Michal Vaskob36053d2020-03-26 15:49:30 +01001113 LY_ARRAY_NEW_GOTO(xmlctx->ctx, prefixes, p, ret, error);
Radek Krejci011e4aa2020-09-04 15:22:31 +02001114 LY_CHECK_GOTO(ret = lydict_insert(xmlctx->ctx, start, len, &p->id), error);
1115 LY_CHECK_GOTO(ret = lydict_insert(xmlctx->ctx, ns->uri, 0, &p->module_ns), error);
Michal Vasko52927e22020-03-16 17:26:14 +01001116 } /* else the prefix already present */
1117 }
1118 }
1119 stop = stop + bytes;
1120 }
1121 }
1122
1123 *val_prefs = prefixes;
1124 return LY_SUCCESS;
1125
1126error:
1127 LY_ARRAY_FOR(prefixes, u) {
Radek Krejci1798aae2020-07-14 13:26:06 +02001128 lydict_remove(xmlctx->ctx, prefixes[u].id);
1129 lydict_remove(xmlctx->ctx, prefixes[u].module_ns);
Michal Vasko52927e22020-03-16 17:26:14 +01001130 }
1131 LY_ARRAY_FREE(prefixes);
1132 return ret;
1133}
1134
1135LY_ERR
1136lyxml_value_compare(const char *value1, const struct ly_prefix *prefs1, const char *value2, const struct ly_prefix *prefs2)
1137{
1138 const char *ptr1, *ptr2, *ns1, *ns2;
Michal Vaskofd69e1d2020-07-03 11:57:17 +02001139 LY_ARRAY_COUNT_TYPE u1, u2;
Michal Vasko52927e22020-03-16 17:26:14 +01001140
1141 if (!value1 && !value2) {
1142 return LY_SUCCESS;
1143 }
1144 if ((value1 && !value2) || (!value1 && value2)) {
1145 return LY_ENOT;
1146 }
1147
1148 ptr1 = value1;
1149 ptr2 = value2;
1150 while (ptr1[0] && ptr2[0]) {
1151 if (ptr1[0] != ptr2[0]) {
1152 /* it can be a start of prefix that maps to the same module */
Radek Krejci1deb5be2020-08-26 16:43:36 +02001153 size_t len;
Michal Vasko52927e22020-03-16 17:26:14 +01001154 ns1 = ns2 = NULL;
Michal Vaskoed4fcfe2020-07-08 10:38:56 +02001155 u1 = u2 = 0;
Michal Vasko52927e22020-03-16 17:26:14 +01001156 if (prefs1) {
1157 /* find module of the first prefix, if any */
1158 LY_ARRAY_FOR(prefs1, u1) {
Radek Krejci1798aae2020-07-14 13:26:06 +02001159 len = strlen(prefs1[u1].id);
1160 if (!strncmp(ptr1, prefs1[u1].id, len) && (ptr1[len] == ':')) {
1161 ns1 = prefs1[u1].module_ns;
Michal Vasko52927e22020-03-16 17:26:14 +01001162 break;
1163 }
1164 }
1165 }
1166 if (prefs2) {
1167 /* find module of the second prefix, if any */
1168 LY_ARRAY_FOR(prefs2, u2) {
Radek Krejci1798aae2020-07-14 13:26:06 +02001169 len = strlen(prefs2[u2].id);
1170 if (!strncmp(ptr2, prefs2[u2].id, len) && (ptr2[len] == ':')) {
1171 ns2 = prefs2[u2].module_ns;
Michal Vasko52927e22020-03-16 17:26:14 +01001172 break;
1173 }
1174 }
1175 }
1176
1177 if (!ns1 || !ns2 || (ns1 != ns2)) {
1178 /* not a prefix or maps to different namespaces */
1179 break;
1180 }
1181
1182 /* skip prefixes in both values (':' is skipped as iter) */
Radek Krejci1798aae2020-07-14 13:26:06 +02001183 ptr1 += strlen(prefs1[u1].id);
1184 ptr2 += strlen(prefs2[u2].id);
Michal Vasko52927e22020-03-16 17:26:14 +01001185 }
1186
1187 ++ptr1;
1188 ++ptr2;
1189 }
1190 if (ptr1[0] || ptr2[0]) {
1191 /* not a match or simply different lengths */
1192 return LY_ENOT;
1193 }
1194
1195 return LY_SUCCESS;
1196}