/**
 * @file xml.c
 * @author Radek Krejci <rkrejci@cesnet.cz>
 * @author Michal Vasko <mvasko@cesnet.cz>
 * @brief Generic XML parser implementation for libyang
 *
 * Copyright (c) 2015 - 2018 CESNET, z.s.p.o.
 *
 * This source code is licensed under BSD 3-Clause License (the "License").
 * You may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://opensource.org/licenses/BSD-3-Clause
 */
15
Radek Krejci535ea9f2020-05-29 16:01:05 +020016#define _GNU_SOURCE
17
18#include "xml.h"
Radek Krejci4b74d5e2018-09-26 14:30:55 +020019
Radek Krejcib1890642018-10-03 14:05:40 +020020#include <assert.h>
Radek Krejci7a7fa902018-09-25 17:08:21 +020021#include <ctype.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020022#include <stdint.h>
Radek Krejcie7b95092019-05-15 11:03:07 +020023#include <stdlib.h>
Radek Krejci4b74d5e2018-09-26 14:30:55 +020024#include <string.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020025
Radek Krejci535ea9f2020-05-29 16:01:05 +020026#include "common.h"
Michal Vasko5aa44c02020-06-29 11:47:02 +020027#include "compat.h"
Radek Krejci535ea9f2020-05-29 16:01:05 +020028#include "dict.h"
Michal Vaskoafac7822020-10-20 14:22:26 +020029#include "in_internal.h"
30#include "out_internal.h"
Radek Krejci535ea9f2020-05-29 16:01:05 +020031#include "tree.h"
32#include "tree_data.h"
Radek Krejcid91dbaf2018-09-21 15:51:39 +020033
/**
 * @brief Move the input of lyxml_ctx @p c by @p s bytes and check for end-of-input.
 *
 * If the input is positioned on a NUL byte after the move, an unexpected-EOF validation error
 * is logged using @p c and the *calling* function returns LY_EVALID.
 *
 * Wrapped in do { } while (0) so that the macro expands to exactly one statement.
 */
#define move_input(c, s) \
    do { \
        ly_in_skip((c)->in, (s)); \
        LY_CHECK_ERR_RET(!(c)->in->current[0], LOGVAL((c)->ctx, LY_VLOG_LINE, &(c)->line, LY_VCODE_EOF), LY_EVALID); \
    } while (0)
Radek Krejcid91dbaf2018-09-21 15:51:39 +020036
/**
 * @brief Skip XML whitespaces at the current position of the input of lyxml_ctx @p c.
 *
 * Every skipped '\n' increments the line counter of @p c.
 *
 * Wrapped in do { } while (0) so that the macro expands to exactly one statement.
 */
#define ign_xmlws(c) \
    do { \
        while (is_xmlws(*(c)->in->current)) { \
            if (*(c)->in->current == '\n') { \
                ++(c)->line; \
            } \
            ly_in_skip((c)->in, 1); \
        } \
    } while (0)
Michal Vaskob36053d2020-03-26 15:49:30 +010039
Radek Krejci857189e2020-09-01 13:26:36 +020040static LY_ERR lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *value_len, ly_bool *ws_only,
41 ly_bool *dynamic);
Radek Krejcid91dbaf2018-09-21 15:51:39 +020042
Radek Krejci4b74d5e2018-09-26 14:30:55 +020043/**
44 * @brief Ignore any characters until the delim of the size delim_len is read
45 *
46 * Detects number of read new lines.
Radek Krejci857189e2020-09-01 13:26:36 +020047 * Returns Boolean value whether delim was found or not.
Michal Vasko63f3d842020-07-08 10:10:14 +020048 */
Radek Krejci857189e2020-09-01 13:26:36 +020049static ly_bool
Michal Vasko63f3d842020-07-08 10:10:14 +020050ign_todelim(register const char *input, const char *delim, size_t delim_len, size_t *newlines, size_t *parsed)
Radek Krejcid91dbaf2018-09-21 15:51:39 +020051{
52 size_t i;
53 register const char *a, *b;
54
55 (*newlines) = 0;
Michal Vasko63f3d842020-07-08 10:10:14 +020056 (*parsed) = 0;
57 for ( ; *input; ++input, ++(*parsed)) {
Radek Krejcid91dbaf2018-09-21 15:51:39 +020058 if (*input != *delim) {
59 if (*input == '\n') {
60 ++(*newlines);
61 }
62 continue;
63 }
64 a = input;
65 b = delim;
66 for (i = 0; i < delim_len; ++i) {
67 if (*a++ != *b++) {
68 break;
69 }
70 }
71 if (i == delim_len) {
Michal Vasko63f3d842020-07-08 10:10:14 +020072 /* delim found */
73 return 0;
Radek Krejcid91dbaf2018-09-21 15:51:39 +020074 }
75 }
Michal Vasko63f3d842020-07-08 10:10:14 +020076
77 /* delim not found */
Radek Krejci1deb5be2020-08-26 16:43:36 +020078 return 1;
Radek Krejcid91dbaf2018-09-21 15:51:39 +020079}
80
Radek Krejci4b74d5e2018-09-26 14:30:55 +020081/**
Michal Vaskob36053d2020-03-26 15:49:30 +010082 * @brief Check/Get an XML identifier from the input string.
83 *
84 * The identifier must have at least one valid character complying the name start character constraints.
85 * The identifier is terminated by the first character, which does not comply to the name character constraints.
86 *
87 * See https://www.w3.org/TR/xml-names/#NT-NCName
88 *
89 * @param[in] xmlctx XML context.
90 * @param[out] start Pointer to the start of the identifier.
91 * @param[out] end Pointer ot the end of the identifier.
92 * @return LY_ERR value.
93 */
94static LY_ERR
95lyxml_parse_identifier(struct lyxml_ctx *xmlctx, const char **start, const char **end)
96{
97 const char *s, *in;
98 uint32_t c;
99 size_t parsed;
100 LY_ERR rc;
101
Michal Vasko63f3d842020-07-08 10:10:14 +0200102 in = s = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100103
104 /* check NameStartChar (minus colon) */
105 LY_CHECK_ERR_RET(ly_getutf8(&in, &c, &parsed),
Michal Vasko69730152020-10-09 16:30:07 +0200106 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INCHAR, in[0]),
107 LY_EVALID);
Michal Vaskob36053d2020-03-26 15:49:30 +0100108 LY_CHECK_ERR_RET(!is_xmlqnamestartchar(c),
Michal Vasko69730152020-10-09 16:30:07 +0200109 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
110 "Identifier \"%s\" starts with an invalid character.", in - parsed),
111 LY_EVALID);
Michal Vaskob36053d2020-03-26 15:49:30 +0100112
113 /* check rest of the identifier */
114 do {
115 /* move only successfully parsed bytes */
Michal Vasko63f3d842020-07-08 10:10:14 +0200116 ly_in_skip(xmlctx->in, parsed);
Michal Vaskob36053d2020-03-26 15:49:30 +0100117
118 rc = ly_getutf8(&in, &c, &parsed);
119 LY_CHECK_ERR_RET(rc, LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INCHAR, in[0]), LY_EVALID);
120 } while (is_xmlqnamechar(c));
121
122 *start = s;
Michal Vasko63f3d842020-07-08 10:10:14 +0200123 *end = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100124 return LY_SUCCESS;
125}
126
127/**
128 * @brief Add namespace definition into XML context.
129 *
130 * Namespaces from a single element are supposed to be added sequentially together (not interleaved by a namespace from other
131 * element). This mimic namespace visibility, since the namespace defined in element E is not visible from its parents or
132 * siblings. On the other hand, namespace from a parent element can be redefined in a child element. This is also reflected
133 * by lyxml_ns_get() which returns the most recent namespace definition for the given prefix.
134 *
135 * When leaving processing of a subtree of some element (after it is removed from xmlctx->elements), caller is supposed to call
136 * lyxml_ns_rm() to remove all the namespaces defined in such an element from the context.
137 *
138 * @param[in] xmlctx XML context to work with.
139 * @param[in] prefix Pointer to the namespace prefix. Can be NULL for default namespace.
140 * @param[in] prefix_len Length of the prefix.
141 * @param[in] uri Namespace URI (value) to store directly. Value is always spent.
142 * @return LY_ERR values.
143 */
144LY_ERR
145lyxml_ns_add(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, char *uri)
146{
Radek Krejciba03a5a2020-08-27 14:40:41 +0200147 LY_ERR ret = LY_SUCCESS;
Michal Vaskob36053d2020-03-26 15:49:30 +0100148 struct lyxml_ns *ns;
149
150 ns = malloc(sizeof *ns);
151 LY_CHECK_ERR_RET(!ns, LOGMEM(xmlctx->ctx), LY_EMEM);
152
153 /* we need to connect the depth of the element where the namespace is defined with the
154 * namespace record to be able to maintain (remove) the record when the parser leaves
155 * (to its sibling or back to the parent) the element where the namespace was defined */
156 ns->depth = xmlctx->elements.count;
157
158 ns->uri = uri;
159 if (prefix) {
160 ns->prefix = strndup(prefix, prefix_len);
161 LY_CHECK_ERR_RET(!ns->prefix, LOGMEM(xmlctx->ctx); free(ns->uri); free(ns), LY_EMEM);
162 } else {
163 ns->prefix = NULL;
164 }
165
Radek Krejci3d92e442020-10-12 12:48:13 +0200166 ret = ly_set_add(&xmlctx->ns, ns, 1, NULL);
Radek Krejciba03a5a2020-08-27 14:40:41 +0200167 LY_CHECK_ERR_RET(ret, free(ns->prefix); free(ns->uri); free(ns), ret);
168
Michal Vaskob36053d2020-03-26 15:49:30 +0100169 return LY_SUCCESS;
170}
171
172/**
173 * @brief Remove all the namespaces defined in the element recently closed (removed from the xmlctx->elements).
174 *
175 * @param[in] xmlctx XML context to work with.
176 */
177void
178lyxml_ns_rm(struct lyxml_ctx *xmlctx)
179{
Radek Krejci1deb5be2020-08-26 16:43:36 +0200180 for (uint32_t u = xmlctx->ns.count - 1; u + 1 > 0; --u) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100181 if (((struct lyxml_ns *)xmlctx->ns.objs[u])->depth != xmlctx->elements.count + 1) {
182 /* we are done, the namespaces from a single element are supposed to be together */
183 break;
184 }
185 /* remove the ns structure */
186 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->prefix);
187 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->uri);
188 free(xmlctx->ns.objs[u]);
189 --xmlctx->ns.count;
190 }
191
192 if (!xmlctx->ns.count) {
193 /* cleanup the xmlctx's namespaces storage */
194 ly_set_erase(&xmlctx->ns, NULL);
195 }
196}
197
Michal Vaskob36053d2020-03-26 15:49:30 +0100198const struct lyxml_ns *
Michal Vaskoc8a230d2020-08-14 12:17:10 +0200199lyxml_ns_get(const struct ly_set *ns_set, const char *prefix, size_t prefix_len)
Michal Vaskob36053d2020-03-26 15:49:30 +0100200{
Michal Vaskob36053d2020-03-26 15:49:30 +0100201 struct lyxml_ns *ns;
202
Radek Krejci1deb5be2020-08-26 16:43:36 +0200203 for (uint32_t u = ns_set->count - 1; u + 1 > 0; --u) {
Michal Vaskoc8a230d2020-08-14 12:17:10 +0200204 ns = (struct lyxml_ns *)ns_set->objs[u];
Michal Vaskob36053d2020-03-26 15:49:30 +0100205 if (prefix && prefix_len) {
206 if (ns->prefix && !ly_strncmp(ns->prefix, prefix, prefix_len)) {
207 return ns;
208 }
209 } else if (!ns->prefix) {
210 /* default namespace */
211 return ns;
212 }
213 }
214
215 return NULL;
216}
217
Michal Vasko8cef5232020-06-15 17:59:47 +0200218/**
219 * @brief Skip in the input until EOF or just after the opening tag.
220 * Handles special XML constructs (comment, cdata, doctype).
221 *
222 * @param[in] xmlctx XML context to use.
223 * @return LY_ERR value.
224 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100225static LY_ERR
226lyxml_skip_until_end_or_after_otag(struct lyxml_ctx *xmlctx)
227{
228 const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */
Michal Vasko63f3d842020-07-08 10:10:14 +0200229 const char *endtag, *sectname;
230 size_t endtag_len, newlines, parsed;
Radek Krejci857189e2020-09-01 13:26:36 +0200231 ly_bool rc;
Michal Vaskob36053d2020-03-26 15:49:30 +0100232
233 while (1) {
234 ign_xmlws(xmlctx);
235
Michal Vasko63f3d842020-07-08 10:10:14 +0200236 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100237 /* EOF */
238 if (xmlctx->elements.count) {
239 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
240 return LY_EVALID;
241 }
242 return LY_SUCCESS;
Michal Vasko63f3d842020-07-08 10:10:14 +0200243 } else if (xmlctx->in->current[0] != '<') {
244 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
Michal Vasko69730152020-10-09 16:30:07 +0200245 xmlctx->in->current, "element tag start ('<')");
Michal Vaskob36053d2020-03-26 15:49:30 +0100246 return LY_EVALID;
247 }
248 move_input(xmlctx, 1);
249
Michal Vasko63f3d842020-07-08 10:10:14 +0200250 if (xmlctx->in->current[0] == '!') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100251 move_input(xmlctx, 1);
252 /* sections to ignore */
Michal Vasko63f3d842020-07-08 10:10:14 +0200253 if (!strncmp(xmlctx->in->current, "--", 2)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100254 /* comment */
255 move_input(xmlctx, 2);
256 sectname = "Comment";
257 endtag = "-->";
258 endtag_len = 3;
Michal Vasko63f3d842020-07-08 10:10:14 +0200259 } else if (!strncmp(xmlctx->in->current, "[CDATA[", 7)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100260 /* CDATA section */
261 move_input(xmlctx, 7);
262 sectname = "CData";
263 endtag = "]]>";
264 endtag_len = 3;
Michal Vasko63f3d842020-07-08 10:10:14 +0200265 } else if (!strncmp(xmlctx->in->current, "DOCTYPE", 7)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100266 /* Document type declaration - not supported */
267 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_NSUPP, "Document Type Declaration");
268 return LY_EVALID;
269 } else {
Michal Vasko63f3d842020-07-08 10:10:14 +0200270 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Unknown XML section \"%.20s\".",
Michal Vasko69730152020-10-09 16:30:07 +0200271 &xmlctx->in->current[-2]);
Michal Vaskob36053d2020-03-26 15:49:30 +0100272 return LY_EVALID;
273 }
Michal Vasko63f3d842020-07-08 10:10:14 +0200274 rc = ign_todelim(xmlctx->in->current, endtag, endtag_len, &newlines, &parsed);
275 LY_CHECK_ERR_RET(rc, LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_NTERM, sectname), LY_EVALID);
Michal Vaskob36053d2020-03-26 15:49:30 +0100276 xmlctx->line += newlines;
Michal Vasko63f3d842020-07-08 10:10:14 +0200277 ly_in_skip(xmlctx->in, parsed + endtag_len);
278 } else if (xmlctx->in->current[0] == '?') {
279 rc = ign_todelim(xmlctx->in->current, "?>", 2, &newlines, &parsed);
280 LY_CHECK_ERR_RET(rc, LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_NTERM, "Declaration"), LY_EVALID);
Michal Vaskob36053d2020-03-26 15:49:30 +0100281 xmlctx->line += newlines;
Michal Vasko63f3d842020-07-08 10:10:14 +0200282 ly_in_skip(xmlctx->in, parsed + 2);
Michal Vaskob36053d2020-03-26 15:49:30 +0100283 } else {
284 /* other non-WS character */
285 break;
286 }
287 }
288
289 return LY_SUCCESS;
290}
291
Michal Vasko8cef5232020-06-15 17:59:47 +0200292/**
293 * @brief Parse QName.
294 *
295 * @param[in] xmlctx XML context to use.
296 * @param[out] prefix Parsed prefix, may be NULL.
297 * @param[out] prefix_len Length of @p prefix.
298 * @param[out] name Parsed name.
299 * @param[out] name_len Length of @p name.
300 * @return LY_ERR value.
301 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100302static LY_ERR
303lyxml_parse_qname(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
304{
305 const char *start, *end;
306
307 *prefix = NULL;
308 *prefix_len = 0;
309
310 LY_CHECK_RET(lyxml_parse_identifier(xmlctx, &start, &end));
311 if (end[0] == ':') {
312 /* we have prefixed identifier */
313 *prefix = start;
314 *prefix_len = end - start;
315
316 move_input(xmlctx, 1);
317 LY_CHECK_RET(lyxml_parse_identifier(xmlctx, &start, &end));
318 }
319
320 *name = start;
321 *name_len = end - start;
322 return LY_SUCCESS;
323}
324
325/**
Michal Vasko8cef5232020-06-15 17:59:47 +0200326 * @brief Parse XML text content (value).
327 *
328 * @param[in] xmlctx XML context to use.
329 * @param[in] endchar Expected character to mark value end.
330 * @param[out] value Parsed value.
331 * @param[out] length Length of @p value.
332 * @param[out] ws_only Whether the value is empty/white-spaces only.
333 * @param[out] dynamic Whether the value was dynamically allocated.
334 * @return LY_ERR value.
335 */
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200336static LY_ERR
Radek Krejci857189e2020-09-01 13:26:36 +0200337lyxml_parse_value(struct lyxml_ctx *xmlctx, char endchar, char **value, size_t *length, ly_bool *ws_only, ly_bool *dynamic)
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200338{
Michal Vaskob36053d2020-03-26 15:49:30 +0100339#define BUFSIZE 24
340#define BUFSIZE_STEP 128
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200341
Michal Vaskob36053d2020-03-26 15:49:30 +0100342 const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */
Michal Vasko63f3d842020-07-08 10:10:14 +0200343 const char *in = xmlctx->in->current, *start;
Michal Vaskob36053d2020-03-26 15:49:30 +0100344 char *buf = NULL;
Radek Krejci4ad42aa2019-07-23 16:55:58 +0200345 size_t offset; /* read offset in input buffer */
346 size_t len; /* length of the output string (write offset in output buffer) */
347 size_t size = 0; /* size of the output buffer */
Radek Krejci7a7fa902018-09-25 17:08:21 +0200348 void *p;
Radek Krejci117d2082018-09-26 10:05:14 +0200349 uint32_t n;
Michal Vaskob36053d2020-03-26 15:49:30 +0100350 size_t u;
Radek Krejci857189e2020-09-01 13:26:36 +0200351 ly_bool ws = 1;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200352
Michal Vaskob36053d2020-03-26 15:49:30 +0100353 assert(xmlctx);
Radek Krejcib1890642018-10-03 14:05:40 +0200354
Radek Krejcid70d1072018-10-09 14:20:47 +0200355 /* init */
Michal Vaskob36053d2020-03-26 15:49:30 +0100356 start = in;
Radek Krejcid70d1072018-10-09 14:20:47 +0200357 offset = len = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200358
359 /* parse */
360 while (in[offset]) {
361 if (in[offset] == '&') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100362 /* non WS */
363 ws = 0;
Radek Krejcid70d1072018-10-09 14:20:47 +0200364
Michal Vaskob36053d2020-03-26 15:49:30 +0100365 if (!buf) {
366 /* prepare output buffer */
367 buf = malloc(BUFSIZE);
368 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
369 size = BUFSIZE;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200370 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100371
372 /* allocate enough for the offset and next character,
373 * we will need 4 bytes at most since we support only the predefined
374 * (one-char) entities and character references */
Juraj Vijtiukcb017cc2020-07-08 16:19:58 +0200375 while (len + offset + 4 >= size) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100376 buf = ly_realloc(buf, size + BUFSIZE_STEP);
377 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
378 size += BUFSIZE_STEP;
379 }
380
381 if (offset) {
382 /* store what we have so far */
383 memcpy(&buf[len], in, offset);
384 len += offset;
385 in += offset;
386 offset = 0;
387 }
388
Radek Krejci7a7fa902018-09-25 17:08:21 +0200389 ++offset;
390 if (in[offset] != '#') {
391 /* entity reference - only predefined references are supported */
392 if (!strncmp(&in[offset], "lt;", 3)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100393 buf[len++] = '<';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200394 in += 4; /* &lt; */
395 } else if (!strncmp(&in[offset], "gt;", 3)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100396 buf[len++] = '>';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200397 in += 4; /* &gt; */
398 } else if (!strncmp(&in[offset], "amp;", 4)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100399 buf[len++] = '&';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200400 in += 5; /* &amp; */
401 } else if (!strncmp(&in[offset], "apos;", 5)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100402 buf[len++] = '\'';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200403 in += 6; /* &apos; */
404 } else if (!strncmp(&in[offset], "quot;", 5)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100405 buf[len++] = '\"';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200406 in += 6; /* &quot; */
407 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100408 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
Michal Vasko69730152020-10-09 16:30:07 +0200409 "Entity reference \"%.*s\" not supported, only predefined references allowed.", 10, &in[offset - 1]);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200410 goto error;
411 }
412 offset = 0;
413 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100414 p = (void *)&in[offset - 1];
Radek Krejci7a7fa902018-09-25 17:08:21 +0200415 /* character reference */
416 ++offset;
417 if (isdigit(in[offset])) {
418 for (n = 0; isdigit(in[offset]); offset++) {
419 n = (10 * n) + (in[offset] - '0');
420 }
Michal Vasko69730152020-10-09 16:30:07 +0200421 } else if ((in[offset] == 'x') && isxdigit(in[offset + 1])) {
Radek Krejci7a7fa902018-09-25 17:08:21 +0200422 for (n = 0, ++offset; isxdigit(in[offset]); offset++) {
423 if (isdigit(in[offset])) {
424 u = (in[offset] - '0');
425 } else if (in[offset] > 'F') {
426 u = 10 + (in[offset] - 'a');
427 } else {
428 u = 10 + (in[offset] - 'A');
429 }
430 n = (16 * n) + u;
431 }
432 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100433 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Invalid character reference \"%.*s\".", 12, p);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200434 goto error;
435
436 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100437
Radek Krejci7a7fa902018-09-25 17:08:21 +0200438 LY_CHECK_ERR_GOTO(in[offset] != ';',
Michal Vasko69730152020-10-09 16:30:07 +0200439 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP,
440 LY_VCODE_INSTREXP_len(&in[offset]), &in[offset], ";"),
441 error);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200442 ++offset;
Radek Krejci50f0c6b2020-06-18 16:31:48 +0200443 LY_CHECK_ERR_GOTO(ly_pututf8(&buf[len], n, &u),
Michal Vasko69730152020-10-09 16:30:07 +0200444 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
445 "Invalid character reference \"%.*s\" (0x%08x).", 12, p, n),
446 error);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200447 len += u;
448 in += offset;
449 offset = 0;
450 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100451 } else if (in[offset] == endchar) {
Radek Krejci7a7fa902018-09-25 17:08:21 +0200452 /* end of string */
Radek Krejcid70d1072018-10-09 14:20:47 +0200453 if (buf) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100454 /* realloc exact size string */
455 buf = ly_realloc(buf, len + offset + 1);
456 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
457 size = len + offset + 1;
Radek Krejcid70d1072018-10-09 14:20:47 +0200458 memcpy(&buf[len], in, offset);
Michal Vaskob36053d2020-03-26 15:49:30 +0100459
460 /* set terminating NULL byte */
461 buf[len + offset] = '\0';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200462 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200463 len += offset;
Michal Vaskob36053d2020-03-26 15:49:30 +0100464 in += offset;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200465 goto success;
466 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100467 if (!is_xmlws(in[offset])) {
468 /* non WS */
469 ws = 0;
470 }
471
Radek Krejci7a7fa902018-09-25 17:08:21 +0200472 /* log lines */
473 if (in[offset] == '\n') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100474 ++xmlctx->line;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200475 }
476
477 /* continue */
478 ++offset;
479 }
480 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100481
482 /* EOF reached before endchar */
483 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
484
Radek Krejci7a7fa902018-09-25 17:08:21 +0200485error:
Michal Vaskob36053d2020-03-26 15:49:30 +0100486 free(buf);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200487 return LY_EVALID;
488
489success:
Radek Krejcid70d1072018-10-09 14:20:47 +0200490 if (buf) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100491 *value = buf;
492 *dynamic = 1;
493 } else {
494 *value = (char *)start;
495 *dynamic = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200496 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100497 *length = len;
498 *ws_only = ws;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200499
Michal Vasko63f3d842020-07-08 10:10:14 +0200500 ly_in_skip(xmlctx->in, in - xmlctx->in->current);
Michal Vaskob36053d2020-03-26 15:49:30 +0100501 return LY_SUCCESS;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200502
503#undef BUFSIZE
504#undef BUFSIZE_STEP
Radek Krejci7a7fa902018-09-25 17:08:21 +0200505}
506
Michal Vasko8cef5232020-06-15 17:59:47 +0200507/**
508 * @brief Parse XML closing element and match it to a stored starting element.
509 *
510 * @param[in] xmlctx XML context to use.
511 * @param[in] prefix Expected closing element prefix.
512 * @param[in] prefix_len Length of @p prefix.
513 * @param[in] name Expected closing element name.
514 * @param[in] name_len Length of @p name.
515 * @param[in] empty Whether we are parsing a special "empty" element (with joined starting and closing tag) with no value.
516 * @return LY_ERR value.
517 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100518static LY_ERR
519lyxml_close_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, const char *name, size_t name_len,
Radek Krejci857189e2020-09-01 13:26:36 +0200520 ly_bool empty)
Radek Krejcid972c252018-09-25 13:23:39 +0200521{
Michal Vaskob36053d2020-03-26 15:49:30 +0100522 struct lyxml_elem *e;
Radek Krejcid972c252018-09-25 13:23:39 +0200523
Michal Vaskob36053d2020-03-26 15:49:30 +0100524 /* match opening and closing element tags */
525 if (!xmlctx->elements.count) {
526 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Stray closing element tag (\"%.*s\").",
Michal Vasko69730152020-10-09 16:30:07 +0200527 name_len, name);
Michal Vaskob36053d2020-03-26 15:49:30 +0100528 return LY_EVALID;
529 }
Radek Krejcid972c252018-09-25 13:23:39 +0200530
Michal Vaskob36053d2020-03-26 15:49:30 +0100531 e = (struct lyxml_elem *)xmlctx->elements.objs[xmlctx->elements.count - 1];
Michal Vasko69730152020-10-09 16:30:07 +0200532 if ((e->prefix_len != prefix_len) || (e->name_len != name_len) ||
533 (prefix_len && strncmp(prefix, e->prefix, e->prefix_len)) || strncmp(name, e->name, e->name_len)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100534 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
Michal Vasko69730152020-10-09 16:30:07 +0200535 "Opening (\"%.*s%s%.*s\") and closing (\"%.*s%s%.*s\") elements tag mismatch.",
536 e->prefix_len, e->prefix ? e->prefix : "", e->prefix ? ":" : "", e->name_len, e->name,
537 prefix_len, prefix ? prefix : "", prefix ? ":" : "", name_len, name);
Michal Vaskob36053d2020-03-26 15:49:30 +0100538 return LY_EVALID;
539 }
Radek Krejcid972c252018-09-25 13:23:39 +0200540
Michal Vaskob36053d2020-03-26 15:49:30 +0100541 /* opening and closing element tags matches, remove record from the opening tags list */
542 ly_set_rm_index(&xmlctx->elements, xmlctx->elements.count - 1, free);
Radek Krejcid972c252018-09-25 13:23:39 +0200543
Michal Vaskob36053d2020-03-26 15:49:30 +0100544 /* remove also the namespaces connected with the element */
545 lyxml_ns_rm(xmlctx);
Radek Krejcid972c252018-09-25 13:23:39 +0200546
Michal Vaskob36053d2020-03-26 15:49:30 +0100547 /* skip WS */
548 ign_xmlws(xmlctx);
Radek Krejcid972c252018-09-25 13:23:39 +0200549
Michal Vaskob36053d2020-03-26 15:49:30 +0100550 /* special "<elem/>" element */
Michal Vasko63f3d842020-07-08 10:10:14 +0200551 if (empty && (xmlctx->in->current[0] == '/')) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100552 move_input(xmlctx, 1);
553 }
Michal Vasko52927e22020-03-16 17:26:14 +0100554
Michal Vaskob36053d2020-03-26 15:49:30 +0100555 /* parse closing tag */
Michal Vasko63f3d842020-07-08 10:10:14 +0200556 if (xmlctx->in->current[0] != '>') {
557 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
Michal Vasko69730152020-10-09 16:30:07 +0200558 xmlctx->in->current, "element tag termination ('>')");
Michal Vaskob36053d2020-03-26 15:49:30 +0100559 return LY_EVALID;
560 }
Michal Vasko52927e22020-03-16 17:26:14 +0100561
Michal Vaskob36053d2020-03-26 15:49:30 +0100562 /* move after closing tag without checking for EOF */
Michal Vasko63f3d842020-07-08 10:10:14 +0200563 ly_in_skip(xmlctx->in, 1);
Michal Vasko52927e22020-03-16 17:26:14 +0100564
Radek Krejcid972c252018-09-25 13:23:39 +0200565 return LY_SUCCESS;
566}
567
Michal Vasko8cef5232020-06-15 17:59:47 +0200568/**
569 * @brief Store parsed opening element and parse any included namespaces.
570 *
571 * @param[in] xmlctx XML context to use.
572 * @param[in] prefix Parsed starting element prefix.
573 * @param[in] prefix_len Length of @p prefix.
574 * @param[in] name Parsed starting element name.
575 * @param[in] name_len Length of @p name.
576 * @return LY_ERR value.
577 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100578static LY_ERR
579lyxml_open_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, const char *name, size_t name_len)
Radek Krejcib1890642018-10-03 14:05:40 +0200580{
Michal Vaskob36053d2020-03-26 15:49:30 +0100581 LY_ERR ret = LY_SUCCESS;
582 struct lyxml_elem *e;
583 const char *prev_input;
584 char *value;
585 size_t parsed, value_len;
Radek Krejci857189e2020-09-01 13:26:36 +0200586 ly_bool ws_only, dynamic, is_ns;
Michal Vaskob36053d2020-03-26 15:49:30 +0100587 uint32_t c;
Radek Krejcib1890642018-10-03 14:05:40 +0200588
Michal Vaskob36053d2020-03-26 15:49:30 +0100589 /* store element opening tag information */
590 e = malloc(sizeof *e);
591 LY_CHECK_ERR_RET(!e, LOGMEM(xmlctx->ctx), LY_EMEM);
592 e->name = name;
593 e->prefix = prefix;
594 e->name_len = name_len;
595 e->prefix_len = prefix_len;
Radek Krejci3d92e442020-10-12 12:48:13 +0200596 LY_CHECK_RET(ly_set_add(&xmlctx->elements, e, 1, NULL));
Michal Vaskob36053d2020-03-26 15:49:30 +0100597
598 /* skip WS */
599 ign_xmlws(xmlctx);
600
601 /* parse and store all namespaces */
Michal Vasko63f3d842020-07-08 10:10:14 +0200602 prev_input = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100603 is_ns = 1;
Michal Vasko63f3d842020-07-08 10:10:14 +0200604 while ((xmlctx->in->current[0] != '\0') && !ly_getutf8(&xmlctx->in->current, &c, &parsed) && is_xmlqnamestartchar(c)) {
605 xmlctx->in->current -= parsed;
Michal Vaskob36053d2020-03-26 15:49:30 +0100606
607 /* parse attribute name */
608 LY_CHECK_GOTO(ret = lyxml_parse_qname(xmlctx, &prefix, &prefix_len, &name, &name_len), cleanup);
609
610 /* parse the value */
611 LY_CHECK_GOTO(ret = lyxml_next_attr_content(xmlctx, (const char **)&value, &value_len, &ws_only, &dynamic), cleanup);
612
613 /* store every namespace */
614 if ((prefix && !ly_strncmp("xmlns", prefix, prefix_len)) || (!prefix && !ly_strncmp("xmlns", name, name_len))) {
615 LY_CHECK_GOTO(ret = lyxml_ns_add(xmlctx, prefix ? name : NULL, prefix ? name_len : 0,
Michal Vasko69730152020-10-09 16:30:07 +0200616 dynamic ? value : strndup(value, value_len)), cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100617 dynamic = 0;
618 } else {
619 /* not a namespace */
620 is_ns = 0;
621 }
622 if (dynamic) {
623 free(value);
624 }
625
626 /* skip WS */
627 ign_xmlws(xmlctx);
628
629 if (is_ns) {
630 /* we can actually skip all the namespaces as there is no reason to parse them again */
Michal Vasko63f3d842020-07-08 10:10:14 +0200631 prev_input = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100632 }
Radek Krejcib1890642018-10-03 14:05:40 +0200633 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100634
635cleanup:
636 if (!ret) {
Michal Vasko63f3d842020-07-08 10:10:14 +0200637 xmlctx->in->current = prev_input;
Michal Vaskob36053d2020-03-26 15:49:30 +0100638 }
639 return ret;
640}
641
Michal Vasko8cef5232020-06-15 17:59:47 +0200642/**
643 * @brief Move parser to the attribute content and parse it.
644 *
645 * @param[in] xmlctx XML context to use.
646 * @param[out] value Parsed attribute value.
647 * @param[out] value_len Length of @p value.
648 * @param[out] ws_only Whether the value is empty/white-spaces only.
649 * @param[out] dynamic Whether the value was dynamically allocated.
650 * @return LY_ERR value.
651 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100652static LY_ERR
Radek Krejci857189e2020-09-01 13:26:36 +0200653lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *value_len, ly_bool *ws_only, ly_bool *dynamic)
Michal Vaskob36053d2020-03-26 15:49:30 +0100654{
655 char quot;
656
657 /* skip WS */
658 ign_xmlws(xmlctx);
659
660 /* skip '=' */
Michal Vasko63f3d842020-07-08 10:10:14 +0200661 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100662 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
663 return LY_EVALID;
Michal Vasko63f3d842020-07-08 10:10:14 +0200664 } else if (xmlctx->in->current[0] != '=') {
665 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
Michal Vasko69730152020-10-09 16:30:07 +0200666 xmlctx->in->current, "'='");
Michal Vaskob36053d2020-03-26 15:49:30 +0100667 return LY_EVALID;
668 }
669 move_input(xmlctx, 1);
670
671 /* skip WS */
672 ign_xmlws(xmlctx);
673
674 /* find quotes */
Michal Vasko63f3d842020-07-08 10:10:14 +0200675 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100676 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
677 return LY_EVALID;
Michal Vasko63f3d842020-07-08 10:10:14 +0200678 } else if ((xmlctx->in->current[0] != '\'') && (xmlctx->in->current[0] != '\"')) {
679 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
Michal Vasko69730152020-10-09 16:30:07 +0200680 xmlctx->in->current, "either single or double quotation mark");
Michal Vaskob36053d2020-03-26 15:49:30 +0100681 return LY_EVALID;
682 }
683
684 /* remember quote */
Michal Vasko63f3d842020-07-08 10:10:14 +0200685 quot = xmlctx->in->current[0];
Michal Vaskob36053d2020-03-26 15:49:30 +0100686 move_input(xmlctx, 1);
687
688 /* parse attribute value */
689 LY_CHECK_RET(lyxml_parse_value(xmlctx, quot, (char **)value, value_len, ws_only, dynamic));
690
691 /* move after ending quote (without checking for EOF) */
Michal Vasko63f3d842020-07-08 10:10:14 +0200692 ly_in_skip(xmlctx->in, 1);
Michal Vaskob36053d2020-03-26 15:49:30 +0100693
694 return LY_SUCCESS;
695}
696
Michal Vasko8cef5232020-06-15 17:59:47 +0200697/**
698 * @brief Move parser to the next attribute and parse it.
699 *
700 * @param[in] xmlctx XML context to use.
701 * @param[out] prefix Parsed attribute prefix.
702 * @param[out] prefix_len Length of @p prefix.
703 * @param[out] name Parsed attribute name.
704 * @param[out] name_len Length of @p name.
705 * @return LY_ERR value.
706 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100707static LY_ERR
708lyxml_next_attribute(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
709{
710 const char *in;
711 char *value;
712 uint32_t c;
713 size_t parsed, value_len;
Radek Krejci857189e2020-09-01 13:26:36 +0200714 ly_bool ws_only, dynamic;
Michal Vaskob36053d2020-03-26 15:49:30 +0100715
716 /* skip WS */
717 ign_xmlws(xmlctx);
718
719 /* parse only possible attributes */
Michal Vasko63f3d842020-07-08 10:10:14 +0200720 while ((xmlctx->in->current[0] != '>') && (xmlctx->in->current[0] != '/')) {
721 in = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100722 if (in[0] == '\0') {
723 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
724 return LY_EVALID;
725 } else if ((ly_getutf8(&in, &c, &parsed) || !is_xmlqnamestartchar(c))) {
726 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in - parsed), in - parsed,
Michal Vasko69730152020-10-09 16:30:07 +0200727 "element tag end ('>' or '/>') or an attribute");
Michal Vaskob36053d2020-03-26 15:49:30 +0100728 return LY_EVALID;
729 }
730
731 /* parse attribute name */
732 LY_CHECK_RET(lyxml_parse_qname(xmlctx, prefix, prefix_len, name, name_len));
733
734 if ((!*prefix || ly_strncmp("xmlns", *prefix, *prefix_len)) && (*prefix || ly_strncmp("xmlns", *name, *name_len))) {
735 /* standard attribute */
736 break;
737 }
738
739 /* namespace, skip it */
740 LY_CHECK_RET(lyxml_next_attr_content(xmlctx, (const char **)&value, &value_len, &ws_only, &dynamic));
741 if (dynamic) {
742 free(value);
743 }
744
745 /* skip WS */
746 ign_xmlws(xmlctx);
747 }
748
749 return LY_SUCCESS;
750}
751
Michal Vasko8cef5232020-06-15 17:59:47 +0200752/**
753 * @brief Move parser to the next element and parse it.
754 *
755 * @param[in] xmlctx XML context to use.
756 * @param[out] prefix Parsed element prefix.
757 * @param[out] prefix_len Length of @p prefix.
758 * @param[out] name Parse element name.
759 * @param[out] name_len Length of @p name.
Radek Krejci1deb5be2020-08-26 16:43:36 +0200760 * @param[out] closing Flag if the element is closing (includes '/').
Michal Vasko8cef5232020-06-15 17:59:47 +0200761 * @return LY_ERR value.
762 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100763static LY_ERR
764lyxml_next_element(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len,
Radek Krejci857189e2020-09-01 13:26:36 +0200765 ly_bool *closing)
Michal Vaskob36053d2020-03-26 15:49:30 +0100766{
767 /* skip WS until EOF or after opening tag '<' */
768 LY_CHECK_RET(lyxml_skip_until_end_or_after_otag(xmlctx));
Michal Vasko63f3d842020-07-08 10:10:14 +0200769 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100770 /* set return values */
771 *prefix = *name = NULL;
772 *prefix_len = *name_len = 0;
773 return LY_SUCCESS;
774 }
775
Michal Vasko63f3d842020-07-08 10:10:14 +0200776 if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100777 move_input(xmlctx, 1);
778 *closing = 1;
779 } else {
780 *closing = 0;
781 }
782
783 /* skip WS */
784 ign_xmlws(xmlctx);
785
786 /* parse element name */
787 LY_CHECK_RET(lyxml_parse_qname(xmlctx, prefix, prefix_len, name, name_len));
788
789 return LY_SUCCESS;
790}
791
792LY_ERR
Michal Vasko63f3d842020-07-08 10:10:14 +0200793lyxml_ctx_new(const struct ly_ctx *ctx, struct ly_in *in, struct lyxml_ctx **xmlctx_p)
Michal Vaskob36053d2020-03-26 15:49:30 +0100794{
795 LY_ERR ret = LY_SUCCESS;
796 struct lyxml_ctx *xmlctx;
Radek Krejci857189e2020-09-01 13:26:36 +0200797 ly_bool closing;
Michal Vaskob36053d2020-03-26 15:49:30 +0100798
799 /* new context */
800 xmlctx = calloc(1, sizeof *xmlctx);
801 LY_CHECK_ERR_RET(!xmlctx, LOGMEM(ctx), LY_EMEM);
802 xmlctx->ctx = ctx;
803 xmlctx->line = 1;
Michal Vasko63f3d842020-07-08 10:10:14 +0200804 xmlctx->in = in;
Michal Vaskob36053d2020-03-26 15:49:30 +0100805
806 /* parse next element, if any */
807 LY_CHECK_GOTO(ret = lyxml_next_element(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name,
Michal Vasko69730152020-10-09 16:30:07 +0200808 &xmlctx->name_len, &closing), cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100809
Michal Vasko63f3d842020-07-08 10:10:14 +0200810 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100811 /* update status */
812 xmlctx->status = LYXML_END;
813 } else if (closing) {
814 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Stray closing element tag (\"%.*s\").",
Michal Vasko69730152020-10-09 16:30:07 +0200815 xmlctx->name_len, xmlctx->name);
Michal Vaskob36053d2020-03-26 15:49:30 +0100816 ret = LY_EVALID;
817 goto cleanup;
818 } else {
819 /* open an element, also parses all enclosed namespaces */
820 LY_CHECK_GOTO(ret = lyxml_open_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len), cleanup);
821
822 /* update status */
823 xmlctx->status = LYXML_ELEMENT;
824 }
825
826cleanup:
827 if (ret) {
828 lyxml_ctx_free(xmlctx);
829 } else {
830 *xmlctx_p = xmlctx;
831 }
832 return ret;
833}
834
835LY_ERR
836lyxml_ctx_next(struct lyxml_ctx *xmlctx)
837{
838 LY_ERR ret = LY_SUCCESS;
Radek Krejci857189e2020-09-01 13:26:36 +0200839 ly_bool closing;
Michal Vaskob36053d2020-03-26 15:49:30 +0100840 struct lyxml_elem *e;
841
842 /* if the value was not used, free it */
843 if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) {
844 free((char *)xmlctx->value);
845 xmlctx->value = NULL;
846 xmlctx->dynamic = 0;
847 }
848
849 switch (xmlctx->status) {
850 /* content |</elem> */
851 case LYXML_ELEM_CONTENT:
852 /* handle special case when empty content for "<elem/>" was returned */
Michal Vasko63f3d842020-07-08 10:10:14 +0200853 if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100854 assert(xmlctx->elements.count);
855 e = (struct lyxml_elem *)xmlctx->elements.objs[xmlctx->elements.count - 1];
856
857 /* close the element (parses closing tag) */
Michal Vasko63f3d842020-07-08 10:10:14 +0200858 ret = lyxml_close_element(xmlctx, e->prefix, e->prefix_len, e->name, e->name_len, 1);
859 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100860
861 /* update status */
862 xmlctx->status = LYXML_ELEM_CLOSE;
863 break;
864 }
Radek Krejci0f969882020-08-21 16:56:47 +0200865 /* fallthrough */
Michal Vaskob36053d2020-03-26 15:49:30 +0100866
867 /* </elem>| <elem2>* */
868 case LYXML_ELEM_CLOSE:
869 /* parse next element, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +0200870 ret = lyxml_next_element(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name, &xmlctx->name_len, &closing);
871 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100872
Michal Vasko63f3d842020-07-08 10:10:14 +0200873 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100874 /* update status */
875 xmlctx->status = LYXML_END;
876 } else if (closing) {
877 /* close an element (parses also closing tag) */
Michal Vasko63f3d842020-07-08 10:10:14 +0200878 ret = lyxml_close_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len, 0);
879 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100880
881 /* update status */
882 xmlctx->status = LYXML_ELEM_CLOSE;
883 } else {
884 /* open an element, also parses all enclosed namespaces */
Michal Vasko63f3d842020-07-08 10:10:14 +0200885 ret = lyxml_open_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len);
886 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100887
888 /* update status */
889 xmlctx->status = LYXML_ELEMENT;
890 }
891 break;
892
893 /* <elem| attr='val'* > content */
894 case LYXML_ELEMENT:
895
896 /* attr='val'| attr='val'* > content */
897 case LYXML_ATTR_CONTENT:
898 /* parse attribute name, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +0200899 ret = lyxml_next_attribute(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name, &xmlctx->name_len);
900 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100901
Michal Vasko63f3d842020-07-08 10:10:14 +0200902 if (xmlctx->in->current[0] == '>') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100903 /* no attributes but a closing tag */
Michal Vasko63f3d842020-07-08 10:10:14 +0200904 ly_in_skip(xmlctx->in, 1);
905 if (!xmlctx->in->current[0]) {
Michal Vaskof55ae202020-06-30 15:49:36 +0200906 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
907 ret = LY_EVALID;
908 goto cleanup;
909 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100910
911 /* parse element content */
Michal Vasko63f3d842020-07-08 10:10:14 +0200912 ret = lyxml_parse_value(xmlctx, '<', (char **)&xmlctx->value, &xmlctx->value_len, &xmlctx->ws_only,
Michal Vasko69730152020-10-09 16:30:07 +0200913 &xmlctx->dynamic);
Michal Vasko63f3d842020-07-08 10:10:14 +0200914 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100915
916 if (!xmlctx->value_len) {
917 /* use empty value, easier to work with */
918 xmlctx->value = "";
919 assert(!xmlctx->dynamic);
920 }
921
922 /* update status */
923 xmlctx->status = LYXML_ELEM_CONTENT;
Michal Vasko63f3d842020-07-08 10:10:14 +0200924 } else if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100925 /* no content but we still return it */
926 xmlctx->value = "";
927 xmlctx->value_len = 0;
928 xmlctx->ws_only = 1;
929 xmlctx->dynamic = 0;
930
931 /* update status */
932 xmlctx->status = LYXML_ELEM_CONTENT;
933 } else {
934 /* update status */
935 xmlctx->status = LYXML_ATTRIBUTE;
936 }
937 break;
938
939 /* attr|='val' */
940 case LYXML_ATTRIBUTE:
941 /* skip formatting and parse value */
Michal Vasko63f3d842020-07-08 10:10:14 +0200942 ret = lyxml_next_attr_content(xmlctx, &xmlctx->value, &xmlctx->value_len, &xmlctx->ws_only, &xmlctx->dynamic);
943 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100944
945 /* update status */
946 xmlctx->status = LYXML_ATTR_CONTENT;
947 break;
948
949 /* </elem> |EOF */
950 case LYXML_END:
951 /* nothing to do */
952 break;
953 }
954
955cleanup:
956 if (ret) {
957 /* invalidate context */
958 xmlctx->status = LYXML_END;
959 }
960 return ret;
961}
962
963LY_ERR
964lyxml_ctx_peek(struct lyxml_ctx *xmlctx, enum LYXML_PARSER_STATUS *next)
965{
966 LY_ERR ret = LY_SUCCESS;
967 const char *prefix, *name, *prev_input;
968 size_t prefix_len, name_len;
Radek Krejci857189e2020-09-01 13:26:36 +0200969 ly_bool closing;
Michal Vaskob36053d2020-03-26 15:49:30 +0100970
Michal Vasko63f3d842020-07-08 10:10:14 +0200971 prev_input = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100972
973 switch (xmlctx->status) {
974 case LYXML_ELEM_CONTENT:
Michal Vasko63f3d842020-07-08 10:10:14 +0200975 if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100976 *next = LYXML_ELEM_CLOSE;
977 break;
978 }
Radek Krejci0f969882020-08-21 16:56:47 +0200979 /* fallthrough */
Michal Vaskob36053d2020-03-26 15:49:30 +0100980 case LYXML_ELEM_CLOSE:
981 /* parse next element, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +0200982 ret = lyxml_next_element(xmlctx, &prefix, &prefix_len, &name, &name_len, &closing);
983 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100984
Michal Vasko63f3d842020-07-08 10:10:14 +0200985 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100986 *next = LYXML_END;
987 } else if (closing) {
988 *next = LYXML_ELEM_CLOSE;
989 } else {
990 *next = LYXML_ELEMENT;
991 }
992 break;
993 case LYXML_ELEMENT:
994 case LYXML_ATTR_CONTENT:
995 /* parse attribute name, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +0200996 ret = lyxml_next_attribute(xmlctx, &prefix, &prefix_len, &name, &name_len);
997 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100998
Michal Vasko63f3d842020-07-08 10:10:14 +0200999 if ((xmlctx->in->current[0] == '>') || (xmlctx->in->current[0] == '/')) {
Michal Vaskob36053d2020-03-26 15:49:30 +01001000 *next = LYXML_ELEM_CONTENT;
1001 } else {
1002 *next = LYXML_ATTRIBUTE;
1003 }
1004 break;
1005 case LYXML_ATTRIBUTE:
1006 *next = LYXML_ATTR_CONTENT;
1007 break;
1008 case LYXML_END:
1009 *next = LYXML_END;
1010 break;
1011 }
1012
1013cleanup:
Michal Vasko63f3d842020-07-08 10:10:14 +02001014 xmlctx->in->current = prev_input;
Michal Vaskob36053d2020-03-26 15:49:30 +01001015 return ret;
1016}
1017
1018void
1019lyxml_ctx_free(struct lyxml_ctx *xmlctx)
1020{
1021 uint32_t u;
1022
1023 if (!xmlctx) {
1024 return;
1025 }
1026
1027 if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) {
1028 free((char *)xmlctx->value);
1029 }
1030 ly_set_erase(&xmlctx->elements, free);
1031 for (u = xmlctx->ns.count - 1; u + 1 > 0; --u) {
1032 /* remove the ns structure */
1033 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->prefix);
1034 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->uri);
1035 free(xmlctx->ns.objs[u]);
1036 }
1037 ly_set_erase(&xmlctx->ns, NULL);
1038 free(xmlctx);
Radek Krejcib1890642018-10-03 14:05:40 +02001039}
Radek Krejcie7b95092019-05-15 11:03:07 +02001040
1041LY_ERR
Radek Krejci857189e2020-09-01 13:26:36 +02001042lyxml_dump_text(struct ly_out *out, const char *text, ly_bool attribute)
Radek Krejcie7b95092019-05-15 11:03:07 +02001043{
Michal Vasko5233e962020-08-14 14:26:20 +02001044 LY_ERR ret;
Radek Krejcie7b95092019-05-15 11:03:07 +02001045
1046 if (!text) {
1047 return 0;
1048 }
1049
Radek Krejci1deb5be2020-08-26 16:43:36 +02001050 for (uint64_t u = 0; text[u]; u++) {
Radek Krejcie7b95092019-05-15 11:03:07 +02001051 switch (text[u]) {
1052 case '&':
Michal Vasko5233e962020-08-14 14:26:20 +02001053 ret = ly_print_(out, "&amp;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001054 break;
1055 case '<':
Michal Vasko5233e962020-08-14 14:26:20 +02001056 ret = ly_print_(out, "&lt;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001057 break;
1058 case '>':
1059 /* not needed, just for readability */
Michal Vasko5233e962020-08-14 14:26:20 +02001060 ret = ly_print_(out, "&gt;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001061 break;
1062 case '"':
1063 if (attribute) {
Michal Vasko5233e962020-08-14 14:26:20 +02001064 ret = ly_print_(out, "&quot;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001065 break;
1066 }
Radek Krejci0f969882020-08-21 16:56:47 +02001067 /* falls through */
Radek Krejcie7b95092019-05-15 11:03:07 +02001068 default:
Michal Vasko5233e962020-08-14 14:26:20 +02001069 ret = ly_write_(out, &text[u], 1);
1070 break;
Radek Krejcie7b95092019-05-15 11:03:07 +02001071 }
Michal Vasko5233e962020-08-14 14:26:20 +02001072 LY_CHECK_RET(ret);
Radek Krejcie7b95092019-05-15 11:03:07 +02001073 }
1074
Michal Vasko5233e962020-08-14 14:26:20 +02001075 return LY_SUCCESS;
Radek Krejcie7b95092019-05-15 11:03:07 +02001076}
1077
Michal Vasko52927e22020-03-16 17:26:14 +01001078LY_ERR
Michal Vaskob36053d2020-03-26 15:49:30 +01001079lyxml_get_prefixes(struct lyxml_ctx *xmlctx, const char *value, size_t value_len, struct ly_prefix **val_prefs)
Michal Vasko52927e22020-03-16 17:26:14 +01001080{
1081 LY_ERR ret;
Michal Vaskofd69e1d2020-07-03 11:57:17 +02001082 LY_ARRAY_COUNT_TYPE u;
Radek Krejci7eb54ba2020-05-18 16:30:04 +02001083 uint32_t c;
Michal Vasko52927e22020-03-16 17:26:14 +01001084 const struct lyxml_ns *ns;
1085 const char *start, *stop;
1086 struct ly_prefix *prefixes = NULL;
1087 size_t len;
1088
1089 for (stop = start = value; (size_t)(stop - value) < value_len; start = stop) {
1090 size_t bytes;
1091 ly_getutf8(&stop, &c, &bytes);
1092 if (is_xmlqnamestartchar(c)) {
1093 for (ly_getutf8(&stop, &c, &bytes);
1094 is_xmlqnamechar(c) && (size_t)(stop - value) < value_len;
Radek Krejci1e008d22020-08-17 11:37:37 +02001095 ly_getutf8(&stop, &c, &bytes)) {}
Michal Vasko52927e22020-03-16 17:26:14 +01001096 stop = stop - bytes;
1097 if (*stop == ':') {
1098 /* we have a possible prefix */
1099 len = stop - start;
Michal Vaskoc8a230d2020-08-14 12:17:10 +02001100 ns = lyxml_ns_get(&xmlctx->ns, start, len);
Michal Vasko52927e22020-03-16 17:26:14 +01001101 if (ns) {
1102 struct ly_prefix *p = NULL;
1103
1104 /* check whether we do not already have this prefix stored */
1105 LY_ARRAY_FOR(prefixes, u) {
Radek Krejci1798aae2020-07-14 13:26:06 +02001106 if (!ly_strncmp(prefixes[u].id, start, len)) {
Michal Vasko52927e22020-03-16 17:26:14 +01001107 p = &prefixes[u];
1108 break;
1109 }
1110 }
1111 if (!p) {
Michal Vaskob36053d2020-03-26 15:49:30 +01001112 LY_ARRAY_NEW_GOTO(xmlctx->ctx, prefixes, p, ret, error);
Radek Krejci011e4aa2020-09-04 15:22:31 +02001113 LY_CHECK_GOTO(ret = lydict_insert(xmlctx->ctx, start, len, &p->id), error);
1114 LY_CHECK_GOTO(ret = lydict_insert(xmlctx->ctx, ns->uri, 0, &p->module_ns), error);
Michal Vasko52927e22020-03-16 17:26:14 +01001115 } /* else the prefix already present */
1116 }
1117 }
1118 stop = stop + bytes;
1119 }
1120 }
1121
1122 *val_prefs = prefixes;
1123 return LY_SUCCESS;
1124
1125error:
1126 LY_ARRAY_FOR(prefixes, u) {
Radek Krejci1798aae2020-07-14 13:26:06 +02001127 lydict_remove(xmlctx->ctx, prefixes[u].id);
1128 lydict_remove(xmlctx->ctx, prefixes[u].module_ns);
Michal Vasko52927e22020-03-16 17:26:14 +01001129 }
1130 LY_ARRAY_FREE(prefixes);
1131 return ret;
1132}
1133
1134LY_ERR
1135lyxml_value_compare(const char *value1, const struct ly_prefix *prefs1, const char *value2, const struct ly_prefix *prefs2)
1136{
1137 const char *ptr1, *ptr2, *ns1, *ns2;
Michal Vaskofd69e1d2020-07-03 11:57:17 +02001138 LY_ARRAY_COUNT_TYPE u1, u2;
Michal Vasko52927e22020-03-16 17:26:14 +01001139
1140 if (!value1 && !value2) {
1141 return LY_SUCCESS;
1142 }
1143 if ((value1 && !value2) || (!value1 && value2)) {
1144 return LY_ENOT;
1145 }
1146
1147 ptr1 = value1;
1148 ptr2 = value2;
1149 while (ptr1[0] && ptr2[0]) {
1150 if (ptr1[0] != ptr2[0]) {
1151 /* it can be a start of prefix that maps to the same module */
Radek Krejci1deb5be2020-08-26 16:43:36 +02001152 size_t len;
Michal Vasko52927e22020-03-16 17:26:14 +01001153 ns1 = ns2 = NULL;
Michal Vaskoed4fcfe2020-07-08 10:38:56 +02001154 u1 = u2 = 0;
Michal Vasko52927e22020-03-16 17:26:14 +01001155 if (prefs1) {
1156 /* find module of the first prefix, if any */
1157 LY_ARRAY_FOR(prefs1, u1) {
Radek Krejci1798aae2020-07-14 13:26:06 +02001158 len = strlen(prefs1[u1].id);
1159 if (!strncmp(ptr1, prefs1[u1].id, len) && (ptr1[len] == ':')) {
1160 ns1 = prefs1[u1].module_ns;
Michal Vasko52927e22020-03-16 17:26:14 +01001161 break;
1162 }
1163 }
1164 }
1165 if (prefs2) {
1166 /* find module of the second prefix, if any */
1167 LY_ARRAY_FOR(prefs2, u2) {
Radek Krejci1798aae2020-07-14 13:26:06 +02001168 len = strlen(prefs2[u2].id);
1169 if (!strncmp(ptr2, prefs2[u2].id, len) && (ptr2[len] == ':')) {
1170 ns2 = prefs2[u2].module_ns;
Michal Vasko52927e22020-03-16 17:26:14 +01001171 break;
1172 }
1173 }
1174 }
1175
1176 if (!ns1 || !ns2 || (ns1 != ns2)) {
1177 /* not a prefix or maps to different namespaces */
1178 break;
1179 }
1180
1181 /* skip prefixes in both values (':' is skipped as iter) */
Radek Krejci1798aae2020-07-14 13:26:06 +02001182 ptr1 += strlen(prefs1[u1].id);
1183 ptr2 += strlen(prefs2[u2].id);
Michal Vasko52927e22020-03-16 17:26:14 +01001184 }
1185
1186 ++ptr1;
1187 ++ptr2;
1188 }
1189 if (ptr1[0] || ptr2[0]) {
1190 /* not a match or simply different lengths */
1191 return LY_ENOT;
1192 }
1193
1194 return LY_SUCCESS;
1195}