blob: 67819eead8a33a433c15cea948b4d37631bbd18e [file] [log] [blame]
/**
 * @file xml.c
 * @author Radek Krejci <rkrejci@cesnet.cz>
 * @author Michal Vasko <mvasko@cesnet.cz>
 * @brief Generic XML parser implementation for libyang
 *
 * Copyright (c) 2015 - 2018 CESNET, z.s.p.o.
 *
 * This source code is licensed under BSD 3-Clause License (the "License").
 * You may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://opensource.org/licenses/BSD-3-Clause
 */
15
Radek Krejci535ea9f2020-05-29 16:01:05 +020016#define _GNU_SOURCE
17
18#include "xml.h"
Radek Krejci4b74d5e2018-09-26 14:30:55 +020019
Radek Krejcib1890642018-10-03 14:05:40 +020020#include <assert.h>
Radek Krejci7a7fa902018-09-25 17:08:21 +020021#include <ctype.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020022#include <stdint.h>
Radek Krejcie7b95092019-05-15 11:03:07 +020023#include <stdlib.h>
Radek Krejci4b74d5e2018-09-26 14:30:55 +020024#include <string.h>
Radek Krejcica376bd2020-06-11 16:04:06 +020025#include <sys/types.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020026
Radek Krejci535ea9f2020-05-29 16:01:05 +020027#include "common.h"
Michal Vasko5aa44c02020-06-29 11:47:02 +020028#include "compat.h"
Radek Krejci535ea9f2020-05-29 16:01:05 +020029#include "dict.h"
Michal Vasko63f3d842020-07-08 10:10:14 +020030#include "parser_internal.h"
Radek Krejci535ea9f2020-05-29 16:01:05 +020031#include "printer.h"
32#include "tree.h"
33#include "tree_data.h"
Radek Krejcid91dbaf2018-09-21 15:51:39 +020034
Michal Vaskob36053d2020-03-26 15:49:30 +010035/* Move input p by s characters, if EOF log with lyxml_ctx c */
Michal Vasko63f3d842020-07-08 10:10:14 +020036#define move_input(c,s) ly_in_skip(c->in, s); LY_CHECK_ERR_RET(!c->in->current[0], LOGVAL(c->ctx, LY_VLOG_LINE, &c->line, LY_VCODE_EOF), LY_EVALID)
Radek Krejcid91dbaf2018-09-21 15:51:39 +020037
Radek Krejcib1890642018-10-03 14:05:40 +020038/* Ignore whitespaces in the input string p */
Michal Vasko63f3d842020-07-08 10:10:14 +020039#define ign_xmlws(c) while (is_xmlws(*(c)->in->current)) {if (*(c)->in->current == '\n') {++c->line;} ly_in_skip(c->in, 1);}
Michal Vaskob36053d2020-03-26 15:49:30 +010040
41static LY_ERR lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *value_len, int *ws_only,
42 int *dynamic);
Radek Krejcid91dbaf2018-09-21 15:51:39 +020043
/**
 * @brief Scan a NUL-terminated input for a delimiter, passing over everything in front of it.
 *
 * @param[in] input String to search.
 * @param[in] delim Delimiter to look for.
 * @param[in] delim_len Length of @p delim in bytes.
 * @param[out] newlines Number of new lines passed while searching.
 * @param[out] parsed Number of bytes in front of the delimiter, or the whole input length if it was not found.
 * @return 0 if the delimiter was found, -1 if it was not.
 */
static int
ign_todelim(register const char *input, const char *delim, size_t delim_len, size_t *newlines, size_t *parsed)
{
    const char *pos;
    size_t lines = 0;
    int result = -1;

    for (pos = input; *pos != '\0'; ++pos) {
        if (*pos == *delim) {
            /* candidate, compare the whole delimiter (the comparison stops at the end of input) */
            if (!strncmp(pos, delim, delim_len)) {
                result = 0;
                break;
            }
        } else if (*pos == '\n') {
            ++lines;
        }
    }

    *newlines = lines;
    *parsed = (size_t)(pos - input);
    return result;
}
81
Radek Krejci4b74d5e2018-09-26 14:30:55 +020082/**
Michal Vaskob36053d2020-03-26 15:49:30 +010083 * @brief Check/Get an XML identifier from the input string.
84 *
85 * The identifier must have at least one valid character complying the name start character constraints.
86 * The identifier is terminated by the first character, which does not comply to the name character constraints.
87 *
88 * See https://www.w3.org/TR/xml-names/#NT-NCName
89 *
90 * @param[in] xmlctx XML context.
91 * @param[out] start Pointer to the start of the identifier.
92 * @param[out] end Pointer ot the end of the identifier.
93 * @return LY_ERR value.
94 */
95static LY_ERR
96lyxml_parse_identifier(struct lyxml_ctx *xmlctx, const char **start, const char **end)
97{
98 const char *s, *in;
99 uint32_t c;
100 size_t parsed;
101 LY_ERR rc;
102
Michal Vasko63f3d842020-07-08 10:10:14 +0200103 in = s = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100104
105 /* check NameStartChar (minus colon) */
106 LY_CHECK_ERR_RET(ly_getutf8(&in, &c, &parsed),
107 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INCHAR, in[0]),
108 LY_EVALID);
109 LY_CHECK_ERR_RET(!is_xmlqnamestartchar(c),
110 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
111 "Identifier \"%s\" starts with an invalid character.", in - parsed),
112 LY_EVALID);
113
114 /* check rest of the identifier */
115 do {
116 /* move only successfully parsed bytes */
Michal Vasko63f3d842020-07-08 10:10:14 +0200117 ly_in_skip(xmlctx->in, parsed);
Michal Vaskob36053d2020-03-26 15:49:30 +0100118
119 rc = ly_getutf8(&in, &c, &parsed);
120 LY_CHECK_ERR_RET(rc, LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INCHAR, in[0]), LY_EVALID);
121 } while (is_xmlqnamechar(c));
122
123 *start = s;
Michal Vasko63f3d842020-07-08 10:10:14 +0200124 *end = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100125 return LY_SUCCESS;
126}
127
128/**
129 * @brief Add namespace definition into XML context.
130 *
131 * Namespaces from a single element are supposed to be added sequentially together (not interleaved by a namespace from other
132 * element). This mimic namespace visibility, since the namespace defined in element E is not visible from its parents or
133 * siblings. On the other hand, namespace from a parent element can be redefined in a child element. This is also reflected
134 * by lyxml_ns_get() which returns the most recent namespace definition for the given prefix.
135 *
136 * When leaving processing of a subtree of some element (after it is removed from xmlctx->elements), caller is supposed to call
137 * lyxml_ns_rm() to remove all the namespaces defined in such an element from the context.
138 *
139 * @param[in] xmlctx XML context to work with.
140 * @param[in] prefix Pointer to the namespace prefix. Can be NULL for default namespace.
141 * @param[in] prefix_len Length of the prefix.
142 * @param[in] uri Namespace URI (value) to store directly. Value is always spent.
143 * @return LY_ERR values.
144 */
145LY_ERR
146lyxml_ns_add(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, char *uri)
147{
148 struct lyxml_ns *ns;
149
150 ns = malloc(sizeof *ns);
151 LY_CHECK_ERR_RET(!ns, LOGMEM(xmlctx->ctx), LY_EMEM);
152
153 /* we need to connect the depth of the element where the namespace is defined with the
154 * namespace record to be able to maintain (remove) the record when the parser leaves
155 * (to its sibling or back to the parent) the element where the namespace was defined */
156 ns->depth = xmlctx->elements.count;
157
158 ns->uri = uri;
159 if (prefix) {
160 ns->prefix = strndup(prefix, prefix_len);
161 LY_CHECK_ERR_RET(!ns->prefix, LOGMEM(xmlctx->ctx); free(ns->uri); free(ns), LY_EMEM);
162 } else {
163 ns->prefix = NULL;
164 }
165
166 LY_CHECK_ERR_RET(ly_set_add(&xmlctx->ns, ns, LY_SET_OPT_USEASLIST) == -1,
167 free(ns->prefix); free(ns->uri); free(ns), LY_EMEM);
168 return LY_SUCCESS;
169}
170
171/**
172 * @brief Remove all the namespaces defined in the element recently closed (removed from the xmlctx->elements).
173 *
174 * @param[in] xmlctx XML context to work with.
175 */
176void
177lyxml_ns_rm(struct lyxml_ctx *xmlctx)
178{
179 unsigned int u;
180
181 for (u = xmlctx->ns.count - 1; u + 1 > 0; --u) {
182 if (((struct lyxml_ns *)xmlctx->ns.objs[u])->depth != xmlctx->elements.count + 1) {
183 /* we are done, the namespaces from a single element are supposed to be together */
184 break;
185 }
186 /* remove the ns structure */
187 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->prefix);
188 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->uri);
189 free(xmlctx->ns.objs[u]);
190 --xmlctx->ns.count;
191 }
192
193 if (!xmlctx->ns.count) {
194 /* cleanup the xmlctx's namespaces storage */
195 ly_set_erase(&xmlctx->ns, NULL);
196 }
197}
198
Michal Vaskob36053d2020-03-26 15:49:30 +0100199const struct lyxml_ns *
200lyxml_ns_get(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len)
201{
202 unsigned int u;
203 struct lyxml_ns *ns;
204
205 for (u = xmlctx->ns.count - 1; u + 1 > 0; --u) {
206 ns = (struct lyxml_ns *)xmlctx->ns.objs[u];
207 if (prefix && prefix_len) {
208 if (ns->prefix && !ly_strncmp(ns->prefix, prefix, prefix_len)) {
209 return ns;
210 }
211 } else if (!ns->prefix) {
212 /* default namespace */
213 return ns;
214 }
215 }
216
217 return NULL;
218}
219
Michal Vasko8cef5232020-06-15 17:59:47 +0200220/**
221 * @brief Skip in the input until EOF or just after the opening tag.
222 * Handles special XML constructs (comment, cdata, doctype).
223 *
224 * @param[in] xmlctx XML context to use.
225 * @return LY_ERR value.
226 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100227static LY_ERR
228lyxml_skip_until_end_or_after_otag(struct lyxml_ctx *xmlctx)
229{
230 const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */
Michal Vasko63f3d842020-07-08 10:10:14 +0200231 const char *endtag, *sectname;
232 size_t endtag_len, newlines, parsed;
233 int rc;
Michal Vaskob36053d2020-03-26 15:49:30 +0100234
235 while (1) {
236 ign_xmlws(xmlctx);
237
Michal Vasko63f3d842020-07-08 10:10:14 +0200238 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100239 /* EOF */
240 if (xmlctx->elements.count) {
241 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
242 return LY_EVALID;
243 }
244 return LY_SUCCESS;
Michal Vasko63f3d842020-07-08 10:10:14 +0200245 } else if (xmlctx->in->current[0] != '<') {
246 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
247 xmlctx->in->current, "element tag start ('<')");
Michal Vaskob36053d2020-03-26 15:49:30 +0100248 return LY_EVALID;
249 }
250 move_input(xmlctx, 1);
251
Michal Vasko63f3d842020-07-08 10:10:14 +0200252 if (xmlctx->in->current[0] == '!') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100253 move_input(xmlctx, 1);
254 /* sections to ignore */
Michal Vasko63f3d842020-07-08 10:10:14 +0200255 if (!strncmp(xmlctx->in->current, "--", 2)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100256 /* comment */
257 move_input(xmlctx, 2);
258 sectname = "Comment";
259 endtag = "-->";
260 endtag_len = 3;
Michal Vasko63f3d842020-07-08 10:10:14 +0200261 } else if (!strncmp(xmlctx->in->current, "[CDATA[", 7)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100262 /* CDATA section */
263 move_input(xmlctx, 7);
264 sectname = "CData";
265 endtag = "]]>";
266 endtag_len = 3;
Michal Vasko63f3d842020-07-08 10:10:14 +0200267 } else if (!strncmp(xmlctx->in->current, "DOCTYPE", 7)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100268 /* Document type declaration - not supported */
269 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_NSUPP, "Document Type Declaration");
270 return LY_EVALID;
271 } else {
Michal Vasko63f3d842020-07-08 10:10:14 +0200272 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Unknown XML section \"%.20s\".",
273 &xmlctx->in->current[-2]);
Michal Vaskob36053d2020-03-26 15:49:30 +0100274 return LY_EVALID;
275 }
Michal Vasko63f3d842020-07-08 10:10:14 +0200276 rc = ign_todelim(xmlctx->in->current, endtag, endtag_len, &newlines, &parsed);
277 LY_CHECK_ERR_RET(rc, LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_NTERM, sectname), LY_EVALID);
Michal Vaskob36053d2020-03-26 15:49:30 +0100278 xmlctx->line += newlines;
Michal Vasko63f3d842020-07-08 10:10:14 +0200279 ly_in_skip(xmlctx->in, parsed + endtag_len);
280 } else if (xmlctx->in->current[0] == '?') {
281 rc = ign_todelim(xmlctx->in->current, "?>", 2, &newlines, &parsed);
282 LY_CHECK_ERR_RET(rc, LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_NTERM, "Declaration"), LY_EVALID);
Michal Vaskob36053d2020-03-26 15:49:30 +0100283 xmlctx->line += newlines;
Michal Vasko63f3d842020-07-08 10:10:14 +0200284 ly_in_skip(xmlctx->in, parsed + 2);
Michal Vaskob36053d2020-03-26 15:49:30 +0100285 } else {
286 /* other non-WS character */
287 break;
288 }
289 }
290
291 return LY_SUCCESS;
292}
293
Michal Vasko8cef5232020-06-15 17:59:47 +0200294/**
295 * @brief Parse QName.
296 *
297 * @param[in] xmlctx XML context to use.
298 * @param[out] prefix Parsed prefix, may be NULL.
299 * @param[out] prefix_len Length of @p prefix.
300 * @param[out] name Parsed name.
301 * @param[out] name_len Length of @p name.
302 * @return LY_ERR value.
303 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100304static LY_ERR
305lyxml_parse_qname(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
306{
307 const char *start, *end;
308
309 *prefix = NULL;
310 *prefix_len = 0;
311
312 LY_CHECK_RET(lyxml_parse_identifier(xmlctx, &start, &end));
313 if (end[0] == ':') {
314 /* we have prefixed identifier */
315 *prefix = start;
316 *prefix_len = end - start;
317
318 move_input(xmlctx, 1);
319 LY_CHECK_RET(lyxml_parse_identifier(xmlctx, &start, &end));
320 }
321
322 *name = start;
323 *name_len = end - start;
324 return LY_SUCCESS;
325}
326
327/**
Radek Krejci7a7fa902018-09-25 17:08:21 +0200328 * Store UTF-8 character specified as 4byte integer into the dst buffer.
329 * Returns number of written bytes (4 max), expects that dst has enough space.
330 *
331 * UTF-8 mapping:
332 * 00000000 -- 0000007F: 0xxxxxxx
333 * 00000080 -- 000007FF: 110xxxxx 10xxxxxx
334 * 00000800 -- 0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
335 * 00010000 -- 001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
336 *
337 * Includes checking for valid characters (following RFC 7950, sec 9.4)
338 */
339static LY_ERR
Radek Krejci117d2082018-09-26 10:05:14 +0200340lyxml_pututf8(char *dst, uint32_t value, size_t *bytes_written)
Radek Krejci7a7fa902018-09-25 17:08:21 +0200341{
342 if (value < 0x80) {
343 /* one byte character */
344 if (value < 0x20 &&
345 value != 0x09 &&
346 value != 0x0a &&
347 value != 0x0d) {
348 return LY_EINVAL;
349 }
350
351 dst[0] = value;
352 (*bytes_written) = 1;
353 } else if (value < 0x800) {
354 /* two bytes character */
355 dst[0] = 0xc0 | (value >> 6);
356 dst[1] = 0x80 | (value & 0x3f);
357 (*bytes_written) = 2;
358 } else if (value < 0xfffe) {
359 /* three bytes character */
360 if (((value & 0xf800) == 0xd800) ||
361 (value >= 0xfdd0 && value <= 0xfdef)) {
362 /* exclude surrogate blocks %xD800-DFFF */
363 /* exclude noncharacters %xFDD0-FDEF */
364 return LY_EINVAL;
365 }
366
367 dst[0] = 0xe0 | (value >> 12);
368 dst[1] = 0x80 | ((value >> 6) & 0x3f);
369 dst[2] = 0x80 | (value & 0x3f);
370
371 (*bytes_written) = 3;
372 } else if (value < 0x10fffe) {
373 if ((value & 0xffe) == 0xffe) {
374 /* exclude noncharacters %xFFFE-FFFF, %x1FFFE-1FFFF, %x2FFFE-2FFFF, %x3FFFE-3FFFF, %x4FFFE-4FFFF,
375 * %x5FFFE-5FFFF, %x6FFFE-6FFFF, %x7FFFE-7FFFF, %x8FFFE-8FFFF, %x9FFFE-9FFFF, %xAFFFE-AFFFF,
376 * %xBFFFE-BFFFF, %xCFFFE-CFFFF, %xDFFFE-DFFFF, %xEFFFE-EFFFF, %xFFFFE-FFFFF, %x10FFFE-10FFFF */
377 return LY_EINVAL;
378 }
379 /* four bytes character */
380 dst[0] = 0xf0 | (value >> 18);
381 dst[1] = 0x80 | ((value >> 12) & 0x3f);
382 dst[2] = 0x80 | ((value >> 6) & 0x3f);
383 dst[3] = 0x80 | (value & 0x3f);
384
385 (*bytes_written) = 4;
Juraj Vijtiukcb017cc2020-07-08 16:19:58 +0200386 } else {
387 return LY_EINVAL;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200388 }
389 return LY_SUCCESS;
390}
391
Michal Vasko8cef5232020-06-15 17:59:47 +0200392/**
393 * @brief Parse XML text content (value).
394 *
395 * @param[in] xmlctx XML context to use.
396 * @param[in] endchar Expected character to mark value end.
397 * @param[out] value Parsed value.
398 * @param[out] length Length of @p value.
399 * @param[out] ws_only Whether the value is empty/white-spaces only.
400 * @param[out] dynamic Whether the value was dynamically allocated.
401 * @return LY_ERR value.
402 */
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200403static LY_ERR
Michal Vaskob36053d2020-03-26 15:49:30 +0100404lyxml_parse_value(struct lyxml_ctx *xmlctx, char endchar, char **value, size_t *length, int *ws_only, int *dynamic)
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200405{
Michal Vaskob36053d2020-03-26 15:49:30 +0100406#define BUFSIZE 24
407#define BUFSIZE_STEP 128
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200408
Michal Vaskob36053d2020-03-26 15:49:30 +0100409 const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */
Michal Vasko63f3d842020-07-08 10:10:14 +0200410 const char *in = xmlctx->in->current, *start;
Michal Vaskob36053d2020-03-26 15:49:30 +0100411 char *buf = NULL;
Radek Krejci4ad42aa2019-07-23 16:55:58 +0200412 size_t offset; /* read offset in input buffer */
413 size_t len; /* length of the output string (write offset in output buffer) */
414 size_t size = 0; /* size of the output buffer */
Radek Krejci7a7fa902018-09-25 17:08:21 +0200415 void *p;
Radek Krejci117d2082018-09-26 10:05:14 +0200416 uint32_t n;
Michal Vaskob36053d2020-03-26 15:49:30 +0100417 size_t u;
418 int ws = 1;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200419
Michal Vaskob36053d2020-03-26 15:49:30 +0100420 assert(xmlctx);
Radek Krejcib1890642018-10-03 14:05:40 +0200421
Radek Krejcid70d1072018-10-09 14:20:47 +0200422 /* init */
Michal Vaskob36053d2020-03-26 15:49:30 +0100423 start = in;
Radek Krejcid70d1072018-10-09 14:20:47 +0200424 offset = len = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200425
426 /* parse */
427 while (in[offset]) {
428 if (in[offset] == '&') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100429 /* non WS */
430 ws = 0;
Radek Krejcid70d1072018-10-09 14:20:47 +0200431
Michal Vaskob36053d2020-03-26 15:49:30 +0100432 if (!buf) {
433 /* prepare output buffer */
434 buf = malloc(BUFSIZE);
435 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
436 size = BUFSIZE;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200437 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100438
439 /* allocate enough for the offset and next character,
440 * we will need 4 bytes at most since we support only the predefined
441 * (one-char) entities and character references */
Juraj Vijtiukcb017cc2020-07-08 16:19:58 +0200442 while (len + offset + 4 >= size) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100443 buf = ly_realloc(buf, size + BUFSIZE_STEP);
444 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
445 size += BUFSIZE_STEP;
446 }
447
448 if (offset) {
449 /* store what we have so far */
450 memcpy(&buf[len], in, offset);
451 len += offset;
452 in += offset;
453 offset = 0;
454 }
455
Radek Krejci7a7fa902018-09-25 17:08:21 +0200456 ++offset;
457 if (in[offset] != '#') {
458 /* entity reference - only predefined references are supported */
459 if (!strncmp(&in[offset], "lt;", 3)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100460 buf[len++] = '<';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200461 in += 4; /* &lt; */
462 } else if (!strncmp(&in[offset], "gt;", 3)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100463 buf[len++] = '>';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200464 in += 4; /* &gt; */
465 } else if (!strncmp(&in[offset], "amp;", 4)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100466 buf[len++] = '&';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200467 in += 5; /* &amp; */
468 } else if (!strncmp(&in[offset], "apos;", 5)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100469 buf[len++] = '\'';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200470 in += 6; /* &apos; */
471 } else if (!strncmp(&in[offset], "quot;", 5)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100472 buf[len++] = '\"';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200473 in += 6; /* &quot; */
474 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100475 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
Radek Krejcied6c6ad2018-09-26 09:10:18 +0200476 "Entity reference \"%.*s\" not supported, only predefined references allowed.", 10, &in[offset-1]);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200477 goto error;
478 }
479 offset = 0;
480 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100481 p = (void *)&in[offset - 1];
Radek Krejci7a7fa902018-09-25 17:08:21 +0200482 /* character reference */
483 ++offset;
484 if (isdigit(in[offset])) {
485 for (n = 0; isdigit(in[offset]); offset++) {
486 n = (10 * n) + (in[offset] - '0');
487 }
488 } else if (in[offset] == 'x' && isxdigit(in[offset + 1])) {
489 for (n = 0, ++offset; isxdigit(in[offset]); offset++) {
490 if (isdigit(in[offset])) {
491 u = (in[offset] - '0');
492 } else if (in[offset] > 'F') {
493 u = 10 + (in[offset] - 'a');
494 } else {
495 u = 10 + (in[offset] - 'A');
496 }
497 n = (16 * n) + u;
498 }
499 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100500 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Invalid character reference \"%.*s\".", 12, p);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200501 goto error;
502
503 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100504
Radek Krejci7a7fa902018-09-25 17:08:21 +0200505 LY_CHECK_ERR_GOTO(in[offset] != ';',
Michal Vaskob36053d2020-03-26 15:49:30 +0100506 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP,
Radek Krejci7a7fa902018-09-25 17:08:21 +0200507 LY_VCODE_INSTREXP_len(&in[offset]), &in[offset], ";"),
508 error);
509 ++offset;
Michal Vaskob36053d2020-03-26 15:49:30 +0100510 LY_CHECK_ERR_GOTO(lyxml_pututf8(&buf[len], n, &u),
511 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
512 "Invalid character reference \"%.*s\" (0x%08x).", 12, p, n),
Radek Krejci7a7fa902018-09-25 17:08:21 +0200513 error);
514 len += u;
515 in += offset;
516 offset = 0;
517 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100518 } else if (in[offset] == endchar) {
Radek Krejci7a7fa902018-09-25 17:08:21 +0200519 /* end of string */
Radek Krejcid70d1072018-10-09 14:20:47 +0200520 if (buf) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100521 /* realloc exact size string */
522 buf = ly_realloc(buf, len + offset + 1);
523 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
524 size = len + offset + 1;
Radek Krejcid70d1072018-10-09 14:20:47 +0200525 memcpy(&buf[len], in, offset);
Michal Vaskob36053d2020-03-26 15:49:30 +0100526
527 /* set terminating NULL byte */
528 buf[len + offset] = '\0';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200529 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200530 len += offset;
Michal Vaskob36053d2020-03-26 15:49:30 +0100531 in += offset;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200532 goto success;
533 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100534 if (!is_xmlws(in[offset])) {
535 /* non WS */
536 ws = 0;
537 }
538
Radek Krejci7a7fa902018-09-25 17:08:21 +0200539 /* log lines */
540 if (in[offset] == '\n') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100541 ++xmlctx->line;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200542 }
543
544 /* continue */
545 ++offset;
546 }
547 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100548
549 /* EOF reached before endchar */
550 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
551
Radek Krejci7a7fa902018-09-25 17:08:21 +0200552error:
Michal Vaskob36053d2020-03-26 15:49:30 +0100553 free(buf);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200554 return LY_EVALID;
555
556success:
Radek Krejcid70d1072018-10-09 14:20:47 +0200557 if (buf) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100558 *value = buf;
559 *dynamic = 1;
560 } else {
561 *value = (char *)start;
562 *dynamic = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200563 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100564 *length = len;
565 *ws_only = ws;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200566
Michal Vasko63f3d842020-07-08 10:10:14 +0200567 ly_in_skip(xmlctx->in, in - xmlctx->in->current);
Michal Vaskob36053d2020-03-26 15:49:30 +0100568 return LY_SUCCESS;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200569
570#undef BUFSIZE
571#undef BUFSIZE_STEP
Radek Krejci7a7fa902018-09-25 17:08:21 +0200572}
573
Michal Vasko8cef5232020-06-15 17:59:47 +0200574/**
575 * @brief Parse XML closing element and match it to a stored starting element.
576 *
577 * @param[in] xmlctx XML context to use.
578 * @param[in] prefix Expected closing element prefix.
579 * @param[in] prefix_len Length of @p prefix.
580 * @param[in] name Expected closing element name.
581 * @param[in] name_len Length of @p name.
582 * @param[in] empty Whether we are parsing a special "empty" element (with joined starting and closing tag) with no value.
583 * @return LY_ERR value.
584 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100585static LY_ERR
586lyxml_close_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, const char *name, size_t name_len,
587 int empty)
Radek Krejcid972c252018-09-25 13:23:39 +0200588{
Michal Vaskob36053d2020-03-26 15:49:30 +0100589 struct lyxml_elem *e;
Radek Krejcid972c252018-09-25 13:23:39 +0200590
Michal Vaskob36053d2020-03-26 15:49:30 +0100591 /* match opening and closing element tags */
592 if (!xmlctx->elements.count) {
593 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Stray closing element tag (\"%.*s\").",
594 name_len, name);
595 return LY_EVALID;
596 }
Radek Krejcid972c252018-09-25 13:23:39 +0200597
Michal Vaskob36053d2020-03-26 15:49:30 +0100598 e = (struct lyxml_elem *)xmlctx->elements.objs[xmlctx->elements.count - 1];
599 if ((e->prefix_len != prefix_len) || (e->name_len != name_len)
600 || (prefix_len && strncmp(prefix, e->prefix, e->prefix_len)) || strncmp(name, e->name, e->name_len)) {
601 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
602 "Opening (\"%.*s%s%.*s\") and closing (\"%.*s%s%.*s\") elements tag mismatch.",
603 e->prefix_len, e->prefix ? e->prefix : "", e->prefix ? ":" : "", e->name_len, e->name,
604 prefix_len, prefix ? prefix : "", prefix ? ":" : "", name_len, name);
605 return LY_EVALID;
606 }
Radek Krejcid972c252018-09-25 13:23:39 +0200607
Michal Vaskob36053d2020-03-26 15:49:30 +0100608 /* opening and closing element tags matches, remove record from the opening tags list */
609 ly_set_rm_index(&xmlctx->elements, xmlctx->elements.count - 1, free);
Radek Krejcid972c252018-09-25 13:23:39 +0200610
Michal Vaskob36053d2020-03-26 15:49:30 +0100611 /* remove also the namespaces connected with the element */
612 lyxml_ns_rm(xmlctx);
Radek Krejcid972c252018-09-25 13:23:39 +0200613
Michal Vaskob36053d2020-03-26 15:49:30 +0100614 /* skip WS */
615 ign_xmlws(xmlctx);
Radek Krejcid972c252018-09-25 13:23:39 +0200616
Michal Vaskob36053d2020-03-26 15:49:30 +0100617 /* special "<elem/>" element */
Michal Vasko63f3d842020-07-08 10:10:14 +0200618 if (empty && (xmlctx->in->current[0] == '/')) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100619 move_input(xmlctx, 1);
620 }
Michal Vasko52927e22020-03-16 17:26:14 +0100621
Michal Vaskob36053d2020-03-26 15:49:30 +0100622 /* parse closing tag */
Michal Vasko63f3d842020-07-08 10:10:14 +0200623 if (xmlctx->in->current[0] != '>') {
624 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
625 xmlctx->in->current, "element tag termination ('>')");
Michal Vaskob36053d2020-03-26 15:49:30 +0100626 return LY_EVALID;
627 }
Michal Vasko52927e22020-03-16 17:26:14 +0100628
Michal Vaskob36053d2020-03-26 15:49:30 +0100629 /* move after closing tag without checking for EOF */
Michal Vasko63f3d842020-07-08 10:10:14 +0200630 ly_in_skip(xmlctx->in, 1);
Michal Vasko52927e22020-03-16 17:26:14 +0100631
Radek Krejcid972c252018-09-25 13:23:39 +0200632 return LY_SUCCESS;
633}
634
Michal Vasko8cef5232020-06-15 17:59:47 +0200635/**
636 * @brief Store parsed opening element and parse any included namespaces.
637 *
638 * @param[in] xmlctx XML context to use.
639 * @param[in] prefix Parsed starting element prefix.
640 * @param[in] prefix_len Length of @p prefix.
641 * @param[in] name Parsed starting element name.
642 * @param[in] name_len Length of @p name.
643 * @return LY_ERR value.
644 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100645static LY_ERR
646lyxml_open_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, const char *name, size_t name_len)
Radek Krejcib1890642018-10-03 14:05:40 +0200647{
Michal Vaskob36053d2020-03-26 15:49:30 +0100648 LY_ERR ret = LY_SUCCESS;
649 struct lyxml_elem *e;
650 const char *prev_input;
651 char *value;
652 size_t parsed, value_len;
653 int ws_only, dynamic, is_ns;
654 uint32_t c;
Radek Krejcib1890642018-10-03 14:05:40 +0200655
Michal Vaskob36053d2020-03-26 15:49:30 +0100656 /* store element opening tag information */
657 e = malloc(sizeof *e);
658 LY_CHECK_ERR_RET(!e, LOGMEM(xmlctx->ctx), LY_EMEM);
659 e->name = name;
660 e->prefix = prefix;
661 e->name_len = name_len;
662 e->prefix_len = prefix_len;
663 ly_set_add(&xmlctx->elements, e, LY_SET_OPT_USEASLIST);
664
665 /* skip WS */
666 ign_xmlws(xmlctx);
667
668 /* parse and store all namespaces */
Michal Vasko63f3d842020-07-08 10:10:14 +0200669 prev_input = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100670 is_ns = 1;
Michal Vasko63f3d842020-07-08 10:10:14 +0200671 while ((xmlctx->in->current[0] != '\0') && !ly_getutf8(&xmlctx->in->current, &c, &parsed) && is_xmlqnamestartchar(c)) {
672 xmlctx->in->current -= parsed;
Michal Vaskob36053d2020-03-26 15:49:30 +0100673
674 /* parse attribute name */
675 LY_CHECK_GOTO(ret = lyxml_parse_qname(xmlctx, &prefix, &prefix_len, &name, &name_len), cleanup);
676
677 /* parse the value */
678 LY_CHECK_GOTO(ret = lyxml_next_attr_content(xmlctx, (const char **)&value, &value_len, &ws_only, &dynamic), cleanup);
679
680 /* store every namespace */
681 if ((prefix && !ly_strncmp("xmlns", prefix, prefix_len)) || (!prefix && !ly_strncmp("xmlns", name, name_len))) {
682 LY_CHECK_GOTO(ret = lyxml_ns_add(xmlctx, prefix ? name : NULL, prefix ? name_len : 0,
683 dynamic ? value : strndup(value, value_len)), cleanup);
684 dynamic = 0;
685 } else {
686 /* not a namespace */
687 is_ns = 0;
688 }
689 if (dynamic) {
690 free(value);
691 }
692
693 /* skip WS */
694 ign_xmlws(xmlctx);
695
696 if (is_ns) {
697 /* we can actually skip all the namespaces as there is no reason to parse them again */
Michal Vasko63f3d842020-07-08 10:10:14 +0200698 prev_input = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100699 }
Radek Krejcib1890642018-10-03 14:05:40 +0200700 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100701
702cleanup:
703 if (!ret) {
Michal Vasko63f3d842020-07-08 10:10:14 +0200704 xmlctx->in->current = prev_input;
Michal Vaskob36053d2020-03-26 15:49:30 +0100705 }
706 return ret;
707}
708
Michal Vasko8cef5232020-06-15 17:59:47 +0200709/**
710 * @brief Move parser to the attribute content and parse it.
711 *
712 * @param[in] xmlctx XML context to use.
713 * @param[out] value Parsed attribute value.
714 * @param[out] value_len Length of @p value.
715 * @param[out] ws_only Whether the value is empty/white-spaces only.
716 * @param[out] dynamic Whether the value was dynamically allocated.
717 * @return LY_ERR value.
718 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100719static LY_ERR
720lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *value_len, int *ws_only, int *dynamic)
721{
722 char quot;
723
724 /* skip WS */
725 ign_xmlws(xmlctx);
726
727 /* skip '=' */
Michal Vasko63f3d842020-07-08 10:10:14 +0200728 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100729 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
730 return LY_EVALID;
Michal Vasko63f3d842020-07-08 10:10:14 +0200731 } else if (xmlctx->in->current[0] != '=') {
732 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
733 xmlctx->in->current, "'='");
Michal Vaskob36053d2020-03-26 15:49:30 +0100734 return LY_EVALID;
735 }
736 move_input(xmlctx, 1);
737
738 /* skip WS */
739 ign_xmlws(xmlctx);
740
741 /* find quotes */
Michal Vasko63f3d842020-07-08 10:10:14 +0200742 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100743 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
744 return LY_EVALID;
Michal Vasko63f3d842020-07-08 10:10:14 +0200745 } else if ((xmlctx->in->current[0] != '\'') && (xmlctx->in->current[0] != '\"')) {
746 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current),
747 xmlctx->in->current, "either single or double quotation mark");
Michal Vaskob36053d2020-03-26 15:49:30 +0100748 return LY_EVALID;
749 }
750
751 /* remember quote */
Michal Vasko63f3d842020-07-08 10:10:14 +0200752 quot = xmlctx->in->current[0];
Michal Vaskob36053d2020-03-26 15:49:30 +0100753 move_input(xmlctx, 1);
754
755 /* parse attribute value */
756 LY_CHECK_RET(lyxml_parse_value(xmlctx, quot, (char **)value, value_len, ws_only, dynamic));
757
758 /* move after ending quote (without checking for EOF) */
Michal Vasko63f3d842020-07-08 10:10:14 +0200759 ly_in_skip(xmlctx->in, 1);
Michal Vaskob36053d2020-03-26 15:49:30 +0100760
761 return LY_SUCCESS;
762}
763
Michal Vasko8cef5232020-06-15 17:59:47 +0200764/**
765 * @brief Move parser to the next attribute and parse it.
766 *
767 * @param[in] xmlctx XML context to use.
768 * @param[out] prefix Parsed attribute prefix.
769 * @param[out] prefix_len Length of @p prefix.
770 * @param[out] name Parsed attribute name.
771 * @param[out] name_len Length of @p name.
772 * @return LY_ERR value.
773 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100774static LY_ERR
775lyxml_next_attribute(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
776{
777 const char *in;
778 char *value;
779 uint32_t c;
780 size_t parsed, value_len;
781 int ws_only, dynamic;
782
783 /* skip WS */
784 ign_xmlws(xmlctx);
785
786 /* parse only possible attributes */
Michal Vasko63f3d842020-07-08 10:10:14 +0200787 while ((xmlctx->in->current[0] != '>') && (xmlctx->in->current[0] != '/')) {
788 in = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +0100789 if (in[0] == '\0') {
790 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
791 return LY_EVALID;
792 } else if ((ly_getutf8(&in, &c, &parsed) || !is_xmlqnamestartchar(c))) {
793 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in - parsed), in - parsed,
794 "element tag end ('>' or '/>') or an attribute");
795 return LY_EVALID;
796 }
797
798 /* parse attribute name */
799 LY_CHECK_RET(lyxml_parse_qname(xmlctx, prefix, prefix_len, name, name_len));
800
801 if ((!*prefix || ly_strncmp("xmlns", *prefix, *prefix_len)) && (*prefix || ly_strncmp("xmlns", *name, *name_len))) {
802 /* standard attribute */
803 break;
804 }
805
806 /* namespace, skip it */
807 LY_CHECK_RET(lyxml_next_attr_content(xmlctx, (const char **)&value, &value_len, &ws_only, &dynamic));
808 if (dynamic) {
809 free(value);
810 }
811
812 /* skip WS */
813 ign_xmlws(xmlctx);
814 }
815
816 return LY_SUCCESS;
817}
818
Michal Vasko8cef5232020-06-15 17:59:47 +0200819/**
820 * @brief Move parser to the next element and parse it.
821 *
822 * @param[in] xmlctx XML context to use.
823 * @param[out] prefix Parsed element prefix.
824 * @param[out] prefix_len Length of @p prefix.
825 * @param[out] name Parse element name.
826 * @param[out] name_len Length of @p name.
827 * @return LY_ERR value.
828 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100829static LY_ERR
830lyxml_next_element(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len,
831 int *closing)
832{
833 /* skip WS until EOF or after opening tag '<' */
834 LY_CHECK_RET(lyxml_skip_until_end_or_after_otag(xmlctx));
Michal Vasko63f3d842020-07-08 10:10:14 +0200835 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100836 /* set return values */
837 *prefix = *name = NULL;
838 *prefix_len = *name_len = 0;
839 return LY_SUCCESS;
840 }
841
Michal Vasko63f3d842020-07-08 10:10:14 +0200842 if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100843 move_input(xmlctx, 1);
844 *closing = 1;
845 } else {
846 *closing = 0;
847 }
848
849 /* skip WS */
850 ign_xmlws(xmlctx);
851
852 /* parse element name */
853 LY_CHECK_RET(lyxml_parse_qname(xmlctx, prefix, prefix_len, name, name_len));
854
855 return LY_SUCCESS;
856}
857
858LY_ERR
Michal Vasko63f3d842020-07-08 10:10:14 +0200859lyxml_ctx_new(const struct ly_ctx *ctx, struct ly_in *in, struct lyxml_ctx **xmlctx_p)
Michal Vaskob36053d2020-03-26 15:49:30 +0100860{
861 LY_ERR ret = LY_SUCCESS;
862 struct lyxml_ctx *xmlctx;
863 int closing;
864
865 /* new context */
866 xmlctx = calloc(1, sizeof *xmlctx);
867 LY_CHECK_ERR_RET(!xmlctx, LOGMEM(ctx), LY_EMEM);
868 xmlctx->ctx = ctx;
869 xmlctx->line = 1;
Michal Vasko63f3d842020-07-08 10:10:14 +0200870 xmlctx->in = in;
Michal Vaskob36053d2020-03-26 15:49:30 +0100871
872 /* parse next element, if any */
873 LY_CHECK_GOTO(ret = lyxml_next_element(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name,
874 &xmlctx->name_len, &closing), cleanup);
875
Michal Vasko63f3d842020-07-08 10:10:14 +0200876 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100877 /* update status */
878 xmlctx->status = LYXML_END;
879 } else if (closing) {
880 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Stray closing element tag (\"%.*s\").",
881 xmlctx->name_len, xmlctx->name);
882 ret = LY_EVALID;
883 goto cleanup;
884 } else {
885 /* open an element, also parses all enclosed namespaces */
886 LY_CHECK_GOTO(ret = lyxml_open_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len), cleanup);
887
888 /* update status */
889 xmlctx->status = LYXML_ELEMENT;
890 }
891
892cleanup:
893 if (ret) {
894 lyxml_ctx_free(xmlctx);
895 } else {
896 *xmlctx_p = xmlctx;
897 }
898 return ret;
899}
900
901LY_ERR
902lyxml_ctx_next(struct lyxml_ctx *xmlctx)
903{
904 LY_ERR ret = LY_SUCCESS;
905 int closing;
906 struct lyxml_elem *e;
907
908 /* if the value was not used, free it */
909 if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) {
910 free((char *)xmlctx->value);
911 xmlctx->value = NULL;
912 xmlctx->dynamic = 0;
913 }
914
915 switch (xmlctx->status) {
916 /* content |</elem> */
917 case LYXML_ELEM_CONTENT:
918 /* handle special case when empty content for "<elem/>" was returned */
Michal Vasko63f3d842020-07-08 10:10:14 +0200919 if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100920 assert(xmlctx->elements.count);
921 e = (struct lyxml_elem *)xmlctx->elements.objs[xmlctx->elements.count - 1];
922
923 /* close the element (parses closing tag) */
Michal Vasko63f3d842020-07-08 10:10:14 +0200924 ret = lyxml_close_element(xmlctx, e->prefix, e->prefix_len, e->name, e->name_len, 1);
925 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100926
927 /* update status */
928 xmlctx->status = LYXML_ELEM_CLOSE;
929 break;
930 }
931 /* fallthrough */
932
933 /* </elem>| <elem2>* */
934 case LYXML_ELEM_CLOSE:
935 /* parse next element, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +0200936 ret = lyxml_next_element(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name, &xmlctx->name_len, &closing);
937 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100938
Michal Vasko63f3d842020-07-08 10:10:14 +0200939 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100940 /* update status */
941 xmlctx->status = LYXML_END;
942 } else if (closing) {
943 /* close an element (parses also closing tag) */
Michal Vasko63f3d842020-07-08 10:10:14 +0200944 ret = lyxml_close_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len, 0);
945 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100946
947 /* update status */
948 xmlctx->status = LYXML_ELEM_CLOSE;
949 } else {
950 /* open an element, also parses all enclosed namespaces */
Michal Vasko63f3d842020-07-08 10:10:14 +0200951 ret = lyxml_open_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len);
952 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100953
954 /* update status */
955 xmlctx->status = LYXML_ELEMENT;
956 }
957 break;
958
959 /* <elem| attr='val'* > content */
960 case LYXML_ELEMENT:
961
962 /* attr='val'| attr='val'* > content */
963 case LYXML_ATTR_CONTENT:
964 /* parse attribute name, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +0200965 ret = lyxml_next_attribute(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name, &xmlctx->name_len);
966 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100967
Michal Vasko63f3d842020-07-08 10:10:14 +0200968 if (xmlctx->in->current[0] == '>') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100969 /* no attributes but a closing tag */
Michal Vasko63f3d842020-07-08 10:10:14 +0200970 ly_in_skip(xmlctx->in, 1);
971 if (!xmlctx->in->current[0]) {
Michal Vaskof55ae202020-06-30 15:49:36 +0200972 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
973 ret = LY_EVALID;
974 goto cleanup;
975 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100976
977 /* parse element content */
Michal Vasko63f3d842020-07-08 10:10:14 +0200978 ret = lyxml_parse_value(xmlctx, '<', (char **)&xmlctx->value, &xmlctx->value_len, &xmlctx->ws_only,
979 &xmlctx->dynamic);
980 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +0100981
982 if (!xmlctx->value_len) {
983 /* use empty value, easier to work with */
984 xmlctx->value = "";
985 assert(!xmlctx->dynamic);
986 }
987
988 /* update status */
989 xmlctx->status = LYXML_ELEM_CONTENT;
Michal Vasko63f3d842020-07-08 10:10:14 +0200990 } else if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100991 /* no content but we still return it */
992 xmlctx->value = "";
993 xmlctx->value_len = 0;
994 xmlctx->ws_only = 1;
995 xmlctx->dynamic = 0;
996
997 /* update status */
998 xmlctx->status = LYXML_ELEM_CONTENT;
999 } else {
1000 /* update status */
1001 xmlctx->status = LYXML_ATTRIBUTE;
1002 }
1003 break;
1004
1005 /* attr|='val' */
1006 case LYXML_ATTRIBUTE:
1007 /* skip formatting and parse value */
Michal Vasko63f3d842020-07-08 10:10:14 +02001008 ret = lyxml_next_attr_content(xmlctx, &xmlctx->value, &xmlctx->value_len, &xmlctx->ws_only, &xmlctx->dynamic);
1009 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +01001010
1011 /* update status */
1012 xmlctx->status = LYXML_ATTR_CONTENT;
1013 break;
1014
1015 /* </elem> |EOF */
1016 case LYXML_END:
1017 /* nothing to do */
1018 break;
1019 }
1020
1021cleanup:
1022 if (ret) {
1023 /* invalidate context */
1024 xmlctx->status = LYXML_END;
1025 }
1026 return ret;
1027}
1028
1029LY_ERR
1030lyxml_ctx_peek(struct lyxml_ctx *xmlctx, enum LYXML_PARSER_STATUS *next)
1031{
1032 LY_ERR ret = LY_SUCCESS;
1033 const char *prefix, *name, *prev_input;
1034 size_t prefix_len, name_len;
1035 int closing;
1036
Michal Vasko63f3d842020-07-08 10:10:14 +02001037 prev_input = xmlctx->in->current;
Michal Vaskob36053d2020-03-26 15:49:30 +01001038
1039 switch (xmlctx->status) {
1040 case LYXML_ELEM_CONTENT:
Michal Vasko63f3d842020-07-08 10:10:14 +02001041 if (xmlctx->in->current[0] == '/') {
Michal Vaskob36053d2020-03-26 15:49:30 +01001042 *next = LYXML_ELEM_CLOSE;
1043 break;
1044 }
1045 /* fallthrough */
1046 case LYXML_ELEM_CLOSE:
1047 /* parse next element, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +02001048 ret = lyxml_next_element(xmlctx, &prefix, &prefix_len, &name, &name_len, &closing);
1049 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +01001050
Michal Vasko63f3d842020-07-08 10:10:14 +02001051 if (xmlctx->in->current[0] == '\0') {
Michal Vaskob36053d2020-03-26 15:49:30 +01001052 *next = LYXML_END;
1053 } else if (closing) {
1054 *next = LYXML_ELEM_CLOSE;
1055 } else {
1056 *next = LYXML_ELEMENT;
1057 }
1058 break;
1059 case LYXML_ELEMENT:
1060 case LYXML_ATTR_CONTENT:
1061 /* parse attribute name, if any */
Michal Vasko63f3d842020-07-08 10:10:14 +02001062 ret = lyxml_next_attribute(xmlctx, &prefix, &prefix_len, &name, &name_len);
1063 LY_CHECK_GOTO(ret, cleanup);
Michal Vaskob36053d2020-03-26 15:49:30 +01001064
Michal Vasko63f3d842020-07-08 10:10:14 +02001065 if ((xmlctx->in->current[0] == '>') || (xmlctx->in->current[0] == '/')) {
Michal Vaskob36053d2020-03-26 15:49:30 +01001066 *next = LYXML_ELEM_CONTENT;
1067 } else {
1068 *next = LYXML_ATTRIBUTE;
1069 }
1070 break;
1071 case LYXML_ATTRIBUTE:
1072 *next = LYXML_ATTR_CONTENT;
1073 break;
1074 case LYXML_END:
1075 *next = LYXML_END;
1076 break;
1077 }
1078
1079cleanup:
Michal Vasko63f3d842020-07-08 10:10:14 +02001080 xmlctx->in->current = prev_input;
Michal Vaskob36053d2020-03-26 15:49:30 +01001081 return ret;
1082}
1083
1084void
1085lyxml_ctx_free(struct lyxml_ctx *xmlctx)
1086{
1087 uint32_t u;
1088
1089 if (!xmlctx) {
1090 return;
1091 }
1092
1093 if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) {
1094 free((char *)xmlctx->value);
1095 }
1096 ly_set_erase(&xmlctx->elements, free);
1097 for (u = xmlctx->ns.count - 1; u + 1 > 0; --u) {
1098 /* remove the ns structure */
1099 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->prefix);
1100 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->uri);
1101 free(xmlctx->ns.objs[u]);
1102 }
1103 ly_set_erase(&xmlctx->ns, NULL);
1104 free(xmlctx);
Radek Krejcib1890642018-10-03 14:05:40 +02001105}
Radek Krejcie7b95092019-05-15 11:03:07 +02001106
1107LY_ERR
Radek Krejci241f6b52020-05-21 18:13:49 +02001108lyxml_dump_text(struct ly_out *out, const char *text, int attribute)
Radek Krejcie7b95092019-05-15 11:03:07 +02001109{
Radek Krejcibaeb8382020-05-27 16:44:53 +02001110 ssize_t ret = LY_SUCCESS;
Radek Krejcie7b95092019-05-15 11:03:07 +02001111 unsigned int u;
1112
1113 if (!text) {
1114 return 0;
1115 }
1116
1117 for (u = 0; text[u]; u++) {
1118 switch (text[u]) {
1119 case '&':
Radek Krejci241f6b52020-05-21 18:13:49 +02001120 ret = ly_print(out, "&amp;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001121 break;
1122 case '<':
Radek Krejci241f6b52020-05-21 18:13:49 +02001123 ret = ly_print(out, "&lt;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001124 break;
1125 case '>':
1126 /* not needed, just for readability */
Radek Krejci241f6b52020-05-21 18:13:49 +02001127 ret = ly_print(out, "&gt;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001128 break;
1129 case '"':
1130 if (attribute) {
Radek Krejci241f6b52020-05-21 18:13:49 +02001131 ret = ly_print(out, "&quot;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001132 break;
1133 }
1134 /* falls through */
1135 default:
Radek Krejcibaeb8382020-05-27 16:44:53 +02001136 ret = ly_write(out, &text[u], 1);
Radek Krejcie7b95092019-05-15 11:03:07 +02001137 }
1138 }
1139
Radek Krejcibaeb8382020-05-27 16:44:53 +02001140 return ret < 0 ? (-1 * ret) : 0;
Radek Krejcie7b95092019-05-15 11:03:07 +02001141}
1142
Michal Vasko52927e22020-03-16 17:26:14 +01001143LY_ERR
Michal Vaskob36053d2020-03-26 15:49:30 +01001144lyxml_get_prefixes(struct lyxml_ctx *xmlctx, const char *value, size_t value_len, struct ly_prefix **val_prefs)
Michal Vasko52927e22020-03-16 17:26:14 +01001145{
1146 LY_ERR ret;
Michal Vaskofd69e1d2020-07-03 11:57:17 +02001147 LY_ARRAY_COUNT_TYPE u;
Radek Krejci7eb54ba2020-05-18 16:30:04 +02001148 uint32_t c;
Michal Vasko52927e22020-03-16 17:26:14 +01001149 const struct lyxml_ns *ns;
1150 const char *start, *stop;
1151 struct ly_prefix *prefixes = NULL;
1152 size_t len;
1153
1154 for (stop = start = value; (size_t)(stop - value) < value_len; start = stop) {
1155 size_t bytes;
1156 ly_getutf8(&stop, &c, &bytes);
1157 if (is_xmlqnamestartchar(c)) {
1158 for (ly_getutf8(&stop, &c, &bytes);
1159 is_xmlqnamechar(c) && (size_t)(stop - value) < value_len;
1160 ly_getutf8(&stop, &c, &bytes));
1161 stop = stop - bytes;
1162 if (*stop == ':') {
1163 /* we have a possible prefix */
1164 len = stop - start;
Michal Vaskob36053d2020-03-26 15:49:30 +01001165 ns = lyxml_ns_get(xmlctx, start, len);
Michal Vasko52927e22020-03-16 17:26:14 +01001166 if (ns) {
1167 struct ly_prefix *p = NULL;
1168
1169 /* check whether we do not already have this prefix stored */
1170 LY_ARRAY_FOR(prefixes, u) {
1171 if (!ly_strncmp(prefixes[u].pref, start, len)) {
1172 p = &prefixes[u];
1173 break;
1174 }
1175 }
1176 if (!p) {
Michal Vaskob36053d2020-03-26 15:49:30 +01001177 LY_ARRAY_NEW_GOTO(xmlctx->ctx, prefixes, p, ret, error);
1178 p->pref = lydict_insert(xmlctx->ctx, start, len);
1179 p->ns = lydict_insert(xmlctx->ctx, ns->uri, 0);
Michal Vasko52927e22020-03-16 17:26:14 +01001180 } /* else the prefix already present */
1181 }
1182 }
1183 stop = stop + bytes;
1184 }
1185 }
1186
1187 *val_prefs = prefixes;
1188 return LY_SUCCESS;
1189
1190error:
1191 LY_ARRAY_FOR(prefixes, u) {
Michal Vaskob36053d2020-03-26 15:49:30 +01001192 lydict_remove(xmlctx->ctx, prefixes[u].pref);
Michal Vasko52927e22020-03-16 17:26:14 +01001193 }
1194 LY_ARRAY_FREE(prefixes);
1195 return ret;
1196}
1197
1198LY_ERR
1199lyxml_value_compare(const char *value1, const struct ly_prefix *prefs1, const char *value2, const struct ly_prefix *prefs2)
1200{
1201 const char *ptr1, *ptr2, *ns1, *ns2;
Michal Vaskofd69e1d2020-07-03 11:57:17 +02001202 LY_ARRAY_COUNT_TYPE u1, u2;
Michal Vasko52927e22020-03-16 17:26:14 +01001203 int len;
1204
1205 if (!value1 && !value2) {
1206 return LY_SUCCESS;
1207 }
1208 if ((value1 && !value2) || (!value1 && value2)) {
1209 return LY_ENOT;
1210 }
1211
1212 ptr1 = value1;
1213 ptr2 = value2;
1214 while (ptr1[0] && ptr2[0]) {
1215 if (ptr1[0] != ptr2[0]) {
1216 /* it can be a start of prefix that maps to the same module */
1217 ns1 = ns2 = NULL;
Michal Vaskoed4fcfe2020-07-08 10:38:56 +02001218 u1 = u2 = 0;
Michal Vasko52927e22020-03-16 17:26:14 +01001219 if (prefs1) {
1220 /* find module of the first prefix, if any */
1221 LY_ARRAY_FOR(prefs1, u1) {
1222 len = strlen(prefs1[u1].pref);
1223 if (!strncmp(ptr1, prefs1[u1].pref, len) && (ptr1[len] == ':')) {
1224 ns1 = prefs1[u1].ns;
1225 break;
1226 }
1227 }
1228 }
1229 if (prefs2) {
1230 /* find module of the second prefix, if any */
1231 LY_ARRAY_FOR(prefs2, u2) {
1232 len = strlen(prefs2[u2].pref);
1233 if (!strncmp(ptr2, prefs2[u2].pref, len) && (ptr2[len] == ':')) {
1234 ns2 = prefs2[u2].ns;
1235 break;
1236 }
1237 }
1238 }
1239
1240 if (!ns1 || !ns2 || (ns1 != ns2)) {
1241 /* not a prefix or maps to different namespaces */
1242 break;
1243 }
1244
1245 /* skip prefixes in both values (':' is skipped as iter) */
1246 ptr1 += strlen(prefs1[u1].pref);
1247 ptr2 += strlen(prefs2[u2].pref);
1248 }
1249
1250 ++ptr1;
1251 ++ptr2;
1252 }
1253 if (ptr1[0] || ptr2[0]) {
1254 /* not a match or simply different lengths */
1255 return LY_ENOT;
1256 }
1257
1258 return LY_SUCCESS;
1259}