blob: b4670f617cd0ca3375b79ba60ae9299ee466b22f [file] [log] [blame]
Radek Krejcid91dbaf2018-09-21 15:51:39 +02001/**
2 * @file xml.c
3 * @author Radek Krejci <rkrejci@cesnet.cz>
Michal Vaskob36053d2020-03-26 15:49:30 +01004 * @author Michal Vasko <mvasko@cesnet.cz>
Radek Krejcid91dbaf2018-09-21 15:51:39 +02005 * @brief Generic XML parser implementation for libyang
6 *
7 * Copyright (c) 2015 - 2018 CESNET, z.s.p.o.
8 *
9 * This source code is licensed under BSD 3-Clause License (the "License").
10 * You may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * https://opensource.org/licenses/BSD-3-Clause
14 */
15
Radek Krejci535ea9f2020-05-29 16:01:05 +020016#define _GNU_SOURCE
17
18#include "xml.h"
Radek Krejci4b74d5e2018-09-26 14:30:55 +020019
Radek Krejcib1890642018-10-03 14:05:40 +020020#include <assert.h>
Radek Krejci7a7fa902018-09-25 17:08:21 +020021#include <ctype.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020022#include <stdint.h>
Radek Krejcie7b95092019-05-15 11:03:07 +020023#include <stdlib.h>
Radek Krejci4b74d5e2018-09-26 14:30:55 +020024#include <string.h>
Radek Krejcica376bd2020-06-11 16:04:06 +020025#include <sys/types.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020026
Radek Krejci535ea9f2020-05-29 16:01:05 +020027#include "common.h"
Michal Vasko5aa44c02020-06-29 11:47:02 +020028#include "compat.h"
Radek Krejci535ea9f2020-05-29 16:01:05 +020029#include "dict.h"
30#include "printer.h"
31#include "tree.h"
32#include "tree_data.h"
Radek Krejcid91dbaf2018-09-21 15:51:39 +020033
Michal Vaskob36053d2020-03-26 15:49:30 +010034/* Move input p by s characters, if EOF log with lyxml_ctx c */
35#define move_input(c,s) c->input += s; LY_CHECK_ERR_RET(!c->input[0], LOGVAL(c->ctx, LY_VLOG_LINE, &c->line, LY_VCODE_EOF), LY_EVALID)
Radek Krejcid91dbaf2018-09-21 15:51:39 +020036
Radek Krejcib1890642018-10-03 14:05:40 +020037/* Ignore whitespaces in the input string p */
Michal Vaskob36053d2020-03-26 15:49:30 +010038#define ign_xmlws(c) while (is_xmlws(*(c)->input)) {if (*(c)->input == '\n') {++c->line;} ++c->input;}
39
40static LY_ERR lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *value_len, int *ws_only,
41 int *dynamic);
Radek Krejcid91dbaf2018-09-21 15:51:39 +020042
/**
 * @brief Skip the input up to the first occurrence of a delimiter.
 *
 * The new line characters passed on the way are counted.
 *
 * @param[in] input NULL-terminated string to search in.
 * @param[in] delim Delimiter to find.
 * @param[in] delim_len Number of bytes of @p delim to match.
 * @param[out] newlines Number of new lines skipped before the delimiter (or before the end of the input).
 * @return Pointer to the beginning of the delimiter in @p input, NULL if the input ended without it.
 */
static const char *
ign_todelim(register const char *input, const char *delim, size_t delim_len, size_t *newlines)
{
    const char *cur, *found = NULL;
    size_t skipped = 0;

    for (cur = input; *cur; ++cur) {
        if (*cur == *delim) {
            /* possible start of the delimiter, compare all of it */
            if (!strncmp(cur, delim, delim_len)) {
                found = cur;
                break;
            }
        } else if (*cur == '\n') {
            ++skipped;
        }
    }

    *newlines = skipped;
    return found;
}
77
Radek Krejci4b74d5e2018-09-26 14:30:55 +020078/**
Michal Vaskob36053d2020-03-26 15:49:30 +010079 * @brief Check/Get an XML identifier from the input string.
80 *
81 * The identifier must have at least one valid character complying the name start character constraints.
82 * The identifier is terminated by the first character, which does not comply to the name character constraints.
83 *
84 * See https://www.w3.org/TR/xml-names/#NT-NCName
85 *
86 * @param[in] xmlctx XML context.
87 * @param[out] start Pointer to the start of the identifier.
88 * @param[out] end Pointer ot the end of the identifier.
89 * @return LY_ERR value.
90 */
91static LY_ERR
92lyxml_parse_identifier(struct lyxml_ctx *xmlctx, const char **start, const char **end)
93{
94 const char *s, *in;
95 uint32_t c;
96 size_t parsed;
97 LY_ERR rc;
98
99 in = s = xmlctx->input;
100
101 /* check NameStartChar (minus colon) */
102 LY_CHECK_ERR_RET(ly_getutf8(&in, &c, &parsed),
103 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INCHAR, in[0]),
104 LY_EVALID);
105 LY_CHECK_ERR_RET(!is_xmlqnamestartchar(c),
106 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
107 "Identifier \"%s\" starts with an invalid character.", in - parsed),
108 LY_EVALID);
109
110 /* check rest of the identifier */
111 do {
112 /* move only successfully parsed bytes */
113 xmlctx->input += parsed;
114
115 rc = ly_getutf8(&in, &c, &parsed);
116 LY_CHECK_ERR_RET(rc, LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INCHAR, in[0]), LY_EVALID);
117 } while (is_xmlqnamechar(c));
118
119 *start = s;
120 *end = xmlctx->input;
121 return LY_SUCCESS;
122}
123
124/**
125 * @brief Add namespace definition into XML context.
126 *
127 * Namespaces from a single element are supposed to be added sequentially together (not interleaved by a namespace from other
128 * element). This mimic namespace visibility, since the namespace defined in element E is not visible from its parents or
129 * siblings. On the other hand, namespace from a parent element can be redefined in a child element. This is also reflected
130 * by lyxml_ns_get() which returns the most recent namespace definition for the given prefix.
131 *
132 * When leaving processing of a subtree of some element (after it is removed from xmlctx->elements), caller is supposed to call
133 * lyxml_ns_rm() to remove all the namespaces defined in such an element from the context.
134 *
135 * @param[in] xmlctx XML context to work with.
136 * @param[in] prefix Pointer to the namespace prefix. Can be NULL for default namespace.
137 * @param[in] prefix_len Length of the prefix.
138 * @param[in] uri Namespace URI (value) to store directly. Value is always spent.
139 * @return LY_ERR values.
140 */
141LY_ERR
142lyxml_ns_add(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, char *uri)
143{
144 struct lyxml_ns *ns;
145
146 ns = malloc(sizeof *ns);
147 LY_CHECK_ERR_RET(!ns, LOGMEM(xmlctx->ctx), LY_EMEM);
148
149 /* we need to connect the depth of the element where the namespace is defined with the
150 * namespace record to be able to maintain (remove) the record when the parser leaves
151 * (to its sibling or back to the parent) the element where the namespace was defined */
152 ns->depth = xmlctx->elements.count;
153
154 ns->uri = uri;
155 if (prefix) {
156 ns->prefix = strndup(prefix, prefix_len);
157 LY_CHECK_ERR_RET(!ns->prefix, LOGMEM(xmlctx->ctx); free(ns->uri); free(ns), LY_EMEM);
158 } else {
159 ns->prefix = NULL;
160 }
161
162 LY_CHECK_ERR_RET(ly_set_add(&xmlctx->ns, ns, LY_SET_OPT_USEASLIST) == -1,
163 free(ns->prefix); free(ns->uri); free(ns), LY_EMEM);
164 return LY_SUCCESS;
165}
166
167/**
168 * @brief Remove all the namespaces defined in the element recently closed (removed from the xmlctx->elements).
169 *
170 * @param[in] xmlctx XML context to work with.
171 */
172void
173lyxml_ns_rm(struct lyxml_ctx *xmlctx)
174{
175 unsigned int u;
176
177 for (u = xmlctx->ns.count - 1; u + 1 > 0; --u) {
178 if (((struct lyxml_ns *)xmlctx->ns.objs[u])->depth != xmlctx->elements.count + 1) {
179 /* we are done, the namespaces from a single element are supposed to be together */
180 break;
181 }
182 /* remove the ns structure */
183 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->prefix);
184 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->uri);
185 free(xmlctx->ns.objs[u]);
186 --xmlctx->ns.count;
187 }
188
189 if (!xmlctx->ns.count) {
190 /* cleanup the xmlctx's namespaces storage */
191 ly_set_erase(&xmlctx->ns, NULL);
192 }
193}
194
Michal Vaskob36053d2020-03-26 15:49:30 +0100195const struct lyxml_ns *
196lyxml_ns_get(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len)
197{
198 unsigned int u;
199 struct lyxml_ns *ns;
200
201 for (u = xmlctx->ns.count - 1; u + 1 > 0; --u) {
202 ns = (struct lyxml_ns *)xmlctx->ns.objs[u];
203 if (prefix && prefix_len) {
204 if (ns->prefix && !ly_strncmp(ns->prefix, prefix, prefix_len)) {
205 return ns;
206 }
207 } else if (!ns->prefix) {
208 /* default namespace */
209 return ns;
210 }
211 }
212
213 return NULL;
214}
215
Michal Vasko8cef5232020-06-15 17:59:47 +0200216/**
217 * @brief Skip in the input until EOF or just after the opening tag.
218 * Handles special XML constructs (comment, cdata, doctype).
219 *
220 * @param[in] xmlctx XML context to use.
221 * @return LY_ERR value.
222 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100223static LY_ERR
224lyxml_skip_until_end_or_after_otag(struct lyxml_ctx *xmlctx)
225{
226 const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */
227 const char *in, *endtag, *sectname;
228 size_t endtag_len, newlines;
229
230 while (1) {
231 ign_xmlws(xmlctx);
232
233 if (xmlctx->input[0] == '\0') {
234 /* EOF */
235 if (xmlctx->elements.count) {
236 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
237 return LY_EVALID;
238 }
239 return LY_SUCCESS;
240 } else if (xmlctx->input[0] != '<') {
241 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->input),
242 xmlctx->input, "element tag start ('<')");
243 return LY_EVALID;
244 }
245 move_input(xmlctx, 1);
246
247 if (xmlctx->input[0] == '!') {
248 move_input(xmlctx, 1);
249 /* sections to ignore */
250 if (!strncmp(xmlctx->input, "--", 2)) {
251 /* comment */
252 move_input(xmlctx, 2);
253 sectname = "Comment";
254 endtag = "-->";
255 endtag_len = 3;
256 } else if (!strncmp(xmlctx->input, "[CDATA[", 7)) {
257 /* CDATA section */
258 move_input(xmlctx, 7);
259 sectname = "CData";
260 endtag = "]]>";
261 endtag_len = 3;
262 } else if (!strncmp(xmlctx->input, "DOCTYPE", 7)) {
263 /* Document type declaration - not supported */
264 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_NSUPP, "Document Type Declaration");
265 return LY_EVALID;
266 } else {
267 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Unknown XML section \"%.20s\".", &xmlctx->input[-2]);
268 return LY_EVALID;
269 }
270 in = ign_todelim(xmlctx->input, endtag, endtag_len, &newlines);
271 LY_CHECK_ERR_RET(!in, LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_NTERM, sectname), LY_EVALID);
272 xmlctx->line += newlines;
273 xmlctx->input = in + endtag_len;
274 } else if (xmlctx->input[0] == '?') {
275 in = ign_todelim(xmlctx->input, "?>", 2, &newlines);
276 LY_CHECK_ERR_RET(!in, LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_NTERM, "Declaration"), LY_EVALID);
277 xmlctx->line += newlines;
278 xmlctx->input = in + 2;
279 } else {
280 /* other non-WS character */
281 break;
282 }
283 }
284
285 return LY_SUCCESS;
286}
287
Michal Vasko8cef5232020-06-15 17:59:47 +0200288/**
289 * @brief Parse QName.
290 *
291 * @param[in] xmlctx XML context to use.
292 * @param[out] prefix Parsed prefix, may be NULL.
293 * @param[out] prefix_len Length of @p prefix.
294 * @param[out] name Parsed name.
295 * @param[out] name_len Length of @p name.
296 * @return LY_ERR value.
297 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100298static LY_ERR
299lyxml_parse_qname(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
300{
301 const char *start, *end;
302
303 *prefix = NULL;
304 *prefix_len = 0;
305
306 LY_CHECK_RET(lyxml_parse_identifier(xmlctx, &start, &end));
307 if (end[0] == ':') {
308 /* we have prefixed identifier */
309 *prefix = start;
310 *prefix_len = end - start;
311
312 move_input(xmlctx, 1);
313 LY_CHECK_RET(lyxml_parse_identifier(xmlctx, &start, &end));
314 }
315
316 *name = start;
317 *name_len = end - start;
318 return LY_SUCCESS;
319}
320
321/**
Radek Krejci7a7fa902018-09-25 17:08:21 +0200322 * Store UTF-8 character specified as 4byte integer into the dst buffer.
323 * Returns number of written bytes (4 max), expects that dst has enough space.
324 *
325 * UTF-8 mapping:
326 * 00000000 -- 0000007F: 0xxxxxxx
327 * 00000080 -- 000007FF: 110xxxxx 10xxxxxx
328 * 00000800 -- 0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
329 * 00010000 -- 001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
330 *
331 * Includes checking for valid characters (following RFC 7950, sec 9.4)
332 */
333static LY_ERR
Radek Krejci117d2082018-09-26 10:05:14 +0200334lyxml_pututf8(char *dst, uint32_t value, size_t *bytes_written)
Radek Krejci7a7fa902018-09-25 17:08:21 +0200335{
336 if (value < 0x80) {
337 /* one byte character */
338 if (value < 0x20 &&
339 value != 0x09 &&
340 value != 0x0a &&
341 value != 0x0d) {
342 return LY_EINVAL;
343 }
344
345 dst[0] = value;
346 (*bytes_written) = 1;
347 } else if (value < 0x800) {
348 /* two bytes character */
349 dst[0] = 0xc0 | (value >> 6);
350 dst[1] = 0x80 | (value & 0x3f);
351 (*bytes_written) = 2;
352 } else if (value < 0xfffe) {
353 /* three bytes character */
354 if (((value & 0xf800) == 0xd800) ||
355 (value >= 0xfdd0 && value <= 0xfdef)) {
356 /* exclude surrogate blocks %xD800-DFFF */
357 /* exclude noncharacters %xFDD0-FDEF */
358 return LY_EINVAL;
359 }
360
361 dst[0] = 0xe0 | (value >> 12);
362 dst[1] = 0x80 | ((value >> 6) & 0x3f);
363 dst[2] = 0x80 | (value & 0x3f);
364
365 (*bytes_written) = 3;
366 } else if (value < 0x10fffe) {
367 if ((value & 0xffe) == 0xffe) {
368 /* exclude noncharacters %xFFFE-FFFF, %x1FFFE-1FFFF, %x2FFFE-2FFFF, %x3FFFE-3FFFF, %x4FFFE-4FFFF,
369 * %x5FFFE-5FFFF, %x6FFFE-6FFFF, %x7FFFE-7FFFF, %x8FFFE-8FFFF, %x9FFFE-9FFFF, %xAFFFE-AFFFF,
370 * %xBFFFE-BFFFF, %xCFFFE-CFFFF, %xDFFFE-DFFFF, %xEFFFE-EFFFF, %xFFFFE-FFFFF, %x10FFFE-10FFFF */
371 return LY_EINVAL;
372 }
373 /* four bytes character */
374 dst[0] = 0xf0 | (value >> 18);
375 dst[1] = 0x80 | ((value >> 12) & 0x3f);
376 dst[2] = 0x80 | ((value >> 6) & 0x3f);
377 dst[3] = 0x80 | (value & 0x3f);
378
379 (*bytes_written) = 4;
380 }
381 return LY_SUCCESS;
382}
383
Michal Vasko8cef5232020-06-15 17:59:47 +0200384/**
385 * @brief Parse XML text content (value).
386 *
387 * @param[in] xmlctx XML context to use.
388 * @param[in] endchar Expected character to mark value end.
389 * @param[out] value Parsed value.
390 * @param[out] length Length of @p value.
391 * @param[out] ws_only Whether the value is empty/white-spaces only.
392 * @param[out] dynamic Whether the value was dynamically allocated.
393 * @return LY_ERR value.
394 */
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200395static LY_ERR
Michal Vaskob36053d2020-03-26 15:49:30 +0100396lyxml_parse_value(struct lyxml_ctx *xmlctx, char endchar, char **value, size_t *length, int *ws_only, int *dynamic)
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200397{
Michal Vaskob36053d2020-03-26 15:49:30 +0100398#define BUFSIZE 24
399#define BUFSIZE_STEP 128
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200400
Michal Vaskob36053d2020-03-26 15:49:30 +0100401 const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */
402 const char *in = xmlctx->input, *start;
403 char *buf = NULL;
Radek Krejci4ad42aa2019-07-23 16:55:58 +0200404 size_t offset; /* read offset in input buffer */
405 size_t len; /* length of the output string (write offset in output buffer) */
406 size_t size = 0; /* size of the output buffer */
Radek Krejci7a7fa902018-09-25 17:08:21 +0200407 void *p;
Radek Krejci117d2082018-09-26 10:05:14 +0200408 uint32_t n;
Michal Vaskob36053d2020-03-26 15:49:30 +0100409 size_t u;
410 int ws = 1;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200411
Michal Vaskob36053d2020-03-26 15:49:30 +0100412 assert(xmlctx);
Radek Krejcib1890642018-10-03 14:05:40 +0200413
Radek Krejcid70d1072018-10-09 14:20:47 +0200414 /* init */
Michal Vaskob36053d2020-03-26 15:49:30 +0100415 start = in;
Radek Krejcid70d1072018-10-09 14:20:47 +0200416 offset = len = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200417
418 /* parse */
419 while (in[offset]) {
420 if (in[offset] == '&') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100421 /* non WS */
422 ws = 0;
Radek Krejcid70d1072018-10-09 14:20:47 +0200423
Michal Vaskob36053d2020-03-26 15:49:30 +0100424 if (!buf) {
425 /* prepare output buffer */
426 buf = malloc(BUFSIZE);
427 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
428 size = BUFSIZE;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200429 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100430
431 /* allocate enough for the offset and next character,
432 * we will need 4 bytes at most since we support only the predefined
433 * (one-char) entities and character references */
434 if (len + offset + 4 >= size) {
435 buf = ly_realloc(buf, size + BUFSIZE_STEP);
436 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
437 size += BUFSIZE_STEP;
438 }
439
440 if (offset) {
441 /* store what we have so far */
442 memcpy(&buf[len], in, offset);
443 len += offset;
444 in += offset;
445 offset = 0;
446 }
447
Radek Krejci7a7fa902018-09-25 17:08:21 +0200448 ++offset;
449 if (in[offset] != '#') {
450 /* entity reference - only predefined references are supported */
451 if (!strncmp(&in[offset], "lt;", 3)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100452 buf[len++] = '<';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200453 in += 4; /* &lt; */
454 } else if (!strncmp(&in[offset], "gt;", 3)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100455 buf[len++] = '>';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200456 in += 4; /* &gt; */
457 } else if (!strncmp(&in[offset], "amp;", 4)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100458 buf[len++] = '&';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200459 in += 5; /* &amp; */
460 } else if (!strncmp(&in[offset], "apos;", 5)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100461 buf[len++] = '\'';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200462 in += 6; /* &apos; */
463 } else if (!strncmp(&in[offset], "quot;", 5)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100464 buf[len++] = '\"';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200465 in += 6; /* &quot; */
466 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100467 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
Radek Krejcied6c6ad2018-09-26 09:10:18 +0200468 "Entity reference \"%.*s\" not supported, only predefined references allowed.", 10, &in[offset-1]);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200469 goto error;
470 }
471 offset = 0;
472 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100473 p = (void *)&in[offset - 1];
Radek Krejci7a7fa902018-09-25 17:08:21 +0200474 /* character reference */
475 ++offset;
476 if (isdigit(in[offset])) {
477 for (n = 0; isdigit(in[offset]); offset++) {
478 n = (10 * n) + (in[offset] - '0');
479 }
480 } else if (in[offset] == 'x' && isxdigit(in[offset + 1])) {
481 for (n = 0, ++offset; isxdigit(in[offset]); offset++) {
482 if (isdigit(in[offset])) {
483 u = (in[offset] - '0');
484 } else if (in[offset] > 'F') {
485 u = 10 + (in[offset] - 'a');
486 } else {
487 u = 10 + (in[offset] - 'A');
488 }
489 n = (16 * n) + u;
490 }
491 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100492 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Invalid character reference \"%.*s\".", 12, p);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200493 goto error;
494
495 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100496
Radek Krejci7a7fa902018-09-25 17:08:21 +0200497 LY_CHECK_ERR_GOTO(in[offset] != ';',
Michal Vaskob36053d2020-03-26 15:49:30 +0100498 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP,
Radek Krejci7a7fa902018-09-25 17:08:21 +0200499 LY_VCODE_INSTREXP_len(&in[offset]), &in[offset], ";"),
500 error);
501 ++offset;
Michal Vaskob36053d2020-03-26 15:49:30 +0100502 LY_CHECK_ERR_GOTO(lyxml_pututf8(&buf[len], n, &u),
503 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
504 "Invalid character reference \"%.*s\" (0x%08x).", 12, p, n),
Radek Krejci7a7fa902018-09-25 17:08:21 +0200505 error);
506 len += u;
507 in += offset;
508 offset = 0;
509 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100510 } else if (in[offset] == endchar) {
Radek Krejci7a7fa902018-09-25 17:08:21 +0200511 /* end of string */
Radek Krejcid70d1072018-10-09 14:20:47 +0200512 if (buf) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100513 /* realloc exact size string */
514 buf = ly_realloc(buf, len + offset + 1);
515 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
516 size = len + offset + 1;
Radek Krejcid70d1072018-10-09 14:20:47 +0200517 memcpy(&buf[len], in, offset);
Michal Vaskob36053d2020-03-26 15:49:30 +0100518
519 /* set terminating NULL byte */
520 buf[len + offset] = '\0';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200521 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200522 len += offset;
Michal Vaskob36053d2020-03-26 15:49:30 +0100523 in += offset;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200524 goto success;
525 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100526 if (!is_xmlws(in[offset])) {
527 /* non WS */
528 ws = 0;
529 }
530
Radek Krejci7a7fa902018-09-25 17:08:21 +0200531 /* log lines */
532 if (in[offset] == '\n') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100533 ++xmlctx->line;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200534 }
535
536 /* continue */
537 ++offset;
538 }
539 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100540
541 /* EOF reached before endchar */
542 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
543
Radek Krejci7a7fa902018-09-25 17:08:21 +0200544error:
Michal Vaskob36053d2020-03-26 15:49:30 +0100545 free(buf);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200546 return LY_EVALID;
547
548success:
Radek Krejcid70d1072018-10-09 14:20:47 +0200549 if (buf) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100550 *value = buf;
551 *dynamic = 1;
552 } else {
553 *value = (char *)start;
554 *dynamic = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200555 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100556 *length = len;
557 *ws_only = ws;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200558
Michal Vaskob36053d2020-03-26 15:49:30 +0100559 xmlctx->input = in;
560 return LY_SUCCESS;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200561
562#undef BUFSIZE
563#undef BUFSIZE_STEP
Radek Krejci7a7fa902018-09-25 17:08:21 +0200564}
565
Michal Vasko8cef5232020-06-15 17:59:47 +0200566/**
567 * @brief Parse XML closing element and match it to a stored starting element.
568 *
569 * @param[in] xmlctx XML context to use.
570 * @param[in] prefix Expected closing element prefix.
571 * @param[in] prefix_len Length of @p prefix.
572 * @param[in] name Expected closing element name.
573 * @param[in] name_len Length of @p name.
574 * @param[in] empty Whether we are parsing a special "empty" element (with joined starting and closing tag) with no value.
575 * @return LY_ERR value.
576 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100577static LY_ERR
578lyxml_close_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, const char *name, size_t name_len,
579 int empty)
Radek Krejcid972c252018-09-25 13:23:39 +0200580{
Michal Vaskob36053d2020-03-26 15:49:30 +0100581 struct lyxml_elem *e;
Radek Krejcid972c252018-09-25 13:23:39 +0200582
Michal Vaskob36053d2020-03-26 15:49:30 +0100583 /* match opening and closing element tags */
584 if (!xmlctx->elements.count) {
585 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Stray closing element tag (\"%.*s\").",
586 name_len, name);
587 return LY_EVALID;
588 }
Radek Krejcid972c252018-09-25 13:23:39 +0200589
Michal Vaskob36053d2020-03-26 15:49:30 +0100590 e = (struct lyxml_elem *)xmlctx->elements.objs[xmlctx->elements.count - 1];
591 if ((e->prefix_len != prefix_len) || (e->name_len != name_len)
592 || (prefix_len && strncmp(prefix, e->prefix, e->prefix_len)) || strncmp(name, e->name, e->name_len)) {
593 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
594 "Opening (\"%.*s%s%.*s\") and closing (\"%.*s%s%.*s\") elements tag mismatch.",
595 e->prefix_len, e->prefix ? e->prefix : "", e->prefix ? ":" : "", e->name_len, e->name,
596 prefix_len, prefix ? prefix : "", prefix ? ":" : "", name_len, name);
597 return LY_EVALID;
598 }
Radek Krejcid972c252018-09-25 13:23:39 +0200599
Michal Vaskob36053d2020-03-26 15:49:30 +0100600 /* opening and closing element tags matches, remove record from the opening tags list */
601 ly_set_rm_index(&xmlctx->elements, xmlctx->elements.count - 1, free);
Radek Krejcid972c252018-09-25 13:23:39 +0200602
Michal Vaskob36053d2020-03-26 15:49:30 +0100603 /* remove also the namespaces connected with the element */
604 lyxml_ns_rm(xmlctx);
Radek Krejcid972c252018-09-25 13:23:39 +0200605
Michal Vaskob36053d2020-03-26 15:49:30 +0100606 /* skip WS */
607 ign_xmlws(xmlctx);
Radek Krejcid972c252018-09-25 13:23:39 +0200608
Michal Vaskob36053d2020-03-26 15:49:30 +0100609 /* special "<elem/>" element */
610 if (empty && (xmlctx->input[0] == '/')) {
611 move_input(xmlctx, 1);
612 }
Michal Vasko52927e22020-03-16 17:26:14 +0100613
Michal Vaskob36053d2020-03-26 15:49:30 +0100614 /* parse closing tag */
615 if (xmlctx->input[0] != '>') {
616 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->input),
617 xmlctx->input, "element tag termination ('>')");
618 return LY_EVALID;
619 }
Michal Vasko52927e22020-03-16 17:26:14 +0100620
Michal Vaskob36053d2020-03-26 15:49:30 +0100621 /* move after closing tag without checking for EOF */
622 ++xmlctx->input;
Michal Vasko52927e22020-03-16 17:26:14 +0100623
Radek Krejcid972c252018-09-25 13:23:39 +0200624 return LY_SUCCESS;
625}
626
Michal Vasko8cef5232020-06-15 17:59:47 +0200627/**
628 * @brief Store parsed opening element and parse any included namespaces.
629 *
630 * @param[in] xmlctx XML context to use.
631 * @param[in] prefix Parsed starting element prefix.
632 * @param[in] prefix_len Length of @p prefix.
633 * @param[in] name Parsed starting element name.
634 * @param[in] name_len Length of @p name.
635 * @return LY_ERR value.
636 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100637static LY_ERR
638lyxml_open_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, const char *name, size_t name_len)
Radek Krejcib1890642018-10-03 14:05:40 +0200639{
Michal Vaskob36053d2020-03-26 15:49:30 +0100640 LY_ERR ret = LY_SUCCESS;
641 struct lyxml_elem *e;
642 const char *prev_input;
643 char *value;
644 size_t parsed, value_len;
645 int ws_only, dynamic, is_ns;
646 uint32_t c;
Radek Krejcib1890642018-10-03 14:05:40 +0200647
Michal Vaskob36053d2020-03-26 15:49:30 +0100648 /* store element opening tag information */
649 e = malloc(sizeof *e);
650 LY_CHECK_ERR_RET(!e, LOGMEM(xmlctx->ctx), LY_EMEM);
651 e->name = name;
652 e->prefix = prefix;
653 e->name_len = name_len;
654 e->prefix_len = prefix_len;
655 ly_set_add(&xmlctx->elements, e, LY_SET_OPT_USEASLIST);
656
657 /* skip WS */
658 ign_xmlws(xmlctx);
659
660 /* parse and store all namespaces */
661 prev_input = xmlctx->input;
662 is_ns = 1;
663 while ((xmlctx->input[0] != '\0') && !ly_getutf8(&xmlctx->input, &c, &parsed) && is_xmlqnamestartchar(c)) {
664 xmlctx->input -= parsed;
665
666 /* parse attribute name */
667 LY_CHECK_GOTO(ret = lyxml_parse_qname(xmlctx, &prefix, &prefix_len, &name, &name_len), cleanup);
668
669 /* parse the value */
670 LY_CHECK_GOTO(ret = lyxml_next_attr_content(xmlctx, (const char **)&value, &value_len, &ws_only, &dynamic), cleanup);
671
672 /* store every namespace */
673 if ((prefix && !ly_strncmp("xmlns", prefix, prefix_len)) || (!prefix && !ly_strncmp("xmlns", name, name_len))) {
674 LY_CHECK_GOTO(ret = lyxml_ns_add(xmlctx, prefix ? name : NULL, prefix ? name_len : 0,
675 dynamic ? value : strndup(value, value_len)), cleanup);
676 dynamic = 0;
677 } else {
678 /* not a namespace */
679 is_ns = 0;
680 }
681 if (dynamic) {
682 free(value);
683 }
684
685 /* skip WS */
686 ign_xmlws(xmlctx);
687
688 if (is_ns) {
689 /* we can actually skip all the namespaces as there is no reason to parse them again */
690 prev_input = xmlctx->input;
691 }
Radek Krejcib1890642018-10-03 14:05:40 +0200692 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100693
694cleanup:
695 if (!ret) {
696 xmlctx->input = prev_input;
697 }
698 return ret;
699}
700
Michal Vasko8cef5232020-06-15 17:59:47 +0200701/**
702 * @brief Move parser to the attribute content and parse it.
703 *
704 * @param[in] xmlctx XML context to use.
705 * @param[out] value Parsed attribute value.
706 * @param[out] value_len Length of @p value.
707 * @param[out] ws_only Whether the value is empty/white-spaces only.
708 * @param[out] dynamic Whether the value was dynamically allocated.
709 * @return LY_ERR value.
710 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100711static LY_ERR
712lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *value_len, int *ws_only, int *dynamic)
713{
714 char quot;
715
716 /* skip WS */
717 ign_xmlws(xmlctx);
718
719 /* skip '=' */
720 if (xmlctx->input[0] == '\0') {
721 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
722 return LY_EVALID;
723 } else if (xmlctx->input[0] != '=') {
724 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->input),
725 xmlctx->input, "'='");
726 return LY_EVALID;
727 }
728 move_input(xmlctx, 1);
729
730 /* skip WS */
731 ign_xmlws(xmlctx);
732
733 /* find quotes */
734 if (xmlctx->input[0] == '\0') {
735 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
736 return LY_EVALID;
737 } else if ((xmlctx->input[0] != '\'') && (xmlctx->input[0] != '\"')) {
738 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->input),
739 xmlctx->input, "either single or double quotation mark");
740 return LY_EVALID;
741 }
742
743 /* remember quote */
744 quot = xmlctx->input[0];
745 move_input(xmlctx, 1);
746
747 /* parse attribute value */
748 LY_CHECK_RET(lyxml_parse_value(xmlctx, quot, (char **)value, value_len, ws_only, dynamic));
749
750 /* move after ending quote (without checking for EOF) */
751 ++xmlctx->input;
752
753 return LY_SUCCESS;
754}
755
Michal Vasko8cef5232020-06-15 17:59:47 +0200756/**
757 * @brief Move parser to the next attribute and parse it.
758 *
759 * @param[in] xmlctx XML context to use.
760 * @param[out] prefix Parsed attribute prefix.
761 * @param[out] prefix_len Length of @p prefix.
762 * @param[out] name Parsed attribute name.
763 * @param[out] name_len Length of @p name.
764 * @return LY_ERR value.
765 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100766static LY_ERR
767lyxml_next_attribute(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
768{
769 const char *in;
770 char *value;
771 uint32_t c;
772 size_t parsed, value_len;
773 int ws_only, dynamic;
774
775 /* skip WS */
776 ign_xmlws(xmlctx);
777
778 /* parse only possible attributes */
779 while ((xmlctx->input[0] != '>') && (xmlctx->input[0] != '/')) {
780 in = xmlctx->input;
781 if (in[0] == '\0') {
782 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
783 return LY_EVALID;
784 } else if ((ly_getutf8(&in, &c, &parsed) || !is_xmlqnamestartchar(c))) {
785 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in - parsed), in - parsed,
786 "element tag end ('>' or '/>') or an attribute");
787 return LY_EVALID;
788 }
789
790 /* parse attribute name */
791 LY_CHECK_RET(lyxml_parse_qname(xmlctx, prefix, prefix_len, name, name_len));
792
793 if ((!*prefix || ly_strncmp("xmlns", *prefix, *prefix_len)) && (*prefix || ly_strncmp("xmlns", *name, *name_len))) {
794 /* standard attribute */
795 break;
796 }
797
798 /* namespace, skip it */
799 LY_CHECK_RET(lyxml_next_attr_content(xmlctx, (const char **)&value, &value_len, &ws_only, &dynamic));
800 if (dynamic) {
801 free(value);
802 }
803
804 /* skip WS */
805 ign_xmlws(xmlctx);
806 }
807
808 return LY_SUCCESS;
809}
810
Michal Vasko8cef5232020-06-15 17:59:47 +0200811/**
812 * @brief Move parser to the next element and parse it.
813 *
814 * @param[in] xmlctx XML context to use.
815 * @param[out] prefix Parsed element prefix.
816 * @param[out] prefix_len Length of @p prefix.
817 * @param[out] name Parse element name.
818 * @param[out] name_len Length of @p name.
819 * @return LY_ERR value.
820 */
Michal Vaskob36053d2020-03-26 15:49:30 +0100821static LY_ERR
822lyxml_next_element(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len,
823 int *closing)
824{
825 /* skip WS until EOF or after opening tag '<' */
826 LY_CHECK_RET(lyxml_skip_until_end_or_after_otag(xmlctx));
827 if (xmlctx->input[0] == '\0') {
828 /* set return values */
829 *prefix = *name = NULL;
830 *prefix_len = *name_len = 0;
831 return LY_SUCCESS;
832 }
833
834 if (xmlctx->input[0] == '/') {
835 move_input(xmlctx, 1);
836 *closing = 1;
837 } else {
838 *closing = 0;
839 }
840
841 /* skip WS */
842 ign_xmlws(xmlctx);
843
844 /* parse element name */
845 LY_CHECK_RET(lyxml_parse_qname(xmlctx, prefix, prefix_len, name, name_len));
846
847 return LY_SUCCESS;
848}
849
850LY_ERR
851lyxml_ctx_new(const struct ly_ctx *ctx, const char *input, struct lyxml_ctx **xmlctx_p)
852{
853 LY_ERR ret = LY_SUCCESS;
854 struct lyxml_ctx *xmlctx;
855 int closing;
856
857 /* new context */
858 xmlctx = calloc(1, sizeof *xmlctx);
859 LY_CHECK_ERR_RET(!xmlctx, LOGMEM(ctx), LY_EMEM);
860 xmlctx->ctx = ctx;
861 xmlctx->line = 1;
862 xmlctx->input = input;
863
864 /* parse next element, if any */
865 LY_CHECK_GOTO(ret = lyxml_next_element(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name,
866 &xmlctx->name_len, &closing), cleanup);
867
868 if (xmlctx->input[0] == '\0') {
869 /* update status */
870 xmlctx->status = LYXML_END;
871 } else if (closing) {
872 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Stray closing element tag (\"%.*s\").",
873 xmlctx->name_len, xmlctx->name);
874 ret = LY_EVALID;
875 goto cleanup;
876 } else {
877 /* open an element, also parses all enclosed namespaces */
878 LY_CHECK_GOTO(ret = lyxml_open_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len), cleanup);
879
880 /* update status */
881 xmlctx->status = LYXML_ELEMENT;
882 }
883
884cleanup:
885 if (ret) {
886 lyxml_ctx_free(xmlctx);
887 } else {
888 *xmlctx_p = xmlctx;
889 }
890 return ret;
891}
892
893LY_ERR
894lyxml_ctx_next(struct lyxml_ctx *xmlctx)
895{
896 LY_ERR ret = LY_SUCCESS;
897 int closing;
898 struct lyxml_elem *e;
899
900 /* if the value was not used, free it */
901 if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) {
902 free((char *)xmlctx->value);
903 xmlctx->value = NULL;
904 xmlctx->dynamic = 0;
905 }
906
907 switch (xmlctx->status) {
908 /* content |</elem> */
909 case LYXML_ELEM_CONTENT:
910 /* handle special case when empty content for "<elem/>" was returned */
911 if (xmlctx->input[0] == '/') {
912 assert(xmlctx->elements.count);
913 e = (struct lyxml_elem *)xmlctx->elements.objs[xmlctx->elements.count - 1];
914
915 /* close the element (parses closing tag) */
916 LY_CHECK_GOTO(ret = lyxml_close_element(xmlctx, e->prefix, e->prefix_len, e->name, e->name_len, 1), cleanup);
917
918 /* update status */
919 xmlctx->status = LYXML_ELEM_CLOSE;
920 break;
921 }
922 /* fallthrough */
923
924 /* </elem>| <elem2>* */
925 case LYXML_ELEM_CLOSE:
926 /* parse next element, if any */
927 LY_CHECK_GOTO(ret = lyxml_next_element(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name,
928 &xmlctx->name_len, &closing), cleanup);
929
930 if (xmlctx->input[0] == '\0') {
931 /* update status */
932 xmlctx->status = LYXML_END;
933 } else if (closing) {
934 /* close an element (parses also closing tag) */
935 LY_CHECK_GOTO(ret = lyxml_close_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len, 0), cleanup);
936
937 /* update status */
938 xmlctx->status = LYXML_ELEM_CLOSE;
939 } else {
940 /* open an element, also parses all enclosed namespaces */
941 LY_CHECK_GOTO(ret = lyxml_open_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len), cleanup);
942
943 /* update status */
944 xmlctx->status = LYXML_ELEMENT;
945 }
946 break;
947
948 /* <elem| attr='val'* > content */
949 case LYXML_ELEMENT:
950
951 /* attr='val'| attr='val'* > content */
952 case LYXML_ATTR_CONTENT:
953 /* parse attribute name, if any */
954 LY_CHECK_GOTO(ret = lyxml_next_attribute(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name, &xmlctx->name_len), cleanup);
955
956 if (xmlctx->input[0] == '>') {
957 /* no attributes but a closing tag */
Michal Vaskof55ae202020-06-30 15:49:36 +0200958 ++xmlctx->input;
959 if (!xmlctx->input[0]) {
960 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
961 ret = LY_EVALID;
962 goto cleanup;
963 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100964
965 /* parse element content */
966 LY_CHECK_GOTO(ret = lyxml_parse_value(xmlctx, '<', (char **)&xmlctx->value, &xmlctx->value_len, &xmlctx->ws_only,
967 &xmlctx->dynamic), cleanup);
968
969 if (!xmlctx->value_len) {
970 /* use empty value, easier to work with */
971 xmlctx->value = "";
972 assert(!xmlctx->dynamic);
973 }
974
975 /* update status */
976 xmlctx->status = LYXML_ELEM_CONTENT;
977 } else if (xmlctx->input[0] == '/') {
978 /* no content but we still return it */
979 xmlctx->value = "";
980 xmlctx->value_len = 0;
981 xmlctx->ws_only = 1;
982 xmlctx->dynamic = 0;
983
984 /* update status */
985 xmlctx->status = LYXML_ELEM_CONTENT;
986 } else {
987 /* update status */
988 xmlctx->status = LYXML_ATTRIBUTE;
989 }
990 break;
991
992 /* attr|='val' */
993 case LYXML_ATTRIBUTE:
994 /* skip formatting and parse value */
995 LY_CHECK_GOTO(ret = lyxml_next_attr_content(xmlctx, &xmlctx->value, &xmlctx->value_len, &xmlctx->ws_only,
996 &xmlctx->dynamic), cleanup);
997
998 /* update status */
999 xmlctx->status = LYXML_ATTR_CONTENT;
1000 break;
1001
1002 /* </elem> |EOF */
1003 case LYXML_END:
1004 /* nothing to do */
1005 break;
1006 }
1007
1008cleanup:
1009 if (ret) {
1010 /* invalidate context */
1011 xmlctx->status = LYXML_END;
1012 }
1013 return ret;
1014}
1015
1016LY_ERR
1017lyxml_ctx_peek(struct lyxml_ctx *xmlctx, enum LYXML_PARSER_STATUS *next)
1018{
1019 LY_ERR ret = LY_SUCCESS;
1020 const char *prefix, *name, *prev_input;
1021 size_t prefix_len, name_len;
1022 int closing;
1023
1024 prev_input = xmlctx->input;
1025
1026 switch (xmlctx->status) {
1027 case LYXML_ELEM_CONTENT:
1028 if (xmlctx->input[0] == '/') {
1029 *next = LYXML_ELEM_CLOSE;
1030 break;
1031 }
1032 /* fallthrough */
1033 case LYXML_ELEM_CLOSE:
1034 /* parse next element, if any */
1035 LY_CHECK_GOTO(ret = lyxml_next_element(xmlctx, &prefix, &prefix_len, &name, &name_len, &closing), cleanup);
1036
1037 if (xmlctx->input[0] == '\0') {
1038 *next = LYXML_END;
1039 } else if (closing) {
1040 *next = LYXML_ELEM_CLOSE;
1041 } else {
1042 *next = LYXML_ELEMENT;
1043 }
1044 break;
1045 case LYXML_ELEMENT:
1046 case LYXML_ATTR_CONTENT:
1047 /* parse attribute name, if any */
1048 LY_CHECK_GOTO(ret = lyxml_next_attribute(xmlctx, &prefix, &prefix_len, &name, &name_len), cleanup);
1049
1050 if ((xmlctx->input[0] == '>') || (xmlctx->input[0] == '/')) {
1051 *next = LYXML_ELEM_CONTENT;
1052 } else {
1053 *next = LYXML_ATTRIBUTE;
1054 }
1055 break;
1056 case LYXML_ATTRIBUTE:
1057 *next = LYXML_ATTR_CONTENT;
1058 break;
1059 case LYXML_END:
1060 *next = LYXML_END;
1061 break;
1062 }
1063
1064cleanup:
1065 xmlctx->input = prev_input;
1066 return ret;
1067}
1068
1069void
1070lyxml_ctx_free(struct lyxml_ctx *xmlctx)
1071{
1072 uint32_t u;
1073
1074 if (!xmlctx) {
1075 return;
1076 }
1077
1078 if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) {
1079 free((char *)xmlctx->value);
1080 }
1081 ly_set_erase(&xmlctx->elements, free);
1082 for (u = xmlctx->ns.count - 1; u + 1 > 0; --u) {
1083 /* remove the ns structure */
1084 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->prefix);
1085 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->uri);
1086 free(xmlctx->ns.objs[u]);
1087 }
1088 ly_set_erase(&xmlctx->ns, NULL);
1089 free(xmlctx);
Radek Krejcib1890642018-10-03 14:05:40 +02001090}
Radek Krejcie7b95092019-05-15 11:03:07 +02001091
1092LY_ERR
Radek Krejci241f6b52020-05-21 18:13:49 +02001093lyxml_dump_text(struct ly_out *out, const char *text, int attribute)
Radek Krejcie7b95092019-05-15 11:03:07 +02001094{
Radek Krejcibaeb8382020-05-27 16:44:53 +02001095 ssize_t ret = LY_SUCCESS;
Radek Krejcie7b95092019-05-15 11:03:07 +02001096 unsigned int u;
1097
1098 if (!text) {
1099 return 0;
1100 }
1101
1102 for (u = 0; text[u]; u++) {
1103 switch (text[u]) {
1104 case '&':
Radek Krejci241f6b52020-05-21 18:13:49 +02001105 ret = ly_print(out, "&amp;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001106 break;
1107 case '<':
Radek Krejci241f6b52020-05-21 18:13:49 +02001108 ret = ly_print(out, "&lt;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001109 break;
1110 case '>':
1111 /* not needed, just for readability */
Radek Krejci241f6b52020-05-21 18:13:49 +02001112 ret = ly_print(out, "&gt;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001113 break;
1114 case '"':
1115 if (attribute) {
Radek Krejci241f6b52020-05-21 18:13:49 +02001116 ret = ly_print(out, "&quot;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001117 break;
1118 }
1119 /* falls through */
1120 default:
Radek Krejcibaeb8382020-05-27 16:44:53 +02001121 ret = ly_write(out, &text[u], 1);
Radek Krejcie7b95092019-05-15 11:03:07 +02001122 }
1123 }
1124
Radek Krejcibaeb8382020-05-27 16:44:53 +02001125 return ret < 0 ? (-1 * ret) : 0;
Radek Krejcie7b95092019-05-15 11:03:07 +02001126}
1127
Michal Vasko52927e22020-03-16 17:26:14 +01001128LY_ERR
Michal Vaskob36053d2020-03-26 15:49:30 +01001129lyxml_get_prefixes(struct lyxml_ctx *xmlctx, const char *value, size_t value_len, struct ly_prefix **val_prefs)
Michal Vasko52927e22020-03-16 17:26:14 +01001130{
1131 LY_ERR ret;
Radek Krejci7eb54ba2020-05-18 16:30:04 +02001132 LY_ARRAY_SIZE_TYPE u;
1133 uint32_t c;
Michal Vasko52927e22020-03-16 17:26:14 +01001134 const struct lyxml_ns *ns;
1135 const char *start, *stop;
1136 struct ly_prefix *prefixes = NULL;
1137 size_t len;
1138
1139 for (stop = start = value; (size_t)(stop - value) < value_len; start = stop) {
1140 size_t bytes;
1141 ly_getutf8(&stop, &c, &bytes);
1142 if (is_xmlqnamestartchar(c)) {
1143 for (ly_getutf8(&stop, &c, &bytes);
1144 is_xmlqnamechar(c) && (size_t)(stop - value) < value_len;
1145 ly_getutf8(&stop, &c, &bytes));
1146 stop = stop - bytes;
1147 if (*stop == ':') {
1148 /* we have a possible prefix */
1149 len = stop - start;
Michal Vaskob36053d2020-03-26 15:49:30 +01001150 ns = lyxml_ns_get(xmlctx, start, len);
Michal Vasko52927e22020-03-16 17:26:14 +01001151 if (ns) {
1152 struct ly_prefix *p = NULL;
1153
1154 /* check whether we do not already have this prefix stored */
1155 LY_ARRAY_FOR(prefixes, u) {
1156 if (!ly_strncmp(prefixes[u].pref, start, len)) {
1157 p = &prefixes[u];
1158 break;
1159 }
1160 }
1161 if (!p) {
Michal Vaskob36053d2020-03-26 15:49:30 +01001162 LY_ARRAY_NEW_GOTO(xmlctx->ctx, prefixes, p, ret, error);
1163 p->pref = lydict_insert(xmlctx->ctx, start, len);
1164 p->ns = lydict_insert(xmlctx->ctx, ns->uri, 0);
Michal Vasko52927e22020-03-16 17:26:14 +01001165 } /* else the prefix already present */
1166 }
1167 }
1168 stop = stop + bytes;
1169 }
1170 }
1171
1172 *val_prefs = prefixes;
1173 return LY_SUCCESS;
1174
1175error:
1176 LY_ARRAY_FOR(prefixes, u) {
Michal Vaskob36053d2020-03-26 15:49:30 +01001177 lydict_remove(xmlctx->ctx, prefixes[u].pref);
Michal Vasko52927e22020-03-16 17:26:14 +01001178 }
1179 LY_ARRAY_FREE(prefixes);
1180 return ret;
1181}
1182
1183LY_ERR
1184lyxml_value_compare(const char *value1, const struct ly_prefix *prefs1, const char *value2, const struct ly_prefix *prefs2)
1185{
1186 const char *ptr1, *ptr2, *ns1, *ns2;
Radek Krejci7eb54ba2020-05-18 16:30:04 +02001187 LY_ARRAY_SIZE_TYPE u1, u2;
Michal Vasko52927e22020-03-16 17:26:14 +01001188 int len;
1189
1190 if (!value1 && !value2) {
1191 return LY_SUCCESS;
1192 }
1193 if ((value1 && !value2) || (!value1 && value2)) {
1194 return LY_ENOT;
1195 }
1196
1197 ptr1 = value1;
1198 ptr2 = value2;
1199 while (ptr1[0] && ptr2[0]) {
1200 if (ptr1[0] != ptr2[0]) {
1201 /* it can be a start of prefix that maps to the same module */
1202 ns1 = ns2 = NULL;
1203 if (prefs1) {
1204 /* find module of the first prefix, if any */
1205 LY_ARRAY_FOR(prefs1, u1) {
1206 len = strlen(prefs1[u1].pref);
1207 if (!strncmp(ptr1, prefs1[u1].pref, len) && (ptr1[len] == ':')) {
1208 ns1 = prefs1[u1].ns;
1209 break;
1210 }
1211 }
1212 }
1213 if (prefs2) {
1214 /* find module of the second prefix, if any */
1215 LY_ARRAY_FOR(prefs2, u2) {
1216 len = strlen(prefs2[u2].pref);
1217 if (!strncmp(ptr2, prefs2[u2].pref, len) && (ptr2[len] == ':')) {
1218 ns2 = prefs2[u2].ns;
1219 break;
1220 }
1221 }
1222 }
1223
1224 if (!ns1 || !ns2 || (ns1 != ns2)) {
1225 /* not a prefix or maps to different namespaces */
1226 break;
1227 }
1228
1229 /* skip prefixes in both values (':' is skipped as iter) */
1230 ptr1 += strlen(prefs1[u1].pref);
1231 ptr2 += strlen(prefs2[u2].pref);
1232 }
1233
1234 ++ptr1;
1235 ++ptr2;
1236 }
1237 if (ptr1[0] || ptr2[0]) {
1238 /* not a match or simply different lengths */
1239 return LY_ENOT;
1240 }
1241
1242 return LY_SUCCESS;
1243}