blob: ce296c40cff16d3adcabc660f304125494a5adbc [file] [log] [blame]
Radek Krejcid91dbaf2018-09-21 15:51:39 +02001/**
2 * @file xml.c
3 * @author Radek Krejci <rkrejci@cesnet.cz>
Michal Vaskob36053d2020-03-26 15:49:30 +01004 * @author Michal Vasko <mvasko@cesnet.cz>
Radek Krejcid91dbaf2018-09-21 15:51:39 +02005 * @brief Generic XML parser implementation for libyang
6 *
7 * Copyright (c) 2015 - 2018 CESNET, z.s.p.o.
8 *
9 * This source code is licensed under BSD 3-Clause License (the "License").
10 * You may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * https://opensource.org/licenses/BSD-3-Clause
14 */
15
Radek Krejci535ea9f2020-05-29 16:01:05 +020016#define _GNU_SOURCE
17
18#include "xml.h"
Radek Krejci4b74d5e2018-09-26 14:30:55 +020019
Radek Krejcib1890642018-10-03 14:05:40 +020020#include <assert.h>
Radek Krejci7a7fa902018-09-25 17:08:21 +020021#include <ctype.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020022#include <stdint.h>
Radek Krejcie7b95092019-05-15 11:03:07 +020023#include <stdlib.h>
Radek Krejci4b74d5e2018-09-26 14:30:55 +020024#include <string.h>
Radek Krejcica376bd2020-06-11 16:04:06 +020025#include <sys/types.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020026
Radek Krejci535ea9f2020-05-29 16:01:05 +020027#include "common.h"
28#include "dict.h"
29#include "printer.h"
30#include "tree.h"
31#include "tree_data.h"
Radek Krejcid91dbaf2018-09-21 15:51:39 +020032
/**
 * Move the input of the XML context c forward by s bytes. If the new position holds the
 * terminating NUL byte, an unexpected-EOF validation error is logged (with the context line)
 * and LY_EVALID is returned from the enclosing function via LY_CHECK_ERR_RET.
 *
 * The macro is a single statement (do/while(0)) and its arguments are parenthesized, so it
 * is safe in any statement context. Note that c is evaluated several times.
 */
#define move_input(c,s) \
    do { \
        (c)->input += (s); \
        LY_CHECK_ERR_RET(!(c)->input[0], LOGVAL((c)->ctx, LY_VLOG_LINE, &(c)->line, LY_VCODE_EOF), LY_EVALID); \
    } while (0)
Radek Krejcid91dbaf2018-09-21 15:51:39 +020035
/**
 * Skip XML whitespace characters at the current input position of the XML context c.
 * The line counter of the context is incremented for every skipped newline.
 *
 * The macro is a single statement (do/while(0)) and every use of c is parenthesized.
 * Note that c is evaluated several times.
 */
#define ign_xmlws(c) \
    do { \
        while (is_xmlws(*(c)->input)) { \
            if (*(c)->input == '\n') { \
                ++(c)->line; \
            } \
            ++(c)->input; \
        } \
    } while (0)
38
39static LY_ERR lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *value_len, int *ws_only,
40 int *dynamic);
Radek Krejcid91dbaf2018-09-21 15:51:39 +020041
/**
 * @brief Skip the input up to the first occurrence of a delimiter.
 *
 * @param[in] input NUL-terminated string to scan.
 * @param[in] delim Delimiter to search for.
 * @param[in] delim_len Number of bytes of @p delim that must match.
 * @param[out] newlines Number of newline characters passed while scanning (up to the delimiter,
 * or up to the end of the input if the delimiter is not present).
 * @return Pointer to the first byte of the delimiter inside @p input,
 * NULL if the input ends before the delimiter is found.
 */
static const char *
ign_todelim(register const char *input, const char *delim, size_t delim_len, size_t *newlines)
{
    const char *found = NULL;
    size_t passed_lines = 0;

    while (*input) {
        if (*input == *delim) {
            /* candidate position, compare the whole delimiter */
            if (!strncmp(input, delim, delim_len)) {
                found = input;
                break;
            }
        } else if (*input == '\n') {
            ++passed_lines;
        }
        ++input;
    }

    *newlines = passed_lines;
    return found;
}
76
Radek Krejci4b74d5e2018-09-26 14:30:55 +020077/**
Michal Vaskob36053d2020-03-26 15:49:30 +010078 * @brief Check/Get an XML identifier from the input string.
79 *
80 * The identifier must have at least one valid character complying the name start character constraints.
81 * The identifier is terminated by the first character, which does not comply to the name character constraints.
82 *
83 * See https://www.w3.org/TR/xml-names/#NT-NCName
84 *
85 * @param[in] xmlctx XML context.
86 * @param[out] start Pointer to the start of the identifier.
87 * @param[out] end Pointer ot the end of the identifier.
88 * @return LY_ERR value.
89 */
90static LY_ERR
91lyxml_parse_identifier(struct lyxml_ctx *xmlctx, const char **start, const char **end)
92{
93 const char *s, *in;
94 uint32_t c;
95 size_t parsed;
96 LY_ERR rc;
97
98 in = s = xmlctx->input;
99
100 /* check NameStartChar (minus colon) */
101 LY_CHECK_ERR_RET(ly_getutf8(&in, &c, &parsed),
102 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INCHAR, in[0]),
103 LY_EVALID);
104 LY_CHECK_ERR_RET(!is_xmlqnamestartchar(c),
105 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
106 "Identifier \"%s\" starts with an invalid character.", in - parsed),
107 LY_EVALID);
108
109 /* check rest of the identifier */
110 do {
111 /* move only successfully parsed bytes */
112 xmlctx->input += parsed;
113
114 rc = ly_getutf8(&in, &c, &parsed);
115 LY_CHECK_ERR_RET(rc, LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INCHAR, in[0]), LY_EVALID);
116 } while (is_xmlqnamechar(c));
117
118 *start = s;
119 *end = xmlctx->input;
120 return LY_SUCCESS;
121}
122
123/**
124 * @brief Add namespace definition into XML context.
125 *
126 * Namespaces from a single element are supposed to be added sequentially together (not interleaved by a namespace from other
127 * element). This mimic namespace visibility, since the namespace defined in element E is not visible from its parents or
128 * siblings. On the other hand, namespace from a parent element can be redefined in a child element. This is also reflected
129 * by lyxml_ns_get() which returns the most recent namespace definition for the given prefix.
130 *
131 * When leaving processing of a subtree of some element (after it is removed from xmlctx->elements), caller is supposed to call
132 * lyxml_ns_rm() to remove all the namespaces defined in such an element from the context.
133 *
134 * @param[in] xmlctx XML context to work with.
135 * @param[in] prefix Pointer to the namespace prefix. Can be NULL for default namespace.
136 * @param[in] prefix_len Length of the prefix.
137 * @param[in] uri Namespace URI (value) to store directly. Value is always spent.
138 * @return LY_ERR values.
139 */
140LY_ERR
141lyxml_ns_add(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, char *uri)
142{
143 struct lyxml_ns *ns;
144
145 ns = malloc(sizeof *ns);
146 LY_CHECK_ERR_RET(!ns, LOGMEM(xmlctx->ctx), LY_EMEM);
147
148 /* we need to connect the depth of the element where the namespace is defined with the
149 * namespace record to be able to maintain (remove) the record when the parser leaves
150 * (to its sibling or back to the parent) the element where the namespace was defined */
151 ns->depth = xmlctx->elements.count;
152
153 ns->uri = uri;
154 if (prefix) {
155 ns->prefix = strndup(prefix, prefix_len);
156 LY_CHECK_ERR_RET(!ns->prefix, LOGMEM(xmlctx->ctx); free(ns->uri); free(ns), LY_EMEM);
157 } else {
158 ns->prefix = NULL;
159 }
160
161 LY_CHECK_ERR_RET(ly_set_add(&xmlctx->ns, ns, LY_SET_OPT_USEASLIST) == -1,
162 free(ns->prefix); free(ns->uri); free(ns), LY_EMEM);
163 return LY_SUCCESS;
164}
165
166/**
167 * @brief Remove all the namespaces defined in the element recently closed (removed from the xmlctx->elements).
168 *
169 * @param[in] xmlctx XML context to work with.
170 */
171void
172lyxml_ns_rm(struct lyxml_ctx *xmlctx)
173{
174 unsigned int u;
175
176 for (u = xmlctx->ns.count - 1; u + 1 > 0; --u) {
177 if (((struct lyxml_ns *)xmlctx->ns.objs[u])->depth != xmlctx->elements.count + 1) {
178 /* we are done, the namespaces from a single element are supposed to be together */
179 break;
180 }
181 /* remove the ns structure */
182 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->prefix);
183 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->uri);
184 free(xmlctx->ns.objs[u]);
185 --xmlctx->ns.count;
186 }
187
188 if (!xmlctx->ns.count) {
189 /* cleanup the xmlctx's namespaces storage */
190 ly_set_erase(&xmlctx->ns, NULL);
191 }
192}
193
194void *
195lyxml_elem_dup(void *item)
196{
197 struct lyxml_elem *dup;
198
199 dup = malloc(sizeof *dup);
200 memcpy(dup, item, sizeof *dup);
201
202 return dup;
203}
204
205void *
206lyxml_ns_dup(void *item)
207{
208 struct lyxml_ns *dup, *orig;
209
210 orig = (struct lyxml_ns *)item;
211 dup = malloc(sizeof *dup);
212 dup->prefix = orig->prefix ? strdup(orig->prefix) : NULL;
213 dup->uri = strdup(orig->uri);
214 dup->depth = orig->depth;
215
216 return dup;
217}
218
219const struct lyxml_ns *
220lyxml_ns_get(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len)
221{
222 unsigned int u;
223 struct lyxml_ns *ns;
224
225 for (u = xmlctx->ns.count - 1; u + 1 > 0; --u) {
226 ns = (struct lyxml_ns *)xmlctx->ns.objs[u];
227 if (prefix && prefix_len) {
228 if (ns->prefix && !ly_strncmp(ns->prefix, prefix, prefix_len)) {
229 return ns;
230 }
231 } else if (!ns->prefix) {
232 /* default namespace */
233 return ns;
234 }
235 }
236
237 return NULL;
238}
239
240static LY_ERR
241lyxml_skip_until_end_or_after_otag(struct lyxml_ctx *xmlctx)
242{
243 const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */
244 const char *in, *endtag, *sectname;
245 size_t endtag_len, newlines;
246
247 while (1) {
248 ign_xmlws(xmlctx);
249
250 if (xmlctx->input[0] == '\0') {
251 /* EOF */
252 if (xmlctx->elements.count) {
253 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
254 return LY_EVALID;
255 }
256 return LY_SUCCESS;
257 } else if (xmlctx->input[0] != '<') {
258 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->input),
259 xmlctx->input, "element tag start ('<')");
260 return LY_EVALID;
261 }
262 move_input(xmlctx, 1);
263
264 if (xmlctx->input[0] == '!') {
265 move_input(xmlctx, 1);
266 /* sections to ignore */
267 if (!strncmp(xmlctx->input, "--", 2)) {
268 /* comment */
269 move_input(xmlctx, 2);
270 sectname = "Comment";
271 endtag = "-->";
272 endtag_len = 3;
273 } else if (!strncmp(xmlctx->input, "[CDATA[", 7)) {
274 /* CDATA section */
275 move_input(xmlctx, 7);
276 sectname = "CData";
277 endtag = "]]>";
278 endtag_len = 3;
279 } else if (!strncmp(xmlctx->input, "DOCTYPE", 7)) {
280 /* Document type declaration - not supported */
281 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_NSUPP, "Document Type Declaration");
282 return LY_EVALID;
283 } else {
284 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Unknown XML section \"%.20s\".", &xmlctx->input[-2]);
285 return LY_EVALID;
286 }
287 in = ign_todelim(xmlctx->input, endtag, endtag_len, &newlines);
288 LY_CHECK_ERR_RET(!in, LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_NTERM, sectname), LY_EVALID);
289 xmlctx->line += newlines;
290 xmlctx->input = in + endtag_len;
291 } else if (xmlctx->input[0] == '?') {
292 in = ign_todelim(xmlctx->input, "?>", 2, &newlines);
293 LY_CHECK_ERR_RET(!in, LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_NTERM, "Declaration"), LY_EVALID);
294 xmlctx->line += newlines;
295 xmlctx->input = in + 2;
296 } else {
297 /* other non-WS character */
298 break;
299 }
300 }
301
302 return LY_SUCCESS;
303}
304
305static LY_ERR
306lyxml_parse_qname(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
307{
308 const char *start, *end;
309
310 *prefix = NULL;
311 *prefix_len = 0;
312
313 LY_CHECK_RET(lyxml_parse_identifier(xmlctx, &start, &end));
314 if (end[0] == ':') {
315 /* we have prefixed identifier */
316 *prefix = start;
317 *prefix_len = end - start;
318
319 move_input(xmlctx, 1);
320 LY_CHECK_RET(lyxml_parse_identifier(xmlctx, &start, &end));
321 }
322
323 *name = start;
324 *name_len = end - start;
325 return LY_SUCCESS;
326}
327
328/**
Radek Krejci7a7fa902018-09-25 17:08:21 +0200329 * Store UTF-8 character specified as 4byte integer into the dst buffer.
330 * Returns number of written bytes (4 max), expects that dst has enough space.
331 *
332 * UTF-8 mapping:
333 * 00000000 -- 0000007F: 0xxxxxxx
334 * 00000080 -- 000007FF: 110xxxxx 10xxxxxx
335 * 00000800 -- 0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
336 * 00010000 -- 001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
337 *
338 * Includes checking for valid characters (following RFC 7950, sec 9.4)
339 */
340static LY_ERR
Radek Krejci117d2082018-09-26 10:05:14 +0200341lyxml_pututf8(char *dst, uint32_t value, size_t *bytes_written)
Radek Krejci7a7fa902018-09-25 17:08:21 +0200342{
343 if (value < 0x80) {
344 /* one byte character */
345 if (value < 0x20 &&
346 value != 0x09 &&
347 value != 0x0a &&
348 value != 0x0d) {
349 return LY_EINVAL;
350 }
351
352 dst[0] = value;
353 (*bytes_written) = 1;
354 } else if (value < 0x800) {
355 /* two bytes character */
356 dst[0] = 0xc0 | (value >> 6);
357 dst[1] = 0x80 | (value & 0x3f);
358 (*bytes_written) = 2;
359 } else if (value < 0xfffe) {
360 /* three bytes character */
361 if (((value & 0xf800) == 0xd800) ||
362 (value >= 0xfdd0 && value <= 0xfdef)) {
363 /* exclude surrogate blocks %xD800-DFFF */
364 /* exclude noncharacters %xFDD0-FDEF */
365 return LY_EINVAL;
366 }
367
368 dst[0] = 0xe0 | (value >> 12);
369 dst[1] = 0x80 | ((value >> 6) & 0x3f);
370 dst[2] = 0x80 | (value & 0x3f);
371
372 (*bytes_written) = 3;
373 } else if (value < 0x10fffe) {
374 if ((value & 0xffe) == 0xffe) {
375 /* exclude noncharacters %xFFFE-FFFF, %x1FFFE-1FFFF, %x2FFFE-2FFFF, %x3FFFE-3FFFF, %x4FFFE-4FFFF,
376 * %x5FFFE-5FFFF, %x6FFFE-6FFFF, %x7FFFE-7FFFF, %x8FFFE-8FFFF, %x9FFFE-9FFFF, %xAFFFE-AFFFF,
377 * %xBFFFE-BFFFF, %xCFFFE-CFFFF, %xDFFFE-DFFFF, %xEFFFE-EFFFF, %xFFFFE-FFFFF, %x10FFFE-10FFFF */
378 return LY_EINVAL;
379 }
380 /* four bytes character */
381 dst[0] = 0xf0 | (value >> 18);
382 dst[1] = 0x80 | ((value >> 12) & 0x3f);
383 dst[2] = 0x80 | ((value >> 6) & 0x3f);
384 dst[3] = 0x80 | (value & 0x3f);
385
386 (*bytes_written) = 4;
387 }
388 return LY_SUCCESS;
389}
390
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200391static LY_ERR
Michal Vaskob36053d2020-03-26 15:49:30 +0100392lyxml_parse_value(struct lyxml_ctx *xmlctx, char endchar, char **value, size_t *length, int *ws_only, int *dynamic)
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200393{
Michal Vaskob36053d2020-03-26 15:49:30 +0100394#define BUFSIZE 24
395#define BUFSIZE_STEP 128
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200396
Michal Vaskob36053d2020-03-26 15:49:30 +0100397 const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */
398 const char *in = xmlctx->input, *start;
399 char *buf = NULL;
Radek Krejci4ad42aa2019-07-23 16:55:58 +0200400 size_t offset; /* read offset in input buffer */
401 size_t len; /* length of the output string (write offset in output buffer) */
402 size_t size = 0; /* size of the output buffer */
Radek Krejci7a7fa902018-09-25 17:08:21 +0200403 void *p;
Radek Krejci117d2082018-09-26 10:05:14 +0200404 uint32_t n;
Michal Vaskob36053d2020-03-26 15:49:30 +0100405 size_t u;
406 int ws = 1;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200407
Michal Vaskob36053d2020-03-26 15:49:30 +0100408 assert(xmlctx);
Radek Krejcib1890642018-10-03 14:05:40 +0200409
Radek Krejcid70d1072018-10-09 14:20:47 +0200410 /* init */
Michal Vaskob36053d2020-03-26 15:49:30 +0100411 start = in;
Radek Krejcid70d1072018-10-09 14:20:47 +0200412 offset = len = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200413
414 /* parse */
415 while (in[offset]) {
416 if (in[offset] == '&') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100417 /* non WS */
418 ws = 0;
Radek Krejcid70d1072018-10-09 14:20:47 +0200419
Michal Vaskob36053d2020-03-26 15:49:30 +0100420 if (!buf) {
421 /* prepare output buffer */
422 buf = malloc(BUFSIZE);
423 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
424 size = BUFSIZE;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200425 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100426
427 /* allocate enough for the offset and next character,
428 * we will need 4 bytes at most since we support only the predefined
429 * (one-char) entities and character references */
430 if (len + offset + 4 >= size) {
431 buf = ly_realloc(buf, size + BUFSIZE_STEP);
432 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
433 size += BUFSIZE_STEP;
434 }
435
436 if (offset) {
437 /* store what we have so far */
438 memcpy(&buf[len], in, offset);
439 len += offset;
440 in += offset;
441 offset = 0;
442 }
443
Radek Krejci7a7fa902018-09-25 17:08:21 +0200444 ++offset;
445 if (in[offset] != '#') {
446 /* entity reference - only predefined references are supported */
447 if (!strncmp(&in[offset], "lt;", 3)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100448 buf[len++] = '<';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200449 in += 4; /* &lt; */
450 } else if (!strncmp(&in[offset], "gt;", 3)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100451 buf[len++] = '>';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200452 in += 4; /* &gt; */
453 } else if (!strncmp(&in[offset], "amp;", 4)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100454 buf[len++] = '&';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200455 in += 5; /* &amp; */
456 } else if (!strncmp(&in[offset], "apos;", 5)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100457 buf[len++] = '\'';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200458 in += 6; /* &apos; */
459 } else if (!strncmp(&in[offset], "quot;", 5)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100460 buf[len++] = '\"';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200461 in += 6; /* &quot; */
462 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100463 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
Radek Krejcied6c6ad2018-09-26 09:10:18 +0200464 "Entity reference \"%.*s\" not supported, only predefined references allowed.", 10, &in[offset-1]);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200465 goto error;
466 }
467 offset = 0;
468 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100469 p = (void *)&in[offset - 1];
Radek Krejci7a7fa902018-09-25 17:08:21 +0200470 /* character reference */
471 ++offset;
472 if (isdigit(in[offset])) {
473 for (n = 0; isdigit(in[offset]); offset++) {
474 n = (10 * n) + (in[offset] - '0');
475 }
476 } else if (in[offset] == 'x' && isxdigit(in[offset + 1])) {
477 for (n = 0, ++offset; isxdigit(in[offset]); offset++) {
478 if (isdigit(in[offset])) {
479 u = (in[offset] - '0');
480 } else if (in[offset] > 'F') {
481 u = 10 + (in[offset] - 'a');
482 } else {
483 u = 10 + (in[offset] - 'A');
484 }
485 n = (16 * n) + u;
486 }
487 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100488 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Invalid character reference \"%.*s\".", 12, p);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200489 goto error;
490
491 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100492
Radek Krejci7a7fa902018-09-25 17:08:21 +0200493 LY_CHECK_ERR_GOTO(in[offset] != ';',
Michal Vaskob36053d2020-03-26 15:49:30 +0100494 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP,
Radek Krejci7a7fa902018-09-25 17:08:21 +0200495 LY_VCODE_INSTREXP_len(&in[offset]), &in[offset], ";"),
496 error);
497 ++offset;
Michal Vaskob36053d2020-03-26 15:49:30 +0100498 LY_CHECK_ERR_GOTO(lyxml_pututf8(&buf[len], n, &u),
499 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
500 "Invalid character reference \"%.*s\" (0x%08x).", 12, p, n),
Radek Krejci7a7fa902018-09-25 17:08:21 +0200501 error);
502 len += u;
503 in += offset;
504 offset = 0;
505 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100506 } else if (in[offset] == endchar) {
Radek Krejci7a7fa902018-09-25 17:08:21 +0200507 /* end of string */
Radek Krejcid70d1072018-10-09 14:20:47 +0200508 if (buf) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100509 /* realloc exact size string */
510 buf = ly_realloc(buf, len + offset + 1);
511 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
512 size = len + offset + 1;
Radek Krejcid70d1072018-10-09 14:20:47 +0200513 memcpy(&buf[len], in, offset);
Michal Vaskob36053d2020-03-26 15:49:30 +0100514
515 /* set terminating NULL byte */
516 buf[len + offset] = '\0';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200517 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200518 len += offset;
Michal Vaskob36053d2020-03-26 15:49:30 +0100519 in += offset;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200520 goto success;
521 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100522 if (!is_xmlws(in[offset])) {
523 /* non WS */
524 ws = 0;
525 }
526
Radek Krejci7a7fa902018-09-25 17:08:21 +0200527 /* log lines */
528 if (in[offset] == '\n') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100529 ++xmlctx->line;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200530 }
531
532 /* continue */
533 ++offset;
534 }
535 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100536
537 /* EOF reached before endchar */
538 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
539
Radek Krejci7a7fa902018-09-25 17:08:21 +0200540error:
Michal Vaskob36053d2020-03-26 15:49:30 +0100541 free(buf);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200542 return LY_EVALID;
543
544success:
Radek Krejcid70d1072018-10-09 14:20:47 +0200545 if (buf) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100546 *value = buf;
547 *dynamic = 1;
548 } else {
549 *value = (char *)start;
550 *dynamic = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200551 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100552 *length = len;
553 *ws_only = ws;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200554
Michal Vaskob36053d2020-03-26 15:49:30 +0100555 xmlctx->input = in;
556 return LY_SUCCESS;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200557
558#undef BUFSIZE
559#undef BUFSIZE_STEP
Radek Krejci7a7fa902018-09-25 17:08:21 +0200560}
561
Michal Vaskob36053d2020-03-26 15:49:30 +0100562static LY_ERR
563lyxml_close_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, const char *name, size_t name_len,
564 int empty)
Radek Krejcid972c252018-09-25 13:23:39 +0200565{
Michal Vaskob36053d2020-03-26 15:49:30 +0100566 struct lyxml_elem *e;
Radek Krejcid972c252018-09-25 13:23:39 +0200567
Michal Vaskob36053d2020-03-26 15:49:30 +0100568 /* match opening and closing element tags */
569 if (!xmlctx->elements.count) {
570 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Stray closing element tag (\"%.*s\").",
571 name_len, name);
572 return LY_EVALID;
573 }
Radek Krejcid972c252018-09-25 13:23:39 +0200574
Michal Vaskob36053d2020-03-26 15:49:30 +0100575 e = (struct lyxml_elem *)xmlctx->elements.objs[xmlctx->elements.count - 1];
576 if ((e->prefix_len != prefix_len) || (e->name_len != name_len)
577 || (prefix_len && strncmp(prefix, e->prefix, e->prefix_len)) || strncmp(name, e->name, e->name_len)) {
578 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
579 "Opening (\"%.*s%s%.*s\") and closing (\"%.*s%s%.*s\") elements tag mismatch.",
580 e->prefix_len, e->prefix ? e->prefix : "", e->prefix ? ":" : "", e->name_len, e->name,
581 prefix_len, prefix ? prefix : "", prefix ? ":" : "", name_len, name);
582 return LY_EVALID;
583 }
Radek Krejcid972c252018-09-25 13:23:39 +0200584
Michal Vaskob36053d2020-03-26 15:49:30 +0100585 /* opening and closing element tags matches, remove record from the opening tags list */
586 ly_set_rm_index(&xmlctx->elements, xmlctx->elements.count - 1, free);
Radek Krejcid972c252018-09-25 13:23:39 +0200587
Michal Vaskob36053d2020-03-26 15:49:30 +0100588 /* remove also the namespaces connected with the element */
589 lyxml_ns_rm(xmlctx);
Radek Krejcid972c252018-09-25 13:23:39 +0200590
Michal Vaskob36053d2020-03-26 15:49:30 +0100591 /* skip WS */
592 ign_xmlws(xmlctx);
Radek Krejcid972c252018-09-25 13:23:39 +0200593
Michal Vaskob36053d2020-03-26 15:49:30 +0100594 /* special "<elem/>" element */
595 if (empty && (xmlctx->input[0] == '/')) {
596 move_input(xmlctx, 1);
597 }
Michal Vasko52927e22020-03-16 17:26:14 +0100598
Michal Vaskob36053d2020-03-26 15:49:30 +0100599 /* parse closing tag */
600 if (xmlctx->input[0] != '>') {
601 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->input),
602 xmlctx->input, "element tag termination ('>')");
603 return LY_EVALID;
604 }
Michal Vasko52927e22020-03-16 17:26:14 +0100605
Michal Vaskob36053d2020-03-26 15:49:30 +0100606 /* move after closing tag without checking for EOF */
607 ++xmlctx->input;
Michal Vasko52927e22020-03-16 17:26:14 +0100608
Radek Krejcid972c252018-09-25 13:23:39 +0200609 return LY_SUCCESS;
610}
611
Michal Vaskob36053d2020-03-26 15:49:30 +0100612static LY_ERR
613lyxml_open_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, const char *name, size_t name_len)
Radek Krejcib1890642018-10-03 14:05:40 +0200614{
Michal Vaskob36053d2020-03-26 15:49:30 +0100615 LY_ERR ret = LY_SUCCESS;
616 struct lyxml_elem *e;
617 const char *prev_input;
618 char *value;
619 size_t parsed, value_len;
620 int ws_only, dynamic, is_ns;
621 uint32_t c;
Radek Krejcib1890642018-10-03 14:05:40 +0200622
Michal Vaskob36053d2020-03-26 15:49:30 +0100623 /* store element opening tag information */
624 e = malloc(sizeof *e);
625 LY_CHECK_ERR_RET(!e, LOGMEM(xmlctx->ctx), LY_EMEM);
626 e->name = name;
627 e->prefix = prefix;
628 e->name_len = name_len;
629 e->prefix_len = prefix_len;
630 ly_set_add(&xmlctx->elements, e, LY_SET_OPT_USEASLIST);
631
632 /* skip WS */
633 ign_xmlws(xmlctx);
634
635 /* parse and store all namespaces */
636 prev_input = xmlctx->input;
637 is_ns = 1;
638 while ((xmlctx->input[0] != '\0') && !ly_getutf8(&xmlctx->input, &c, &parsed) && is_xmlqnamestartchar(c)) {
639 xmlctx->input -= parsed;
640
641 /* parse attribute name */
642 LY_CHECK_GOTO(ret = lyxml_parse_qname(xmlctx, &prefix, &prefix_len, &name, &name_len), cleanup);
643
644 /* parse the value */
645 LY_CHECK_GOTO(ret = lyxml_next_attr_content(xmlctx, (const char **)&value, &value_len, &ws_only, &dynamic), cleanup);
646
647 /* store every namespace */
648 if ((prefix && !ly_strncmp("xmlns", prefix, prefix_len)) || (!prefix && !ly_strncmp("xmlns", name, name_len))) {
649 LY_CHECK_GOTO(ret = lyxml_ns_add(xmlctx, prefix ? name : NULL, prefix ? name_len : 0,
650 dynamic ? value : strndup(value, value_len)), cleanup);
651 dynamic = 0;
652 } else {
653 /* not a namespace */
654 is_ns = 0;
655 }
656 if (dynamic) {
657 free(value);
658 }
659
660 /* skip WS */
661 ign_xmlws(xmlctx);
662
663 if (is_ns) {
664 /* we can actually skip all the namespaces as there is no reason to parse them again */
665 prev_input = xmlctx->input;
666 }
Radek Krejcib1890642018-10-03 14:05:40 +0200667 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100668
669cleanup:
670 if (!ret) {
671 xmlctx->input = prev_input;
672 }
673 return ret;
674}
675
676static LY_ERR
677lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *value_len, int *ws_only, int *dynamic)
678{
679 char quot;
680
681 /* skip WS */
682 ign_xmlws(xmlctx);
683
684 /* skip '=' */
685 if (xmlctx->input[0] == '\0') {
686 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
687 return LY_EVALID;
688 } else if (xmlctx->input[0] != '=') {
689 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->input),
690 xmlctx->input, "'='");
691 return LY_EVALID;
692 }
693 move_input(xmlctx, 1);
694
695 /* skip WS */
696 ign_xmlws(xmlctx);
697
698 /* find quotes */
699 if (xmlctx->input[0] == '\0') {
700 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
701 return LY_EVALID;
702 } else if ((xmlctx->input[0] != '\'') && (xmlctx->input[0] != '\"')) {
703 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->input),
704 xmlctx->input, "either single or double quotation mark");
705 return LY_EVALID;
706 }
707
708 /* remember quote */
709 quot = xmlctx->input[0];
710 move_input(xmlctx, 1);
711
712 /* parse attribute value */
713 LY_CHECK_RET(lyxml_parse_value(xmlctx, quot, (char **)value, value_len, ws_only, dynamic));
714
715 /* move after ending quote (without checking for EOF) */
716 ++xmlctx->input;
717
718 return LY_SUCCESS;
719}
720
721static LY_ERR
722lyxml_next_attribute(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
723{
724 const char *in;
725 char *value;
726 uint32_t c;
727 size_t parsed, value_len;
728 int ws_only, dynamic;
729
730 /* skip WS */
731 ign_xmlws(xmlctx);
732
733 /* parse only possible attributes */
734 while ((xmlctx->input[0] != '>') && (xmlctx->input[0] != '/')) {
735 in = xmlctx->input;
736 if (in[0] == '\0') {
737 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
738 return LY_EVALID;
739 } else if ((ly_getutf8(&in, &c, &parsed) || !is_xmlqnamestartchar(c))) {
740 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in - parsed), in - parsed,
741 "element tag end ('>' or '/>') or an attribute");
742 return LY_EVALID;
743 }
744
745 /* parse attribute name */
746 LY_CHECK_RET(lyxml_parse_qname(xmlctx, prefix, prefix_len, name, name_len));
747
748 if ((!*prefix || ly_strncmp("xmlns", *prefix, *prefix_len)) && (*prefix || ly_strncmp("xmlns", *name, *name_len))) {
749 /* standard attribute */
750 break;
751 }
752
753 /* namespace, skip it */
754 LY_CHECK_RET(lyxml_next_attr_content(xmlctx, (const char **)&value, &value_len, &ws_only, &dynamic));
755 if (dynamic) {
756 free(value);
757 }
758
759 /* skip WS */
760 ign_xmlws(xmlctx);
761 }
762
763 return LY_SUCCESS;
764}
765
766static LY_ERR
767lyxml_next_element(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len,
768 int *closing)
769{
770 /* skip WS until EOF or after opening tag '<' */
771 LY_CHECK_RET(lyxml_skip_until_end_or_after_otag(xmlctx));
772 if (xmlctx->input[0] == '\0') {
773 /* set return values */
774 *prefix = *name = NULL;
775 *prefix_len = *name_len = 0;
776 return LY_SUCCESS;
777 }
778
779 if (xmlctx->input[0] == '/') {
780 move_input(xmlctx, 1);
781 *closing = 1;
782 } else {
783 *closing = 0;
784 }
785
786 /* skip WS */
787 ign_xmlws(xmlctx);
788
789 /* parse element name */
790 LY_CHECK_RET(lyxml_parse_qname(xmlctx, prefix, prefix_len, name, name_len));
791
792 return LY_SUCCESS;
793}
794
795LY_ERR
796lyxml_ctx_new(const struct ly_ctx *ctx, const char *input, struct lyxml_ctx **xmlctx_p)
797{
798 LY_ERR ret = LY_SUCCESS;
799 struct lyxml_ctx *xmlctx;
800 int closing;
801
802 /* new context */
803 xmlctx = calloc(1, sizeof *xmlctx);
804 LY_CHECK_ERR_RET(!xmlctx, LOGMEM(ctx), LY_EMEM);
805 xmlctx->ctx = ctx;
806 xmlctx->line = 1;
807 xmlctx->input = input;
808
809 /* parse next element, if any */
810 LY_CHECK_GOTO(ret = lyxml_next_element(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name,
811 &xmlctx->name_len, &closing), cleanup);
812
813 if (xmlctx->input[0] == '\0') {
814 /* update status */
815 xmlctx->status = LYXML_END;
816 } else if (closing) {
817 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Stray closing element tag (\"%.*s\").",
818 xmlctx->name_len, xmlctx->name);
819 ret = LY_EVALID;
820 goto cleanup;
821 } else {
822 /* open an element, also parses all enclosed namespaces */
823 LY_CHECK_GOTO(ret = lyxml_open_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len), cleanup);
824
825 /* update status */
826 xmlctx->status = LYXML_ELEMENT;
827 }
828
829cleanup:
830 if (ret) {
831 lyxml_ctx_free(xmlctx);
832 } else {
833 *xmlctx_p = xmlctx;
834 }
835 return ret;
836}
837
838LY_ERR
839lyxml_ctx_next(struct lyxml_ctx *xmlctx)
840{
841 LY_ERR ret = LY_SUCCESS;
842 int closing;
843 struct lyxml_elem *e;
844
845 /* if the value was not used, free it */
846 if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) {
847 free((char *)xmlctx->value);
848 xmlctx->value = NULL;
849 xmlctx->dynamic = 0;
850 }
851
852 switch (xmlctx->status) {
853 /* content |</elem> */
854 case LYXML_ELEM_CONTENT:
855 /* handle special case when empty content for "<elem/>" was returned */
856 if (xmlctx->input[0] == '/') {
857 assert(xmlctx->elements.count);
858 e = (struct lyxml_elem *)xmlctx->elements.objs[xmlctx->elements.count - 1];
859
860 /* close the element (parses closing tag) */
861 LY_CHECK_GOTO(ret = lyxml_close_element(xmlctx, e->prefix, e->prefix_len, e->name, e->name_len, 1), cleanup);
862
863 /* update status */
864 xmlctx->status = LYXML_ELEM_CLOSE;
865 break;
866 }
867 /* fallthrough */
868
869 /* </elem>| <elem2>* */
870 case LYXML_ELEM_CLOSE:
871 /* parse next element, if any */
872 LY_CHECK_GOTO(ret = lyxml_next_element(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name,
873 &xmlctx->name_len, &closing), cleanup);
874
875 if (xmlctx->input[0] == '\0') {
876 /* update status */
877 xmlctx->status = LYXML_END;
878 } else if (closing) {
879 /* close an element (parses also closing tag) */
880 LY_CHECK_GOTO(ret = lyxml_close_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len, 0), cleanup);
881
882 /* update status */
883 xmlctx->status = LYXML_ELEM_CLOSE;
884 } else {
885 /* open an element, also parses all enclosed namespaces */
886 LY_CHECK_GOTO(ret = lyxml_open_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len), cleanup);
887
888 /* update status */
889 xmlctx->status = LYXML_ELEMENT;
890 }
891 break;
892
893 /* <elem| attr='val'* > content */
894 case LYXML_ELEMENT:
895
896 /* attr='val'| attr='val'* > content */
897 case LYXML_ATTR_CONTENT:
898 /* parse attribute name, if any */
899 LY_CHECK_GOTO(ret = lyxml_next_attribute(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name, &xmlctx->name_len), cleanup);
900
901 if (xmlctx->input[0] == '>') {
902 /* no attributes but a closing tag */
903 move_input(xmlctx, 1);
904
905 /* parse element content */
906 LY_CHECK_GOTO(ret = lyxml_parse_value(xmlctx, '<', (char **)&xmlctx->value, &xmlctx->value_len, &xmlctx->ws_only,
907 &xmlctx->dynamic), cleanup);
908
909 if (!xmlctx->value_len) {
910 /* use empty value, easier to work with */
911 xmlctx->value = "";
912 assert(!xmlctx->dynamic);
913 }
914
915 /* update status */
916 xmlctx->status = LYXML_ELEM_CONTENT;
917 } else if (xmlctx->input[0] == '/') {
918 /* no content but we still return it */
919 xmlctx->value = "";
920 xmlctx->value_len = 0;
921 xmlctx->ws_only = 1;
922 xmlctx->dynamic = 0;
923
924 /* update status */
925 xmlctx->status = LYXML_ELEM_CONTENT;
926 } else {
927 /* update status */
928 xmlctx->status = LYXML_ATTRIBUTE;
929 }
930 break;
931
932 /* attr|='val' */
933 case LYXML_ATTRIBUTE:
934 /* skip formatting and parse value */
935 LY_CHECK_GOTO(ret = lyxml_next_attr_content(xmlctx, &xmlctx->value, &xmlctx->value_len, &xmlctx->ws_only,
936 &xmlctx->dynamic), cleanup);
937
938 /* update status */
939 xmlctx->status = LYXML_ATTR_CONTENT;
940 break;
941
942 /* </elem> |EOF */
943 case LYXML_END:
944 /* nothing to do */
945 break;
946 }
947
948cleanup:
949 if (ret) {
950 /* invalidate context */
951 xmlctx->status = LYXML_END;
952 }
953 return ret;
954}
955
956LY_ERR
957lyxml_ctx_peek(struct lyxml_ctx *xmlctx, enum LYXML_PARSER_STATUS *next)
958{
959 LY_ERR ret = LY_SUCCESS;
960 const char *prefix, *name, *prev_input;
961 size_t prefix_len, name_len;
962 int closing;
963
964 prev_input = xmlctx->input;
965
966 switch (xmlctx->status) {
967 case LYXML_ELEM_CONTENT:
968 if (xmlctx->input[0] == '/') {
969 *next = LYXML_ELEM_CLOSE;
970 break;
971 }
972 /* fallthrough */
973 case LYXML_ELEM_CLOSE:
974 /* parse next element, if any */
975 LY_CHECK_GOTO(ret = lyxml_next_element(xmlctx, &prefix, &prefix_len, &name, &name_len, &closing), cleanup);
976
977 if (xmlctx->input[0] == '\0') {
978 *next = LYXML_END;
979 } else if (closing) {
980 *next = LYXML_ELEM_CLOSE;
981 } else {
982 *next = LYXML_ELEMENT;
983 }
984 break;
985 case LYXML_ELEMENT:
986 case LYXML_ATTR_CONTENT:
987 /* parse attribute name, if any */
988 LY_CHECK_GOTO(ret = lyxml_next_attribute(xmlctx, &prefix, &prefix_len, &name, &name_len), cleanup);
989
990 if ((xmlctx->input[0] == '>') || (xmlctx->input[0] == '/')) {
991 *next = LYXML_ELEM_CONTENT;
992 } else {
993 *next = LYXML_ATTRIBUTE;
994 }
995 break;
996 case LYXML_ATTRIBUTE:
997 *next = LYXML_ATTR_CONTENT;
998 break;
999 case LYXML_END:
1000 *next = LYXML_END;
1001 break;
1002 }
1003
1004cleanup:
1005 xmlctx->input = prev_input;
1006 return ret;
1007}
1008
1009void
1010lyxml_ctx_free(struct lyxml_ctx *xmlctx)
1011{
1012 uint32_t u;
1013
1014 if (!xmlctx) {
1015 return;
1016 }
1017
1018 if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) {
1019 free((char *)xmlctx->value);
1020 }
1021 ly_set_erase(&xmlctx->elements, free);
1022 for (u = xmlctx->ns.count - 1; u + 1 > 0; --u) {
1023 /* remove the ns structure */
1024 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->prefix);
1025 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->uri);
1026 free(xmlctx->ns.objs[u]);
1027 }
1028 ly_set_erase(&xmlctx->ns, NULL);
1029 free(xmlctx);
Radek Krejcib1890642018-10-03 14:05:40 +02001030}
Radek Krejcie7b95092019-05-15 11:03:07 +02001031
1032LY_ERR
Radek Krejci241f6b52020-05-21 18:13:49 +02001033lyxml_dump_text(struct ly_out *out, const char *text, int attribute)
Radek Krejcie7b95092019-05-15 11:03:07 +02001034{
Radek Krejcibaeb8382020-05-27 16:44:53 +02001035 ssize_t ret = LY_SUCCESS;
Radek Krejcie7b95092019-05-15 11:03:07 +02001036 unsigned int u;
1037
1038 if (!text) {
1039 return 0;
1040 }
1041
1042 for (u = 0; text[u]; u++) {
1043 switch (text[u]) {
1044 case '&':
Radek Krejci241f6b52020-05-21 18:13:49 +02001045 ret = ly_print(out, "&amp;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001046 break;
1047 case '<':
Radek Krejci241f6b52020-05-21 18:13:49 +02001048 ret = ly_print(out, "&lt;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001049 break;
1050 case '>':
1051 /* not needed, just for readability */
Radek Krejci241f6b52020-05-21 18:13:49 +02001052 ret = ly_print(out, "&gt;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001053 break;
1054 case '"':
1055 if (attribute) {
Radek Krejci241f6b52020-05-21 18:13:49 +02001056 ret = ly_print(out, "&quot;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001057 break;
1058 }
1059 /* falls through */
1060 default:
Radek Krejcibaeb8382020-05-27 16:44:53 +02001061 ret = ly_write(out, &text[u], 1);
Radek Krejcie7b95092019-05-15 11:03:07 +02001062 }
1063 }
1064
Radek Krejcibaeb8382020-05-27 16:44:53 +02001065 return ret < 0 ? (-1 * ret) : 0;
Radek Krejcie7b95092019-05-15 11:03:07 +02001066}
1067
Michal Vasko52927e22020-03-16 17:26:14 +01001068LY_ERR
Michal Vaskob36053d2020-03-26 15:49:30 +01001069lyxml_get_prefixes(struct lyxml_ctx *xmlctx, const char *value, size_t value_len, struct ly_prefix **val_prefs)
Michal Vasko52927e22020-03-16 17:26:14 +01001070{
1071 LY_ERR ret;
Radek Krejci7eb54ba2020-05-18 16:30:04 +02001072 LY_ARRAY_SIZE_TYPE u;
1073 uint32_t c;
Michal Vasko52927e22020-03-16 17:26:14 +01001074 const struct lyxml_ns *ns;
1075 const char *start, *stop;
1076 struct ly_prefix *prefixes = NULL;
1077 size_t len;
1078
1079 for (stop = start = value; (size_t)(stop - value) < value_len; start = stop) {
1080 size_t bytes;
1081 ly_getutf8(&stop, &c, &bytes);
1082 if (is_xmlqnamestartchar(c)) {
1083 for (ly_getutf8(&stop, &c, &bytes);
1084 is_xmlqnamechar(c) && (size_t)(stop - value) < value_len;
1085 ly_getutf8(&stop, &c, &bytes));
1086 stop = stop - bytes;
1087 if (*stop == ':') {
1088 /* we have a possible prefix */
1089 len = stop - start;
Michal Vaskob36053d2020-03-26 15:49:30 +01001090 ns = lyxml_ns_get(xmlctx, start, len);
Michal Vasko52927e22020-03-16 17:26:14 +01001091 if (ns) {
1092 struct ly_prefix *p = NULL;
1093
1094 /* check whether we do not already have this prefix stored */
1095 LY_ARRAY_FOR(prefixes, u) {
1096 if (!ly_strncmp(prefixes[u].pref, start, len)) {
1097 p = &prefixes[u];
1098 break;
1099 }
1100 }
1101 if (!p) {
Michal Vaskob36053d2020-03-26 15:49:30 +01001102 LY_ARRAY_NEW_GOTO(xmlctx->ctx, prefixes, p, ret, error);
1103 p->pref = lydict_insert(xmlctx->ctx, start, len);
1104 p->ns = lydict_insert(xmlctx->ctx, ns->uri, 0);
Michal Vasko52927e22020-03-16 17:26:14 +01001105 } /* else the prefix already present */
1106 }
1107 }
1108 stop = stop + bytes;
1109 }
1110 }
1111
1112 *val_prefs = prefixes;
1113 return LY_SUCCESS;
1114
1115error:
1116 LY_ARRAY_FOR(prefixes, u) {
Michal Vaskob36053d2020-03-26 15:49:30 +01001117 lydict_remove(xmlctx->ctx, prefixes[u].pref);
Michal Vasko52927e22020-03-16 17:26:14 +01001118 }
1119 LY_ARRAY_FREE(prefixes);
1120 return ret;
1121}
1122
1123LY_ERR
1124lyxml_value_compare(const char *value1, const struct ly_prefix *prefs1, const char *value2, const struct ly_prefix *prefs2)
1125{
1126 const char *ptr1, *ptr2, *ns1, *ns2;
Radek Krejci7eb54ba2020-05-18 16:30:04 +02001127 LY_ARRAY_SIZE_TYPE u1, u2;
Michal Vasko52927e22020-03-16 17:26:14 +01001128 int len;
1129
1130 if (!value1 && !value2) {
1131 return LY_SUCCESS;
1132 }
1133 if ((value1 && !value2) || (!value1 && value2)) {
1134 return LY_ENOT;
1135 }
1136
1137 ptr1 = value1;
1138 ptr2 = value2;
1139 while (ptr1[0] && ptr2[0]) {
1140 if (ptr1[0] != ptr2[0]) {
1141 /* it can be a start of prefix that maps to the same module */
1142 ns1 = ns2 = NULL;
1143 if (prefs1) {
1144 /* find module of the first prefix, if any */
1145 LY_ARRAY_FOR(prefs1, u1) {
1146 len = strlen(prefs1[u1].pref);
1147 if (!strncmp(ptr1, prefs1[u1].pref, len) && (ptr1[len] == ':')) {
1148 ns1 = prefs1[u1].ns;
1149 break;
1150 }
1151 }
1152 }
1153 if (prefs2) {
1154 /* find module of the second prefix, if any */
1155 LY_ARRAY_FOR(prefs2, u2) {
1156 len = strlen(prefs2[u2].pref);
1157 if (!strncmp(ptr2, prefs2[u2].pref, len) && (ptr2[len] == ':')) {
1158 ns2 = prefs2[u2].ns;
1159 break;
1160 }
1161 }
1162 }
1163
1164 if (!ns1 || !ns2 || (ns1 != ns2)) {
1165 /* not a prefix or maps to different namespaces */
1166 break;
1167 }
1168
1169 /* skip prefixes in both values (':' is skipped as iter) */
1170 ptr1 += strlen(prefs1[u1].pref);
1171 ptr2 += strlen(prefs2[u2].pref);
1172 }
1173
1174 ++ptr1;
1175 ++ptr2;
1176 }
1177 if (ptr1[0] || ptr2[0]) {
1178 /* not a match or simply different lengths */
1179 return LY_ENOT;
1180 }
1181
1182 return LY_SUCCESS;
1183}