blob: 00b8699a653a7e2297e6ce1c6d5e8d4ca3dd5e41 [file] [log] [blame]
Radek Krejcid91dbaf2018-09-21 15:51:39 +02001/**
2 * @file xml.c
3 * @author Radek Krejci <rkrejci@cesnet.cz>
Michal Vaskob36053d2020-03-26 15:49:30 +01004 * @author Michal Vasko <mvasko@cesnet.cz>
Radek Krejcid91dbaf2018-09-21 15:51:39 +02005 * @brief Generic XML parser implementation for libyang
6 *
7 * Copyright (c) 2015 - 2018 CESNET, z.s.p.o.
8 *
9 * This source code is licensed under BSD 3-Clause License (the "License").
10 * You may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * https://opensource.org/licenses/BSD-3-Clause
14 */
15
Radek Krejci535ea9f2020-05-29 16:01:05 +020016#define _GNU_SOURCE
17
18#include "xml.h"
Radek Krejci4b74d5e2018-09-26 14:30:55 +020019
Radek Krejcib1890642018-10-03 14:05:40 +020020#include <assert.h>
Radek Krejci7a7fa902018-09-25 17:08:21 +020021#include <ctype.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020022#include <stdint.h>
Radek Krejcie7b95092019-05-15 11:03:07 +020023#include <stdlib.h>
Radek Krejci4b74d5e2018-09-26 14:30:55 +020024#include <string.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020025
Radek Krejci535ea9f2020-05-29 16:01:05 +020026#include "common.h"
27#include "dict.h"
28#include "printer.h"
29#include "tree.h"
30#include "tree_data.h"
Radek Krejcid91dbaf2018-09-21 15:51:39 +020031
/*
 * Move the input of the lyxml_ctx c forward by s bytes. If the new position is the terminating
 * NUL byte, an unexpected EOF is logged with the context and LY_EVALID is returned from the
 * function using the macro (via LY_CHECK_ERR_RET).
 *
 * Wrapped in do { } while (0) so that it behaves as a single statement (e.g. in an unbraced if/else).
 */
#define move_input(c, s) \
    do { \
        (c)->input += (s); \
        LY_CHECK_ERR_RET(!(c)->input[0], LOGVAL((c)->ctx, LY_VLOG_LINE, &(c)->line, LY_VCODE_EOF), LY_EVALID); \
    } while (0)

/*
 * Skip all the XML whitespaces at the current input position of the lyxml_ctx c,
 * every skipped newline increases the line counter of the context.
 */
#define ign_xmlws(c) \
    do { \
        while (is_xmlws(*(c)->input)) { \
            if (*(c)->input == '\n') { \
                ++(c)->line; \
            } \
            ++(c)->input; \
        } \
    } while (0)
37
38static LY_ERR lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *value_len, int *ws_only,
39 int *dynamic);
Radek Krejcid91dbaf2018-09-21 15:51:39 +020040
/**
 * @brief Skip any characters until the delimiter @p delim of the length @p delim_len is found.
 *
 * @param[in] input NUL-terminated string to search in.
 * @param[in] delim Delimiter to search for.
 * @param[in] delim_len Number of bytes of @p delim to match.
 * @param[out] newlines Number of newline characters skipped in front of the returned position.
 * @return Pointer to the beginning of the first occurrence of the delimiter in @p input.
 * @return NULL if the end of @p input was reached without finding the delimiter.
 */
static const char *
ign_todelim(const char *input, const char *delim, size_t delim_len, size_t *newlines)
{
    (*newlines) = 0;
    for ( ; *input; ++input) {
        /* cheap first-byte test before the full comparison */
        if ((*input == *delim) && !strncmp(input, delim, delim_len)) {
            return input;
        }

        /* count the skipped newline even if it was an unsuccessful candidate for the delimiter start */
        if (*input == '\n') {
            ++(*newlines);
        }
    }
    return NULL;
}
75
Radek Krejci4b74d5e2018-09-26 14:30:55 +020076/**
Michal Vaskob36053d2020-03-26 15:49:30 +010077 * @brief Check/Get an XML identifier from the input string.
78 *
79 * The identifier must have at least one valid character complying the name start character constraints.
80 * The identifier is terminated by the first character, which does not comply to the name character constraints.
81 *
82 * See https://www.w3.org/TR/xml-names/#NT-NCName
83 *
84 * @param[in] xmlctx XML context.
85 * @param[out] start Pointer to the start of the identifier.
86 * @param[out] end Pointer ot the end of the identifier.
87 * @return LY_ERR value.
88 */
89static LY_ERR
90lyxml_parse_identifier(struct lyxml_ctx *xmlctx, const char **start, const char **end)
91{
92 const char *s, *in;
93 uint32_t c;
94 size_t parsed;
95 LY_ERR rc;
96
97 in = s = xmlctx->input;
98
99 /* check NameStartChar (minus colon) */
100 LY_CHECK_ERR_RET(ly_getutf8(&in, &c, &parsed),
101 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INCHAR, in[0]),
102 LY_EVALID);
103 LY_CHECK_ERR_RET(!is_xmlqnamestartchar(c),
104 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
105 "Identifier \"%s\" starts with an invalid character.", in - parsed),
106 LY_EVALID);
107
108 /* check rest of the identifier */
109 do {
110 /* move only successfully parsed bytes */
111 xmlctx->input += parsed;
112
113 rc = ly_getutf8(&in, &c, &parsed);
114 LY_CHECK_ERR_RET(rc, LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INCHAR, in[0]), LY_EVALID);
115 } while (is_xmlqnamechar(c));
116
117 *start = s;
118 *end = xmlctx->input;
119 return LY_SUCCESS;
120}
121
122/**
123 * @brief Add namespace definition into XML context.
124 *
125 * Namespaces from a single element are supposed to be added sequentially together (not interleaved by a namespace from other
126 * element). This mimic namespace visibility, since the namespace defined in element E is not visible from its parents or
127 * siblings. On the other hand, namespace from a parent element can be redefined in a child element. This is also reflected
128 * by lyxml_ns_get() which returns the most recent namespace definition for the given prefix.
129 *
130 * When leaving processing of a subtree of some element (after it is removed from xmlctx->elements), caller is supposed to call
131 * lyxml_ns_rm() to remove all the namespaces defined in such an element from the context.
132 *
133 * @param[in] xmlctx XML context to work with.
134 * @param[in] prefix Pointer to the namespace prefix. Can be NULL for default namespace.
135 * @param[in] prefix_len Length of the prefix.
136 * @param[in] uri Namespace URI (value) to store directly. Value is always spent.
137 * @return LY_ERR values.
138 */
139LY_ERR
140lyxml_ns_add(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, char *uri)
141{
142 struct lyxml_ns *ns;
143
144 ns = malloc(sizeof *ns);
145 LY_CHECK_ERR_RET(!ns, LOGMEM(xmlctx->ctx), LY_EMEM);
146
147 /* we need to connect the depth of the element where the namespace is defined with the
148 * namespace record to be able to maintain (remove) the record when the parser leaves
149 * (to its sibling or back to the parent) the element where the namespace was defined */
150 ns->depth = xmlctx->elements.count;
151
152 ns->uri = uri;
153 if (prefix) {
154 ns->prefix = strndup(prefix, prefix_len);
155 LY_CHECK_ERR_RET(!ns->prefix, LOGMEM(xmlctx->ctx); free(ns->uri); free(ns), LY_EMEM);
156 } else {
157 ns->prefix = NULL;
158 }
159
160 LY_CHECK_ERR_RET(ly_set_add(&xmlctx->ns, ns, LY_SET_OPT_USEASLIST) == -1,
161 free(ns->prefix); free(ns->uri); free(ns), LY_EMEM);
162 return LY_SUCCESS;
163}
164
165/**
166 * @brief Remove all the namespaces defined in the element recently closed (removed from the xmlctx->elements).
167 *
168 * @param[in] xmlctx XML context to work with.
169 */
170void
171lyxml_ns_rm(struct lyxml_ctx *xmlctx)
172{
173 unsigned int u;
174
175 for (u = xmlctx->ns.count - 1; u + 1 > 0; --u) {
176 if (((struct lyxml_ns *)xmlctx->ns.objs[u])->depth != xmlctx->elements.count + 1) {
177 /* we are done, the namespaces from a single element are supposed to be together */
178 break;
179 }
180 /* remove the ns structure */
181 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->prefix);
182 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->uri);
183 free(xmlctx->ns.objs[u]);
184 --xmlctx->ns.count;
185 }
186
187 if (!xmlctx->ns.count) {
188 /* cleanup the xmlctx's namespaces storage */
189 ly_set_erase(&xmlctx->ns, NULL);
190 }
191}
192
193void *
194lyxml_elem_dup(void *item)
195{
196 struct lyxml_elem *dup;
197
198 dup = malloc(sizeof *dup);
199 memcpy(dup, item, sizeof *dup);
200
201 return dup;
202}
203
204void *
205lyxml_ns_dup(void *item)
206{
207 struct lyxml_ns *dup, *orig;
208
209 orig = (struct lyxml_ns *)item;
210 dup = malloc(sizeof *dup);
211 dup->prefix = orig->prefix ? strdup(orig->prefix) : NULL;
212 dup->uri = strdup(orig->uri);
213 dup->depth = orig->depth;
214
215 return dup;
216}
217
218const struct lyxml_ns *
219lyxml_ns_get(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len)
220{
221 unsigned int u;
222 struct lyxml_ns *ns;
223
224 for (u = xmlctx->ns.count - 1; u + 1 > 0; --u) {
225 ns = (struct lyxml_ns *)xmlctx->ns.objs[u];
226 if (prefix && prefix_len) {
227 if (ns->prefix && !ly_strncmp(ns->prefix, prefix, prefix_len)) {
228 return ns;
229 }
230 } else if (!ns->prefix) {
231 /* default namespace */
232 return ns;
233 }
234 }
235
236 return NULL;
237}
238
239static LY_ERR
240lyxml_skip_until_end_or_after_otag(struct lyxml_ctx *xmlctx)
241{
242 const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */
243 const char *in, *endtag, *sectname;
244 size_t endtag_len, newlines;
245
246 while (1) {
247 ign_xmlws(xmlctx);
248
249 if (xmlctx->input[0] == '\0') {
250 /* EOF */
251 if (xmlctx->elements.count) {
252 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
253 return LY_EVALID;
254 }
255 return LY_SUCCESS;
256 } else if (xmlctx->input[0] != '<') {
257 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->input),
258 xmlctx->input, "element tag start ('<')");
259 return LY_EVALID;
260 }
261 move_input(xmlctx, 1);
262
263 if (xmlctx->input[0] == '!') {
264 move_input(xmlctx, 1);
265 /* sections to ignore */
266 if (!strncmp(xmlctx->input, "--", 2)) {
267 /* comment */
268 move_input(xmlctx, 2);
269 sectname = "Comment";
270 endtag = "-->";
271 endtag_len = 3;
272 } else if (!strncmp(xmlctx->input, "[CDATA[", 7)) {
273 /* CDATA section */
274 move_input(xmlctx, 7);
275 sectname = "CData";
276 endtag = "]]>";
277 endtag_len = 3;
278 } else if (!strncmp(xmlctx->input, "DOCTYPE", 7)) {
279 /* Document type declaration - not supported */
280 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_NSUPP, "Document Type Declaration");
281 return LY_EVALID;
282 } else {
283 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Unknown XML section \"%.20s\".", &xmlctx->input[-2]);
284 return LY_EVALID;
285 }
286 in = ign_todelim(xmlctx->input, endtag, endtag_len, &newlines);
287 LY_CHECK_ERR_RET(!in, LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_NTERM, sectname), LY_EVALID);
288 xmlctx->line += newlines;
289 xmlctx->input = in + endtag_len;
290 } else if (xmlctx->input[0] == '?') {
291 in = ign_todelim(xmlctx->input, "?>", 2, &newlines);
292 LY_CHECK_ERR_RET(!in, LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_NTERM, "Declaration"), LY_EVALID);
293 xmlctx->line += newlines;
294 xmlctx->input = in + 2;
295 } else {
296 /* other non-WS character */
297 break;
298 }
299 }
300
301 return LY_SUCCESS;
302}
303
304static LY_ERR
305lyxml_parse_qname(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
306{
307 const char *start, *end;
308
309 *prefix = NULL;
310 *prefix_len = 0;
311
312 LY_CHECK_RET(lyxml_parse_identifier(xmlctx, &start, &end));
313 if (end[0] == ':') {
314 /* we have prefixed identifier */
315 *prefix = start;
316 *prefix_len = end - start;
317
318 move_input(xmlctx, 1);
319 LY_CHECK_RET(lyxml_parse_identifier(xmlctx, &start, &end));
320 }
321
322 *name = start;
323 *name_len = end - start;
324 return LY_SUCCESS;
325}
326
327/**
Radek Krejci7a7fa902018-09-25 17:08:21 +0200328 * Store UTF-8 character specified as 4byte integer into the dst buffer.
329 * Returns number of written bytes (4 max), expects that dst has enough space.
330 *
331 * UTF-8 mapping:
332 * 00000000 -- 0000007F: 0xxxxxxx
333 * 00000080 -- 000007FF: 110xxxxx 10xxxxxx
334 * 00000800 -- 0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
335 * 00010000 -- 001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
336 *
337 * Includes checking for valid characters (following RFC 7950, sec 9.4)
338 */
339static LY_ERR
Radek Krejci117d2082018-09-26 10:05:14 +0200340lyxml_pututf8(char *dst, uint32_t value, size_t *bytes_written)
Radek Krejci7a7fa902018-09-25 17:08:21 +0200341{
342 if (value < 0x80) {
343 /* one byte character */
344 if (value < 0x20 &&
345 value != 0x09 &&
346 value != 0x0a &&
347 value != 0x0d) {
348 return LY_EINVAL;
349 }
350
351 dst[0] = value;
352 (*bytes_written) = 1;
353 } else if (value < 0x800) {
354 /* two bytes character */
355 dst[0] = 0xc0 | (value >> 6);
356 dst[1] = 0x80 | (value & 0x3f);
357 (*bytes_written) = 2;
358 } else if (value < 0xfffe) {
359 /* three bytes character */
360 if (((value & 0xf800) == 0xd800) ||
361 (value >= 0xfdd0 && value <= 0xfdef)) {
362 /* exclude surrogate blocks %xD800-DFFF */
363 /* exclude noncharacters %xFDD0-FDEF */
364 return LY_EINVAL;
365 }
366
367 dst[0] = 0xe0 | (value >> 12);
368 dst[1] = 0x80 | ((value >> 6) & 0x3f);
369 dst[2] = 0x80 | (value & 0x3f);
370
371 (*bytes_written) = 3;
372 } else if (value < 0x10fffe) {
373 if ((value & 0xffe) == 0xffe) {
374 /* exclude noncharacters %xFFFE-FFFF, %x1FFFE-1FFFF, %x2FFFE-2FFFF, %x3FFFE-3FFFF, %x4FFFE-4FFFF,
375 * %x5FFFE-5FFFF, %x6FFFE-6FFFF, %x7FFFE-7FFFF, %x8FFFE-8FFFF, %x9FFFE-9FFFF, %xAFFFE-AFFFF,
376 * %xBFFFE-BFFFF, %xCFFFE-CFFFF, %xDFFFE-DFFFF, %xEFFFE-EFFFF, %xFFFFE-FFFFF, %x10FFFE-10FFFF */
377 return LY_EINVAL;
378 }
379 /* four bytes character */
380 dst[0] = 0xf0 | (value >> 18);
381 dst[1] = 0x80 | ((value >> 12) & 0x3f);
382 dst[2] = 0x80 | ((value >> 6) & 0x3f);
383 dst[3] = 0x80 | (value & 0x3f);
384
385 (*bytes_written) = 4;
386 }
387 return LY_SUCCESS;
388}
389
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200390static LY_ERR
Michal Vaskob36053d2020-03-26 15:49:30 +0100391lyxml_parse_value(struct lyxml_ctx *xmlctx, char endchar, char **value, size_t *length, int *ws_only, int *dynamic)
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200392{
Michal Vaskob36053d2020-03-26 15:49:30 +0100393#define BUFSIZE 24
394#define BUFSIZE_STEP 128
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200395
Michal Vaskob36053d2020-03-26 15:49:30 +0100396 const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */
397 const char *in = xmlctx->input, *start;
398 char *buf = NULL;
Radek Krejci4ad42aa2019-07-23 16:55:58 +0200399 size_t offset; /* read offset in input buffer */
400 size_t len; /* length of the output string (write offset in output buffer) */
401 size_t size = 0; /* size of the output buffer */
Radek Krejci7a7fa902018-09-25 17:08:21 +0200402 void *p;
Radek Krejci117d2082018-09-26 10:05:14 +0200403 uint32_t n;
Michal Vaskob36053d2020-03-26 15:49:30 +0100404 size_t u;
405 int ws = 1;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200406
Michal Vaskob36053d2020-03-26 15:49:30 +0100407 assert(xmlctx);
Radek Krejcib1890642018-10-03 14:05:40 +0200408
Radek Krejcid70d1072018-10-09 14:20:47 +0200409 /* init */
Michal Vaskob36053d2020-03-26 15:49:30 +0100410 start = in;
Radek Krejcid70d1072018-10-09 14:20:47 +0200411 offset = len = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200412
413 /* parse */
414 while (in[offset]) {
415 if (in[offset] == '&') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100416 /* non WS */
417 ws = 0;
Radek Krejcid70d1072018-10-09 14:20:47 +0200418
Michal Vaskob36053d2020-03-26 15:49:30 +0100419 if (!buf) {
420 /* prepare output buffer */
421 buf = malloc(BUFSIZE);
422 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
423 size = BUFSIZE;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200424 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100425
426 /* allocate enough for the offset and next character,
427 * we will need 4 bytes at most since we support only the predefined
428 * (one-char) entities and character references */
429 if (len + offset + 4 >= size) {
430 buf = ly_realloc(buf, size + BUFSIZE_STEP);
431 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
432 size += BUFSIZE_STEP;
433 }
434
435 if (offset) {
436 /* store what we have so far */
437 memcpy(&buf[len], in, offset);
438 len += offset;
439 in += offset;
440 offset = 0;
441 }
442
Radek Krejci7a7fa902018-09-25 17:08:21 +0200443 ++offset;
444 if (in[offset] != '#') {
445 /* entity reference - only predefined references are supported */
446 if (!strncmp(&in[offset], "lt;", 3)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100447 buf[len++] = '<';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200448 in += 4; /* &lt; */
449 } else if (!strncmp(&in[offset], "gt;", 3)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100450 buf[len++] = '>';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200451 in += 4; /* &gt; */
452 } else if (!strncmp(&in[offset], "amp;", 4)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100453 buf[len++] = '&';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200454 in += 5; /* &amp; */
455 } else if (!strncmp(&in[offset], "apos;", 5)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100456 buf[len++] = '\'';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200457 in += 6; /* &apos; */
458 } else if (!strncmp(&in[offset], "quot;", 5)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100459 buf[len++] = '\"';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200460 in += 6; /* &quot; */
461 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100462 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
Radek Krejcied6c6ad2018-09-26 09:10:18 +0200463 "Entity reference \"%.*s\" not supported, only predefined references allowed.", 10, &in[offset-1]);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200464 goto error;
465 }
466 offset = 0;
467 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100468 p = (void *)&in[offset - 1];
Radek Krejci7a7fa902018-09-25 17:08:21 +0200469 /* character reference */
470 ++offset;
471 if (isdigit(in[offset])) {
472 for (n = 0; isdigit(in[offset]); offset++) {
473 n = (10 * n) + (in[offset] - '0');
474 }
475 } else if (in[offset] == 'x' && isxdigit(in[offset + 1])) {
476 for (n = 0, ++offset; isxdigit(in[offset]); offset++) {
477 if (isdigit(in[offset])) {
478 u = (in[offset] - '0');
479 } else if (in[offset] > 'F') {
480 u = 10 + (in[offset] - 'a');
481 } else {
482 u = 10 + (in[offset] - 'A');
483 }
484 n = (16 * n) + u;
485 }
486 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100487 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Invalid character reference \"%.*s\".", 12, p);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200488 goto error;
489
490 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100491
Radek Krejci7a7fa902018-09-25 17:08:21 +0200492 LY_CHECK_ERR_GOTO(in[offset] != ';',
Michal Vaskob36053d2020-03-26 15:49:30 +0100493 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP,
Radek Krejci7a7fa902018-09-25 17:08:21 +0200494 LY_VCODE_INSTREXP_len(&in[offset]), &in[offset], ";"),
495 error);
496 ++offset;
Michal Vaskob36053d2020-03-26 15:49:30 +0100497 LY_CHECK_ERR_GOTO(lyxml_pututf8(&buf[len], n, &u),
498 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
499 "Invalid character reference \"%.*s\" (0x%08x).", 12, p, n),
Radek Krejci7a7fa902018-09-25 17:08:21 +0200500 error);
501 len += u;
502 in += offset;
503 offset = 0;
504 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100505 } else if (in[offset] == endchar) {
Radek Krejci7a7fa902018-09-25 17:08:21 +0200506 /* end of string */
Radek Krejcid70d1072018-10-09 14:20:47 +0200507 if (buf) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100508 /* realloc exact size string */
509 buf = ly_realloc(buf, len + offset + 1);
510 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
511 size = len + offset + 1;
Radek Krejcid70d1072018-10-09 14:20:47 +0200512 memcpy(&buf[len], in, offset);
Michal Vaskob36053d2020-03-26 15:49:30 +0100513
514 /* set terminating NULL byte */
515 buf[len + offset] = '\0';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200516 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200517 len += offset;
Michal Vaskob36053d2020-03-26 15:49:30 +0100518 in += offset;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200519 goto success;
520 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100521 if (!is_xmlws(in[offset])) {
522 /* non WS */
523 ws = 0;
524 }
525
Radek Krejci7a7fa902018-09-25 17:08:21 +0200526 /* log lines */
527 if (in[offset] == '\n') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100528 ++xmlctx->line;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200529 }
530
531 /* continue */
532 ++offset;
533 }
534 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100535
536 /* EOF reached before endchar */
537 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
538
Radek Krejci7a7fa902018-09-25 17:08:21 +0200539error:
Michal Vaskob36053d2020-03-26 15:49:30 +0100540 free(buf);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200541 return LY_EVALID;
542
543success:
Radek Krejcid70d1072018-10-09 14:20:47 +0200544 if (buf) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100545 *value = buf;
546 *dynamic = 1;
547 } else {
548 *value = (char *)start;
549 *dynamic = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200550 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100551 *length = len;
552 *ws_only = ws;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200553
Michal Vaskob36053d2020-03-26 15:49:30 +0100554 xmlctx->input = in;
555 return LY_SUCCESS;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200556
557#undef BUFSIZE
558#undef BUFSIZE_STEP
Radek Krejci7a7fa902018-09-25 17:08:21 +0200559}
560
Michal Vaskob36053d2020-03-26 15:49:30 +0100561static LY_ERR
562lyxml_close_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, const char *name, size_t name_len,
563 int empty)
Radek Krejcid972c252018-09-25 13:23:39 +0200564{
Michal Vaskob36053d2020-03-26 15:49:30 +0100565 struct lyxml_elem *e;
Radek Krejcid972c252018-09-25 13:23:39 +0200566
Michal Vaskob36053d2020-03-26 15:49:30 +0100567 /* match opening and closing element tags */
568 if (!xmlctx->elements.count) {
569 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Stray closing element tag (\"%.*s\").",
570 name_len, name);
571 return LY_EVALID;
572 }
Radek Krejcid972c252018-09-25 13:23:39 +0200573
Michal Vaskob36053d2020-03-26 15:49:30 +0100574 e = (struct lyxml_elem *)xmlctx->elements.objs[xmlctx->elements.count - 1];
575 if ((e->prefix_len != prefix_len) || (e->name_len != name_len)
576 || (prefix_len && strncmp(prefix, e->prefix, e->prefix_len)) || strncmp(name, e->name, e->name_len)) {
577 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
578 "Opening (\"%.*s%s%.*s\") and closing (\"%.*s%s%.*s\") elements tag mismatch.",
579 e->prefix_len, e->prefix ? e->prefix : "", e->prefix ? ":" : "", e->name_len, e->name,
580 prefix_len, prefix ? prefix : "", prefix ? ":" : "", name_len, name);
581 return LY_EVALID;
582 }
Radek Krejcid972c252018-09-25 13:23:39 +0200583
Michal Vaskob36053d2020-03-26 15:49:30 +0100584 /* opening and closing element tags matches, remove record from the opening tags list */
585 ly_set_rm_index(&xmlctx->elements, xmlctx->elements.count - 1, free);
Radek Krejcid972c252018-09-25 13:23:39 +0200586
Michal Vaskob36053d2020-03-26 15:49:30 +0100587 /* remove also the namespaces connected with the element */
588 lyxml_ns_rm(xmlctx);
Radek Krejcid972c252018-09-25 13:23:39 +0200589
Michal Vaskob36053d2020-03-26 15:49:30 +0100590 /* skip WS */
591 ign_xmlws(xmlctx);
Radek Krejcid972c252018-09-25 13:23:39 +0200592
Michal Vaskob36053d2020-03-26 15:49:30 +0100593 /* special "<elem/>" element */
594 if (empty && (xmlctx->input[0] == '/')) {
595 move_input(xmlctx, 1);
596 }
Michal Vasko52927e22020-03-16 17:26:14 +0100597
Michal Vaskob36053d2020-03-26 15:49:30 +0100598 /* parse closing tag */
599 if (xmlctx->input[0] != '>') {
600 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->input),
601 xmlctx->input, "element tag termination ('>')");
602 return LY_EVALID;
603 }
Michal Vasko52927e22020-03-16 17:26:14 +0100604
Michal Vaskob36053d2020-03-26 15:49:30 +0100605 /* move after closing tag without checking for EOF */
606 ++xmlctx->input;
Michal Vasko52927e22020-03-16 17:26:14 +0100607
Radek Krejcid972c252018-09-25 13:23:39 +0200608 return LY_SUCCESS;
609}
610
Michal Vaskob36053d2020-03-26 15:49:30 +0100611static LY_ERR
612lyxml_open_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, const char *name, size_t name_len)
Radek Krejcib1890642018-10-03 14:05:40 +0200613{
Michal Vaskob36053d2020-03-26 15:49:30 +0100614 LY_ERR ret = LY_SUCCESS;
615 struct lyxml_elem *e;
616 const char *prev_input;
617 char *value;
618 size_t parsed, value_len;
619 int ws_only, dynamic, is_ns;
620 uint32_t c;
Radek Krejcib1890642018-10-03 14:05:40 +0200621
Michal Vaskob36053d2020-03-26 15:49:30 +0100622 /* store element opening tag information */
623 e = malloc(sizeof *e);
624 LY_CHECK_ERR_RET(!e, LOGMEM(xmlctx->ctx), LY_EMEM);
625 e->name = name;
626 e->prefix = prefix;
627 e->name_len = name_len;
628 e->prefix_len = prefix_len;
629 ly_set_add(&xmlctx->elements, e, LY_SET_OPT_USEASLIST);
630
631 /* skip WS */
632 ign_xmlws(xmlctx);
633
634 /* parse and store all namespaces */
635 prev_input = xmlctx->input;
636 is_ns = 1;
637 while ((xmlctx->input[0] != '\0') && !ly_getutf8(&xmlctx->input, &c, &parsed) && is_xmlqnamestartchar(c)) {
638 xmlctx->input -= parsed;
639
640 /* parse attribute name */
641 LY_CHECK_GOTO(ret = lyxml_parse_qname(xmlctx, &prefix, &prefix_len, &name, &name_len), cleanup);
642
643 /* parse the value */
644 LY_CHECK_GOTO(ret = lyxml_next_attr_content(xmlctx, (const char **)&value, &value_len, &ws_only, &dynamic), cleanup);
645
646 /* store every namespace */
647 if ((prefix && !ly_strncmp("xmlns", prefix, prefix_len)) || (!prefix && !ly_strncmp("xmlns", name, name_len))) {
648 LY_CHECK_GOTO(ret = lyxml_ns_add(xmlctx, prefix ? name : NULL, prefix ? name_len : 0,
649 dynamic ? value : strndup(value, value_len)), cleanup);
650 dynamic = 0;
651 } else {
652 /* not a namespace */
653 is_ns = 0;
654 }
655 if (dynamic) {
656 free(value);
657 }
658
659 /* skip WS */
660 ign_xmlws(xmlctx);
661
662 if (is_ns) {
663 /* we can actually skip all the namespaces as there is no reason to parse them again */
664 prev_input = xmlctx->input;
665 }
Radek Krejcib1890642018-10-03 14:05:40 +0200666 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100667
668cleanup:
669 if (!ret) {
670 xmlctx->input = prev_input;
671 }
672 return ret;
673}
674
675static LY_ERR
676lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *value_len, int *ws_only, int *dynamic)
677{
678 char quot;
679
680 /* skip WS */
681 ign_xmlws(xmlctx);
682
683 /* skip '=' */
684 if (xmlctx->input[0] == '\0') {
685 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
686 return LY_EVALID;
687 } else if (xmlctx->input[0] != '=') {
688 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->input),
689 xmlctx->input, "'='");
690 return LY_EVALID;
691 }
692 move_input(xmlctx, 1);
693
694 /* skip WS */
695 ign_xmlws(xmlctx);
696
697 /* find quotes */
698 if (xmlctx->input[0] == '\0') {
699 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
700 return LY_EVALID;
701 } else if ((xmlctx->input[0] != '\'') && (xmlctx->input[0] != '\"')) {
702 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->input),
703 xmlctx->input, "either single or double quotation mark");
704 return LY_EVALID;
705 }
706
707 /* remember quote */
708 quot = xmlctx->input[0];
709 move_input(xmlctx, 1);
710
711 /* parse attribute value */
712 LY_CHECK_RET(lyxml_parse_value(xmlctx, quot, (char **)value, value_len, ws_only, dynamic));
713
714 /* move after ending quote (without checking for EOF) */
715 ++xmlctx->input;
716
717 return LY_SUCCESS;
718}
719
720static LY_ERR
721lyxml_next_attribute(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
722{
723 const char *in;
724 char *value;
725 uint32_t c;
726 size_t parsed, value_len;
727 int ws_only, dynamic;
728
729 /* skip WS */
730 ign_xmlws(xmlctx);
731
732 /* parse only possible attributes */
733 while ((xmlctx->input[0] != '>') && (xmlctx->input[0] != '/')) {
734 in = xmlctx->input;
735 if (in[0] == '\0') {
736 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
737 return LY_EVALID;
738 } else if ((ly_getutf8(&in, &c, &parsed) || !is_xmlqnamestartchar(c))) {
739 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in - parsed), in - parsed,
740 "element tag end ('>' or '/>') or an attribute");
741 return LY_EVALID;
742 }
743
744 /* parse attribute name */
745 LY_CHECK_RET(lyxml_parse_qname(xmlctx, prefix, prefix_len, name, name_len));
746
747 if ((!*prefix || ly_strncmp("xmlns", *prefix, *prefix_len)) && (*prefix || ly_strncmp("xmlns", *name, *name_len))) {
748 /* standard attribute */
749 break;
750 }
751
752 /* namespace, skip it */
753 LY_CHECK_RET(lyxml_next_attr_content(xmlctx, (const char **)&value, &value_len, &ws_only, &dynamic));
754 if (dynamic) {
755 free(value);
756 }
757
758 /* skip WS */
759 ign_xmlws(xmlctx);
760 }
761
762 return LY_SUCCESS;
763}
764
765static LY_ERR
766lyxml_next_element(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len,
767 int *closing)
768{
769 /* skip WS until EOF or after opening tag '<' */
770 LY_CHECK_RET(lyxml_skip_until_end_or_after_otag(xmlctx));
771 if (xmlctx->input[0] == '\0') {
772 /* set return values */
773 *prefix = *name = NULL;
774 *prefix_len = *name_len = 0;
775 return LY_SUCCESS;
776 }
777
778 if (xmlctx->input[0] == '/') {
779 move_input(xmlctx, 1);
780 *closing = 1;
781 } else {
782 *closing = 0;
783 }
784
785 /* skip WS */
786 ign_xmlws(xmlctx);
787
788 /* parse element name */
789 LY_CHECK_RET(lyxml_parse_qname(xmlctx, prefix, prefix_len, name, name_len));
790
791 return LY_SUCCESS;
792}
793
794LY_ERR
795lyxml_ctx_new(const struct ly_ctx *ctx, const char *input, struct lyxml_ctx **xmlctx_p)
796{
797 LY_ERR ret = LY_SUCCESS;
798 struct lyxml_ctx *xmlctx;
799 int closing;
800
801 /* new context */
802 xmlctx = calloc(1, sizeof *xmlctx);
803 LY_CHECK_ERR_RET(!xmlctx, LOGMEM(ctx), LY_EMEM);
804 xmlctx->ctx = ctx;
805 xmlctx->line = 1;
806 xmlctx->input = input;
807
808 /* parse next element, if any */
809 LY_CHECK_GOTO(ret = lyxml_next_element(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name,
810 &xmlctx->name_len, &closing), cleanup);
811
812 if (xmlctx->input[0] == '\0') {
813 /* update status */
814 xmlctx->status = LYXML_END;
815 } else if (closing) {
816 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Stray closing element tag (\"%.*s\").",
817 xmlctx->name_len, xmlctx->name);
818 ret = LY_EVALID;
819 goto cleanup;
820 } else {
821 /* open an element, also parses all enclosed namespaces */
822 LY_CHECK_GOTO(ret = lyxml_open_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len), cleanup);
823
824 /* update status */
825 xmlctx->status = LYXML_ELEMENT;
826 }
827
828cleanup:
829 if (ret) {
830 lyxml_ctx_free(xmlctx);
831 } else {
832 *xmlctx_p = xmlctx;
833 }
834 return ret;
835}
836
837LY_ERR
838lyxml_ctx_next(struct lyxml_ctx *xmlctx)
839{
840 LY_ERR ret = LY_SUCCESS;
841 int closing;
842 struct lyxml_elem *e;
843
844 /* if the value was not used, free it */
845 if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) {
846 free((char *)xmlctx->value);
847 xmlctx->value = NULL;
848 xmlctx->dynamic = 0;
849 }
850
851 switch (xmlctx->status) {
852 /* content |</elem> */
853 case LYXML_ELEM_CONTENT:
854 /* handle special case when empty content for "<elem/>" was returned */
855 if (xmlctx->input[0] == '/') {
856 assert(xmlctx->elements.count);
857 e = (struct lyxml_elem *)xmlctx->elements.objs[xmlctx->elements.count - 1];
858
859 /* close the element (parses closing tag) */
860 LY_CHECK_GOTO(ret = lyxml_close_element(xmlctx, e->prefix, e->prefix_len, e->name, e->name_len, 1), cleanup);
861
862 /* update status */
863 xmlctx->status = LYXML_ELEM_CLOSE;
864 break;
865 }
866 /* fallthrough */
867
868 /* </elem>| <elem2>* */
869 case LYXML_ELEM_CLOSE:
870 /* parse next element, if any */
871 LY_CHECK_GOTO(ret = lyxml_next_element(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name,
872 &xmlctx->name_len, &closing), cleanup);
873
874 if (xmlctx->input[0] == '\0') {
875 /* update status */
876 xmlctx->status = LYXML_END;
877 } else if (closing) {
878 /* close an element (parses also closing tag) */
879 LY_CHECK_GOTO(ret = lyxml_close_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len, 0), cleanup);
880
881 /* update status */
882 xmlctx->status = LYXML_ELEM_CLOSE;
883 } else {
884 /* open an element, also parses all enclosed namespaces */
885 LY_CHECK_GOTO(ret = lyxml_open_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len), cleanup);
886
887 /* update status */
888 xmlctx->status = LYXML_ELEMENT;
889 }
890 break;
891
892 /* <elem| attr='val'* > content */
893 case LYXML_ELEMENT:
894
895 /* attr='val'| attr='val'* > content */
896 case LYXML_ATTR_CONTENT:
897 /* parse attribute name, if any */
898 LY_CHECK_GOTO(ret = lyxml_next_attribute(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name, &xmlctx->name_len), cleanup);
899
900 if (xmlctx->input[0] == '>') {
901 /* no attributes but a closing tag */
902 move_input(xmlctx, 1);
903
904 /* parse element content */
905 LY_CHECK_GOTO(ret = lyxml_parse_value(xmlctx, '<', (char **)&xmlctx->value, &xmlctx->value_len, &xmlctx->ws_only,
906 &xmlctx->dynamic), cleanup);
907
908 if (!xmlctx->value_len) {
909 /* use empty value, easier to work with */
910 xmlctx->value = "";
911 assert(!xmlctx->dynamic);
912 }
913
914 /* update status */
915 xmlctx->status = LYXML_ELEM_CONTENT;
916 } else if (xmlctx->input[0] == '/') {
917 /* no content but we still return it */
918 xmlctx->value = "";
919 xmlctx->value_len = 0;
920 xmlctx->ws_only = 1;
921 xmlctx->dynamic = 0;
922
923 /* update status */
924 xmlctx->status = LYXML_ELEM_CONTENT;
925 } else {
926 /* update status */
927 xmlctx->status = LYXML_ATTRIBUTE;
928 }
929 break;
930
931 /* attr|='val' */
932 case LYXML_ATTRIBUTE:
933 /* skip formatting and parse value */
934 LY_CHECK_GOTO(ret = lyxml_next_attr_content(xmlctx, &xmlctx->value, &xmlctx->value_len, &xmlctx->ws_only,
935 &xmlctx->dynamic), cleanup);
936
937 /* update status */
938 xmlctx->status = LYXML_ATTR_CONTENT;
939 break;
940
941 /* </elem> |EOF */
942 case LYXML_END:
943 /* nothing to do */
944 break;
945 }
946
947cleanup:
948 if (ret) {
949 /* invalidate context */
950 xmlctx->status = LYXML_END;
951 }
952 return ret;
953}
954
955LY_ERR
956lyxml_ctx_peek(struct lyxml_ctx *xmlctx, enum LYXML_PARSER_STATUS *next)
957{
958 LY_ERR ret = LY_SUCCESS;
959 const char *prefix, *name, *prev_input;
960 size_t prefix_len, name_len;
961 int closing;
962
963 prev_input = xmlctx->input;
964
965 switch (xmlctx->status) {
966 case LYXML_ELEM_CONTENT:
967 if (xmlctx->input[0] == '/') {
968 *next = LYXML_ELEM_CLOSE;
969 break;
970 }
971 /* fallthrough */
972 case LYXML_ELEM_CLOSE:
973 /* parse next element, if any */
974 LY_CHECK_GOTO(ret = lyxml_next_element(xmlctx, &prefix, &prefix_len, &name, &name_len, &closing), cleanup);
975
976 if (xmlctx->input[0] == '\0') {
977 *next = LYXML_END;
978 } else if (closing) {
979 *next = LYXML_ELEM_CLOSE;
980 } else {
981 *next = LYXML_ELEMENT;
982 }
983 break;
984 case LYXML_ELEMENT:
985 case LYXML_ATTR_CONTENT:
986 /* parse attribute name, if any */
987 LY_CHECK_GOTO(ret = lyxml_next_attribute(xmlctx, &prefix, &prefix_len, &name, &name_len), cleanup);
988
989 if ((xmlctx->input[0] == '>') || (xmlctx->input[0] == '/')) {
990 *next = LYXML_ELEM_CONTENT;
991 } else {
992 *next = LYXML_ATTRIBUTE;
993 }
994 break;
995 case LYXML_ATTRIBUTE:
996 *next = LYXML_ATTR_CONTENT;
997 break;
998 case LYXML_END:
999 *next = LYXML_END;
1000 break;
1001 }
1002
1003cleanup:
1004 xmlctx->input = prev_input;
1005 return ret;
1006}
1007
1008void
1009lyxml_ctx_free(struct lyxml_ctx *xmlctx)
1010{
1011 uint32_t u;
1012
1013 if (!xmlctx) {
1014 return;
1015 }
1016
1017 if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) {
1018 free((char *)xmlctx->value);
1019 }
1020 ly_set_erase(&xmlctx->elements, free);
1021 for (u = xmlctx->ns.count - 1; u + 1 > 0; --u) {
1022 /* remove the ns structure */
1023 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->prefix);
1024 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->uri);
1025 free(xmlctx->ns.objs[u]);
1026 }
1027 ly_set_erase(&xmlctx->ns, NULL);
1028 free(xmlctx);
Radek Krejcib1890642018-10-03 14:05:40 +02001029}
Radek Krejcie7b95092019-05-15 11:03:07 +02001030
1031LY_ERR
Radek Krejci241f6b52020-05-21 18:13:49 +02001032lyxml_dump_text(struct ly_out *out, const char *text, int attribute)
Radek Krejcie7b95092019-05-15 11:03:07 +02001033{
1034 LY_ERR ret = LY_SUCCESS;
1035 unsigned int u;
1036
1037 if (!text) {
1038 return 0;
1039 }
1040
1041 for (u = 0; text[u]; u++) {
1042 switch (text[u]) {
1043 case '&':
Radek Krejci241f6b52020-05-21 18:13:49 +02001044 ret = ly_print(out, "&amp;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001045 break;
1046 case '<':
Radek Krejci241f6b52020-05-21 18:13:49 +02001047 ret = ly_print(out, "&lt;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001048 break;
1049 case '>':
1050 /* not needed, just for readability */
Radek Krejci241f6b52020-05-21 18:13:49 +02001051 ret = ly_print(out, "&gt;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001052 break;
1053 case '"':
1054 if (attribute) {
Radek Krejci241f6b52020-05-21 18:13:49 +02001055 ret = ly_print(out, "&quot;");
Radek Krejcie7b95092019-05-15 11:03:07 +02001056 break;
1057 }
1058 /* falls through */
1059 default:
Radek Krejci241f6b52020-05-21 18:13:49 +02001060 ly_write(out, &text[u], 1);
Radek Krejcie7b95092019-05-15 11:03:07 +02001061 }
1062 }
1063
1064 return ret;
1065}
1066
Michal Vasko52927e22020-03-16 17:26:14 +01001067LY_ERR
Michal Vaskob36053d2020-03-26 15:49:30 +01001068lyxml_get_prefixes(struct lyxml_ctx *xmlctx, const char *value, size_t value_len, struct ly_prefix **val_prefs)
Michal Vasko52927e22020-03-16 17:26:14 +01001069{
1070 LY_ERR ret;
Radek Krejci7eb54ba2020-05-18 16:30:04 +02001071 LY_ARRAY_SIZE_TYPE u;
1072 uint32_t c;
Michal Vasko52927e22020-03-16 17:26:14 +01001073 const struct lyxml_ns *ns;
1074 const char *start, *stop;
1075 struct ly_prefix *prefixes = NULL;
1076 size_t len;
1077
1078 for (stop = start = value; (size_t)(stop - value) < value_len; start = stop) {
1079 size_t bytes;
1080 ly_getutf8(&stop, &c, &bytes);
1081 if (is_xmlqnamestartchar(c)) {
1082 for (ly_getutf8(&stop, &c, &bytes);
1083 is_xmlqnamechar(c) && (size_t)(stop - value) < value_len;
1084 ly_getutf8(&stop, &c, &bytes));
1085 stop = stop - bytes;
1086 if (*stop == ':') {
1087 /* we have a possible prefix */
1088 len = stop - start;
Michal Vaskob36053d2020-03-26 15:49:30 +01001089 ns = lyxml_ns_get(xmlctx, start, len);
Michal Vasko52927e22020-03-16 17:26:14 +01001090 if (ns) {
1091 struct ly_prefix *p = NULL;
1092
1093 /* check whether we do not already have this prefix stored */
1094 LY_ARRAY_FOR(prefixes, u) {
1095 if (!ly_strncmp(prefixes[u].pref, start, len)) {
1096 p = &prefixes[u];
1097 break;
1098 }
1099 }
1100 if (!p) {
Michal Vaskob36053d2020-03-26 15:49:30 +01001101 LY_ARRAY_NEW_GOTO(xmlctx->ctx, prefixes, p, ret, error);
1102 p->pref = lydict_insert(xmlctx->ctx, start, len);
1103 p->ns = lydict_insert(xmlctx->ctx, ns->uri, 0);
Michal Vasko52927e22020-03-16 17:26:14 +01001104 } /* else the prefix already present */
1105 }
1106 }
1107 stop = stop + bytes;
1108 }
1109 }
1110
1111 *val_prefs = prefixes;
1112 return LY_SUCCESS;
1113
1114error:
1115 LY_ARRAY_FOR(prefixes, u) {
Michal Vaskob36053d2020-03-26 15:49:30 +01001116 lydict_remove(xmlctx->ctx, prefixes[u].pref);
Michal Vasko52927e22020-03-16 17:26:14 +01001117 }
1118 LY_ARRAY_FREE(prefixes);
1119 return ret;
1120}
1121
1122LY_ERR
1123lyxml_value_compare(const char *value1, const struct ly_prefix *prefs1, const char *value2, const struct ly_prefix *prefs2)
1124{
1125 const char *ptr1, *ptr2, *ns1, *ns2;
Radek Krejci7eb54ba2020-05-18 16:30:04 +02001126 LY_ARRAY_SIZE_TYPE u1, u2;
Michal Vasko52927e22020-03-16 17:26:14 +01001127 int len;
1128
1129 if (!value1 && !value2) {
1130 return LY_SUCCESS;
1131 }
1132 if ((value1 && !value2) || (!value1 && value2)) {
1133 return LY_ENOT;
1134 }
1135
1136 ptr1 = value1;
1137 ptr2 = value2;
1138 while (ptr1[0] && ptr2[0]) {
1139 if (ptr1[0] != ptr2[0]) {
1140 /* it can be a start of prefix that maps to the same module */
1141 ns1 = ns2 = NULL;
1142 if (prefs1) {
1143 /* find module of the first prefix, if any */
1144 LY_ARRAY_FOR(prefs1, u1) {
1145 len = strlen(prefs1[u1].pref);
1146 if (!strncmp(ptr1, prefs1[u1].pref, len) && (ptr1[len] == ':')) {
1147 ns1 = prefs1[u1].ns;
1148 break;
1149 }
1150 }
1151 }
1152 if (prefs2) {
1153 /* find module of the second prefix, if any */
1154 LY_ARRAY_FOR(prefs2, u2) {
1155 len = strlen(prefs2[u2].pref);
1156 if (!strncmp(ptr2, prefs2[u2].pref, len) && (ptr2[len] == ':')) {
1157 ns2 = prefs2[u2].ns;
1158 break;
1159 }
1160 }
1161 }
1162
1163 if (!ns1 || !ns2 || (ns1 != ns2)) {
1164 /* not a prefix or maps to different namespaces */
1165 break;
1166 }
1167
1168 /* skip prefixes in both values (':' is skipped as iter) */
1169 ptr1 += strlen(prefs1[u1].pref);
1170 ptr2 += strlen(prefs2[u2].pref);
1171 }
1172
1173 ++ptr1;
1174 ++ptr2;
1175 }
1176 if (ptr1[0] || ptr2[0]) {
1177 /* not a match or simply different lengths */
1178 return LY_ENOT;
1179 }
1180
1181 return LY_SUCCESS;
1182}