blob: f3766727c34b65e350177ef1bebfdbfadb9f0eaf [file] [log] [blame]
Radek Krejcid91dbaf2018-09-21 15:51:39 +02001/**
2 * @file xml.c
3 * @author Radek Krejci <rkrejci@cesnet.cz>
Michal Vaskob36053d2020-03-26 15:49:30 +01004 * @author Michal Vasko <mvasko@cesnet.cz>
Radek Krejcid91dbaf2018-09-21 15:51:39 +02005 * @brief Generic XML parser implementation for libyang
6 *
7 * Copyright (c) 2015 - 2018 CESNET, z.s.p.o.
8 *
9 * This source code is licensed under BSD 3-Clause License (the "License").
10 * You may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * https://opensource.org/licenses/BSD-3-Clause
14 */
15
Radek Krejcic1c03d62018-11-27 10:52:43 +010016#include "common.h"
Radek Krejci4b74d5e2018-09-26 14:30:55 +020017
Radek Krejcib1890642018-10-03 14:05:40 +020018#include <assert.h>
Radek Krejci7a7fa902018-09-25 17:08:21 +020019#include <ctype.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020020#include <stdbool.h>
21#include <stdint.h>
Radek Krejcie7b95092019-05-15 11:03:07 +020022#include <stdlib.h>
Radek Krejci4b74d5e2018-09-26 14:30:55 +020023#include <string.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020024
Radek Krejcid91dbaf2018-09-21 15:51:39 +020025#include "xml.h"
Radek Krejcie7b95092019-05-15 11:03:07 +020026#include "printer_internal.h"
Radek Krejcid91dbaf2018-09-21 15:51:39 +020027
Michal Vaskob36053d2020-03-26 15:49:30 +010028/* Move input p by s characters, if EOF log with lyxml_ctx c */
29#define move_input(c,s) c->input += s; LY_CHECK_ERR_RET(!c->input[0], LOGVAL(c->ctx, LY_VLOG_LINE, &c->line, LY_VCODE_EOF), LY_EVALID)
Radek Krejcid91dbaf2018-09-21 15:51:39 +020030
Radek Krejcib1890642018-10-03 14:05:40 +020031/* Ignore whitespaces in the input string p */
Michal Vaskob36053d2020-03-26 15:49:30 +010032#define ign_xmlws(c) while (is_xmlws(*(c)->input)) {if (*(c)->input == '\n') {++c->line;} ++c->input;}
33
34static LY_ERR lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *value_len, int *ws_only,
35 int *dynamic);
Radek Krejcid91dbaf2018-09-21 15:51:39 +020036
/**
 * @brief Skip the input until the given delimiter is found.
 *
 * Newline characters passed on the way are counted.
 *
 * @param[in] input NULL-terminated string to search in.
 * @param[in] delim Delimiter to look for.
 * @param[in] delim_len Number of bytes of @p delim to match.
 * @param[out] newlines Number of '\n' characters passed before the returned position.
 * @return Pointer to the beginning of the first delimiter occurrence in @p input,
 * NULL if the delimiter was not found.
 */
static const char *
ign_todelim(register const char *input, const char *delim, size_t delim_len, size_t *newlines)
{
    const char *pos;
    size_t matched;

    *newlines = 0;

    pos = input;
    while (*pos) {
        if (*pos == *delim) {
            /* possible start of the delimiter, compare it byte by byte */
            matched = 0;
            while ((matched < delim_len) && (pos[matched] == delim[matched])) {
                ++matched;
            }
            if (matched == delim_len) {
                return pos;
            }
        } else if (*pos == '\n') {
            ++(*newlines);
        }
        ++pos;
    }

    return NULL;
}
71
Radek Krejci4b74d5e2018-09-26 14:30:55 +020072/**
Michal Vaskob36053d2020-03-26 15:49:30 +010073 * @brief Check/Get an XML identifier from the input string.
74 *
75 * The identifier must have at least one valid character complying the name start character constraints.
76 * The identifier is terminated by the first character, which does not comply to the name character constraints.
77 *
78 * See https://www.w3.org/TR/xml-names/#NT-NCName
79 *
80 * @param[in] xmlctx XML context.
81 * @param[out] start Pointer to the start of the identifier.
82 * @param[out] end Pointer ot the end of the identifier.
83 * @return LY_ERR value.
84 */
85static LY_ERR
86lyxml_parse_identifier(struct lyxml_ctx *xmlctx, const char **start, const char **end)
87{
88 const char *s, *in;
89 uint32_t c;
90 size_t parsed;
91 LY_ERR rc;
92
93 in = s = xmlctx->input;
94
95 /* check NameStartChar (minus colon) */
96 LY_CHECK_ERR_RET(ly_getutf8(&in, &c, &parsed),
97 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INCHAR, in[0]),
98 LY_EVALID);
99 LY_CHECK_ERR_RET(!is_xmlqnamestartchar(c),
100 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
101 "Identifier \"%s\" starts with an invalid character.", in - parsed),
102 LY_EVALID);
103
104 /* check rest of the identifier */
105 do {
106 /* move only successfully parsed bytes */
107 xmlctx->input += parsed;
108
109 rc = ly_getutf8(&in, &c, &parsed);
110 LY_CHECK_ERR_RET(rc, LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INCHAR, in[0]), LY_EVALID);
111 } while (is_xmlqnamechar(c));
112
113 *start = s;
114 *end = xmlctx->input;
115 return LY_SUCCESS;
116}
117
118/**
119 * @brief Add namespace definition into XML context.
120 *
121 * Namespaces from a single element are supposed to be added sequentially together (not interleaved by a namespace from other
122 * element). This mimic namespace visibility, since the namespace defined in element E is not visible from its parents or
123 * siblings. On the other hand, namespace from a parent element can be redefined in a child element. This is also reflected
124 * by lyxml_ns_get() which returns the most recent namespace definition for the given prefix.
125 *
126 * When leaving processing of a subtree of some element (after it is removed from xmlctx->elements), caller is supposed to call
127 * lyxml_ns_rm() to remove all the namespaces defined in such an element from the context.
128 *
129 * @param[in] xmlctx XML context to work with.
130 * @param[in] prefix Pointer to the namespace prefix. Can be NULL for default namespace.
131 * @param[in] prefix_len Length of the prefix.
132 * @param[in] uri Namespace URI (value) to store directly. Value is always spent.
133 * @return LY_ERR values.
134 */
135LY_ERR
136lyxml_ns_add(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, char *uri)
137{
138 struct lyxml_ns *ns;
139
140 ns = malloc(sizeof *ns);
141 LY_CHECK_ERR_RET(!ns, LOGMEM(xmlctx->ctx), LY_EMEM);
142
143 /* we need to connect the depth of the element where the namespace is defined with the
144 * namespace record to be able to maintain (remove) the record when the parser leaves
145 * (to its sibling or back to the parent) the element where the namespace was defined */
146 ns->depth = xmlctx->elements.count;
147
148 ns->uri = uri;
149 if (prefix) {
150 ns->prefix = strndup(prefix, prefix_len);
151 LY_CHECK_ERR_RET(!ns->prefix, LOGMEM(xmlctx->ctx); free(ns->uri); free(ns), LY_EMEM);
152 } else {
153 ns->prefix = NULL;
154 }
155
156 LY_CHECK_ERR_RET(ly_set_add(&xmlctx->ns, ns, LY_SET_OPT_USEASLIST) == -1,
157 free(ns->prefix); free(ns->uri); free(ns), LY_EMEM);
158 return LY_SUCCESS;
159}
160
161/**
162 * @brief Remove all the namespaces defined in the element recently closed (removed from the xmlctx->elements).
163 *
164 * @param[in] xmlctx XML context to work with.
165 */
166void
167lyxml_ns_rm(struct lyxml_ctx *xmlctx)
168{
169 unsigned int u;
170
171 for (u = xmlctx->ns.count - 1; u + 1 > 0; --u) {
172 if (((struct lyxml_ns *)xmlctx->ns.objs[u])->depth != xmlctx->elements.count + 1) {
173 /* we are done, the namespaces from a single element are supposed to be together */
174 break;
175 }
176 /* remove the ns structure */
177 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->prefix);
178 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->uri);
179 free(xmlctx->ns.objs[u]);
180 --xmlctx->ns.count;
181 }
182
183 if (!xmlctx->ns.count) {
184 /* cleanup the xmlctx's namespaces storage */
185 ly_set_erase(&xmlctx->ns, NULL);
186 }
187}
188
189void *
190lyxml_elem_dup(void *item)
191{
192 struct lyxml_elem *dup;
193
194 dup = malloc(sizeof *dup);
195 memcpy(dup, item, sizeof *dup);
196
197 return dup;
198}
199
200void *
201lyxml_ns_dup(void *item)
202{
203 struct lyxml_ns *dup, *orig;
204
205 orig = (struct lyxml_ns *)item;
206 dup = malloc(sizeof *dup);
207 dup->prefix = orig->prefix ? strdup(orig->prefix) : NULL;
208 dup->uri = strdup(orig->uri);
209 dup->depth = orig->depth;
210
211 return dup;
212}
213
214const struct lyxml_ns *
215lyxml_ns_get(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len)
216{
217 unsigned int u;
218 struct lyxml_ns *ns;
219
220 for (u = xmlctx->ns.count - 1; u + 1 > 0; --u) {
221 ns = (struct lyxml_ns *)xmlctx->ns.objs[u];
222 if (prefix && prefix_len) {
223 if (ns->prefix && !ly_strncmp(ns->prefix, prefix, prefix_len)) {
224 return ns;
225 }
226 } else if (!ns->prefix) {
227 /* default namespace */
228 return ns;
229 }
230 }
231
232 return NULL;
233}
234
235static LY_ERR
236lyxml_skip_until_end_or_after_otag(struct lyxml_ctx *xmlctx)
237{
238 const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */
239 const char *in, *endtag, *sectname;
240 size_t endtag_len, newlines;
241
242 while (1) {
243 ign_xmlws(xmlctx);
244
245 if (xmlctx->input[0] == '\0') {
246 /* EOF */
247 if (xmlctx->elements.count) {
248 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
249 return LY_EVALID;
250 }
251 return LY_SUCCESS;
252 } else if (xmlctx->input[0] != '<') {
253 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->input),
254 xmlctx->input, "element tag start ('<')");
255 return LY_EVALID;
256 }
257 move_input(xmlctx, 1);
258
259 if (xmlctx->input[0] == '!') {
260 move_input(xmlctx, 1);
261 /* sections to ignore */
262 if (!strncmp(xmlctx->input, "--", 2)) {
263 /* comment */
264 move_input(xmlctx, 2);
265 sectname = "Comment";
266 endtag = "-->";
267 endtag_len = 3;
268 } else if (!strncmp(xmlctx->input, "[CDATA[", 7)) {
269 /* CDATA section */
270 move_input(xmlctx, 7);
271 sectname = "CData";
272 endtag = "]]>";
273 endtag_len = 3;
274 } else if (!strncmp(xmlctx->input, "DOCTYPE", 7)) {
275 /* Document type declaration - not supported */
276 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_NSUPP, "Document Type Declaration");
277 return LY_EVALID;
278 } else {
279 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Unknown XML section \"%.20s\".", &xmlctx->input[-2]);
280 return LY_EVALID;
281 }
282 in = ign_todelim(xmlctx->input, endtag, endtag_len, &newlines);
283 LY_CHECK_ERR_RET(!in, LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_NTERM, sectname), LY_EVALID);
284 xmlctx->line += newlines;
285 xmlctx->input = in + endtag_len;
286 } else if (xmlctx->input[0] == '?') {
287 in = ign_todelim(xmlctx->input, "?>", 2, &newlines);
288 LY_CHECK_ERR_RET(!in, LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_NTERM, "Declaration"), LY_EVALID);
289 xmlctx->line += newlines;
290 xmlctx->input = in + 2;
291 } else {
292 /* other non-WS character */
293 break;
294 }
295 }
296
297 return LY_SUCCESS;
298}
299
300static LY_ERR
301lyxml_parse_qname(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
302{
303 const char *start, *end;
304
305 *prefix = NULL;
306 *prefix_len = 0;
307
308 LY_CHECK_RET(lyxml_parse_identifier(xmlctx, &start, &end));
309 if (end[0] == ':') {
310 /* we have prefixed identifier */
311 *prefix = start;
312 *prefix_len = end - start;
313
314 move_input(xmlctx, 1);
315 LY_CHECK_RET(lyxml_parse_identifier(xmlctx, &start, &end));
316 }
317
318 *name = start;
319 *name_len = end - start;
320 return LY_SUCCESS;
321}
322
323/**
Radek Krejci7a7fa902018-09-25 17:08:21 +0200324 * Store UTF-8 character specified as 4byte integer into the dst buffer.
325 * Returns number of written bytes (4 max), expects that dst has enough space.
326 *
327 * UTF-8 mapping:
328 * 00000000 -- 0000007F: 0xxxxxxx
329 * 00000080 -- 000007FF: 110xxxxx 10xxxxxx
330 * 00000800 -- 0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
331 * 00010000 -- 001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
332 *
333 * Includes checking for valid characters (following RFC 7950, sec 9.4)
334 */
335static LY_ERR
Radek Krejci117d2082018-09-26 10:05:14 +0200336lyxml_pututf8(char *dst, uint32_t value, size_t *bytes_written)
Radek Krejci7a7fa902018-09-25 17:08:21 +0200337{
338 if (value < 0x80) {
339 /* one byte character */
340 if (value < 0x20 &&
341 value != 0x09 &&
342 value != 0x0a &&
343 value != 0x0d) {
344 return LY_EINVAL;
345 }
346
347 dst[0] = value;
348 (*bytes_written) = 1;
349 } else if (value < 0x800) {
350 /* two bytes character */
351 dst[0] = 0xc0 | (value >> 6);
352 dst[1] = 0x80 | (value & 0x3f);
353 (*bytes_written) = 2;
354 } else if (value < 0xfffe) {
355 /* three bytes character */
356 if (((value & 0xf800) == 0xd800) ||
357 (value >= 0xfdd0 && value <= 0xfdef)) {
358 /* exclude surrogate blocks %xD800-DFFF */
359 /* exclude noncharacters %xFDD0-FDEF */
360 return LY_EINVAL;
361 }
362
363 dst[0] = 0xe0 | (value >> 12);
364 dst[1] = 0x80 | ((value >> 6) & 0x3f);
365 dst[2] = 0x80 | (value & 0x3f);
366
367 (*bytes_written) = 3;
368 } else if (value < 0x10fffe) {
369 if ((value & 0xffe) == 0xffe) {
370 /* exclude noncharacters %xFFFE-FFFF, %x1FFFE-1FFFF, %x2FFFE-2FFFF, %x3FFFE-3FFFF, %x4FFFE-4FFFF,
371 * %x5FFFE-5FFFF, %x6FFFE-6FFFF, %x7FFFE-7FFFF, %x8FFFE-8FFFF, %x9FFFE-9FFFF, %xAFFFE-AFFFF,
372 * %xBFFFE-BFFFF, %xCFFFE-CFFFF, %xDFFFE-DFFFF, %xEFFFE-EFFFF, %xFFFFE-FFFFF, %x10FFFE-10FFFF */
373 return LY_EINVAL;
374 }
375 /* four bytes character */
376 dst[0] = 0xf0 | (value >> 18);
377 dst[1] = 0x80 | ((value >> 12) & 0x3f);
378 dst[2] = 0x80 | ((value >> 6) & 0x3f);
379 dst[3] = 0x80 | (value & 0x3f);
380
381 (*bytes_written) = 4;
382 }
383 return LY_SUCCESS;
384}
385
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200386static LY_ERR
Michal Vaskob36053d2020-03-26 15:49:30 +0100387lyxml_parse_value(struct lyxml_ctx *xmlctx, char endchar, char **value, size_t *length, int *ws_only, int *dynamic)
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200388{
Michal Vaskob36053d2020-03-26 15:49:30 +0100389#define BUFSIZE 24
390#define BUFSIZE_STEP 128
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200391
Michal Vaskob36053d2020-03-26 15:49:30 +0100392 const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */
393 const char *in = xmlctx->input, *start;
394 char *buf = NULL;
Radek Krejci4ad42aa2019-07-23 16:55:58 +0200395 size_t offset; /* read offset in input buffer */
396 size_t len; /* length of the output string (write offset in output buffer) */
397 size_t size = 0; /* size of the output buffer */
Radek Krejci7a7fa902018-09-25 17:08:21 +0200398 void *p;
Radek Krejci117d2082018-09-26 10:05:14 +0200399 uint32_t n;
Michal Vaskob36053d2020-03-26 15:49:30 +0100400 size_t u;
401 int ws = 1;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200402
Michal Vaskob36053d2020-03-26 15:49:30 +0100403 assert(xmlctx);
Radek Krejcib1890642018-10-03 14:05:40 +0200404
Radek Krejcid70d1072018-10-09 14:20:47 +0200405 /* init */
Michal Vaskob36053d2020-03-26 15:49:30 +0100406 start = in;
Radek Krejcid70d1072018-10-09 14:20:47 +0200407 offset = len = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200408
409 /* parse */
410 while (in[offset]) {
411 if (in[offset] == '&') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100412 /* non WS */
413 ws = 0;
Radek Krejcid70d1072018-10-09 14:20:47 +0200414
Michal Vaskob36053d2020-03-26 15:49:30 +0100415 if (!buf) {
416 /* prepare output buffer */
417 buf = malloc(BUFSIZE);
418 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
419 size = BUFSIZE;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200420 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100421
422 /* allocate enough for the offset and next character,
423 * we will need 4 bytes at most since we support only the predefined
424 * (one-char) entities and character references */
425 if (len + offset + 4 >= size) {
426 buf = ly_realloc(buf, size + BUFSIZE_STEP);
427 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
428 size += BUFSIZE_STEP;
429 }
430
431 if (offset) {
432 /* store what we have so far */
433 memcpy(&buf[len], in, offset);
434 len += offset;
435 in += offset;
436 offset = 0;
437 }
438
Radek Krejci7a7fa902018-09-25 17:08:21 +0200439 ++offset;
440 if (in[offset] != '#') {
441 /* entity reference - only predefined references are supported */
442 if (!strncmp(&in[offset], "lt;", 3)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100443 buf[len++] = '<';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200444 in += 4; /* &lt; */
445 } else if (!strncmp(&in[offset], "gt;", 3)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100446 buf[len++] = '>';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200447 in += 4; /* &gt; */
448 } else if (!strncmp(&in[offset], "amp;", 4)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100449 buf[len++] = '&';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200450 in += 5; /* &amp; */
451 } else if (!strncmp(&in[offset], "apos;", 5)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100452 buf[len++] = '\'';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200453 in += 6; /* &apos; */
454 } else if (!strncmp(&in[offset], "quot;", 5)) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100455 buf[len++] = '\"';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200456 in += 6; /* &quot; */
457 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100458 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
Radek Krejcied6c6ad2018-09-26 09:10:18 +0200459 "Entity reference \"%.*s\" not supported, only predefined references allowed.", 10, &in[offset-1]);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200460 goto error;
461 }
462 offset = 0;
463 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100464 p = (void *)&in[offset - 1];
Radek Krejci7a7fa902018-09-25 17:08:21 +0200465 /* character reference */
466 ++offset;
467 if (isdigit(in[offset])) {
468 for (n = 0; isdigit(in[offset]); offset++) {
469 n = (10 * n) + (in[offset] - '0');
470 }
471 } else if (in[offset] == 'x' && isxdigit(in[offset + 1])) {
472 for (n = 0, ++offset; isxdigit(in[offset]); offset++) {
473 if (isdigit(in[offset])) {
474 u = (in[offset] - '0');
475 } else if (in[offset] > 'F') {
476 u = 10 + (in[offset] - 'a');
477 } else {
478 u = 10 + (in[offset] - 'A');
479 }
480 n = (16 * n) + u;
481 }
482 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100483 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Invalid character reference \"%.*s\".", 12, p);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200484 goto error;
485
486 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100487
Radek Krejci7a7fa902018-09-25 17:08:21 +0200488 LY_CHECK_ERR_GOTO(in[offset] != ';',
Michal Vaskob36053d2020-03-26 15:49:30 +0100489 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP,
Radek Krejci7a7fa902018-09-25 17:08:21 +0200490 LY_VCODE_INSTREXP_len(&in[offset]), &in[offset], ";"),
491 error);
492 ++offset;
Michal Vaskob36053d2020-03-26 15:49:30 +0100493 LY_CHECK_ERR_GOTO(lyxml_pututf8(&buf[len], n, &u),
494 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
495 "Invalid character reference \"%.*s\" (0x%08x).", 12, p, n),
Radek Krejci7a7fa902018-09-25 17:08:21 +0200496 error);
497 len += u;
498 in += offset;
499 offset = 0;
500 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100501 } else if (in[offset] == endchar) {
Radek Krejci7a7fa902018-09-25 17:08:21 +0200502 /* end of string */
Radek Krejcid70d1072018-10-09 14:20:47 +0200503 if (buf) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100504 /* realloc exact size string */
505 buf = ly_realloc(buf, len + offset + 1);
506 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
507 size = len + offset + 1;
Radek Krejcid70d1072018-10-09 14:20:47 +0200508 memcpy(&buf[len], in, offset);
Michal Vaskob36053d2020-03-26 15:49:30 +0100509
510 /* set terminating NULL byte */
511 buf[len + offset] = '\0';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200512 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200513 len += offset;
Michal Vaskob36053d2020-03-26 15:49:30 +0100514 in += offset;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200515 goto success;
516 } else {
Michal Vaskob36053d2020-03-26 15:49:30 +0100517 if (!is_xmlws(in[offset])) {
518 /* non WS */
519 ws = 0;
520 }
521
Radek Krejci7a7fa902018-09-25 17:08:21 +0200522 /* log lines */
523 if (in[offset] == '\n') {
Michal Vaskob36053d2020-03-26 15:49:30 +0100524 ++xmlctx->line;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200525 }
526
527 /* continue */
528 ++offset;
529 }
530 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100531
532 /* EOF reached before endchar */
533 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
534
Radek Krejci7a7fa902018-09-25 17:08:21 +0200535error:
Michal Vaskob36053d2020-03-26 15:49:30 +0100536 free(buf);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200537 return LY_EVALID;
538
539success:
Radek Krejcid70d1072018-10-09 14:20:47 +0200540 if (buf) {
Michal Vaskob36053d2020-03-26 15:49:30 +0100541 *value = buf;
542 *dynamic = 1;
543 } else {
544 *value = (char *)start;
545 *dynamic = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200546 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100547 *length = len;
548 *ws_only = ws;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200549
Michal Vaskob36053d2020-03-26 15:49:30 +0100550 xmlctx->input = in;
551 return LY_SUCCESS;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200552
553#undef BUFSIZE
554#undef BUFSIZE_STEP
Radek Krejci7a7fa902018-09-25 17:08:21 +0200555}
556
Michal Vaskob36053d2020-03-26 15:49:30 +0100557static LY_ERR
558lyxml_close_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, const char *name, size_t name_len,
559 int empty)
Radek Krejcid972c252018-09-25 13:23:39 +0200560{
Michal Vaskob36053d2020-03-26 15:49:30 +0100561 struct lyxml_elem *e;
Radek Krejcid972c252018-09-25 13:23:39 +0200562
Michal Vaskob36053d2020-03-26 15:49:30 +0100563 /* match opening and closing element tags */
564 if (!xmlctx->elements.count) {
565 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Stray closing element tag (\"%.*s\").",
566 name_len, name);
567 return LY_EVALID;
568 }
Radek Krejcid972c252018-09-25 13:23:39 +0200569
Michal Vaskob36053d2020-03-26 15:49:30 +0100570 e = (struct lyxml_elem *)xmlctx->elements.objs[xmlctx->elements.count - 1];
571 if ((e->prefix_len != prefix_len) || (e->name_len != name_len)
572 || (prefix_len && strncmp(prefix, e->prefix, e->prefix_len)) || strncmp(name, e->name, e->name_len)) {
573 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX,
574 "Opening (\"%.*s%s%.*s\") and closing (\"%.*s%s%.*s\") elements tag mismatch.",
575 e->prefix_len, e->prefix ? e->prefix : "", e->prefix ? ":" : "", e->name_len, e->name,
576 prefix_len, prefix ? prefix : "", prefix ? ":" : "", name_len, name);
577 return LY_EVALID;
578 }
Radek Krejcid972c252018-09-25 13:23:39 +0200579
Michal Vaskob36053d2020-03-26 15:49:30 +0100580 /* opening and closing element tags matches, remove record from the opening tags list */
581 ly_set_rm_index(&xmlctx->elements, xmlctx->elements.count - 1, free);
Radek Krejcid972c252018-09-25 13:23:39 +0200582
Michal Vaskob36053d2020-03-26 15:49:30 +0100583 /* remove also the namespaces connected with the element */
584 lyxml_ns_rm(xmlctx);
Radek Krejcid972c252018-09-25 13:23:39 +0200585
Michal Vaskob36053d2020-03-26 15:49:30 +0100586 /* skip WS */
587 ign_xmlws(xmlctx);
Radek Krejcid972c252018-09-25 13:23:39 +0200588
Michal Vaskob36053d2020-03-26 15:49:30 +0100589 /* special "<elem/>" element */
590 if (empty && (xmlctx->input[0] == '/')) {
591 move_input(xmlctx, 1);
592 }
Michal Vasko52927e22020-03-16 17:26:14 +0100593
Michal Vaskob36053d2020-03-26 15:49:30 +0100594 /* parse closing tag */
595 if (xmlctx->input[0] != '>') {
596 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->input),
597 xmlctx->input, "element tag termination ('>')");
598 return LY_EVALID;
599 }
Michal Vasko52927e22020-03-16 17:26:14 +0100600
Michal Vaskob36053d2020-03-26 15:49:30 +0100601 /* move after closing tag without checking for EOF */
602 ++xmlctx->input;
Michal Vasko52927e22020-03-16 17:26:14 +0100603
Radek Krejcid972c252018-09-25 13:23:39 +0200604 return LY_SUCCESS;
605}
606
Michal Vaskob36053d2020-03-26 15:49:30 +0100607static LY_ERR
608lyxml_open_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, const char *name, size_t name_len)
Radek Krejcib1890642018-10-03 14:05:40 +0200609{
Michal Vaskob36053d2020-03-26 15:49:30 +0100610 LY_ERR ret = LY_SUCCESS;
611 struct lyxml_elem *e;
612 const char *prev_input;
613 char *value;
614 size_t parsed, value_len;
615 int ws_only, dynamic, is_ns;
616 uint32_t c;
Radek Krejcib1890642018-10-03 14:05:40 +0200617
Michal Vaskob36053d2020-03-26 15:49:30 +0100618 /* store element opening tag information */
619 e = malloc(sizeof *e);
620 LY_CHECK_ERR_RET(!e, LOGMEM(xmlctx->ctx), LY_EMEM);
621 e->name = name;
622 e->prefix = prefix;
623 e->name_len = name_len;
624 e->prefix_len = prefix_len;
625 ly_set_add(&xmlctx->elements, e, LY_SET_OPT_USEASLIST);
626
627 /* skip WS */
628 ign_xmlws(xmlctx);
629
630 /* parse and store all namespaces */
631 prev_input = xmlctx->input;
632 is_ns = 1;
633 while ((xmlctx->input[0] != '\0') && !ly_getutf8(&xmlctx->input, &c, &parsed) && is_xmlqnamestartchar(c)) {
634 xmlctx->input -= parsed;
635
636 /* parse attribute name */
637 LY_CHECK_GOTO(ret = lyxml_parse_qname(xmlctx, &prefix, &prefix_len, &name, &name_len), cleanup);
638
639 /* parse the value */
640 LY_CHECK_GOTO(ret = lyxml_next_attr_content(xmlctx, (const char **)&value, &value_len, &ws_only, &dynamic), cleanup);
641
642 /* store every namespace */
643 if ((prefix && !ly_strncmp("xmlns", prefix, prefix_len)) || (!prefix && !ly_strncmp("xmlns", name, name_len))) {
644 LY_CHECK_GOTO(ret = lyxml_ns_add(xmlctx, prefix ? name : NULL, prefix ? name_len : 0,
645 dynamic ? value : strndup(value, value_len)), cleanup);
646 dynamic = 0;
647 } else {
648 /* not a namespace */
649 is_ns = 0;
650 }
651 if (dynamic) {
652 free(value);
653 }
654
655 /* skip WS */
656 ign_xmlws(xmlctx);
657
658 if (is_ns) {
659 /* we can actually skip all the namespaces as there is no reason to parse them again */
660 prev_input = xmlctx->input;
661 }
Radek Krejcib1890642018-10-03 14:05:40 +0200662 }
Michal Vaskob36053d2020-03-26 15:49:30 +0100663
664cleanup:
665 if (!ret) {
666 xmlctx->input = prev_input;
667 }
668 return ret;
669}
670
671static LY_ERR
672lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *value_len, int *ws_only, int *dynamic)
673{
674 char quot;
675
676 /* skip WS */
677 ign_xmlws(xmlctx);
678
679 /* skip '=' */
680 if (xmlctx->input[0] == '\0') {
681 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
682 return LY_EVALID;
683 } else if (xmlctx->input[0] != '=') {
684 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->input),
685 xmlctx->input, "'='");
686 return LY_EVALID;
687 }
688 move_input(xmlctx, 1);
689
690 /* skip WS */
691 ign_xmlws(xmlctx);
692
693 /* find quotes */
694 if (xmlctx->input[0] == '\0') {
695 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
696 return LY_EVALID;
697 } else if ((xmlctx->input[0] != '\'') && (xmlctx->input[0] != '\"')) {
698 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->input),
699 xmlctx->input, "either single or double quotation mark");
700 return LY_EVALID;
701 }
702
703 /* remember quote */
704 quot = xmlctx->input[0];
705 move_input(xmlctx, 1);
706
707 /* parse attribute value */
708 LY_CHECK_RET(lyxml_parse_value(xmlctx, quot, (char **)value, value_len, ws_only, dynamic));
709
710 /* move after ending quote (without checking for EOF) */
711 ++xmlctx->input;
712
713 return LY_SUCCESS;
714}
715
716static LY_ERR
717lyxml_next_attribute(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
718{
719 const char *in;
720 char *value;
721 uint32_t c;
722 size_t parsed, value_len;
723 int ws_only, dynamic;
724
725 /* skip WS */
726 ign_xmlws(xmlctx);
727
728 /* parse only possible attributes */
729 while ((xmlctx->input[0] != '>') && (xmlctx->input[0] != '/')) {
730 in = xmlctx->input;
731 if (in[0] == '\0') {
732 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_EOF);
733 return LY_EVALID;
734 } else if ((ly_getutf8(&in, &c, &parsed) || !is_xmlqnamestartchar(c))) {
735 LOGVAL(xmlctx->ctx, LY_VLOG_LINE, &xmlctx->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in - parsed), in - parsed,
736 "element tag end ('>' or '/>') or an attribute");
737 return LY_EVALID;
738 }
739
740 /* parse attribute name */
741 LY_CHECK_RET(lyxml_parse_qname(xmlctx, prefix, prefix_len, name, name_len));
742
743 if ((!*prefix || ly_strncmp("xmlns", *prefix, *prefix_len)) && (*prefix || ly_strncmp("xmlns", *name, *name_len))) {
744 /* standard attribute */
745 break;
746 }
747
748 /* namespace, skip it */
749 LY_CHECK_RET(lyxml_next_attr_content(xmlctx, (const char **)&value, &value_len, &ws_only, &dynamic));
750 if (dynamic) {
751 free(value);
752 }
753
754 /* skip WS */
755 ign_xmlws(xmlctx);
756 }
757
758 return LY_SUCCESS;
759}
760
761static LY_ERR
762lyxml_next_element(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len,
763 int *closing)
764{
765 /* skip WS until EOF or after opening tag '<' */
766 LY_CHECK_RET(lyxml_skip_until_end_or_after_otag(xmlctx));
767 if (xmlctx->input[0] == '\0') {
768 /* set return values */
769 *prefix = *name = NULL;
770 *prefix_len = *name_len = 0;
771 return LY_SUCCESS;
772 }
773
774 if (xmlctx->input[0] == '/') {
775 move_input(xmlctx, 1);
776 *closing = 1;
777 } else {
778 *closing = 0;
779 }
780
781 /* skip WS */
782 ign_xmlws(xmlctx);
783
784 /* parse element name */
785 LY_CHECK_RET(lyxml_parse_qname(xmlctx, prefix, prefix_len, name, name_len));
786
787 return LY_SUCCESS;
788}
789
790LY_ERR
791lyxml_ctx_new(const struct ly_ctx *ctx, const char *input, struct lyxml_ctx **xmlctx_p)
792{
793 LY_ERR ret = LY_SUCCESS;
794 struct lyxml_ctx *xmlctx;
795 int closing;
796
797 /* new context */
798 xmlctx = calloc(1, sizeof *xmlctx);
799 LY_CHECK_ERR_RET(!xmlctx, LOGMEM(ctx), LY_EMEM);
800 xmlctx->ctx = ctx;
801 xmlctx->line = 1;
802 xmlctx->input = input;
803
804 /* parse next element, if any */
805 LY_CHECK_GOTO(ret = lyxml_next_element(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name,
806 &xmlctx->name_len, &closing), cleanup);
807
808 if (xmlctx->input[0] == '\0') {
809 /* update status */
810 xmlctx->status = LYXML_END;
811 } else if (closing) {
812 LOGVAL(ctx, LY_VLOG_LINE, &xmlctx->line, LYVE_SYNTAX, "Stray closing element tag (\"%.*s\").",
813 xmlctx->name_len, xmlctx->name);
814 ret = LY_EVALID;
815 goto cleanup;
816 } else {
817 /* open an element, also parses all enclosed namespaces */
818 LY_CHECK_GOTO(ret = lyxml_open_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len), cleanup);
819
820 /* update status */
821 xmlctx->status = LYXML_ELEMENT;
822 }
823
824cleanup:
825 if (ret) {
826 lyxml_ctx_free(xmlctx);
827 } else {
828 *xmlctx_p = xmlctx;
829 }
830 return ret;
831}
832
833LY_ERR
834lyxml_ctx_next(struct lyxml_ctx *xmlctx)
835{
836 LY_ERR ret = LY_SUCCESS;
837 int closing;
838 struct lyxml_elem *e;
839
840 /* if the value was not used, free it */
841 if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) {
842 free((char *)xmlctx->value);
843 xmlctx->value = NULL;
844 xmlctx->dynamic = 0;
845 }
846
847 switch (xmlctx->status) {
848 /* content |</elem> */
849 case LYXML_ELEM_CONTENT:
850 /* handle special case when empty content for "<elem/>" was returned */
851 if (xmlctx->input[0] == '/') {
852 assert(xmlctx->elements.count);
853 e = (struct lyxml_elem *)xmlctx->elements.objs[xmlctx->elements.count - 1];
854
855 /* close the element (parses closing tag) */
856 LY_CHECK_GOTO(ret = lyxml_close_element(xmlctx, e->prefix, e->prefix_len, e->name, e->name_len, 1), cleanup);
857
858 /* update status */
859 xmlctx->status = LYXML_ELEM_CLOSE;
860 break;
861 }
862 /* fallthrough */
863
864 /* </elem>| <elem2>* */
865 case LYXML_ELEM_CLOSE:
866 /* parse next element, if any */
867 LY_CHECK_GOTO(ret = lyxml_next_element(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name,
868 &xmlctx->name_len, &closing), cleanup);
869
870 if (xmlctx->input[0] == '\0') {
871 /* update status */
872 xmlctx->status = LYXML_END;
873 } else if (closing) {
874 /* close an element (parses also closing tag) */
875 LY_CHECK_GOTO(ret = lyxml_close_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len, 0), cleanup);
876
877 /* update status */
878 xmlctx->status = LYXML_ELEM_CLOSE;
879 } else {
880 /* open an element, also parses all enclosed namespaces */
881 LY_CHECK_GOTO(ret = lyxml_open_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len), cleanup);
882
883 /* update status */
884 xmlctx->status = LYXML_ELEMENT;
885 }
886 break;
887
888 /* <elem| attr='val'* > content */
889 case LYXML_ELEMENT:
890
891 /* attr='val'| attr='val'* > content */
892 case LYXML_ATTR_CONTENT:
893 /* parse attribute name, if any */
894 LY_CHECK_GOTO(ret = lyxml_next_attribute(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name, &xmlctx->name_len), cleanup);
895
896 if (xmlctx->input[0] == '>') {
897 /* no attributes but a closing tag */
898 move_input(xmlctx, 1);
899
900 /* parse element content */
901 LY_CHECK_GOTO(ret = lyxml_parse_value(xmlctx, '<', (char **)&xmlctx->value, &xmlctx->value_len, &xmlctx->ws_only,
902 &xmlctx->dynamic), cleanup);
903
904 if (!xmlctx->value_len) {
905 /* use empty value, easier to work with */
906 xmlctx->value = "";
907 assert(!xmlctx->dynamic);
908 }
909
910 /* update status */
911 xmlctx->status = LYXML_ELEM_CONTENT;
912 } else if (xmlctx->input[0] == '/') {
913 /* no content but we still return it */
914 xmlctx->value = "";
915 xmlctx->value_len = 0;
916 xmlctx->ws_only = 1;
917 xmlctx->dynamic = 0;
918
919 /* update status */
920 xmlctx->status = LYXML_ELEM_CONTENT;
921 } else {
922 /* update status */
923 xmlctx->status = LYXML_ATTRIBUTE;
924 }
925 break;
926
927 /* attr|='val' */
928 case LYXML_ATTRIBUTE:
929 /* skip formatting and parse value */
930 LY_CHECK_GOTO(ret = lyxml_next_attr_content(xmlctx, &xmlctx->value, &xmlctx->value_len, &xmlctx->ws_only,
931 &xmlctx->dynamic), cleanup);
932
933 /* update status */
934 xmlctx->status = LYXML_ATTR_CONTENT;
935 break;
936
937 /* </elem> |EOF */
938 case LYXML_END:
939 /* nothing to do */
940 break;
941 }
942
943cleanup:
944 if (ret) {
945 /* invalidate context */
946 xmlctx->status = LYXML_END;
947 }
948 return ret;
949}
950
951LY_ERR
952lyxml_ctx_peek(struct lyxml_ctx *xmlctx, enum LYXML_PARSER_STATUS *next)
953{
954 LY_ERR ret = LY_SUCCESS;
955 const char *prefix, *name, *prev_input;
956 size_t prefix_len, name_len;
957 int closing;
958
959 prev_input = xmlctx->input;
960
961 switch (xmlctx->status) {
962 case LYXML_ELEM_CONTENT:
963 if (xmlctx->input[0] == '/') {
964 *next = LYXML_ELEM_CLOSE;
965 break;
966 }
967 /* fallthrough */
968 case LYXML_ELEM_CLOSE:
969 /* parse next element, if any */
970 LY_CHECK_GOTO(ret = lyxml_next_element(xmlctx, &prefix, &prefix_len, &name, &name_len, &closing), cleanup);
971
972 if (xmlctx->input[0] == '\0') {
973 *next = LYXML_END;
974 } else if (closing) {
975 *next = LYXML_ELEM_CLOSE;
976 } else {
977 *next = LYXML_ELEMENT;
978 }
979 break;
980 case LYXML_ELEMENT:
981 case LYXML_ATTR_CONTENT:
982 /* parse attribute name, if any */
983 LY_CHECK_GOTO(ret = lyxml_next_attribute(xmlctx, &prefix, &prefix_len, &name, &name_len), cleanup);
984
985 if ((xmlctx->input[0] == '>') || (xmlctx->input[0] == '/')) {
986 *next = LYXML_ELEM_CONTENT;
987 } else {
988 *next = LYXML_ATTRIBUTE;
989 }
990 break;
991 case LYXML_ATTRIBUTE:
992 *next = LYXML_ATTR_CONTENT;
993 break;
994 case LYXML_END:
995 *next = LYXML_END;
996 break;
997 }
998
999cleanup:
1000 xmlctx->input = prev_input;
1001 return ret;
1002}
1003
1004void
1005lyxml_ctx_free(struct lyxml_ctx *xmlctx)
1006{
1007 uint32_t u;
1008
1009 if (!xmlctx) {
1010 return;
1011 }
1012
1013 if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) {
1014 free((char *)xmlctx->value);
1015 }
1016 ly_set_erase(&xmlctx->elements, free);
1017 for (u = xmlctx->ns.count - 1; u + 1 > 0; --u) {
1018 /* remove the ns structure */
1019 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->prefix);
1020 free(((struct lyxml_ns *)xmlctx->ns.objs[u])->uri);
1021 free(xmlctx->ns.objs[u]);
1022 }
1023 ly_set_erase(&xmlctx->ns, NULL);
1024 free(xmlctx);
Radek Krejcib1890642018-10-03 14:05:40 +02001025}
Radek Krejcie7b95092019-05-15 11:03:07 +02001026
1027LY_ERR
1028lyxml_dump_text(struct lyout *out, const char *text, int attribute)
1029{
1030 LY_ERR ret = LY_SUCCESS;
1031 unsigned int u;
1032
1033 if (!text) {
1034 return 0;
1035 }
1036
1037 for (u = 0; text[u]; u++) {
1038 switch (text[u]) {
1039 case '&':
1040 ret = ly_print(out, "&amp;");
1041 break;
1042 case '<':
1043 ret = ly_print(out, "&lt;");
1044 break;
1045 case '>':
1046 /* not needed, just for readability */
1047 ret = ly_print(out, "&gt;");
1048 break;
1049 case '"':
1050 if (attribute) {
1051 ret = ly_print(out, "&quot;");
1052 break;
1053 }
1054 /* falls through */
1055 default:
1056 ly_write(out, &text[u], 1);
1057 }
1058 }
1059
1060 return ret;
1061}
1062
Michal Vasko52927e22020-03-16 17:26:14 +01001063LY_ERR
Michal Vaskob36053d2020-03-26 15:49:30 +01001064lyxml_get_prefixes(struct lyxml_ctx *xmlctx, const char *value, size_t value_len, struct ly_prefix **val_prefs)
Michal Vasko52927e22020-03-16 17:26:14 +01001065{
1066 LY_ERR ret;
Radek Krejci7eb54ba2020-05-18 16:30:04 +02001067 LY_ARRAY_SIZE_TYPE u;
1068 uint32_t c;
Michal Vasko52927e22020-03-16 17:26:14 +01001069 const struct lyxml_ns *ns;
1070 const char *start, *stop;
1071 struct ly_prefix *prefixes = NULL;
1072 size_t len;
1073
1074 for (stop = start = value; (size_t)(stop - value) < value_len; start = stop) {
1075 size_t bytes;
1076 ly_getutf8(&stop, &c, &bytes);
1077 if (is_xmlqnamestartchar(c)) {
1078 for (ly_getutf8(&stop, &c, &bytes);
1079 is_xmlqnamechar(c) && (size_t)(stop - value) < value_len;
1080 ly_getutf8(&stop, &c, &bytes));
1081 stop = stop - bytes;
1082 if (*stop == ':') {
1083 /* we have a possible prefix */
1084 len = stop - start;
Michal Vaskob36053d2020-03-26 15:49:30 +01001085 ns = lyxml_ns_get(xmlctx, start, len);
Michal Vasko52927e22020-03-16 17:26:14 +01001086 if (ns) {
1087 struct ly_prefix *p = NULL;
1088
1089 /* check whether we do not already have this prefix stored */
1090 LY_ARRAY_FOR(prefixes, u) {
1091 if (!ly_strncmp(prefixes[u].pref, start, len)) {
1092 p = &prefixes[u];
1093 break;
1094 }
1095 }
1096 if (!p) {
Michal Vaskob36053d2020-03-26 15:49:30 +01001097 LY_ARRAY_NEW_GOTO(xmlctx->ctx, prefixes, p, ret, error);
1098 p->pref = lydict_insert(xmlctx->ctx, start, len);
1099 p->ns = lydict_insert(xmlctx->ctx, ns->uri, 0);
Michal Vasko52927e22020-03-16 17:26:14 +01001100 } /* else the prefix already present */
1101 }
1102 }
1103 stop = stop + bytes;
1104 }
1105 }
1106
1107 *val_prefs = prefixes;
1108 return LY_SUCCESS;
1109
1110error:
1111 LY_ARRAY_FOR(prefixes, u) {
Michal Vaskob36053d2020-03-26 15:49:30 +01001112 lydict_remove(xmlctx->ctx, prefixes[u].pref);
Michal Vasko52927e22020-03-16 17:26:14 +01001113 }
1114 LY_ARRAY_FREE(prefixes);
1115 return ret;
1116}
1117
1118LY_ERR
1119lyxml_value_compare(const char *value1, const struct ly_prefix *prefs1, const char *value2, const struct ly_prefix *prefs2)
1120{
1121 const char *ptr1, *ptr2, *ns1, *ns2;
Radek Krejci7eb54ba2020-05-18 16:30:04 +02001122 LY_ARRAY_SIZE_TYPE u1, u2;
Michal Vasko52927e22020-03-16 17:26:14 +01001123 int len;
1124
1125 if (!value1 && !value2) {
1126 return LY_SUCCESS;
1127 }
1128 if ((value1 && !value2) || (!value1 && value2)) {
1129 return LY_ENOT;
1130 }
1131
1132 ptr1 = value1;
1133 ptr2 = value2;
1134 while (ptr1[0] && ptr2[0]) {
1135 if (ptr1[0] != ptr2[0]) {
1136 /* it can be a start of prefix that maps to the same module */
1137 ns1 = ns2 = NULL;
1138 if (prefs1) {
1139 /* find module of the first prefix, if any */
1140 LY_ARRAY_FOR(prefs1, u1) {
1141 len = strlen(prefs1[u1].pref);
1142 if (!strncmp(ptr1, prefs1[u1].pref, len) && (ptr1[len] == ':')) {
1143 ns1 = prefs1[u1].ns;
1144 break;
1145 }
1146 }
1147 }
1148 if (prefs2) {
1149 /* find module of the second prefix, if any */
1150 LY_ARRAY_FOR(prefs2, u2) {
1151 len = strlen(prefs2[u2].pref);
1152 if (!strncmp(ptr2, prefs2[u2].pref, len) && (ptr2[len] == ':')) {
1153 ns2 = prefs2[u2].ns;
1154 break;
1155 }
1156 }
1157 }
1158
1159 if (!ns1 || !ns2 || (ns1 != ns2)) {
1160 /* not a prefix or maps to different namespaces */
1161 break;
1162 }
1163
1164 /* skip prefixes in both values (':' is skipped as iter) */
1165 ptr1 += strlen(prefs1[u1].pref);
1166 ptr2 += strlen(prefs2[u2].pref);
1167 }
1168
1169 ++ptr1;
1170 ++ptr2;
1171 }
1172 if (ptr1[0] || ptr2[0]) {
1173 /* not a match or simply different lengths */
1174 return LY_ENOT;
1175 }
1176
1177 return LY_SUCCESS;
1178}