blob: ff76c31c0e42db6781ed83f69b2319348f20b55d [file] [log] [blame]
Radek Krejcid91dbaf2018-09-21 15:51:39 +02001/**
2 * @file xml.c
3 * @author Radek Krejci <rkrejci@cesnet.cz>
4 * @brief Generic XML parser implementation for libyang
5 *
6 * Copyright (c) 2015 - 2018 CESNET, z.s.p.o.
7 *
8 * This source code is licensed under BSD 3-Clause License (the "License").
9 * You may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * https://opensource.org/licenses/BSD-3-Clause
13 */
14
Radek Krejcic1c03d62018-11-27 10:52:43 +010015#include "common.h"
Radek Krejci4b74d5e2018-09-26 14:30:55 +020016
Radek Krejcib1890642018-10-03 14:05:40 +020017#include <assert.h>
Radek Krejci7a7fa902018-09-25 17:08:21 +020018#include <ctype.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020019#include <stdbool.h>
20#include <stdint.h>
Radek Krejcie7b95092019-05-15 11:03:07 +020021#include <stdlib.h>
Radek Krejci4b74d5e2018-09-26 14:30:55 +020022#include <string.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020023
Radek Krejcid91dbaf2018-09-21 15:51:39 +020024#include "xml.h"
Radek Krejcie7b95092019-05-15 11:03:07 +020025#include "printer_internal.h"
Radek Krejcid91dbaf2018-09-21 15:51:39 +020026
/*
 * Move input p by s characters. If the new position is the terminating NULL byte,
 * log an EOF error with lyxml_context c and return LY_EVALID from the calling function.
 *
 * Wrapped in do { } while (0) so that the two statements always act as a single
 * statement (e.g. in a braceless if); macro arguments are parenthesized.
 */
#define move_input(c,p,s) \
    do { \
        (p) += (s); \
        LY_CHECK_ERR_RET(!(p)[0], LOGVAL((c)->ctx, LY_VLOG_LINE, &(c)->line, LY_VCODE_EOF), LY_EVALID); \
    } while (0)

/*
 * Ignore whitespaces in the input string p.
 * Every passed newline increments the line counter of lyxml_context c.
 */
#define ign_xmlws(c,p) \
    do { \
        while (is_xmlws(*(p))) { \
            if (*(p) == '\n') { \
                ++(c)->line; \
            } \
            ++(p); \
        } \
    } while (0)
32
/**
 * @brief Skip the input up to the first occurrence of the given delimiter.
 *
 * Newlines passed on the way to the delimiter are counted so the caller can
 * keep its line counter up to date.
 *
 * @param[in] input NULL-terminated string to search in.
 * @param[in] delim Delimiter to search for.
 * @param[in] delim_len Number of bytes of @p delim to match.
 * @param[out] newlines Number of newline characters passed before the returned position
 * (or in the whole input when the delimiter is not found).
 * @return Pointer to the beginning of the delimiter in @p input, NULL if the delimiter is not present.
 */
static const char *
ign_todelim(register const char *input, const char *delim, size_t delim_len, size_t *newlines)
{
    const char *pos;
    size_t matched;

    *newlines = 0;
    for (pos = input; *pos != '\0'; ++pos) {
        if (*pos == *delim) {
            /* possible start of the delimiter, compare it byte by byte */
            matched = 0;
            while (matched < delim_len && pos[matched] == delim[matched]) {
                ++matched;
            }
            if (matched == delim_len) {
                return pos;
            }
        } else if (*pos == '\n') {
            ++(*newlines);
        }
    }

    /* end of input reached without finding the delimiter */
    return NULL;
}
67
Radek Krejci4b74d5e2018-09-26 14:30:55 +020068/**
Radek Krejci7a7fa902018-09-25 17:08:21 +020069 * Store UTF-8 character specified as 4byte integer into the dst buffer.
70 * Returns number of written bytes (4 max), expects that dst has enough space.
71 *
72 * UTF-8 mapping:
73 * 00000000 -- 0000007F: 0xxxxxxx
74 * 00000080 -- 000007FF: 110xxxxx 10xxxxxx
75 * 00000800 -- 0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
76 * 00010000 -- 001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
77 *
78 * Includes checking for valid characters (following RFC 7950, sec 9.4)
79 */
80static LY_ERR
Radek Krejci117d2082018-09-26 10:05:14 +020081lyxml_pututf8(char *dst, uint32_t value, size_t *bytes_written)
Radek Krejci7a7fa902018-09-25 17:08:21 +020082{
83 if (value < 0x80) {
84 /* one byte character */
85 if (value < 0x20 &&
86 value != 0x09 &&
87 value != 0x0a &&
88 value != 0x0d) {
89 return LY_EINVAL;
90 }
91
92 dst[0] = value;
93 (*bytes_written) = 1;
94 } else if (value < 0x800) {
95 /* two bytes character */
96 dst[0] = 0xc0 | (value >> 6);
97 dst[1] = 0x80 | (value & 0x3f);
98 (*bytes_written) = 2;
99 } else if (value < 0xfffe) {
100 /* three bytes character */
101 if (((value & 0xf800) == 0xd800) ||
102 (value >= 0xfdd0 && value <= 0xfdef)) {
103 /* exclude surrogate blocks %xD800-DFFF */
104 /* exclude noncharacters %xFDD0-FDEF */
105 return LY_EINVAL;
106 }
107
108 dst[0] = 0xe0 | (value >> 12);
109 dst[1] = 0x80 | ((value >> 6) & 0x3f);
110 dst[2] = 0x80 | (value & 0x3f);
111
112 (*bytes_written) = 3;
113 } else if (value < 0x10fffe) {
114 if ((value & 0xffe) == 0xffe) {
115 /* exclude noncharacters %xFFFE-FFFF, %x1FFFE-1FFFF, %x2FFFE-2FFFF, %x3FFFE-3FFFF, %x4FFFE-4FFFF,
116 * %x5FFFE-5FFFF, %x6FFFE-6FFFF, %x7FFFE-7FFFF, %x8FFFE-8FFFF, %x9FFFE-9FFFF, %xAFFFE-AFFFF,
117 * %xBFFFE-BFFFF, %xCFFFE-CFFFF, %xDFFFE-DFFFF, %xEFFFE-EFFFF, %xFFFFE-FFFFF, %x10FFFE-10FFFF */
118 return LY_EINVAL;
119 }
120 /* four bytes character */
121 dst[0] = 0xf0 | (value >> 18);
122 dst[1] = 0x80 | ((value >> 12) & 0x3f);
123 dst[2] = 0x80 | ((value >> 6) & 0x3f);
124 dst[3] = 0x80 | (value & 0x3f);
125
126 (*bytes_written) = 4;
127 }
128 return LY_SUCCESS;
129}
130
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200131/**
132 * @brief Check/Get an XML qualified name from the input string.
133 *
134 * The identifier must have at least one valid character complying the name start character constraints.
135 * The identifier is terminated by the first character, which does not comply to the name character constraints.
136 *
137 * See https://www.w3.org/TR/xml-names/#NT-NCName
138 *
139 * @param[in] context XML context to track lines or store errors into libyang context.
140 * @param[in,out] input Input string to process, updated according to the processed/read data.
141 * Note that the term_char is also read, so input points after the term_char at the end.
142 * @param[out] term_char The first character in the input string which does not compy to the name constraints.
143 * @param[out] term_char_len Number of bytes used to encode UTF8 term_char. Serves to be able to go back in input string.
144 * @return LY_ERR value.
145 */
146static LY_ERR
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200147lyxml_check_qname(struct lyxml_context *context, const char **input, unsigned int *term_char, size_t *term_char_len)
148{
149 unsigned int c;
150 const char *id = (*input);
151 LY_ERR rc;
152
153 /* check NameStartChar (minus colon) */
Radek Krejcib416be62018-10-01 14:51:45 +0200154 LY_CHECK_ERR_RET(ly_getutf8(input, &c, NULL) != LY_SUCCESS,
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200155 LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INCHAR, (*input)[0]), LY_EVALID);
156 LY_CHECK_ERR_RET(!is_xmlqnamestartchar(c),
157 LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX,
158 "Identifier \"%s\" starts with invalid character.", id),
159 LY_EVALID);
160
161 /* check rest of the identifier */
Radek Krejcib416be62018-10-01 14:51:45 +0200162 for (rc = ly_getutf8(input, &c, term_char_len);
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200163 rc == LY_SUCCESS && is_xmlqnamechar(c);
Radek Krejcib416be62018-10-01 14:51:45 +0200164 rc = ly_getutf8(input, &c, term_char_len));
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200165 LY_CHECK_ERR_RET(rc != LY_SUCCESS, LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INCHAR, (*input)[0]), LY_EVALID);
166
167 (*term_char) = c;
168 return LY_SUCCESS;
169}
170
Radek Krejci17a78d82019-05-15 15:49:55 +0200171/**
172 * @brief Add namespace definition into XML context.
173 *
174 * Namespaces from a single element are supposed to be added sequentially together (not interleaved by a namespace from other
175 * element). This mimic namespace visibility, since the namespace defined in element E is not visible from its parents or
176 * siblings. On the other hand, namespace from a parent element can be redefined in a child element. This is also reflected
177 * by lyxml_ns_get() which returns the most recent namespace definition for the given prefix.
178 *
179 * When leaving processing of a subtree of some element (after it is removed from context->elements), caller is supposed to call
180 * lyxml_ns_rm() to remove all the namespaces defined in such an element from the context.
181 *
182 * @param[in] context XML context to work with.
183 * @param[in] prefix Pointer to the namespace prefix as taken from lyxml_get_attribute(). Can be NULL for default namespace.
184 * @param[in] prefix_len Length of the prefix string (since it is not NULL-terminated when returned from lyxml_get_attribute()).
185 * @param[in] uri Namespace URI (value) to store. Value can be obtained via lyxml_get_string() and caller is not supposed to
186 * work with the pointer when the function succeeds. In case of error the value is freed.
187 * @return LY_ERR values.
188 */
Radek Krejci2d7a47b2019-05-16 13:34:10 +0200189LY_ERR
Radek Krejci17a78d82019-05-15 15:49:55 +0200190lyxml_ns_add(struct lyxml_context *context, const char *prefix, size_t prefix_len, char *uri)
191{
192 struct lyxml_ns *ns;
193
194 ns = malloc(sizeof *ns);
195 LY_CHECK_ERR_RET(!ns, LOGMEM(context->ctx), LY_EMEM);
196
197 /* we need to connect the depth of the element where the namespace is defined with the
198 * namespace record to be able to maintain (remove) the record when the parser leaves
199 * (to its sibling or back to the parent) the element where the namespace was defined */
200 ns->depth = context->elements.count;
201
202 ns->uri = uri;
203 if (prefix) {
204 ns->prefix = strndup(prefix, prefix_len);
205 LY_CHECK_ERR_RET(!ns->prefix, LOGMEM(context->ctx); free(ns->uri); free(ns), LY_EMEM);
206 } else {
207 ns->prefix = NULL;
208 }
209
210 LY_CHECK_ERR_RET(ly_set_add(&context->ns, ns, LY_SET_OPT_USEASLIST) == -1,
211 free(ns->prefix); free(ns->uri); free(ns), LY_EMEM);
212 return LY_SUCCESS;
213}
214
215/**
216 * @brief Remove all the namespaces defined in the element recently closed (removed from the context->elements).
217 *
218 * @param[in] context XML context to work with.
Radek Krejci17a78d82019-05-15 15:49:55 +0200219 */
Radek Krejci17dca992019-05-17 10:53:27 +0200220void
Radek Krejci17a78d82019-05-15 15:49:55 +0200221lyxml_ns_rm(struct lyxml_context *context)
222{
223 unsigned int u;
224
225 for (u = context->ns.count - 1; u + 1 > 0; --u) {
226 if (((struct lyxml_ns *)context->ns.objs[u])->depth != context->elements.count + 1) {
227 /* we are done, the namespaces from a single element are supposed to be together */
228 break;
229 }
230 /* remove the ns structure */
231 free(((struct lyxml_ns *)context->ns.objs[u])->prefix);
232 free(((struct lyxml_ns *)context->ns.objs[u])->uri);
233 free(context->ns.objs[u]);
234 --context->ns.count;
235 }
236
237 if (!context->ns.count) {
238 /* cleanup the context's namespaces storage */
239 ly_set_erase(&context->ns, NULL);
240 }
Radek Krejci17a78d82019-05-15 15:49:55 +0200241}
242
243const struct lyxml_ns *
244lyxml_ns_get(struct lyxml_context *context, const char *prefix, size_t prefix_len)
245{
246 unsigned int u;
247 struct lyxml_ns *ns;
248
249 for (u = context->ns.count - 1; u + 1 > 0; --u) {
250 ns = (struct lyxml_ns *)context->ns.objs[u];
Radek Krejcif2c721d2019-06-03 16:37:58 +0200251 if (prefix && prefix_len) {
Radek Krejci7f9b6512019-09-18 13:11:09 +0200252 if (ns->prefix && !ly_strncmp(ns->prefix, prefix, prefix_len)) {
Radek Krejci17a78d82019-05-15 15:49:55 +0200253 return ns;
254 }
255 } else if (!ns->prefix) {
256 /* default namespace */
257 return ns;
258 }
259 }
260
261 return NULL;
262}
263
Radek Krejcif2c721d2019-06-03 16:37:58 +0200264static LY_ERR
265lyxml_parse_element_start(struct lyxml_context *context, const char **input, int *closing)
266{
267 struct ly_ctx *ctx = context->ctx; /* shortcut */
268 const char *in = (*input);
269 const char *endtag;
270 const char *sectname;
271 size_t endtag_len, newlines;
272
273 while (1) {
274 ign_xmlws(context, in);
275
276 if (in[0] == '\0') {
277 /* EOF */
Radek Krejcifad79c92019-06-04 11:43:30 +0200278 if (context->elements.count) {
279 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF);
280 return LY_EVALID;
281 }
Radek Krejcif2c721d2019-06-03 16:37:58 +0200282 context->status = LYXML_END;
283 (*input) = in;
284 return LY_SUCCESS;
285 } else if (in[0] != '<') {
286 return LY_EINVAL;
287 }
288 move_input(context, in, 1);
289
290 if (in[0] == '!') {
291 move_input(context, in, 1);
292 /* sections to ignore */
293 if (!strncmp(in, "--", 2)) {
294 /* comment */
295 move_input(context, in, 2);
296 sectname = "Comment";
297 endtag = "-->";
298 endtag_len = 3;
299 } else if (!strncmp(in, "[CDATA[", 7)) {
300 /* CDATA section */
301 move_input(context, in, 7);
302 sectname = "CData";
303 endtag = "]]>";
304 endtag_len = 3;
305 } else if (!strncmp(in, "DOCTYPE", 7)) {
306 /* Document type declaration - not supported */
307 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_NSUPP, "Document Type Declaration");
308 return LY_EVALID;
309 } else {
310 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Unknown XML section \"%.20s\".", &in[-2]);
311 return LY_EVALID;
312 }
313 in = ign_todelim(in, endtag, endtag_len, &newlines);
314 LY_CHECK_ERR_RET(!in, LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_NTERM, sectname), LY_EVALID);
315 context->line += newlines;
316 in += endtag_len;
317 } else if (in[0] == '?') {
318 in = ign_todelim(in, "?>", 2, &newlines);
319 LY_CHECK_ERR_RET(!in, LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_NTERM, "Declaration"), LY_EVALID);
320 context->line += newlines;
321 in += 2;
322 } else if (in[0] == '/') {
323 /* closing element tag */
324 *closing = 1;
325 ++in;
326 goto element;
327 } else {
328 /* opening element tag */
329 *closing = 0;
330element:
331 ign_xmlws(context, in);
332 LY_CHECK_ERR_RET(!in[0], LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF), LY_EVALID);
333
334 (*input) = in;
335 return LY_SUCCESS;
336 }
337 }
338}
339
340static LY_ERR
341lyxml_parse_element_name(struct lyxml_context *context, const char **input, size_t *endtag_len, unsigned int *term_char,
342 const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
343{
344 LY_ERR rc;
345 const char *in = (*input);
346 const char *id;
347 const char *endtag;
348
349 id = in;
350 rc = lyxml_check_qname(context, &in, term_char, endtag_len);
351 LY_CHECK_RET(rc);
352 if (*term_char == ':') {
353 /* we have prefixed identifier */
354 endtag = in - *endtag_len;
355
356 rc = lyxml_check_qname(context, &in, term_char, endtag_len);
357 LY_CHECK_RET(rc);
358
359 (*prefix) = id;
360 (*prefix_len) = endtag - id;
361 id = endtag + 1;
362 }
363 if (!is_xmlws(*term_char) && *term_char != '/' && *term_char != '>') {
364 (*input) = in - *endtag_len;
365 LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(*input), *input,
366 "whitespace or element tag termination ('>' or '/>'");
367 return LY_EVALID;
368 }
369 (*name) = id;
370 (*name_len) = in - *endtag_len - id;
371
372 if (is_xmlws(*term_char)) {
373 /* go to the next meaningful input */
374 ign_xmlws(context, in);
375 LY_CHECK_ERR_RET(!in[0], LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF), LY_EVALID);
376 *term_char = in[0];
377 ++in;
378 *endtag_len = 1;
379 }
380
381 (*input) = in;
382 return LY_SUCCESS;
383}
384
385LY_ERR
386lyxml_get_element(struct lyxml_context *context, const char **input,
387 const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
388{
389 struct ly_ctx *ctx = context->ctx; /* shortcut */
390 const char *in = (*input);
391 size_t endtag_len;
392 bool loop = true;
393 int closing = 0;
394 unsigned int c;
395 LY_ERR rc;
396 struct lyxml_elem *e;
397
398 /* initialize output variables */
399 (*prefix) = (*name) = NULL;
400 (*prefix_len) = (*name_len) = 0;
401
402 while (loop) {
403 rc = lyxml_parse_element_start(context, &in, &closing);
404 if (rc) {
405 return rc;
406 } else if (context->status == LYXML_END) {
407 goto success;
408 }
409 /* we are at the begining of the element name, remember the identifier start before checking its format */
410 LY_CHECK_RET(rc = lyxml_parse_element_name(context, &in, &endtag_len, &c, prefix, prefix_len, name, name_len));
411
412 if (closing) {
413 /* match opening and closing element tags */
414 LY_CHECK_ERR_RET(
415 !context->elements.count,
Michal Vasko14654712020-02-06 08:35:21 +0100416 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX,
417 "Opening and closing elements tag missmatch (\"%.*s\").", *name_len, *name),
Radek Krejcif2c721d2019-06-03 16:37:58 +0200418 LY_EVALID);
419 e = (struct lyxml_elem*)context->elements.objs[context->elements.count - 1];
Michal Vasko14654712020-02-06 08:35:21 +0100420 if (e->prefix_len != *prefix_len || e->name_len != *name_len
421 || (*prefix_len && strncmp(*prefix, e->prefix, e->prefix_len)) || strncmp(*name, e->name, e->name_len)) {
422 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX,
423 "Opening and closing elements tag missmatch (\"%.*s\").", *name_len, *name);
424 return LY_EVALID;
425 }
Radek Krejcif2c721d2019-06-03 16:37:58 +0200426 /* opening and closing element tags matches, remove record from the opening tags list */
427 free(e);
428 --context->elements.count;
429
Michal Vasko14654712020-02-06 08:35:21 +0100430 /* remove also the namespaces connected with the element */
Radek Krejcif2c721d2019-06-03 16:37:58 +0200431 lyxml_ns_rm(context);
432
433 /* do not return element information to announce closing element being currently processed */
434 *name = *prefix = NULL;
435 *name_len = *prefix_len = 0;
436
437 if (c == '>') {
438 /* end of closing element */
439 context->status = LYXML_ELEMENT;
440 } else {
441 in -= endtag_len;
442 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Unexpected data \"%.*s\" in closing element tag.",
443 LY_VCODE_INSTREXP_len(in), in);
444 return LY_EVALID;
445 }
446 } else {
447 if (c == '>') {
448 /* end of opening element */
449 context->status = LYXML_ELEM_CONTENT;
450 } else if (c == '/' && in[0] == '>') {
451 /* empty element closing */
452 context->status = LYXML_ELEMENT;
453 ++in;
454 } else {
455 /* attribute */
456 context->status = LYXML_ATTRIBUTE;
457 in -= endtag_len;
458 }
459
460 if (context->status != LYXML_ELEMENT) {
461 /* store element opening tag information */
462 e = malloc(sizeof *e);
463 LY_CHECK_ERR_RET(!e, LOGMEM(ctx), LY_EMEM);
464 e->name = *name;
465 e->prefix = *prefix;
466 e->name_len = *name_len;
467 e->prefix_len = *prefix_len;
468 ly_set_add(&context->elements, e, LY_SET_OPT_USEASLIST);
469 }
470 }
471 loop = false;
472 }
473
474success:
Radek Krejcifad79c92019-06-04 11:43:30 +0200475 /* check for end of input */
476 if (in[0] == '\0') {
477 /* EOF */
478 if (context->elements.count) {
479 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF);
480 return LY_EVALID;
481 }
482 context->status = LYXML_END;
483 }
Radek Krejcif2c721d2019-06-03 16:37:58 +0200484 /* move caller's input */
485 (*input) = in;
486 return LY_SUCCESS;
487}
488
Radek Krejci7a7fa902018-09-25 17:08:21 +0200489LY_ERR
Michal Vasko52927e22020-03-16 17:26:14 +0100490lyxml_skip_element(struct lyxml_context *context, const char **input)
491{
492 LY_ERR ret;
493 unsigned int parents_count = context->elements.count;
494
495 while (context->elements.count >= parents_count) {
496 /* skip attributes */
497 while (context->status == LYXML_ATTRIBUTE) {
498 LY_CHECK_RET(lyxml_get_attribute(context, input, NULL, NULL, NULL, NULL));
499 }
500
501 /* skip content */
502 if (context->status == LYXML_ELEM_CONTENT) {
503 ret = lyxml_get_string(context, input, NULL, NULL, NULL, NULL, NULL);
504 if (ret && (ret != LY_EINVAL)) {
505 return ret;
506 }
507 }
508
509 if (context->status != LYXML_ELEMENT) {
510 LOGINT(context->ctx);
511 return LY_EINT;
512 }
513
514 /* nested element/closing element */
515 LY_CHECK_RET(lyxml_get_element(context, input, NULL, NULL, NULL, NULL));
516 }
517
518 return LY_SUCCESS;
519}
520
521LY_ERR
Michal Vasko14654712020-02-06 08:35:21 +0100522lyxml_get_string(struct lyxml_context *context, const char **input, char **buffer, size_t *buffer_size, char **output,
523 size_t *length, int *dynamic)
Radek Krejci7a7fa902018-09-25 17:08:21 +0200524{
525#define BUFSIZE 4096
526#define BUFSIZE_STEP 4096
527#define BUFSIZE_CHECK(CTX, BUF, SIZE, CURR, NEED) \
528 if (CURR+NEED >= SIZE) { \
529 BUF = ly_realloc(BUF, SIZE + BUFSIZE_STEP); \
530 LY_CHECK_ERR_RET(!BUF, LOGMEM(CTX), LY_EMEM); \
531 SIZE += BUFSIZE_STEP; \
532 }
533
534 struct ly_ctx *ctx = context->ctx; /* shortcut */
Radek Krejcid70d1072018-10-09 14:20:47 +0200535 const char *in = (*input), *start;
536 char *buf = NULL, delim;
Radek Krejci4ad42aa2019-07-23 16:55:58 +0200537 size_t offset; /* read offset in input buffer */
538 size_t len; /* length of the output string (write offset in output buffer) */
539 size_t size = 0; /* size of the output buffer */
Radek Krejci7a7fa902018-09-25 17:08:21 +0200540 void *p;
Radek Krejci117d2082018-09-26 10:05:14 +0200541 uint32_t n;
542 size_t u, newlines;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200543 bool empty_content = false;
Radek Krejci17a78d82019-05-15 15:49:55 +0200544 LY_ERR rc = LY_SUCCESS;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200545
Radek Krejcib1890642018-10-03 14:05:40 +0200546 assert(context);
547 assert(context->status == LYXML_ELEM_CONTENT || context->status == LYXML_ATTR_CONTENT);
548
Radek Krejci7a7fa902018-09-25 17:08:21 +0200549 if (in[0] == '\'') {
550 delim = '\'';
551 ++in;
552 } else if (in[0] == '"') {
553 delim = '"';
554 ++in;
555 } else {
556 delim = '<';
557 empty_content = true;
558 }
Radek Krejcid70d1072018-10-09 14:20:47 +0200559 start = in;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200560
561 if (empty_content) {
562 /* only when processing element's content - try to ignore whitespaces used to format XML data
563 * before element's child or closing tag */
Radek Krejci117d2082018-09-26 10:05:14 +0200564 for (offset = newlines = 0; in[offset] && is_xmlws(in[offset]); ++offset) {
565 if (in[offset] == '\n') {
566 ++newlines;
567 }
568 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200569 LY_CHECK_ERR_RET(!in[offset], LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF), LY_EVALID);
Radek Krejci117d2082018-09-26 10:05:14 +0200570 context->line += newlines;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200571 if (in[offset] == '<') {
Radek Krejcied6c6ad2018-09-26 09:10:18 +0200572 (*input) = in + offset;
Radek Krejci339e2de2019-05-17 14:28:24 +0200573
574 /* get know if it is child element (indentation) or closing element (whitespace-only content) */
Radek Krejcie553e6d2019-06-07 15:33:18 +0200575 len = offset;
576 offset = 0;
Radek Krejci339e2de2019-05-17 14:28:24 +0200577 in = *input;
Radek Krejcie553e6d2019-06-07 15:33:18 +0200578 goto element_endtag_check;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200579 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200580 }
Radek Krejcid70d1072018-10-09 14:20:47 +0200581 /* init */
582 offset = len = 0;
Radek Krejcie553e6d2019-06-07 15:33:18 +0200583 empty_content = false;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200584
Radek Krejcid70d1072018-10-09 14:20:47 +0200585 if (0) {
586getbuffer:
587 /* prepare output buffer */
588 if (*buffer) {
589 buf = *buffer;
590 size = *buffer_size;
591 } else {
592 buf = malloc(BUFSIZE);
593 size = BUFSIZE;
594 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
595 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200596 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200597
598 /* parse */
599 while (in[offset]) {
600 if (in[offset] == '&') {
Radek Krejciee4cab22019-07-17 17:07:47 +0200601 if (output) {
602 if (!buf) {
603 /* it is necessary to modify the input, so we will need a dynamically allocated buffer */
604 goto getbuffer;
605 }
Radek Krejcid70d1072018-10-09 14:20:47 +0200606
Radek Krejciee4cab22019-07-17 17:07:47 +0200607 if (offset) {
608 /* store what we have so far */
609 BUFSIZE_CHECK(ctx, buf, size, len, offset);
610 memcpy(&buf[len], in, offset);
611 len += offset;
612 in += offset;
613 offset = 0;
614 }
615 /* process reference */
616 /* we will need 4 bytes at most since we support only the predefined
617 * (one-char) entities and character references */
618 BUFSIZE_CHECK(ctx, buf, size, len, 4);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200619 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200620 ++offset;
621 if (in[offset] != '#') {
622 /* entity reference - only predefined references are supported */
623 if (!strncmp(&in[offset], "lt;", 3)) {
Radek Krejciee4cab22019-07-17 17:07:47 +0200624 if (output) {
625 buf[len++] = '<';
626 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200627 in += 4; /* &lt; */
628 } else if (!strncmp(&in[offset], "gt;", 3)) {
Radek Krejciee4cab22019-07-17 17:07:47 +0200629 if (output) {
630 buf[len++] = '>';
631 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200632 in += 4; /* &gt; */
633 } else if (!strncmp(&in[offset], "amp;", 4)) {
Radek Krejciee4cab22019-07-17 17:07:47 +0200634 if (output) {
635 buf[len++] = '&';
636 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200637 in += 5; /* &amp; */
638 } else if (!strncmp(&in[offset], "apos;", 5)) {
Radek Krejciee4cab22019-07-17 17:07:47 +0200639 if (output) {
640 buf[len++] = '\'';
641 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200642 in += 6; /* &apos; */
643 } else if (!strncmp(&in[offset], "quot;", 5)) {
Radek Krejciee4cab22019-07-17 17:07:47 +0200644 if (output) {
645 buf[len++] = '\"';
646 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200647 in += 6; /* &quot; */
648 } else {
Radek Krejcied6c6ad2018-09-26 09:10:18 +0200649 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX,
650 "Entity reference \"%.*s\" not supported, only predefined references allowed.", 10, &in[offset-1]);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200651 goto error;
652 }
653 offset = 0;
654 } else {
655 p = (void*)&in[offset - 1];
656 /* character reference */
657 ++offset;
658 if (isdigit(in[offset])) {
659 for (n = 0; isdigit(in[offset]); offset++) {
660 n = (10 * n) + (in[offset] - '0');
661 }
662 } else if (in[offset] == 'x' && isxdigit(in[offset + 1])) {
663 for (n = 0, ++offset; isxdigit(in[offset]); offset++) {
664 if (isdigit(in[offset])) {
665 u = (in[offset] - '0');
666 } else if (in[offset] > 'F') {
667 u = 10 + (in[offset] - 'a');
668 } else {
669 u = 10 + (in[offset] - 'A');
670 }
671 n = (16 * n) + u;
672 }
673 } else {
Radek Krejcied6c6ad2018-09-26 09:10:18 +0200674 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Invalid character reference \"%.*s\".", 12, p);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200675 goto error;
676
677 }
678 LY_CHECK_ERR_GOTO(in[offset] != ';',
679 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP,
680 LY_VCODE_INSTREXP_len(&in[offset]), &in[offset], ";"),
681 error);
682 ++offset;
Radek Krejciee4cab22019-07-17 17:07:47 +0200683 if (output) {
684 rc = lyxml_pututf8(&buf[len], n, &u);
685 } else {
686 char utf8[4];
687 rc = lyxml_pututf8(&utf8[0], n, &u);
688 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200689 LY_CHECK_ERR_GOTO(rc, LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX,
Radek Krejci117d2082018-09-26 10:05:14 +0200690 "Invalid character reference \"%.*s\" (0x%08x).", 12, p, n),
Radek Krejci7a7fa902018-09-25 17:08:21 +0200691 error);
692 len += u;
693 in += offset;
694 offset = 0;
695 }
696 } else if (in[offset] == delim) {
697 /* end of string */
Radek Krejcid70d1072018-10-09 14:20:47 +0200698 if (buf) {
699 if (len + offset >= size) {
700 buf = ly_realloc(buf, len + offset + 1);
701 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
702 size = len + offset + 1;
703 }
704 memcpy(&buf[len], in, offset);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200705 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200706 len += offset;
707 /* in case of element content, keep the leading <,
Radek Krejcib1890642018-10-03 14:05:40 +0200708 * for attribute's value move after the terminating quotation mark */
Radek Krejcie553e6d2019-06-07 15:33:18 +0200709element_endtag_check:
Radek Krejcib1890642018-10-03 14:05:40 +0200710 if (context->status == LYXML_ELEM_CONTENT) {
Radek Krejcif2c721d2019-06-03 16:37:58 +0200711 const char *name = NULL, *prefix = NULL;
712 size_t name_len = 0, prefix_len = 0;
713 int closing = 0;
714 /* use fake context to preserve real context (lines, status) since we don't want really parse the element tag here */
715 struct lyxml_context fakecontext = {.ctx = context->ctx, .line = context->line, .status = context->status};
Radek Krejci339e2de2019-05-17 14:28:24 +0200716
Radek Krejci7a7fa902018-09-25 17:08:21 +0200717 in += offset;
Radek Krejci339e2de2019-05-17 14:28:24 +0200718
719 /* get know if it is child element (mixed content) or closing element (regular content) */
Radek Krejcif2c721d2019-06-03 16:37:58 +0200720 /* We don't want actually to parse the closing element, we just need to check mixed content.
721 * The closing element tag is preserved to keep the context for the data (returned string),
722 * since it can contain data using XML prefixes defined in this element and the caller can
723 * want to work with it */
Radek Krejci339e2de2019-05-17 14:28:24 +0200724 (*input) = in;
Radek Krejcif2c721d2019-06-03 16:37:58 +0200725 rc = lyxml_parse_element_start(&fakecontext, &in, &closing);
726 if (rc) {
Radek Krejci8ced2f72019-05-20 12:33:49 +0200727 /* some parsing error */
728 goto error;
Radek Krejci339e2de2019-05-17 14:28:24 +0200729 } else {
Radek Krejcif2c721d2019-06-03 16:37:58 +0200730 size_t endtag_len;
731 unsigned int c;
732 struct lyxml_elem *e;
733
734 LY_CHECK_GOTO(lyxml_parse_element_name(&fakecontext, &in, &endtag_len, &c, &prefix, &prefix_len, &name, &name_len), error);
735
736 if (!closing) {
Radek Krejcie553e6d2019-06-07 15:33:18 +0200737 if (empty_content) {
738 /* the element here is not closing element, so we have the just indentation formatting before the child */
739 context->status = LYXML_ELEMENT;
740 return LY_EINVAL;
741 } else {
742 /* the element here is not closing element, so we have not allowed mixed content */
743 struct lyxml_elem *e = (struct lyxml_elem*)context->elements.objs[--context->elements.count];
744 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Mixed XML content is not allowed (%.*s).",
745 offset + (in - (*input)), &(*input)[-offset]);
746 free(e);
747 goto error;
748 }
Radek Krejcif2c721d2019-06-03 16:37:58 +0200749 }
750
751 /* closing element start - check the name if it matches the opening element tag */
752 LY_CHECK_ERR_GOTO(!context->elements.count,
753 LOGVAL(ctx, LY_VLOG_LINE, &fakecontext.line, LYVE_SYNTAX, "Opening and closing elements tag missmatch (\"%.*s\").",
754 name_len, name),
755 error);
756 e = (struct lyxml_elem*)context->elements.objs[context->elements.count - 1];
Michal Vasko14654712020-02-06 08:35:21 +0100757 if (e->prefix_len != prefix_len || e->name_len != name_len
758 || (prefix_len && strncmp(prefix, e->prefix, e->prefix_len)) || strncmp(name, e->name, e->name_len)) {
759 LOGVAL(ctx, LY_VLOG_LINE, &fakecontext.line, LYVE_SYNTAX,
760 "Opening and closing elements tag missmatch (\"%.*s\", expected \"%.*s\").",
761 name_len, name, e->name_len, e->name);
762 free(e);
763 --context->elements.count;
764 goto error;
765 }
Radek Krejcif2c721d2019-06-03 16:37:58 +0200766 /* opening and closing element tags matches */
767 /* return input back */
768 in = (*input);
Radek Krejci339e2de2019-05-17 14:28:24 +0200769 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200770 } else {
771 in += offset + 1;
772 }
773 goto success;
774 } else {
775 /* log lines */
776 if (in[offset] == '\n') {
777 ++context->line;
778 }
779
780 /* continue */
781 ++offset;
782 }
783 }
784 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF);
785error:
786 if (!(*buffer)) {
Radek Krejcibb9b1982019-04-08 14:24:59 +0200787 /* buffer not provided, buf is local */
Radek Krejci7a7fa902018-09-25 17:08:21 +0200788 free(buf);
Radek Krejcibb9b1982019-04-08 14:24:59 +0200789 } else if (buf) {
790 /* buf is shared with caller via buffer, but buf could be reallocated, so update the provided buffer */
791 (*buffer) = buf;
792 (*buffer_size) = size;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200793 }
794 return LY_EVALID;
795
796success:
Radek Krejcid70d1072018-10-09 14:20:47 +0200797 if (buf) {
798 if (!(*buffer) && size != len + 1) {
799 /* not using provided buffer, so fit the allocated buffer to what we really have inside */
800 p = realloc(buf, len + 1);
801 /* ignore realloc fail because we are reducing the buffer,
802 * so just return bigger buffer than needed */
803 if (p) {
804 size = len + 1;
805 buf = p;
806 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200807 }
Radek Krejcid70d1072018-10-09 14:20:47 +0200808 /* set terminating NULL byte */
809 buf[len] = '\0';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200810 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200811
Radek Krejcib1890642018-10-03 14:05:40 +0200812 context->status -= 1;
Radek Krejcid70d1072018-10-09 14:20:47 +0200813 if (buf) {
814 (*buffer) = buf;
815 (*buffer_size) = size;
816 (*output) = buf;
817 (*dynamic) = 1;
Radek Krejciee4cab22019-07-17 17:07:47 +0200818 (*length) = len;
819 } else if (output) {
Radek Krejcid70d1072018-10-09 14:20:47 +0200820 (*output) = (char*)start;
821 (*dynamic) = 0;
Radek Krejciee4cab22019-07-17 17:07:47 +0200822 (*length) = len;
Radek Krejcid70d1072018-10-09 14:20:47 +0200823 }
Radek Krejcid70d1072018-10-09 14:20:47 +0200824
Radek Krejci28e8cb52019-03-08 11:31:31 +0100825 if (context->status == LYXML_ATTRIBUTE) {
Radek Krejcifad79c92019-06-04 11:43:30 +0200826 /* skip whitespaces after the value */
827 ign_xmlws(context, in);
828
Radek Krejci28e8cb52019-03-08 11:31:31 +0100829 if (in[0] == '>') {
830 /* element terminated by > - termination of the opening tag */
831 context->status = LYXML_ELEM_CONTENT;
832 ++in;
833 } else if (in[0] == '/' && in[1] == '>') {
834 /* element terminated by /> - termination of an empty element */
835 context->status = LYXML_ELEMENT;
836 in += 2;
837
838 /* remove the closed element record from the tags list */
839 free(context->elements.objs[context->elements.count - 1]);
840 --context->elements.count;
Radek Krejci17a78d82019-05-15 15:49:55 +0200841
842 /* remove also the namespaces conneted with the element */
Radek Krejci17dca992019-05-17 10:53:27 +0200843 lyxml_ns_rm(context);
Radek Krejcifad79c92019-06-04 11:43:30 +0200844
845 if (!context->elements.count && in[0] == '\0') {
846 /* EOF */
847 context->status = LYXML_END;
848 }
849 } /* else another attribute */
Radek Krejci28e8cb52019-03-08 11:31:31 +0100850 }
851
852 (*input) = in;
Radek Krejci17a78d82019-05-15 15:49:55 +0200853 return rc;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200854
855#undef BUFSIZE
856#undef BUFSIZE_STEP
857#undef BUFSIZE_CHECK
858}
859
Radek Krejcid972c252018-09-25 13:23:39 +0200860LY_ERR
Radek Krejci7a7fa902018-09-25 17:08:21 +0200861lyxml_get_attribute(struct lyxml_context *context, const char **input,
Radek Krejcid972c252018-09-25 13:23:39 +0200862 const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
863{
864 struct ly_ctx *ctx = context->ctx; /* shortcut */
865 const char *in = (*input);
866 const char *id;
867 const char *endtag;
868 LY_ERR rc;
869 unsigned int c;
870 size_t endtag_len;
Radek Krejci17a78d82019-05-15 15:49:55 +0200871 int is_ns = 0;
Michal Vasko52927e22020-03-16 17:26:14 +0100872 const char *ns_prefix;
873 size_t ns_prefix_len;
Radek Krejcid972c252018-09-25 13:23:39 +0200874
875 /* initialize output variables */
876 (*prefix) = (*name) = NULL;
877 (*prefix_len) = (*name_len) = 0;
878
Michal Vasko52927e22020-03-16 17:26:14 +0100879 do {
880 /* skip initial whitespaces */
881 ign_xmlws(context, in);
Radek Krejcid972c252018-09-25 13:23:39 +0200882
Michal Vasko52927e22020-03-16 17:26:14 +0100883 if (in[0] == '\0') {
884 /* EOF - not expected at this place */
885 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF);
886 return LY_EVALID;
887 }
Radek Krejcid972c252018-09-25 13:23:39 +0200888
Michal Vasko52927e22020-03-16 17:26:14 +0100889 /* remember the identifier start before checking its format */
890 id = in;
Radek Krejcid972c252018-09-25 13:23:39 +0200891 rc = lyxml_check_qname(context, &in, &c, &endtag_len);
892 LY_CHECK_RET(rc);
Michal Vasko52927e22020-03-16 17:26:14 +0100893 if (c == ':') {
894 /* we have prefixed identifier */
895 endtag = in - endtag_len;
Radek Krejcid972c252018-09-25 13:23:39 +0200896
Michal Vasko52927e22020-03-16 17:26:14 +0100897 rc = lyxml_check_qname(context, &in, &c, &endtag_len);
898 LY_CHECK_RET(rc);
Radek Krejcid972c252018-09-25 13:23:39 +0200899
Michal Vasko52927e22020-03-16 17:26:14 +0100900 (*prefix) = id;
901 (*prefix_len) = endtag - id;
902 id = endtag + 1;
Radek Krejci17a78d82019-05-15 15:49:55 +0200903 }
Michal Vasko52927e22020-03-16 17:26:14 +0100904 if (!is_xmlws(c) && c != '=') {
905 in = in - endtag_len;
906 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in), in, "whitespace or '='");
907 return LY_EVALID;
908 }
909 in = in - endtag_len;
910 (*name) = id;
911 (*name_len) = in - id;
912
913 /* eat '=' and stop at the value beginning */
914 ign_xmlws(context, in);
915 if (in[0] != '=') {
916 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in), in, "'='");
917 return LY_EVALID;
918 }
919 ++in;
920 ign_xmlws(context, in);
921 if (in[0] != '\'' && in[0] != '"') {
922 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP,
923 LY_VCODE_INSTREXP_len(in), in, "either single or double quotation mark");
924 return LY_EVALID;
925 }
926 context->status = LYXML_ATTR_CONTENT;
927
928 is_ns = 0;
929 if (*prefix && *prefix_len == 5 && !strncmp(*prefix, "xmlns", 5)) {
930 is_ns = 1;
931 ns_prefix = *name;
932 ns_prefix_len = *name_len;
933 } else if (*name_len == 5 && !strncmp(*name, "xmlns", 5)) {
934 is_ns = 1;
935 ns_prefix = NULL;
936 ns_prefix_len = 0;
937 }
938 if (is_ns) {
939 /* instead of attribute, we have namespace specification,
940 * so process it automatically and then move to another attribute (if any) */
941 char *value = NULL;
942 size_t value_len = 0;
943 int dynamic = 0;
944
945 LY_CHECK_RET(lyxml_get_string(context, &in, &value, &value_len, &value, &value_len, &dynamic));
946 if ((rc = lyxml_ns_add(context, ns_prefix, ns_prefix_len, dynamic ? value : strndup(value, value_len)))) {
947 if (dynamic) {
948 free(value);
949 return rc;
950 }
951 }
952
953 /* do not return ns */
Radek Krejci17a78d82019-05-15 15:49:55 +0200954 (*prefix) = (*name) = NULL;
955 (*prefix_len) = (*name_len) = 0;
956 }
Michal Vasko52927e22020-03-16 17:26:14 +0100957 } while (is_ns && (context->status == LYXML_ATTRIBUTE));
Radek Krejci17a78d82019-05-15 15:49:55 +0200958
Radek Krejcid972c252018-09-25 13:23:39 +0200959 /* move caller's input */
960 (*input) = in;
961 return LY_SUCCESS;
962}
963
Radek Krejcib1890642018-10-03 14:05:40 +0200964void
965lyxml_context_clear(struct lyxml_context *context)
966{
967 unsigned int u;
968
969 ly_set_erase(&context->elements, free);
970 for (u = context->ns.count - 1; u + 1 > 0; --u) {
971 /* remove the ns structure */
972 free(((struct lyxml_ns *)context->ns.objs[u])->prefix);
973 free(((struct lyxml_ns *)context->ns.objs[u])->uri);
974 free(context->ns.objs[u]);
975 }
976 ly_set_erase(&context->ns, NULL);
Radek Krejcifad79c92019-06-04 11:43:30 +0200977 context->status = 0;
Radek Krejcib1890642018-10-03 14:05:40 +0200978}
Radek Krejcie7b95092019-05-15 11:03:07 +0200979
980LY_ERR
981lyxml_dump_text(struct lyout *out, const char *text, int attribute)
982{
983 LY_ERR ret = LY_SUCCESS;
984 unsigned int u;
985
986 if (!text) {
987 return 0;
988 }
989
990 for (u = 0; text[u]; u++) {
991 switch (text[u]) {
992 case '&':
993 ret = ly_print(out, "&amp;");
994 break;
995 case '<':
996 ret = ly_print(out, "&lt;");
997 break;
998 case '>':
999 /* not needed, just for readability */
1000 ret = ly_print(out, "&gt;");
1001 break;
1002 case '"':
1003 if (attribute) {
1004 ret = ly_print(out, "&quot;");
1005 break;
1006 }
1007 /* falls through */
1008 default:
1009 ly_write(out, &text[u], 1);
1010 }
1011 }
1012
1013 return ret;
1014}
1015
Michal Vasko52927e22020-03-16 17:26:14 +01001016LY_ERR
1017lyxml_get_prefixes(struct lyxml_context *ctx, const char *value, size_t value_len, struct ly_prefix **val_prefs)
1018{
1019 LY_ERR ret;
1020 uint32_t u, c;
1021 const struct lyxml_ns *ns;
1022 const char *start, *stop;
1023 struct ly_prefix *prefixes = NULL;
1024 size_t len;
1025
1026 for (stop = start = value; (size_t)(stop - value) < value_len; start = stop) {
1027 size_t bytes;
1028 ly_getutf8(&stop, &c, &bytes);
1029 if (is_xmlqnamestartchar(c)) {
1030 for (ly_getutf8(&stop, &c, &bytes);
1031 is_xmlqnamechar(c) && (size_t)(stop - value) < value_len;
1032 ly_getutf8(&stop, &c, &bytes));
1033 stop = stop - bytes;
1034 if (*stop == ':') {
1035 /* we have a possible prefix */
1036 len = stop - start;
1037 ns = lyxml_ns_get(ctx, start, len);
1038 if (ns) {
1039 struct ly_prefix *p = NULL;
1040
1041 /* check whether we do not already have this prefix stored */
1042 LY_ARRAY_FOR(prefixes, u) {
1043 if (!ly_strncmp(prefixes[u].pref, start, len)) {
1044 p = &prefixes[u];
1045 break;
1046 }
1047 }
1048 if (!p) {
1049 LY_ARRAY_NEW_GOTO(ctx->ctx, prefixes, p, ret, error);
1050 p->pref = lydict_insert(ctx->ctx, start, len);
1051 p->ns = lydict_insert(ctx->ctx, ns->uri, 0);
1052 } /* else the prefix already present */
1053 }
1054 }
1055 stop = stop + bytes;
1056 }
1057 }
1058
1059 *val_prefs = prefixes;
1060 return LY_SUCCESS;
1061
1062error:
1063 LY_ARRAY_FOR(prefixes, u) {
1064 lydict_remove(ctx->ctx, prefixes[u].pref);
1065 }
1066 LY_ARRAY_FREE(prefixes);
1067 return ret;
1068}
1069
1070LY_ERR
1071lyxml_value_compare(const char *value1, const struct ly_prefix *prefs1, const char *value2, const struct ly_prefix *prefs2)
1072{
1073 const char *ptr1, *ptr2, *ns1, *ns2;
1074 uint32_t u1, u2;
1075 int len;
1076
1077 if (!value1 && !value2) {
1078 return LY_SUCCESS;
1079 }
1080 if ((value1 && !value2) || (!value1 && value2)) {
1081 return LY_ENOT;
1082 }
1083
1084 ptr1 = value1;
1085 ptr2 = value2;
1086 while (ptr1[0] && ptr2[0]) {
1087 if (ptr1[0] != ptr2[0]) {
1088 /* it can be a start of prefix that maps to the same module */
1089 ns1 = ns2 = NULL;
1090 if (prefs1) {
1091 /* find module of the first prefix, if any */
1092 LY_ARRAY_FOR(prefs1, u1) {
1093 len = strlen(prefs1[u1].pref);
1094 if (!strncmp(ptr1, prefs1[u1].pref, len) && (ptr1[len] == ':')) {
1095 ns1 = prefs1[u1].ns;
1096 break;
1097 }
1098 }
1099 }
1100 if (prefs2) {
1101 /* find module of the second prefix, if any */
1102 LY_ARRAY_FOR(prefs2, u2) {
1103 len = strlen(prefs2[u2].pref);
1104 if (!strncmp(ptr2, prefs2[u2].pref, len) && (ptr2[len] == ':')) {
1105 ns2 = prefs2[u2].ns;
1106 break;
1107 }
1108 }
1109 }
1110
1111 if (!ns1 || !ns2 || (ns1 != ns2)) {
1112 /* not a prefix or maps to different namespaces */
1113 break;
1114 }
1115
1116 /* skip prefixes in both values (':' is skipped as iter) */
1117 ptr1 += strlen(prefs1[u1].pref);
1118 ptr2 += strlen(prefs2[u2].pref);
1119 }
1120
1121 ++ptr1;
1122 ++ptr2;
1123 }
1124 if (ptr1[0] || ptr2[0]) {
1125 /* not a match or simply different lengths */
1126 return LY_ENOT;
1127 }
1128
1129 return LY_SUCCESS;
1130}