blob: a7e40f795b7f0c4b845bf02d5c2b2b2726399ee1 [file] [log] [blame]
Radek Krejcid91dbaf2018-09-21 15:51:39 +02001/**
2 * @file xml.c
3 * @author Radek Krejci <rkrejci@cesnet.cz>
4 * @brief Generic XML parser implementation for libyang
5 *
6 * Copyright (c) 2015 - 2018 CESNET, z.s.p.o.
7 *
8 * This source code is licensed under BSD 3-Clause License (the "License").
9 * You may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * https://opensource.org/licenses/BSD-3-Clause
13 */
14
Radek Krejcic1c03d62018-11-27 10:52:43 +010015#include "common.h"
Radek Krejci4b74d5e2018-09-26 14:30:55 +020016
Radek Krejcib1890642018-10-03 14:05:40 +020017#include <assert.h>
Radek Krejci7a7fa902018-09-25 17:08:21 +020018#include <ctype.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020019#include <stdbool.h>
20#include <stdint.h>
Radek Krejci4b74d5e2018-09-26 14:30:55 +020021#include <string.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020022
23#include "libyang.h"
24#include "xml.h"
Radek Krejcid91dbaf2018-09-21 15:51:39 +020025
/*
 * Move input p by s characters; if the new position is the terminating NUL byte (EOF),
 * log the error using lyxml_context c and return LY_EVALID from the CALLING function.
 *
 * Wrapped in do/while(0) so the macro behaves as a single statement (e.g. in an unbraced if).
 */
#define move_input(c,p,s) \
    do { \
        (p) += (s); \
        LY_CHECK_ERR_RET(!(p)[0], LOGVAL((c)->ctx, LY_VLOG_LINE, &(c)->line, LY_VCODE_EOF), LY_EVALID); \
    } while (0)

/*
 * Ignore whitespaces in the input string p (p is advanced in place),
 * counting the skipped newlines into the line counter of lyxml_context c.
 */
#define ign_xmlws(c,p) \
    do { \
        while (is_xmlws(*(p))) { \
            if (*(p) == '\n') { \
                ++(c)->line; \
            } \
            ++(p); \
        } \
    } while (0)
31
/**
 * @brief Skip characters until the delimiter delim of the length delim_len is found.
 *
 * While skipping, the number of passed newline characters is counted. A newline is counted
 * only when it differs from the first character of the delimiter.
 *
 * @param[in] input NUL-terminated string to search in.
 * @param[in] delim Delimiter to search for.
 * @param[in] delim_len Number of bytes of delim to match.
 * @param[out] newlines Number of newlines passed before the returned position (always reset to 0 first).
 * @return Pointer to the beginning of the first occurrence of the delimiter in input,
 * NULL if the end of input was reached without finding it.
 */
static const char *
ign_todelim(register const char *input, const char *delim, size_t delim_len, size_t *newlines)
{
    const char *cur;
    size_t matched;

    *newlines = 0;

    for (cur = input; *cur != '\0'; ++cur) {
        if (*cur == *delim) {
            /* candidate position, compare the delimiter byte by byte */
            matched = 0;
            while (matched < delim_len && cur[matched] == delim[matched]) {
                ++matched;
            }
            if (matched == delim_len) {
                return cur;
            }
        } else if (*cur == '\n') {
            ++(*newlines);
        }
    }

    /* end of input reached without a match */
    return NULL;
}
66
Radek Krejci4b74d5e2018-09-26 14:30:55 +020067/**
Radek Krejci7a7fa902018-09-25 17:08:21 +020068 * Store UTF-8 character specified as 4byte integer into the dst buffer.
69 * Returns number of written bytes (4 max), expects that dst has enough space.
70 *
71 * UTF-8 mapping:
72 * 00000000 -- 0000007F: 0xxxxxxx
73 * 00000080 -- 000007FF: 110xxxxx 10xxxxxx
74 * 00000800 -- 0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
75 * 00010000 -- 001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
76 *
77 * Includes checking for valid characters (following RFC 7950, sec 9.4)
78 */
79static LY_ERR
Radek Krejci117d2082018-09-26 10:05:14 +020080lyxml_pututf8(char *dst, uint32_t value, size_t *bytes_written)
Radek Krejci7a7fa902018-09-25 17:08:21 +020081{
82 if (value < 0x80) {
83 /* one byte character */
84 if (value < 0x20 &&
85 value != 0x09 &&
86 value != 0x0a &&
87 value != 0x0d) {
88 return LY_EINVAL;
89 }
90
91 dst[0] = value;
92 (*bytes_written) = 1;
93 } else if (value < 0x800) {
94 /* two bytes character */
95 dst[0] = 0xc0 | (value >> 6);
96 dst[1] = 0x80 | (value & 0x3f);
97 (*bytes_written) = 2;
98 } else if (value < 0xfffe) {
99 /* three bytes character */
100 if (((value & 0xf800) == 0xd800) ||
101 (value >= 0xfdd0 && value <= 0xfdef)) {
102 /* exclude surrogate blocks %xD800-DFFF */
103 /* exclude noncharacters %xFDD0-FDEF */
104 return LY_EINVAL;
105 }
106
107 dst[0] = 0xe0 | (value >> 12);
108 dst[1] = 0x80 | ((value >> 6) & 0x3f);
109 dst[2] = 0x80 | (value & 0x3f);
110
111 (*bytes_written) = 3;
112 } else if (value < 0x10fffe) {
113 if ((value & 0xffe) == 0xffe) {
114 /* exclude noncharacters %xFFFE-FFFF, %x1FFFE-1FFFF, %x2FFFE-2FFFF, %x3FFFE-3FFFF, %x4FFFE-4FFFF,
115 * %x5FFFE-5FFFF, %x6FFFE-6FFFF, %x7FFFE-7FFFF, %x8FFFE-8FFFF, %x9FFFE-9FFFF, %xAFFFE-AFFFF,
116 * %xBFFFE-BFFFF, %xCFFFE-CFFFF, %xDFFFE-DFFFF, %xEFFFE-EFFFF, %xFFFFE-FFFFF, %x10FFFE-10FFFF */
117 return LY_EINVAL;
118 }
119 /* four bytes character */
120 dst[0] = 0xf0 | (value >> 18);
121 dst[1] = 0x80 | ((value >> 12) & 0x3f);
122 dst[2] = 0x80 | ((value >> 6) & 0x3f);
123 dst[3] = 0x80 | (value & 0x3f);
124
125 (*bytes_written) = 4;
126 }
127 return LY_SUCCESS;
128}
129
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200130/**
131 * @brief Check/Get an XML qualified name from the input string.
132 *
133 * The identifier must have at least one valid character complying the name start character constraints.
134 * The identifier is terminated by the first character, which does not comply to the name character constraints.
135 *
136 * See https://www.w3.org/TR/xml-names/#NT-NCName
137 *
138 * @param[in] context XML context to track lines or store errors into libyang context.
139 * @param[in,out] input Input string to process, updated according to the processed/read data.
140 * Note that the term_char is also read, so input points after the term_char at the end.
141 * @param[out] term_char The first character in the input string which does not compy to the name constraints.
142 * @param[out] term_char_len Number of bytes used to encode UTF8 term_char. Serves to be able to go back in input string.
143 * @return LY_ERR value.
144 */
145static LY_ERR
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200146lyxml_check_qname(struct lyxml_context *context, const char **input, unsigned int *term_char, size_t *term_char_len)
147{
148 unsigned int c;
149 const char *id = (*input);
150 LY_ERR rc;
151
152 /* check NameStartChar (minus colon) */
Radek Krejcib416be62018-10-01 14:51:45 +0200153 LY_CHECK_ERR_RET(ly_getutf8(input, &c, NULL) != LY_SUCCESS,
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200154 LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INCHAR, (*input)[0]), LY_EVALID);
155 LY_CHECK_ERR_RET(!is_xmlqnamestartchar(c),
156 LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX,
157 "Identifier \"%s\" starts with invalid character.", id),
158 LY_EVALID);
159
160 /* check rest of the identifier */
Radek Krejcib416be62018-10-01 14:51:45 +0200161 for (rc = ly_getutf8(input, &c, term_char_len);
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200162 rc == LY_SUCCESS && is_xmlqnamechar(c);
Radek Krejcib416be62018-10-01 14:51:45 +0200163 rc = ly_getutf8(input, &c, term_char_len));
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200164 LY_CHECK_ERR_RET(rc != LY_SUCCESS, LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INCHAR, (*input)[0]), LY_EVALID);
165
166 (*term_char) = c;
167 return LY_SUCCESS;
168}
169
Radek Krejci7a7fa902018-09-25 17:08:21 +0200170LY_ERR
Radek Krejcid70d1072018-10-09 14:20:47 +0200171lyxml_get_string(struct lyxml_context *context, const char **input, char **buffer, size_t *buffer_size, char **output, size_t *length, int *dynamic)
Radek Krejci7a7fa902018-09-25 17:08:21 +0200172{
173#define BUFSIZE 4096
174#define BUFSIZE_STEP 4096
175#define BUFSIZE_CHECK(CTX, BUF, SIZE, CURR, NEED) \
176 if (CURR+NEED >= SIZE) { \
177 BUF = ly_realloc(BUF, SIZE + BUFSIZE_STEP); \
178 LY_CHECK_ERR_RET(!BUF, LOGMEM(CTX), LY_EMEM); \
179 SIZE += BUFSIZE_STEP; \
180 }
181
182 struct ly_ctx *ctx = context->ctx; /* shortcut */
Radek Krejcid70d1072018-10-09 14:20:47 +0200183 const char *in = (*input), *start;
184 char *buf = NULL, delim;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200185 size_t offset; /* read offset in input buffer */
Radek Krejcid70d1072018-10-09 14:20:47 +0200186 size_t len; /* length of the output string (write offset in output buffer) */
Radek Krejci7a7fa902018-09-25 17:08:21 +0200187 size_t size; /* size of the output buffer */
188 void *p;
Radek Krejci117d2082018-09-26 10:05:14 +0200189 uint32_t n;
190 size_t u, newlines;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200191 bool empty_content = false;
192 LY_ERR rc;
193
Radek Krejcib1890642018-10-03 14:05:40 +0200194 assert(context);
195 assert(context->status == LYXML_ELEM_CONTENT || context->status == LYXML_ATTR_CONTENT);
196
Radek Krejci7a7fa902018-09-25 17:08:21 +0200197 if (in[0] == '\'') {
198 delim = '\'';
199 ++in;
200 } else if (in[0] == '"') {
201 delim = '"';
202 ++in;
203 } else {
204 delim = '<';
205 empty_content = true;
206 }
Radek Krejcid70d1072018-10-09 14:20:47 +0200207 start = in;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200208
209 if (empty_content) {
210 /* only when processing element's content - try to ignore whitespaces used to format XML data
211 * before element's child or closing tag */
Radek Krejci117d2082018-09-26 10:05:14 +0200212 for (offset = newlines = 0; in[offset] && is_xmlws(in[offset]); ++offset) {
213 if (in[offset] == '\n') {
214 ++newlines;
215 }
216 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200217 LY_CHECK_ERR_RET(!in[offset], LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF), LY_EVALID);
Radek Krejci117d2082018-09-26 10:05:14 +0200218 context->line += newlines;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200219 if (in[offset] == '<') {
Radek Krejcied6c6ad2018-09-26 09:10:18 +0200220 (*input) = in + offset;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200221 return LY_EINVAL;
222 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200223 }
Radek Krejcid70d1072018-10-09 14:20:47 +0200224 /* init */
225 offset = len = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200226
Radek Krejcid70d1072018-10-09 14:20:47 +0200227 if (0) {
228getbuffer:
229 /* prepare output buffer */
230 if (*buffer) {
231 buf = *buffer;
232 size = *buffer_size;
233 } else {
234 buf = malloc(BUFSIZE);
235 size = BUFSIZE;
236 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
237 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200238 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200239
240 /* parse */
241 while (in[offset]) {
242 if (in[offset] == '&') {
Radek Krejcid70d1072018-10-09 14:20:47 +0200243 if (!buf) {
244 /* it is necessary to modify the input, so we will need a dynamically allocated buffer */
245 goto getbuffer;
246 }
247
Radek Krejci7a7fa902018-09-25 17:08:21 +0200248 if (offset) {
249 /* store what we have so far */
250 BUFSIZE_CHECK(ctx, buf, size, len, offset);
251 memcpy(&buf[len], in, offset);
252 len += offset;
253 in += offset;
254 offset = 0;
255 }
256 /* process reference */
257 /* we will need 4 bytes at most since we support only the predefined
258 * (one-char) entities and character references */
259 BUFSIZE_CHECK(ctx, buf, size, len, 4);
260 ++offset;
261 if (in[offset] != '#') {
262 /* entity reference - only predefined references are supported */
263 if (!strncmp(&in[offset], "lt;", 3)) {
264 buf[len++] = '<';
265 in += 4; /* &lt; */
266 } else if (!strncmp(&in[offset], "gt;", 3)) {
267 buf[len++] = '>';
268 in += 4; /* &gt; */
269 } else if (!strncmp(&in[offset], "amp;", 4)) {
270 buf[len++] = '&';
271 in += 5; /* &amp; */
272 } else if (!strncmp(&in[offset], "apos;", 5)) {
273 buf[len++] = '\'';
274 in += 6; /* &apos; */
275 } else if (!strncmp(&in[offset], "quot;", 5)) {
276 buf[len++] = '\"';
277 in += 6; /* &quot; */
278 } else {
Radek Krejcied6c6ad2018-09-26 09:10:18 +0200279 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX,
280 "Entity reference \"%.*s\" not supported, only predefined references allowed.", 10, &in[offset-1]);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200281 goto error;
282 }
283 offset = 0;
284 } else {
285 p = (void*)&in[offset - 1];
286 /* character reference */
287 ++offset;
288 if (isdigit(in[offset])) {
289 for (n = 0; isdigit(in[offset]); offset++) {
290 n = (10 * n) + (in[offset] - '0');
291 }
292 } else if (in[offset] == 'x' && isxdigit(in[offset + 1])) {
293 for (n = 0, ++offset; isxdigit(in[offset]); offset++) {
294 if (isdigit(in[offset])) {
295 u = (in[offset] - '0');
296 } else if (in[offset] > 'F') {
297 u = 10 + (in[offset] - 'a');
298 } else {
299 u = 10 + (in[offset] - 'A');
300 }
301 n = (16 * n) + u;
302 }
303 } else {
Radek Krejcied6c6ad2018-09-26 09:10:18 +0200304 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Invalid character reference \"%.*s\".", 12, p);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200305 goto error;
306
307 }
308 LY_CHECK_ERR_GOTO(in[offset] != ';',
309 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP,
310 LY_VCODE_INSTREXP_len(&in[offset]), &in[offset], ";"),
311 error);
312 ++offset;
313 rc = lyxml_pututf8(&buf[len], n, &u);
314 LY_CHECK_ERR_GOTO(rc, LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX,
Radek Krejci117d2082018-09-26 10:05:14 +0200315 "Invalid character reference \"%.*s\" (0x%08x).", 12, p, n),
Radek Krejci7a7fa902018-09-25 17:08:21 +0200316 error);
317 len += u;
318 in += offset;
319 offset = 0;
320 }
321 } else if (in[offset] == delim) {
322 /* end of string */
Radek Krejcid70d1072018-10-09 14:20:47 +0200323 if (buf) {
324 if (len + offset >= size) {
325 buf = ly_realloc(buf, len + offset + 1);
326 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
327 size = len + offset + 1;
328 }
329 memcpy(&buf[len], in, offset);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200330 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200331 len += offset;
332 /* in case of element content, keep the leading <,
Radek Krejcib1890642018-10-03 14:05:40 +0200333 * for attribute's value move after the terminating quotation mark */
334 if (context->status == LYXML_ELEM_CONTENT) {
Radek Krejci7a7fa902018-09-25 17:08:21 +0200335 in += offset;
336 } else {
337 in += offset + 1;
338 }
339 goto success;
340 } else {
341 /* log lines */
342 if (in[offset] == '\n') {
343 ++context->line;
344 }
345
346 /* continue */
347 ++offset;
348 }
349 }
350 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF);
351error:
352 if (!(*buffer)) {
Radek Krejcibb9b1982019-04-08 14:24:59 +0200353 /* buffer not provided, buf is local */
Radek Krejci7a7fa902018-09-25 17:08:21 +0200354 free(buf);
Radek Krejcibb9b1982019-04-08 14:24:59 +0200355 } else if (buf) {
356 /* buf is shared with caller via buffer, but buf could be reallocated, so update the provided buffer */
357 (*buffer) = buf;
358 (*buffer_size) = size;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200359 }
360 return LY_EVALID;
361
362success:
Radek Krejcid70d1072018-10-09 14:20:47 +0200363 if (buf) {
364 if (!(*buffer) && size != len + 1) {
365 /* not using provided buffer, so fit the allocated buffer to what we really have inside */
366 p = realloc(buf, len + 1);
367 /* ignore realloc fail because we are reducing the buffer,
368 * so just return bigger buffer than needed */
369 if (p) {
370 size = len + 1;
371 buf = p;
372 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200373 }
Radek Krejcid70d1072018-10-09 14:20:47 +0200374 /* set terminating NULL byte */
375 buf[len] = '\0';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200376 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200377
Radek Krejcib1890642018-10-03 14:05:40 +0200378 context->status -= 1;
Radek Krejcid70d1072018-10-09 14:20:47 +0200379 if (buf) {
380 (*buffer) = buf;
381 (*buffer_size) = size;
382 (*output) = buf;
383 (*dynamic) = 1;
384 } else {
385 (*output) = (char*)start;
386 (*dynamic) = 0;
387 }
388 (*length) = len;
389
Radek Krejci28e8cb52019-03-08 11:31:31 +0100390 if (context->status == LYXML_ATTRIBUTE) {
391 if (in[0] == '>') {
392 /* element terminated by > - termination of the opening tag */
393 context->status = LYXML_ELEM_CONTENT;
394 ++in;
395 } else if (in[0] == '/' && in[1] == '>') {
396 /* element terminated by /> - termination of an empty element */
397 context->status = LYXML_ELEMENT;
398 in += 2;
399
400 /* remove the closed element record from the tags list */
401 free(context->elements.objs[context->elements.count - 1]);
402 --context->elements.count;
403 }
404 }
405
406 (*input) = in;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200407 return LY_SUCCESS;
408
409#undef BUFSIZE
410#undef BUFSIZE_STEP
411#undef BUFSIZE_CHECK
412}
413
Radek Krejcid972c252018-09-25 13:23:39 +0200414LY_ERR
Radek Krejci7a7fa902018-09-25 17:08:21 +0200415lyxml_get_attribute(struct lyxml_context *context, const char **input,
Radek Krejcid972c252018-09-25 13:23:39 +0200416 const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
417{
418 struct ly_ctx *ctx = context->ctx; /* shortcut */
419 const char *in = (*input);
420 const char *id;
421 const char *endtag;
422 LY_ERR rc;
423 unsigned int c;
424 size_t endtag_len;
425
426 /* initialize output variables */
427 (*prefix) = (*name) = NULL;
428 (*prefix_len) = (*name_len) = 0;
429
430 /* skip initial whitespaces */
431 ign_xmlws(context, in);
432
433 if (in[0] == '\0') {
434 /* EOF - not expected at this place */
435 return LY_EINVAL;
Radek Krejcid972c252018-09-25 13:23:39 +0200436 }
437
438 /* remember the identifier start before checking its format */
439 id = in;
440 rc = lyxml_check_qname(context, &in, &c, &endtag_len);
441 LY_CHECK_RET(rc);
442 if (c == ':') {
443 /* we have prefixed identifier */
444 endtag = in - endtag_len;
445
446 rc = lyxml_check_qname(context, &in, &c, &endtag_len);
447 LY_CHECK_RET(rc);
448
449 (*prefix) = id;
450 (*prefix_len) = endtag - id;
451 id = endtag + 1;
452 }
453 if (!is_xmlws(c) && c != '=') {
454 in = in - endtag_len;
455 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in), in, "whitespace or '='");
456 return LY_EVALID;
457 }
458 in = in - endtag_len;
459 (*name) = id;
460 (*name_len) = in - id;
461
462 /* eat '=' and stop at the value beginning */
463 ign_xmlws(context, in);
464 if (in[0] != '=') {
465 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in), in, "'='");
466 return LY_EVALID;
467 }
468 ++in;
469 ign_xmlws(context, in);
470 if (in[0] != '\'' && in[0] != '"') {
Radek Krejcib1890642018-10-03 14:05:40 +0200471 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP,
472 LY_VCODE_INSTREXP_len(in), in, "either single or double quotation mark");
Radek Krejcid972c252018-09-25 13:23:39 +0200473 return LY_EVALID;
474 }
Radek Krejcib1890642018-10-03 14:05:40 +0200475 context->status = LYXML_ATTR_CONTENT;
Radek Krejcid972c252018-09-25 13:23:39 +0200476
Radek Krejcid972c252018-09-25 13:23:39 +0200477 /* move caller's input */
478 (*input) = in;
479 return LY_SUCCESS;
480}
481
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200482LY_ERR
Radek Krejci7a7fa902018-09-25 17:08:21 +0200483lyxml_get_element(struct lyxml_context *context, const char **input,
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200484 const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
485{
486 struct ly_ctx *ctx = context->ctx; /* shortcut */
487 const char *in = (*input);
488 const char *endtag;
489 const char *sectname;
490 const char *id;
491 size_t endtag_len, newlines;
Radek Krejcib1890642018-10-03 14:05:40 +0200492 bool loop = true, closing = false;
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200493 unsigned int c;
494 LY_ERR rc;
Radek Krejcib1890642018-10-03 14:05:40 +0200495 struct lyxml_elem *e;
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200496
497 /* initialize output variables */
498 (*prefix) = (*name) = NULL;
499 (*prefix_len) = (*name_len) = 0;
500
501 while (loop) {
502 ign_xmlws(context, in);
503
504 if (in[0] == '\0') {
505 /* EOF */
Radek Krejcib1890642018-10-03 14:05:40 +0200506 context->status = LYXML_END;
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200507 goto success;
508 } else if (in[0] != '<') {
509 return LY_EINVAL;
510 }
511 move_input(context, in, 1);
512
513 if (in[0] == '!') {
514 move_input(context, in, 1);
515 /* sections to ignore */
516 if (!strncmp(in, "--", 2)) {
517 /* comment */
518 move_input(context, in, 2);
519 sectname = "Comment";
520 endtag = "-->";
521 endtag_len = 3;
522 } else if (!strncmp(in, "[CDATA[", 7)) {
523 /* CDATA section */
524 move_input(context, in, 7);
525 sectname = "CData";
526 endtag = "]]>";
527 endtag_len = 3;
528 } else if (!strncmp(in, "DOCTYPE", 7)) {
529 /* Document type declaration - not supported */
530 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_NSUPP, "Document Type Declaration");
531 return LY_EVALID;
Radek Krejcic5c31bb2019-04-08 14:40:52 +0200532 } else {
533 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Unknown XML section \"%.20s\".", &in[-2]);
534 return LY_EVALID;
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200535 }
536 in = ign_todelim(in, endtag, endtag_len, &newlines);
537 LY_CHECK_ERR_RET(!in, LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_NTERM, sectname), LY_EVALID);
538 context->line += newlines;
539 in += endtag_len;
540 } else if (in[0] == '?') {
541 in = ign_todelim(in, "?>", 2, &newlines);
542 LY_CHECK_ERR_RET(!in, LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_NTERM, "Declaration"), LY_EVALID);
543 context->line += newlines;
544 in += 2;
Radek Krejcib1890642018-10-03 14:05:40 +0200545 } else if (in[0] == '/') {
546 /* closing element */
547 closing = true;
548 ++in;
549 goto element;
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200550 } else {
551 /* element */
Radek Krejcib1890642018-10-03 14:05:40 +0200552element:
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200553 ign_xmlws(context, in);
554 LY_CHECK_ERR_RET(!in[0], LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF), LY_EVALID);
555
556 /* remember the identifier start before checking its format */
557 id = in;
558 rc = lyxml_check_qname(context, &in, &c, &endtag_len);
559 LY_CHECK_RET(rc);
560 if (c == ':') {
561 /* we have prefixed identifier */
562 endtag = in - endtag_len;
563
564 rc = lyxml_check_qname(context, &in, &c, &endtag_len);
565 LY_CHECK_RET(rc);
566
567 (*prefix) = id;
568 (*prefix_len) = endtag - id;
569 id = endtag + 1;
570 }
571 if (!is_xmlws(c) && c != '/' && c != '>') {
572 in = in - endtag_len;
Radek Krejcid972c252018-09-25 13:23:39 +0200573 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in), in,
574 "whitespace or element tag termination ('>' or '/>'");
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200575 return LY_EVALID;
576 }
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200577 (*name) = id;
Radek Krejcib1890642018-10-03 14:05:40 +0200578 (*name_len) = in - endtag_len - id;
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200579
Radek Krejcib1890642018-10-03 14:05:40 +0200580 if (is_xmlws(c)) {
581 /* go to the next meaningful input */
582 ign_xmlws(context, in);
583 LY_CHECK_ERR_RET(!in[0], LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF), LY_EVALID);
584 c = in[0];
585 ++in;
586 endtag_len = 1;
587 }
588
589 if (closing) {
590 /* match opening and closing element tags */
591 LY_CHECK_ERR_RET(
592 !context->elements.count,
Radek Krejci3fbc9872019-04-16 16:50:01 +0200593 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Opening and closing elements tag missmatch (\"%.*s\").", *name_len, *name),
Radek Krejcib1890642018-10-03 14:05:40 +0200594 LY_EVALID);
595 e = (struct lyxml_elem*)context->elements.objs[context->elements.count - 1];
596 LY_CHECK_ERR_RET(e->prefix_len != *prefix_len || e->name_len != *name_len
597 || (*prefix_len && strncmp(*prefix, e->prefix, e->prefix_len)) || strncmp(*name, e->name, e->name_len),
Radek Krejci3fbc9872019-04-16 16:50:01 +0200598 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Opening and closing elements tag missmatch (\"%.*s\").", *name_len, *name),
Radek Krejcib1890642018-10-03 14:05:40 +0200599 LY_EVALID);
600 /* opening and closing element tags matches, remove record from the opening tags list */
601 free(e);
602 --context->elements.count;
603 /* do not return element information to announce closing element being currently processed */
604 *name = *prefix = NULL;
605 *name_len = *prefix_len = 0;
606
607 if (c == '>') {
608 /* end of closing element */
609 context->status = LYXML_ELEMENT;
610 } else {
611 in -= endtag_len;
612 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Unexpected data \"%.*s\" in closing element tag.",
613 LY_VCODE_INSTREXP_len(in), in);
614 return LY_EVALID;
615 }
616 } else {
617 if (c == '>') {
618 /* end of opening element */
619 context->status = LYXML_ELEM_CONTENT;
620 } else if (c == '/' && in[0] == '>') {
621 /* empty element closing */
622 context->status = LYXML_ELEMENT;
623 ++in;
624 } else {
625 /* attribute */
626 context->status = LYXML_ATTRIBUTE;
627 in -= endtag_len;
628 }
629
630 if (context->status != LYXML_ELEMENT) {
631 /* store element opening tag information */
632 e = malloc(sizeof *e);
633 LY_CHECK_ERR_RET(!e, LOGMEM(ctx), LY_EMEM);
634 e->name = *name;
635 e->prefix = *prefix;
636 e->name_len = *name_len;
637 e->prefix_len = *prefix_len;
638 ly_set_add(&context->elements, e, LY_SET_OPT_USEASLIST);
639 }
640 }
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200641 loop = false;
642 }
643 }
644
645success:
646 /* move caller's input */
647 (*input) = in;
648 return LY_SUCCESS;
649}
650
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200651LY_ERR
652lyxml_ns_add(struct lyxml_context *context, const char *element_name, const char *prefix, size_t prefix_len, char *uri)
653{
654 struct lyxml_ns *ns;
655
656 ns = malloc(sizeof *ns);
657 LY_CHECK_ERR_RET(!ns, LOGMEM(context->ctx), LY_EMEM);
658
Radek Krejcie0734d22019-04-05 15:54:28 +0200659 /* to distinguish 2 elements, we need not only the name, but also its depth in the XML tree.
660 * In case some dictionary is used to store elements' names (so name strings of 2 distinguish nodes
661 * actually points to the same memory), so the depth is necessary to distinguish parent/child nodes
662 * of the same name. Otherwise, the namespace defined in parent could be removed when leaving child node. */
663 ns->element_depth = context->elements.count;
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200664 ns->element = element_name;
Radek Krejcie0734d22019-04-05 15:54:28 +0200665
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200666 ns->uri = uri;
667 if (prefix) {
668 ns->prefix = strndup(prefix, prefix_len);
669 LY_CHECK_ERR_RET(!ns->prefix, LOGMEM(context->ctx); free(ns), LY_EMEM);
670 } else {
671 ns->prefix = NULL;
672 }
673
674 LY_CHECK_ERR_RET(ly_set_add(&context->ns, ns, LY_SET_OPT_USEASLIST) == -1, free(ns->prefix), LY_EMEM);
675 return LY_SUCCESS;
676}
677
678const struct lyxml_ns *
679lyxml_ns_get(struct lyxml_context *context, const char *prefix, size_t prefix_len)
680{
681 unsigned int u;
682 struct lyxml_ns *ns;
683
684 for (u = context->ns.count - 1; u + 1 > 0; --u) {
685 ns = (struct lyxml_ns *)context->ns.objs[u];
686 if (prefix) {
687 if (!strncmp(prefix, ns->prefix, prefix_len) && ns->prefix[prefix_len] == '\0') {
688 return ns;
689 }
690 } else if (!ns->prefix) {
691 /* default namespace */
692 return ns;
693 }
694 }
695
696 return NULL;
697}
698
699LY_ERR
700lyxml_ns_rm(struct lyxml_context *context, const char *element_name)
701{
702 unsigned int u;
703
704 for (u = context->ns.count - 1; u + 1 > 0; --u) {
Radek Krejcie0734d22019-04-05 15:54:28 +0200705 if (((struct lyxml_ns *)context->ns.objs[u])->element != element_name ||
706 ((struct lyxml_ns *)context->ns.objs[u])->element_depth != context->elements.count + 1) {
707 /* we are done, the namespaces from a single element are supposed to be together;
708 * the second condition is there to distinguish parent/child elements with the same name
709 * (which are for some reason stored at the same memory chunk), so we need to distinguish
710 * level of the node */
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200711 break;
712 }
713 /* remove the ns structure */
714 free(((struct lyxml_ns *)context->ns.objs[u])->prefix);
715 free(((struct lyxml_ns *)context->ns.objs[u])->uri);
716 free(context->ns.objs[u]);
717 --context->ns.count;
718 }
719
720 if (!context->ns.count) {
721 /* cleanup the context's namespaces storage */
722 ly_set_erase(&context->ns, NULL);
723 }
724
725 return LY_SUCCESS;
726}
Radek Krejcib1890642018-10-03 14:05:40 +0200727
728void
729lyxml_context_clear(struct lyxml_context *context)
730{
731 unsigned int u;
732
733 ly_set_erase(&context->elements, free);
734 for (u = context->ns.count - 1; u + 1 > 0; --u) {
735 /* remove the ns structure */
736 free(((struct lyxml_ns *)context->ns.objs[u])->prefix);
737 free(((struct lyxml_ns *)context->ns.objs[u])->uri);
738 free(context->ns.objs[u]);
739 }
740 ly_set_erase(&context->ns, NULL);
741}