blob: 147170a03ff0ceb4f219c5ee543623f6273c5057 [file] [log] [blame]
/**
 * @file xml.c
 * @author Radek Krejci <rkrejci@cesnet.cz>
 * @brief Generic XML parser implementation for libyang
 *
 * Copyright (c) 2015 - 2018 CESNET, z.s.p.o.
 *
 * This source code is licensed under BSD 3-Clause License (the "License").
 * You may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://opensource.org/licenses/BSD-3-Clause
 */
14
Radek Krejcic1c03d62018-11-27 10:52:43 +010015#include "common.h"
Radek Krejci4b74d5e2018-09-26 14:30:55 +020016
Radek Krejcib1890642018-10-03 14:05:40 +020017#include <assert.h>
Radek Krejci7a7fa902018-09-25 17:08:21 +020018#include <ctype.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020019#include <stdbool.h>
20#include <stdint.h>
Radek Krejci4b74d5e2018-09-26 14:30:55 +020021#include <string.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020022
23#include "libyang.h"
24#include "xml.h"
Radek Krejcid91dbaf2018-09-21 15:51:39 +020025
/*
 * Move input p by s characters; when the new position is the terminating NUL (EOF),
 * log LY_VCODE_EOF with the line from lyxml_context c and leave the enclosing
 * function with LY_EVALID (via LY_CHECK_ERR_RET).
 *
 * Wrapped in do { } while (0) so that it behaves as a single statement, p is
 * evaluated several times, so it must be a plain lvalue without side effects.
 */
#define move_input(c,p,s) \
    do { \
        (p) += (s); \
        LY_CHECK_ERR_RET(!(p)[0], LOGVAL((c)->ctx, LY_VLOG_LINE, &(c)->line, LY_VCODE_EOF), LY_EVALID); \
    } while (0)

/*
 * Ignore whitespaces in the input string p, counting passed newlines into
 * the line counter of lyxml_context c. p is evaluated several times, so it
 * must be a plain lvalue without side effects.
 */
#define ign_xmlws(c,p) \
    do { \
        while (is_xmlws(*(p))) { \
            if (*(p) == '\n') { \
                ++(c)->line; \
            } \
            ++(p); \
        } \
    } while (0)
31
/**
 * @brief Skip the input up to the first occurrence of a delimiter.
 *
 * While skipping, the newline characters passed before the delimiter are counted.
 *
 * @param[in] input NULL-terminated string to search in.
 * @param[in] delim Delimiter to look for.
 * @param[in] delim_len Number of bytes of @p delim to compare.
 * @param[out] newlines Number of '\n' characters skipped (always reset to 0 first).
 * @return Pointer to the beginning of the delimiter inside @p input,
 * NULL if the end of @p input was reached without finding it.
 */
static const char *
ign_todelim(register const char *input, const char *delim, size_t delim_len, size_t *newlines)
{
    const char *cursor;
    size_t matched;

    *newlines = 0;

    for (cursor = input; *cursor != '\0'; ++cursor) {
        if (*cursor == *delim) {
            /* possible start of the delimiter, compare it byte by byte;
             * the terminating NUL of the input never matches, so we cannot run behind it */
            matched = 0;
            while (matched < delim_len && cursor[matched] == delim[matched]) {
                ++matched;
            }
            if (matched == delim_len) {
                return cursor;
            }
        } else if (*cursor == '\n') {
            ++(*newlines);
        }
    }

    /* delimiter not present */
    return NULL;
}
66
Radek Krejci4b74d5e2018-09-26 14:30:55 +020067/**
Radek Krejci7a7fa902018-09-25 17:08:21 +020068 * Store UTF-8 character specified as 4byte integer into the dst buffer.
69 * Returns number of written bytes (4 max), expects that dst has enough space.
70 *
71 * UTF-8 mapping:
72 * 00000000 -- 0000007F: 0xxxxxxx
73 * 00000080 -- 000007FF: 110xxxxx 10xxxxxx
74 * 00000800 -- 0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
75 * 00010000 -- 001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
76 *
77 * Includes checking for valid characters (following RFC 7950, sec 9.4)
78 */
79static LY_ERR
Radek Krejci117d2082018-09-26 10:05:14 +020080lyxml_pututf8(char *dst, uint32_t value, size_t *bytes_written)
Radek Krejci7a7fa902018-09-25 17:08:21 +020081{
82 if (value < 0x80) {
83 /* one byte character */
84 if (value < 0x20 &&
85 value != 0x09 &&
86 value != 0x0a &&
87 value != 0x0d) {
88 return LY_EINVAL;
89 }
90
91 dst[0] = value;
92 (*bytes_written) = 1;
93 } else if (value < 0x800) {
94 /* two bytes character */
95 dst[0] = 0xc0 | (value >> 6);
96 dst[1] = 0x80 | (value & 0x3f);
97 (*bytes_written) = 2;
98 } else if (value < 0xfffe) {
99 /* three bytes character */
100 if (((value & 0xf800) == 0xd800) ||
101 (value >= 0xfdd0 && value <= 0xfdef)) {
102 /* exclude surrogate blocks %xD800-DFFF */
103 /* exclude noncharacters %xFDD0-FDEF */
104 return LY_EINVAL;
105 }
106
107 dst[0] = 0xe0 | (value >> 12);
108 dst[1] = 0x80 | ((value >> 6) & 0x3f);
109 dst[2] = 0x80 | (value & 0x3f);
110
111 (*bytes_written) = 3;
112 } else if (value < 0x10fffe) {
113 if ((value & 0xffe) == 0xffe) {
114 /* exclude noncharacters %xFFFE-FFFF, %x1FFFE-1FFFF, %x2FFFE-2FFFF, %x3FFFE-3FFFF, %x4FFFE-4FFFF,
115 * %x5FFFE-5FFFF, %x6FFFE-6FFFF, %x7FFFE-7FFFF, %x8FFFE-8FFFF, %x9FFFE-9FFFF, %xAFFFE-AFFFF,
116 * %xBFFFE-BFFFF, %xCFFFE-CFFFF, %xDFFFE-DFFFF, %xEFFFE-EFFFF, %xFFFFE-FFFFF, %x10FFFE-10FFFF */
117 return LY_EINVAL;
118 }
119 /* four bytes character */
120 dst[0] = 0xf0 | (value >> 18);
121 dst[1] = 0x80 | ((value >> 12) & 0x3f);
122 dst[2] = 0x80 | ((value >> 6) & 0x3f);
123 dst[3] = 0x80 | (value & 0x3f);
124
125 (*bytes_written) = 4;
126 }
127 return LY_SUCCESS;
128}
129
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200130/**
131 * @brief Check/Get an XML qualified name from the input string.
132 *
133 * The identifier must have at least one valid character complying the name start character constraints.
134 * The identifier is terminated by the first character, which does not comply to the name character constraints.
135 *
136 * See https://www.w3.org/TR/xml-names/#NT-NCName
137 *
138 * @param[in] context XML context to track lines or store errors into libyang context.
139 * @param[in,out] input Input string to process, updated according to the processed/read data.
140 * Note that the term_char is also read, so input points after the term_char at the end.
141 * @param[out] term_char The first character in the input string which does not compy to the name constraints.
142 * @param[out] term_char_len Number of bytes used to encode UTF8 term_char. Serves to be able to go back in input string.
143 * @return LY_ERR value.
144 */
145static LY_ERR
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200146lyxml_check_qname(struct lyxml_context *context, const char **input, unsigned int *term_char, size_t *term_char_len)
147{
148 unsigned int c;
149 const char *id = (*input);
150 LY_ERR rc;
151
152 /* check NameStartChar (minus colon) */
Radek Krejcib416be62018-10-01 14:51:45 +0200153 LY_CHECK_ERR_RET(ly_getutf8(input, &c, NULL) != LY_SUCCESS,
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200154 LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INCHAR, (*input)[0]), LY_EVALID);
155 LY_CHECK_ERR_RET(!is_xmlqnamestartchar(c),
156 LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX,
157 "Identifier \"%s\" starts with invalid character.", id),
158 LY_EVALID);
159
160 /* check rest of the identifier */
Radek Krejcib416be62018-10-01 14:51:45 +0200161 for (rc = ly_getutf8(input, &c, term_char_len);
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200162 rc == LY_SUCCESS && is_xmlqnamechar(c);
Radek Krejcib416be62018-10-01 14:51:45 +0200163 rc = ly_getutf8(input, &c, term_char_len));
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200164 LY_CHECK_ERR_RET(rc != LY_SUCCESS, LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INCHAR, (*input)[0]), LY_EVALID);
165
166 (*term_char) = c;
167 return LY_SUCCESS;
168}
169
Radek Krejci7a7fa902018-09-25 17:08:21 +0200170LY_ERR
Radek Krejcid70d1072018-10-09 14:20:47 +0200171lyxml_get_string(struct lyxml_context *context, const char **input, char **buffer, size_t *buffer_size, char **output, size_t *length, int *dynamic)
Radek Krejci7a7fa902018-09-25 17:08:21 +0200172{
173#define BUFSIZE 4096
174#define BUFSIZE_STEP 4096
175#define BUFSIZE_CHECK(CTX, BUF, SIZE, CURR, NEED) \
176 if (CURR+NEED >= SIZE) { \
177 BUF = ly_realloc(BUF, SIZE + BUFSIZE_STEP); \
178 LY_CHECK_ERR_RET(!BUF, LOGMEM(CTX), LY_EMEM); \
179 SIZE += BUFSIZE_STEP; \
180 }
181
182 struct ly_ctx *ctx = context->ctx; /* shortcut */
Radek Krejcid70d1072018-10-09 14:20:47 +0200183 const char *in = (*input), *start;
184 char *buf = NULL, delim;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200185 size_t offset; /* read offset in input buffer */
Radek Krejcid70d1072018-10-09 14:20:47 +0200186 size_t len; /* length of the output string (write offset in output buffer) */
Radek Krejci7a7fa902018-09-25 17:08:21 +0200187 size_t size; /* size of the output buffer */
188 void *p;
Radek Krejci117d2082018-09-26 10:05:14 +0200189 uint32_t n;
190 size_t u, newlines;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200191 bool empty_content = false;
192 LY_ERR rc;
193
Radek Krejcib1890642018-10-03 14:05:40 +0200194 assert(context);
195 assert(context->status == LYXML_ELEM_CONTENT || context->status == LYXML_ATTR_CONTENT);
196
Radek Krejci7a7fa902018-09-25 17:08:21 +0200197 if (in[0] == '\'') {
198 delim = '\'';
199 ++in;
200 } else if (in[0] == '"') {
201 delim = '"';
202 ++in;
203 } else {
204 delim = '<';
205 empty_content = true;
206 }
Radek Krejcid70d1072018-10-09 14:20:47 +0200207 start = in;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200208
209 if (empty_content) {
210 /* only when processing element's content - try to ignore whitespaces used to format XML data
211 * before element's child or closing tag */
Radek Krejci117d2082018-09-26 10:05:14 +0200212 for (offset = newlines = 0; in[offset] && is_xmlws(in[offset]); ++offset) {
213 if (in[offset] == '\n') {
214 ++newlines;
215 }
216 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200217 LY_CHECK_ERR_RET(!in[offset], LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF), LY_EVALID);
Radek Krejci117d2082018-09-26 10:05:14 +0200218 context->line += newlines;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200219 if (in[offset] == '<') {
Radek Krejcied6c6ad2018-09-26 09:10:18 +0200220 (*input) = in + offset;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200221 return LY_EINVAL;
222 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200223 }
Radek Krejcid70d1072018-10-09 14:20:47 +0200224 /* init */
225 offset = len = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200226
Radek Krejcid70d1072018-10-09 14:20:47 +0200227 if (0) {
228getbuffer:
229 /* prepare output buffer */
230 if (*buffer) {
231 buf = *buffer;
232 size = *buffer_size;
233 } else {
234 buf = malloc(BUFSIZE);
235 size = BUFSIZE;
236 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
237 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200238 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200239
240 /* parse */
241 while (in[offset]) {
242 if (in[offset] == '&') {
Radek Krejcid70d1072018-10-09 14:20:47 +0200243 if (!buf) {
244 /* it is necessary to modify the input, so we will need a dynamically allocated buffer */
245 goto getbuffer;
246 }
247
Radek Krejci7a7fa902018-09-25 17:08:21 +0200248 if (offset) {
249 /* store what we have so far */
250 BUFSIZE_CHECK(ctx, buf, size, len, offset);
251 memcpy(&buf[len], in, offset);
252 len += offset;
253 in += offset;
254 offset = 0;
255 }
256 /* process reference */
257 /* we will need 4 bytes at most since we support only the predefined
258 * (one-char) entities and character references */
259 BUFSIZE_CHECK(ctx, buf, size, len, 4);
260 ++offset;
261 if (in[offset] != '#') {
262 /* entity reference - only predefined references are supported */
263 if (!strncmp(&in[offset], "lt;", 3)) {
264 buf[len++] = '<';
265 in += 4; /* &lt; */
266 } else if (!strncmp(&in[offset], "gt;", 3)) {
267 buf[len++] = '>';
268 in += 4; /* &gt; */
269 } else if (!strncmp(&in[offset], "amp;", 4)) {
270 buf[len++] = '&';
271 in += 5; /* &amp; */
272 } else if (!strncmp(&in[offset], "apos;", 5)) {
273 buf[len++] = '\'';
274 in += 6; /* &apos; */
275 } else if (!strncmp(&in[offset], "quot;", 5)) {
276 buf[len++] = '\"';
277 in += 6; /* &quot; */
278 } else {
Radek Krejcied6c6ad2018-09-26 09:10:18 +0200279 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX,
280 "Entity reference \"%.*s\" not supported, only predefined references allowed.", 10, &in[offset-1]);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200281 goto error;
282 }
283 offset = 0;
284 } else {
285 p = (void*)&in[offset - 1];
286 /* character reference */
287 ++offset;
288 if (isdigit(in[offset])) {
289 for (n = 0; isdigit(in[offset]); offset++) {
290 n = (10 * n) + (in[offset] - '0');
291 }
292 } else if (in[offset] == 'x' && isxdigit(in[offset + 1])) {
293 for (n = 0, ++offset; isxdigit(in[offset]); offset++) {
294 if (isdigit(in[offset])) {
295 u = (in[offset] - '0');
296 } else if (in[offset] > 'F') {
297 u = 10 + (in[offset] - 'a');
298 } else {
299 u = 10 + (in[offset] - 'A');
300 }
301 n = (16 * n) + u;
302 }
303 } else {
Radek Krejcied6c6ad2018-09-26 09:10:18 +0200304 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Invalid character reference \"%.*s\".", 12, p);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200305 goto error;
306
307 }
308 LY_CHECK_ERR_GOTO(in[offset] != ';',
309 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP,
310 LY_VCODE_INSTREXP_len(&in[offset]), &in[offset], ";"),
311 error);
312 ++offset;
313 rc = lyxml_pututf8(&buf[len], n, &u);
314 LY_CHECK_ERR_GOTO(rc, LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX,
Radek Krejci117d2082018-09-26 10:05:14 +0200315 "Invalid character reference \"%.*s\" (0x%08x).", 12, p, n),
Radek Krejci7a7fa902018-09-25 17:08:21 +0200316 error);
317 len += u;
318 in += offset;
319 offset = 0;
320 }
321 } else if (in[offset] == delim) {
322 /* end of string */
Radek Krejcid70d1072018-10-09 14:20:47 +0200323 if (buf) {
324 if (len + offset >= size) {
325 buf = ly_realloc(buf, len + offset + 1);
326 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
327 size = len + offset + 1;
328 }
329 memcpy(&buf[len], in, offset);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200330 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200331 len += offset;
332 /* in case of element content, keep the leading <,
Radek Krejcib1890642018-10-03 14:05:40 +0200333 * for attribute's value move after the terminating quotation mark */
334 if (context->status == LYXML_ELEM_CONTENT) {
Radek Krejci7a7fa902018-09-25 17:08:21 +0200335 in += offset;
336 } else {
337 in += offset + 1;
338 }
339 goto success;
340 } else {
341 /* log lines */
342 if (in[offset] == '\n') {
343 ++context->line;
344 }
345
346 /* continue */
347 ++offset;
348 }
349 }
350 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF);
351error:
352 if (!(*buffer)) {
353 free(buf);
354 }
355 return LY_EVALID;
356
357success:
Radek Krejcid70d1072018-10-09 14:20:47 +0200358 if (buf) {
359 if (!(*buffer) && size != len + 1) {
360 /* not using provided buffer, so fit the allocated buffer to what we really have inside */
361 p = realloc(buf, len + 1);
362 /* ignore realloc fail because we are reducing the buffer,
363 * so just return bigger buffer than needed */
364 if (p) {
365 size = len + 1;
366 buf = p;
367 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200368 }
Radek Krejcid70d1072018-10-09 14:20:47 +0200369 /* set terminating NULL byte */
370 buf[len] = '\0';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200371 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200372
Radek Krejcib1890642018-10-03 14:05:40 +0200373 context->status -= 1;
Radek Krejcid70d1072018-10-09 14:20:47 +0200374 if (buf) {
375 (*buffer) = buf;
376 (*buffer_size) = size;
377 (*output) = buf;
378 (*dynamic) = 1;
379 } else {
380 (*output) = (char*)start;
381 (*dynamic) = 0;
382 }
383 (*length) = len;
384
Radek Krejci28e8cb52019-03-08 11:31:31 +0100385 if (context->status == LYXML_ATTRIBUTE) {
386 if (in[0] == '>') {
387 /* element terminated by > - termination of the opening tag */
388 context->status = LYXML_ELEM_CONTENT;
389 ++in;
390 } else if (in[0] == '/' && in[1] == '>') {
391 /* element terminated by /> - termination of an empty element */
392 context->status = LYXML_ELEMENT;
393 in += 2;
394
395 /* remove the closed element record from the tags list */
396 free(context->elements.objs[context->elements.count - 1]);
397 --context->elements.count;
398 }
399 }
400
401 (*input) = in;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200402 return LY_SUCCESS;
403
404#undef BUFSIZE
405#undef BUFSIZE_STEP
406#undef BUFSIZE_CHECK
407}
408
Radek Krejcid972c252018-09-25 13:23:39 +0200409LY_ERR
Radek Krejci7a7fa902018-09-25 17:08:21 +0200410lyxml_get_attribute(struct lyxml_context *context, const char **input,
Radek Krejcid972c252018-09-25 13:23:39 +0200411 const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
412{
413 struct ly_ctx *ctx = context->ctx; /* shortcut */
414 const char *in = (*input);
415 const char *id;
416 const char *endtag;
417 LY_ERR rc;
418 unsigned int c;
419 size_t endtag_len;
420
421 /* initialize output variables */
422 (*prefix) = (*name) = NULL;
423 (*prefix_len) = (*name_len) = 0;
424
425 /* skip initial whitespaces */
426 ign_xmlws(context, in);
427
428 if (in[0] == '\0') {
429 /* EOF - not expected at this place */
430 return LY_EINVAL;
Radek Krejcid972c252018-09-25 13:23:39 +0200431 }
432
433 /* remember the identifier start before checking its format */
434 id = in;
435 rc = lyxml_check_qname(context, &in, &c, &endtag_len);
436 LY_CHECK_RET(rc);
437 if (c == ':') {
438 /* we have prefixed identifier */
439 endtag = in - endtag_len;
440
441 rc = lyxml_check_qname(context, &in, &c, &endtag_len);
442 LY_CHECK_RET(rc);
443
444 (*prefix) = id;
445 (*prefix_len) = endtag - id;
446 id = endtag + 1;
447 }
448 if (!is_xmlws(c) && c != '=') {
449 in = in - endtag_len;
450 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in), in, "whitespace or '='");
451 return LY_EVALID;
452 }
453 in = in - endtag_len;
454 (*name) = id;
455 (*name_len) = in - id;
456
457 /* eat '=' and stop at the value beginning */
458 ign_xmlws(context, in);
459 if (in[0] != '=') {
460 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in), in, "'='");
461 return LY_EVALID;
462 }
463 ++in;
464 ign_xmlws(context, in);
465 if (in[0] != '\'' && in[0] != '"') {
Radek Krejcib1890642018-10-03 14:05:40 +0200466 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP,
467 LY_VCODE_INSTREXP_len(in), in, "either single or double quotation mark");
Radek Krejcid972c252018-09-25 13:23:39 +0200468 return LY_EVALID;
469 }
Radek Krejcib1890642018-10-03 14:05:40 +0200470 context->status = LYXML_ATTR_CONTENT;
Radek Krejcid972c252018-09-25 13:23:39 +0200471
Radek Krejcid972c252018-09-25 13:23:39 +0200472 /* move caller's input */
473 (*input) = in;
474 return LY_SUCCESS;
475}
476
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200477LY_ERR
Radek Krejci7a7fa902018-09-25 17:08:21 +0200478lyxml_get_element(struct lyxml_context *context, const char **input,
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200479 const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
480{
481 struct ly_ctx *ctx = context->ctx; /* shortcut */
482 const char *in = (*input);
483 const char *endtag;
484 const char *sectname;
485 const char *id;
486 size_t endtag_len, newlines;
Radek Krejcib1890642018-10-03 14:05:40 +0200487 bool loop = true, closing = false;
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200488 unsigned int c;
489 LY_ERR rc;
Radek Krejcib1890642018-10-03 14:05:40 +0200490 struct lyxml_elem *e;
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200491
492 /* initialize output variables */
493 (*prefix) = (*name) = NULL;
494 (*prefix_len) = (*name_len) = 0;
495
496 while (loop) {
497 ign_xmlws(context, in);
498
499 if (in[0] == '\0') {
500 /* EOF */
Radek Krejcib1890642018-10-03 14:05:40 +0200501 context->status = LYXML_END;
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200502 goto success;
503 } else if (in[0] != '<') {
504 return LY_EINVAL;
505 }
506 move_input(context, in, 1);
507
508 if (in[0] == '!') {
509 move_input(context, in, 1);
510 /* sections to ignore */
511 if (!strncmp(in, "--", 2)) {
512 /* comment */
513 move_input(context, in, 2);
514 sectname = "Comment";
515 endtag = "-->";
516 endtag_len = 3;
517 } else if (!strncmp(in, "[CDATA[", 7)) {
518 /* CDATA section */
519 move_input(context, in, 7);
520 sectname = "CData";
521 endtag = "]]>";
522 endtag_len = 3;
523 } else if (!strncmp(in, "DOCTYPE", 7)) {
524 /* Document type declaration - not supported */
525 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_NSUPP, "Document Type Declaration");
526 return LY_EVALID;
527 }
528 in = ign_todelim(in, endtag, endtag_len, &newlines);
529 LY_CHECK_ERR_RET(!in, LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_NTERM, sectname), LY_EVALID);
530 context->line += newlines;
531 in += endtag_len;
532 } else if (in[0] == '?') {
533 in = ign_todelim(in, "?>", 2, &newlines);
534 LY_CHECK_ERR_RET(!in, LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_NTERM, "Declaration"), LY_EVALID);
535 context->line += newlines;
536 in += 2;
Radek Krejcib1890642018-10-03 14:05:40 +0200537 } else if (in[0] == '/') {
538 /* closing element */
539 closing = true;
540 ++in;
541 goto element;
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200542 } else {
543 /* element */
Radek Krejcib1890642018-10-03 14:05:40 +0200544element:
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200545 ign_xmlws(context, in);
546 LY_CHECK_ERR_RET(!in[0], LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF), LY_EVALID);
547
548 /* remember the identifier start before checking its format */
549 id = in;
550 rc = lyxml_check_qname(context, &in, &c, &endtag_len);
551 LY_CHECK_RET(rc);
552 if (c == ':') {
553 /* we have prefixed identifier */
554 endtag = in - endtag_len;
555
556 rc = lyxml_check_qname(context, &in, &c, &endtag_len);
557 LY_CHECK_RET(rc);
558
559 (*prefix) = id;
560 (*prefix_len) = endtag - id;
561 id = endtag + 1;
562 }
563 if (!is_xmlws(c) && c != '/' && c != '>') {
564 in = in - endtag_len;
Radek Krejcid972c252018-09-25 13:23:39 +0200565 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in), in,
566 "whitespace or element tag termination ('>' or '/>'");
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200567 return LY_EVALID;
568 }
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200569 (*name) = id;
Radek Krejcib1890642018-10-03 14:05:40 +0200570 (*name_len) = in - endtag_len - id;
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200571
Radek Krejcib1890642018-10-03 14:05:40 +0200572 if (is_xmlws(c)) {
573 /* go to the next meaningful input */
574 ign_xmlws(context, in);
575 LY_CHECK_ERR_RET(!in[0], LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF), LY_EVALID);
576 c = in[0];
577 ++in;
578 endtag_len = 1;
579 }
580
581 if (closing) {
582 /* match opening and closing element tags */
583 LY_CHECK_ERR_RET(
584 !context->elements.count,
585 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Opening and closing elements tag missmatch (\"%.*s\").", name_len, *name),
586 LY_EVALID);
587 e = (struct lyxml_elem*)context->elements.objs[context->elements.count - 1];
588 LY_CHECK_ERR_RET(e->prefix_len != *prefix_len || e->name_len != *name_len
589 || (*prefix_len && strncmp(*prefix, e->prefix, e->prefix_len)) || strncmp(*name, e->name, e->name_len),
590 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Opening and closing elements tag missmatch (\"%.*s\").", name_len, *name),
591 LY_EVALID);
592 /* opening and closing element tags matches, remove record from the opening tags list */
593 free(e);
594 --context->elements.count;
595 /* do not return element information to announce closing element being currently processed */
596 *name = *prefix = NULL;
597 *name_len = *prefix_len = 0;
598
599 if (c == '>') {
600 /* end of closing element */
601 context->status = LYXML_ELEMENT;
602 } else {
603 in -= endtag_len;
604 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Unexpected data \"%.*s\" in closing element tag.",
605 LY_VCODE_INSTREXP_len(in), in);
606 return LY_EVALID;
607 }
608 } else {
609 if (c == '>') {
610 /* end of opening element */
611 context->status = LYXML_ELEM_CONTENT;
612 } else if (c == '/' && in[0] == '>') {
613 /* empty element closing */
614 context->status = LYXML_ELEMENT;
615 ++in;
616 } else {
617 /* attribute */
618 context->status = LYXML_ATTRIBUTE;
619 in -= endtag_len;
620 }
621
622 if (context->status != LYXML_ELEMENT) {
623 /* store element opening tag information */
624 e = malloc(sizeof *e);
625 LY_CHECK_ERR_RET(!e, LOGMEM(ctx), LY_EMEM);
626 e->name = *name;
627 e->prefix = *prefix;
628 e->name_len = *name_len;
629 e->prefix_len = *prefix_len;
630 ly_set_add(&context->elements, e, LY_SET_OPT_USEASLIST);
631 }
632 }
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200633 loop = false;
634 }
635 }
636
637success:
638 /* move caller's input */
639 (*input) = in;
640 return LY_SUCCESS;
641}
642
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200643LY_ERR
David Sedlák3d0a82b2019-03-15 13:49:43 +0100644lyxml_ns_add(struct lyxml_context *context, const char *element_name, const char *prefix, size_t prefix_len, char *uri, size_t uri_len)
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200645{
646 struct lyxml_ns *ns;
647
648 ns = malloc(sizeof *ns);
649 LY_CHECK_ERR_RET(!ns, LOGMEM(context->ctx), LY_EMEM);
650
651 ns->element = element_name;
David Sedlák3d0a82b2019-03-15 13:49:43 +0100652 ns->uri = strndup(uri, uri_len);
653 LY_CHECK_ERR_RET(!ns->uri, LOGMEM(context->ctx); free(ns), LY_EMEM);
654
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200655 if (prefix) {
656 ns->prefix = strndup(prefix, prefix_len);
David Sedlák3d0a82b2019-03-15 13:49:43 +0100657 LY_CHECK_ERR_RET(!ns->prefix, LOGMEM(context->ctx); free(ns->uri); free(ns), LY_EMEM);
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200658 } else {
659 ns->prefix = NULL;
660 }
661
David Sedlák3d0a82b2019-03-15 13:49:43 +0100662 LY_CHECK_ERR_RET(ly_set_add(&context->ns, ns, LY_SET_OPT_USEASLIST) == -1, free(ns->prefix); free(ns->uri); free(ns), LY_EMEM);
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200663 return LY_SUCCESS;
664}
665
666const struct lyxml_ns *
667lyxml_ns_get(struct lyxml_context *context, const char *prefix, size_t prefix_len)
668{
669 unsigned int u;
670 struct lyxml_ns *ns;
671
672 for (u = context->ns.count - 1; u + 1 > 0; --u) {
673 ns = (struct lyxml_ns *)context->ns.objs[u];
674 if (prefix) {
675 if (!strncmp(prefix, ns->prefix, prefix_len) && ns->prefix[prefix_len] == '\0') {
676 return ns;
677 }
678 } else if (!ns->prefix) {
679 /* default namespace */
680 return ns;
681 }
682 }
683
684 return NULL;
685}
686
687LY_ERR
688lyxml_ns_rm(struct lyxml_context *context, const char *element_name)
689{
690 unsigned int u;
691
692 for (u = context->ns.count - 1; u + 1 > 0; --u) {
693 if (((struct lyxml_ns *)context->ns.objs[u])->element != element_name) {
694 /* we are done, the namespaces from a single element are supposed to be together */
695 break;
696 }
697 /* remove the ns structure */
698 free(((struct lyxml_ns *)context->ns.objs[u])->prefix);
699 free(((struct lyxml_ns *)context->ns.objs[u])->uri);
700 free(context->ns.objs[u]);
701 --context->ns.count;
702 }
703
704 if (!context->ns.count) {
705 /* cleanup the context's namespaces storage */
706 ly_set_erase(&context->ns, NULL);
707 }
708
709 return LY_SUCCESS;
710}
Radek Krejcib1890642018-10-03 14:05:40 +0200711
712void
713lyxml_context_clear(struct lyxml_context *context)
714{
715 unsigned int u;
716
717 ly_set_erase(&context->elements, free);
718 for (u = context->ns.count - 1; u + 1 > 0; --u) {
719 /* remove the ns structure */
720 free(((struct lyxml_ns *)context->ns.objs[u])->prefix);
721 free(((struct lyxml_ns *)context->ns.objs[u])->uri);
722 free(context->ns.objs[u]);
723 }
724 ly_set_erase(&context->ns, NULL);
725}