blob: be943cd5670798b0cc3cb7ab74556176f94c77aa [file] [log] [blame]
Radek Krejcid91dbaf2018-09-21 15:51:39 +02001/**
2 * @file xml.c
3 * @author Radek Krejci <rkrejci@cesnet.cz>
4 * @brief Generic XML parser implementation for libyang
5 *
6 * Copyright (c) 2015 - 2018 CESNET, z.s.p.o.
7 *
8 * This source code is licensed under BSD 3-Clause License (the "License").
9 * You may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * https://opensource.org/licenses/BSD-3-Clause
13 */
14
Radek Krejcic1c03d62018-11-27 10:52:43 +010015#include "common.h"
Radek Krejci4b74d5e2018-09-26 14:30:55 +020016
Radek Krejcib1890642018-10-03 14:05:40 +020017#include <assert.h>
Radek Krejci7a7fa902018-09-25 17:08:21 +020018#include <ctype.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020019#include <stdbool.h>
20#include <stdint.h>
Radek Krejci4b74d5e2018-09-26 14:30:55 +020021#include <string.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020022
23#include "libyang.h"
24#include "xml.h"
Radek Krejcid91dbaf2018-09-21 15:51:39 +020025
/*
 * Move input p by s characters, if EOF log with lyxml_context c.
 *
 * On EOF the LY_CHECK_ERR_RET() inside makes the *calling* function return LY_EVALID.
 * The body is wrapped in do { } while (0) so the macro behaves as a single statement
 * (safe after an unbraced if/else) and all arguments are parenthesized.
 */
#define move_input(c,p,s) \
    do { \
        (p) += (s); \
        LY_CHECK_ERR_RET(!(p)[0], LOGVAL((c)->ctx, LY_VLOG_LINE, &(c)->line, LY_VCODE_EOF), LY_EVALID); \
    } while (0)
28
/*
 * Ignore whitespaces in the input string p.
 *
 * Advances p past every character accepted by is_xmlws() and increments the line
 * counter of the lyxml_context c for each skipped '\n'.
 * Wrapped in do { } while (0) so it acts as one statement; arguments are parenthesized.
 */
#define ign_xmlws(c,p) \
    do { \
        while (is_xmlws(*(p))) { \
            if (*(p) == '\n') { \
                ++(c)->line; \
            } \
            ++(p); \
        } \
    } while (0)
31
/**
 * @brief Ignore any characters until the delim of the size delim_len is read
 *
 * Scans the NULL-terminated input for the first occurrence of delim and counts
 * the new lines that were skipped on the way (the delimiter itself is not consumed,
 * so new lines inside the matched delimiter are not counted).
 *
 * @param[in] input NULL-terminated string to search in.
 * @param[in] delim Delimiter to search for.
 * @param[in] delim_len Number of bytes of delim to match.
 * @param[out] newlines Number of '\n' characters found before the delimiter.
 * @return Pointer to the beginning of the detected delim, or NULL in case the delim
 * was not found before the end of input.
 */
static const char *
ign_todelim(const char *input, const char *delim, size_t delim_len, size_t *newlines)
{
    (*newlines) = 0;

    for ( ; *input; ++input) {
        /* strncmp() stops at the terminating NULL byte, so it never reads past the end of input */
        if (*input == *delim && !strncmp(input, delim, delim_len)) {
            return input;
        }
        /* count every skipped new line, even one that equals the first byte of delim */
        if (*input == '\n') {
            ++(*newlines);
        }
    }

    return NULL;
}
66
Radek Krejci4b74d5e2018-09-26 14:30:55 +020067/**
Radek Krejci7a7fa902018-09-25 17:08:21 +020068 * Store UTF-8 character specified as 4byte integer into the dst buffer.
69 * Returns number of written bytes (4 max), expects that dst has enough space.
70 *
71 * UTF-8 mapping:
72 * 00000000 -- 0000007F: 0xxxxxxx
73 * 00000080 -- 000007FF: 110xxxxx 10xxxxxx
74 * 00000800 -- 0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
75 * 00010000 -- 001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
76 *
77 * Includes checking for valid characters (following RFC 7950, sec 9.4)
78 */
79static LY_ERR
Radek Krejci117d2082018-09-26 10:05:14 +020080lyxml_pututf8(char *dst, uint32_t value, size_t *bytes_written)
Radek Krejci7a7fa902018-09-25 17:08:21 +020081{
82 if (value < 0x80) {
83 /* one byte character */
84 if (value < 0x20 &&
85 value != 0x09 &&
86 value != 0x0a &&
87 value != 0x0d) {
88 return LY_EINVAL;
89 }
90
91 dst[0] = value;
92 (*bytes_written) = 1;
93 } else if (value < 0x800) {
94 /* two bytes character */
95 dst[0] = 0xc0 | (value >> 6);
96 dst[1] = 0x80 | (value & 0x3f);
97 (*bytes_written) = 2;
98 } else if (value < 0xfffe) {
99 /* three bytes character */
100 if (((value & 0xf800) == 0xd800) ||
101 (value >= 0xfdd0 && value <= 0xfdef)) {
102 /* exclude surrogate blocks %xD800-DFFF */
103 /* exclude noncharacters %xFDD0-FDEF */
104 return LY_EINVAL;
105 }
106
107 dst[0] = 0xe0 | (value >> 12);
108 dst[1] = 0x80 | ((value >> 6) & 0x3f);
109 dst[2] = 0x80 | (value & 0x3f);
110
111 (*bytes_written) = 3;
112 } else if (value < 0x10fffe) {
113 if ((value & 0xffe) == 0xffe) {
114 /* exclude noncharacters %xFFFE-FFFF, %x1FFFE-1FFFF, %x2FFFE-2FFFF, %x3FFFE-3FFFF, %x4FFFE-4FFFF,
115 * %x5FFFE-5FFFF, %x6FFFE-6FFFF, %x7FFFE-7FFFF, %x8FFFE-8FFFF, %x9FFFE-9FFFF, %xAFFFE-AFFFF,
116 * %xBFFFE-BFFFF, %xCFFFE-CFFFF, %xDFFFE-DFFFF, %xEFFFE-EFFFF, %xFFFFE-FFFFF, %x10FFFE-10FFFF */
117 return LY_EINVAL;
118 }
119 /* four bytes character */
120 dst[0] = 0xf0 | (value >> 18);
121 dst[1] = 0x80 | ((value >> 12) & 0x3f);
122 dst[2] = 0x80 | ((value >> 6) & 0x3f);
123 dst[3] = 0x80 | (value & 0x3f);
124
125 (*bytes_written) = 4;
126 }
127 return LY_SUCCESS;
128}
129
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200130/**
131 * @brief Check/Get an XML qualified name from the input string.
132 *
133 * The identifier must have at least one valid character complying the name start character constraints.
134 * The identifier is terminated by the first character, which does not comply to the name character constraints.
135 *
136 * See https://www.w3.org/TR/xml-names/#NT-NCName
137 *
138 * @param[in] context XML context to track lines or store errors into libyang context.
139 * @param[in,out] input Input string to process, updated according to the processed/read data.
140 * Note that the term_char is also read, so input points after the term_char at the end.
141 * @param[out] term_char The first character in the input string which does not compy to the name constraints.
142 * @param[out] term_char_len Number of bytes used to encode UTF8 term_char. Serves to be able to go back in input string.
143 * @return LY_ERR value.
144 */
145static LY_ERR
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200146lyxml_check_qname(struct lyxml_context *context, const char **input, unsigned int *term_char, size_t *term_char_len)
147{
148 unsigned int c;
149 const char *id = (*input);
150 LY_ERR rc;
151
152 /* check NameStartChar (minus colon) */
Radek Krejcib416be62018-10-01 14:51:45 +0200153 LY_CHECK_ERR_RET(ly_getutf8(input, &c, NULL) != LY_SUCCESS,
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200154 LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INCHAR, (*input)[0]), LY_EVALID);
155 LY_CHECK_ERR_RET(!is_xmlqnamestartchar(c),
156 LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX,
157 "Identifier \"%s\" starts with invalid character.", id),
158 LY_EVALID);
159
160 /* check rest of the identifier */
Radek Krejcib416be62018-10-01 14:51:45 +0200161 for (rc = ly_getutf8(input, &c, term_char_len);
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200162 rc == LY_SUCCESS && is_xmlqnamechar(c);
Radek Krejcib416be62018-10-01 14:51:45 +0200163 rc = ly_getutf8(input, &c, term_char_len));
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200164 LY_CHECK_ERR_RET(rc != LY_SUCCESS, LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INCHAR, (*input)[0]), LY_EVALID);
165
166 (*term_char) = c;
167 return LY_SUCCESS;
168}
169
Radek Krejci7a7fa902018-09-25 17:08:21 +0200170LY_ERR
Radek Krejcid70d1072018-10-09 14:20:47 +0200171lyxml_get_string(struct lyxml_context *context, const char **input, char **buffer, size_t *buffer_size, char **output, size_t *length, int *dynamic)
Radek Krejci7a7fa902018-09-25 17:08:21 +0200172{
173#define BUFSIZE 4096
174#define BUFSIZE_STEP 4096
175#define BUFSIZE_CHECK(CTX, BUF, SIZE, CURR, NEED) \
176 if (CURR+NEED >= SIZE) { \
177 BUF = ly_realloc(BUF, SIZE + BUFSIZE_STEP); \
178 LY_CHECK_ERR_RET(!BUF, LOGMEM(CTX), LY_EMEM); \
179 SIZE += BUFSIZE_STEP; \
180 }
181
182 struct ly_ctx *ctx = context->ctx; /* shortcut */
Radek Krejcid70d1072018-10-09 14:20:47 +0200183 const char *in = (*input), *start;
184 char *buf = NULL, delim;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200185 size_t offset; /* read offset in input buffer */
Radek Krejcid70d1072018-10-09 14:20:47 +0200186 size_t len; /* length of the output string (write offset in output buffer) */
Radek Krejci7a7fa902018-09-25 17:08:21 +0200187 size_t size; /* size of the output buffer */
188 void *p;
Radek Krejci117d2082018-09-26 10:05:14 +0200189 uint32_t n;
190 size_t u, newlines;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200191 bool empty_content = false;
192 LY_ERR rc;
193
Radek Krejcib1890642018-10-03 14:05:40 +0200194 assert(context);
195 assert(context->status == LYXML_ELEM_CONTENT || context->status == LYXML_ATTR_CONTENT);
196
Radek Krejci7a7fa902018-09-25 17:08:21 +0200197 if (in[0] == '\'') {
198 delim = '\'';
199 ++in;
200 } else if (in[0] == '"') {
201 delim = '"';
202 ++in;
203 } else {
204 delim = '<';
205 empty_content = true;
206 }
Radek Krejcid70d1072018-10-09 14:20:47 +0200207 start = in;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200208
209 if (empty_content) {
210 /* only when processing element's content - try to ignore whitespaces used to format XML data
211 * before element's child or closing tag */
Radek Krejci117d2082018-09-26 10:05:14 +0200212 for (offset = newlines = 0; in[offset] && is_xmlws(in[offset]); ++offset) {
213 if (in[offset] == '\n') {
214 ++newlines;
215 }
216 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200217 LY_CHECK_ERR_RET(!in[offset], LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF), LY_EVALID);
Radek Krejci117d2082018-09-26 10:05:14 +0200218 context->line += newlines;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200219 if (in[offset] == '<') {
Radek Krejcied6c6ad2018-09-26 09:10:18 +0200220 (*input) = in + offset;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200221 return LY_EINVAL;
222 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200223 }
Radek Krejcid70d1072018-10-09 14:20:47 +0200224 /* init */
225 offset = len = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200226
Radek Krejcid70d1072018-10-09 14:20:47 +0200227 if (0) {
228getbuffer:
229 /* prepare output buffer */
230 if (*buffer) {
231 buf = *buffer;
232 size = *buffer_size;
233 } else {
234 buf = malloc(BUFSIZE);
235 size = BUFSIZE;
236 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
237 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200238 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200239
240 /* parse */
241 while (in[offset]) {
242 if (in[offset] == '&') {
Radek Krejcid70d1072018-10-09 14:20:47 +0200243 if (!buf) {
244 /* it is necessary to modify the input, so we will need a dynamically allocated buffer */
245 goto getbuffer;
246 }
247
Radek Krejci7a7fa902018-09-25 17:08:21 +0200248 if (offset) {
249 /* store what we have so far */
250 BUFSIZE_CHECK(ctx, buf, size, len, offset);
251 memcpy(&buf[len], in, offset);
252 len += offset;
253 in += offset;
254 offset = 0;
255 }
256 /* process reference */
257 /* we will need 4 bytes at most since we support only the predefined
258 * (one-char) entities and character references */
259 BUFSIZE_CHECK(ctx, buf, size, len, 4);
260 ++offset;
261 if (in[offset] != '#') {
262 /* entity reference - only predefined references are supported */
263 if (!strncmp(&in[offset], "lt;", 3)) {
264 buf[len++] = '<';
265 in += 4; /* &lt; */
266 } else if (!strncmp(&in[offset], "gt;", 3)) {
267 buf[len++] = '>';
268 in += 4; /* &gt; */
269 } else if (!strncmp(&in[offset], "amp;", 4)) {
270 buf[len++] = '&';
271 in += 5; /* &amp; */
272 } else if (!strncmp(&in[offset], "apos;", 5)) {
273 buf[len++] = '\'';
274 in += 6; /* &apos; */
275 } else if (!strncmp(&in[offset], "quot;", 5)) {
276 buf[len++] = '\"';
277 in += 6; /* &quot; */
278 } else {
Radek Krejcied6c6ad2018-09-26 09:10:18 +0200279 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX,
280 "Entity reference \"%.*s\" not supported, only predefined references allowed.", 10, &in[offset-1]);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200281 goto error;
282 }
283 offset = 0;
284 } else {
285 p = (void*)&in[offset - 1];
286 /* character reference */
287 ++offset;
288 if (isdigit(in[offset])) {
289 for (n = 0; isdigit(in[offset]); offset++) {
290 n = (10 * n) + (in[offset] - '0');
291 }
292 } else if (in[offset] == 'x' && isxdigit(in[offset + 1])) {
293 for (n = 0, ++offset; isxdigit(in[offset]); offset++) {
294 if (isdigit(in[offset])) {
295 u = (in[offset] - '0');
296 } else if (in[offset] > 'F') {
297 u = 10 + (in[offset] - 'a');
298 } else {
299 u = 10 + (in[offset] - 'A');
300 }
301 n = (16 * n) + u;
302 }
303 } else {
Radek Krejcied6c6ad2018-09-26 09:10:18 +0200304 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Invalid character reference \"%.*s\".", 12, p);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200305 goto error;
306
307 }
308 LY_CHECK_ERR_GOTO(in[offset] != ';',
309 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP,
310 LY_VCODE_INSTREXP_len(&in[offset]), &in[offset], ";"),
311 error);
312 ++offset;
313 rc = lyxml_pututf8(&buf[len], n, &u);
314 LY_CHECK_ERR_GOTO(rc, LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX,
Radek Krejci117d2082018-09-26 10:05:14 +0200315 "Invalid character reference \"%.*s\" (0x%08x).", 12, p, n),
Radek Krejci7a7fa902018-09-25 17:08:21 +0200316 error);
317 len += u;
318 in += offset;
319 offset = 0;
320 }
321 } else if (in[offset] == delim) {
322 /* end of string */
Radek Krejcid70d1072018-10-09 14:20:47 +0200323 if (buf) {
324 if (len + offset >= size) {
325 buf = ly_realloc(buf, len + offset + 1);
326 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
327 size = len + offset + 1;
328 }
329 memcpy(&buf[len], in, offset);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200330 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200331 len += offset;
332 /* in case of element content, keep the leading <,
Radek Krejcib1890642018-10-03 14:05:40 +0200333 * for attribute's value move after the terminating quotation mark */
334 if (context->status == LYXML_ELEM_CONTENT) {
Radek Krejci7a7fa902018-09-25 17:08:21 +0200335 in += offset;
336 } else {
337 in += offset + 1;
338 }
339 goto success;
340 } else {
341 /* log lines */
342 if (in[offset] == '\n') {
343 ++context->line;
344 }
345
346 /* continue */
347 ++offset;
348 }
349 }
350 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF);
351error:
352 if (!(*buffer)) {
353 free(buf);
354 }
355 return LY_EVALID;
356
357success:
Radek Krejcid70d1072018-10-09 14:20:47 +0200358 if (buf) {
359 if (!(*buffer) && size != len + 1) {
360 /* not using provided buffer, so fit the allocated buffer to what we really have inside */
361 p = realloc(buf, len + 1);
362 /* ignore realloc fail because we are reducing the buffer,
363 * so just return bigger buffer than needed */
364 if (p) {
365 size = len + 1;
366 buf = p;
367 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200368 }
Radek Krejcid70d1072018-10-09 14:20:47 +0200369 /* set terminating NULL byte */
370 buf[len] = '\0';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200371 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200372
Radek Krejcib1890642018-10-03 14:05:40 +0200373 context->status -= 1;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200374 (*input) = in;
Radek Krejcid70d1072018-10-09 14:20:47 +0200375 if (buf) {
376 (*buffer) = buf;
377 (*buffer_size) = size;
378 (*output) = buf;
379 (*dynamic) = 1;
380 } else {
381 (*output) = (char*)start;
382 (*dynamic) = 0;
383 }
384 (*length) = len;
385
Radek Krejci7a7fa902018-09-25 17:08:21 +0200386 return LY_SUCCESS;
387
388#undef BUFSIZE
389#undef BUFSIZE_STEP
390#undef BUFSIZE_CHECK
391}
392
Radek Krejcid972c252018-09-25 13:23:39 +0200393LY_ERR
Radek Krejci7a7fa902018-09-25 17:08:21 +0200394lyxml_get_attribute(struct lyxml_context *context, const char **input,
Radek Krejcid972c252018-09-25 13:23:39 +0200395 const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
396{
397 struct ly_ctx *ctx = context->ctx; /* shortcut */
398 const char *in = (*input);
399 const char *id;
400 const char *endtag;
401 LY_ERR rc;
402 unsigned int c;
403 size_t endtag_len;
404
405 /* initialize output variables */
406 (*prefix) = (*name) = NULL;
407 (*prefix_len) = (*name_len) = 0;
408
409 /* skip initial whitespaces */
410 ign_xmlws(context, in);
411
412 if (in[0] == '\0') {
413 /* EOF - not expected at this place */
414 return LY_EINVAL;
Radek Krejcib1890642018-10-03 14:05:40 +0200415 } else if (in[0] == '>') {
416 /* element terminated by > - termination of the opening tag */
417 context->status = LYXML_ELEM_CONTENT;
418 ++in;
419 goto success;
420 } else if (in[0] == '/' && in[1] == '>') {
421 /* element terminated by /> - termination of an empty element */
422 context->status = LYXML_ELEMENT;
423 in += 2;
Radek Krejcid972c252018-09-25 13:23:39 +0200424 goto success;
425 }
426
427 /* remember the identifier start before checking its format */
428 id = in;
429 rc = lyxml_check_qname(context, &in, &c, &endtag_len);
430 LY_CHECK_RET(rc);
431 if (c == ':') {
432 /* we have prefixed identifier */
433 endtag = in - endtag_len;
434
435 rc = lyxml_check_qname(context, &in, &c, &endtag_len);
436 LY_CHECK_RET(rc);
437
438 (*prefix) = id;
439 (*prefix_len) = endtag - id;
440 id = endtag + 1;
441 }
442 if (!is_xmlws(c) && c != '=') {
443 in = in - endtag_len;
444 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in), in, "whitespace or '='");
445 return LY_EVALID;
446 }
447 in = in - endtag_len;
448 (*name) = id;
449 (*name_len) = in - id;
450
451 /* eat '=' and stop at the value beginning */
452 ign_xmlws(context, in);
453 if (in[0] != '=') {
454 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in), in, "'='");
455 return LY_EVALID;
456 }
457 ++in;
458 ign_xmlws(context, in);
459 if (in[0] != '\'' && in[0] != '"') {
Radek Krejcib1890642018-10-03 14:05:40 +0200460 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP,
461 LY_VCODE_INSTREXP_len(in), in, "either single or double quotation mark");
Radek Krejcid972c252018-09-25 13:23:39 +0200462 return LY_EVALID;
463 }
Radek Krejcib1890642018-10-03 14:05:40 +0200464 context->status = LYXML_ATTR_CONTENT;
Radek Krejcid972c252018-09-25 13:23:39 +0200465
466success:
467 /* move caller's input */
468 (*input) = in;
469 return LY_SUCCESS;
470}
471
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200472LY_ERR
Radek Krejci7a7fa902018-09-25 17:08:21 +0200473lyxml_get_element(struct lyxml_context *context, const char **input,
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200474 const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
475{
476 struct ly_ctx *ctx = context->ctx; /* shortcut */
477 const char *in = (*input);
478 const char *endtag;
479 const char *sectname;
480 const char *id;
481 size_t endtag_len, newlines;
Radek Krejcib1890642018-10-03 14:05:40 +0200482 bool loop = true, closing = false;
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200483 unsigned int c;
484 LY_ERR rc;
Radek Krejcib1890642018-10-03 14:05:40 +0200485 struct lyxml_elem *e;
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200486
487 /* initialize output variables */
488 (*prefix) = (*name) = NULL;
489 (*prefix_len) = (*name_len) = 0;
490
491 while (loop) {
492 ign_xmlws(context, in);
493
494 if (in[0] == '\0') {
495 /* EOF */
Radek Krejcib1890642018-10-03 14:05:40 +0200496 context->status = LYXML_END;
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200497 goto success;
498 } else if (in[0] != '<') {
499 return LY_EINVAL;
500 }
501 move_input(context, in, 1);
502
503 if (in[0] == '!') {
504 move_input(context, in, 1);
505 /* sections to ignore */
506 if (!strncmp(in, "--", 2)) {
507 /* comment */
508 move_input(context, in, 2);
509 sectname = "Comment";
510 endtag = "-->";
511 endtag_len = 3;
512 } else if (!strncmp(in, "[CDATA[", 7)) {
513 /* CDATA section */
514 move_input(context, in, 7);
515 sectname = "CData";
516 endtag = "]]>";
517 endtag_len = 3;
518 } else if (!strncmp(in, "DOCTYPE", 7)) {
519 /* Document type declaration - not supported */
520 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_NSUPP, "Document Type Declaration");
521 return LY_EVALID;
522 }
523 in = ign_todelim(in, endtag, endtag_len, &newlines);
524 LY_CHECK_ERR_RET(!in, LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_NTERM, sectname), LY_EVALID);
525 context->line += newlines;
526 in += endtag_len;
527 } else if (in[0] == '?') {
528 in = ign_todelim(in, "?>", 2, &newlines);
529 LY_CHECK_ERR_RET(!in, LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_NTERM, "Declaration"), LY_EVALID);
530 context->line += newlines;
531 in += 2;
Radek Krejcib1890642018-10-03 14:05:40 +0200532 } else if (in[0] == '/') {
533 /* closing element */
534 closing = true;
535 ++in;
536 goto element;
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200537 } else {
538 /* element */
Radek Krejcib1890642018-10-03 14:05:40 +0200539element:
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200540 ign_xmlws(context, in);
541 LY_CHECK_ERR_RET(!in[0], LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF), LY_EVALID);
542
543 /* remember the identifier start before checking its format */
544 id = in;
545 rc = lyxml_check_qname(context, &in, &c, &endtag_len);
546 LY_CHECK_RET(rc);
547 if (c == ':') {
548 /* we have prefixed identifier */
549 endtag = in - endtag_len;
550
551 rc = lyxml_check_qname(context, &in, &c, &endtag_len);
552 LY_CHECK_RET(rc);
553
554 (*prefix) = id;
555 (*prefix_len) = endtag - id;
556 id = endtag + 1;
557 }
558 if (!is_xmlws(c) && c != '/' && c != '>') {
559 in = in - endtag_len;
Radek Krejcid972c252018-09-25 13:23:39 +0200560 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in), in,
561 "whitespace or element tag termination ('>' or '/>'");
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200562 return LY_EVALID;
563 }
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200564 (*name) = id;
Radek Krejcib1890642018-10-03 14:05:40 +0200565 (*name_len) = in - endtag_len - id;
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200566
Radek Krejcib1890642018-10-03 14:05:40 +0200567 if (is_xmlws(c)) {
568 /* go to the next meaningful input */
569 ign_xmlws(context, in);
570 LY_CHECK_ERR_RET(!in[0], LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF), LY_EVALID);
571 c = in[0];
572 ++in;
573 endtag_len = 1;
574 }
575
576 if (closing) {
577 /* match opening and closing element tags */
578 LY_CHECK_ERR_RET(
579 !context->elements.count,
580 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Opening and closing elements tag missmatch (\"%.*s\").", name_len, *name),
581 LY_EVALID);
582 e = (struct lyxml_elem*)context->elements.objs[context->elements.count - 1];
583 LY_CHECK_ERR_RET(e->prefix_len != *prefix_len || e->name_len != *name_len
584 || (*prefix_len && strncmp(*prefix, e->prefix, e->prefix_len)) || strncmp(*name, e->name, e->name_len),
585 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Opening and closing elements tag missmatch (\"%.*s\").", name_len, *name),
586 LY_EVALID);
587 /* opening and closing element tags matches, remove record from the opening tags list */
588 free(e);
589 --context->elements.count;
590 /* do not return element information to announce closing element being currently processed */
591 *name = *prefix = NULL;
592 *name_len = *prefix_len = 0;
593
594 if (c == '>') {
595 /* end of closing element */
596 context->status = LYXML_ELEMENT;
597 } else {
598 in -= endtag_len;
599 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Unexpected data \"%.*s\" in closing element tag.",
600 LY_VCODE_INSTREXP_len(in), in);
601 return LY_EVALID;
602 }
603 } else {
604 if (c == '>') {
605 /* end of opening element */
606 context->status = LYXML_ELEM_CONTENT;
607 } else if (c == '/' && in[0] == '>') {
608 /* empty element closing */
609 context->status = LYXML_ELEMENT;
610 ++in;
611 } else {
612 /* attribute */
613 context->status = LYXML_ATTRIBUTE;
614 in -= endtag_len;
615 }
616
617 if (context->status != LYXML_ELEMENT) {
618 /* store element opening tag information */
619 e = malloc(sizeof *e);
620 LY_CHECK_ERR_RET(!e, LOGMEM(ctx), LY_EMEM);
621 e->name = *name;
622 e->prefix = *prefix;
623 e->name_len = *name_len;
624 e->prefix_len = *prefix_len;
625 ly_set_add(&context->elements, e, LY_SET_OPT_USEASLIST);
626 }
627 }
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200628 loop = false;
629 }
630 }
631
632success:
633 /* move caller's input */
634 (*input) = in;
635 return LY_SUCCESS;
636}
637
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200638LY_ERR
639lyxml_ns_add(struct lyxml_context *context, const char *element_name, const char *prefix, size_t prefix_len, char *uri)
640{
641 struct lyxml_ns *ns;
642
643 ns = malloc(sizeof *ns);
644 LY_CHECK_ERR_RET(!ns, LOGMEM(context->ctx), LY_EMEM);
645
646 ns->element = element_name;
647 ns->uri = uri;
648 if (prefix) {
649 ns->prefix = strndup(prefix, prefix_len);
650 LY_CHECK_ERR_RET(!ns->prefix, LOGMEM(context->ctx); free(ns), LY_EMEM);
651 } else {
652 ns->prefix = NULL;
653 }
654
655 LY_CHECK_ERR_RET(ly_set_add(&context->ns, ns, LY_SET_OPT_USEASLIST) == -1, free(ns->prefix), LY_EMEM);
656 return LY_SUCCESS;
657}
658
659const struct lyxml_ns *
660lyxml_ns_get(struct lyxml_context *context, const char *prefix, size_t prefix_len)
661{
662 unsigned int u;
663 struct lyxml_ns *ns;
664
665 for (u = context->ns.count - 1; u + 1 > 0; --u) {
666 ns = (struct lyxml_ns *)context->ns.objs[u];
667 if (prefix) {
668 if (!strncmp(prefix, ns->prefix, prefix_len) && ns->prefix[prefix_len] == '\0') {
669 return ns;
670 }
671 } else if (!ns->prefix) {
672 /* default namespace */
673 return ns;
674 }
675 }
676
677 return NULL;
678}
679
680LY_ERR
681lyxml_ns_rm(struct lyxml_context *context, const char *element_name)
682{
683 unsigned int u;
684
685 for (u = context->ns.count - 1; u + 1 > 0; --u) {
686 if (((struct lyxml_ns *)context->ns.objs[u])->element != element_name) {
687 /* we are done, the namespaces from a single element are supposed to be together */
688 break;
689 }
690 /* remove the ns structure */
691 free(((struct lyxml_ns *)context->ns.objs[u])->prefix);
692 free(((struct lyxml_ns *)context->ns.objs[u])->uri);
693 free(context->ns.objs[u]);
694 --context->ns.count;
695 }
696
697 if (!context->ns.count) {
698 /* cleanup the context's namespaces storage */
699 ly_set_erase(&context->ns, NULL);
700 }
701
702 return LY_SUCCESS;
703}
Radek Krejcib1890642018-10-03 14:05:40 +0200704
705void
706lyxml_context_clear(struct lyxml_context *context)
707{
708 unsigned int u;
709
710 ly_set_erase(&context->elements, free);
711 for (u = context->ns.count - 1; u + 1 > 0; --u) {
712 /* remove the ns structure */
713 free(((struct lyxml_ns *)context->ns.objs[u])->prefix);
714 free(((struct lyxml_ns *)context->ns.objs[u])->uri);
715 free(context->ns.objs[u]);
716 }
717 ly_set_erase(&context->ns, NULL);
718}