blob: 06d79ce4d9e2abd9ce1ee8f2a89d958ecd81bb8a [file] [log] [blame]
Radek Krejcid91dbaf2018-09-21 15:51:39 +02001/**
2 * @file xml.c
3 * @author Radek Krejci <rkrejci@cesnet.cz>
4 * @brief Generic XML parser implementation for libyang
5 *
6 * Copyright (c) 2015 - 2018 CESNET, z.s.p.o.
7 *
8 * This source code is licensed under BSD 3-Clause License (the "License").
9 * You may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * https://opensource.org/licenses/BSD-3-Clause
13 */
14
Radek Krejci4b74d5e2018-09-26 14:30:55 +020015#define _POSIX_C_SOURCE 200809L /* strndup() */
16
Radek Krejcib1890642018-10-03 14:05:40 +020017#include <assert.h>
Radek Krejci7a7fa902018-09-25 17:08:21 +020018#include <ctype.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020019#include <stdbool.h>
20#include <stdint.h>
Radek Krejci4b74d5e2018-09-26 14:30:55 +020021#include <string.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020022
23#include "libyang.h"
24#include "xml.h"
25#include "common.h"
26
/**
 * Move the input pointer p forward by s characters. If the new position holds
 * the terminating NUL byte, an unexpected-EOF validation error is logged with
 * the line number kept in the lyxml_context c and LY_EVALID is handed to
 * LY_CHECK_ERR_RET (which is expected to return it from the calling function).
 *
 * The body is wrapped in do { } while (0) and the arguments are parenthesized,
 * so the macro behaves as a single statement (safe in unbraced if/else) and
 * accepts arbitrary expressions as arguments.
 */
#define move_input(c,p,s) \
    do { \
        (p) += (s); \
        LY_CHECK_ERR_RET(!(p)[0], LOGVAL((c)->ctx, LY_VLOG_LINE, &(c)->line, LY_VCODE_EOF), LY_EVALID); \
    } while (0)
29
/**
 * Skip XML whitespace characters at the input pointer p. The line counter of
 * the lyxml_context c is incremented for every newline that is skipped.
 *
 * The body is wrapped in do { } while (0) and the arguments are parenthesized,
 * so the macro behaves as a single statement (safe in unbraced if/else).
 */
#define ign_xmlws(c,p) \
    do { \
        while (is_xmlws(*(p))) { \
            if (*(p) == '\n') { \
                ++(c)->line; \
            } \
            ++(p); \
        } \
    } while (0)
32
/**
 * @brief Skip the input up to the first occurrence of a delimiter.
 *
 * Newlines passed on the way are counted, so the caller can update its line counter.
 *
 * @param[in] input NUL-terminated string to search in.
 * @param[in] delim Delimiter to search for.
 * @param[in] delim_len Number of bytes of @p delim to compare.
 * @param[out] newlines Number of newline characters found before the delimiter
 * (or in the whole input when the delimiter is not found).
 * @return Pointer to the beginning of the delimiter in @p input, NULL if the delimiter is not present.
 */
static const char *
ign_todelim(register const char *input, const char *delim, size_t delim_len, size_t *newlines)
{
    size_t matched;

    *newlines = 0;
    while (*input) {
        if (*input == *delim) {
            /* candidate position - compare the whole delimiter byte by byte */
            matched = 0;
            while (matched < delim_len && input[matched] == delim[matched]) {
                ++matched;
            }
            if (matched == delim_len) {
                return input;
            }
        } else if (*input == '\n') {
            ++(*newlines);
        }
        ++input;
    }

    return NULL;
}
67
Radek Krejci4b74d5e2018-09-26 14:30:55 +020068/**
Radek Krejci7a7fa902018-09-25 17:08:21 +020069 * Store UTF-8 character specified as 4byte integer into the dst buffer.
70 * Returns number of written bytes (4 max), expects that dst has enough space.
71 *
72 * UTF-8 mapping:
73 * 00000000 -- 0000007F: 0xxxxxxx
74 * 00000080 -- 000007FF: 110xxxxx 10xxxxxx
75 * 00000800 -- 0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
76 * 00010000 -- 001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
77 *
78 * Includes checking for valid characters (following RFC 7950, sec 9.4)
79 */
80static LY_ERR
Radek Krejci117d2082018-09-26 10:05:14 +020081lyxml_pututf8(char *dst, uint32_t value, size_t *bytes_written)
Radek Krejci7a7fa902018-09-25 17:08:21 +020082{
83 if (value < 0x80) {
84 /* one byte character */
85 if (value < 0x20 &&
86 value != 0x09 &&
87 value != 0x0a &&
88 value != 0x0d) {
89 return LY_EINVAL;
90 }
91
92 dst[0] = value;
93 (*bytes_written) = 1;
94 } else if (value < 0x800) {
95 /* two bytes character */
96 dst[0] = 0xc0 | (value >> 6);
97 dst[1] = 0x80 | (value & 0x3f);
98 (*bytes_written) = 2;
99 } else if (value < 0xfffe) {
100 /* three bytes character */
101 if (((value & 0xf800) == 0xd800) ||
102 (value >= 0xfdd0 && value <= 0xfdef)) {
103 /* exclude surrogate blocks %xD800-DFFF */
104 /* exclude noncharacters %xFDD0-FDEF */
105 return LY_EINVAL;
106 }
107
108 dst[0] = 0xe0 | (value >> 12);
109 dst[1] = 0x80 | ((value >> 6) & 0x3f);
110 dst[2] = 0x80 | (value & 0x3f);
111
112 (*bytes_written) = 3;
113 } else if (value < 0x10fffe) {
114 if ((value & 0xffe) == 0xffe) {
115 /* exclude noncharacters %xFFFE-FFFF, %x1FFFE-1FFFF, %x2FFFE-2FFFF, %x3FFFE-3FFFF, %x4FFFE-4FFFF,
116 * %x5FFFE-5FFFF, %x6FFFE-6FFFF, %x7FFFE-7FFFF, %x8FFFE-8FFFF, %x9FFFE-9FFFF, %xAFFFE-AFFFF,
117 * %xBFFFE-BFFFF, %xCFFFE-CFFFF, %xDFFFE-DFFFF, %xEFFFE-EFFFF, %xFFFFE-FFFFF, %x10FFFE-10FFFF */
118 return LY_EINVAL;
119 }
120 /* four bytes character */
121 dst[0] = 0xf0 | (value >> 18);
122 dst[1] = 0x80 | ((value >> 12) & 0x3f);
123 dst[2] = 0x80 | ((value >> 6) & 0x3f);
124 dst[3] = 0x80 | (value & 0x3f);
125
126 (*bytes_written) = 4;
127 }
128 return LY_SUCCESS;
129}
130
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200131/**
132 * @brief Check/Get an XML qualified name from the input string.
133 *
134 * The identifier must have at least one valid character complying the name start character constraints.
135 * The identifier is terminated by the first character, which does not comply to the name character constraints.
136 *
137 * See https://www.w3.org/TR/xml-names/#NT-NCName
138 *
139 * @param[in] context XML context to track lines or store errors into libyang context.
140 * @param[in,out] input Input string to process, updated according to the processed/read data.
141 * Note that the term_char is also read, so input points after the term_char at the end.
142 * @param[out] term_char The first character in the input string which does not compy to the name constraints.
143 * @param[out] term_char_len Number of bytes used to encode UTF8 term_char. Serves to be able to go back in input string.
144 * @return LY_ERR value.
145 */
146static LY_ERR
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200147lyxml_check_qname(struct lyxml_context *context, const char **input, unsigned int *term_char, size_t *term_char_len)
148{
149 unsigned int c;
150 const char *id = (*input);
151 LY_ERR rc;
152
153 /* check NameStartChar (minus colon) */
Radek Krejcib416be62018-10-01 14:51:45 +0200154 LY_CHECK_ERR_RET(ly_getutf8(input, &c, NULL) != LY_SUCCESS,
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200155 LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INCHAR, (*input)[0]), LY_EVALID);
156 LY_CHECK_ERR_RET(!is_xmlqnamestartchar(c),
157 LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX,
158 "Identifier \"%s\" starts with invalid character.", id),
159 LY_EVALID);
160
161 /* check rest of the identifier */
Radek Krejcib416be62018-10-01 14:51:45 +0200162 for (rc = ly_getutf8(input, &c, term_char_len);
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200163 rc == LY_SUCCESS && is_xmlqnamechar(c);
Radek Krejcib416be62018-10-01 14:51:45 +0200164 rc = ly_getutf8(input, &c, term_char_len));
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200165 LY_CHECK_ERR_RET(rc != LY_SUCCESS, LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INCHAR, (*input)[0]), LY_EVALID);
166
167 (*term_char) = c;
168 return LY_SUCCESS;
169}
170
Radek Krejci7a7fa902018-09-25 17:08:21 +0200171LY_ERR
Radek Krejcid70d1072018-10-09 14:20:47 +0200172lyxml_get_string(struct lyxml_context *context, const char **input, char **buffer, size_t *buffer_size, char **output, size_t *length, int *dynamic)
Radek Krejci7a7fa902018-09-25 17:08:21 +0200173{
174#define BUFSIZE 4096
175#define BUFSIZE_STEP 4096
176#define BUFSIZE_CHECK(CTX, BUF, SIZE, CURR, NEED) \
177 if (CURR+NEED >= SIZE) { \
178 BUF = ly_realloc(BUF, SIZE + BUFSIZE_STEP); \
179 LY_CHECK_ERR_RET(!BUF, LOGMEM(CTX), LY_EMEM); \
180 SIZE += BUFSIZE_STEP; \
181 }
182
183 struct ly_ctx *ctx = context->ctx; /* shortcut */
Radek Krejcid70d1072018-10-09 14:20:47 +0200184 const char *in = (*input), *start;
185 char *buf = NULL, delim;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200186 size_t offset; /* read offset in input buffer */
Radek Krejcid70d1072018-10-09 14:20:47 +0200187 size_t len; /* length of the output string (write offset in output buffer) */
Radek Krejci7a7fa902018-09-25 17:08:21 +0200188 size_t size; /* size of the output buffer */
189 void *p;
Radek Krejci117d2082018-09-26 10:05:14 +0200190 uint32_t n;
191 size_t u, newlines;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200192 bool empty_content = false;
193 LY_ERR rc;
194
Radek Krejcib1890642018-10-03 14:05:40 +0200195 assert(context);
196 assert(context->status == LYXML_ELEM_CONTENT || context->status == LYXML_ATTR_CONTENT);
197
Radek Krejci7a7fa902018-09-25 17:08:21 +0200198 if (in[0] == '\'') {
199 delim = '\'';
200 ++in;
201 } else if (in[0] == '"') {
202 delim = '"';
203 ++in;
204 } else {
205 delim = '<';
206 empty_content = true;
207 }
Radek Krejcid70d1072018-10-09 14:20:47 +0200208 start = in;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200209
210 if (empty_content) {
211 /* only when processing element's content - try to ignore whitespaces used to format XML data
212 * before element's child or closing tag */
Radek Krejci117d2082018-09-26 10:05:14 +0200213 for (offset = newlines = 0; in[offset] && is_xmlws(in[offset]); ++offset) {
214 if (in[offset] == '\n') {
215 ++newlines;
216 }
217 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200218 LY_CHECK_ERR_RET(!in[offset], LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF), LY_EVALID);
Radek Krejci117d2082018-09-26 10:05:14 +0200219 context->line += newlines;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200220 if (in[offset] == '<') {
Radek Krejcied6c6ad2018-09-26 09:10:18 +0200221 (*input) = in + offset;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200222 return LY_EINVAL;
223 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200224 }
Radek Krejcid70d1072018-10-09 14:20:47 +0200225 /* init */
226 offset = len = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200227
Radek Krejcid70d1072018-10-09 14:20:47 +0200228 if (0) {
229getbuffer:
230 /* prepare output buffer */
231 if (*buffer) {
232 buf = *buffer;
233 size = *buffer_size;
234 } else {
235 buf = malloc(BUFSIZE);
236 size = BUFSIZE;
237 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
238 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200239 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200240
241 /* parse */
242 while (in[offset]) {
243 if (in[offset] == '&') {
Radek Krejcid70d1072018-10-09 14:20:47 +0200244 if (!buf) {
245 /* it is necessary to modify the input, so we will need a dynamically allocated buffer */
246 goto getbuffer;
247 }
248
Radek Krejci7a7fa902018-09-25 17:08:21 +0200249 if (offset) {
250 /* store what we have so far */
251 BUFSIZE_CHECK(ctx, buf, size, len, offset);
252 memcpy(&buf[len], in, offset);
253 len += offset;
254 in += offset;
255 offset = 0;
256 }
257 /* process reference */
258 /* we will need 4 bytes at most since we support only the predefined
259 * (one-char) entities and character references */
260 BUFSIZE_CHECK(ctx, buf, size, len, 4);
261 ++offset;
262 if (in[offset] != '#') {
263 /* entity reference - only predefined references are supported */
264 if (!strncmp(&in[offset], "lt;", 3)) {
265 buf[len++] = '<';
266 in += 4; /* &lt; */
267 } else if (!strncmp(&in[offset], "gt;", 3)) {
268 buf[len++] = '>';
269 in += 4; /* &gt; */
270 } else if (!strncmp(&in[offset], "amp;", 4)) {
271 buf[len++] = '&';
272 in += 5; /* &amp; */
273 } else if (!strncmp(&in[offset], "apos;", 5)) {
274 buf[len++] = '\'';
275 in += 6; /* &apos; */
276 } else if (!strncmp(&in[offset], "quot;", 5)) {
277 buf[len++] = '\"';
278 in += 6; /* &quot; */
279 } else {
Radek Krejcied6c6ad2018-09-26 09:10:18 +0200280 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX,
281 "Entity reference \"%.*s\" not supported, only predefined references allowed.", 10, &in[offset-1]);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200282 goto error;
283 }
284 offset = 0;
285 } else {
286 p = (void*)&in[offset - 1];
287 /* character reference */
288 ++offset;
289 if (isdigit(in[offset])) {
290 for (n = 0; isdigit(in[offset]); offset++) {
291 n = (10 * n) + (in[offset] - '0');
292 }
293 } else if (in[offset] == 'x' && isxdigit(in[offset + 1])) {
294 for (n = 0, ++offset; isxdigit(in[offset]); offset++) {
295 if (isdigit(in[offset])) {
296 u = (in[offset] - '0');
297 } else if (in[offset] > 'F') {
298 u = 10 + (in[offset] - 'a');
299 } else {
300 u = 10 + (in[offset] - 'A');
301 }
302 n = (16 * n) + u;
303 }
304 } else {
Radek Krejcied6c6ad2018-09-26 09:10:18 +0200305 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Invalid character reference \"%.*s\".", 12, p);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200306 goto error;
307
308 }
309 LY_CHECK_ERR_GOTO(in[offset] != ';',
310 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP,
311 LY_VCODE_INSTREXP_len(&in[offset]), &in[offset], ";"),
312 error);
313 ++offset;
314 rc = lyxml_pututf8(&buf[len], n, &u);
315 LY_CHECK_ERR_GOTO(rc, LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX,
Radek Krejci117d2082018-09-26 10:05:14 +0200316 "Invalid character reference \"%.*s\" (0x%08x).", 12, p, n),
Radek Krejci7a7fa902018-09-25 17:08:21 +0200317 error);
318 len += u;
319 in += offset;
320 offset = 0;
321 }
322 } else if (in[offset] == delim) {
323 /* end of string */
Radek Krejcid70d1072018-10-09 14:20:47 +0200324 if (buf) {
325 if (len + offset >= size) {
326 buf = ly_realloc(buf, len + offset + 1);
327 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
328 size = len + offset + 1;
329 }
330 memcpy(&buf[len], in, offset);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200331 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200332 len += offset;
333 /* in case of element content, keep the leading <,
Radek Krejcib1890642018-10-03 14:05:40 +0200334 * for attribute's value move after the terminating quotation mark */
335 if (context->status == LYXML_ELEM_CONTENT) {
Radek Krejci7a7fa902018-09-25 17:08:21 +0200336 in += offset;
337 } else {
338 in += offset + 1;
339 }
340 goto success;
341 } else {
342 /* log lines */
343 if (in[offset] == '\n') {
344 ++context->line;
345 }
346
347 /* continue */
348 ++offset;
349 }
350 }
351 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF);
352error:
353 if (!(*buffer)) {
354 free(buf);
355 }
356 return LY_EVALID;
357
358success:
Radek Krejcid70d1072018-10-09 14:20:47 +0200359 if (buf) {
360 if (!(*buffer) && size != len + 1) {
361 /* not using provided buffer, so fit the allocated buffer to what we really have inside */
362 p = realloc(buf, len + 1);
363 /* ignore realloc fail because we are reducing the buffer,
364 * so just return bigger buffer than needed */
365 if (p) {
366 size = len + 1;
367 buf = p;
368 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200369 }
Radek Krejcid70d1072018-10-09 14:20:47 +0200370 /* set terminating NULL byte */
371 buf[len] = '\0';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200372 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200373
Radek Krejcib1890642018-10-03 14:05:40 +0200374 context->status -= 1;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200375 (*input) = in;
Radek Krejcid70d1072018-10-09 14:20:47 +0200376 if (buf) {
377 (*buffer) = buf;
378 (*buffer_size) = size;
379 (*output) = buf;
380 (*dynamic) = 1;
381 } else {
382 (*output) = (char*)start;
383 (*dynamic) = 0;
384 }
385 (*length) = len;
386
Radek Krejci7a7fa902018-09-25 17:08:21 +0200387 return LY_SUCCESS;
388
389#undef BUFSIZE
390#undef BUFSIZE_STEP
391#undef BUFSIZE_CHECK
392}
393
Radek Krejcid972c252018-09-25 13:23:39 +0200394LY_ERR
Radek Krejci7a7fa902018-09-25 17:08:21 +0200395lyxml_get_attribute(struct lyxml_context *context, const char **input,
Radek Krejcid972c252018-09-25 13:23:39 +0200396 const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
397{
398 struct ly_ctx *ctx = context->ctx; /* shortcut */
399 const char *in = (*input);
400 const char *id;
401 const char *endtag;
402 LY_ERR rc;
403 unsigned int c;
404 size_t endtag_len;
405
406 /* initialize output variables */
407 (*prefix) = (*name) = NULL;
408 (*prefix_len) = (*name_len) = 0;
409
410 /* skip initial whitespaces */
411 ign_xmlws(context, in);
412
413 if (in[0] == '\0') {
414 /* EOF - not expected at this place */
415 return LY_EINVAL;
Radek Krejcib1890642018-10-03 14:05:40 +0200416 } else if (in[0] == '>') {
417 /* element terminated by > - termination of the opening tag */
418 context->status = LYXML_ELEM_CONTENT;
419 ++in;
420 goto success;
421 } else if (in[0] == '/' && in[1] == '>') {
422 /* element terminated by /> - termination of an empty element */
423 context->status = LYXML_ELEMENT;
424 in += 2;
Radek Krejcid972c252018-09-25 13:23:39 +0200425 goto success;
426 }
427
428 /* remember the identifier start before checking its format */
429 id = in;
430 rc = lyxml_check_qname(context, &in, &c, &endtag_len);
431 LY_CHECK_RET(rc);
432 if (c == ':') {
433 /* we have prefixed identifier */
434 endtag = in - endtag_len;
435
436 rc = lyxml_check_qname(context, &in, &c, &endtag_len);
437 LY_CHECK_RET(rc);
438
439 (*prefix) = id;
440 (*prefix_len) = endtag - id;
441 id = endtag + 1;
442 }
443 if (!is_xmlws(c) && c != '=') {
444 in = in - endtag_len;
445 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in), in, "whitespace or '='");
446 return LY_EVALID;
447 }
448 in = in - endtag_len;
449 (*name) = id;
450 (*name_len) = in - id;
451
452 /* eat '=' and stop at the value beginning */
453 ign_xmlws(context, in);
454 if (in[0] != '=') {
455 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in), in, "'='");
456 return LY_EVALID;
457 }
458 ++in;
459 ign_xmlws(context, in);
460 if (in[0] != '\'' && in[0] != '"') {
Radek Krejcib1890642018-10-03 14:05:40 +0200461 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP,
462 LY_VCODE_INSTREXP_len(in), in, "either single or double quotation mark");
Radek Krejcid972c252018-09-25 13:23:39 +0200463 return LY_EVALID;
464 }
Radek Krejcib1890642018-10-03 14:05:40 +0200465 context->status = LYXML_ATTR_CONTENT;
Radek Krejcid972c252018-09-25 13:23:39 +0200466
467success:
468 /* move caller's input */
469 (*input) = in;
470 return LY_SUCCESS;
471}
472
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200473LY_ERR
Radek Krejci7a7fa902018-09-25 17:08:21 +0200474lyxml_get_element(struct lyxml_context *context, const char **input,
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200475 const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
476{
477 struct ly_ctx *ctx = context->ctx; /* shortcut */
478 const char *in = (*input);
479 const char *endtag;
480 const char *sectname;
481 const char *id;
482 size_t endtag_len, newlines;
Radek Krejcib1890642018-10-03 14:05:40 +0200483 bool loop = true, closing = false;
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200484 unsigned int c;
485 LY_ERR rc;
Radek Krejcib1890642018-10-03 14:05:40 +0200486 struct lyxml_elem *e;
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200487
488 /* initialize output variables */
489 (*prefix) = (*name) = NULL;
490 (*prefix_len) = (*name_len) = 0;
491
492 while (loop) {
493 ign_xmlws(context, in);
494
495 if (in[0] == '\0') {
496 /* EOF */
Radek Krejcib1890642018-10-03 14:05:40 +0200497 context->status = LYXML_END;
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200498 goto success;
499 } else if (in[0] != '<') {
500 return LY_EINVAL;
501 }
502 move_input(context, in, 1);
503
504 if (in[0] == '!') {
505 move_input(context, in, 1);
506 /* sections to ignore */
507 if (!strncmp(in, "--", 2)) {
508 /* comment */
509 move_input(context, in, 2);
510 sectname = "Comment";
511 endtag = "-->";
512 endtag_len = 3;
513 } else if (!strncmp(in, "[CDATA[", 7)) {
514 /* CDATA section */
515 move_input(context, in, 7);
516 sectname = "CData";
517 endtag = "]]>";
518 endtag_len = 3;
519 } else if (!strncmp(in, "DOCTYPE", 7)) {
520 /* Document type declaration - not supported */
521 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_NSUPP, "Document Type Declaration");
522 return LY_EVALID;
523 }
524 in = ign_todelim(in, endtag, endtag_len, &newlines);
525 LY_CHECK_ERR_RET(!in, LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_NTERM, sectname), LY_EVALID);
526 context->line += newlines;
527 in += endtag_len;
528 } else if (in[0] == '?') {
529 in = ign_todelim(in, "?>", 2, &newlines);
530 LY_CHECK_ERR_RET(!in, LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_NTERM, "Declaration"), LY_EVALID);
531 context->line += newlines;
532 in += 2;
Radek Krejcib1890642018-10-03 14:05:40 +0200533 } else if (in[0] == '/') {
534 /* closing element */
535 closing = true;
536 ++in;
537 goto element;
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200538 } else {
539 /* element */
Radek Krejcib1890642018-10-03 14:05:40 +0200540element:
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200541 ign_xmlws(context, in);
542 LY_CHECK_ERR_RET(!in[0], LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF), LY_EVALID);
543
544 /* remember the identifier start before checking its format */
545 id = in;
546 rc = lyxml_check_qname(context, &in, &c, &endtag_len);
547 LY_CHECK_RET(rc);
548 if (c == ':') {
549 /* we have prefixed identifier */
550 endtag = in - endtag_len;
551
552 rc = lyxml_check_qname(context, &in, &c, &endtag_len);
553 LY_CHECK_RET(rc);
554
555 (*prefix) = id;
556 (*prefix_len) = endtag - id;
557 id = endtag + 1;
558 }
559 if (!is_xmlws(c) && c != '/' && c != '>') {
560 in = in - endtag_len;
Radek Krejcid972c252018-09-25 13:23:39 +0200561 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in), in,
562 "whitespace or element tag termination ('>' or '/>'");
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200563 return LY_EVALID;
564 }
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200565 (*name) = id;
Radek Krejcib1890642018-10-03 14:05:40 +0200566 (*name_len) = in - endtag_len - id;
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200567
Radek Krejcib1890642018-10-03 14:05:40 +0200568 if (is_xmlws(c)) {
569 /* go to the next meaningful input */
570 ign_xmlws(context, in);
571 LY_CHECK_ERR_RET(!in[0], LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF), LY_EVALID);
572 c = in[0];
573 ++in;
574 endtag_len = 1;
575 }
576
577 if (closing) {
578 /* match opening and closing element tags */
579 LY_CHECK_ERR_RET(
580 !context->elements.count,
581 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Opening and closing elements tag missmatch (\"%.*s\").", name_len, *name),
582 LY_EVALID);
583 e = (struct lyxml_elem*)context->elements.objs[context->elements.count - 1];
584 LY_CHECK_ERR_RET(e->prefix_len != *prefix_len || e->name_len != *name_len
585 || (*prefix_len && strncmp(*prefix, e->prefix, e->prefix_len)) || strncmp(*name, e->name, e->name_len),
586 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Opening and closing elements tag missmatch (\"%.*s\").", name_len, *name),
587 LY_EVALID);
588 /* opening and closing element tags matches, remove record from the opening tags list */
589 free(e);
590 --context->elements.count;
591 /* do not return element information to announce closing element being currently processed */
592 *name = *prefix = NULL;
593 *name_len = *prefix_len = 0;
594
595 if (c == '>') {
596 /* end of closing element */
597 context->status = LYXML_ELEMENT;
598 } else {
599 in -= endtag_len;
600 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Unexpected data \"%.*s\" in closing element tag.",
601 LY_VCODE_INSTREXP_len(in), in);
602 return LY_EVALID;
603 }
604 } else {
605 if (c == '>') {
606 /* end of opening element */
607 context->status = LYXML_ELEM_CONTENT;
608 } else if (c == '/' && in[0] == '>') {
609 /* empty element closing */
610 context->status = LYXML_ELEMENT;
611 ++in;
612 } else {
613 /* attribute */
614 context->status = LYXML_ATTRIBUTE;
615 in -= endtag_len;
616 }
617
618 if (context->status != LYXML_ELEMENT) {
619 /* store element opening tag information */
620 e = malloc(sizeof *e);
621 LY_CHECK_ERR_RET(!e, LOGMEM(ctx), LY_EMEM);
622 e->name = *name;
623 e->prefix = *prefix;
624 e->name_len = *name_len;
625 e->prefix_len = *prefix_len;
626 ly_set_add(&context->elements, e, LY_SET_OPT_USEASLIST);
627 }
628 }
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200629 loop = false;
630 }
631 }
632
633success:
634 /* move caller's input */
635 (*input) = in;
636 return LY_SUCCESS;
637}
638
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200639LY_ERR
640lyxml_ns_add(struct lyxml_context *context, const char *element_name, const char *prefix, size_t prefix_len, char *uri)
641{
642 struct lyxml_ns *ns;
643
644 ns = malloc(sizeof *ns);
645 LY_CHECK_ERR_RET(!ns, LOGMEM(context->ctx), LY_EMEM);
646
647 ns->element = element_name;
648 ns->uri = uri;
649 if (prefix) {
650 ns->prefix = strndup(prefix, prefix_len);
651 LY_CHECK_ERR_RET(!ns->prefix, LOGMEM(context->ctx); free(ns), LY_EMEM);
652 } else {
653 ns->prefix = NULL;
654 }
655
656 LY_CHECK_ERR_RET(ly_set_add(&context->ns, ns, LY_SET_OPT_USEASLIST) == -1, free(ns->prefix), LY_EMEM);
657 return LY_SUCCESS;
658}
659
660const struct lyxml_ns *
661lyxml_ns_get(struct lyxml_context *context, const char *prefix, size_t prefix_len)
662{
663 unsigned int u;
664 struct lyxml_ns *ns;
665
666 for (u = context->ns.count - 1; u + 1 > 0; --u) {
667 ns = (struct lyxml_ns *)context->ns.objs[u];
668 if (prefix) {
669 if (!strncmp(prefix, ns->prefix, prefix_len) && ns->prefix[prefix_len] == '\0') {
670 return ns;
671 }
672 } else if (!ns->prefix) {
673 /* default namespace */
674 return ns;
675 }
676 }
677
678 return NULL;
679}
680
681LY_ERR
682lyxml_ns_rm(struct lyxml_context *context, const char *element_name)
683{
684 unsigned int u;
685
686 for (u = context->ns.count - 1; u + 1 > 0; --u) {
687 if (((struct lyxml_ns *)context->ns.objs[u])->element != element_name) {
688 /* we are done, the namespaces from a single element are supposed to be together */
689 break;
690 }
691 /* remove the ns structure */
692 free(((struct lyxml_ns *)context->ns.objs[u])->prefix);
693 free(((struct lyxml_ns *)context->ns.objs[u])->uri);
694 free(context->ns.objs[u]);
695 --context->ns.count;
696 }
697
698 if (!context->ns.count) {
699 /* cleanup the context's namespaces storage */
700 ly_set_erase(&context->ns, NULL);
701 }
702
703 return LY_SUCCESS;
704}
Radek Krejcib1890642018-10-03 14:05:40 +0200705
706void
707lyxml_context_clear(struct lyxml_context *context)
708{
709 unsigned int u;
710
711 ly_set_erase(&context->elements, free);
712 for (u = context->ns.count - 1; u + 1 > 0; --u) {
713 /* remove the ns structure */
714 free(((struct lyxml_ns *)context->ns.objs[u])->prefix);
715 free(((struct lyxml_ns *)context->ns.objs[u])->uri);
716 free(context->ns.objs[u]);
717 }
718 ly_set_erase(&context->ns, NULL);
719}