blob: db25d707bc38d0067027575e34d8fe3c5e51da0b [file] [log] [blame]
Radek Krejcid91dbaf2018-09-21 15:51:39 +02001/**
2 * @file xml.c
3 * @author Radek Krejci <rkrejci@cesnet.cz>
4 * @brief Generic XML parser implementation for libyang
5 *
6 * Copyright (c) 2015 - 2018 CESNET, z.s.p.o.
7 *
8 * This source code is licensed under BSD 3-Clause License (the "License").
9 * You may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * https://opensource.org/licenses/BSD-3-Clause
13 */
14
Radek Krejci4b74d5e2018-09-26 14:30:55 +020015#define _POSIX_C_SOURCE 200809L /* strndup() */
16
Radek Krejcib1890642018-10-03 14:05:40 +020017#include <assert.h>
Radek Krejci7a7fa902018-09-25 17:08:21 +020018#include <ctype.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020019#include <stdbool.h>
20#include <stdint.h>
Radek Krejci4b74d5e2018-09-26 14:30:55 +020021#include <string.h>
Radek Krejcid91dbaf2018-09-21 15:51:39 +020022
23#include "libyang.h"
24#include "xml.h"
25#include "common.h"
26
/* Macro to test if character is whitespace (XML production S: space, tab, LF, CR) */
#define is_xmlws(c) ((c) == 0x20 || (c) == 0x9 || (c) == 0xa || (c) == 0xd)

/* Macro to test if character is allowed to be a first character of a qualified identifier
 * (XML NameStartChar with the colon intentionally excluded) */
#define is_xmlqnamestartchar(c) (((c) >= 'a' && (c) <= 'z') || (c) == '_' || \
        ((c) >= 'A' && (c) <= 'Z') || /* (c) == ':' || */ \
        ((c) >= 0x370 && (c) <= 0x1fff && (c) != 0x37e) || \
        ((c) >= 0xc0 && (c) <= 0x2ff && (c) != 0xd7 && (c) != 0xf7) || (c) == 0x200c || \
        (c) == 0x200d || ((c) >= 0x2070 && (c) <= 0x218f) || \
        ((c) >= 0x2c00 && (c) <= 0x2fef) || ((c) >= 0x3001 && (c) <= 0xd7ff) || \
        ((c) >= 0xf900 && (c) <= 0xfdcf) || ((c) >= 0xfdf0 && (c) <= 0xfffd) || \
        ((c) >= 0x10000 && (c) <= 0xeffff))

/* Macro to test if character is allowed to be used in a qualified identifier
 * (XML NameChar with the colon intentionally excluded).
 * The combining-tie range is U+203F..U+2040. */
#define is_xmlqnamechar(c) (((c) >= 'a' && (c) <= 'z') || (c) == '_' || (c) == '-' || \
        ((c) >= 'A' && (c) <= 'Z') || ((c) >= '0' && (c) <= '9') || /* (c) == ':' || */ \
        (c) == '.' || (c) == 0xb7 || ((c) >= 0x370 && (c) <= 0x1fff && (c) != 0x37e) || \
        ((c) >= 0xc0 && (c) <= 0x2ff && (c) != 0xd7 && (c) != 0xf7) || (c) == 0x200c || \
        (c) == 0x200d || ((c) >= 0x300 && (c) <= 0x36f) || \
        ((c) >= 0x2070 && (c) <= 0x218f) || ((c) >= 0x203f && (c) <= 0x2040) || \
        ((c) >= 0x2c00 && (c) <= 0x2fef) || ((c) >= 0x3001 && (c) <= 0xd7ff) || \
        ((c) >= 0xf900 && (c) <= 0xfdcf) || ((c) >= 0xfdf0 && (c) <= 0xfffd) || \
        ((c) >= 0x10000 && (c) <= 0xeffff))

/* Move input p by s characters, if EOF log with lyxml_context c and return LY_EVALID
 * from the calling function. Wrapped in do/while(0) so it behaves as a single statement. */
#define move_input(c,p,s) \
    do { \
        (p) += (s); \
        LY_CHECK_ERR_RET(!(p)[0], LOGVAL((c)->ctx, LY_VLOG_LINE, &(c)->line, LY_VCODE_EOF), LY_EVALID); \
    } while (0)

/* Ignore whitespaces in the input string p, counting the skipped newlines in lyxml_context c */
#define ign_xmlws(c,p) while (is_xmlws(*(p))) {if (*(p) == '\n') {++(c)->line;} ++(p);}
56
/**
 * @brief Skip characters of a NULL-terminated string until a delimiter is found.
 *
 * Newlines passed on the way are counted so the caller can keep its line counter accurate.
 *
 * @param[in] input NULL-terminated string to search in.
 * @param[in] delim Delimiter to look for.
 * @param[in] delim_len Number of bytes of @p delim to match.
 * @param[out] newlines Number of '\n' characters skipped before the returned position
 * (always reset to 0 first).
 * @return Pointer to the first byte of the delimiter inside @p input,
 * NULL if the end of the string was reached without finding it.
 */
static const char *
ign_todelim(register const char *input, const char *delim, size_t delim_len, size_t *newlines)
{
    size_t matched;

    *newlines = 0;
    while (*input) {
        if (*input == *delim) {
            /* the first byte fits, compare the rest of the delimiter byte by byte;
             * the comparison stops at the first difference, so the terminating NULL
             * byte of the input is never passed */
            matched = 0;
            while (matched < delim_len && input[matched] == delim[matched]) {
                ++matched;
            }
            if (matched == delim_len) {
                return input;
            }
        } else if (*input == '\n') {
            ++(*newlines);
        }
        ++input;
    }

    return NULL;
}
91
Radek Krejci4b74d5e2018-09-26 14:30:55 +020092/**
Radek Krejci7a7fa902018-09-25 17:08:21 +020093 * Store UTF-8 character specified as 4byte integer into the dst buffer.
94 * Returns number of written bytes (4 max), expects that dst has enough space.
95 *
96 * UTF-8 mapping:
97 * 00000000 -- 0000007F: 0xxxxxxx
98 * 00000080 -- 000007FF: 110xxxxx 10xxxxxx
99 * 00000800 -- 0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
100 * 00010000 -- 001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
101 *
102 * Includes checking for valid characters (following RFC 7950, sec 9.4)
103 */
104static LY_ERR
Radek Krejci117d2082018-09-26 10:05:14 +0200105lyxml_pututf8(char *dst, uint32_t value, size_t *bytes_written)
Radek Krejci7a7fa902018-09-25 17:08:21 +0200106{
107 if (value < 0x80) {
108 /* one byte character */
109 if (value < 0x20 &&
110 value != 0x09 &&
111 value != 0x0a &&
112 value != 0x0d) {
113 return LY_EINVAL;
114 }
115
116 dst[0] = value;
117 (*bytes_written) = 1;
118 } else if (value < 0x800) {
119 /* two bytes character */
120 dst[0] = 0xc0 | (value >> 6);
121 dst[1] = 0x80 | (value & 0x3f);
122 (*bytes_written) = 2;
123 } else if (value < 0xfffe) {
124 /* three bytes character */
125 if (((value & 0xf800) == 0xd800) ||
126 (value >= 0xfdd0 && value <= 0xfdef)) {
127 /* exclude surrogate blocks %xD800-DFFF */
128 /* exclude noncharacters %xFDD0-FDEF */
129 return LY_EINVAL;
130 }
131
132 dst[0] = 0xe0 | (value >> 12);
133 dst[1] = 0x80 | ((value >> 6) & 0x3f);
134 dst[2] = 0x80 | (value & 0x3f);
135
136 (*bytes_written) = 3;
137 } else if (value < 0x10fffe) {
138 if ((value & 0xffe) == 0xffe) {
139 /* exclude noncharacters %xFFFE-FFFF, %x1FFFE-1FFFF, %x2FFFE-2FFFF, %x3FFFE-3FFFF, %x4FFFE-4FFFF,
140 * %x5FFFE-5FFFF, %x6FFFE-6FFFF, %x7FFFE-7FFFF, %x8FFFE-8FFFF, %x9FFFE-9FFFF, %xAFFFE-AFFFF,
141 * %xBFFFE-BFFFF, %xCFFFE-CFFFF, %xDFFFE-DFFFF, %xEFFFE-EFFFF, %xFFFFE-FFFFF, %x10FFFE-10FFFF */
142 return LY_EINVAL;
143 }
144 /* four bytes character */
145 dst[0] = 0xf0 | (value >> 18);
146 dst[1] = 0x80 | ((value >> 12) & 0x3f);
147 dst[2] = 0x80 | ((value >> 6) & 0x3f);
148 dst[3] = 0x80 | (value & 0x3f);
149
150 (*bytes_written) = 4;
151 }
152 return LY_SUCCESS;
153}
154
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200155/**
156 * @brief Check/Get an XML qualified name from the input string.
157 *
158 * The identifier must have at least one valid character complying the name start character constraints.
159 * The identifier is terminated by the first character, which does not comply to the name character constraints.
160 *
161 * See https://www.w3.org/TR/xml-names/#NT-NCName
162 *
163 * @param[in] context XML context to track lines or store errors into libyang context.
164 * @param[in,out] input Input string to process, updated according to the processed/read data.
165 * Note that the term_char is also read, so input points after the term_char at the end.
166 * @param[out] term_char The first character in the input string which does not compy to the name constraints.
167 * @param[out] term_char_len Number of bytes used to encode UTF8 term_char. Serves to be able to go back in input string.
168 * @return LY_ERR value.
169 */
170static LY_ERR
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200171lyxml_check_qname(struct lyxml_context *context, const char **input, unsigned int *term_char, size_t *term_char_len)
172{
173 unsigned int c;
174 const char *id = (*input);
175 LY_ERR rc;
176
177 /* check NameStartChar (minus colon) */
Radek Krejcib416be62018-10-01 14:51:45 +0200178 LY_CHECK_ERR_RET(ly_getutf8(input, &c, NULL) != LY_SUCCESS,
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200179 LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INCHAR, (*input)[0]), LY_EVALID);
180 LY_CHECK_ERR_RET(!is_xmlqnamestartchar(c),
181 LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX,
182 "Identifier \"%s\" starts with invalid character.", id),
183 LY_EVALID);
184
185 /* check rest of the identifier */
Radek Krejcib416be62018-10-01 14:51:45 +0200186 for (rc = ly_getutf8(input, &c, term_char_len);
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200187 rc == LY_SUCCESS && is_xmlqnamechar(c);
Radek Krejcib416be62018-10-01 14:51:45 +0200188 rc = ly_getutf8(input, &c, term_char_len));
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200189 LY_CHECK_ERR_RET(rc != LY_SUCCESS, LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INCHAR, (*input)[0]), LY_EVALID);
190
191 (*term_char) = c;
192 return LY_SUCCESS;
193}
194
Radek Krejci7a7fa902018-09-25 17:08:21 +0200195LY_ERR
Radek Krejcid70d1072018-10-09 14:20:47 +0200196lyxml_get_string(struct lyxml_context *context, const char **input, char **buffer, size_t *buffer_size, char **output, size_t *length, int *dynamic)
Radek Krejci7a7fa902018-09-25 17:08:21 +0200197{
198#define BUFSIZE 4096
199#define BUFSIZE_STEP 4096
200#define BUFSIZE_CHECK(CTX, BUF, SIZE, CURR, NEED) \
201 if (CURR+NEED >= SIZE) { \
202 BUF = ly_realloc(BUF, SIZE + BUFSIZE_STEP); \
203 LY_CHECK_ERR_RET(!BUF, LOGMEM(CTX), LY_EMEM); \
204 SIZE += BUFSIZE_STEP; \
205 }
206
207 struct ly_ctx *ctx = context->ctx; /* shortcut */
Radek Krejcid70d1072018-10-09 14:20:47 +0200208 const char *in = (*input), *start;
209 char *buf = NULL, delim;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200210 size_t offset; /* read offset in input buffer */
Radek Krejcid70d1072018-10-09 14:20:47 +0200211 size_t len; /* length of the output string (write offset in output buffer) */
Radek Krejci7a7fa902018-09-25 17:08:21 +0200212 size_t size; /* size of the output buffer */
213 void *p;
Radek Krejci117d2082018-09-26 10:05:14 +0200214 uint32_t n;
215 size_t u, newlines;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200216 bool empty_content = false;
217 LY_ERR rc;
218
Radek Krejcib1890642018-10-03 14:05:40 +0200219 assert(context);
220 assert(context->status == LYXML_ELEM_CONTENT || context->status == LYXML_ATTR_CONTENT);
221
Radek Krejci7a7fa902018-09-25 17:08:21 +0200222 if (in[0] == '\'') {
223 delim = '\'';
224 ++in;
225 } else if (in[0] == '"') {
226 delim = '"';
227 ++in;
228 } else {
229 delim = '<';
230 empty_content = true;
231 }
Radek Krejcid70d1072018-10-09 14:20:47 +0200232 start = in;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200233
234 if (empty_content) {
235 /* only when processing element's content - try to ignore whitespaces used to format XML data
236 * before element's child or closing tag */
Radek Krejci117d2082018-09-26 10:05:14 +0200237 for (offset = newlines = 0; in[offset] && is_xmlws(in[offset]); ++offset) {
238 if (in[offset] == '\n') {
239 ++newlines;
240 }
241 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200242 LY_CHECK_ERR_RET(!in[offset], LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF), LY_EVALID);
Radek Krejci117d2082018-09-26 10:05:14 +0200243 context->line += newlines;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200244 if (in[offset] == '<') {
Radek Krejcied6c6ad2018-09-26 09:10:18 +0200245 (*input) = in + offset;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200246 return LY_EINVAL;
247 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200248 }
Radek Krejcid70d1072018-10-09 14:20:47 +0200249 /* init */
250 offset = len = 0;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200251
Radek Krejcid70d1072018-10-09 14:20:47 +0200252 if (0) {
253getbuffer:
254 /* prepare output buffer */
255 if (*buffer) {
256 buf = *buffer;
257 size = *buffer_size;
258 } else {
259 buf = malloc(BUFSIZE);
260 size = BUFSIZE;
261 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
262 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200263 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200264
265 /* parse */
266 while (in[offset]) {
267 if (in[offset] == '&') {
Radek Krejcid70d1072018-10-09 14:20:47 +0200268 if (!buf) {
269 /* it is necessary to modify the input, so we will need a dynamically allocated buffer */
270 goto getbuffer;
271 }
272
Radek Krejci7a7fa902018-09-25 17:08:21 +0200273 if (offset) {
274 /* store what we have so far */
275 BUFSIZE_CHECK(ctx, buf, size, len, offset);
276 memcpy(&buf[len], in, offset);
277 len += offset;
278 in += offset;
279 offset = 0;
280 }
281 /* process reference */
282 /* we will need 4 bytes at most since we support only the predefined
283 * (one-char) entities and character references */
284 BUFSIZE_CHECK(ctx, buf, size, len, 4);
285 ++offset;
286 if (in[offset] != '#') {
287 /* entity reference - only predefined references are supported */
288 if (!strncmp(&in[offset], "lt;", 3)) {
289 buf[len++] = '<';
290 in += 4; /* &lt; */
291 } else if (!strncmp(&in[offset], "gt;", 3)) {
292 buf[len++] = '>';
293 in += 4; /* &gt; */
294 } else if (!strncmp(&in[offset], "amp;", 4)) {
295 buf[len++] = '&';
296 in += 5; /* &amp; */
297 } else if (!strncmp(&in[offset], "apos;", 5)) {
298 buf[len++] = '\'';
299 in += 6; /* &apos; */
300 } else if (!strncmp(&in[offset], "quot;", 5)) {
301 buf[len++] = '\"';
302 in += 6; /* &quot; */
303 } else {
Radek Krejcied6c6ad2018-09-26 09:10:18 +0200304 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX,
305 "Entity reference \"%.*s\" not supported, only predefined references allowed.", 10, &in[offset-1]);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200306 goto error;
307 }
308 offset = 0;
309 } else {
310 p = (void*)&in[offset - 1];
311 /* character reference */
312 ++offset;
313 if (isdigit(in[offset])) {
314 for (n = 0; isdigit(in[offset]); offset++) {
315 n = (10 * n) + (in[offset] - '0');
316 }
317 } else if (in[offset] == 'x' && isxdigit(in[offset + 1])) {
318 for (n = 0, ++offset; isxdigit(in[offset]); offset++) {
319 if (isdigit(in[offset])) {
320 u = (in[offset] - '0');
321 } else if (in[offset] > 'F') {
322 u = 10 + (in[offset] - 'a');
323 } else {
324 u = 10 + (in[offset] - 'A');
325 }
326 n = (16 * n) + u;
327 }
328 } else {
Radek Krejcied6c6ad2018-09-26 09:10:18 +0200329 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Invalid character reference \"%.*s\".", 12, p);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200330 goto error;
331
332 }
333 LY_CHECK_ERR_GOTO(in[offset] != ';',
334 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP,
335 LY_VCODE_INSTREXP_len(&in[offset]), &in[offset], ";"),
336 error);
337 ++offset;
338 rc = lyxml_pututf8(&buf[len], n, &u);
339 LY_CHECK_ERR_GOTO(rc, LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX,
Radek Krejci117d2082018-09-26 10:05:14 +0200340 "Invalid character reference \"%.*s\" (0x%08x).", 12, p, n),
Radek Krejci7a7fa902018-09-25 17:08:21 +0200341 error);
342 len += u;
343 in += offset;
344 offset = 0;
345 }
346 } else if (in[offset] == delim) {
347 /* end of string */
Radek Krejcid70d1072018-10-09 14:20:47 +0200348 if (buf) {
349 if (len + offset >= size) {
350 buf = ly_realloc(buf, len + offset + 1);
351 LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
352 size = len + offset + 1;
353 }
354 memcpy(&buf[len], in, offset);
Radek Krejci7a7fa902018-09-25 17:08:21 +0200355 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200356 len += offset;
357 /* in case of element content, keep the leading <,
Radek Krejcib1890642018-10-03 14:05:40 +0200358 * for attribute's value move after the terminating quotation mark */
359 if (context->status == LYXML_ELEM_CONTENT) {
Radek Krejci7a7fa902018-09-25 17:08:21 +0200360 in += offset;
361 } else {
362 in += offset + 1;
363 }
364 goto success;
365 } else {
366 /* log lines */
367 if (in[offset] == '\n') {
368 ++context->line;
369 }
370
371 /* continue */
372 ++offset;
373 }
374 }
375 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF);
376error:
377 if (!(*buffer)) {
378 free(buf);
379 }
380 return LY_EVALID;
381
382success:
Radek Krejcid70d1072018-10-09 14:20:47 +0200383 if (buf) {
384 if (!(*buffer) && size != len + 1) {
385 /* not using provided buffer, so fit the allocated buffer to what we really have inside */
386 p = realloc(buf, len + 1);
387 /* ignore realloc fail because we are reducing the buffer,
388 * so just return bigger buffer than needed */
389 if (p) {
390 size = len + 1;
391 buf = p;
392 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200393 }
Radek Krejcid70d1072018-10-09 14:20:47 +0200394 /* set terminating NULL byte */
395 buf[len] = '\0';
Radek Krejci7a7fa902018-09-25 17:08:21 +0200396 }
Radek Krejci7a7fa902018-09-25 17:08:21 +0200397
Radek Krejcib1890642018-10-03 14:05:40 +0200398 context->status -= 1;
Radek Krejci7a7fa902018-09-25 17:08:21 +0200399 (*input) = in;
Radek Krejcid70d1072018-10-09 14:20:47 +0200400 if (buf) {
401 (*buffer) = buf;
402 (*buffer_size) = size;
403 (*output) = buf;
404 (*dynamic) = 1;
405 } else {
406 (*output) = (char*)start;
407 (*dynamic) = 0;
408 }
409 (*length) = len;
410
Radek Krejci7a7fa902018-09-25 17:08:21 +0200411 return LY_SUCCESS;
412
413#undef BUFSIZE
414#undef BUFSIZE_STEP
415#undef BUFSIZE_CHECK
416}
417
Radek Krejcid972c252018-09-25 13:23:39 +0200418LY_ERR
Radek Krejci7a7fa902018-09-25 17:08:21 +0200419lyxml_get_attribute(struct lyxml_context *context, const char **input,
Radek Krejcid972c252018-09-25 13:23:39 +0200420 const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
421{
422 struct ly_ctx *ctx = context->ctx; /* shortcut */
423 const char *in = (*input);
424 const char *id;
425 const char *endtag;
426 LY_ERR rc;
427 unsigned int c;
428 size_t endtag_len;
429
430 /* initialize output variables */
431 (*prefix) = (*name) = NULL;
432 (*prefix_len) = (*name_len) = 0;
433
434 /* skip initial whitespaces */
435 ign_xmlws(context, in);
436
437 if (in[0] == '\0') {
438 /* EOF - not expected at this place */
439 return LY_EINVAL;
Radek Krejcib1890642018-10-03 14:05:40 +0200440 } else if (in[0] == '>') {
441 /* element terminated by > - termination of the opening tag */
442 context->status = LYXML_ELEM_CONTENT;
443 ++in;
444 goto success;
445 } else if (in[0] == '/' && in[1] == '>') {
446 /* element terminated by /> - termination of an empty element */
447 context->status = LYXML_ELEMENT;
448 in += 2;
Radek Krejcid972c252018-09-25 13:23:39 +0200449 goto success;
450 }
451
452 /* remember the identifier start before checking its format */
453 id = in;
454 rc = lyxml_check_qname(context, &in, &c, &endtag_len);
455 LY_CHECK_RET(rc);
456 if (c == ':') {
457 /* we have prefixed identifier */
458 endtag = in - endtag_len;
459
460 rc = lyxml_check_qname(context, &in, &c, &endtag_len);
461 LY_CHECK_RET(rc);
462
463 (*prefix) = id;
464 (*prefix_len) = endtag - id;
465 id = endtag + 1;
466 }
467 if (!is_xmlws(c) && c != '=') {
468 in = in - endtag_len;
469 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in), in, "whitespace or '='");
470 return LY_EVALID;
471 }
472 in = in - endtag_len;
473 (*name) = id;
474 (*name_len) = in - id;
475
476 /* eat '=' and stop at the value beginning */
477 ign_xmlws(context, in);
478 if (in[0] != '=') {
479 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in), in, "'='");
480 return LY_EVALID;
481 }
482 ++in;
483 ign_xmlws(context, in);
484 if (in[0] != '\'' && in[0] != '"') {
Radek Krejcib1890642018-10-03 14:05:40 +0200485 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP,
486 LY_VCODE_INSTREXP_len(in), in, "either single or double quotation mark");
Radek Krejcid972c252018-09-25 13:23:39 +0200487 return LY_EVALID;
488 }
Radek Krejcib1890642018-10-03 14:05:40 +0200489 context->status = LYXML_ATTR_CONTENT;
Radek Krejcid972c252018-09-25 13:23:39 +0200490
491success:
492 /* move caller's input */
493 (*input) = in;
494 return LY_SUCCESS;
495}
496
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200497LY_ERR
Radek Krejci7a7fa902018-09-25 17:08:21 +0200498lyxml_get_element(struct lyxml_context *context, const char **input,
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200499 const char **prefix, size_t *prefix_len, const char **name, size_t *name_len)
500{
501 struct ly_ctx *ctx = context->ctx; /* shortcut */
502 const char *in = (*input);
503 const char *endtag;
504 const char *sectname;
505 const char *id;
506 size_t endtag_len, newlines;
Radek Krejcib1890642018-10-03 14:05:40 +0200507 bool loop = true, closing = false;
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200508 unsigned int c;
509 LY_ERR rc;
Radek Krejcib1890642018-10-03 14:05:40 +0200510 struct lyxml_elem *e;
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200511
512 /* initialize output variables */
513 (*prefix) = (*name) = NULL;
514 (*prefix_len) = (*name_len) = 0;
515
516 while (loop) {
517 ign_xmlws(context, in);
518
519 if (in[0] == '\0') {
520 /* EOF */
Radek Krejcib1890642018-10-03 14:05:40 +0200521 context->status = LYXML_END;
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200522 goto success;
523 } else if (in[0] != '<') {
524 return LY_EINVAL;
525 }
526 move_input(context, in, 1);
527
528 if (in[0] == '!') {
529 move_input(context, in, 1);
530 /* sections to ignore */
531 if (!strncmp(in, "--", 2)) {
532 /* comment */
533 move_input(context, in, 2);
534 sectname = "Comment";
535 endtag = "-->";
536 endtag_len = 3;
537 } else if (!strncmp(in, "[CDATA[", 7)) {
538 /* CDATA section */
539 move_input(context, in, 7);
540 sectname = "CData";
541 endtag = "]]>";
542 endtag_len = 3;
543 } else if (!strncmp(in, "DOCTYPE", 7)) {
544 /* Document type declaration - not supported */
545 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_NSUPP, "Document Type Declaration");
546 return LY_EVALID;
547 }
548 in = ign_todelim(in, endtag, endtag_len, &newlines);
549 LY_CHECK_ERR_RET(!in, LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_NTERM, sectname), LY_EVALID);
550 context->line += newlines;
551 in += endtag_len;
552 } else if (in[0] == '?') {
553 in = ign_todelim(in, "?>", 2, &newlines);
554 LY_CHECK_ERR_RET(!in, LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_NTERM, "Declaration"), LY_EVALID);
555 context->line += newlines;
556 in += 2;
Radek Krejcib1890642018-10-03 14:05:40 +0200557 } else if (in[0] == '/') {
558 /* closing element */
559 closing = true;
560 ++in;
561 goto element;
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200562 } else {
563 /* element */
Radek Krejcib1890642018-10-03 14:05:40 +0200564element:
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200565 ign_xmlws(context, in);
566 LY_CHECK_ERR_RET(!in[0], LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF), LY_EVALID);
567
568 /* remember the identifier start before checking its format */
569 id = in;
570 rc = lyxml_check_qname(context, &in, &c, &endtag_len);
571 LY_CHECK_RET(rc);
572 if (c == ':') {
573 /* we have prefixed identifier */
574 endtag = in - endtag_len;
575
576 rc = lyxml_check_qname(context, &in, &c, &endtag_len);
577 LY_CHECK_RET(rc);
578
579 (*prefix) = id;
580 (*prefix_len) = endtag - id;
581 id = endtag + 1;
582 }
583 if (!is_xmlws(c) && c != '/' && c != '>') {
584 in = in - endtag_len;
Radek Krejcid972c252018-09-25 13:23:39 +0200585 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in), in,
586 "whitespace or element tag termination ('>' or '/>'");
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200587 return LY_EVALID;
588 }
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200589 (*name) = id;
Radek Krejcib1890642018-10-03 14:05:40 +0200590 (*name_len) = in - endtag_len - id;
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200591
Radek Krejcib1890642018-10-03 14:05:40 +0200592 if (is_xmlws(c)) {
593 /* go to the next meaningful input */
594 ign_xmlws(context, in);
595 LY_CHECK_ERR_RET(!in[0], LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF), LY_EVALID);
596 c = in[0];
597 ++in;
598 endtag_len = 1;
599 }
600
601 if (closing) {
602 /* match opening and closing element tags */
603 LY_CHECK_ERR_RET(
604 !context->elements.count,
605 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Opening and closing elements tag missmatch (\"%.*s\").", name_len, *name),
606 LY_EVALID);
607 e = (struct lyxml_elem*)context->elements.objs[context->elements.count - 1];
608 LY_CHECK_ERR_RET(e->prefix_len != *prefix_len || e->name_len != *name_len
609 || (*prefix_len && strncmp(*prefix, e->prefix, e->prefix_len)) || strncmp(*name, e->name, e->name_len),
610 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Opening and closing elements tag missmatch (\"%.*s\").", name_len, *name),
611 LY_EVALID);
612 /* opening and closing element tags matches, remove record from the opening tags list */
613 free(e);
614 --context->elements.count;
615 /* do not return element information to announce closing element being currently processed */
616 *name = *prefix = NULL;
617 *name_len = *prefix_len = 0;
618
619 if (c == '>') {
620 /* end of closing element */
621 context->status = LYXML_ELEMENT;
622 } else {
623 in -= endtag_len;
624 LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Unexpected data \"%.*s\" in closing element tag.",
625 LY_VCODE_INSTREXP_len(in), in);
626 return LY_EVALID;
627 }
628 } else {
629 if (c == '>') {
630 /* end of opening element */
631 context->status = LYXML_ELEM_CONTENT;
632 } else if (c == '/' && in[0] == '>') {
633 /* empty element closing */
634 context->status = LYXML_ELEMENT;
635 ++in;
636 } else {
637 /* attribute */
638 context->status = LYXML_ATTRIBUTE;
639 in -= endtag_len;
640 }
641
642 if (context->status != LYXML_ELEMENT) {
643 /* store element opening tag information */
644 e = malloc(sizeof *e);
645 LY_CHECK_ERR_RET(!e, LOGMEM(ctx), LY_EMEM);
646 e->name = *name;
647 e->prefix = *prefix;
648 e->name_len = *name_len;
649 e->prefix_len = *prefix_len;
650 ly_set_add(&context->elements, e, LY_SET_OPT_USEASLIST);
651 }
652 }
Radek Krejcid91dbaf2018-09-21 15:51:39 +0200653 loop = false;
654 }
655 }
656
657success:
658 /* move caller's input */
659 (*input) = in;
660 return LY_SUCCESS;
661}
662
Radek Krejci4b74d5e2018-09-26 14:30:55 +0200663LY_ERR
664lyxml_ns_add(struct lyxml_context *context, const char *element_name, const char *prefix, size_t prefix_len, char *uri)
665{
666 struct lyxml_ns *ns;
667
668 ns = malloc(sizeof *ns);
669 LY_CHECK_ERR_RET(!ns, LOGMEM(context->ctx), LY_EMEM);
670
671 ns->element = element_name;
672 ns->uri = uri;
673 if (prefix) {
674 ns->prefix = strndup(prefix, prefix_len);
675 LY_CHECK_ERR_RET(!ns->prefix, LOGMEM(context->ctx); free(ns), LY_EMEM);
676 } else {
677 ns->prefix = NULL;
678 }
679
680 LY_CHECK_ERR_RET(ly_set_add(&context->ns, ns, LY_SET_OPT_USEASLIST) == -1, free(ns->prefix), LY_EMEM);
681 return LY_SUCCESS;
682}
683
684const struct lyxml_ns *
685lyxml_ns_get(struct lyxml_context *context, const char *prefix, size_t prefix_len)
686{
687 unsigned int u;
688 struct lyxml_ns *ns;
689
690 for (u = context->ns.count - 1; u + 1 > 0; --u) {
691 ns = (struct lyxml_ns *)context->ns.objs[u];
692 if (prefix) {
693 if (!strncmp(prefix, ns->prefix, prefix_len) && ns->prefix[prefix_len] == '\0') {
694 return ns;
695 }
696 } else if (!ns->prefix) {
697 /* default namespace */
698 return ns;
699 }
700 }
701
702 return NULL;
703}
704
705LY_ERR
706lyxml_ns_rm(struct lyxml_context *context, const char *element_name)
707{
708 unsigned int u;
709
710 for (u = context->ns.count - 1; u + 1 > 0; --u) {
711 if (((struct lyxml_ns *)context->ns.objs[u])->element != element_name) {
712 /* we are done, the namespaces from a single element are supposed to be together */
713 break;
714 }
715 /* remove the ns structure */
716 free(((struct lyxml_ns *)context->ns.objs[u])->prefix);
717 free(((struct lyxml_ns *)context->ns.objs[u])->uri);
718 free(context->ns.objs[u]);
719 --context->ns.count;
720 }
721
722 if (!context->ns.count) {
723 /* cleanup the context's namespaces storage */
724 ly_set_erase(&context->ns, NULL);
725 }
726
727 return LY_SUCCESS;
728}
Radek Krejcib1890642018-10-03 14:05:40 +0200729
730void
731lyxml_context_clear(struct lyxml_context *context)
732{
733 unsigned int u;
734
735 ly_set_erase(&context->elements, free);
736 for (u = context->ns.count - 1; u + 1 > 0; --u) {
737 /* remove the ns structure */
738 free(((struct lyxml_ns *)context->ns.objs[u])->prefix);
739 free(((struct lyxml_ns *)context->ns.objs[u])->uri);
740 free(context->ns.objs[u]);
741 }
742 ly_set_erase(&context->ns, NULL);
743}