| /** |
| * @file xml.c |
| * @author Radek Krejci <rkrejci@cesnet.cz> |
| * @brief Generic XML parser implementation for libyang |
| * |
| * Copyright (c) 2015 - 2018 CESNET, z.s.p.o. |
| * |
| * This source code is licensed under BSD 3-Clause License (the "License"). |
| * You may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * https://opensource.org/licenses/BSD-3-Clause |
| */ |
| |
| #include "common.h" |
| |
| #include <assert.h> |
| #include <ctype.h> |
| #include <stdbool.h> |
| #include <stdint.h> |
| #include <stdlib.h> |
| #include <string.h> |
| |
| #include "xml.h" |
| #include "printer_internal.h" |
| |
/* Move input p by s characters. When the end of input (NULL byte) is reached, log LY_VCODE_EOF using
 * the lyxml_context c and leave the calling function via LY_CHECK_ERR_RET() with LY_EVALID.
 * Wrapped in do-while(0) so the macro acts as a single statement even in an unbraced if/else. */
#define move_input(c,p,s) \
    do { \
        (p) += (s); \
        LY_CHECK_ERR_RET(!(p)[0], LOGVAL((c)->ctx, LY_VLOG_LINE, &(c)->line, LY_VCODE_EOF), LY_EVALID); \
    } while (0)

/* Ignore whitespaces in the input string p, counting the skipped newlines into the line counter of
 * the lyxml_context c */
#define ign_xmlws(c,p) \
    do { \
        while (is_xmlws(*(p))) { \
            if (*(p) == '\n') { \
                ++(c)->line; \
            } \
            ++(p); \
        } \
    } while (0)
| |
/**
 * @brief Skip the input up to the first occurrence of the given delimiter.
 *
 * Newlines passed on the way are counted so the caller can keep its line counter up to date.
 *
 * @param[in] input NULL-terminated string to search in.
 * @param[in] delim Delimiter to find.
 * @param[in] delim_len Number of bytes of @p delim to match.
 * @param[out] newlines Number of newline characters found before the returned position
 * (or in the whole input when the delimiter is not present).
 * @return Pointer to the beginning of the delimiter in @p input, NULL if the delimiter was not found.
 */
static const char *
ign_todelim(register const char *input, const char *delim, size_t delim_len, size_t *newlines)
{
    size_t matched;

    *newlines = 0;
    while (*input) {
        if (*input == *delim) {
            /* candidate position - compare the delimiter byte by byte */
            matched = 0;
            while ((matched < delim_len) && (input[matched] == delim[matched])) {
                ++matched;
            }
            if (matched == delim_len) {
                return input;
            }
        } else if (*input == '\n') {
            ++(*newlines);
        }
        ++input;
    }

    return NULL;
}
| |
| /** |
| * Store UTF-8 character specified as 4byte integer into the dst buffer. |
| * Returns number of written bytes (4 max), expects that dst has enough space. |
| * |
| * UTF-8 mapping: |
| * 00000000 -- 0000007F: 0xxxxxxx |
| * 00000080 -- 000007FF: 110xxxxx 10xxxxxx |
| * 00000800 -- 0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx |
| * 00010000 -- 001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx |
| * |
| * Includes checking for valid characters (following RFC 7950, sec 9.4) |
| */ |
| static LY_ERR |
| lyxml_pututf8(char *dst, uint32_t value, size_t *bytes_written) |
| { |
| if (value < 0x80) { |
| /* one byte character */ |
| if (value < 0x20 && |
| value != 0x09 && |
| value != 0x0a && |
| value != 0x0d) { |
| return LY_EINVAL; |
| } |
| |
| dst[0] = value; |
| (*bytes_written) = 1; |
| } else if (value < 0x800) { |
| /* two bytes character */ |
| dst[0] = 0xc0 | (value >> 6); |
| dst[1] = 0x80 | (value & 0x3f); |
| (*bytes_written) = 2; |
| } else if (value < 0xfffe) { |
| /* three bytes character */ |
| if (((value & 0xf800) == 0xd800) || |
| (value >= 0xfdd0 && value <= 0xfdef)) { |
| /* exclude surrogate blocks %xD800-DFFF */ |
| /* exclude noncharacters %xFDD0-FDEF */ |
| return LY_EINVAL; |
| } |
| |
| dst[0] = 0xe0 | (value >> 12); |
| dst[1] = 0x80 | ((value >> 6) & 0x3f); |
| dst[2] = 0x80 | (value & 0x3f); |
| |
| (*bytes_written) = 3; |
| } else if (value < 0x10fffe) { |
| if ((value & 0xffe) == 0xffe) { |
| /* exclude noncharacters %xFFFE-FFFF, %x1FFFE-1FFFF, %x2FFFE-2FFFF, %x3FFFE-3FFFF, %x4FFFE-4FFFF, |
| * %x5FFFE-5FFFF, %x6FFFE-6FFFF, %x7FFFE-7FFFF, %x8FFFE-8FFFF, %x9FFFE-9FFFF, %xAFFFE-AFFFF, |
| * %xBFFFE-BFFFF, %xCFFFE-CFFFF, %xDFFFE-DFFFF, %xEFFFE-EFFFF, %xFFFFE-FFFFF, %x10FFFE-10FFFF */ |
| return LY_EINVAL; |
| } |
| /* four bytes character */ |
| dst[0] = 0xf0 | (value >> 18); |
| dst[1] = 0x80 | ((value >> 12) & 0x3f); |
| dst[2] = 0x80 | ((value >> 6) & 0x3f); |
| dst[3] = 0x80 | (value & 0x3f); |
| |
| (*bytes_written) = 4; |
| } |
| return LY_SUCCESS; |
| } |
| |
| /** |
| * @brief Check/Get an XML qualified name from the input string. |
| * |
| * The identifier must have at least one valid character complying the name start character constraints. |
| * The identifier is terminated by the first character, which does not comply to the name character constraints. |
| * |
| * See https://www.w3.org/TR/xml-names/#NT-NCName |
| * |
| * @param[in] context XML context to track lines or store errors into libyang context. |
| * @param[in,out] input Input string to process, updated according to the processed/read data. |
| * Note that the term_char is also read, so input points after the term_char at the end. |
| * @param[out] term_char The first character in the input string which does not compy to the name constraints. |
| * @param[out] term_char_len Number of bytes used to encode UTF8 term_char. Serves to be able to go back in input string. |
| * @return LY_ERR value. |
| */ |
| static LY_ERR |
| lyxml_check_qname(struct lyxml_context *context, const char **input, unsigned int *term_char, size_t *term_char_len) |
| { |
| unsigned int c; |
| const char *id = (*input); |
| LY_ERR rc; |
| |
| /* check NameStartChar (minus colon) */ |
| LY_CHECK_ERR_RET(ly_getutf8(input, &c, NULL) != LY_SUCCESS, |
| LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INCHAR, (*input)[0]), LY_EVALID); |
| LY_CHECK_ERR_RET(!is_xmlqnamestartchar(c), |
| LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, |
| "Identifier \"%s\" starts with invalid character.", id), |
| LY_EVALID); |
| |
| /* check rest of the identifier */ |
| for (rc = ly_getutf8(input, &c, term_char_len); |
| rc == LY_SUCCESS && is_xmlqnamechar(c); |
| rc = ly_getutf8(input, &c, term_char_len)); |
| LY_CHECK_ERR_RET(rc != LY_SUCCESS, LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INCHAR, (*input)[0]), LY_EVALID); |
| |
| (*term_char) = c; |
| return LY_SUCCESS; |
| } |
| |
| /** |
| * @brief Add namespace definition into XML context. |
| * |
| * Namespaces from a single element are supposed to be added sequentially together (not interleaved by a namespace from other |
| * element). This mimic namespace visibility, since the namespace defined in element E is not visible from its parents or |
| * siblings. On the other hand, namespace from a parent element can be redefined in a child element. This is also reflected |
| * by lyxml_ns_get() which returns the most recent namespace definition for the given prefix. |
| * |
| * When leaving processing of a subtree of some element (after it is removed from context->elements), caller is supposed to call |
| * lyxml_ns_rm() to remove all the namespaces defined in such an element from the context. |
| * |
| * @param[in] context XML context to work with. |
| * @param[in] prefix Pointer to the namespace prefix as taken from lyxml_get_attribute(). Can be NULL for default namespace. |
| * @param[in] prefix_len Length of the prefix string (since it is not NULL-terminated when returned from lyxml_get_attribute()). |
| * @param[in] uri Namespace URI (value) to store. Value can be obtained via lyxml_get_string() and caller is not supposed to |
| * work with the pointer when the function succeeds. In case of error the value is freed. |
| * @return LY_ERR values. |
| */ |
| LY_ERR |
| lyxml_ns_add(struct lyxml_context *context, const char *prefix, size_t prefix_len, char *uri) |
| { |
| struct lyxml_ns *ns; |
| |
| ns = malloc(sizeof *ns); |
| LY_CHECK_ERR_RET(!ns, LOGMEM(context->ctx), LY_EMEM); |
| |
| /* we need to connect the depth of the element where the namespace is defined with the |
| * namespace record to be able to maintain (remove) the record when the parser leaves |
| * (to its sibling or back to the parent) the element where the namespace was defined */ |
| ns->depth = context->elements.count; |
| |
| ns->uri = uri; |
| if (prefix) { |
| ns->prefix = strndup(prefix, prefix_len); |
| LY_CHECK_ERR_RET(!ns->prefix, LOGMEM(context->ctx); free(ns->uri); free(ns), LY_EMEM); |
| } else { |
| ns->prefix = NULL; |
| } |
| |
| LY_CHECK_ERR_RET(ly_set_add(&context->ns, ns, LY_SET_OPT_USEASLIST) == -1, |
| free(ns->prefix); free(ns->uri); free(ns), LY_EMEM); |
| return LY_SUCCESS; |
| } |
| |
| /** |
| * @brief Remove all the namespaces defined in the element recently closed (removed from the context->elements). |
| * |
| * @param[in] context XML context to work with. |
| */ |
| void |
| lyxml_ns_rm(struct lyxml_context *context) |
| { |
| unsigned int u; |
| |
| for (u = context->ns.count - 1; u + 1 > 0; --u) { |
| if (((struct lyxml_ns *)context->ns.objs[u])->depth != context->elements.count + 1) { |
| /* we are done, the namespaces from a single element are supposed to be together */ |
| break; |
| } |
| /* remove the ns structure */ |
| free(((struct lyxml_ns *)context->ns.objs[u])->prefix); |
| free(((struct lyxml_ns *)context->ns.objs[u])->uri); |
| free(context->ns.objs[u]); |
| --context->ns.count; |
| } |
| |
| if (!context->ns.count) { |
| /* cleanup the context's namespaces storage */ |
| ly_set_erase(&context->ns, NULL); |
| } |
| } |
| |
| const struct lyxml_ns * |
| lyxml_ns_get(struct lyxml_context *context, const char *prefix, size_t prefix_len) |
| { |
| unsigned int u; |
| struct lyxml_ns *ns; |
| |
| for (u = context->ns.count - 1; u + 1 > 0; --u) { |
| ns = (struct lyxml_ns *)context->ns.objs[u]; |
| if (prefix && prefix_len) { |
| if (ns->prefix && !ly_strncmp(ns->prefix, prefix, prefix_len)) { |
| return ns; |
| } |
| } else if (!ns->prefix) { |
| /* default namespace */ |
| return ns; |
| } |
| } |
| |
| return NULL; |
| } |
| |
| static LY_ERR |
| lyxml_parse_element_start(struct lyxml_context *context, const char **input, int *closing) |
| { |
| struct ly_ctx *ctx = context->ctx; /* shortcut */ |
| const char *in = (*input); |
| const char *endtag; |
| const char *sectname; |
| size_t endtag_len, newlines; |
| |
| while (1) { |
| ign_xmlws(context, in); |
| |
| if (in[0] == '\0') { |
| /* EOF */ |
| if (context->elements.count) { |
| LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF); |
| return LY_EVALID; |
| } |
| context->status = LYXML_END; |
| (*input) = in; |
| return LY_SUCCESS; |
| } else if (in[0] != '<') { |
| return LY_EINVAL; |
| } |
| move_input(context, in, 1); |
| |
| if (in[0] == '!') { |
| move_input(context, in, 1); |
| /* sections to ignore */ |
| if (!strncmp(in, "--", 2)) { |
| /* comment */ |
| move_input(context, in, 2); |
| sectname = "Comment"; |
| endtag = "-->"; |
| endtag_len = 3; |
| } else if (!strncmp(in, "[CDATA[", 7)) { |
| /* CDATA section */ |
| move_input(context, in, 7); |
| sectname = "CData"; |
| endtag = "]]>"; |
| endtag_len = 3; |
| } else if (!strncmp(in, "DOCTYPE", 7)) { |
| /* Document type declaration - not supported */ |
| LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_NSUPP, "Document Type Declaration"); |
| return LY_EVALID; |
| } else { |
| LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Unknown XML section \"%.20s\".", &in[-2]); |
| return LY_EVALID; |
| } |
| in = ign_todelim(in, endtag, endtag_len, &newlines); |
| LY_CHECK_ERR_RET(!in, LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_NTERM, sectname), LY_EVALID); |
| context->line += newlines; |
| in += endtag_len; |
| } else if (in[0] == '?') { |
| in = ign_todelim(in, "?>", 2, &newlines); |
| LY_CHECK_ERR_RET(!in, LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_NTERM, "Declaration"), LY_EVALID); |
| context->line += newlines; |
| in += 2; |
| } else if (in[0] == '/') { |
| /* closing element tag */ |
| *closing = 1; |
| ++in; |
| goto element; |
| } else { |
| /* opening element tag */ |
| *closing = 0; |
| element: |
| ign_xmlws(context, in); |
| LY_CHECK_ERR_RET(!in[0], LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF), LY_EVALID); |
| |
| (*input) = in; |
| return LY_SUCCESS; |
| } |
| } |
| } |
| |
| static LY_ERR |
| lyxml_parse_element_name(struct lyxml_context *context, const char **input, size_t *endtag_len, unsigned int *term_char, |
| const char **prefix, size_t *prefix_len, const char **name, size_t *name_len) |
| { |
| LY_ERR rc; |
| const char *in = (*input); |
| const char *id; |
| const char *endtag; |
| |
| id = in; |
| rc = lyxml_check_qname(context, &in, term_char, endtag_len); |
| LY_CHECK_RET(rc); |
| if (*term_char == ':') { |
| /* we have prefixed identifier */ |
| endtag = in - *endtag_len; |
| |
| rc = lyxml_check_qname(context, &in, term_char, endtag_len); |
| LY_CHECK_RET(rc); |
| |
| (*prefix) = id; |
| (*prefix_len) = endtag - id; |
| id = endtag + 1; |
| } |
| if (!is_xmlws(*term_char) && *term_char != '/' && *term_char != '>') { |
| (*input) = in - *endtag_len; |
| LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(*input), *input, |
| "whitespace or element tag termination ('>' or '/>'"); |
| return LY_EVALID; |
| } |
| (*name) = id; |
| (*name_len) = in - *endtag_len - id; |
| |
| if (is_xmlws(*term_char)) { |
| /* go to the next meaningful input */ |
| ign_xmlws(context, in); |
| LY_CHECK_ERR_RET(!in[0], LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF), LY_EVALID); |
| *term_char = in[0]; |
| ++in; |
| *endtag_len = 1; |
| } |
| |
| (*input) = in; |
| return LY_SUCCESS; |
| } |
| |
| LY_ERR |
| lyxml_get_element(struct lyxml_context *context, const char **input, |
| const char **prefix, size_t *prefix_len, const char **name, size_t *name_len) |
| { |
| struct ly_ctx *ctx = context->ctx; /* shortcut */ |
| const char *in = (*input); |
| size_t endtag_len; |
| bool loop = true; |
| int closing = 0; |
| unsigned int c; |
| LY_ERR rc; |
| struct lyxml_elem *e; |
| |
| /* initialize output variables */ |
| (*prefix) = (*name) = NULL; |
| (*prefix_len) = (*name_len) = 0; |
| |
| while (loop) { |
| rc = lyxml_parse_element_start(context, &in, &closing); |
| if (rc) { |
| return rc; |
| } else if (context->status == LYXML_END) { |
| goto success; |
| } |
| /* we are at the begining of the element name, remember the identifier start before checking its format */ |
| LY_CHECK_RET(rc = lyxml_parse_element_name(context, &in, &endtag_len, &c, prefix, prefix_len, name, name_len)); |
| |
| if (closing) { |
| /* match opening and closing element tags */ |
| LY_CHECK_ERR_RET( |
| !context->elements.count, |
| LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Opening and closing elements tag missmatch (\"%.*s\").", *name_len, *name), |
| LY_EVALID); |
| e = (struct lyxml_elem*)context->elements.objs[context->elements.count - 1]; |
| LY_CHECK_ERR_RET(e->prefix_len != *prefix_len || e->name_len != *name_len |
| || (*prefix_len && strncmp(*prefix, e->prefix, e->prefix_len)) || strncmp(*name, e->name, e->name_len), |
| LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Opening and closing elements tag missmatch (\"%.*s\").", *name_len, *name), |
| LY_EVALID); |
| /* opening and closing element tags matches, remove record from the opening tags list */ |
| free(e); |
| --context->elements.count; |
| |
| /* remove also the namespaces conneted with the element */ |
| lyxml_ns_rm(context); |
| |
| /* do not return element information to announce closing element being currently processed */ |
| *name = *prefix = NULL; |
| *name_len = *prefix_len = 0; |
| |
| if (c == '>') { |
| /* end of closing element */ |
| context->status = LYXML_ELEMENT; |
| } else { |
| in -= endtag_len; |
| LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Unexpected data \"%.*s\" in closing element tag.", |
| LY_VCODE_INSTREXP_len(in), in); |
| return LY_EVALID; |
| } |
| } else { |
| if (c == '>') { |
| /* end of opening element */ |
| context->status = LYXML_ELEM_CONTENT; |
| } else if (c == '/' && in[0] == '>') { |
| /* empty element closing */ |
| context->status = LYXML_ELEMENT; |
| ++in; |
| } else { |
| /* attribute */ |
| context->status = LYXML_ATTRIBUTE; |
| in -= endtag_len; |
| } |
| |
| if (context->status != LYXML_ELEMENT) { |
| /* store element opening tag information */ |
| e = malloc(sizeof *e); |
| LY_CHECK_ERR_RET(!e, LOGMEM(ctx), LY_EMEM); |
| e->name = *name; |
| e->prefix = *prefix; |
| e->name_len = *name_len; |
| e->prefix_len = *prefix_len; |
| ly_set_add(&context->elements, e, LY_SET_OPT_USEASLIST); |
| } |
| } |
| loop = false; |
| } |
| |
| success: |
| /* check for end of input */ |
| if (in[0] == '\0') { |
| /* EOF */ |
| if (context->elements.count) { |
| LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF); |
| return LY_EVALID; |
| } |
| context->status = LYXML_END; |
| } |
| /* move caller's input */ |
| (*input) = in; |
| return LY_SUCCESS; |
| } |
| |
| LY_ERR |
| lyxml_get_string(struct lyxml_context *context, const char **input, char **buffer, size_t *buffer_size, char **output, size_t *length, int *dynamic) |
| { |
| #define BUFSIZE 4096 |
| #define BUFSIZE_STEP 4096 |
| #define BUFSIZE_CHECK(CTX, BUF, SIZE, CURR, NEED) \ |
| if (CURR+NEED >= SIZE) { \ |
| BUF = ly_realloc(BUF, SIZE + BUFSIZE_STEP); \ |
| LY_CHECK_ERR_RET(!BUF, LOGMEM(CTX), LY_EMEM); \ |
| SIZE += BUFSIZE_STEP; \ |
| } |
| |
| struct ly_ctx *ctx = context->ctx; /* shortcut */ |
| const char *in = (*input), *start; |
| char *buf = NULL, delim; |
| size_t offset; /* read offset in input buffer */ |
| size_t len; /* length of the output string (write offset in output buffer) */ |
| size_t size = 0; /* size of the output buffer */ |
| void *p; |
| uint32_t n; |
| size_t u, newlines; |
| bool empty_content = false; |
| LY_ERR rc = LY_SUCCESS; |
| |
| assert(context); |
| assert(context->status == LYXML_ELEM_CONTENT || context->status == LYXML_ATTR_CONTENT); |
| |
| if (in[0] == '\'') { |
| delim = '\''; |
| ++in; |
| } else if (in[0] == '"') { |
| delim = '"'; |
| ++in; |
| } else { |
| delim = '<'; |
| empty_content = true; |
| } |
| start = in; |
| |
| if (empty_content) { |
| /* only when processing element's content - try to ignore whitespaces used to format XML data |
| * before element's child or closing tag */ |
| for (offset = newlines = 0; in[offset] && is_xmlws(in[offset]); ++offset) { |
| if (in[offset] == '\n') { |
| ++newlines; |
| } |
| } |
| LY_CHECK_ERR_RET(!in[offset], LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF), LY_EVALID); |
| context->line += newlines; |
| if (in[offset] == '<') { |
| (*input) = in + offset; |
| |
| /* get know if it is child element (indentation) or closing element (whitespace-only content) */ |
| len = offset; |
| offset = 0; |
| in = *input; |
| goto element_endtag_check; |
| } |
| } |
| /* init */ |
| offset = len = 0; |
| empty_content = false; |
| |
| if (0) { |
| getbuffer: |
| /* prepare output buffer */ |
| if (*buffer) { |
| buf = *buffer; |
| size = *buffer_size; |
| } else { |
| buf = malloc(BUFSIZE); |
| size = BUFSIZE; |
| LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM); |
| } |
| } |
| |
| /* parse */ |
| while (in[offset]) { |
| if (in[offset] == '&') { |
| if (output) { |
| if (!buf) { |
| /* it is necessary to modify the input, so we will need a dynamically allocated buffer */ |
| goto getbuffer; |
| } |
| |
| if (offset) { |
| /* store what we have so far */ |
| BUFSIZE_CHECK(ctx, buf, size, len, offset); |
| memcpy(&buf[len], in, offset); |
| len += offset; |
| in += offset; |
| offset = 0; |
| } |
| /* process reference */ |
| /* we will need 4 bytes at most since we support only the predefined |
| * (one-char) entities and character references */ |
| BUFSIZE_CHECK(ctx, buf, size, len, 4); |
| } |
| ++offset; |
| if (in[offset] != '#') { |
| /* entity reference - only predefined references are supported */ |
| if (!strncmp(&in[offset], "lt;", 3)) { |
| if (output) { |
| buf[len++] = '<'; |
| } |
| in += 4; /* < */ |
| } else if (!strncmp(&in[offset], "gt;", 3)) { |
| if (output) { |
| buf[len++] = '>'; |
| } |
| in += 4; /* > */ |
| } else if (!strncmp(&in[offset], "amp;", 4)) { |
| if (output) { |
| buf[len++] = '&'; |
| } |
| in += 5; /* & */ |
| } else if (!strncmp(&in[offset], "apos;", 5)) { |
| if (output) { |
| buf[len++] = '\''; |
| } |
| in += 6; /* ' */ |
| } else if (!strncmp(&in[offset], "quot;", 5)) { |
| if (output) { |
| buf[len++] = '\"'; |
| } |
| in += 6; /* " */ |
| } else { |
| LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, |
| "Entity reference \"%.*s\" not supported, only predefined references allowed.", 10, &in[offset-1]); |
| goto error; |
| } |
| offset = 0; |
| } else { |
| p = (void*)&in[offset - 1]; |
| /* character reference */ |
| ++offset; |
| if (isdigit(in[offset])) { |
| for (n = 0; isdigit(in[offset]); offset++) { |
| n = (10 * n) + (in[offset] - '0'); |
| } |
| } else if (in[offset] == 'x' && isxdigit(in[offset + 1])) { |
| for (n = 0, ++offset; isxdigit(in[offset]); offset++) { |
| if (isdigit(in[offset])) { |
| u = (in[offset] - '0'); |
| } else if (in[offset] > 'F') { |
| u = 10 + (in[offset] - 'a'); |
| } else { |
| u = 10 + (in[offset] - 'A'); |
| } |
| n = (16 * n) + u; |
| } |
| } else { |
| LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Invalid character reference \"%.*s\".", 12, p); |
| goto error; |
| |
| } |
| LY_CHECK_ERR_GOTO(in[offset] != ';', |
| LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP, |
| LY_VCODE_INSTREXP_len(&in[offset]), &in[offset], ";"), |
| error); |
| ++offset; |
| if (output) { |
| rc = lyxml_pututf8(&buf[len], n, &u); |
| } else { |
| char utf8[4]; |
| rc = lyxml_pututf8(&utf8[0], n, &u); |
| } |
| LY_CHECK_ERR_GOTO(rc, LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, |
| "Invalid character reference \"%.*s\" (0x%08x).", 12, p, n), |
| error); |
| len += u; |
| in += offset; |
| offset = 0; |
| } |
| } else if (in[offset] == delim) { |
| /* end of string */ |
| if (buf) { |
| if (len + offset >= size) { |
| buf = ly_realloc(buf, len + offset + 1); |
| LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM); |
| size = len + offset + 1; |
| } |
| memcpy(&buf[len], in, offset); |
| } |
| len += offset; |
| /* in case of element content, keep the leading <, |
| * for attribute's value move after the terminating quotation mark */ |
| element_endtag_check: |
| if (context->status == LYXML_ELEM_CONTENT) { |
| const char *name = NULL, *prefix = NULL; |
| size_t name_len = 0, prefix_len = 0; |
| int closing = 0; |
| /* use fake context to preserve real context (lines, status) since we don't want really parse the element tag here */ |
| struct lyxml_context fakecontext = {.ctx = context->ctx, .line = context->line, .status = context->status}; |
| |
| in += offset; |
| |
| /* get know if it is child element (mixed content) or closing element (regular content) */ |
| /* We don't want actually to parse the closing element, we just need to check mixed content. |
| * The closing element tag is preserved to keep the context for the data (returned string), |
| * since it can contain data using XML prefixes defined in this element and the caller can |
| * want to work with it */ |
| (*input) = in; |
| rc = lyxml_parse_element_start(&fakecontext, &in, &closing); |
| if (rc) { |
| /* some parsing error */ |
| goto error; |
| } else { |
| size_t endtag_len; |
| unsigned int c; |
| struct lyxml_elem *e; |
| |
| LY_CHECK_GOTO(lyxml_parse_element_name(&fakecontext, &in, &endtag_len, &c, &prefix, &prefix_len, &name, &name_len), error); |
| |
| if (!closing) { |
| if (empty_content) { |
| /* the element here is not closing element, so we have the just indentation formatting before the child */ |
| context->status = LYXML_ELEMENT; |
| return LY_EINVAL; |
| } else { |
| /* the element here is not closing element, so we have not allowed mixed content */ |
| struct lyxml_elem *e = (struct lyxml_elem*)context->elements.objs[--context->elements.count]; |
| LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Mixed XML content is not allowed (%.*s).", |
| offset + (in - (*input)), &(*input)[-offset]); |
| free(e); |
| goto error; |
| } |
| } |
| |
| /* closing element start - check the name if it matches the opening element tag */ |
| LY_CHECK_ERR_GOTO(!context->elements.count, |
| LOGVAL(ctx, LY_VLOG_LINE, &fakecontext.line, LYVE_SYNTAX, "Opening and closing elements tag missmatch (\"%.*s\").", |
| name_len, name), |
| error); |
| e = (struct lyxml_elem*)context->elements.objs[context->elements.count - 1]; |
| LY_CHECK_ERR_GOTO(e->prefix_len != prefix_len || e->name_len != name_len |
| || (prefix_len && strncmp(prefix, e->prefix, e->prefix_len)) || strncmp(name, e->name, e->name_len), |
| free(e); --context->elements.count; LOGVAL(ctx, LY_VLOG_LINE, &fakecontext.line, LYVE_SYNTAX, |
| "Opening and closing elements tag missmatch (\"%.*s\").", name_len, name), |
| error); |
| /* opening and closing element tags matches */ |
| /* return input back */ |
| in = (*input); |
| } |
| } else { |
| in += offset + 1; |
| } |
| goto success; |
| } else { |
| /* log lines */ |
| if (in[offset] == '\n') { |
| ++context->line; |
| } |
| |
| /* continue */ |
| ++offset; |
| } |
| } |
| LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF); |
| error: |
| if (!(*buffer)) { |
| /* buffer not provided, buf is local */ |
| free(buf); |
| } else if (buf) { |
| /* buf is shared with caller via buffer, but buf could be reallocated, so update the provided buffer */ |
| (*buffer) = buf; |
| (*buffer_size) = size; |
| } |
| return LY_EVALID; |
| |
| success: |
| if (buf) { |
| if (!(*buffer) && size != len + 1) { |
| /* not using provided buffer, so fit the allocated buffer to what we really have inside */ |
| p = realloc(buf, len + 1); |
| /* ignore realloc fail because we are reducing the buffer, |
| * so just return bigger buffer than needed */ |
| if (p) { |
| size = len + 1; |
| buf = p; |
| } |
| } |
| /* set terminating NULL byte */ |
| buf[len] = '\0'; |
| } |
| |
| context->status -= 1; |
| if (buf) { |
| (*buffer) = buf; |
| (*buffer_size) = size; |
| (*output) = buf; |
| (*dynamic) = 1; |
| (*length) = len; |
| } else if (output) { |
| (*output) = (char*)start; |
| (*dynamic) = 0; |
| (*length) = len; |
| } |
| |
| if (context->status == LYXML_ATTRIBUTE) { |
| /* skip whitespaces after the value */ |
| ign_xmlws(context, in); |
| |
| if (in[0] == '>') { |
| /* element terminated by > - termination of the opening tag */ |
| context->status = LYXML_ELEM_CONTENT; |
| ++in; |
| } else if (in[0] == '/' && in[1] == '>') { |
| /* element terminated by /> - termination of an empty element */ |
| context->status = LYXML_ELEMENT; |
| in += 2; |
| |
| /* remove the closed element record from the tags list */ |
| free(context->elements.objs[context->elements.count - 1]); |
| --context->elements.count; |
| |
| /* remove also the namespaces conneted with the element */ |
| lyxml_ns_rm(context); |
| |
| if (!context->elements.count && in[0] == '\0') { |
| /* EOF */ |
| context->status = LYXML_END; |
| } |
| } /* else another attribute */ |
| } |
| |
| (*input) = in; |
| return rc; |
| |
| #undef BUFSIZE |
| #undef BUFSIZE_STEP |
| #undef BUFSIZE_CHECK |
| } |
| |
| LY_ERR |
| lyxml_get_attribute(struct lyxml_context *context, const char **input, |
| const char **prefix, size_t *prefix_len, const char **name, size_t *name_len) |
| { |
| struct ly_ctx *ctx = context->ctx; /* shortcut */ |
| const char *in = (*input); |
| const char *id; |
| const char *endtag; |
| LY_ERR rc; |
| unsigned int c; |
| size_t endtag_len; |
| int is_ns = 0; |
| const char *ns_prefix = NULL; |
| size_t ns_prefix_len = 0; |
| |
| start: |
| /* initialize output variables */ |
| (*prefix) = (*name) = NULL; |
| (*prefix_len) = (*name_len) = 0; |
| |
| /* skip initial whitespaces */ |
| ign_xmlws(context, in); |
| |
| if (in[0] == '\0') { |
| /* EOF - not expected at this place */ |
| LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF); |
| return LY_EVALID; |
| } |
| |
| /* remember the identifier start before checking its format */ |
| id = in; |
| rc = lyxml_check_qname(context, &in, &c, &endtag_len); |
| LY_CHECK_RET(rc); |
| if (c == ':') { |
| /* we have prefixed identifier */ |
| endtag = in - endtag_len; |
| |
| rc = lyxml_check_qname(context, &in, &c, &endtag_len); |
| LY_CHECK_RET(rc); |
| |
| (*prefix) = id; |
| (*prefix_len) = endtag - id; |
| id = endtag + 1; |
| } |
| if (!is_xmlws(c) && c != '=') { |
| in = in - endtag_len; |
| LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in), in, "whitespace or '='"); |
| return LY_EVALID; |
| } |
| in = in - endtag_len; |
| (*name) = id; |
| (*name_len) = in - id; |
| |
| /* eat '=' and stop at the value beginning */ |
| ign_xmlws(context, in); |
| if (in[0] != '=') { |
| LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in), in, "'='"); |
| return LY_EVALID; |
| } |
| ++in; |
| ign_xmlws(context, in); |
| if (in[0] != '\'' && in[0] != '"') { |
| LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INSTREXP, |
| LY_VCODE_INSTREXP_len(in), in, "either single or double quotation mark"); |
| return LY_EVALID; |
| } |
| context->status = LYXML_ATTR_CONTENT; |
| |
| is_ns = 0; |
| if (*prefix && *prefix_len == 5 && !strncmp(*prefix, "xmlns", 5)) { |
| is_ns = 1; |
| ns_prefix = *name; |
| ns_prefix_len = *name_len; |
| } else if (*name_len == 5 && !strncmp(*name, "xmlns", 5)) { |
| is_ns = 1; |
| } |
| if (is_ns) { |
| /* instead of attribute, we have namespace specification, |
| * so process it automatically and then move to another attribute (if any) */ |
| char *value = NULL; |
| size_t value_len = 0; |
| int dynamic = 0; |
| |
| LY_CHECK_RET(lyxml_get_string(context, &in, &value, &value_len, &value, &value_len, &dynamic)); |
| if ((rc = lyxml_ns_add(context, ns_prefix, ns_prefix_len, dynamic ? value : strndup(value, value_len)))) { |
| if (dynamic) { |
| free(value); |
| return rc; |
| } |
| } |
| if (context->status == LYXML_ATTRIBUTE) { |
| goto start; |
| } else { |
| (*prefix) = (*name) = NULL; |
| (*prefix_len) = (*name_len) = 0; |
| } |
| } |
| |
| /* move caller's input */ |
| (*input) = in; |
| return LY_SUCCESS; |
| } |
| |
| void |
| lyxml_context_clear(struct lyxml_context *context) |
| { |
| unsigned int u; |
| |
| ly_set_erase(&context->elements, free); |
| for (u = context->ns.count - 1; u + 1 > 0; --u) { |
| /* remove the ns structure */ |
| free(((struct lyxml_ns *)context->ns.objs[u])->prefix); |
| free(((struct lyxml_ns *)context->ns.objs[u])->uri); |
| free(context->ns.objs[u]); |
| } |
| ly_set_erase(&context->ns, NULL); |
| context->status = 0; |
| } |
| |
| LY_ERR |
| lyxml_dump_text(struct lyout *out, const char *text, int attribute) |
| { |
| LY_ERR ret = LY_SUCCESS; |
| unsigned int u; |
| |
| if (!text) { |
| return 0; |
| } |
| |
| for (u = 0; text[u]; u++) { |
| switch (text[u]) { |
| case '&': |
| ret = ly_print(out, "&"); |
| break; |
| case '<': |
| ret = ly_print(out, "<"); |
| break; |
| case '>': |
| /* not needed, just for readability */ |
| ret = ly_print(out, ">"); |
| break; |
| case '"': |
| if (attribute) { |
| ret = ly_print(out, """); |
| break; |
| } |
| /* falls through */ |
| default: |
| ly_write(out, &text[u], 1); |
| } |
| } |
| |
| return ret; |
| } |
| |