xml FEATURE support for CDATA
Fixes #1794
diff --git a/src/xml.c b/src/xml.c
index 663d3e3..aa35fb4 100644
--- a/src/xml.c
+++ b/src/xml.c
@@ -262,12 +262,6 @@
sectname = "Comment";
endtag = "-->";
endtag_len = ly_strlen_const("-->");
- } else if (!strncmp(xmlctx->in->current, "[CDATA[", ly_strlen_const("[CDATA["))) {
- /* CDATA section */
- move_input(xmlctx, ly_strlen_const("[CDATA["));
- sectname = "CData";
- endtag = "]]>";
- endtag_len = ly_strlen_const("]]>");
} else if (!strncmp(xmlctx->in->current, "DOCTYPE", ly_strlen_const("DOCTYPE"))) {
/* Document type declaration - not supported */
LOGVAL(ctx, LY_VCODE_NSUPP, "Document Type Declaration");
@@ -322,6 +316,53 @@
}
/**
+ * @brief Prepare buffer for new data.
+ *
+ * @param[in] ctx Context for logging.
+ * @param[in,out] in XML input data.
+ * @param[in,out] offset Current offset in @p in.
+ * @param[in] need_space Needed additional free space that is allocated.
+ * @param[in,out] buf Dynamic buffer.
+ * @param[in,out] len Current @p buf length (used characters).
+ * @param[in,out] size Current @p buf size (allocated characters).
+ * @return LY_ERR value.
+ */
+static LY_ERR
+lyxml_parse_value_use_buf(const struct ly_ctx *ctx, const char **in, size_t *offset, size_t need_space, char **buf,
+ size_t *len, size_t *size)
+{
+#define BUFSIZE 24
+#define BUFSIZE_STEP 128
+
+ if (!*buf) {
+ /* prepare output buffer */
+ *buf = malloc(BUFSIZE);
+ LY_CHECK_ERR_RET(!*buf, LOGMEM(ctx), LY_EMEM);
+ *size = BUFSIZE;
+ }
+
+ /* allocate needed space */
+ while (*len + *offset + need_space >= *size) {
+ *buf = ly_realloc(*buf, *size + BUFSIZE_STEP);
+ LY_CHECK_ERR_RET(!*buf, LOGMEM(ctx), LY_EMEM);
+ *size += BUFSIZE_STEP;
+ }
+
+ if (*offset) {
+ /* store what we have so far */
+ memcpy(&(*buf)[*len], *in, *offset);
+ *len += *offset;
+ *in += *offset;
+ *offset = 0;
+ }
+
+ return LY_SUCCESS;
+
+#undef BUFSIZE
+#undef BUFSIZE_STEP
+}
+
+/**
* @brief Parse XML text content (value).
*
* @param[in] xmlctx XML context to use.
@@ -335,9 +376,6 @@
static LY_ERR
lyxml_parse_value(struct lyxml_ctx *xmlctx, char endchar, char **value, size_t *length, ly_bool *ws_only, ly_bool *dynamic)
{
-#define BUFSIZE 24
-#define BUFSIZE_STEP 128
-
const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */
const char *in = xmlctx->in->current, *start, *in_aux;
char *buf = NULL;
@@ -361,29 +399,10 @@
/* non WS */
ws = 0;
- if (!buf) {
- /* prepare output buffer */
- buf = malloc(BUFSIZE);
- LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
- size = BUFSIZE;
- }
-
- /* allocate enough for the offset and next character,
+ /* use buffer and allocate enough for the offset and next character,
* we will need 4 bytes at most since we support only the predefined
* (one-char) entities and character references */
- while (len + offset + 4 >= size) {
- buf = ly_realloc(buf, size + BUFSIZE_STEP);
- LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
- size += BUFSIZE_STEP;
- }
-
- if (offset) {
- /* store what we have so far */
- memcpy(&buf[len], in, offset);
- len += offset;
- in += offset;
- offset = 0;
- }
+ LY_CHECK_RET(lyxml_parse_value_use_buf(ctx, &in, &offset, 4, &buf, &len, &size));
++offset;
if (in[offset] != '#') {
@@ -434,18 +453,50 @@
}
- LY_CHECK_ERR_GOTO(in[offset] != ';',
- LOGVAL(ctx, LY_VCODE_INSTREXP,
- LY_VCODE_INSTREXP_len(&in[offset]), &in[offset], ";"),
- error);
+ if (in[offset] != ';') {
+ LOGVAL(ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(&in[offset]), &in[offset], ";");
+ goto error;
+ }
++offset;
- LY_CHECK_ERR_GOTO(ly_pututf8(&buf[len], n, &u),
- LOGVAL(ctx, LYVE_SYNTAX, "Invalid character reference \"%.*s\" (0x%08x).", 12, p, n),
- error);
+ if (ly_pututf8(&buf[len], n, &u)) {
+ LOGVAL(ctx, LYVE_SYNTAX, "Invalid character reference \"%.*s\" (0x%08x).", 12, p, n);
+ goto error;
+ }
len += u;
in += offset;
offset = 0;
}
+ } else if (!strncmp(in + offset, "<![CDATA[", ly_strlen_const("<![CDATA["))) {
+ /* CDATA, find the end */
+ in_aux = strstr(in + offset + ly_strlen_const("<![CDATA["), "]]>");
+ if (!in_aux) {
+ LOGVAL(xmlctx->ctx, LY_VCODE_NTERM, "CDATA");
+ goto error;
+ }
+ u = in_aux - (in + offset + ly_strlen_const("<![CDATA["));
+
+ /* use buffer, allocate enough for the whole CDATA */
+ LY_CHECK_RET(lyxml_parse_value_use_buf(ctx, &in, &offset, u, &buf, &len, &size));
+
+ /* skip CDATA tag */
+ in += ly_strlen_const("<![CDATA[");
+ assert(!offset);
+
+ /* analyze CDATA for non WS and newline chars */
+ for (n = 0; n < u; ++n) {
+ if (in[n] == '\n') {
+ LY_IN_NEW_LINE(xmlctx->in);
+ } else if (!is_xmlws(in[n])) {
+ ws = 0;
+ }
+ }
+
+ /* copy CDATA */
+ memcpy(buf + len, in, u);
+ len += u;
+
+ /* move input skipping the end tag */
+ in += u + ly_strlen_const("]]>");
} else if (in[offset] == endchar) {
/* end of string */
if (buf) {
@@ -502,9 +553,6 @@
xmlctx->in->current = in;
return LY_SUCCESS;
-
-#undef BUFSIZE
-#undef BUFSIZE_STEP
}
/**