xml parser CHANGE handle mixed XML content
Mixed XML content is not allowed. Detect it inside the XML parser
so that the callers do not need to check for it themselves.
diff --git a/src/parser_xml.c b/src/parser_xml.c
index 066a4d1..86ae99a 100644
--- a/src/parser_xml.c
+++ b/src/parser_xml.c
@@ -217,7 +217,12 @@
if (ctx->status == LYXML_ELEM_CONTENT) {
/* get the value */
- lyxml_get_string((struct lyxml_context *)ctx, data, &buffer, &buffer_size, &value, &value_len, &dynamic);
+ LY_ERR r = lyxml_get_string((struct lyxml_context *)ctx, data, &buffer, &buffer_size, &value, &value_len, &dynamic);
+ if (r == LY_EINVAL) {
+ /* just indentation of a child element found */
+ LOGVAL(ctx->ctx, LY_VLOG_LINE, &ctx->line, LYVE_SYNTAX, "Child element inside terminal node \"%s\" found.", cur->schema->name);
+ goto cleanup;
+ }
lyd_value_validate((struct lyd_node_term*)cur, value, value_len,
LY_TYPE_VALIDATE_CANONIZE | (dynamic ? LY_TYPE_VALIDATE_DYNAMIC : 0));
}
diff --git a/src/xml.c b/src/xml.c
index 59991d6..a318420 100644
--- a/src/xml.c
+++ b/src/xml.c
@@ -311,9 +311,28 @@
LY_CHECK_ERR_RET(!in[offset], LOGVAL(ctx, LY_VLOG_LINE, &context->line, LY_VCODE_EOF), LY_EVALID);
context->line += newlines;
if (in[offset] == '<') {
+ const char *name, *prefix;
+ size_t name_len, prefix_len;
+
(*input) = in + offset;
- context->status -= 1; /* LYXML_ELEMENT */;
- return LY_EINVAL;
+
+ /* find out whether it is a child element (indentation) or a closing element (whitespace-only content) */
+ in = *input;
+ rc = lyxml_get_element(context, &in, &prefix, &prefix_len, &name, &name_len);
+ if (name) {
+ /* the element here is not a closing element, so the whitespace was just indentation before the child */
+ free(context->elements.objs[--context->elements.count]);
+ context->status -= 1; /* LYXML_ELEMENT */
+ return LY_EINVAL;
+ } else if (rc) {
+ /* some parsing error, so pass it */
+ (*input) = in;
+ return rc;
+ } else {
+ /* whitespace-only content */
+ len = offset - 1;
+ goto success;
+ }
}
}
/* init */
@@ -427,7 +446,29 @@
/* in case of element content, keep the leading <,
* for attribute's value move after the terminating quotation mark */
if (context->status == LYXML_ELEM_CONTENT) {
+ const char *name, *prefix;
+ size_t name_len, prefix_len;
+
in += offset;
+
+ /* find out whether it is a child element (mixed content) or a closing element (regular content) */
+ (*input) = in;
+ rc = lyxml_get_element(context, &in, &prefix, &prefix_len, &name, &name_len);
+ if (name) {
+ /* the element here is not a closing element, so this is mixed content, which is not allowed */
+ struct lyxml_elem *e = (struct lyxml_elem*)context->elements.objs[--context->elements.count];
+ LOGVAL(ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX, "Mixed XML content is not allowed (%.*s).",
+ offset + (in - (*input)), &(*input)[-offset]);
+ free(e);
+ return LY_EVALID;
+ } else if (rc) {
+ /* some parsing error, so pass it */
+ return rc;
+ } else {
+ /* closing element, so we have regular content */
+ context->status++;
+ goto success;
+ }
} else {
in += offset + 1;
}
diff --git a/tests/src/test_xml.c b/tests/src/test_xml.c
index ac67d9b..216e6b2 100644
--- a/tests/src/test_xml.c
+++ b/tests/src/test_xml.c
@@ -207,6 +207,16 @@
str = "<yin:c⁐element>";
assert_int_equal(LY_EVALID, lyxml_get_element(&ctx, &str, &prefix, &prefix_len, &name, &name_len));
logbuf_assert("Invalid character sequence \"⁐element>\", expected whitespace or element tag termination ('>' or '/>'. Line number 1.");
+
+ /* mixed content */
+ str = "<a>text <b>x</b></a>";
+ assert_int_equal(LY_SUCCESS, lyxml_get_element(&ctx, &str, &prefix, &prefix_len, &name, &name_len));
+ assert_string_equal("text <b>x</b></a>", str);
+ assert_int_equal(LYXML_ELEM_CONTENT, ctx.status);
+ assert_int_equal(LY_EVALID, lyxml_get_string(&ctx, &str, &buf, &buf_len, &out, &len, &dynamic));
+ logbuf_assert("Mixed XML content is not allowed (text <b>). Line number 1.");
+ lyxml_context_clear(&ctx);
+
}
static void
@@ -278,6 +288,8 @@
int dynamic;
const char *str, *p;
char *buf = NULL, *out = NULL;
+ const char *prefix, *name;
+ size_t prefix_len, name_len;
struct lyxml_context ctx;
memset(&ctx, 0, sizeof ctx);
@@ -306,10 +318,12 @@
/* empty element content - only formating before defining child */
ctx.status = LYXML_ELEM_CONTENT;
- str = "\n <";
+ str = "<x>\n <y>";
+ assert_int_equal(LY_SUCCESS, lyxml_get_element(&ctx, &str, &prefix, &prefix_len, &name, &name_len));
assert_int_equal(LY_EINVAL, lyxml_get_string(&ctx, &str, &buf, &buf_len, &out, &len, &dynamic));
assert_null(buf);
- assert_string_equal("<", str);
+ assert_string_equal("<y>", str);
+ lyxml_context_clear(&ctx);
/* empty element content is invalid - missing content terminating character < */
ctx.status = LYXML_ELEM_CONTENT;
@@ -327,7 +341,8 @@
/* valid strings */
ctx.status = LYXML_ELEM_CONTENT;
- str = "€𠜎Øn \n<&"'> ROK<";
+ str = "<a>€𠜎Øn \n<&"'> ROK</a>";
+ assert_int_equal(LY_SUCCESS, lyxml_get_element(&ctx, &str, &prefix, &prefix_len, &name, &name_len));
assert_int_equal(LY_SUCCESS, lyxml_get_string(&ctx, &str, &buf, &buf_len, &out, &len, &dynamic));
assert_int_not_equal(0, dynamic);
assert_non_null(buf);
@@ -335,8 +350,9 @@
assert_int_equal(22, buf_len);
assert_int_equal(21, len);
assert_string_equal("€𠜎Øn \n<&\"\'> ROK", buf);
- assert_string_equal("<", str);
+ assert_string_equal("", str);
assert_int_equal(LYXML_ELEMENT, ctx.status);
+ lyxml_context_clear(&ctx);
/* test using n-bytes UTF8 hexadecimal code points */
ctx.status = LYXML_ATTR_CONTENT;