schema compile BUGFIX backslash in pattern check
An error is now returned instead of triggering an assert when a ']' has
no matching unescaped '['. Also, the check now distinguishes a backslash
used as an escape character in a pattern from an escaped (literal) backslash.
diff --git a/src/schema_compile_node.c b/src/schema_compile_node.c
index 0e08923..3776da9 100644
--- a/src/schema_compile_node.c
+++ b/src/schema_compile_node.c
@@ -1176,6 +1176,7 @@
const char *orig_ptr;
PCRE2_SIZE err_offset;
pcre2_code *code_local;
+ ly_bool escaped;
LY_ERR r;
/* adjust the expression to a Perl equivalent
@@ -1197,6 +1198,7 @@
/* we need to replace all "$" and "^" (that are not in "[]") with "\$" and "\^" */
brack = 0;
idx = 0;
+ escaped = 0;
orig_ptr = pattern;
while (orig_ptr[0]) {
switch (orig_ptr[0]) {
@@ -1213,16 +1215,30 @@
++idx;
}
break;
+ case '\\':
+ /* escape character found or backslash is escaped */
+ escaped = !escaped;
+ /* copy backslash and continue with the next character */
+ perl_regex[idx] = orig_ptr[0];
+ ++idx;
+ ++orig_ptr;
+ continue;
case '[':
- /* must not be escaped */
- if ((orig_ptr == pattern) || (orig_ptr[-1] != '\\')) {
+ if (!escaped) {
++brack;
}
break;
case ']':
- if ((orig_ptr == pattern) || (orig_ptr[-1] != '\\')) {
- /* pattern was checked and compiled already */
- assert(brack);
+ if (!brack && !escaped) {
+ /* If ']' does not terminate a character class expression, then pcre2_compile() implicitly escapes the
+ * ']' character. But this seems to be against the regular expression rules declared in
+ * "XML Schema: Datatypes" and therefore an error is returned. So for example if the pattern is '\[a]' then
+ * pcre2 matches the characters '[a]' literally but in YANG such a pattern is not allowed.
+ */
+ LOGVAL(ctx, LY_VCODE_INREGEXP, pattern, orig_ptr, "character group doesn't begin with '['");
+ free(perl_regex);
+ return LY_EVALID;
+ } else if (!escaped) {
--brack;
}
break;
@@ -1235,6 +1251,7 @@
++idx;
++orig_ptr;
+ escaped = 0;
}
#ifndef PCRE2_ENDANCHORED
/* anchor match to end of subject */
diff --git a/tests/utests/types/string.c b/tests/utests/types/string.c
index 466c1ae..47ad0ad 100644
--- a/tests/utests/types/string.c
+++ b/tests/utests/types/string.c
@@ -314,6 +314,24 @@
CHECK_LYSP_NODE_LEAF(lysp_leaf, NULL, 0, 0x0, 0, "port", 0, 0, NULL, 0, 0, NULL, NULL);
CHECK_LYSP_TYPE(&(lysp_leaf->type), 0, 0, 0, 0, 0, 0x0, 0, 0, "my_type", 0, 0, 1, 0, 0, 0);
+ /* TEST pattern backslash
+ * The '[' character is escaped, thus character group is broken.
+ */
+
+ schema = MODULE_CREATE_YANG("TPATTERN_BC_ERR_1", "leaf port {type string {"
+ "pattern '\\[a]b';" /* pattern '\[a]b'; */
+ "}}");
+ UTEST_INVALID_MODULE(schema, LYS_IN_YANG, NULL, LY_EVALID);
+ CHECK_LOG_CTX("Regular expression \"\\[a]b\" is not valid (\"]b\": character group doesn't begin with '[').",
+ "/TPATTERN_BC_ERR_1:port");
+
+ schema = MODULE_CREATE_YANG("TPATTERN_BC_ERR_2", "leaf port {type string {"
+ "pattern \"\\\\[a]b\";" /* pattern "\\[a]b"; */
+ "}}");
+ UTEST_INVALID_MODULE(schema, LYS_IN_YANG, NULL, LY_EVALID);
+ CHECK_LOG_CTX("Regular expression \"\\[a]b\" is not valid (\"]b\": character group doesn't begin with '[').",
+ "/TPATTERN_BC_ERR_2:port");
+
/* PATTERN AND LENGTH */
schema = MODULE_CREATE_YANG("TPL_0",
"typedef my_type {"
@@ -1139,6 +1157,30 @@
assert_int_equal(LY_EINVAL, ly_ret);
ly_err_free(err);
+ /* TEST pattern backslash */
+
+ schema = MODULE_CREATE_YANG("TPATTERN_BC_1", "leaf port {type string {"
+ "pattern '\\\\[a]b';" /* pattern '\\[a]b'; */
+ "}}");
+ UTEST_ADD_MODULE(schema, LYS_IN_YANG, NULL, &mod);
+ lysc_type = ((struct lysc_node_leaf *)mod->compiled->data)->type;
+ val_text = "\\ab";
+ assert_int_equal(LY_SUCCESS, type->store(UTEST_LYCTX, lysc_type, val_text, strlen(val_text),
+ 0, LY_VALUE_XML, NULL, LYD_VALHINT_STRING, NULL, &value, NULL, &err));
+ CHECK_LYD_VALUE(value, STRING, "\\ab");
+ type->free(UTEST_LYCTX, &value);
+
+ schema = MODULE_CREATE_YANG("TPATTERN_BC_2", "leaf port {type string {"
+ "pattern \"\\\\\\\\[a]b\";" /* pattern "\\\\[a]b"; */
+ "}}");
+ UTEST_ADD_MODULE(schema, LYS_IN_YANG, NULL, &mod);
+ lysc_type = ((struct lysc_node_leaf *)mod->compiled->data)->type;
+ val_text = "\\ab";
+ assert_int_equal(LY_SUCCESS, type->store(UTEST_LYCTX, lysc_type, val_text, strlen(val_text),
+ 0, LY_VALUE_XML, NULL, LYD_VALHINT_STRING, NULL, &value, NULL, &err));
+ CHECK_LYD_VALUE(value, STRING, "\\ab");
+ type->free(UTEST_LYCTX, &value);
+
/* ERROR TESTS */
val_text = "10 \"| bcdei";