YANG parser BUGFIX parsing double-quoted strings
There was still an issue with parsing backslash-escaped characters and
trimming trailing whitespace: backslash-escaped whitespace characters
were wrongly treated as whitespace. Trailing whitespace is supposed to
be removed before the backslash-escaped characters are substituted, so
escaped characters must not be counted as whitespace for this purpose.
diff --git a/src/parser_yang.c b/src/parser_yang.c
index ca713b9..f3ca6a4 100644
--- a/src/parser_yang.c
+++ b/src/parser_yang.c
@@ -300,6 +300,7 @@
unsigned int string, block_indent = 0, current_indent = 0, need_buf = 0;
const char *c;
int prefix = 0;
+ unsigned int trailing_ws = 0; /* current number of trailing whitespace characters */
if (**data == '\"') {
string = 2;
@@ -331,11 +332,24 @@
/* string may be finished, but check for + */
string = 4;
MOVE_INPUT(ctx, data, 1);
+ trailing_ws = 0;
break;
case '\\':
/* special character following */
string = 3;
+
+ /* the backslash sequence is substituted, so we will need a buffer to store the result */
+ need_buf = 1;
+
+ /* move forward to the escaped character */
++(*data);
+
+ /* note that the trailing whitespaces are supposed to be trimmed before substitution of
+ * backslash-escaped characters (RFC 7950, 6.1.3), so we have to zero the trailing whitespaces counter */
+ trailing_ws = 0;
+
+ /* since the backslash-escaped character is handled as first non-whitespace character, stop eating indentation */
+ current_indent = block_indent;
break;
case ' ':
if (current_indent < block_indent) {
@@ -345,6 +359,7 @@
/* check and store character */
LY_CHECK_RET(buf_store_char(ctx, data, arg, word_p, word_len, word_b, buf_len, need_buf, &prefix));
}
+ trailing_ws++;
break;
case '\t':
if (current_indent < block_indent) {
@@ -363,6 +378,7 @@
/* additional characters for indentation - only 1 was count in buf_store_char */
ctx->indent += 7;
}
+ trailing_ws++;
break;
case '\n':
if (block_indent) {
@@ -370,11 +386,9 @@
need_buf = 1;
/* remove trailing tabs and spaces */
- while ((*word_len) && ((*word_p)[(*word_len) - 1] == '\t' || (*word_p)[(*word_len) - 1] == ' ')) {
- --(*word_len);
- }
+ (*word_len) = *word_len - trailing_ws;
- /* start indentation */
+ /* restart indentation */
current_indent = 0;
}
@@ -386,6 +400,7 @@
/* reset context indentation counter for possible string after this one */
ctx->indent = 0;
+ trailing_ws = 0;
break;
default:
/* first non-whitespace character, stop eating indentation */
@@ -393,6 +408,7 @@
/* check and store character */
LY_CHECK_RET(buf_store_char(ctx, data, arg, word_p, word_len, word_b, buf_len, need_buf, &prefix));
+ trailing_ws = 0;
break;
}
break;
@@ -401,11 +417,9 @@
switch (**data) {
case 'n':
c = "\n";
- need_buf = 1;
break;
case 't':
c = "\t";
- need_buf = 1;
break;
case '\"':
c = *data;
diff --git a/tests/src/test_parser_yang.c b/tests/src/test_parser_yang.c
index cb9e058..8931fba 100644
--- a/tests/src/test_parser_yang.c
+++ b/tests/src/test_parser_yang.c
@@ -338,6 +338,24 @@
assert_int_equal(14, len);
assert_string_equal("hello\n world!", word);
free(buf);
+ /* In contrast to previous, the backslash-escaped tabs are expanded after trimming, so they are preserved */
+ ctx.indent = 14;
+ str = "\"hello \\t\n\t\\t world!\"";
+ assert_int_equal(LY_SUCCESS, get_argument(&ctx, &str, Y_STR_ARG, NULL, &word, &buf, &len));
+ assert_non_null(buf);
+ assert_ptr_equal(word, buf);
+ assert_int_equal(16, len);
+ assert_string_equal("hello \t\n\t world!", word);
+ free(buf);
+ /* Do not handle whitespaces after backslash-escaped newline as indentation */
+ ctx.indent = 14;
+ str = "\"hello\\n\t\t world!\"";
+ assert_int_equal(LY_SUCCESS, get_argument(&ctx, &str, Y_STR_ARG, NULL, &word, &buf, &len));
+ assert_non_null(buf);
+ assert_ptr_equal(word, buf);
+ assert_int_equal(15, len);
+ assert_string_equal("hello\n\t\t world!", word);
+ free(buf);
ctx.indent = 14;
str = "\"hello\n \tworld!\"";
@@ -362,7 +380,7 @@
free(buf);
str = "\"hel\" +\t\nlo"; /* unquoted the second part */
assert_int_equal(LY_EVALID, get_argument(&ctx, &str, Y_STR_ARG, NULL, &word, &buf, &len));
- logbuf_assert("Both string parts divided by '+' must be quoted. Line number 5.");
+ logbuf_assert("Both string parts divided by '+' must be quoted. Line number 6.");
str = "\'he\'\t\n+ \"llo\"";
assert_int_equal(LY_SUCCESS, get_argument(&ctx, &str, Y_STR_ARG, NULL, &word, &buf, &len));
@@ -381,7 +399,7 @@
/* missing argument */
str = ";";
assert_int_equal(LY_EVALID, get_argument(&ctx, &str, Y_STR_ARG, NULL, &word, &buf, &len));
- logbuf_assert("Invalid character sequence \";\", expected an argument. Line number 7.");
+ logbuf_assert("Invalid character sequence \";\", expected an argument. Line number 8.");
}
static void