Merge remote-tracking branch 'upstream/libyang2' into libyang2
diff --git a/src/common.c b/src/common.c
index 72a30dc..4ce64fd 100644
--- a/src/common.c
+++ b/src/common.c
@@ -240,6 +240,39 @@
return LY_SUCCESS;
}
+/**
+ * @brief Byte length of a UTF-8 encoded character, looked up by the value of its first byte.
+ *
+ * Every value outside 0xc0 - 0xfd maps to 1, so a scan driven by this table
+ * always advances by at least one byte.
+ */
+static const unsigned char
+utf8_char_length_table[] = {
+    /* 0x00 - 0x0f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* 0x10 - 0x1f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* 0x20 - 0x2f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* 0x30 - 0x3f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* 0x40 - 0x4f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* 0x50 - 0x5f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* 0x60 - 0x6f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* 0x70 - 0x7f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* 0x80 - 0x8f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* 0x90 - 0x9f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* 0xa0 - 0xaf */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* 0xb0 - 0xbf */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* 0xc0 - 0xcf */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    /* 0xd0 - 0xdf */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    /* 0xe0 - 0xef */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    /* 0xf0 - 0xff */ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
+};
+
+/*
+ * Count the characters stored in the first @p bytes bytes of @p str.
+ *
+ * The length of every character is taken from utf8_char_length_table[] according
+ * to its first byte. Counting ends at the first zero byte or when @p bytes bytes
+ * were consumed, whichever comes first.
+ */
+size_t
+ly_utf8len(const char *str, size_t bytes)
+{
+    size_t len = 0;
+    size_t pos = 0;
+
+    /* the bound must be tested before the byte is read, str[bytes] need not be
+     * accessible when the string is not NUL-terminated */
+    while ((pos < bytes) && str[pos]) {
+        /* an offset is used instead of a pointer, so a truncated trailing sequence
+         * that moves pos past bytes only ends the loop */
+        pos += utf8_char_length_table[(unsigned char)str[pos]];
+        ++len;
+    }
+
+    return len;
+}
+
size_t
LY_VCODE_INSTREXP_len(const char *str)
{
diff --git a/src/common.h b/src/common.h
index 2b5c1d8..2aece07 100644
--- a/src/common.h
+++ b/src/common.h
@@ -388,6 +388,16 @@
LY_ERR ly_getutf8(const char **input, unsigned int *utf8_char, size_t *bytes_read);
/**
+ * @brief Get number of characters in the @p str, taking multibyte characters into account.
+ * @param[in] str String to examine, it is read up to the first zero byte or up to @p bytes bytes.
+ * @param[in] bytes Number of valid bytes that are supposed to be taken into account in @p str.
+ * This parameter is useful mainly for non NUL-terminated strings. In case of a NUL-terminated
+ * string, strlen() can be used.
+ * @return Number of characters in the string, a multibyte character is counted as a single character.
+ */
+size_t ly_utf8len(const char *str, size_t bytes);
+
+/**
* @brief Parse signed integer with possible limitation.
* @param[in] val_str String value containing signed integer, note that
* nothing else than whitespaces are expected after the value itself.
diff --git a/src/plugins_types.c b/src/plugins_types.c
index e7860cd..1be7af2 100644
--- a/src/plugins_types.c
+++ b/src/plugins_types.c
@@ -782,8 +782,11 @@
/* length restriction of the string */
if (type_str->length) {
char buf[22];
- snprintf(buf, 22, "%lu", value_len);
- LY_CHECK_RET(ly_type_validate_range(LY_TYPE_BINARY, type_str->length, value_len, buf, err));
+ size_t char_count = ly_utf8len(value, value_len);
+
+ /* value_len is in bytes, but we need the number of characters here */
+ snprintf(buf, 22, "%lu", char_count);
+ LY_CHECK_RET(ly_type_validate_range(LY_TYPE_BINARY, type_str->length, char_count, buf, err));
}
/* pattern restrictions */
diff --git a/src/plugins_types.h b/src/plugins_types.h
index 5ee152e..0f1ff77 100644
--- a/src/plugins_types.h
+++ b/src/plugins_types.h
@@ -94,7 +94,7 @@
* @param[in] type Type of the value being canonized.
* @param[in] value Lexical representation of the value to be validated (and canonized).
* It is never NULL, empty string is represented as "" with zero @p value_len.
- * @param[in] value_len Length of the given \p value.
+ * @param[in] value_len Length (number of bytes) of the given \p value.
* @param[in] options [Type plugin options ](@ref plugintypeopts).
*
* @param[in] resolve_prefix Parser-specific callback to resolve prefixes used in the value strings.
@@ -266,7 +266,7 @@
* @param[in] patterns ([Sized array](@ref sizedarrays)) of the compiled list of pointers to the pattern restrictions.
* The array can be found in the lysc_type_str::patterns structure.
* @param[in] str String to validate.
- * @param[in] str_len Length of the string to validate (mandatory).
+ * @param[in] str_len Length (number of bytes) of the string to validate (mandatory).
* @param[out] err Error information in case of failure or non-matching @p str. The error structure can be freed by ly_err_free().
* @return LY_SUCCESS when @p matches all the patterns.
* @return LY_EVALID when @p does not match any of the patterns.
diff --git a/src/tree_schema_internal.h b/src/tree_schema_internal.h
index ffda7f5..33f5c7c 100644
--- a/src/tree_schema_internal.h
+++ b/src/tree_schema_internal.h
@@ -29,7 +29,7 @@
c == '_' || c == '-' || c == '.')
/* Macro to check YANG's yang-char grammar rule */
-#define is_yangutf8char(c) ((c >= 0x20 && c <= 0xd77) || c == 0x09 || c == 0x0a || c == 0x0d || \
+#define is_yangutf8char(c) ((c >= 0x20 && c <= 0xd7ff) || c == 0x09 || c == 0x0a || c == 0x0d || \
(c >= 0xe000 && c <= 0xfdcf) || (c >= 0xfdf0 && c <= 0xfffd) || \
(c >= 0x10000 && c <= 0x1fffd) || (c >= 0x20000 && c <= 0x2fffd) || \
(c >= 0x30000 && c <= 0x3fffd) || (c >= 0x40000 && c <= 0x2fffd) || \
diff --git a/tests/features/test_types.c b/tests/features/test_types.c
index 9bf4a90..b60b4eb 100644
--- a/tests/features/test_types.c
+++ b/tests/features/test_types.c
@@ -84,6 +84,7 @@
"leaf dec64-norestr {type decimal64 {fraction-digits 18;}}"
"leaf str {type string {length 8..10; pattern '[a-z ]*';}}"
"leaf str-norestr {type string;}"
+ "leaf str-utf8 {type string{length 2..5; pattern '€*';}}"
"leaf bool {type boolean;}"
"leaf empty {type empty;}"
"leaf ident {type identityref {base defs:interface-type;}}"
@@ -362,6 +363,24 @@
assert_string_equal("teststring", leaf->value.canonized);
lyd_free_all(tree);
+ /* multibyte characters (€ encodes as 3-byte UTF8 character, length restriction is 2-5) */
+ data = "<str-utf8 xmlns=\"urn:tests:types\">€€</str-utf8>";
+ assert_non_null(tree = lyd_parse_mem(s->ctx, data, LYD_XML, 0, NULL));
+ assert_int_equal(LYS_LEAF, tree->schema->nodetype);
+ assert_string_equal("str-utf8", tree->schema->name);
+ leaf = (struct lyd_node_term*)tree;
+ assert_string_equal("€€", leaf->value.canonized);
+ lyd_free_all(tree);
+ data = "<str-utf8 xmlns=\"urn:tests:types\">€</str-utf8>";
+ assert_null(lyd_parse_mem(s->ctx, data, LYD_XML, 0, NULL));
+ logbuf_assert("Length \"1\" does not satisfy the length constraint. /");
+ data = "<str-utf8 xmlns=\"urn:tests:types\">€€€€€€</str-utf8>";
+ assert_null(lyd_parse_mem(s->ctx, data, LYD_XML, 0, NULL));
+ logbuf_assert("Length \"6\" does not satisfy the length constraint. /");
+ data = "<str-utf8 xmlns=\"urn:tests:types\">€€x</str-utf8>";
+ assert_null(lyd_parse_mem(s->ctx, data, LYD_XML, 0, NULL));
+ logbuf_assert("String \"€€x\" does not conforms to the 1. pattern restriction of its type. /");
+
/* invalid length */
data = "<str xmlns=\"urn:tests:types\">short</str>";
assert_null(lyd_parse_mem(s->ctx, data, LYD_XML, 0, NULL));