Merge remote-tracking branch 'upstream/libyang2' into libyang2
diff --git a/src/common.c b/src/common.c
index 72a30dc..4ce64fd 100644
--- a/src/common.c
+++ b/src/common.c
@@ -240,6 +240,39 @@
     return LY_SUCCESS;
 }
 
+/**
+ * @brief Static table of UTF-8 character lengths in bytes, indexed by the character's first byte (0x80-0xBF, 0xFE and 0xFF map to 1).
+ */
+static const unsigned char
+utf8_char_length_table[] = {
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
+};
+
+size_t
+ly_utf8len(const char *str, size_t bytes)
+{
+    size_t len = 0, pos = 0;
+
+    /* test the byte limit before reading str[pos]: str need not be NUL-terminated, so str[bytes] may be out of bounds */
+    for (; pos < bytes && str[pos]; ++len, pos += utf8_char_length_table[(unsigned char)str[pos]]);
+    return len;
+}
+
 size_t
 LY_VCODE_INSTREXP_len(const char *str)
 {
diff --git a/src/common.h b/src/common.h
index 2b5c1d8..2aece07 100644
--- a/src/common.h
+++ b/src/common.h
@@ -388,6 +388,16 @@
 LY_ERR ly_getutf8(const char **input, unsigned int *utf8_char, size_t *bytes_read);
 
 /**
+ * @brief Get number of characters in the @p str, taking multibyte characters into account.
+ * @param[in] str String to examine.
+ * @param[in] bytes Number of valid bytes that are supposed to be taken into account in @p str.
+ * This parameter is useful mainly for non NUL-terminated strings. For a NUL-terminated
+ * string, the result of strlen() can be passed here.
+ * @return Number of characters in the (possibly multibyte) string.
+ */
+size_t ly_utf8len(const char *str, size_t bytes);
+
+/**
  * @brief Parse signed integer with possible limitation.
  * @param[in] val_str String value containing signed integer, note that
  * nothing else than whitespaces are expected after the value itself.
diff --git a/src/plugins_types.c b/src/plugins_types.c
index e7860cd..1be7af2 100644
--- a/src/plugins_types.c
+++ b/src/plugins_types.c
@@ -782,8 +782,11 @@
     /* length restriction of the string */
     if (type_str->length) {
         char buf[22];
-        snprintf(buf, 22, "%lu", value_len);
-        LY_CHECK_RET(ly_type_validate_range(LY_TYPE_BINARY, type_str->length, value_len, buf, err));
+        size_t char_count = ly_utf8len(value, value_len);
+
+        /* value_len is in bytes, but the length restriction is expressed in characters; %zu matches size_t */
+        snprintf(buf, sizeof buf, "%zu", char_count);
+        LY_CHECK_RET(ly_type_validate_range(LY_TYPE_BINARY, type_str->length, char_count, buf, err));
     }
 
     /* pattern restrictions */
diff --git a/src/plugins_types.h b/src/plugins_types.h
index 5ee152e..0f1ff77 100644
--- a/src/plugins_types.h
+++ b/src/plugins_types.h
@@ -94,7 +94,7 @@
  * @param[in] type Type of the value being canonized.
  * @param[in] value Lexical representation of the value to be validated (and canonized).
  *            It is never NULL, empty string is represented as "" with zero @p value_len.
- * @param[in] value_len Length of the given \p value.
+ * @param[in] value_len Length (number of bytes) of the given \p value.
  * @param[in] options [Type plugin options ](@ref plugintypeopts).
  *
  * @param[in] resolve_prefix Parser-specific callback to resolve prefixes used in the value strings.
@@ -266,7 +266,7 @@
  * @param[in] patterns ([Sized array](@ref sizedarrays)) of the compiled list of pointers to the pattern restrictions.
  * The array can be found in the lysc_type_str::patterns structure.
  * @param[in] str String to validate.
- * @param[in] str_len Length of the string to validate (mandatory).
+ * @param[in] str_len Length (number of bytes) of the string to validate (mandatory).
  * @param[out] err Error information in case of failure or non-matching @p str. The error structure can be freed by ly_err_free().
  * @return LY_SUCCESS when @p matches all the patterns.
  * @return LY_EVALID when @p does not match any of the patterns.
diff --git a/src/tree_schema_internal.h b/src/tree_schema_internal.h
index ffda7f5..33f5c7c 100644
--- a/src/tree_schema_internal.h
+++ b/src/tree_schema_internal.h
@@ -29,7 +29,7 @@
                               c == '_' || c == '-' || c == '.')
 
 /* Macro to check YANG's yang-char grammar rule */
-#define is_yangutf8char(c) ((c >= 0x20 && c <= 0xd77) || c == 0x09 || c == 0x0a || c == 0x0d || \
+#define is_yangutf8char(c) ((c >= 0x20 && c <= 0xd7ff) || c == 0x09 || c == 0x0a || c == 0x0d || \
                             (c >= 0xe000 && c <= 0xfdcf)   || (c >= 0xfdf0 && c <= 0xfffd)   || \
                             (c >= 0x10000 && c <= 0x1fffd) || (c >= 0x20000 && c <= 0x2fffd) || \
                             (c >= 0x30000 && c <= 0x3fffd) || (c >= 0x40000 && c <= 0x2fffd) || \
diff --git a/tests/features/test_types.c b/tests/features/test_types.c
index 9bf4a90..b60b4eb 100644
--- a/tests/features/test_types.c
+++ b/tests/features/test_types.c
@@ -84,6 +84,7 @@
             "leaf dec64-norestr {type decimal64 {fraction-digits 18;}}"
             "leaf str {type string {length 8..10; pattern '[a-z ]*';}}"
             "leaf str-norestr {type string;}"
+            "leaf str-utf8 {type string{length 2..5; pattern '€*';}}"
             "leaf bool {type boolean;}"
             "leaf empty {type empty;}"
             "leaf ident {type identityref {base defs:interface-type;}}"
@@ -362,6 +363,24 @@
     assert_string_equal("teststring", leaf->value.canonized);
     lyd_free_all(tree);
 
+    /* multibyte characters (€ encodes as 3-byte UTF8 character, length restriction is 2-5) */
+    data = "<str-utf8 xmlns=\"urn:tests:types\">€€</str-utf8>";
+    assert_non_null(tree = lyd_parse_mem(s->ctx, data, LYD_XML, 0, NULL));
+    assert_int_equal(LYS_LEAF, tree->schema->nodetype);
+    assert_string_equal("str-utf8", tree->schema->name);
+    leaf = (struct lyd_node_term*)tree;
+    assert_string_equal("€€", leaf->value.canonized);
+    lyd_free_all(tree);
+    data = "<str-utf8 xmlns=\"urn:tests:types\">€</str-utf8>";
+    assert_null(lyd_parse_mem(s->ctx, data, LYD_XML, 0, NULL));
+    logbuf_assert("Length \"1\" does not satisfy the length constraint. /");
+    data = "<str-utf8 xmlns=\"urn:tests:types\">€€€€€€</str-utf8>";
+    assert_null(lyd_parse_mem(s->ctx, data, LYD_XML, 0, NULL));
+    logbuf_assert("Length \"6\" does not satisfy the length constraint. /");
+    data = "<str-utf8 xmlns=\"urn:tests:types\">€€x</str-utf8>";
+    assert_null(lyd_parse_mem(s->ctx, data, LYD_XML, 0, NULL));
+    logbuf_assert("String \"€€x\" does not conforms to the 1. pattern restriction of its type. /");
+
     /* invalid length */
     data = "<str xmlns=\"urn:tests:types\">short</str>";
     assert_null(lyd_parse_mem(s->ctx, data, LYD_XML, 0, NULL));