common CHANGE make utf8 getter more generic for other parsers, not only for XML
diff --git a/src/common.c b/src/common.c
index c3a737d..912ee13 100644
--- a/src/common.c
+++ b/src/common.c
@@ -152,6 +152,82 @@
     return new_mem;
 }
 
+/**
+ * @brief Decode one UTF-8 character from @p input and move @p input past it.
+ * Rejects overlong forms, surrogates, U+FFFE/U+FFFF and C0 controls but TAB/LF/CR.
+ *
+ * @param[in,out] input Text to read; advanced only on success.
+ * @param[out] utf8_char Decoded code point; written only on success.
+ * @param[out] bytes_read Optional (may be NULL) count of consumed bytes.
+ * @return LY_SUCCESS, or LY_EINVAL for a malformed or disallowed character.
+ */
+LY_ERR
+ly_getutf8(const char **input, unsigned int *utf8_char, size_t *bytes_read)
+{
+    unsigned int c, len, aux, i;
+
+    c = (unsigned char)(*input)[0];
+    LY_CHECK_RET(!c, LY_EINVAL);
+
+    if (!(c & 0x80)) {
+        /* one byte character */
+        len = 1;
+        if (c < 0x20 && c != 0x9 && c != 0xa && c != 0xd) {
+            return LY_EINVAL;
+        }
+    } else if ((c & 0xe0) == 0xc0) {
+        /* two bytes character */
+        len = 2;
+        aux = (unsigned char)(*input)[1];
+        if ((aux & 0xc0) != 0x80) {
+            return LY_EINVAL;
+        }
+        c = ((c & 0x1f) << 6) | (aux & 0x3f);
+        if (c < 0x80) {
+            return LY_EINVAL;
+        }
+    } else if ((c & 0xf0) == 0xe0) {
+        /* three bytes character */
+        len = 3;
+        c &= 0x0f;
+        for (i = 1; i <= 2; i++) {
+            /* a NUL terminator is not a continuation byte, so no read past the end */
+            aux = (unsigned char)(*input)[i];
+            if ((aux & 0xc0) != 0x80) {
+                return LY_EINVAL;
+            }
+            c = (c << 6) | (aux & 0x3f);
+        }
+        if (c < 0x800 || (c > 0xd7ff && c < 0xe000) || c > 0xfffd) {
+            return LY_EINVAL;
+        }
+    } else if ((c & 0xf8) == 0xf0) {
+        /* four bytes character */
+        len = 4;
+        c &= 0x07;
+        for (i = 1; i <= 3; i++) {
+            aux = (unsigned char)(*input)[i];
+            if ((aux & 0xc0) != 0x80) {
+                return LY_EINVAL;
+            }
+            c = (c << 6) | (aux & 0x3f);
+        }
+        /* anything below U+10000 fits in three bytes, so it is overlong here */
+        if (c < 0x10000 || c > 0x10ffff) {
+            return LY_EINVAL;
+        }
+    } else {
+        return LY_EINVAL;
+    }
+
+    (*utf8_char) = c;
+    (*input) += len;
+    if (bytes_read) {
+        (*bytes_read) = len;
+    }
+    return LY_SUCCESS;
+}
+
 size_t
 LY_VCODE_INSTREXP_len(const char *str)
 {