common CHANGE make utf8 getter more generic for other parsers, not only for XML

commit: b416be63ae73ac7d8a5c3488ab1e0720b19ac247 [log] [tgz]
author: Radek Krejci <rkrejci@cesnet.cz> Mon Oct 01 14:51:45 2018 +0200
committer: Radek Krejci <rkrejci@cesnet.cz> Mon Oct 01 14:51:45 2018 +0200
tree: 9efe152a86411350f43c55e5ee754ae2a8f3dcff
parent: 7ab47f0b14c001a040f92dd77a9f38e040d1c100 [diff]
diff --git a/src/common.c b/src/common.c
index c3a737d..912ee13 100644
--- a/src/common.c
+++ b/src/common.c

@@ -152,6 +152,82 @@
     return new_mem;
 }
 
+LY_ERR
+ly_getutf8(const char **input, unsigned int *utf8_char, size_t *bytes_read)
+{
+    unsigned int c, len;
+    int aux, i;
+
+    /* the first byte selects the sequence length; an empty input (NUL byte) is an error */
+    c = (*input)[0];
+    LY_CHECK_RET(!c, LY_EINVAL);
+
+    if (!(c & 0x80)) {
+        /* one byte character */
+        len = 1;
+        /* control characters other than TAB (0x9), LF (0xa) and CR (0xd) are refused */
+        if (c < 0x20 && c != 0x9 && c != 0xa && c != 0xd) {
+            return LY_EINVAL;
+        }
+    } else if ((c & 0xe0) == 0xc0) {
+        /* two bytes character */
+        len = 2;
+
+        aux = (*input)[1];
+        if ((aux & 0xc0) != 0x80) {
+            return LY_EINVAL;
+        }
+        c = ((c & 0x1f) << 6) | (aux & 0x3f);
+        /* refuse overlong encoding of a value that fits into one byte */
+        if (c < 0x80) {
+            return LY_EINVAL;
+        }
+    } else if ((c & 0xf0) == 0xe0) {
+        /* three bytes character */
+        len = 3;
+
+        c &= 0x0f;
+        for (i = 1; i <= 2; i++) {
+            aux = (*input)[i];
+            if ((aux & 0xc0) != 0x80) {
+                return LY_EINVAL;
+            }
+
+            c = (c << 6) | (aux & 0x3f);
+        }
+        /* refuse overlong encoding, the 0xd800-0xdfff range and 0xfffe/0xffff */
+        if (c < 0x800 || (c > 0xd7ff && c < 0xe000) || c > 0xfffd) {
+            return LY_EINVAL;
+        }
+    } else if ((c & 0xf8) == 0xf0) {
+        /* four bytes character */
+        len = 4;
+
+        c &= 0x07;
+        for (i = 1; i <= 3; i++) {
+            aux = (*input)[i];
+            if ((aux & 0xc0) != 0x80) {
+                return LY_EINVAL;
+            }
+
+            c = (c << 6) | (aux & 0x3f);
+        }
+        /* values below 0x10000 fit into three bytes, so here they are overlong encodings */
+        if (c < 0x10000 || c > 0x10ffff) {
+            return LY_EINVAL;
+        }
+    } else {
+        return LY_EINVAL; /* stray continuation byte or a lead byte of a 5+ bytes sequence */
+    }
+
+    (*utf8_char) = c;
+    (*input) += len;
+    if (bytes_read) {
+        (*bytes_read) = len;
+    }
+    return LY_SUCCESS;
+}
+
 size_t
 LY_VCODE_INSTREXP_len(const char *str)
 {

diff --git a/src/common.h b/src/common.h
index 64ea5a9..731f711 100644
--- a/src/common.h
+++ b/src/common.h

@@ -256,6 +256,16 @@
 void *ly_realloc(void *ptr, size_t size);
 
 /**
+ * @brief Decode the next UTF-8 encoded character of the input string into its code point.
+ *
+ * @param[in,out] input Input string to process, moved behind the decoded character on success only.
+ * @param[out] utf8_char Code point of the decoded character, written on success only.
+ * @param[out] bytes_read Number of bytes used to encode the read utf8_char, can be NULL if not needed.
+ * @return LY_SUCCESS on success, LY_EINVAL on empty input or an invalid/refused byte sequence.
+ */
+LY_ERR ly_getutf8(const char **input, unsigned int *utf8_char, size_t *bytes_read);
+
+/**
  * @brief Check date string (4DIGIT "-" 2DIGIT "-" 2DIGIT)
  *
  * @param[in] ctx Context to store log message.
@@ -275,7 +285,7 @@
         for (_count = 0; *(ARRAY) && *((uint8_t *)(*(ARRAY) + _count)); ++_count); \
         if (!_count) *(ARRAY) = malloc(sizeof **(ARRAY) + 1); \
             else *(ARRAY) = ly_realloc(*(ARRAY), (_count + 1) * sizeof **(ARRAY) + 1); \
-        LY_CHECK_ERR_RET(!*(ARRAY), LOGMEM(CTX), RETVAL); \
+        LY_CHECK_ERR_RET(!*(ARRAY), LOGMEM(CTX->ctx), RETVAL); \
         *((uint8_t *)(*(ARRAY) + _count + 1)) = 0; \
         (NEW_ITEM) = (*(ARRAY)) + _count; \
         memset(NEW_ITEM, 0, sizeof *(NEW_ITEM));

diff --git a/src/xml.c b/src/xml.c
index 27ce9fa..fe1f2cd 100644
--- a/src/xml.c
+++ b/src/xml.c

@@ -89,90 +89,6 @@
 }
 
 /**
- * @brief Get UTF8 code point of the next character in the input string.
- *
- * @param[in,out] input Input string to process, updated according to the processed/read data.
- * @param[out] utf8_char UTF8 code point of the next character.
- * @param[out] bytes_read Number of bytes used to encode the read utf8_char.
- * @return LY_ERR value
- */
-static LY_ERR
-lyxml_getutf8(const char **input, unsigned int *utf8_char, size_t *bytes_read)
-{
-    unsigned int c, len;
-    int aux;
-    int i;
-
-    c = (*input)[0];
-    LY_CHECK_RET(!c, LY_EINVAL);
-
-    if (!(c & 0x80)) {
-        /* one byte character */
-        len = 1;
-
-        if (c < 0x20 && c != 0x9 && c != 0xa && c != 0xd) {
-            return LY_EINVAL;
-        }
-    } else if ((c & 0xe0) == 0xc0) {
-        /* two bytes character */
-        len = 2;
-
-        aux = (*input)[1];
-        if ((aux & 0xc0) != 0x80) {
-            return LY_EINVAL;
-        }
-        c = ((c & 0x1f) << 6) | (aux & 0x3f);
-
-        if (c < 0x80) {
-            return LY_EINVAL;
-        }
-    } else if ((c & 0xf0) == 0xe0) {
-        /* three bytes character */
-        len = 3;
-
-        c &= 0x0f;
-        for (i = 1; i <= 2; i++) {
-            aux = (*input)[i];
-            if ((aux & 0xc0) != 0x80) {
-                return LY_EINVAL;
-            }
-
-            c = (c << 6) | (aux & 0x3f);
-        }
-
-        if (c < 0x800 || (c > 0xd7ff && c < 0xe000) || c > 0xfffd) {
-            return LY_EINVAL;
-        }
-    } else if ((c & 0xf8) == 0xf0) {
-        /* four bytes character */
-        len = 4;
-
-        c &= 0x07;
-        for (i = 1; i <= 3; i++) {
-            aux = (*input)[i];
-            if ((aux & 0xc0) != 0x80) {
-                return LY_EINVAL;
-            }
-
-            c = (c << 6) | (aux & 0x3f);
-        }
-
-        if (c < 0x1000 || c > 0x10ffff) {
-            return LY_EINVAL;
-        }
-    } else {
-        return LY_EINVAL;
-    }
-
-    (*utf8_char) = c;
-    (*input) += len;
-    if (bytes_read) {
-        (*bytes_read) = len;
-    }
-    return LY_SUCCESS;
-}
-
-/**
  * Store UTF-8 character specified as 4byte integer into the dst buffer.
  * Returns number of written bytes (4 max), expects that dst has enough space.
  *
@@ -258,7 +174,7 @@
     LY_ERR rc;
 
     /* check NameStartChar (minus colon) */
-    LY_CHECK_ERR_RET(lyxml_getutf8(input, &c, NULL) != LY_SUCCESS,
+    LY_CHECK_ERR_RET(ly_getutf8(input, &c, NULL) != LY_SUCCESS,
                      LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INCHAR, (*input)[0]), LY_EVALID);
     LY_CHECK_ERR_RET(!is_xmlqnamestartchar(c),
                      LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX,
@@ -266,9 +182,9 @@
                      LY_EVALID);
 
     /* check rest of the identifier */
-    for (rc = lyxml_getutf8(input, &c, term_char_len);
+    for (rc = ly_getutf8(input, &c, term_char_len);
          rc == LY_SUCCESS && is_xmlqnamechar(c);
-         rc = lyxml_getutf8(input, &c, term_char_len));
+         rc = ly_getutf8(input, &c, term_char_len));
     LY_CHECK_ERR_RET(rc != LY_SUCCESS, LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INCHAR, (*input)[0]), LY_EVALID);
 
     (*term_char) = c;

diff --git a/src/xml.h b/src/xml.h
index 4dbd06d..f0fbb4e 100644
--- a/src/xml.h
+++ b/src/xml.h

@@ -37,8 +37,6 @@
     struct ly_set ns;     /* handled with LY_SET_OPT_USEASLIST */
 };
 
-#endif /* LY_XML_H_ */
-
 /**
  * @brief Parse input expecting an XML element.
  *
@@ -150,3 +148,5 @@
  * @return LY_ERR values.
  */
 LY_ERR lyxml_ns_rm(struct lyxml_context *context, const char *element_name);
+
+#endif /* LY_XML_H_ */
commit	b416be63ae73ac7d8a5c3488ab1e0720b19ac247	[log] [tgz]
author	Radek Krejci <rkrejci@cesnet.cz>	Mon Oct 01 14:51:45 2018 +0200
committer	Radek Krejci <rkrejci@cesnet.cz>	Mon Oct 01 14:51:45 2018 +0200
tree	9efe152a86411350f43c55e5ee754ae2a8f3dcff
parent	7ab47f0b14c001a040f92dd77a9f38e040d1c100 [diff]