common CHANGE make utf8 getter more generic for other parsers, not only for XML
diff --git a/src/common.c b/src/common.c
index c3a737d..912ee13 100644
--- a/src/common.c
+++ b/src/common.c
@@ -152,6 +152,82 @@
return new_mem;
}
+/* Decode one UTF-8 character from *input; the contract is documented at the declaration in common.h. */
+LY_ERR
+ly_getutf8(const char **input, unsigned int *utf8_char, size_t *bytes_read)
+{
+    unsigned int c, len;
+    int aux, i;
+
+    c = (*input)[0];
+    LY_CHECK_RET(!c, LY_EINVAL); /* NUL byte, there is no character to decode */
+
+    if (!(c & 0x80)) {
+        /* one byte character, below 0x20 only TAB, LF and CR are accepted */
+        len = 1;
+
+        if (c < 0x20 && c != 0x9 && c != 0xa && c != 0xd) {
+            return LY_EINVAL;
+        }
+    } else if ((c & 0xe0) == 0xc0) {
+        /* two bytes character */
+        len = 2;
+
+        aux = (*input)[1];
+        if ((aux & 0xc0) != 0x80) {
+            return LY_EINVAL;
+        }
+        c = ((c & 0x1f) << 6) | (aux & 0x3f);
+        /* values below 0x80 fit into one byte, so this would be an overlong encoding */
+        if (c < 0x80) {
+            return LY_EINVAL;
+        }
+    } else if ((c & 0xf0) == 0xe0) {
+        /* three bytes character */
+        len = 3;
+
+        c &= 0x0f;
+        for (i = 1; i <= 2; i++) {
+            /* a NUL terminator fails the continuation check, so the read stops there */
+            aux = (*input)[i];
+            if ((aux & 0xc0) != 0x80) {
+                return LY_EINVAL;
+            }
+            c = (c << 6) | (aux & 0x3f);
+        }
+        /* refuse overlong encodings, the surrogate range 0xd800-0xdfff and 0xfffe/0xffff */
+        if (c < 0x800 || (c > 0xd7ff && c < 0xe000) || c > 0xfffd) {
+            return LY_EINVAL;
+        }
+    } else if ((c & 0xf8) == 0xf0) {
+        /* four bytes character */
+        len = 4;
+
+        c &= 0x07;
+        for (i = 1; i <= 3; i++) {
+            aux = (*input)[i];
+            if ((aux & 0xc0) != 0x80) {
+                return LY_EINVAL;
+            }
+
+            c = (c << 6) | (aux & 0x3f);
+        }
+        /* four bytes carry 0x10000-0x10ffff only, lower values are overlong encodings */
+        if (c < 0x10000 || c > 0x10ffff) {
+            return LY_EINVAL;
+        }
+    } else {
+        return LY_EINVAL; /* continuation byte or 0xf8-0xff used as the first byte */
+    }
+    /* the outputs are written only after the whole sequence passed the checks */
+    (*utf8_char) = c;
+    (*input) += len;
+    if (bytes_read) {
+        (*bytes_read) = len;
+    }
+    return LY_SUCCESS;
+}
+
size_t
LY_VCODE_INSTREXP_len(const char *str)
{
diff --git a/src/common.h b/src/common.h
index 64ea5a9..731f711 100644
--- a/src/common.h
+++ b/src/common.h
@@ -256,6 +256,16 @@
void *ly_realloc(void *ptr, size_t size);
/**
+ * @brief Get the code point of the next UTF-8 encoded character in the input string.
+ *
+ * @param[in,out] input Input string to process, moved behind the read character on success.
+ * @param[out] utf8_char Code point of the read character, written only on success.
+ * @param[out] bytes_read Number of bytes used to encode the read utf8_char, can be NULL.
+ * @return LY_SUCCESS on success, LY_EINVAL on a NUL byte or on an invalid or refused UTF-8 sequence.
+ */
+LY_ERR ly_getutf8(const char **input, unsigned int *utf8_char, size_t *bytes_read);
+
+/**
* @brief Check date string (4DIGIT "-" 2DIGIT "-" 2DIGIT)
*
* @param[in] ctx Context to store log message.
@@ -275,7 +285,7 @@
for (_count = 0; *(ARRAY) && *((uint8_t *)(*(ARRAY) + _count)); ++_count); \
if (!_count) *(ARRAY) = malloc(sizeof **(ARRAY) + 1); \
else *(ARRAY) = ly_realloc(*(ARRAY), (_count + 1) * sizeof **(ARRAY) + 1); \
- LY_CHECK_ERR_RET(!*(ARRAY), LOGMEM(CTX), RETVAL); \
+ LY_CHECK_ERR_RET(!*(ARRAY), LOGMEM(CTX->ctx), RETVAL); \
*((uint8_t *)(*(ARRAY) + _count + 1)) = 0; \
(NEW_ITEM) = (*(ARRAY)) + _count; \
memset(NEW_ITEM, 0, sizeof *(NEW_ITEM));
diff --git a/src/xml.c b/src/xml.c
index 27ce9fa..fe1f2cd 100644
--- a/src/xml.c
+++ b/src/xml.c
@@ -89,90 +89,6 @@
}
/**
- * @brief Get UTF8 code point of the next character in the input string.
- *
- * @param[in,out] input Input string to process, updated according to the processed/read data.
- * @param[out] utf8_char UTF8 code point of the next character.
- * @param[out] bytes_read Number of bytes used to encode the read utf8_char.
- * @return LY_ERR value
- */
-static LY_ERR
-lyxml_getutf8(const char **input, unsigned int *utf8_char, size_t *bytes_read)
-{
- unsigned int c, len;
- int aux;
- int i;
-
- c = (*input)[0];
- LY_CHECK_RET(!c, LY_EINVAL);
-
- if (!(c & 0x80)) {
- /* one byte character */
- len = 1;
-
- if (c < 0x20 && c != 0x9 && c != 0xa && c != 0xd) {
- return LY_EINVAL;
- }
- } else if ((c & 0xe0) == 0xc0) {
- /* two bytes character */
- len = 2;
-
- aux = (*input)[1];
- if ((aux & 0xc0) != 0x80) {
- return LY_EINVAL;
- }
- c = ((c & 0x1f) << 6) | (aux & 0x3f);
-
- if (c < 0x80) {
- return LY_EINVAL;
- }
- } else if ((c & 0xf0) == 0xe0) {
- /* three bytes character */
- len = 3;
-
- c &= 0x0f;
- for (i = 1; i <= 2; i++) {
- aux = (*input)[i];
- if ((aux & 0xc0) != 0x80) {
- return LY_EINVAL;
- }
-
- c = (c << 6) | (aux & 0x3f);
- }
-
- if (c < 0x800 || (c > 0xd7ff && c < 0xe000) || c > 0xfffd) {
- return LY_EINVAL;
- }
- } else if ((c & 0xf8) == 0xf0) {
- /* four bytes character */
- len = 4;
-
- c &= 0x07;
- for (i = 1; i <= 3; i++) {
- aux = (*input)[i];
- if ((aux & 0xc0) != 0x80) {
- return LY_EINVAL;
- }
-
- c = (c << 6) | (aux & 0x3f);
- }
-
- if (c < 0x1000 || c > 0x10ffff) {
- return LY_EINVAL;
- }
- } else {
- return LY_EINVAL;
- }
-
- (*utf8_char) = c;
- (*input) += len;
- if (bytes_read) {
- (*bytes_read) = len;
- }
- return LY_SUCCESS;
-}
-
-/**
* Store UTF-8 character specified as 4byte integer into the dst buffer.
* Returns number of written bytes (4 max), expects that dst has enough space.
*
@@ -258,7 +174,7 @@
LY_ERR rc;
/* check NameStartChar (minus colon) */
- LY_CHECK_ERR_RET(lyxml_getutf8(input, &c, NULL) != LY_SUCCESS,
+ LY_CHECK_ERR_RET(ly_getutf8(input, &c, NULL) != LY_SUCCESS,
LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INCHAR, (*input)[0]), LY_EVALID);
LY_CHECK_ERR_RET(!is_xmlqnamestartchar(c),
LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LYVE_SYNTAX,
@@ -266,9 +182,9 @@
LY_EVALID);
/* check rest of the identifier */
- for (rc = lyxml_getutf8(input, &c, term_char_len);
+ for (rc = ly_getutf8(input, &c, term_char_len);
rc == LY_SUCCESS && is_xmlqnamechar(c);
- rc = lyxml_getutf8(input, &c, term_char_len));
+ rc = ly_getutf8(input, &c, term_char_len));
LY_CHECK_ERR_RET(rc != LY_SUCCESS, LOGVAL(context->ctx, LY_VLOG_LINE, &context->line, LY_VCODE_INCHAR, (*input)[0]), LY_EVALID);
(*term_char) = c;
diff --git a/src/xml.h b/src/xml.h
index 4dbd06d..f0fbb4e 100644
--- a/src/xml.h
+++ b/src/xml.h
@@ -37,8 +37,6 @@
struct ly_set ns; /* handled with LY_SET_OPT_USEASLIST */
};
-#endif /* LY_XML_H_ */
-
/**
* @brief Parse input expecting an XML element.
*
@@ -150,3 +148,5 @@
* @return LY_ERR values.
*/
LY_ERR lyxml_ns_rm(struct lyxml_context *context, const char *element_name);
+
+#endif /* LY_XML_H_ */