xml parser CHANGE optimize use of dynamically allocated memory

When possible, return a reference into the original input string instead
of allocating memory to store a string that is identical to the input.
A dynamically allocated buffer is needed only when the input string
must be modified because of a character reference or an entity
reference.
diff --git a/src/xml.c b/src/xml.c
index 4af078d..db25d70 100644
--- a/src/xml.c
+++ b/src/xml.c
@@ -193,7 +193,7 @@
 }
 
 LY_ERR
-lyxml_get_string(struct lyxml_context *context, const char **input, char **buffer, size_t *buffer_size)
+lyxml_get_string(struct lyxml_context *context, const char **input, char **buffer, size_t *buffer_size, char **output, size_t *length, int *dynamic)
 {
 #define BUFSIZE 4096
 #define BUFSIZE_STEP 4096
@@ -205,10 +205,10 @@
     }
 
     struct ly_ctx *ctx = context->ctx; /* shortcut */
-    const char *in = (*input);
-    char *buf, delim;
+    const char *in = (*input), *start;
+    char *buf = NULL, delim;
     size_t offset;  /* read offset in input buffer */
-    size_t len;     /* write offset in output buffer */
+    size_t len;     /* length of the output string (write offset in output buffer) */
     size_t size;    /* size of the output buffer */
     void *p;
     uint32_t n;
@@ -229,6 +229,7 @@
         delim = '<';
         empty_content = true;
     }
+    start = in;
 
     if (empty_content) {
         /* only when processing element's content - try to ignore whitespaces used to format XML data
@@ -244,26 +245,31 @@
             (*input) = in + offset;
             return LY_EINVAL;
         }
-    } else {
-        /* init */
-        offset = 0;
     }
+    /* init */
+    offset = len = 0;
 
-    /* prepare output buffer */
-    if (*buffer) {
-        buf = *buffer;
-        size = *buffer_size;
-    } else {
-        buf = malloc(BUFSIZE);
-        size = BUFSIZE;
-
-        LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
+    if (0) {
+getbuffer:
+        /* prepare output buffer */
+        if (*buffer) {
+            buf = *buffer;
+            size = *buffer_size;
+        } else {
+            buf = malloc(BUFSIZE);
+            size = BUFSIZE;
+            LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
+        }
     }
-    len = 0;
 
     /* parse */
     while (in[offset]) {
         if (in[offset] == '&') {
+            if (!buf) {
+                /* it is necessary to modify the input, so we will need a dynamically allocated buffer */
+                goto getbuffer;
+            }
+
             if (offset) {
                 /* store what we have so far */
                 BUFSIZE_CHECK(ctx, buf, size, len, offset);
@@ -339,12 +345,14 @@
             }
         } else if (in[offset] == delim) {
             /* end of string */
-            if (len + offset >= size) {
-                buf = ly_realloc(buf, len + offset + 1);
-                LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
-                size = len + offset + 1;
+            if (buf) {
+                if (len + offset >= size) {
+                    buf = ly_realloc(buf, len + offset + 1);
+                    LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM);
+                    size = len + offset + 1;
+                }
+                memcpy(&buf[len], in, offset);
             }
-            memcpy(&buf[len], in, offset);
             len += offset;
             /* in case of element content, keep the leading <,
              * for attribute's value move after the terminating quotation mark */
@@ -372,23 +380,34 @@
     return LY_EVALID;
 
 success:
-    if (!(*buffer) && size != len + 1) {
-        /* not using provided buffer, so fit the allocated buffer to what we really have inside */
-        p = realloc(buf, len + 1);
-        /* ignore realloc fail because we are reducing the buffer,
-         * so just return bigger buffer than needed */
-        if (p) {
-            size = len + 1;
-            buf = p;
+    if (buf) {
+        if (!(*buffer) && size != len + 1) {
+            /* not using provided buffer, so fit the allocated buffer to what we really have inside */
+            p = realloc(buf, len + 1);
+            /* ignore realloc fail because we are reducing the buffer,
+             * so just return bigger buffer than needed */
+            if (p) {
+                size = len + 1;
+                buf = p;
+            }
         }
+        /* set terminating NULL byte */
+        buf[len] = '\0';
     }
-    /* set terminating NULL byte */
-    buf[len] = '\0';
 
     context->status -= 1;
     (*input) = in;
-    (*buffer) = buf;
-    (*buffer_size) = size;
+    if (buf) {
+        (*buffer) = buf;
+        (*buffer_size) = size;
+        (*output) = buf;
+        (*dynamic) = 1;
+    } else {
+        (*output) = (char*)start;
+        (*dynamic) = 0;
+    }
+    (*length) = len;
+
     return LY_SUCCESS;
 
 #undef BUFSIZE
diff --git a/src/xml.h b/src/xml.h
index f41a836..f42b27e 100644
--- a/src/xml.h
+++ b/src/xml.h
@@ -101,24 +101,33 @@
  * @brief Parse input as XML text (attribute's values and element's content).
  *
  * Mixed content of XML elements is not allowed. Formating whitespaces before child element are ignored,
- * LY_EINVAL is returned in such a case (buffer is not filled, no error is printed) and input is moved
+ * LY_EINVAL is returned in such a case (output is not set, no error is printed) and input is moved
  * to the beginning of a child definition.
  *
  * In the case of attribute's values, the input string is expected to start on a quotation mark to
  * select which delimiter (single or double quote) is used. Otherwise, the element content is being
  * parsed expected to be terminated by '<' character.
  *
- * If function succeeds, the string in output buffer is always NULL-terminated.
+ * If the function succeeds, the string in a dynamically allocated output buffer is always NULL-terminated.
+ *
+ * The dynamically allocated buffer is used only when necessary, i.e. when the input contains a character reference or
+ * a supported entity reference, which modifies the input data. These constructs are replaced by their real values, so if
+ * the output string is later printed as XML data again, it may be necessary to correctly re-encode such characters.
  *
  * @param[in] context XML context to track lines or store errors into libyang context.
  * @param[in,out] input Input string to process, updated according to the processed/read data.
- * @param[out] buffer Storage of the output string. If NULL, the buffer is allocated. Otherwise, the buffer
- * is used and enlarged when necessary.
- * @param[out] buffer_size Allocated size of the returned buffer. If a buffer is provided by a caller, it
+ * @param[in,out] buffer Storage for the output string. If the parameter points to NULL, a buffer is allocated when needed.
+ * Otherwise, the provided buffer is used when needed and enlarged if necessary. Whenever the buffer is used, the string is NULL-terminated.
+ * @param[in, out] buffer_size Allocated size of the returned buffer. If a buffer is provided by a caller, it
  * is not being reduced even if the string is shorter. On the other hand, it can be enlarged if needed.
+ * @param[out] output Returns pointer to the resulting string - to the provided/allocated buffer if it was necessary to modify
+ * the input string or directly into the input string (see the \p dynamic parameter).
+ * @param[out] length Length of the \p output string (needed because \p output is not NULL-terminated when it points into the input).
+ * @param[out] dynamic Flag indicating whether dynamically allocated memory (\p buffer) was used; if so, the caller is supposed to free it at the end.
+ * If the value is zero, \p output points directly into the \p input string.
  * @return LY_ERR value.
  */
-LY_ERR lyxml_get_string(struct lyxml_context *context, const char **input, char **buffer, size_t *buffer_size);
+LY_ERR lyxml_get_string(struct lyxml_context *context, const char **input, char **buffer, size_t *buffer_size, char **output, size_t *length, int *dynamic);
 
 /**
  * @brief Add namespace definition into XML context.
diff --git a/tests/src/test_xml.c b/tests/src/test_xml.c
index 3288a9b..02cd8b9 100644
--- a/tests/src/test_xml.c
+++ b/tests/src/test_xml.c
@@ -253,9 +253,10 @@
 {
     (void) state; /* unused */
 
-    size_t out_len;
+    size_t buf_len, len;
+    int dynamic;
     const char *str, *p;
-    char *out = NULL;
+    char *buf = NULL, *out = NULL;
 
     struct lyxml_context ctx;
     memset(&ctx, 0, sizeof ctx);
@@ -264,101 +265,120 @@
     /* empty attribute value */
     ctx.status = LYXML_ATTR_CONTENT;
     str = "\"\"";
-    assert_int_equal(LY_SUCCESS, lyxml_get_string(&ctx, &str, &out, &out_len));
-    assert_non_null(out);
-    assert_int_equal(1, out_len);
+    assert_int_equal(LY_SUCCESS, lyxml_get_string(&ctx, &str, &buf, &buf_len, &out, &len, &dynamic));
+    assert_null(buf);
+    assert_ptr_equal(&str[-1], out);
+    assert_int_equal(0, dynamic);
+    assert_int_equal(0, len);
     assert_true(str[0] == '\0'); /* everything eaten */
-    assert_true(out[0] == '\0'); /* empty string */
     assert_int_equal(LYXML_ATTRIBUTE, ctx.status);
 
     ctx.status = LYXML_ATTR_CONTENT;
     str = "\'\'";
-    assert_int_equal(LY_SUCCESS, lyxml_get_string(&ctx, &str, &out, &out_len));
-    assert_non_null(out);
-    assert_int_equal(1, out_len);
+    assert_int_equal(LY_SUCCESS, lyxml_get_string(&ctx, &str, &buf, &buf_len, &out, &len, &dynamic));
+    assert_null(buf);
+    assert_ptr_equal(&str[-1], out);
+    assert_int_equal(0, dynamic);
+    assert_int_equal(0, len);
     assert_true(str[0] == '\0'); /* everything eaten */
-    assert_true(out[0] == '\0'); /* empty string */
     assert_int_equal(LYXML_ATTRIBUTE, ctx.status);
 
     /* empty element content - only formating before defining child */
     ctx.status = LYXML_ELEM_CONTENT;
     str = "\n  <";
-    assert_int_equal(LY_EINVAL, lyxml_get_string(&ctx, &str, &out, &out_len));
+    assert_int_equal(LY_EINVAL, lyxml_get_string(&ctx, &str, &buf, &buf_len, &out, &len, &dynamic));
+    assert_null(buf);
     assert_string_equal("<", str);
 
     /* empty element content is invalid - missing content terminating character < */
     ctx.status = LYXML_ELEM_CONTENT;
     str = "";
-    assert_int_equal(LY_EVALID, lyxml_get_string(&ctx, &str, &out, &out_len));
+    assert_int_equal(LY_EVALID, lyxml_get_string(&ctx, &str, &buf, &buf_len, &out, &len, &dynamic));
+    assert_null(buf);
     logbuf_assert("Unexpected end-of-file. Line number 2.");
 
-    free(out);
-    out = NULL;
-
     ctx.status = LYXML_ELEM_CONTENT;
     str = p = "xxx";
-    assert_int_equal(LY_EVALID, lyxml_get_string(&ctx, &str, &out, &out_len));
+    assert_int_equal(LY_EVALID, lyxml_get_string(&ctx, &str, &buf, &buf_len, &out, &len, &dynamic));
+    assert_null(buf);
     logbuf_assert("Unexpected end-of-file. Line number 2.");
     assert_ptr_equal(p, str); /* input data not eaten */
 
-    free(out);
-    out = NULL;
-
     /* valid strings */
     ctx.status = LYXML_ELEM_CONTENT;
     str = "€𠜎Øn \n&lt;&amp;&quot;&apos;&gt; &#82;&#x4f;&#x4B;<";
-    assert_int_equal(LY_SUCCESS, lyxml_get_string(&ctx, &str, &out, &out_len));
-    assert_int_equal(22, out_len);
-    assert_string_equal("€𠜎Øn \n<&\"\'> ROK", out);
+    assert_int_equal(LY_SUCCESS, lyxml_get_string(&ctx, &str, &buf, &buf_len, &out, &len, &dynamic));
+    assert_int_not_equal(0, dynamic);
+    assert_non_null(buf);
+    assert_ptr_equal(out, buf);
+    assert_int_equal(22, buf_len);
+    assert_int_equal(21, len);
+    assert_string_equal("€𠜎Øn \n<&\"\'> ROK", buf);
     assert_string_equal("<", str);
     assert_int_equal(LYXML_ELEMENT, ctx.status);
 
+    free(buf);
+    buf = NULL;
+
     /* test using n-bytes UTF8 hexadecimal code points */
     ctx.status = LYXML_ATTR_CONTENT;
     str = "\'&#x0024;&#x00A2;&#x20ac;&#x10348;\'";
-    assert_int_equal(LY_SUCCESS, lyxml_get_string(&ctx, &str, &out, &out_len));
-    assert_string_equal("$¢€𐍈", out);
+    assert_int_equal(LY_SUCCESS, lyxml_get_string(&ctx, &str, &buf, &buf_len, &out, &len, &dynamic));
+    assert_int_not_equal(0, dynamic);
+    assert_non_null(buf);
+    assert_ptr_equal(out, buf);
+    assert_int_equal(11, buf_len);
+    assert_int_equal(10, len);
+    assert_string_equal("$¢€𐍈", buf);
     assert_int_equal(LYXML_ATTRIBUTE, ctx.status);
 
+    free(buf);
+    buf = NULL;
+
     /* invalid characters in string */
     ctx.status = LYXML_ATTR_CONTENT;
     str = p = "\'&#x52\'";
-    assert_int_equal(LY_EVALID, lyxml_get_string(&ctx, &str, &out, &out_len));
+    assert_int_equal(LY_EVALID, lyxml_get_string(&ctx, &str, &buf, &buf_len, &out, &len, &dynamic));
     logbuf_assert("Invalid character sequence \"'\", expected ;. Line number 3.");
+    assert_null(buf);
     assert_ptr_equal(p, str); /* input data not eaten */
     ctx.status = LYXML_ATTR_CONTENT;
     str = p = "\"&#82\"";
-    assert_int_equal(LY_EVALID, lyxml_get_string(&ctx, &str, &out, &out_len));
+    assert_int_equal(LY_EVALID, lyxml_get_string(&ctx, &str, &buf, &buf_len, &out, &len, &dynamic));
     logbuf_assert("Invalid character sequence \"\"\", expected ;. Line number 3.");
+    assert_null(buf);
     assert_ptr_equal(p, str); /* input data not eaten */
     ctx.status = LYXML_ATTR_CONTENT;
     str = p = "\"&nonsence;\"";
-    assert_int_equal(LY_EVALID, lyxml_get_string(&ctx, &str, &out, &out_len));
+    assert_int_equal(LY_EVALID, lyxml_get_string(&ctx, &str, &buf, &buf_len, &out, &len, &dynamic));
     logbuf_assert("Entity reference \"&nonsence;\" not supported, only predefined references allowed. Line number 3.");
+    assert_null(buf);
     assert_ptr_equal(p, str); /* input data not eaten */
     ctx.status = LYXML_ELEM_CONTENT;
     str = p = "&#o122;";
-    assert_int_equal(LY_EVALID, lyxml_get_string(&ctx, &str, &out, &out_len));
+    assert_int_equal(LY_EVALID, lyxml_get_string(&ctx, &str, &buf, &buf_len, &out, &len, &dynamic));
     logbuf_assert("Invalid character reference \"&#o122;\". Line number 3.");
+    assert_null(buf);
     assert_ptr_equal(p, str); /* input data not eaten */
 
     ctx.status = LYXML_ATTR_CONTENT;
     str = p = "\'&#x06;\'";
-    assert_int_equal(LY_EVALID, lyxml_get_string(&ctx, &str, &out, &out_len));
+    assert_int_equal(LY_EVALID, lyxml_get_string(&ctx, &str, &buf, &buf_len, &out, &len, &dynamic));
     logbuf_assert("Invalid character reference \"&#x06;\'\" (0x00000006). Line number 3.");
+    assert_null(buf);
     assert_ptr_equal(p, str); /* input data not eaten */
     ctx.status = LYXML_ATTR_CONTENT;
     str = p = "\'&#xfdd0;\'";
-    assert_int_equal(LY_EVALID, lyxml_get_string(&ctx, &str, &out, &out_len));
+    assert_int_equal(LY_EVALID, lyxml_get_string(&ctx, &str, &buf, &buf_len, &out, &len, &dynamic));
     logbuf_assert("Invalid character reference \"&#xfdd0;\'\" (0x0000fdd0). Line number 3.");
+    assert_null(buf);
     assert_ptr_equal(p, str); /* input data not eaten */
     ctx.status = LYXML_ATTR_CONTENT;
     str = p = "\'&#xffff;\'";
-    assert_int_equal(LY_EVALID, lyxml_get_string(&ctx, &str, &out, &out_len));
+    assert_int_equal(LY_EVALID, lyxml_get_string(&ctx, &str, &buf, &buf_len, &out, &len, &dynamic));
     logbuf_assert("Invalid character reference \"&#xffff;\'\" (0x0000ffff). Line number 3.");
+    assert_null(buf);
     assert_ptr_equal(p, str); /* input data not eaten */
-
-    free(out);
 }
 
 static void