tree schema FEATURE lyd_dup() for data node duplication
diff --git a/src/tree_data.c b/src/tree_data.c
index 096b7a3..27742cb 100644
--- a/src/tree_data.c
+++ b/src/tree_data.c
@@ -633,3 +633,230 @@
     LOGINT(node1->schema->module->ctx);
     return LY_EINT;
 }
+
+/**
+ * @brief Duplicates just a single node and interconnect it into a @p parent (if present) and after the @p prev
+ * sibling (if present).
+ *
+ * Ignores LYD_DUP_WITH_PARENTS and LYD_DUP_WITH_SIBLINGS which are supposed to be handled by lyd_dup().
+ */
+static struct lyd_node *
+lyd_dup_recursive(const struct lyd_node *node, struct lyd_node_inner *parent, struct lyd_node *prev, int options)
+{
+    struct ly_ctx *ctx;
+    struct lyd_node *dup = NULL;
+
+    LY_CHECK_ARG_RET(NULL, node, NULL);
+    ctx = node->schema->module->ctx;
+
+    switch (node->schema->nodetype) {
+    case LYS_ACTION:
+    case LYS_NOTIF:
+    case LYS_CONTAINER:
+    case LYS_LIST:
+        dup = calloc(1, sizeof(struct lyd_node_inner));
+        break;
+    case LYS_LEAF:
+    case LYS_LEAFLIST:
+        dup = calloc(1, sizeof(struct lyd_node_term));
+        break;
+    case LYS_ANYDATA:
+    case LYS_ANYXML:
+        dup = calloc(1, sizeof(struct lyd_node_any));
+        break;
+    default:
+        LOGINT(ctx);
+        goto error;
+    }
+
+    /* TODO implement LYD_DUP_WITH_WHEN */
+    dup->flags = node->flags;
+    dup->schema = node->schema;
+
+    /* interconnect the node at the end */
+    dup->parent = parent;
+    if (prev) {
+        dup->prev = prev;
+        prev->next = dup;
+    } else {
+        dup->prev = dup;
+        if (parent) {
+            parent->child = dup;
+        }
+    }
+    if (parent) {
+        parent->child->prev = dup;
+    } else if (prev) {
+        struct lyd_node *first;
+        for (first = prev; first->prev != prev; first = first->prev);
+        first->prev = dup;
+    }
+
+    /* TODO duplicate attributes, implement LYD_DUP_NO_ATTR */
+
+    /* nodetype-specific work */
+    if (dup->schema->nodetype & LYD_NODE_TERM) {
+        struct lyd_node_term *term = (struct lyd_node_term*)dup;
+        struct lyd_node_term *orig = (struct lyd_node_term*)node;
+
+        term->hash = orig->hash;
+        term->value.realtype = orig->value.realtype;
+        LY_CHECK_ERR_GOTO(term->value.realtype->plugin->duplicate(ctx, &orig->value, &term->value),
+                          LOGERR(ctx, LY_EINT, "Value duplication failed."), error);
+    } else if (dup->schema->nodetype & LYD_NODE_INNER) {
+        struct lyd_node_inner *inner = (struct lyd_node_inner*)dup;
+        struct lyd_node_inner *orig = (struct lyd_node_inner*)node;
+        struct lyd_node *child, *last = NULL;
+
+        if (options & LYD_DUP_RECURSIVE) {
+            /* duplicate all the children */
+            LY_LIST_FOR(orig->child, child) {
+                last = lyd_dup_recursive(child, inner, last, options);
+                LY_CHECK_GOTO(!last, error);
+            }
+        } else if (dup->schema->nodetype == LYS_LIST && ((struct lysc_node_list*)dup->schema)->keys) {
+            /* always duplicate keys of a list */
+            unsigned int u;
+
+            child = orig->child;
+            LY_ARRAY_FOR(((struct lysc_node_list*)dup->schema)->keys, u) {
+                if (!child) {
+                    /* possibly not keys are present in filtered tree */
+                    break;
+                }
+                last = lyd_dup_recursive(child, inner, last, options);
+                child = child->next;
+            }
+        }
+        lyd_hash(dup);
+    } else if (dup->schema->nodetype & LYD_NODE_ANY) {
+        struct lyd_node_any *any = (struct lyd_node_any*)dup;
+        struct lyd_node_any *orig = (struct lyd_node_any*)node;
+
+        any->hash = orig->hash;
+        any->value_type = orig->value_type;
+        switch (any->value_type) {
+        case LYD_ANYDATA_DATATREE:
+            if (orig->value.tree) {
+                any->value.tree = lyd_dup(orig->value.tree, NULL, LYD_DUP_RECURSIVE | LYD_DUP_WITH_SIBLINGS);
+                LY_CHECK_GOTO(!any->value.tree, error);
+            }
+            break;
+        case LYD_ANYDATA_STRING:
+        case LYD_ANYDATA_XML:
+        case LYD_ANYDATA_JSON:
+            if (orig->value.str) {
+                any->value.str = lydict_insert(ctx, orig->value.str, strlen(orig->value.str));
+            }
+            break;
+        }
+    }
+
+    lyd_insert_hash(dup);
+    return dup;
+
+error:
+    if (!parent && !prev) {
+        lyd_free_tree(dup);
+    }
+    return NULL;
+}
+
+API struct lyd_node *
+lyd_dup(const struct lyd_node *node, struct lyd_node_inner *parent, int options)
+{
+    struct ly_ctx *ctx;
+    const struct lyd_node *orig;          /* original node to be duplicated */
+    struct lyd_node *first = NULL;        /* the first duplicated node, this is returned */
+    struct lyd_node *last = NULL;         /* the last sibling of the duplicated nodes */
+    struct lyd_node *top = NULL;          /* the most higher created node */
+    struct lyd_node_inner *local_parent = NULL; /* the direct parent node for the duplicated node(s) */
+    int keyless_parent_list = 0;
+
+    LY_CHECK_ARG_RET(NULL, node, NULL);
+    ctx = node->schema->module->ctx;
+
+    if (options & LYD_DUP_WITH_PARENTS) {
+        struct lyd_node_inner *orig_parent, *iter;
+        int repeat = 1;
+        for (top = NULL, orig_parent = node->parent; repeat && orig_parent; orig_parent = orig_parent->parent) {
+            if (parent && parent->schema == orig_parent->schema) {
+                /* stop creating parents, connect what we have into the provided parent */
+                iter = parent;
+                repeat = 0;
+                /* get know if there is a keyless list which we will have to rehash */
+                for (struct lyd_node_inner *piter = parent; piter; piter = piter->parent) {
+                    if (piter->schema->nodetype == LYS_LIST && !((struct lysc_node_list*)piter->schema)->keys) {
+                        keyless_parent_list = 1;
+                        break;
+                    }
+                }
+            } else {
+                iter = (struct lyd_node_inner*)lyd_dup_recursive((struct lyd_node*)orig_parent, NULL, NULL, 0);
+                LY_CHECK_GOTO(!iter, error);
+            }
+            if (!local_parent) {
+                local_parent = iter;
+            }
+            if (iter->child) {
+                /* 1) list - add after keys
+                 * 2) provided parent with some children */
+                iter->child->prev->next = top;
+                if (top) {
+                    top->prev = iter->child->prev;
+                    iter->child->prev = top;
+                }
+            } else {
+                iter->child = top;
+                if (iter->schema->nodetype == LYS_LIST) {
+                    /* keyless list - we will need to rehash it since we are going to add nodes into it */
+                    keyless_parent_list = 1;
+                }
+            }
+            if (top) {
+                top->parent = iter;
+            }
+            top = (struct lyd_node*)iter;
+        }
+        if (repeat && parent) {
+            /* given parent and created parents chain actually do not interconnect */
+            LOGERR(ctx, LY_EINVAL, "Invalid argument parent (%s()) - does not interconnect with the created node's parents chain.", __func__);
+            goto error;
+        }
+    } else {
+        local_parent = parent;
+    }
+
+    if (local_parent && local_parent->child) {
+        last = local_parent->child->prev;
+    }
+
+    LY_LIST_FOR(node, orig) {
+        last = lyd_dup_recursive(orig, local_parent, last, options);
+        LY_CHECK_GOTO(!last, error);
+        if (!first) {
+            first = last;
+        }
+
+        if (!(options & LYD_DUP_WITH_SIBLINGS)) {
+            break;
+        }
+    }
+    if (keyless_parent_list) {
+        /* rehash */
+        for (; local_parent; local_parent = local_parent->parent) {
+            if (local_parent->schema->nodetype == LYS_LIST && !((struct lysc_node_list*)local_parent->schema)->keys) {
+                lyd_hash((struct lyd_node*)local_parent);
+            }
+        }
+    }
+    return first;
+
+error:
+    if (top) {
+        lyd_free_tree(top);
+    } else {
+        lyd_free_withsiblings(first);
+    }
+    return NULL;
+}
diff --git a/src/tree_data.h b/src/tree_data.h
index 740750b..e9b1c32 100644
--- a/src/tree_data.h
+++ b/src/tree_data.h
@@ -121,16 +121,16 @@
  * @brief List of possible value types stored in ::lyd_node_anydata.
  */
 typedef enum {
-    LYD_ANYDATA_DATATREE = 0x01,     /**< Value is a pointer to lyd_node structure (first sibling). When provided as input parameter, the pointer
+    LYD_ANYDATA_DATATREE,            /**< Value is a pointer to lyd_node structure (first sibling). When provided as input parameter, the pointer
                                           is directly connected into the anydata node without duplication, caller is supposed to not manipulate
                                           with the data after a successful call (including calling lyd_free() on the provided data) */
-    LYD_ANYDATA_STRING = 0x02,       /**< Value is a generic string without any knowledge about its format (e.g. anyxml value in JSON encoded
+    LYD_ANYDATA_STRING,              /**< Value is a generic string without any knowledge about its format (e.g. anyxml value in JSON encoded
                                           as string). XML sensitive characters (such as & or \>) are automatically escaped when the anydata
                                           is printed in XML format. */
-    LYD_ANYDATA_XML = 0x04,          /**< Value is a string containing the serialized XML data. */
-    LYD_ANYDATA_JSON = 0x08,         /**< Value is a string containing the data modeled by YANG and encoded as I-JSON. */
+    LYD_ANYDATA_XML,                 /**< Value is a string containing the serialized XML data. */
+    LYD_ANYDATA_JSON,                /**< Value is a string containing the data modeled by YANG and encoded as I-JSON. */
 #if 0 /* TODO LYB format */
-    LYD_ANYDATA_LYB = 0x10,          /**< Value is a memory chunk with the serialized data tree in LYB format. */
+    LYD_ANYDATA_LYB,                 /**< Value is a memory chunk with the serialized data tree in LYB format. */
 #endif
 } LYD_ANYDATA_VALUETYPE;
 
@@ -670,7 +670,7 @@
  * @defgroup datacompareoptions Data compare options
  * @ingroup datatree
  *
- * Various options to change the lyd_compare behavior.
+ * Various options to change the lyd_compare() behavior.
  */
 #define LYD_COMPARE_FULL_RECURSION 0x01 /* lists and containers are the same only in case all they children
                                            (subtree, so direct as well as indirect children) are the same. By default,
@@ -679,7 +679,7 @@
 #define LYD_COMPARE_DEFAULTS 0x02       /* By default, implicit and explicit default nodes are considered to be equal. This flag
                                            changes this behavior and implicit (automatically created default node) and explicit
                                            (explicitly created node with the default value) default nodes are considered different. */
-/**@} dataparseroptions */
+/**@} datacompareoptions */
 
 /**
  * @brief Compare 2 data nodes if they are equivalent.
@@ -692,6 +692,47 @@
 LY_ERR lyd_compare(const struct lyd_node *node1, const struct lyd_node *node2, int options);
 
 /**
+ * @defgroup dupoptions Data duplication options
+ * @ingroup datatree
+ *
+ * Various options to change lyd_dup() behavior.
+ *
+ * Default behavior:
+ * - only the specified node is duplicated without siblings, parents, or children.
+ * - all the attributes of the duplicated nodes are also duplicated.
+ * @{
+ */
+
+#define LYD_DUP_RECURSIVE    0x01  /**< Duplicate not just the node but also all the children. Note that
+                                        list's keys are always duplicated. */
+#define LYD_DUP_NO_ATTR      0x02  /**< Do not duplicate attributes of any node. */
+#define LYD_DUP_WITH_PARENTS 0x04  /**< If a nested node is being duplicated, duplicate also all the parents.
+                                        Keys are also duplicated for lists. Return value does not change! */
+#define LYD_DUP_WITH_SIBLINGS 0x08 /**< Duplicate also all the sibling of the given node. */
+#define LYD_DUP_WITH_WHEN     0x10 /**< Also copy any when evaluation state flags. This is useful in case the copied
+                                        nodes are actually still part of the same datastore meaning no dependency data
+                                        could have changed. Otherwise nothing is assumed about the copied node when
+                                        state and it is evaluated from scratch during validation. */
+
+/** @} dupoptions */
+
+/**
+ * @brief Create a copy of the specified data tree \p node. Schema references are kept the same.
+ *
+ * __PARTIAL CHANGE__ - validate after the final change on the data tree (see @ref howtodatamanipulators).
+ *
+ * @param[in] node Data tree node to be duplicated.
+ * @param[in] parent Optional parent node where to connect the duplicated node(s).
+ * If set in combination with LYD_DUP_WITH_PARENTS, the parents chain is duplicated until it comes to and connect with the @p parent
+ * (if the parents chain does not match at some node the schema node of the provided @p parent, duplication fails).
+ * @param[in] options Bitmask of options flags, see @ref dupoptions.
+ * @return Created copy of the provided data \p node (the first of the duplicated siblings when LYD_DUP_WITH_SIBLINGS used).
+ * Note that in case the parents chain is duplicated for the duplicated node(s) (when LYD_DUP_WITH_PARENTS used), the first duplicated node
+ * is still returned, not a pointer to the duplicated parents.
+ */
+struct lyd_node *lyd_dup(const struct lyd_node *node, struct lyd_node_inner *parent, int options);
+
+/**
  * @brief Resolve instance-identifier defined by lyd_value_path structure.
  *
  * @param[in] path Path structure specifying the instance-identifier target.
diff --git a/src/tree_data_hash.c b/src/tree_data_hash.c
index 7627ce5..37d6337 100644
--- a/src/tree_data_hash.c
+++ b/src/tree_data_hash.c
@@ -54,7 +54,7 @@
         if (((struct lysc_node_list*)node->schema)->keys) {
             /* list's hash is made of its keys */
             unsigned int keys_count = LY_ARRAY_SIZE(((struct lysc_node_list*)node->schema)->keys);
-            for (iter = list->child; keys_count; --keys_count, iter = iter->next) {
+            for (iter = list->child; iter && keys_count; --keys_count, iter = iter->next) {
                 int dynamic = 0;
                 struct lysc_type *type = ((struct lysc_node_leaf*)iter->schema)->type;
                 const char *value = type->plugin->print(&((struct lyd_node_term*)iter)->value, LYD_JSON, json_print_get_prefix, NULL, &dynamic);
diff --git a/tests/src/test_parser_xml.c b/tests/src/test_parser_xml.c
index e18db14..39d5d5c 100644
--- a/tests/src/test_parser_xml.c
+++ b/tests/src/test_parser_xml.c
@@ -201,6 +201,11 @@
 
     assert_int_equal(LY_EVALID, lyd_parse_xml(ctx, data, LYD_OPT_STRICT, NULL, &tree));
     logbuf_assert("Invalid position of the key \"b\" in a list. Line number 1.");
+/* TODO validation
+    data = "<l1 xmlns=\"urn:tests:a\"><a>a</a><c>c</c><d>d</d></l1>";
+    assert_int_equal(LY_EVALID, lyd_parse_xml(ctx, data, LYD_OPT_STRICT, NULL, &tree));
+    logbuf_assert("Missing key \"b\" in a list. Line number 1.");
+*/
 
     *state = NULL;
 }
diff --git a/tests/src/test_tree_data.c b/tests/src/test_tree_data.c
index 5ba858a..ff60fc8 100644
--- a/tests/src/test_tree_data.c
+++ b/tests/src/test_tree_data.c
@@ -172,10 +172,101 @@
     *state = NULL;
 }
 
+
+static void
+test_dup(void **state)
+{
+    *state = test_dup;
+
+    struct lyd_node *tree1, *tree2;
+    const char *result;
+    const char *data = "<l1 xmlns=\"urn:tests:a\"><a>a</a><b>b</b><c>x</c></l1>";
+
+    assert_non_null(tree1 = lyd_parse_mem(ctx, data, LYD_XML, LYD_OPT_DATA, NULL));
+    assert_non_null(tree2 = lyd_dup(tree1, NULL, LYD_DUP_RECURSIVE));
+    assert_int_equal(LY_SUCCESS, lyd_compare(tree1, tree2, LYD_COMPARE_FULL_RECURSION));
+    lyd_free_all(tree1);
+    lyd_free_all(tree2);
+
+    data = "<l1 xmlns=\"urn:tests:a\"><a>a</a><b>b</b><c>x</c></l1>";
+    result = "<l1 xmlns=\"urn:tests:a\"><a>a</a><b>b</b></l1>";
+    assert_non_null(tree1 = lyd_parse_mem(ctx, data, LYD_XML, LYD_OPT_DATA, NULL));
+    assert_non_null(tree2 = lyd_dup(tree1, NULL, 0));
+    lyd_free_all(tree1);
+    assert_non_null(tree1 = lyd_parse_mem(ctx, result, LYD_XML, LYD_OPT_DATA, NULL));
+    assert_int_equal(LY_SUCCESS, lyd_compare(tree1, tree2, LYD_COMPARE_FULL_RECURSION));
+    lyd_free_all(tree1);
+    lyd_free_all(tree2);
+
+    data = "<l2 xmlns=\"urn:tests:a\"><c><x>a</x></c></l2><l2 xmlns=\"urn:tests:a\"><c><x>b</x></c></l2>";
+    result = "<l2 xmlns=\"urn:tests:a\"><c><x>a</x></c></l2>";
+    assert_non_null(tree1 = lyd_parse_mem(ctx, data, LYD_XML, LYD_OPT_DATA, NULL));
+    assert_non_null(tree2 = lyd_dup(tree1, NULL, LYD_DUP_WITH_SIBLINGS | LYD_DUP_RECURSIVE));
+    assert_int_equal(LY_SUCCESS, lyd_compare(tree1, tree2, LYD_COMPARE_FULL_RECURSION));
+    lyd_free_all(tree2);
+
+    assert_non_null(tree2 = lyd_dup(tree1, NULL, LYD_DUP_RECURSIVE));
+    lyd_free_all(tree1);
+    assert_non_null(tree1 = lyd_parse_mem(ctx, result, LYD_XML, LYD_OPT_DATA, NULL));
+    assert_int_equal(LY_SUCCESS, lyd_compare(tree1, tree2, LYD_COMPARE_FULL_RECURSION));
+    lyd_free_all(tree2);
+
+    assert_non_null(tree2 = lyd_dup(tree1, NULL, 0));
+    lyd_free_all(tree1);
+    result = "<l2 xmlns=\"urn:tests:a\"/>";
+    assert_non_null(tree1 = lyd_parse_mem(ctx, result, LYD_XML, LYD_OPT_DATA, NULL));
+    assert_int_equal(LY_SUCCESS, lyd_compare(tree1, tree2, LYD_COMPARE_FULL_RECURSION));
+    lyd_free_all(tree1);
+    lyd_free_all(tree2);
+
+    data = "<any xmlns=\"urn:tests:a\"><c><a>a</a></c></any>";
+    assert_non_null(tree1 = lyd_parse_mem(ctx, data, LYD_XML, LYD_OPT_DATA, NULL));
+    assert_non_null(tree2 = lyd_dup(tree1, NULL, 0));
+    assert_int_equal(LY_SUCCESS, lyd_compare(tree1, tree2, LYD_COMPARE_FULL_RECURSION));
+    lyd_free_all(tree1);
+    lyd_free_all(tree2);
+
+    data = "<l2 xmlns=\"urn:tests:a\"><c><x>b</x></c></l2>";
+    assert_non_null(tree1 = lyd_parse_mem(ctx, data, LYD_XML, LYD_OPT_DATA, NULL));
+    assert_non_null(tree2 = lyd_dup(((struct lyd_node_inner*)((struct lyd_node_inner*)tree1)->child)->child, NULL, LYD_DUP_WITH_PARENTS));
+    assert_string_equal("x", tree2->schema->name);
+    assert_non_null(tree2->parent);
+    assert_int_equal(LY_SUCCESS, lyd_compare(tree1, (struct lyd_node*)tree2->parent->parent, LYD_COMPARE_FULL_RECURSION));
+    lyd_free_all(tree1);
+    lyd_free_all(tree2);
+
+    data = "<l1 xmlns=\"urn:tests:a\"><a>a</a><b>b</b><c>c</c></l1>";
+    assert_non_null(tree1 = lyd_parse_mem(ctx, data, LYD_XML, LYD_OPT_DATA, NULL));
+    assert_non_null(tree2 = lyd_dup(((struct lyd_node_inner*)tree1)->child->prev, NULL, LYD_DUP_WITH_PARENTS));
+    assert_string_equal("c", tree2->schema->name);
+    assert_non_null(tree2->parent);
+    assert_int_equal(LY_SUCCESS, lyd_compare(tree1, (struct lyd_node*)tree2->parent, LYD_COMPARE_FULL_RECURSION));
+    lyd_free_all(tree1);
+    lyd_free_all(tree2);
+
+    data = "<l2 xmlns=\"urn:tests:a\"><c><x>b</x></c></l2>";
+    assert_non_null(tree1 = lyd_parse_mem(ctx, data, LYD_XML, LYD_OPT_DATA, NULL));
+    assert_non_null(tree2 = lyd_dup(tree1, NULL, 0));
+    assert_non_null(lyd_dup(((struct lyd_node_inner*)((struct lyd_node_inner*)tree1)->child)->child,
+                            (struct lyd_node_inner*)tree2, LYD_DUP_WITH_PARENTS));
+    assert_int_equal(LY_SUCCESS, lyd_compare(tree1, tree2, LYD_COMPARE_FULL_RECURSION));
+    lyd_free_all(tree1);
+    lyd_free_all(tree2);
+
+    /* invalid */
+    data = "<l1 xmlns=\"urn:tests:a\"><a>a</a><b>b</b><c>c</c></l1><l2 xmlns=\"urn:tests:a\"><c><x>b</x></c></l2>";
+    assert_non_null(tree1 = lyd_parse_mem(ctx, data, LYD_XML, LYD_OPT_DATA, NULL));
+    assert_null(lyd_dup(((struct lyd_node_inner*)tree1)->child->prev, (struct lyd_node_inner*)tree1->next, LYD_DUP_WITH_PARENTS));
+    lyd_free_all(tree1);
+
+    *state = NULL;
+}
+
 int main(void)
 {
     const struct CMUnitTest tests[] = {
         cmocka_unit_test_setup_teardown(test_compare, setup, teardown),
+        cmocka_unit_test_setup_teardown(test_dup, setup, teardown),
     };
 
     return cmocka_run_group_tests(tests, NULL, NULL);