xpath OPTIMIZE extend hash-based child search
Ideally, to all the predicates that specify all key
values of a node and do not depend on the particular
node instance itself.
Refs sysrepo/sysrepo#2843
diff --git a/src/xpath.c b/src/xpath.c
index e84aeff..a8f08bd 100644
--- a/src/xpath.c
+++ b/src/xpath.c
@@ -7563,109 +7563,257 @@
}
/**
+ * @brief Check that a nametest in a predicate matches a key node.
+ *
+ * @param[in] nametest Nametest to check.
+ * @param[in] len Length of @p nametest.
+ * @param[in] ctx_scnode Found schema node as the context for the predicate.
+ * @param[in] set Context set.
+ * @param[in] key Expected key node.
+ * @return LY_SUCCESS on success,
+ * @return LY_ENOT if a predicate could not be compiled.
+ * @return LY_ERR on any error.
+ */
+static LY_ERR
+eval_name_test_try_compile_predicate_key(const char *nametest, uint32_t len, const struct lysc_node *ctx_scnode,
+ const struct lyxp_set *set, const struct lysc_node *key)
+{
+ const struct lys_module *mod;
+
+ /* prefix (module) */
+ LY_CHECK_RET(moveto_resolve_model(&nametest, &len, set, ctx_scnode, &mod));
+ if (mod != key->module) {
+ return LY_ENOT;
+ }
+
+ /* node name */
+ if (ly_strncmp(key->name, nametest, len)) {
+ return LY_ENOT;
+ }
+
+ return LY_SUCCESS;
+}
+
+/**
+ * @brief Append a simple predicate for the node.
+ *
+ * @param[in] exp Full parsed XPath expression.
+ * @param[in] tok_idx Predicate start index in @p exp.
+ * @param[in] end_tok_idx Predicate end index in @p exp.
+ * @param[in] ctx_scnode Found schema node as the context for the predicate.
+ * @param[in] set Context set.
+ * @param[in] pred_node Node with the value referenced in the predicate.
+ * @param[in,out] pred Predicate to append to.
+ * @param[in,out] pred_len Length of @p pred, is updated.
+ * @return LY_SUCCESS on success,
+ * @return LY_ENOT if a predicate could not be compiled.
+ * @return LY_ERR on any error.
+ */
+static LY_ERR
+eval_name_test_try_compile_predicate_append(const struct lyxp_expr *exp, uint16_t tok_idx, uint16_t end_tok_idx,
+ const struct lysc_node *ctx_scnode, const struct lyxp_set *set, const struct lysc_node *pred_node, char **pred,
+ uint32_t *pred_len)
+{
+ LY_ERR rc = LY_SUCCESS;
+ uint32_t i;
+ const struct lyd_node *siblings;
+ struct lyd_node *ctx_node;
+ struct lyxp_expr *val_exp = NULL;
+ struct lyxp_set set2 = {0};
+ char quot;
+
+ /* duplicate the value expression */
+ LY_CHECK_GOTO(rc = lyxp_expr_dup(set->ctx, exp, tok_idx, end_tok_idx, &val_exp), cleanup);
+
+ /* get its atoms */
+ LY_CHECK_GOTO(rc = lyxp_atomize(set->ctx, val_exp, set->cur_mod, set->format, set->prefix_data,
+ set->cur_node ? set->cur_node->schema : NULL, ctx_scnode, &set2, LYXP_SCNODE), cleanup);
+
+ /* check whether we can compile a single predicate (evaluation result value is always the same) */
+ for (i = 0; i < set2.used; ++i) {
+ if ((set2.val.scnodes[i].type != LYXP_NODE_ELEM) || (set2.val.scnodes[i].in_ctx < LYXP_SET_SCNODE_ATOM_NODE)) {
+ /* skip root and context node */
+ continue;
+ }
+
+ /* 1) context node descendants are traversed - value dependents on the specific context node instance */
+ if (set2.val.scnodes[i].scnode->parent == ctx_scnode) {
+ rc = LY_ENOT;
+ goto cleanup;
+ }
+
+ /* 2) multi-instance nodes (list or leaf-list) are traversed - all the instances need to be considered */
+ if (set2.val.scnodes[i].scnode->nodetype & (LYS_LIST | LYS_LEAFLIST)) {
+ rc = LY_ENOT;
+ goto cleanup;
+ }
+
+ }
+
+ /* get any data instance of the context node, we checked it makes no difference */
+ siblings = set->val.nodes[0].node ? lyd_child(set->val.nodes[0].node) : set->tree;
+ LY_CHECK_GOTO(rc = lyd_find_sibling_schema(siblings, ctx_scnode, &ctx_node), cleanup);
+
+ /* evaluate the value subexpression with the root context node */
+ lyxp_set_free_content(&set2);
+ LY_CHECK_GOTO(rc = lyxp_eval(set->ctx, val_exp, set->cur_mod, set->format, set->prefix_data, set->cur_node,
+ ctx_node, set->tree, NULL, &set2, 0), cleanup);
+
+ /* cast it into a string */
+ LY_CHECK_GOTO(rc = lyxp_set_cast(&set2, LYXP_SET_STRING), cleanup);
+
+ /* append the JSON predicate */
+ *pred = ly_realloc(*pred, *pred_len + 1 + strlen(pred_node->name) + 2 + strlen(set2.val.str) + 3);
+ LY_CHECK_ERR_GOTO(!*pred, LOGMEM(set->ctx); rc = LY_EMEM, cleanup);
+ quot = strchr(set2.val.str, '\'') ? '\"' : '\'';
+ *pred_len += sprintf(*pred + *pred_len, "[%s=%c%s%c]", pred_node->name, quot, set2.val.str, quot);
+
+cleanup:
+ lyxp_expr_free(set->ctx, val_exp);
+ lyxp_set_free_content(&set2);
+ return rc;
+}
+
+/**
* @brief Try to compile list or leaf-list predicate in the known format to be used for hash-based instance search.
*
* @param[in] exp Full parsed XPath expression.
* @param[in,out] tok_idx Index in @p exp at the beginning of the predicate, is updated on success.
- * @param[in] ctx_node Found schema node as the context for the predicate.
- * @param[in] cur_mod Current module for the expression.
- * @param[in] cur_node Current (original context) node.
- * @param[in] format Format of any prefixes in key names/values.
- * @param[in] prefix_data Format-specific prefix data (see ::ly_resolve_prefix).
+ * @param[in] ctx_scnode Found schema node as the context for the predicate.
+ * @param[in] set Context set.
* @param[out] predicates Parsed predicates.
* @param[out] pred_type Type of @p predicates.
* @return LY_SUCCESS on success,
+ * @return LY_ENOT if a predicate could not be compiled.
* @return LY_ERR on any error.
*/
static LY_ERR
-eval_name_test_try_compile_predicates(const struct lyxp_expr *exp, uint16_t *tok_idx, const struct lysc_node *ctx_node,
- const struct lys_module *cur_mod, const struct lysc_node *cur_node, LY_VALUE_FORMAT format, void *prefix_data,
- struct ly_path_predicate **predicates, enum ly_path_pred_type *pred_type)
+eval_name_test_try_compile_predicates(const struct lyxp_expr *exp, uint16_t *tok_idx, const struct lysc_node *ctx_scnode,
+ const struct lyxp_set *set, struct ly_path_predicate **predicates, enum ly_path_pred_type *pred_type)
{
- LY_ERR ret = LY_SUCCESS;
- uint16_t key_count, e_idx, pred_idx = 0;
+ LY_ERR rc = LY_SUCCESS;
+ uint16_t e_idx, val_start_idx, pred_idx = 0;
const struct lysc_node *key;
- size_t pred_len;
- uint32_t prev_lo;
+ uint32_t prev_lo, pred_len = 0;
+ char *pred = NULL;
struct lyxp_expr *exp2 = NULL;
- assert(ctx_node->nodetype & (LYS_LIST | LYS_LEAFLIST));
+ assert(ctx_scnode->nodetype & (LYS_LIST | LYS_LEAFLIST));
- if (ctx_node->nodetype == LYS_LIST) {
+ /* turn logging off */
+ prev_lo = ly_log_options(0);
+
+ if (ctx_scnode->nodetype == LYS_LIST) {
+ /* check for predicates "[key1=...][key2=...]..." */
+
/* get key count */
- if (ctx_node->flags & LYS_KEYLESS) {
- return LY_EINVAL;
+ if (ctx_scnode->flags & LYS_KEYLESS) {
+ rc = LY_ENOT;
+ goto cleanup;
}
- for (key_count = 0, key = lysc_node_child(ctx_node); key && (key->flags & LYS_KEY); key = key->next, ++key_count) {}
- assert(key_count);
/* learn where the predicates end */
e_idx = *tok_idx;
- while (key_count) {
+ for (key = lysc_node_child(ctx_scnode); key && (key->flags & LYS_KEY); key = key->next) {
/* '[' */
if (lyxp_check_token(NULL, exp, e_idx, LYXP_TOKEN_BRACK1)) {
- return LY_EINVAL;
+ rc = LY_ENOT;
+ goto cleanup;
}
++e_idx;
if (lyxp_check_token(NULL, exp, e_idx, LYXP_TOKEN_NAMETEST)) {
- /* definitely not a key */
- return LY_EINVAL;
+ /* not a key */
+ rc = LY_ENOT;
+ goto cleanup;
}
+ /* check key */
+ LY_CHECK_GOTO(rc = eval_name_test_try_compile_predicate_key(exp->expr + exp->tok_pos[e_idx],
+ exp->tok_len[e_idx], ctx_scnode, set, key), cleanup);
+
+ ++e_idx;
+
+ if (lyxp_check_token(NULL, exp, e_idx, LYXP_TOKEN_OPER_EQUAL)) {
+ /* not '=' */
+ rc = LY_ENOT;
+ goto cleanup;
+ }
+ ++e_idx;
+
+ /* value start */
+ val_start_idx = e_idx;
+
/* ']' */
while (lyxp_check_token(NULL, exp, e_idx, LYXP_TOKEN_BRACK2)) {
++e_idx;
}
- ++e_idx;
- /* another presumably key predicate parsed */
- --key_count;
+ /* try to evaluate the value */
+ LY_CHECK_GOTO(rc = eval_name_test_try_compile_predicate_append(exp, val_start_idx, e_idx - 1, ctx_scnode,
+ set, key, &pred, &pred_len), cleanup);
+
+ ++e_idx;
}
} else {
+ /* check for predicate "[.=...]" */
+
/* learn just where this single predicate ends */
e_idx = *tok_idx;
/* '[' */
if (lyxp_check_token(NULL, exp, e_idx, LYXP_TOKEN_BRACK1)) {
- return LY_EINVAL;
+ rc = LY_ENOT;
+ goto cleanup;
}
++e_idx;
if (lyxp_check_token(NULL, exp, e_idx, LYXP_TOKEN_DOT)) {
- /* definitely not the value */
- return LY_EINVAL;
+ /* not the node value */
+ rc = LY_ENOT;
+ goto cleanup;
}
+ ++e_idx;
+
+ if (lyxp_check_token(NULL, exp, e_idx, LYXP_TOKEN_OPER_EQUAL)) {
+ /* not '=' */
+ rc = LY_ENOT;
+ goto cleanup;
+ }
+ ++e_idx;
+
+ /* value start */
+ val_start_idx = e_idx;
/* ']' */
while (lyxp_check_token(NULL, exp, e_idx, LYXP_TOKEN_BRACK2)) {
++e_idx;
}
+
+ /* try to evaluate the value */
+ LY_CHECK_GOTO(rc = eval_name_test_try_compile_predicate_append(exp, val_start_idx, e_idx - 1, ctx_scnode, set,
+ ctx_scnode, &pred, &pred_len), cleanup);
+
++e_idx;
}
- /* get the joined length of all the keys predicates/of the single leaf-list predicate */
- pred_len = (exp->tok_pos[e_idx - 1] + exp->tok_len[e_idx - 1]) - exp->tok_pos[*tok_idx];
-
- /* turn logging off */
- prev_lo = ly_log_options(0);
-
/* parse the predicate(s) */
- LY_CHECK_GOTO(ret = ly_path_parse_predicate(ctx_node->module->ctx, ctx_node, exp->expr + exp->tok_pos[*tok_idx],
- pred_len, LY_PATH_PREFIX_OPTIONAL, LY_PATH_PRED_SIMPLE, &exp2), cleanup);
+ LY_CHECK_GOTO(rc = ly_path_parse_predicate(set->ctx, ctx_scnode, pred, pred_len, LY_PATH_PREFIX_OPTIONAL,
+ LY_PATH_PRED_SIMPLE, &exp2), cleanup);
/* compile */
- ret = ly_path_compile_predicate(ctx_node->module->ctx, cur_node, cur_mod, ctx_node, exp2, &pred_idx, format,
- prefix_data, predicates, pred_type);
- LY_CHECK_GOTO(ret, cleanup);
+ rc = ly_path_compile_predicate(set->ctx, set->cur_node ? set->cur_node->schema : NULL, set->cur_mod, ctx_scnode, exp2,
+ &pred_idx, LY_VALUE_JSON, NULL, predicates, pred_type);
+ LY_CHECK_GOTO(rc, cleanup);
/* success, the predicate must include all the needed information for hash-based search */
*tok_idx = e_idx;
cleanup:
ly_log_options(prev_lo);
- lyxp_expr_free(ctx_node->module->ctx, exp2);
- return ret;
+ lyxp_expr_free(set->ctx, exp2);
+ free(pred);
+ return rc;
}
/**
@@ -7867,8 +8015,7 @@
if (scnode && (scnode->nodetype & (LYS_LIST | LYS_LEAFLIST))) {
/* try to create the predicates */
- if (eval_name_test_try_compile_predicates(exp, tok_idx, scnode, set->cur_mod, set->cur_node ?
- set->cur_node->schema : NULL, set->format, set->prefix_data, &predicates, &pred_type)) {
+ if (eval_name_test_try_compile_predicates(exp, tok_idx, scnode, set, &predicates, &pred_type)) {
/* hashes cannot be used */
scnode = NULL;
}
diff --git a/tests/utests/basic/test_xpath.c b/tests/utests/basic/test_xpath.c
index 1e919b1..71252b9 100644
--- a/tests/utests/basic/test_xpath.c
+++ b/tests/utests/basic/test_xpath.c
@@ -109,8 +109,31 @@
data =
"<foo2 xmlns=\"urn:tests:a\">50</foo2>"
+ "<l1 xmlns=\"urn:tests:a\">"
+ " <a>a1</a>"
+ " <b>b1</b>"
+ " <c>c1</c>"
+ "</l1>"
+ "<l1 xmlns=\"urn:tests:a\">"
+ " <a>a2</a>"
+ " <b>b2</b>"
+ "</l1>"
+ "<l1 xmlns=\"urn:tests:a\">"
+ " <a>a3</a>"
+ " <b>b3</b>"
+ "</l1>"
+ "<l1 xmlns=\"urn:tests:a\">"
+ " <a>a4</a>"
+ " <b>b4</b>"
+ " <c>c4</c>"
+ "</l1>"
+ "<l1 xmlns=\"urn:tests:a\">"
+ " <a>a5</a>"
+ " <b>b5</b>"
+ " <c>c5</c>"
+ "</l1>"
"<c xmlns=\"urn:tests:a\">"
- " <x>val</x>"
+ " <x>key2</x>"
" <ll>"
" <a>key1</a>"
" <ll>"
@@ -137,6 +160,17 @@
" <b>val22</b>"
" </ll>"
" </ll>"
+ " <ll>"
+ " <a>key3</a>"
+ " <ll>"
+ " <a>key31</a>"
+ " <b>val31</b>"
+ " </ll>"
+ " <ll>"
+ " <a>key32</a>"
+ " <b>val32</b>"
+ " </ll>"
+ " </ll>"
"</c>";
assert_int_equal(LY_SUCCESS, lyd_parse_data_mem(UTEST_LYCTX, data, LYD_XML, LYD_PARSE_STRICT, LYD_VALIDATE_PRESENT, &tree));
assert_non_null(tree);
@@ -152,6 +186,20 @@
assert_string_equal("key11", lyd_get_value(lyd_child(set->dnodes[0])));
ly_set_free(set, NULL);
+ /* special predicate evaluated using hashes */
+ assert_int_equal(LY_SUCCESS, lyd_find_xpath(tree, "/a:l1[a=concat('a', '1')][b=substring('ab1',2)]", &set));
+ assert_int_equal(1, set->count);
+ ly_set_free(set, NULL);
+
+ assert_int_equal(LY_SUCCESS, lyd_find_xpath(tree, "/a:c/ll[a=../x]", &set));
+ assert_int_equal(1, set->count);
+ ly_set_free(set, NULL);
+
+ /* cannot use hashes */
+ assert_int_equal(LY_SUCCESS, lyd_find_xpath(tree, "/a:c/ll[a=substring(ll/a,1,4)]", &set));
+ assert_int_equal(3, set->count);
+ ly_set_free(set, NULL);
+
lyd_free_all(tree);
}