lyb FEATURE thread-safe cached hash generation

commit: 11f76c8131039c3b25542de83c0cdbeac1120ab7 [log] [tgz]
author: Michal Vasko <mvasko@cesnet.cz> Thu Apr 15 14:36:14 2021 +0200
committer: Michal Vasko <mvasko@cesnet.cz> Fri Apr 16 09:39:40 2021 +0200
tree: e07bdfe84969d19223ddcffc55d58e5ee9347a9d
parent: 1e77101abcea346fbe153d1073a1c5759524976c [diff]
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 443a800..09f6dc8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt

@@ -117,6 +117,7 @@
     src/tree_schema_free.c
     src/tree_schema_helpers.c
     src/in.c
+    src/lyb.c
     src/parser_yang.c
     src/parser_yin.c
     src/parser_stmt.c

diff --git a/src/common.h b/src/common.h
index e7b0d9b..478c6d0 100644
--- a/src/common.h
+++ b/src/common.h

@@ -304,6 +304,7 @@
     uint16_t change_count;            /**< Count of changes of the context, on some changes it could be incremented more times */
     uint16_t flags;                   /**< context settings, see @ref contextoptions. */
     pthread_key_t errlist_key;        /**< key for the thread-specific list of errors related to the context */
+    pthread_mutex_t lyb_hash_lock;    /**< lock for storing LYB schema hashes in schema nodes */
 };
 
 /**

diff --git a/src/context.c b/src/context.c
index 33d3f17..faa886a 100644
--- a/src/context.c
+++ b/src/context.c

@@ -236,6 +236,9 @@
     /* initialize thread-specific keys */
     while ((pthread_key_create(&ctx->errlist_key, ly_err_free)) == EAGAIN) {}
 
+    /* init LYB hash lock */
+    pthread_mutex_init(&ctx->lyb_hash_lock, NULL);
+
     /* models list */
     ctx->flags = options;
     if (search_dir) {
@@ -1123,6 +1126,9 @@
     /* dictionary */
     lydict_clean(&ctx->dict);
 
+    /* LYB hash lock */
+    pthread_mutex_destroy(&ctx->lyb_hash_lock);
+
     /* plugins - will be removed only if this is the last context */
     lyplg_clean();
 

diff --git a/src/lyb.c b/src/lyb.c
new file mode 100644
index 0000000..87806c8
--- /dev/null
+++ b/src/lyb.c

@@ -0,0 +1,125 @@
+/**
+ * @file lyb.c
+ * @author Michal Vasko <mvasko@cesnet.cz>
+ * @brief LYB format common functionality.
+ *
+ * Copyright (c) 2021 CESNET, z.s.p.o.
+ *
+ * This source code is licensed under BSD 3-Clause License (the "License").
+ * You may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://opensource.org/licenses/BSD-3-Clause
+ */
+
+#include "lyb.h"
+
+#include <assert.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "common.h"
+#include "compat.h"
+#include "tree_schema.h"
+
+/**
+ * @brief Generate single hash for a schema node to be used for LYB data.
+ *
+ * @param[in] node Node to hash.
+ * @param[in] collision_id Collision ID of the hash to generate.
+ * @return Generated hash.
+ */
+static LYB_HASH
+lyb_generate_hash(const struct lysc_node *node, uint8_t collision_id)
+{
+    const struct lys_module *mod = node->module;
+    uint32_t full_hash;
+    LYB_HASH hash;
+
+    /* generate full hash */
+    full_hash = dict_hash_multi(0, mod->name, strlen(mod->name));
+    full_hash = dict_hash_multi(full_hash, node->name, strlen(node->name));
+    if (collision_id) {
+        size_t ext_len;
+
+        if (collision_id > strlen(mod->name)) {
+            /* fine, we will not hash more bytes, just use more bits from the hash than previously */
+            ext_len = strlen(mod->name);
+        } else {
+            /* use one more byte from the module name than before */
+            ext_len = collision_id;
+        }
+        full_hash = dict_hash_multi(full_hash, mod->name, ext_len);
+    }
+    full_hash = dict_hash_multi(full_hash, NULL, 0);
+
+    /* use the shortened hash */
+    hash = full_hash & (LYB_HASH_MASK >> collision_id);
+    /* add collision identificator */
+    hash |= LYB_HASH_COLLISION_ID >> collision_id;
+
+    return hash;
+}
+
+LYB_HASH
+lyb_get_hash(const struct lysc_node *node, uint8_t collision_id)
+{
+    /* hashes must be cached */
+    assert(node->hash[0]);
+
+    if (collision_id < LYS_NODE_HASH_COUNT) {
+        /* read from cache */
+        return node->hash[collision_id];
+    }
+
+    /* generate */
+    return lyb_generate_hash(node, collision_id);
+}
+
+/**
+ * @brief Module DFS callback filling all cached hashes of a schema node.
+ */
+static LY_ERR
+lyb_cache_node_hash_cb(struct lysc_node *node, void *UNUSED(data), ly_bool *UNUSED(dfs_continue))
+{
+    if (node->hash[0]) {
+        /* already cached, stop the DFS */
+        return LY_EEXIST;
+    }
+
+    for (uint8_t i = 0; i < LYS_NODE_HASH_COUNT; ++i) {
+        /* store the hash in the cache */
+        node->hash[i] = lyb_generate_hash(node, i);
+    }
+
+    return LY_SUCCESS;
+}
+
+void
+lyb_cache_module_hash(const struct lys_module *mod)
+{
+    /* LOCK */
+    pthread_mutex_lock(&mod->ctx->lyb_hash_lock);
+
+    /* store all cached hashes for all the nodes */
+    lysc_module_dfs_full(mod, lyb_cache_node_hash_cb, NULL);
+
+    /* UNLOCK */
+    pthread_mutex_unlock(&mod->ctx->lyb_hash_lock);
+}
+
+ly_bool
+lyb_has_schema_model(const struct lysc_node *node, const struct lys_module **models)
+{
+    LY_ARRAY_COUNT_TYPE u;
+
+    LY_ARRAY_FOR(models, u) {
+        if (node->module == models[u]) {
+            return 1;
+        }
+    }
+
+    return 0;
+}

diff --git a/src/lyb.h b/src/lyb.h
index e56457d..0aba895 100644
--- a/src/lyb.h
+++ b/src/lyb.h

@@ -122,10 +122,18 @@
  *
  * Hash is divided to collision ID and hash itself.
  *
+ * @anchor collisionid
+ *
  * First bits are collision ID until 1 is found. The rest is truncated 32b hash.
  * 1xxx xxxx - collision ID 0 (no collisions)
  * 01xx xxxx - collision ID 1 (collision ID 0 hash collided)
  * 001x xxxx - collision ID 2 ...
+ *
+ * When finding a match for a unique schema (siblings) hash (sequence of hashes with increasing collision ID), the highest
+ * collision ID can be read from the last hash (LYB parser).
+ *
+ * To learn what is the highest collision ID of a hash that must be included in a unique schema (siblings) hash,
+ * collisions with all the preceding sibling schema hashes must be checked (LYB printer).
  */
 
 /* Number of bits the whole hash will take (including hash collision ID) */
@@ -168,4 +176,29 @@
 /* Type large enough for all meta data */
 #define LYB_META uint16_t
 
+/**
+ * @brief Get single hash for a schema node to be used for LYB data. Read from cache, if possible.
+ *
+ * @param[in] node Node to hash.
+ * @param[in] collision_id Collision ID of the hash to generate, see @ref collisionid.
+ * @return Generated hash.
+ */
+LYB_HASH lyb_get_hash(const struct lysc_node *node, uint8_t collision_id);
+
+/**
+ * @brief Fill the hash cache of all the schema nodes of a module.
+ *
+ * @param[in] mod Module to process.
+ */
+void lyb_cache_module_hash(const struct lys_module *mod);
+
+/**
+ * @brief Check whether a node's module is in a module array.
+ *
+ * @param[in] node Node to check.
+ * @param[in] models Modules in a sized array.
+ * @return Boolean value whether @p node's module was found in the given @p models array.
+ */
+ly_bool lyb_has_schema_model(const struct lysc_node *node, const struct lys_module **models);
+
 #endif /* LY_LYB_H_ */

diff --git a/src/parser_lyb.c b/src/parser_lyb.c
index 9cebd2d..b011d8a 100644
--- a/src/parser_lyb.c
+++ b/src/parser_lyb.c

@@ -330,6 +330,9 @@
 
     }
 
+    /* fill cached hashes, if not already */
+    lyb_cache_module_hash(*mod);
+
 cleanup:
     free(mod_name);
     return ret;
@@ -582,7 +585,7 @@
 
     /* compare all the hashes starting from collision ID 0 */
     for (i = 0; i < hash_count; ++i) {
-        sibling_hash = lyb_hash(sibling, i);
+        sibling_hash = lyb_get_hash(sibling, i);
         if (sibling_hash != hash[i]) {
             return 0;
         }

diff --git a/src/printer_lyb.c b/src/printer_lyb.c
index 4f606bd..ba098a7 100644
--- a/src/printer_lyb.c
+++ b/src/printer_lyb.c

@@ -83,7 +83,7 @@
     struct lysc_node **col_node;
 
     /* get the first node inserted with last hash col ID ht_col_id */
-    if (lyht_find(ht, &sibling, lyb_hash(sibling, ht_col_id), (void **)&col_node)) {
+    if (lyht_find(ht, &sibling, lyb_get_hash(sibling, ht_col_id), (void **)&col_node)) {
         /* there is none. valid situation */
         return LY_SUCCESS;
     }
@@ -92,7 +92,7 @@
     do {
         int64_t j;
         for (j = (int64_t)compare_col_id; j > -1; --j) {
-            if (lyb_hash(sibling, j) != lyb_hash(*col_node, j)) {
+            if (lyb_get_hash(sibling, j) != lyb_get_hash(*col_node, j)) {
                 /* one non-colliding hash */
                 break;
             }
@@ -104,7 +104,7 @@
         }
 
         /* get next node inserted with last hash col ID ht_col_id */
-    } while (!lyht_find_next(ht, col_node, lyb_hash(*col_node, ht_col_id), (void **)&col_node));
+    } while (!lyht_find_next(ht, col_node, lyb_get_hash(*col_node, ht_col_id), (void **)&col_node));
 
     lyht_set_cb(ht, lyb_hash_equal_cb);
     return LY_SUCCESS;
@@ -154,7 +154,7 @@
             }
 
             /* try to insert node with the current collision ID */
-            if (!lyht_insert_with_resize_cb(ht, &sibling, lyb_hash(sibling, i), lyb_ptr_equal_cb, NULL)) {
+            if (!lyht_insert_with_resize_cb(ht, &sibling, lyb_get_hash(sibling, i), lyb_ptr_equal_cb, NULL)) {
                 /* success, no collision */
                 break;
             }
@@ -163,7 +163,7 @@
             if (i && !lyb_hash_sequence_check(ht, sibling, i, i)) {
                 /* it can be inserted after all, even though there is already a node with the same last collision ID */
                 lyht_set_cb(ht, lyb_ptr_equal_cb);
-                if (lyht_insert(ht, &sibling, lyb_hash(sibling, i), NULL)) {
+                if (lyht_insert(ht, &sibling, lyb_get_hash(sibling, i), NULL)) {
                     LOGINT(sibling->module->ctx);
                     lyht_set_cb(ht, lyb_hash_equal_cb);
                     lyht_free(ht);
@@ -205,7 +205,7 @@
     uint32_t i;
 
     for (i = 0; i < LYB_HASH_BITS; ++i) {
-        hash = lyb_hash(node, i);
+        hash = lyb_get_hash(node, i);
         if (!hash) {
             LOGINT_RET(node->module->ctx);
         }
@@ -450,6 +450,11 @@
     }
     LY_CHECK_RET(lyb_write_number(revision, sizeof revision, out, lybctx));
 
+    if (mod) {
+        /* fill cached hashes, if not already */
+        lyb_cache_module_hash(mod);
+    }
+
     return LY_SUCCESS;
 }
 
@@ -890,7 +895,7 @@
     for (i = 0; !(hash & (LYB_HASH_COLLISION_ID >> i)); ++i) {}
 
     for ( ; i; --i) {
-        hash = lyb_hash(schema, i - 1);
+        hash = lyb_get_hash(schema, i - 1);
         if (!hash) {
             return LY_EINT;
         }

diff --git a/src/tree_data_helpers.c b/src/tree_data_helpers.c
index 1edfb0f..2a2d347 100644
--- a/src/tree_data_helpers.c
+++ b/src/tree_data_helpers.c

@@ -373,62 +373,6 @@
     return LY_SUCCESS;
 }
 
-LYB_HASH
-lyb_hash(struct lysc_node *sibling, uint8_t collision_id)
-{
-    const struct lys_module *mod;
-    uint32_t full_hash;
-    LYB_HASH hash;
-
-    if ((collision_id < LYS_NODE_HASH_COUNT) && sibling->hash[collision_id]) {
-        return sibling->hash[collision_id];
-    }
-
-    mod = sibling->module;
-
-    full_hash = dict_hash_multi(0, mod->name, strlen(mod->name));
-    full_hash = dict_hash_multi(full_hash, sibling->name, strlen(sibling->name));
-    if (collision_id) {
-        size_t ext_len;
-
-        if (collision_id > strlen(mod->name)) {
-            /* fine, we will not hash more bytes, just use more bits from the hash than previously */
-            ext_len = strlen(mod->name);
-        } else {
-            /* use one more byte from the module name than before */
-            ext_len = collision_id;
-        }
-        full_hash = dict_hash_multi(full_hash, mod->name, ext_len);
-    }
-    full_hash = dict_hash_multi(full_hash, NULL, 0);
-
-    /* use the shortened hash */
-    hash = full_hash & (LYB_HASH_MASK >> collision_id);
-    /* add colision identificator */
-    hash |= LYB_HASH_COLLISION_ID >> collision_id;
-
-    /* save this hash */
-    if (collision_id < LYS_NODE_HASH_COUNT) {
-        sibling->hash[collision_id] = hash;
-    }
-
-    return hash;
-}
-
-ly_bool
-lyb_has_schema_model(const struct lysc_node *sibling, const struct lys_module **models)
-{
-    LY_ARRAY_COUNT_TYPE u;
-
-    LY_ARRAY_FOR(models, u) {
-        if (sibling->module == models[u]) {
-            return 1;
-        }
-    }
-
-    return 0;
-}
-
 void
 lyd_del_move_root(struct lyd_node **root, const struct lyd_node *to_del, const struct lys_module *mod)
 {

diff --git a/src/tree_data_internal.h b/src/tree_data_internal.h
index b7d2e2e..05ad5fc 100644
--- a/src/tree_data_internal.h
+++ b/src/tree_data_internal.h

@@ -16,7 +16,6 @@
 #define LY_TREE_DATA_INTERNAL_H_
 
 #include "log.h"
-#include "lyb.h"
 #include "plugins_types.h"
 #include "set.h"
 #include "tree_data.h"
@@ -34,25 +33,6 @@
 #define LY_LYB_SUFFIX_LEN 4
 
 /**
- * @brief Hash schema sibling to be used for LYB data.
- *
- * @param[in] sibling Sibling to hash.
- * @param[in] collision_id Collision ID of the hash to generate.
- * @return Generated hash.
- */
-LYB_HASH lyb_hash(struct lysc_node *sibling, uint8_t collision_id);
-
-/**
- * @brief Check whether a sibling's module is in a module array.
- *
- * @param[in] sibling Sibling to check.
- * @param[in] models Modules in a sized array.
- * @return non-zero if the module was found,
- * @return Boolean value whether @p sibling's module found in the given @p models array.
- */
-ly_bool lyb_has_schema_model(const struct lysc_node *sibling, const struct lys_module **models);
-
-/**
  * @brief Check whether a node to be deleted is the root node, move it if it is.
  *
  * @param[in] root Root sibling.
commit	11f76c8131039c3b25542de83c0cdbeac1120ab7	[log] [tgz]
author	Michal Vasko <mvasko@cesnet.cz>	Thu Apr 15 14:36:14 2021 +0200
committer	Michal Vasko <mvasko@cesnet.cz>	Fri Apr 16 09:39:40 2021 +0200
tree	e07bdfe84969d19223ddcffc55d58e5ee9347a9d
parent	1e77101abcea346fbe153d1073a1c5759524976c [diff]