/**
 * @file hash_table.h
 * @author Radek Krejci <rkrejci@cesnet.cz>
 * @author Michal Vasko <mvasko@cesnet.cz>
 * @brief libyang hash table
 *
 * Copyright (c) 2015 - 2022 CESNET, z.s.p.o.
 *
 * This source code is licensed under BSD 3-Clause License (the "License").
 * You may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://opensource.org/licenses/BSD-3-Clause
 */
15
16#ifndef LY_HASH_TABLE_H_
17#define LY_HASH_TABLE_H_
18
Radek Krejcie7b95092019-05-15 11:03:07 +020019#include <pthread.h>
20#include <stddef.h>
21#include <stdint.h>
22
Michal Vaskoc5a22832020-08-20 13:21:33 +020023#include "compat.h"
Radek Krejcie7b95092019-05-15 11:03:07 +020024#include "log.h"
Radek Krejci5aeea3a2018-09-05 13:29:36 +020025
/**
 * @brief Compute hash from (several) string(s).
 *
 * Usage:
 * - init hash to 0
 * - repeatedly call ::dict_hash_multi(), provide hash from the last call
 * - call ::dict_hash_multi() with key_part = NULL to finish the hash
 *
 * @param[in] hash Hash returned by the previous call, 0 for the first call.
 * @param[in] key_part Next part of the key, NULL to finish the hash.
 * @param[in] len Length of @p key_part (presumably in bytes - confirm against the implementation).
 * @return Updated hash to pass to the next call, or the finished hash after the final call.
 */
uint32_t dict_hash_multi(uint32_t hash, const char *key_part, size_t len);
35
/**
 * @brief Compute hash from a string.
 *
 * @param[in] key String to hash.
 * @param[in] len Length of @p key (presumably in bytes - confirm against the implementation).
 * @return Computed hash.
 */
uint32_t dict_hash(const char *key, size_t len);
40
Radek Krejci5aeea3a2018-09-05 13:29:36 +020041/**
42 * @brief Callback for checking hash table values equivalence.
43 *
Michal Vasko90932a92020-02-12 14:33:03 +010044 * @param[in] val1_p Pointer to the first value, the one being searched (inserted/removed).
45 * @param[in] val2_p Pointer to the second value, the one stored in the hash table.
Radek Krejci5aeea3a2018-09-05 13:29:36 +020046 * @param[in] mod Whether the operation modifies the hash table (insert or remove) or not (find).
47 * @param[in] cb_data User callback data.
Radek Krejci857189e2020-09-01 13:26:36 +020048 * @return false (non-equal) or true (equal).
Radek Krejci5aeea3a2018-09-05 13:29:36 +020049 */
Michal Vasko62524a92021-02-26 10:08:50 +010050typedef ly_bool (*lyht_value_equal_cb)(void *val1_p, void *val2_p, ly_bool mod, void *cb_data);
Radek Krejci5aeea3a2018-09-05 13:29:36 +020051
/** reference value for 100% */
#define LYHT_HUNDRED_PERCENTAGE 100

/** when the table is at least this much percent full, it is enlarged (double the size) */
#define LYHT_ENLARGE_PERCENTAGE 75

/** only once the table is this much percent full, enable shrinking */
#define LYHT_FIRST_SHRINK_PERCENTAGE 50

/** when the table is less than this much percent full, it is shrunk (half the size) */
#define LYHT_SHRINK_PERCENTAGE 25

/** when the table has less than this much percent empty records, it is rehashed to get rid of all the invalid records */
#define LYHT_REHASH_PERCENTAGE 2

/** never shrink beyond this size */
#define LYHT_MIN_SIZE 8
69
/**
 * @brief Generic hash table record.
 *
 * The @p val member is declared with a single byte only; the real size of
 * one record is tracked separately (see the rec_size member of struct ly_ht).
 */
struct ly_ht_rec {
    uint32_t hash;        /* hash of the value */
    int32_t hits;         /* collision/overflow value count - 1 (a filled entry has 1 hit,
                           * special value -1 means a deleted record) */
    unsigned char val[1]; /* arbitrary-size value */
} _PACKED;
79
80/**
81 * @brief (Very) generic hash table.
82 *
83 * Hash table with open addressing collision resolution and
84 * linear probing of interval 1 (next free record is used).
85 * Removal is lazy (removed records are only marked), but
86 * if possible, they are fully emptied.
87 */
Michal Vasko8efac242023-03-30 08:24:56 +020088struct ly_ht {
Radek Krejci5aeea3a2018-09-05 13:29:36 +020089 uint32_t used; /* number of values stored in the hash table (filled records) */
90 uint32_t size; /* always holds 2^x == size (is power of 2), actually number of records allocated */
Michal Vaskodc95d9c2021-04-12 15:11:48 +020091 uint32_t invalid; /* number of invalid records (deleted) */
Michal Vasko62524a92021-02-26 10:08:50 +010092 lyht_value_equal_cb val_equal; /* callback for testing value equivalence */
Radek Krejci5aeea3a2018-09-05 13:29:36 +020093 void *cb_data; /* user data callback arbitrary value */
94 uint16_t resize; /* 0 - resizing is disabled, *
95 * 1 - enlarging is enabled, *
96 * 2 - both shrinking and enlarging is enabled */
97 uint16_t rec_size; /* real size (in bytes) of one record for accessing recs array */
98 unsigned char *recs; /* pointer to the hash table itself (array of struct ht_rec) */
99};
100
/**
 * @brief Dictionary record.
 *
 * NOTE(review): presumably one record per distinct string stored in struct ly_dict - confirm.
 */
struct ly_dict_rec {
    char *value;       /* stored string (presumably owned by the dictionary - confirm) */
    uint32_t refcount; /* reference counter of the record */
};
105
/**
 * @brief Dictionary to store repeating strings.
 */
struct ly_dict {
    struct ly_ht *hash_tab; /* hash table backing the dictionary */
    pthread_mutex_t lock;   /* mutex (presumably guarding access to hash_tab - confirm) */
};
113
/**
 * @brief Initiate content (non-zero values) of the dictionary.
 *
 * @param[in] dict Dictionary table to initiate.
 */
void lydict_init(struct ly_dict *dict);
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200120
/**
 * @brief Cleanup the dictionary content.
 *
 * @param[in] dict Dictionary table to cleanup.
 */
void lydict_clean(struct ly_dict *dict);
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200127
128/**
129 * @brief Create new hash table.
130 *
131 * @param[in] size Starting size of the hash table (capacity of values), must be power of 2.
132 * @param[in] val_size Size in bytes of value (the stored hashed item).
133 * @param[in] val_equal Callback for checking value equivalence.
Michal Vaskoa655fca2022-09-05 15:48:31 +0200134 * @param[in] cb_data User data always passed to @p val_equal.
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200135 * @param[in] resize Whether to resize the table on too few/too many records taken.
136 * @return Empty hash table, NULL on error.
137 */
Michal Vasko8efac242023-03-30 08:24:56 +0200138struct ly_ht *lyht_new(uint32_t size, uint16_t val_size, lyht_value_equal_cb val_equal, void *cb_data, uint16_t resize);
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200139
140/**
141 * @brief Set hash table value equal callback.
142 *
143 * @param[in] ht Hash table to modify.
144 * @param[in] new_val_equal New callback for checking value equivalence.
145 * @return Previous callback for checking value equivalence.
146 */
Michal Vasko8efac242023-03-30 08:24:56 +0200147lyht_value_equal_cb lyht_set_cb(struct ly_ht *ht, lyht_value_equal_cb new_val_equal);
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200148
/**
 * @brief Set hash table value equal callback user data.
 *
 * @param[in] ht Hash table to modify.
 * @param[in] new_cb_data New data for values callback.
 * @return Previous data for values callback.
 */
void *lyht_set_cb_data(struct ly_ht *ht, void *new_cb_data);
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200157
/**
 * @brief Make a duplicate of an existing hash table.
 *
 * @param[in] orig Original hash table to duplicate.
 * @return Duplicated hash table @p orig, NULL on error.
 */
struct ly_ht *lyht_dup(const struct ly_ht *orig);
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200165
/**
 * @brief Free a hash table.
 *
 * @param[in] ht Hash table to be freed.
 * @param[in] val_free Optional callback for freeing all the stored values, @p val_p is a pointer to a stored value.
 */
void lyht_free(struct ly_ht *ht, void (*val_free)(void *val_p));
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200173
174/**
175 * @brief Find a value in a hash table.
176 *
177 * @param[in] ht Hash table to search in.
178 * @param[in] val_p Pointer to the value to find.
179 * @param[in] hash Hash of the stored value.
180 * @param[out] match_p Pointer to the matching value, optional.
Michal Vaskoda859032020-07-14 12:20:14 +0200181 * @return LY_SUCCESS if value was found,
182 * @return LY_ENOTFOUND if not found.
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200183 */
Michal Vasko8efac242023-03-30 08:24:56 +0200184LY_ERR lyht_find(struct ly_ht *ht, void *val_p, uint32_t hash, void **match_p);
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200185
186/**
187 * @brief Find another equal value in the hash table.
188 *
189 * @param[in] ht Hash table to search in.
Michal Vaskoa655fca2022-09-05 15:48:31 +0200190 * @param[in] val_p Pointer to the previously found value in @p ht.
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200191 * @param[in] hash Hash of the previously found value.
192 * @param[out] match_p Pointer to the matching value, optional.
Michal Vaskoda859032020-07-14 12:20:14 +0200193 * @return LY_SUCCESS if value was found,
194 * @return LY_ENOTFOUND if not found.
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200195 */
Michal Vasko8efac242023-03-30 08:24:56 +0200196LY_ERR lyht_find_next(struct ly_ht *ht, void *val_p, uint32_t hash, void **match_p);
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200197
198/**
Michal Vasko6374de22022-09-05 15:48:48 +0200199 * @brief Find another equal value in the hash table. Same functionality as ::lyht_find_next()
200 * but allows to specify a collision val equal callback to be used for checking for matching colliding values.
201 *
202 * @param[in] ht Hash table to search in.
203 * @param[in] val_p Pointer to the previously found value in @p ht.
204 * @param[in] hash Hash of the previously found value.
205 * @param[in] collision_val_equal Val equal callback to use for checking collisions.
206 * @param[out] match_p Pointer to the matching value, optional.
207 * @return LY_SUCCESS if value was found,
208 * @return LY_ENOTFOUND if not found.
209 */
Michal Vasko8efac242023-03-30 08:24:56 +0200210LY_ERR lyht_find_next_with_collision_cb(struct ly_ht *ht, void *val_p, uint32_t hash,
Michal Vasko6374de22022-09-05 15:48:48 +0200211 lyht_value_equal_cb collision_val_equal, void **match_p);
212
213/**
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200214 * @brief Insert a value into a hash table.
215 *
216 * @param[in] ht Hash table to insert into.
217 * @param[in] val_p Pointer to the value to insert. Be careful, if the values stored in the hash table
Michal Vaskoa655fca2022-09-05 15:48:31 +0200218 * are pointers, @p val_p must be a pointer to a pointer.
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200219 * @param[in] hash Hash of the stored value.
220 * @param[out] match_p Pointer to the stored value, optional
Michal Vasko4a4c7ed2020-07-17 09:30:12 +0200221 * @return LY_SUCCESS on success,
Radek Krejci011e4aa2020-09-04 15:22:31 +0200222 * @return LY_EEXIST in case the value is already present.
223 * @return LY_EMEM in case of memory allocation failure.
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200224 */
Michal Vasko8efac242023-03-30 08:24:56 +0200225LY_ERR lyht_insert(struct ly_ht *ht, void *val_p, uint32_t hash, void **match_p);
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200226
227/**
Radek Krejci8678fa42020-08-18 16:07:28 +0200228 * @brief Insert a value into hash table. Same functionality as ::lyht_insert()
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200229 * but allows to specify a temporary val equal callback to be used in case the hash table
230 * will be resized after successful insertion.
231 *
232 * @param[in] ht Hash table to insert into.
233 * @param[in] val_p Pointer to the value to insert. Be careful, if the values stored in the hash table
Michal Vaskoa655fca2022-09-05 15:48:31 +0200234 * are pointers, @p val_p must be a pointer to a pointer.
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200235 * @param[in] hash Hash of the stored value.
236 * @param[in] resize_val_equal Val equal callback to use for resizing.
237 * @param[out] match_p Pointer to the stored value, optional
Michal Vasko4a4c7ed2020-07-17 09:30:12 +0200238 * @return LY_SUCCESS on success,
Radek Krejci011e4aa2020-09-04 15:22:31 +0200239 * @return LY_EEXIST in case the value is already present.
240 * @return LY_EMEM in case of memory allocation failure.
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200241 */
Michal Vasko8efac242023-03-30 08:24:56 +0200242LY_ERR lyht_insert_with_resize_cb(struct ly_ht *ht, void *val_p, uint32_t hash, lyht_value_equal_cb resize_val_equal,
Radek Krejci0f969882020-08-21 16:56:47 +0200243 void **match_p);
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200244
245/**
246 * @brief Remove a value from a hash table.
247 *
248 * @param[in] ht Hash table to remove from.
Michal Vasko5bcc33b2020-10-06 15:33:44 +0200249 * @param[in] val_p Pointer to value to be removed. Be careful, if the values stored in the hash table
Michal Vaskoa655fca2022-09-05 15:48:31 +0200250 * are pointers, @p val_p must be a pointer to a pointer.
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200251 * @param[in] hash Hash of the stored value.
Michal Vasko4a4c7ed2020-07-17 09:30:12 +0200252 * @return LY_SUCCESS on success,
253 * @return LY_ENOTFOUND if value was not found.
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200254 */
Michal Vasko8efac242023-03-30 08:24:56 +0200255LY_ERR lyht_remove(struct ly_ht *ht, void *val_p, uint32_t hash);
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200256
Michal Vasko5bcc33b2020-10-06 15:33:44 +0200257/**
Radek Krejci8678fa42020-08-18 16:07:28 +0200258 * @brief Remove a value from a hash table. Same functionality as ::lyht_remove()
Michal Vasko5bcc33b2020-10-06 15:33:44 +0200259 * but allows to specify a temporary val equal callback to be used in case the hash table
260 * will be resized after successful removal.
261 *
262 * @param[in] ht Hash table to remove from.
263 * @param[in] val_p Pointer to value to be removed. Be careful, if the values stored in the hash table
Michal Vaskoa655fca2022-09-05 15:48:31 +0200264 * are pointers, @p val_p must be a pointer to a pointer.
Michal Vasko5bcc33b2020-10-06 15:33:44 +0200265 * @param[in] hash Hash of the stored value.
266 * @param[in] resize_val_equal Val equal callback to use for resizing.
267 * @return LY_SUCCESS on success,
268 * @return LY_ENOTFOUND if value was not found.
269 */
Michal Vasko8efac242023-03-30 08:24:56 +0200270LY_ERR lyht_remove_with_resize_cb(struct ly_ht *ht, void *val_p, uint32_t hash, lyht_value_equal_cb resize_val_equal);
Michal Vasko5bcc33b2020-10-06 15:33:44 +0200271
/**
 * @brief Get suitable size of a hash table for a fixed number of items.
 *
 * @param[in] item_count Number of stored items.
 * @return Hash table size.
 */
uint32_t lyht_get_fixed_size(uint32_t item_count);
279
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200280#endif /* LY_HASH_TABLE_H_ */