/**
 * @file hash_table.h
 * @author Radek Krejci <rkrejci@cesnet.cz>
 * @author Michal Vasko <mvasko@cesnet.cz>
 * @brief libyang hash table
 *
 * Copyright (c) 2015 - 2018 CESNET, z.s.p.o.
 *
 * This source code is licensed under BSD 3-Clause License (the "License").
 * You may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://opensource.org/licenses/BSD-3-Clause
 */
| 15 | |
| 16 | #ifndef LY_HASH_TABLE_H_ |
| 17 | #define LY_HASH_TABLE_H_ |
| 18 | |
Radek Krejci | e7b9509 | 2019-05-15 11:03:07 +0200 | [diff] [blame] | 19 | #include <pthread.h> |
| 20 | #include <stddef.h> |
| 21 | #include <stdint.h> |
| 22 | |
Michal Vasko | c5a2283 | 2020-08-20 13:21:33 +0200 | [diff] [blame] | 23 | #include "compat.h" |
Radek Krejci | e7b9509 | 2019-05-15 11:03:07 +0200 | [diff] [blame] | 24 | #include "log.h" |
Radek Krejci | 5aeea3a | 2018-09-05 13:29:36 +0200 | [diff] [blame] | 25 | |
/**
 * @brief Compute hash from (several) string(s).
 *
 * Usage:
 * - init hash to 0
 * - repeatedly call dict_hash_multi(), provide hash from the last call
 * - call dict_hash_multi() with key_part = NULL to finish the hash
 *
 * @param[in] hash Hash returned by the previous call, 0 for the first call.
 * @param[in] key_part Next part of the key to add to the hash, NULL to finish the hash.
 * @param[in] len Length of \p key_part (presumably in bytes - confirm against the implementation).
 * @return Hash updated with \p key_part, to be passed to the next call.
 */
uint32_t dict_hash_multi(uint32_t hash, const char *key_part, size_t len);
| 35 | |
/**
 * @brief Compute hash from a string.
 *
 * @param[in] key String to hash.
 * @param[in] len Length of \p key (presumably in bytes - confirm against the implementation).
 * @return Hash of \p key.
 */
uint32_t dict_hash(const char *key, size_t len);
| 40 | |
/**
 * @brief Callback deciding whether two hash table values are equivalent.
 *
 * @param[in] val1_p Pointer to the first value, the one being searched (inserted/removed).
 * @param[in] val2_p Pointer to the second value, the one stored in the hash table.
 * @param[in] mod Whether the operation modifies the hash table (insert or remove) or not (find).
 * @param[in] cb_data User callback data.
 * @return 0 if the values are not equal, non-zero if they are equal.
 */
typedef uint8_t (*values_equal_cb)(void *val1_p, void *val2_p, uint8_t mod, void *cb_data);
Radek Krejci | 5aeea3a | 2018-09-05 13:29:36 +0200 | [diff] [blame] | 51 | |
/** fill ratio (in percent) at which the table is enlarged (its size is doubled) */
#define LYHT_ENLARGE_PERCENTAGE 75

/** fill ratio (in percent) the table must reach once before shrinking gets enabled */
#define LYHT_FIRST_SHRINK_PERCENTAGE 50

/** fill ratio (in percent) below which the table is shrunk (its size is halved) */
#define LYHT_SHRINK_PERCENTAGE 25

/** the table is never shrunk below this size */
#define LYHT_MIN_SIZE 8
| 63 | |
/**
 * @brief Generic hash table record.
 *
 * The declared one-byte ::ht_rec.val is only a placeholder for a value of arbitrary size;
 * the real size of one record is kept in hash_table.rec_size.
 */
struct ht_rec {
    uint32_t hash;        /* hash of the value */
    int32_t hits;         /* collision/overflow value count - 1 (a filled entry has 1 hit,
                           * special value -1 means a deleted record) */
    unsigned char val[1]; /* arbitrary-size value */
} _PACKED;
| 73 | |
| 74 | /** |
| 75 | * @brief (Very) generic hash table. |
| 76 | * |
| 77 | * Hash table with open addressing collision resolution and |
| 78 | * linear probing of interval 1 (next free record is used). |
| 79 | * Removal is lazy (removed records are only marked), but |
| 80 | * if possible, they are fully emptied. |
| 81 | */ |
| 82 | struct hash_table { |
| 83 | uint32_t used; /* number of values stored in the hash table (filled records) */ |
| 84 | uint32_t size; /* always holds 2^x == size (is power of 2), actually number of records allocated */ |
| 85 | values_equal_cb val_equal; /* callback for testing value equivalence */ |
| 86 | void *cb_data; /* user data callback arbitrary value */ |
| 87 | uint16_t resize; /* 0 - resizing is disabled, * |
| 88 | * 1 - enlarging is enabled, * |
| 89 | * 2 - both shrinking and enlarging is enabled */ |
| 90 | uint16_t rec_size; /* real size (in bytes) of one record for accessing recs array */ |
| 91 | unsigned char *recs; /* pointer to the hash table itself (array of struct ht_rec) */ |
| 92 | }; |
| 93 | |
/**
 * @brief String with a reference counter.
 *
 * NOTE(review): presumably one record of the string dictionary (struct dict_table) - confirm
 * against the implementation.
 */
struct dict_rec {
    char *value;       /* the string itself */
    uint32_t refcount; /* reference counter (presumably number of users of value - confirm) */
};
| 98 | |
/**
 * @brief Dictionary to store repeating strings.
 */
struct dict_table {
    struct hash_table *hash_tab; /* hash table with the dictionary content */
    pthread_mutex_t lock;        /* mutex (presumably guarding access to hash_tab - confirm
                                  * against the implementation) */
};
| 106 | |
/**
 * @brief Initiate the content (non-zero values) of a dictionary.
 *
 * @param[in] dict Dictionary table to initiate.
 */
void lydict_init(struct dict_table *dict);
| 113 | |
/**
 * @brief Clean up the content of a dictionary.
 *
 * @param[in] dict Dictionary table to clean up.
 */
void lydict_clean(struct dict_table *dict);
| 120 | |
| 121 | /** |
| 122 | * @brief Create new hash table. |
| 123 | * |
| 124 | * @param[in] size Starting size of the hash table (capacity of values), must be power of 2. |
| 125 | * @param[in] val_size Size in bytes of value (the stored hashed item). |
| 126 | * @param[in] val_equal Callback for checking value equivalence. |
| 127 | * @param[in] cb_data User data always passed to \p val_equal. |
| 128 | * @param[in] resize Whether to resize the table on too few/too many records taken. |
| 129 | * @return Empty hash table, NULL on error. |
| 130 | */ |
Radek Krejci | 1deb5be | 2020-08-26 16:43:36 +0200 | [diff] [blame] | 131 | struct hash_table *lyht_new(uint32_t size, uint16_t val_size, values_equal_cb val_equal, void *cb_data, uint16_t resize); |
Radek Krejci | 5aeea3a | 2018-09-05 13:29:36 +0200 | [diff] [blame] | 132 | |
| 133 | /** |
| 134 | * @brief Set hash table value equal callback. |
| 135 | * |
| 136 | * @param[in] ht Hash table to modify. |
| 137 | * @param[in] new_val_equal New callback for checking value equivalence. |
| 138 | * @return Previous callback for checking value equivalence. |
| 139 | */ |
| 140 | values_equal_cb lyht_set_cb(struct hash_table *ht, values_equal_cb new_val_equal); |
| 141 | |
/**
 * @brief Replace the user data passed to the value-equivalence callback of a hash table.
 *
 * @param[in] ht Hash table to modify.
 * @param[in] new_cb_data New data for the values callback.
 * @return Previous data for the values callback.
 */
void *lyht_set_cb_data(struct hash_table *ht, void *new_cb_data);
| 150 | |
/**
 * @brief Make a duplicate of an existing hash table.
 *
 * @param[in] orig Original hash table to duplicate.
 * @return Duplicate of the hash table \p orig, NULL on error.
 */
struct hash_table *lyht_dup(const struct hash_table *orig);
| 158 | |
/**
 * @brief Free a hash table.
 *
 * @param[in] ht Hash table to be freed.
 */
void lyht_free(struct hash_table *ht);
| 165 | |
| 166 | /** |
| 167 | * @brief Find a value in a hash table. |
| 168 | * |
| 169 | * @param[in] ht Hash table to search in. |
| 170 | * @param[in] val_p Pointer to the value to find. |
| 171 | * @param[in] hash Hash of the stored value. |
| 172 | * @param[out] match_p Pointer to the matching value, optional. |
Michal Vasko | da85903 | 2020-07-14 12:20:14 +0200 | [diff] [blame] | 173 | * @return LY_SUCCESS if value was found, |
| 174 | * @return LY_ENOTFOUND if not found. |
Radek Krejci | 5aeea3a | 2018-09-05 13:29:36 +0200 | [diff] [blame] | 175 | */ |
Michal Vasko | da85903 | 2020-07-14 12:20:14 +0200 | [diff] [blame] | 176 | LY_ERR lyht_find(struct hash_table *ht, void *val_p, uint32_t hash, void **match_p); |
Radek Krejci | 5aeea3a | 2018-09-05 13:29:36 +0200 | [diff] [blame] | 177 | |
| 178 | /** |
| 179 | * @brief Find another equal value in the hash table. |
| 180 | * |
| 181 | * @param[in] ht Hash table to search in. |
| 182 | * @param[in] val_p Pointer to the previously found value in \p ht. |
| 183 | * @param[in] hash Hash of the previously found value. |
| 184 | * @param[out] match_p Pointer to the matching value, optional. |
Michal Vasko | da85903 | 2020-07-14 12:20:14 +0200 | [diff] [blame] | 185 | * @return LY_SUCCESS if value was found, |
| 186 | * @return LY_ENOTFOUND if not found. |
Radek Krejci | 5aeea3a | 2018-09-05 13:29:36 +0200 | [diff] [blame] | 187 | */ |
Michal Vasko | da85903 | 2020-07-14 12:20:14 +0200 | [diff] [blame] | 188 | LY_ERR lyht_find_next(struct hash_table *ht, void *val_p, uint32_t hash, void **match_p); |
Radek Krejci | 5aeea3a | 2018-09-05 13:29:36 +0200 | [diff] [blame] | 189 | |
| 190 | /** |
| 191 | * @brief Insert a value into a hash table. |
| 192 | * |
| 193 | * @param[in] ht Hash table to insert into. |
| 194 | * @param[in] val_p Pointer to the value to insert. Be careful, if the values stored in the hash table |
| 195 | * are pointers, \p val_p must be a pointer to a pointer. |
| 196 | * @param[in] hash Hash of the stored value. |
| 197 | * @param[out] match_p Pointer to the stored value, optional |
Michal Vasko | 4a4c7ed | 2020-07-17 09:30:12 +0200 | [diff] [blame] | 198 | * @return LY_SUCCESS on success, |
| 199 | * @return LY_EEXIST if the value is already present. |
Radek Krejci | 5aeea3a | 2018-09-05 13:29:36 +0200 | [diff] [blame] | 200 | */ |
Radek Krejci | 0ae092d | 2018-09-20 16:43:19 +0200 | [diff] [blame] | 201 | LY_ERR lyht_insert(struct hash_table *ht, void *val_p, uint32_t hash, void **match_p); |
Radek Krejci | 5aeea3a | 2018-09-05 13:29:36 +0200 | [diff] [blame] | 202 | |
| 203 | /** |
| 204 | * @brief Insert a value into hash table. Same functionality as lyht_insert() |
| 205 | * but allows to specify a temporary val equal callback to be used in case the hash table |
| 206 | * will be resized after successful insertion. |
| 207 | * |
| 208 | * @param[in] ht Hash table to insert into. |
| 209 | * @param[in] val_p Pointer to the value to insert. Be careful, if the values stored in the hash table |
| 210 | * are pointers, \p val_p must be a pointer to a pointer. |
| 211 | * @param[in] hash Hash of the stored value. |
| 212 | * @param[in] resize_val_equal Val equal callback to use for resizing. |
| 213 | * @param[out] match_p Pointer to the stored value, optional |
Michal Vasko | 4a4c7ed | 2020-07-17 09:30:12 +0200 | [diff] [blame] | 214 | * @return LY_SUCCESS on success, |
| 215 | * @return LY_EEXIST if the value is already present. |
Radek Krejci | 5aeea3a | 2018-09-05 13:29:36 +0200 | [diff] [blame] | 216 | */ |
| 217 | LY_ERR lyht_insert_with_resize_cb(struct hash_table *ht, void *val_p, uint32_t hash, values_equal_cb resize_val_equal, |
Radek Krejci | 0f96988 | 2020-08-21 16:56:47 +0200 | [diff] [blame] | 218 | void **match_p); |
Radek Krejci | 5aeea3a | 2018-09-05 13:29:36 +0200 | [diff] [blame] | 219 | |
| 220 | /** |
| 221 | * @brief Remove a value from a hash table. |
| 222 | * |
| 223 | * @param[in] ht Hash table to remove from. |
| 224 | * @param[in] value_p Pointer to value to be removed. Be careful, if the values stored in the hash table |
| 225 | * are pointers, \p value_p must be a pointer to a pointer. |
| 226 | * @param[in] hash Hash of the stored value. |
Michal Vasko | 4a4c7ed | 2020-07-17 09:30:12 +0200 | [diff] [blame] | 227 | * @return LY_SUCCESS on success, |
| 228 | * @return LY_ENOTFOUND if value was not found. |
Radek Krejci | 5aeea3a | 2018-09-05 13:29:36 +0200 | [diff] [blame] | 229 | */ |
| 230 | LY_ERR lyht_remove(struct hash_table *ht, void *val_p, uint32_t hash); |
| 231 | |
| 232 | #endif /* LY_HASH_TABLE_H_ */ |