blob: e41a1f39068c0eb3a9615a53ac57225b49aebe02 [file] [log] [blame]
Radek Krejciee627582015-04-20 17:39:59 +02001/**
Michal Vasko6c810702018-03-14 16:23:21 +01002 * @file hash_table.c
Radek Krejciee627582015-04-20 17:39:59 +02003 * @author Radek Krejci <rkrejci@cesnet.cz>
Michal Vasko6c810702018-03-14 16:23:21 +01004 * @brief libyang dictionary for storing strings and generic hash table
Radek Krejciee627582015-04-20 17:39:59 +02005 *
Michal Vasko6c810702018-03-14 16:23:21 +01006 * Copyright (c) 2015 - 2018 CESNET, z.s.p.o.
Radek Krejciee627582015-04-20 17:39:59 +02007 *
Radek Krejci54f6fb32016-02-24 12:56:39 +01008 * This source code is licensed under BSD 3-Clause License (the "License").
9 * You may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
Michal Vasko8de098c2016-02-26 10:00:25 +010011 *
Radek Krejci54f6fb32016-02-24 12:56:39 +010012 * https://opensource.org/licenses/BSD-3-Clause
Radek Krejciee627582015-04-20 17:39:59 +020013 */
14
15#include <string.h>
16#include <stdint.h>
17#include <stdlib.h>
Radek Krejcifebcbfb2016-02-04 14:58:42 +010018#include <pthread.h>
Michal Vasko6c810702018-03-14 16:23:21 +010019#include <assert.h>
Radek Krejciee627582015-04-20 17:39:59 +020020
Radek Krejciee627582015-04-20 17:39:59 +020021#include "common.h"
Radek Krejcida04f4a2015-05-21 12:54:09 +020022#include "context.h"
Michal Vasko6c810702018-03-14 16:23:21 +010023#include "hash_table.h"
Radek Krejciee627582015-04-20 17:39:59 +020024
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020025void
26lydict_init(struct dict_table *dict)
Radek Krejcida04f4a2015-05-21 12:54:09 +020027{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020028 if (!dict) {
Michal Vasko53b7da02018-02-13 15:28:42 +010029 LOGARG;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020030 return;
31 }
Radek Krejciee627582015-04-20 17:39:59 +020032
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020033 dict->hash_mask = DICT_SIZE - 1;
Radek Krejcifebcbfb2016-02-04 14:58:42 +010034 pthread_mutex_init(&dict->lock, NULL);
Radek Krejcida04f4a2015-05-21 12:54:09 +020035}
36
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020037void
38lydict_clean(struct dict_table *dict)
Radek Krejcida04f4a2015-05-21 12:54:09 +020039{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020040 int i;
41 struct dict_rec *chain, *rec;
Radek Krejcida04f4a2015-05-21 12:54:09 +020042
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020043 if (!dict) {
Michal Vasko53b7da02018-02-13 15:28:42 +010044 LOGARG;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020045 return;
46 }
Radek Krejcida04f4a2015-05-21 12:54:09 +020047
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020048 for (i = 0; i < DICT_SIZE; i++) {
49 rec = &dict->recs[i];
50 chain = rec->next;
Radek Krejcida04f4a2015-05-21 12:54:09 +020051
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020052 free(rec->value);
53 while (chain) {
54 rec = chain;
55 chain = rec->next;
Radek Krejcida04f4a2015-05-21 12:54:09 +020056
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020057 free(rec->value);
58 free(rec);
59 }
60 }
Radek Krejcifebcbfb2016-02-04 14:58:42 +010061
62 pthread_mutex_destroy(&dict->lock);
Radek Krejcida04f4a2015-05-21 12:54:09 +020063}
Radek Krejciee627582015-04-20 17:39:59 +020064
65/*
66 * Bob Jenkin's one-at-a-time hash
67 * http://www.burtleburtle.net/bob/hash/doobs.html
68 *
69 * Spooky hash is faster, but it works only for little endian architectures.
70 */
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020071static uint32_t
72dict_hash(const char *key, size_t len)
Radek Krejciee627582015-04-20 17:39:59 +020073{
74 uint32_t hash, i;
75
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020076 for (hash = i = 0; i < len; ++i) {
Radek Krejciee627582015-04-20 17:39:59 +020077 hash += key[i];
78 hash += (hash << 10);
79 hash ^= (hash >> 6);
80 }
81 hash += (hash << 3);
82 hash ^= (hash >> 11);
83 hash += (hash << 15);
84 return hash;
85}
86
Radek Krejci63b79c82016-08-10 10:09:33 +020087/*
88 * Usage:
89 * - init hash to 0
90 * - repeatedly call dict_hash_multi(), provide hash from the last call
91 * - call dict_hash_multi() with key_part = NULL to finish the hash
92 */
93uint32_t
94dict_hash_multi(uint32_t hash, const char *key_part, size_t len)
95{
96 uint32_t i;
97
98 if (key_part) {
99 for (i = 0; i < len; ++i) {
100 hash += key_part[i];
101 hash += (hash << 10);
102 hash ^= (hash >> 6);
103 }
104 } else {
105 hash += (hash << 3);
106 hash ^= (hash >> 11);
107 hash += (hash << 15);
108 }
109
110 return hash;
111}
112
Radek Krejcifcf37bd2015-10-22 10:27:53 +0200113API void
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200114lydict_remove(struct ly_ctx *ctx, const char *value)
Radek Krejciee627582015-04-20 17:39:59 +0200115{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200116 size_t len;
117 uint32_t index;
118 struct dict_rec *record, *prev = NULL;
Radek Krejciee627582015-04-20 17:39:59 +0200119
Michal Vasko5c328e72016-10-13 12:17:37 +0200120 if (!value || !ctx) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200121 return;
122 }
Radek Krejciee627582015-04-20 17:39:59 +0200123
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200124 len = strlen(value);
Radek Krejciee627582015-04-20 17:39:59 +0200125
Radek Krejcifebcbfb2016-02-04 14:58:42 +0100126 pthread_mutex_lock(&ctx->dict.lock);
127
Michal Vasko5c328e72016-10-13 12:17:37 +0200128 if (!ctx->dict.used) {
129 pthread_mutex_unlock(&ctx->dict.lock);
130 return;
131 }
132
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200133 index = dict_hash(value, len) & ctx->dict.hash_mask;
134 record = &ctx->dict.recs[index];
Radek Krejciee627582015-04-20 17:39:59 +0200135
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200136 while (record && record->value != value) {
137 prev = record;
138 record = record->next;
139 }
Radek Krejciee627582015-04-20 17:39:59 +0200140
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200141 if (!record) {
142 /* record not found */
Radek Krejcifebcbfb2016-02-04 14:58:42 +0100143 pthread_mutex_unlock(&ctx->dict.lock);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200144 return;
145 }
Radek Krejciee627582015-04-20 17:39:59 +0200146
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200147 record->refcount--;
148 if (!record->refcount) {
149 free(record->value);
150 if (record->next) {
151 if (prev) {
152 /* change in dynamically allocated chain */
153 prev->next = record->next;
154 free(record);
155 } else {
156 /* move dynamically allocated record into the static array */
157 prev = record->next; /* temporary storage */
158 memcpy(record, record->next, sizeof *record);
159 free(prev);
160 }
161 } else if (prev) {
162 /* removing last record from the dynamically allocated chain */
163 prev->next = NULL;
164 free(record);
165 } else {
166 /* clean the static record content */
167 memset(record, 0, sizeof *record);
168 }
Radek Krejci6b0a8882016-02-16 17:37:21 +0100169 ctx->dict.used--;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200170 }
Radek Krejcifebcbfb2016-02-04 14:58:42 +0100171
172 pthread_mutex_unlock(&ctx->dict.lock);
Radek Krejciee627582015-04-20 17:39:59 +0200173}
174
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200175static char *
176dict_insert(struct ly_ctx *ctx, char *value, size_t len, int zerocopy)
Radek Krejciee627582015-04-20 17:39:59 +0200177{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200178 uint32_t index;
Radek Krejci592e00d2016-05-19 12:38:08 +0200179 int match = 0;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200180 struct dict_rec *record, *new;
Radek Krejciee627582015-04-20 17:39:59 +0200181
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200182 index = dict_hash(value, len) & ctx->dict.hash_mask;
183 record = &ctx->dict.recs[index];
Radek Krejciee627582015-04-20 17:39:59 +0200184
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200185 if (!record->value) {
186 /* first record with this hash */
187 if (zerocopy) {
188 record->value = value;
189 } else {
190 record->value = malloc((len + 1) * sizeof *record->value);
Michal Vasko53b7da02018-02-13 15:28:42 +0100191 LY_CHECK_ERR_RETURN(!record->value, LOGMEM(ctx), NULL);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200192 memcpy(record->value, value, len);
193 record->value[len] = '\0';
194 }
195 record->refcount = 1;
Radek Krejci592e00d2016-05-19 12:38:08 +0200196 if (len > DICT_REC_MAXLEN) {
197 record->len = 0;
198 } else {
199 record->len = len;
200 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200201 record->next = NULL;
Radek Krejciee627582015-04-20 17:39:59 +0200202
Radek Krejci6b0a8882016-02-16 17:37:21 +0100203 ctx->dict.used++;
204
Michal Vasko3e3228d2017-02-24 14:55:32 +0100205 LOGDBG(LY_LDGDICT, "inserting \"%s\"", record->value);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200206 return record->value;
207 }
Radek Krejciee627582015-04-20 17:39:59 +0200208
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200209 /* collision, search if the value is already in dict */
210 while (record) {
Radek Krejci592e00d2016-05-19 12:38:08 +0200211 if (record->len) {
212 /* for strings shorter than DICT_REC_MAXLEN we are able to speed up
213 * recognition of varying strings according to their lengths, and
214 * for strings with the same length it is safe to use faster memcmp()
215 * instead of strncmp() */
216 if ((record->len == len) && !memcmp(value, record->value, len)) {
217 match = 1;
218 }
219 } else {
220 if (!strncmp(value, record->value, len) && record->value[len] == '\0') {
221 match = 1;
222 }
223 }
224 if (match) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200225 /* record found */
Radek Krejci592e00d2016-05-19 12:38:08 +0200226 if (record->refcount == DICT_REC_MAXCOUNT) {
Michal Vasko53b7da02018-02-13 15:28:42 +0100227 LOGWRN(ctx, "Refcount overflow detected, duplicating dictionary record");
Radek Krejci592e00d2016-05-19 12:38:08 +0200228 break;
229 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200230 record->refcount++;
Radek Krejci4ea08382015-04-21 09:41:40 +0200231
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200232 if (zerocopy) {
233 free(value);
234 }
Radek Krejci4ea08382015-04-21 09:41:40 +0200235
Michal Vasko3e3228d2017-02-24 14:55:32 +0100236 LOGDBG(LY_LDGDICT, "inserting (refcount) \"%s\"", record->value);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200237 return record->value;
238 }
Radek Krejciee627582015-04-20 17:39:59 +0200239
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200240 if (!record->next) {
241 /* not present, add as a new record in chain */
242 break;
243 }
Radek Krejciee627582015-04-20 17:39:59 +0200244
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200245 record = record->next;
246 }
Radek Krejciee627582015-04-20 17:39:59 +0200247
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200248 /* create new record and add it behind the last record */
249 new = malloc(sizeof *record);
Michal Vasko53b7da02018-02-13 15:28:42 +0100250 LY_CHECK_ERR_RETURN(!new, LOGMEM(ctx), NULL);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200251 if (zerocopy) {
252 new->value = value;
253 } else {
254 new->value = malloc((len + 1) * sizeof *record->value);
Michal Vasko53b7da02018-02-13 15:28:42 +0100255 LY_CHECK_ERR_RETURN(!new->value, LOGMEM(ctx); free(new), NULL);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200256 memcpy(new->value, value, len);
257 new->value[len] = '\0';
258 }
259 new->refcount = 1;
Radek Krejci592e00d2016-05-19 12:38:08 +0200260 if (len > DICT_REC_MAXLEN) {
261 new->len = 0;
262 } else {
263 new->len = len;
264 }
265 new->next = record->next; /* in case of refcount overflow, we are not at the end of chain */
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200266 record->next = new;
Radek Krejciee627582015-04-20 17:39:59 +0200267
Radek Krejci6b0a8882016-02-16 17:37:21 +0100268 ctx->dict.used++;
269
Michal Vasko3e3228d2017-02-24 14:55:32 +0100270 LOGDBG(LY_LDGDICT, "inserting \"%s\" with collision ", new->value);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200271 return new->value;
Radek Krejciee627582015-04-20 17:39:59 +0200272}
Radek Krejci4ea08382015-04-21 09:41:40 +0200273
Radek Krejcifcf37bd2015-10-22 10:27:53 +0200274API const char *
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200275lydict_insert(struct ly_ctx *ctx, const char *value, size_t len)
Radek Krejci4ea08382015-04-21 09:41:40 +0200276{
Radek Krejcifebcbfb2016-02-04 14:58:42 +0100277 const char *result;
278
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200279 if (value && !len) {
280 len = strlen(value);
281 }
Radek Krejci8bc9ca02015-06-04 15:52:46 +0200282
Radek Krejcia5269642015-07-20 19:04:11 +0200283 if (!value) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200284 return NULL;
285 }
Radek Krejcifebcbfb2016-02-04 14:58:42 +0100286
287 pthread_mutex_lock(&ctx->dict.lock);
288 result = dict_insert(ctx, (char *)value, len, 0);
289 pthread_mutex_unlock(&ctx->dict.lock);
290
291 return result;
Radek Krejci4ea08382015-04-21 09:41:40 +0200292}
293
Radek Krejcifcf37bd2015-10-22 10:27:53 +0200294API const char *
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200295lydict_insert_zc(struct ly_ctx *ctx, char *value)
Radek Krejci4ea08382015-04-21 09:41:40 +0200296{
Radek Krejcifebcbfb2016-02-04 14:58:42 +0100297 const char *result;
298
Radek Krejcia5269642015-07-20 19:04:11 +0200299 if (!value) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200300 return NULL;
301 }
Radek Krejcifebcbfb2016-02-04 14:58:42 +0100302
303 pthread_mutex_lock(&ctx->dict.lock);
304 result = dict_insert(ctx, value, strlen(value), 1);
305 pthread_mutex_unlock(&ctx->dict.lock);
306
307 return result;
Radek Krejci4ea08382015-04-21 09:41:40 +0200308}
Michal Vasko6c810702018-03-14 16:23:21 +0100309
Michal Vasko419fce02018-03-21 11:55:09 +0100310static struct ht_rec *
311lyht_get_rec(unsigned char *recs, uint16_t rec_size, uint32_t idx)
312{
313 return (struct ht_rec *)&recs[idx * rec_size];
314}
315
Michal Vasko6c810702018-03-14 16:23:21 +0100316struct hash_table *
Michal Vasko419fce02018-03-21 11:55:09 +0100317lyht_new(uint32_t size, uint16_t val_size, values_equal_cb val_equal, void *cb_data, int resize)
Michal Vasko6c810702018-03-14 16:23:21 +0100318{
319 struct hash_table *ht;
320
321 /* check that 2^x == size (power of 2) */
Michal Vaskod0f84c32018-03-16 12:13:15 +0100322 assert(size && !(size & (size - 1)));
Michal Vasko419fce02018-03-21 11:55:09 +0100323 assert(val_equal && val_size);
Michal Vasko04210d72018-03-16 12:07:47 +0100324 assert(resize == 0 || resize == 1);
Michal Vasko6c810702018-03-14 16:23:21 +0100325
Michal Vaskod0f84c32018-03-16 12:13:15 +0100326 if (size < LYHT_MIN_SIZE) {
327 size = LYHT_MIN_SIZE;
328 }
329
Michal Vasko6c810702018-03-14 16:23:21 +0100330 ht = malloc(sizeof *ht);
331 LY_CHECK_ERR_RETURN(!ht, LOGMEM(NULL), NULL);
332
Michal Vasko6c810702018-03-14 16:23:21 +0100333 ht->used = 0;
334 ht->size = size;
335 ht->val_equal = val_equal;
336 ht->cb_data = cb_data;
Michal Vasko419fce02018-03-21 11:55:09 +0100337 ht->resize = (uint16_t)resize;
338
339 ht->rec_size = (sizeof(struct ht_rec) - 1) + val_size;
340 /* allocate the records correctly */
341 ht->recs = calloc(size, ht->rec_size);
342 LY_CHECK_ERR_RETURN(!ht->recs, free(ht); LOGMEM(NULL), NULL);
343
Michal Vasko6c810702018-03-14 16:23:21 +0100344 return ht;
345}
346
Michal Vasko8ac94532018-05-18 09:38:13 +0200347values_equal_cb
348lyht_set_cb(struct hash_table *ht, values_equal_cb new_val_equal)
349{
350 values_equal_cb prev;
351
352 prev = ht->val_equal;
353 ht->val_equal = new_val_equal;
354 return prev;
355}
356
357void *
358lyht_set_cb_data(struct hash_table *ht, void *new_cb_data)
359{
360 void *prev;
361
362 prev = ht->cb_data;
363 ht->cb_data = new_cb_data;
364 return prev;
365}
366
Michal Vasko8690d122018-04-26 14:28:58 +0200367struct hash_table *
368lyht_dup(const struct hash_table *orig)
369{
370 struct hash_table *ht;
371
372 if (!orig) {
373 return NULL;
374 }
375
376 ht = lyht_new(orig->size, orig->rec_size - (sizeof(struct ht_rec) - 1), orig->val_equal, orig->cb_data, orig->resize ? 1 : 0);
377 if (!ht) {
378 return NULL;
379 }
380
381 memcpy(ht->recs, orig->recs, orig->used * orig->rec_size);
382 ht->used = orig->used;
383 return ht;
384}
385
Michal Vasko6c810702018-03-14 16:23:21 +0100386void
387lyht_free(struct hash_table *ht)
388{
389 if (ht) {
390 free(ht->recs);
391 free(ht);
392 }
393}
394
Michal Vasko04210d72018-03-16 12:07:47 +0100395static int
396lyht_resize(struct hash_table *ht, int enlarge)
397{
Michal Vasko419fce02018-03-21 11:55:09 +0100398 struct ht_rec *rec;
399 unsigned char *old_recs;
Michal Vasko24affa02018-04-03 09:06:06 +0200400 uint32_t i, old_size;
Michal Vasko5a28e0e2018-08-07 11:29:06 +0200401 int ret;
Michal Vasko04210d72018-03-16 12:07:47 +0100402
403 old_recs = ht->recs;
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200404 old_size = ht->size;
Michal Vasko04210d72018-03-16 12:07:47 +0100405
406 if (enlarge) {
407 /* double the size */
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200408 ht->size <<= 1;
Michal Vasko04210d72018-03-16 12:07:47 +0100409 } else {
410 /* half the size */
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200411 ht->size >>= 1;
Michal Vasko04210d72018-03-16 12:07:47 +0100412 }
413
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200414 ht->recs = calloc(ht->size, ht->rec_size);
415 LY_CHECK_ERR_RETURN(!ht->recs, LOGMEM(NULL); ht->recs = old_recs; ht->size = old_size, -1);
Michal Vasko04210d72018-03-16 12:07:47 +0100416
417 /* reset used, it will increase again */
418 ht->used = 0;
419
420 /* add all the old records into the new records array */
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200421 for (i = 0; i < old_size; ++i) {
Michal Vasko419fce02018-03-21 11:55:09 +0100422 rec = lyht_get_rec(old_recs, ht->rec_size, i);
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200423 if (rec->hits > 0) {
Michal Vasko5a28e0e2018-08-07 11:29:06 +0200424 ret = lyht_insert(ht, rec->val, rec->hash);
425 assert(!ret);
426 (void)ret;
Michal Vasko04210d72018-03-16 12:07:47 +0100427 }
428 }
429
430 /* final touches */
Michal Vasko04210d72018-03-16 12:07:47 +0100431 free(old_recs);
432 return 0;
433}
434
Michal Vasko24affa02018-04-03 09:06:06 +0200435/* return: 0 - hash found, returned its record,
436 * 1 - hash not found, returned the record where it would be inserted */
Michal Vasko419fce02018-03-21 11:55:09 +0100437static int
Michal Vasko24affa02018-04-03 09:06:06 +0200438lyht_find_first(struct hash_table *ht, uint32_t hash, struct ht_rec **rec_p)
Michal Vasko6c810702018-03-14 16:23:21 +0100439{
Michal Vasko419fce02018-03-21 11:55:09 +0100440 struct ht_rec *rec;
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200441 uint32_t i, idx;
Michal Vasko6c810702018-03-14 16:23:21 +0100442
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200443 if (rec_p) {
444 *rec_p = NULL;
445 }
446
447 idx = i = hash & (ht->size - 1);
Michal Vasko419fce02018-03-21 11:55:09 +0100448 rec = lyht_get_rec(ht->recs, ht->rec_size, idx);
Michal Vasko6c810702018-03-14 16:23:21 +0100449
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200450 /* skip through overflow and deleted records */
451 while ((rec->hits != 0) && ((rec->hits == -1) || ((rec->hash & (ht->size - 1)) != idx))) {
452 if ((rec->hits == -1) && rec_p && !(*rec_p)) {
453 /* remember this record for return */
454 *rec_p = rec;
455 }
456 i = (i + 1) % ht->size;
Michal Vasko7f6f0f92018-05-21 13:57:30 +0200457 if (i == idx) {
458 /* we went through all the records (very unlikely, but possible when many records are invalid),
459 * just return not found */
460 assert(!rec_p || *rec_p);
461 return 1;
462 }
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200463 rec = lyht_get_rec(ht->recs, ht->rec_size, i);
Michal Vasko419fce02018-03-21 11:55:09 +0100464 }
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200465 if (rec->hits == 0) {
Michal Vasko419fce02018-03-21 11:55:09 +0100466 /* we could not find the value */
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200467 if (rec_p && !*rec_p) {
468 *rec_p = rec;
Michal Vasko6c810702018-03-14 16:23:21 +0100469 }
Michal Vasko419fce02018-03-21 11:55:09 +0100470 return 1;
Michal Vasko6c810702018-03-14 16:23:21 +0100471 }
472
Michal Vasko78548832018-06-04 12:07:52 +0200473 /* we have found a record with equal (shortened) hash */
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200474 if (rec_p) {
475 *rec_p = rec;
476 }
Michal Vasko24affa02018-04-03 09:06:06 +0200477 return 0;
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200478}
479
Michal Vasko1de7bd12018-05-21 14:27:02 +0200480/**
481 * @brief Search for the next collision.
482 *
483 * @param[in] ht Hash table to search in.
484 * @param[in,out] last Last returned collision record.
485 * @param[in] first First collision record (hits > 1).
486 * @return 0 when hash collision found, \p last points to this next collision,
487 * 1 when hash collision not found, \p last points to the record where it would be inserted.
488 */
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200489static int
Michal Vasko1de7bd12018-05-21 14:27:02 +0200490lyht_find_collision(struct hash_table *ht, struct ht_rec **last, struct ht_rec *first)
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200491{
492 struct ht_rec *empty = NULL;
Michal Vasko1de7bd12018-05-21 14:27:02 +0200493 uint32_t i, idx;
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200494
495 assert(last && *last);
496
497 idx = (*last)->hash & (ht->size - 1);
Michal Vasko1de7bd12018-05-21 14:27:02 +0200498 i = (((unsigned char *)*last) - ht->recs) / ht->rec_size;
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200499
500 do {
501 i = (i + 1) % ht->size;
Michal Vasko1de7bd12018-05-21 14:27:02 +0200502 *last = lyht_get_rec(ht->recs, ht->rec_size, i);
503 if (*last == first) {
Michal Vasko1d89f902018-05-21 13:01:32 +0200504 /* we went through all the records (very unlikely, but possible when many records are invalid),
505 * just return an invalid record */
506 assert(empty);
507 *last = empty;
508 return 1;
509 }
Michal Vasko1de7bd12018-05-21 14:27:02 +0200510
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200511 if (((*last)->hits == -1) && !empty) {
512 empty = *last;
513 }
514 } while (((*last)->hits != 0) && (((*last)->hits == -1) || (((*last)->hash & (ht->size - 1)) != idx)));
515
516 if ((*last)->hits > 0) {
517 /* we found a collision */
518 assert((*last)->hits == 1);
519 return 0;
520 }
521
522 /* no next collision found, return the record where it would be inserted */
523 if (empty) {
524 *last = empty;
525 } /* else (*last)->hits == 0, it is already correct */
526 return 1;
527}
528
529int
Michal Vaskofd5581a2018-04-25 13:20:20 +0200530lyht_find(struct hash_table *ht, void *val_p, uint32_t hash, void **match_p)
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200531{
Michal Vasko1de7bd12018-05-21 14:27:02 +0200532 struct ht_rec *rec, *crec;
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200533 uint32_t i, c;
Michal Vasko24affa02018-04-03 09:06:06 +0200534 int r;
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200535
Michal Vasko24affa02018-04-03 09:06:06 +0200536 if (lyht_find_first(ht, hash, &rec)) {
537 /* not found */
538 return 1;
539 }
Michal Vasko78548832018-06-04 12:07:52 +0200540 if ((rec->hash == hash) && ht->val_equal(val_p, &rec->val, 0, ht->cb_data)) {
Michal Vasko24affa02018-04-03 09:06:06 +0200541 /* even the value matches */
Michal Vaskofd5581a2018-04-25 13:20:20 +0200542 if (match_p) {
543 *match_p = rec->val;
544 }
Michal Vasko24affa02018-04-03 09:06:06 +0200545 return 0;
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200546 }
547
548 /* some collisions, we need to go through them, too */
Michal Vasko1de7bd12018-05-21 14:27:02 +0200549 crec = rec;
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200550 c = rec->hits;
551 for (i = 1; i < c; ++i) {
Michal Vasko1de7bd12018-05-21 14:27:02 +0200552 r = lyht_find_collision(ht, &rec, crec);
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200553 assert(!r);
Michal Vasko24affa02018-04-03 09:06:06 +0200554 (void)r;
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200555
556 /* compare values */
Michal Vasko78548832018-06-04 12:07:52 +0200557 if ((rec->hash == hash) && ht->val_equal(val_p, &rec->val, 0, ht->cb_data)) {
Michal Vaskofd5581a2018-04-25 13:20:20 +0200558 if (match_p) {
559 *match_p = rec->val;
560 }
Michal Vasko419fce02018-03-21 11:55:09 +0100561 return 0;
562 }
Michal Vasko419fce02018-03-21 11:55:09 +0100563 }
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200564
565 /* not found even in collisions */
566 return 1;
Michal Vasko419fce02018-03-21 11:55:09 +0100567}
568
569int
Michal Vaskofd5581a2018-04-25 13:20:20 +0200570lyht_find_next(struct hash_table *ht, void *val_p, uint32_t hash, void **match_p)
571{
Michal Vasko1de7bd12018-05-21 14:27:02 +0200572 struct ht_rec *rec, *crec;
Michal Vaskofd5581a2018-04-25 13:20:20 +0200573 uint32_t i, c;
574 int r, found = 0;
575
576 if (lyht_find_first(ht, hash, &rec)) {
577 /* not found, cannot happen */
578 assert(0);
579 }
580
Michal Vasko78548832018-06-04 12:07:52 +0200581 if ((rec->hash == hash) && ht->val_equal(val_p, &rec->val, 1, ht->cb_data)) {
Michal Vaskofd5581a2018-04-25 13:20:20 +0200582 /* previously returned value */
583 found = 1;
584 }
585
586 if (rec->hits == 1) {
587 /* there are no more similar values */
588 assert(rec->hash == hash);
589 assert(found);
590 return 1;
591 }
592
593 /* go through collisions and find next one after the previous one */
Michal Vasko1de7bd12018-05-21 14:27:02 +0200594 crec = rec;
Michal Vaskofd5581a2018-04-25 13:20:20 +0200595 c = rec->hits;
596 for (i = 1; i < c; ++i) {
Michal Vasko1de7bd12018-05-21 14:27:02 +0200597 r = lyht_find_collision(ht, &rec, crec);
Michal Vaskofd5581a2018-04-25 13:20:20 +0200598 assert(!r);
599 (void)r;
600
601 if (rec->hash != hash) {
602 /* a normal collision, we are not interested in those */
603 continue;
604 }
605
606 if (found) {
607 /* next value with equal hash, found our value */
608 if (match_p) {
609 *match_p = rec->val;
610 }
611 return 0;
612 }
613
Michal Vasko66342942018-04-27 13:56:21 +0200614 if (!ht->val_equal(val_p, &rec->val, 1, ht->cb_data)) {
Michal Vaskofd5581a2018-04-25 13:20:20 +0200615 /* already returned value, skip */
616 continue;
617 }
618
619 /* this one was returned previously, continue looking */
620 found = 1;
621 }
622
623 /* the last equal value was already returned */
624 assert(found);
625 return 1;
626}
627
628int
Michal Vaskoa1db4be2018-08-07 11:30:43 +0200629lyht_insert_with_resize_cb(struct hash_table *ht, void *val_p, uint32_t hash, values_equal_cb resize_val_equal)
Michal Vasko419fce02018-03-21 11:55:09 +0100630{
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200631 struct ht_rec *rec, *crec = NULL;
632 int32_t i;
Michal Vasko24affa02018-04-03 09:06:06 +0200633 int r, ret;
Michal Vaskoa1db4be2018-08-07 11:30:43 +0200634 values_equal_cb old_val_equal;
Michal Vasko419fce02018-03-21 11:55:09 +0100635
Michal Vasko24affa02018-04-03 09:06:06 +0200636 if (!lyht_find_first(ht, hash, &rec)) {
Michal Vasko78548832018-06-04 12:07:52 +0200637 /* we found matching shortened hash */
638 if ((rec->hash == hash) && ht->val_equal(val_p, &rec->val, 1, ht->cb_data)) {
Michal Vasko24affa02018-04-03 09:06:06 +0200639 /* even the value matches */
640 return 1;
641 }
Michal Vasko419fce02018-03-21 11:55:09 +0100642
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200643 /* some collisions, we need to go through them, too */
644 crec = rec;
645 for (i = 1; i < crec->hits; ++i) {
Michal Vasko1de7bd12018-05-21 14:27:02 +0200646 r = lyht_find_collision(ht, &rec, crec);
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200647 assert(!r);
648
649 /* compare values */
Michal Vasko78548832018-06-04 12:07:52 +0200650 if ((rec->hash == hash) && ht->val_equal(val_p, &rec->val, 1, ht->cb_data)) {
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200651 return 1;
652 }
Michal Vasko419fce02018-03-21 11:55:09 +0100653 }
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200654
655 /* value not found, get the record where it will be inserted */
Michal Vasko1de7bd12018-05-21 14:27:02 +0200656 r = lyht_find_collision(ht, &rec, crec);
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200657 assert(r);
Michal Vasko419fce02018-03-21 11:55:09 +0100658 }
659
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200660 /* insert it into the returned record */
661 assert(rec->hits < 1);
Michal Vasko419fce02018-03-21 11:55:09 +0100662 rec->hash = hash;
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200663 rec->hits = 1;
Michal Vasko419fce02018-03-21 11:55:09 +0100664 memcpy(&rec->val, val_p, ht->rec_size - (sizeof(struct ht_rec) - 1));
665
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200666 if (crec) {
667 /* there was a collision, increase hits */
Michal Vaskoe330d712018-04-25 13:20:43 +0200668 if (crec->hits == INT32_MAX) {
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200669 LOGINT(NULL);
670 }
671 ++crec->hits;
672 }
673
Michal Vasko419fce02018-03-21 11:55:09 +0100674 /* check size & enlarge if needed */
675 ret = 0;
Michal Vasko6c810702018-03-14 16:23:21 +0100676 ++ht->used;
Michal Vasko04210d72018-03-16 12:07:47 +0100677 if (ht->resize) {
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200678 r = (ht->used * 100) / ht->size;
679 if ((ht->resize == 1) && (r >= LYHT_FIRST_SHRINK_PERCENTAGE)) {
Michal Vasko04210d72018-03-16 12:07:47 +0100680 /* enable shrinking */
681 ht->resize = 2;
Michal Vasko419fce02018-03-21 11:55:09 +0100682 }
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200683 if ((ht->resize == 2) && (r >= LYHT_ENLARGE_PERCENTAGE)) {
Michal Vaskoa1db4be2018-08-07 11:30:43 +0200684 if (resize_val_equal) {
685 old_val_equal = lyht_set_cb(ht, resize_val_equal);
686 }
687
Michal Vasko04210d72018-03-16 12:07:47 +0100688 /* enlarge */
689 ret = lyht_resize(ht, 1);
Michal Vaskoa1db4be2018-08-07 11:30:43 +0200690
691 if (resize_val_equal) {
692 lyht_set_cb(ht, old_val_equal);
693 }
Michal Vasko04210d72018-03-16 12:07:47 +0100694 }
695 }
696 return ret;
Michal Vasko6c810702018-03-14 16:23:21 +0100697}
698
699int
Michal Vaskoa1db4be2018-08-07 11:30:43 +0200700lyht_insert(struct hash_table *ht, void *val_p, uint32_t hash)
701{
702 return lyht_insert_with_resize_cb(ht, val_p, hash, NULL);
703}
704
705int
Michal Vasko419fce02018-03-21 11:55:09 +0100706lyht_remove(struct hash_table *ht, void *val_p, uint32_t hash)
Michal Vasko6c810702018-03-14 16:23:21 +0100707{
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200708 struct ht_rec *rec, *crec;
709 int32_t i;
Michal Vasko24affa02018-04-03 09:06:06 +0200710 int first_matched = 0, r, ret;
Michal Vasko6c810702018-03-14 16:23:21 +0100711
Michal Vasko24affa02018-04-03 09:06:06 +0200712 if (lyht_find_first(ht, hash, &rec)) {
713 /* hash not found */
Michal Vasko6c810702018-03-14 16:23:21 +0100714 return 1;
715 }
Michal Vasko78548832018-06-04 12:07:52 +0200716 if ((rec->hash == hash) && ht->val_equal(val_p, &rec->val, 1, ht->cb_data)) {
Michal Vasko24affa02018-04-03 09:06:06 +0200717 /* even the value matches */
718 first_matched = 1;
719 }
Michal Vasko6c810702018-03-14 16:23:21 +0100720
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200721 /* we always need to go through collisions */
722 crec = rec;
723 for (i = 1; i < crec->hits; ++i) {
Michal Vasko1de7bd12018-05-21 14:27:02 +0200724 r = lyht_find_collision(ht, &rec, crec);
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200725 assert(!r);
Michal Vasko419fce02018-03-21 11:55:09 +0100726
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200727 /* compare values */
Michal Vasko78548832018-06-04 12:07:52 +0200728 if (!first_matched && (rec->hash == hash) && ht->val_equal(val_p, &rec->val, 1, ht->cb_data)) {
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200729 break;
730 }
Michal Vasko419fce02018-03-21 11:55:09 +0100731 }
732
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200733 if (i < crec->hits) {
734 /* one of collisions matched, reduce collision count, remove the record */
Michal Vasko24affa02018-04-03 09:06:06 +0200735 assert(!first_matched);
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200736 --crec->hits;
737 rec->hits = -1;
Michal Vasko24affa02018-04-03 09:06:06 +0200738 } else if (first_matched) {
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200739 /* the first record matches */
740 if (crec != rec) {
741 /* ... so put the last collision in its place */
742 rec->hits = crec->hits - 1;
743 memcpy(crec, rec, ht->rec_size);
744 }
745 rec->hits = -1;
746 } else {
747 /* value not found even in collisions */
Michal Vasko24affa02018-04-03 09:06:06 +0200748 assert(!first_matched);
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200749 return 1;
750 }
751
Michal Vasko419fce02018-03-21 11:55:09 +0100752 /* check size & shrink if needed */
753 ret = 0;
754 --ht->used;
755 if (ht->resize == 2) {
Michal Vasko7d3ad5a2018-03-27 14:22:19 +0200756 r = (ht->used * 100) / ht->size;
757 if ((r < LYHT_SHRINK_PERCENTAGE) && (ht->size > LYHT_MIN_SIZE)) {
Michal Vasko419fce02018-03-21 11:55:09 +0100758 /* shrink */
759 ret = lyht_resize(ht, 0);
Michal Vasko6c810702018-03-14 16:23:21 +0100760 }
Michal Vasko419fce02018-03-21 11:55:09 +0100761 }
Michal Vasko6c810702018-03-14 16:23:21 +0100762
Michal Vasko419fce02018-03-21 11:55:09 +0100763 return ret;
Michal Vasko6c810702018-03-14 16:23:21 +0100764}