blob: 74f197153d9394db74b5aa6f3c3a5e2e2534b132 [file] [log] [blame]
Radek Krejci5aeea3a2018-09-05 13:29:36 +02001/**
2 * @file hash_table.c
3 * @author Radek Krejci <rkrejci@cesnet.cz>
4 * @brief libyang dictionary for storing strings and generic hash table
5 *
6 * Copyright (c) 2015 - 2018 CESNET, z.s.p.o.
7 *
8 * This source code is licensed under BSD 3-Clause License (the "License").
9 * You may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * https://opensource.org/licenses/BSD-3-Clause
13 */
14
15#include <string.h>
16#include <stdint.h>
17#include <stdlib.h>
18#include <pthread.h>
19#include <assert.h>
20
21#include "common.h"
22#include "context.h"
23#include "hash_table.h"
24
25static int
26lydict_val_eq(void *val1_p, void *val2_p, int UNUSED(mod), void *cb_data)
27{
28 LY_CHECK_ARG_NON_NULL_RETURN(NULL, val1_p, val2_p, cb_data, 0);
29
30 const char *str1 = ((struct dict_rec *)val1_p)->value;
31 const char *str2 = ((struct dict_rec *)val2_p)->value;
32
33 LY_CHECK_ERR_RETURN(!str1, LOGARG(NULL, val1_p), 0);
34 LY_CHECK_ERR_RETURN(!str2, LOGARG(NULL, val2_p), 0);
35
36 if (strncmp(str1, str2, *(size_t *)cb_data) == 0) {
37 return 1;
38 }
39
40 return 0;
41}
42
43void
44lydict_init(struct dict_table *dict)
45{
46 LY_CHECK_ARG_NON_NULL_RETURN(NULL, dict,);
47
48 dict->hash_tab = lyht_new(1024, sizeof(struct dict_rec), lydict_val_eq, NULL, 1);
49 LY_CHECK_ERR_RETURN(!dict->hash_tab, LOGINT(NULL), );
50 pthread_mutex_init(&dict->lock, NULL);
51}
52
53void
54lydict_clean(struct dict_table *dict)
55{
56 unsigned int i;
57 struct dict_rec *dict_rec = NULL;
58 struct ht_rec *rec = NULL;
59
60 LY_CHECK_ARG_NON_NULL_RETURN(NULL, dict,);
61
62 for (i = 0; i < dict->hash_tab->size; i++) {
63 /* get ith record */
64 rec = (struct ht_rec *)&dict->hash_tab->recs[i * dict->hash_tab->rec_size];
65 if (rec->hits == 1) {
66 /*
67 * this should not happen, all records inserted into
68 * dictionary are supposed to be removed using lydict_remove()
69 * before calling lydict_clean()
70 */
71 dict_rec = (struct dict_rec *)rec->val;
72 LOGWRN(NULL, "String \"%s\" not freed from the dictionary, refcount %d", dict_rec->value, dict_rec->refcount);
73 /* if record wasn't removed before free string allocated for that record */
74#ifdef NDEBUG
75 free(dict_rec->value);
76#endif
77 }
78 }
79
80 /* free table and destroy mutex */
81 lyht_free(dict->hash_tab);
82 pthread_mutex_destroy(&dict->lock);
83}
84
85/*
86 * Bob Jenkin's one-at-a-time hash
87 * http://www.burtleburtle.net/bob/hash/doobs.html
88 *
89 * Spooky hash is faster, but it works only for little endian architectures.
90 */
91static uint32_t
92dict_hash(const char *key, size_t len)
93{
94 uint32_t hash, i;
95
96 for (hash = i = 0; i < len; ++i) {
97 hash += key[i];
98 hash += (hash << 10);
99 hash ^= (hash >> 6);
100 }
101 hash += (hash << 3);
102 hash ^= (hash >> 11);
103 hash += (hash << 15);
104 return hash;
105}
106
107/*
108 * Usage:
109 * - init hash to 0
110 * - repeatedly call dict_hash_multi(), provide hash from the last call
111 * - call dict_hash_multi() with key_part = NULL to finish the hash
112 */
113uint32_t
114dict_hash_multi(uint32_t hash, const char *key_part, size_t len)
115{
116 uint32_t i;
117
118 if (key_part) {
119 for (i = 0; i < len; ++i) {
120 hash += key_part[i];
121 hash += (hash << 10);
122 hash ^= (hash >> 6);
123 }
124 } else {
125 hash += (hash << 3);
126 hash ^= (hash >> 11);
127 hash += (hash << 15);
128 }
129
130 return hash;
131}
132
133API void
134lydict_remove(struct ly_ctx *ctx, const char *value)
135{
136 size_t len;
137 int ret;
138 uint32_t hash;
139 struct dict_rec rec, *match = NULL;
140 char *val_p;
141
142 LY_CHECK_ARG_NON_NULL_RETURN(NULL, value, ctx,);
143
144 len = strlen(value);
145 hash = dict_hash(value, len);
146
147 /* create record for lyht_find call */
148 rec.value = (char *)value;
149 rec.refcount = 0;
150
151 pthread_mutex_lock(&ctx->dict.lock);
152 /* set len as data for compare callback */
153 lyht_set_cb_data(ctx->dict.hash_tab, (void *)&len);
154 /* check if value is already inserted */
155 ret = lyht_find(ctx->dict.hash_tab, &rec, hash, (void **)&match);
156
157 if (ret == 0) {
158 LY_CHECK_ERR_GOTO(!match, LOGINT(ctx), finish);
159
160 /* if value is already in dictionary, decrement reference counter */
161 match->refcount--;
162 if (match->refcount == 0) {
163 /*
164 * remove record
165 * save pointer to stored string before lyht_remove to
166 * free it after it is removed from hash table
167 */
168 val_p = match->value;
169 ret = lyht_remove(ctx->dict.hash_tab, &rec, hash);
170 free(val_p);
171 LY_CHECK_ERR_GOTO(ret, LOGINT(ctx), finish);
172 }
173 }
174
175finish:
176 pthread_mutex_unlock(&ctx->dict.lock);
177}
178
179static char *
180dict_insert(struct ly_ctx *ctx, char *value, size_t len, int zerocopy)
181{
182 struct dict_rec *match = NULL, rec;
183 int ret = 0;
184 uint32_t hash;
185
186 hash = dict_hash(value, len);
187 /* set len as data for compare callback */
188 lyht_set_cb_data(ctx->dict.hash_tab, (void *)&len);
189 /* create record for lyht_insert */
190 rec.value = value;
191 rec.refcount = 1;
192
193 LOGDBG(LY_LDGDICT, "inserting \"%s\"", rec.value);
194 ret = lyht_insert(ctx->dict.hash_tab, (void *)&rec, hash, (void **)&match);
195 if (ret == 1) {
196 match->refcount++;
197 if (zerocopy) {
198 free(value);
199 }
200 } else if (ret == 0) {
201 if (!zerocopy) {
202 /*
203 * allocate string for new record
204 * record is already inserted in hash table
205 */
206 match->value = malloc(sizeof *match->value * (len + 1));
207 LY_CHECK_ERR_RETURN(!match->value, LOGMEM(ctx), NULL);
208 memcpy(match->value, value, len);
209 match->value[len] = '\0';
210 }
211 } else {
212 /* lyht_insert returned error */
213 LOGINT(ctx);
214 return NULL;
215 }
216
217 return match->value;
218}
219
220API const char *
221lydict_insert(struct ly_ctx *ctx, const char *value, size_t len)
222{
223 const char *result;
224
225 LY_CHECK_ARG_NON_NULL_RETURN(NULL, value, NULL);
226
227 if (!len) {
228 len = strlen(value);
229 }
230
231 pthread_mutex_lock(&ctx->dict.lock);
232 result = dict_insert(ctx, (char *)value, len, 0);
233 pthread_mutex_unlock(&ctx->dict.lock);
234
235 return result;
236}
237
238API const char *
239lydict_insert_zc(struct ly_ctx *ctx, char *value)
240{
241 const char *result;
242
243 LY_CHECK_ARG_NON_NULL_RETURN(NULL, value, NULL);
244
245 pthread_mutex_lock(&ctx->dict.lock);
246 result = dict_insert(ctx, value, strlen(value), 1);
247 pthread_mutex_unlock(&ctx->dict.lock);
248
249 return result;
250}
251
252static struct ht_rec *
253lyht_get_rec(unsigned char *recs, uint16_t rec_size, uint32_t idx)
254{
255 return (struct ht_rec *)&recs[idx * rec_size];
256}
257
258struct hash_table *
259lyht_new(uint32_t size, uint16_t val_size, values_equal_cb val_equal, void *cb_data, int resize)
260{
261 struct hash_table *ht;
262
263 /* check that 2^x == size (power of 2) */
264 assert(size && !(size & (size - 1)));
265 assert(val_equal && val_size);
266 assert(resize == 0 || resize == 1);
267
268 if (size < LYHT_MIN_SIZE) {
269 size = LYHT_MIN_SIZE;
270 }
271
272 ht = malloc(sizeof *ht);
273 LY_CHECK_ERR_RETURN(!ht, LOGMEM(NULL), NULL);
274
275 ht->used = 0;
276 ht->size = size;
277 ht->val_equal = val_equal;
278 ht->cb_data = cb_data;
279 ht->resize = (uint16_t)resize;
280
281 ht->rec_size = (sizeof(struct ht_rec) - 1) + val_size;
282 /* allocate the records correctly */
283 ht->recs = calloc(size, ht->rec_size);
284 LY_CHECK_ERR_RETURN(!ht->recs, free(ht); LOGMEM(NULL), NULL);
285
286 return ht;
287}
288
289values_equal_cb
290lyht_set_cb(struct hash_table *ht, values_equal_cb new_val_equal)
291{
292 values_equal_cb prev;
293
294 prev = ht->val_equal;
295 ht->val_equal = new_val_equal;
296 return prev;
297}
298
299void *
300lyht_set_cb_data(struct hash_table *ht, void *new_cb_data)
301{
302 void *prev;
303
304 prev = ht->cb_data;
305 ht->cb_data = new_cb_data;
306 return prev;
307}
308
309struct hash_table *
310lyht_dup(const struct hash_table *orig)
311{
312 struct hash_table *ht;
313
314 LY_CHECK_ARG_NON_NULL_RETURN(NULL, orig, NULL);
315
316 ht = lyht_new(orig->size, orig->rec_size - (sizeof(struct ht_rec) - 1), orig->val_equal, orig->cb_data, orig->resize ? 1 : 0);
317 if (!ht) {
318 return NULL;
319 }
320
321 memcpy(ht->recs, orig->recs, orig->used * orig->rec_size);
322 ht->used = orig->used;
323 return ht;
324}
325
326void
327lyht_free(struct hash_table *ht)
328{
329 if (ht) {
330 free(ht->recs);
331 free(ht);
332 }
333}
334
335static LY_ERR
336lyht_resize(struct hash_table *ht, int enlarge)
337{
338 struct ht_rec *rec;
339 unsigned char *old_recs;
340 uint32_t i, old_size;
341 int ret;
342
343 old_recs = ht->recs;
344 old_size = ht->size;
345
346 if (enlarge) {
347 /* double the size */
348 ht->size <<= 1;
349 } else {
350 /* half the size */
351 ht->size >>= 1;
352 }
353
354 ht->recs = calloc(ht->size, ht->rec_size);
355 LY_CHECK_ERR_RETURN(!ht->recs, LOGMEM(NULL); ht->recs = old_recs; ht->size = old_size, LY_EMEM);
356
357 /* reset used, it will increase again */
358 ht->used = 0;
359
360 /* add all the old records into the new records array */
361 for (i = 0; i < old_size; ++i) {
362 rec = lyht_get_rec(old_recs, ht->rec_size, i);
363 if (rec->hits > 0) {
364 ret = lyht_insert(ht, rec->val, rec->hash, NULL);
365 assert(!ret);
366 (void)ret;
367 }
368 }
369
370 /* final touches */
371 free(old_recs);
372 return LY_SUCCESS;
373}
374
375/* return: 0 - hash found, returned its record,
376 * 1 - hash not found, returned the record where it would be inserted */
377static int
378lyht_find_first(struct hash_table *ht, uint32_t hash, struct ht_rec **rec_p)
379{
380 struct ht_rec *rec;
381 uint32_t i, idx;
382
383 if (rec_p) {
384 *rec_p = NULL;
385 }
386
387 idx = i = hash & (ht->size - 1);
388 rec = lyht_get_rec(ht->recs, ht->rec_size, idx);
389
390 /* skip through overflow and deleted records */
391 while ((rec->hits != 0) && ((rec->hits == -1) || ((rec->hash & (ht->size - 1)) != idx))) {
392 if ((rec->hits == -1) && rec_p && !(*rec_p)) {
393 /* remember this record for return */
394 *rec_p = rec;
395 }
396 i = (i + 1) % ht->size;
397 if (i == idx) {
398 /* we went through all the records (very unlikely, but possible when many records are invalid),
399 * just return not found */
400 assert(!rec_p || *rec_p);
401 return 1;
402 }
403 rec = lyht_get_rec(ht->recs, ht->rec_size, i);
404 }
405 if (rec->hits == 0) {
406 /* we could not find the value */
407 if (rec_p && !*rec_p) {
408 *rec_p = rec;
409 }
410 return 1;
411 }
412
413 /* we have found a record with equal (shortened) hash */
414 if (rec_p) {
415 *rec_p = rec;
416 }
417 return 0;
418}
419
420/**
421 * @brief Search for the next collision.
422 *
423 * @param[in] ht Hash table to search in.
424 * @param[in,out] last Last returned collision record.
425 * @param[in] first First collision record (hits > 1).
426 * @return 0 when hash collision found, \p last points to this next collision,
427 * 1 when hash collision not found, \p last points to the record where it would be inserted.
428 */
429static int
430lyht_find_collision(struct hash_table *ht, struct ht_rec **last, struct ht_rec *first)
431{
432 struct ht_rec *empty = NULL;
433 uint32_t i, idx;
434
435 assert(last && *last);
436
437 idx = (*last)->hash & (ht->size - 1);
438 i = (((unsigned char *)*last) - ht->recs) / ht->rec_size;
439
440 do {
441 i = (i + 1) % ht->size;
442 *last = lyht_get_rec(ht->recs, ht->rec_size, i);
443 if (*last == first) {
444 /* we went through all the records (very unlikely, but possible when many records are invalid),
445 * just return an invalid record */
446 assert(empty);
447 *last = empty;
448 return 1;
449 }
450
451 if (((*last)->hits == -1) && !empty) {
452 empty = *last;
453 }
454 } while (((*last)->hits != 0) && (((*last)->hits == -1) || (((*last)->hash & (ht->size - 1)) != idx)));
455
456 if ((*last)->hits > 0) {
457 /* we found a collision */
458 assert((*last)->hits == 1);
459 return 0;
460 }
461
462 /* no next collision found, return the record where it would be inserted */
463 if (empty) {
464 *last = empty;
465 } /* else (*last)->hits == 0, it is already correct */
466 return 1;
467}
468
469int
470lyht_find(struct hash_table *ht, void *val_p, uint32_t hash, void **match_p)
471{
472 struct ht_rec *rec, *crec;
473 uint32_t i, c;
474 int r;
475
476 if (lyht_find_first(ht, hash, &rec)) {
477 /* not found */
478 return 1;
479 }
480 if ((rec->hash == hash) && ht->val_equal(val_p, &rec->val, 0, ht->cb_data)) {
481 /* even the value matches */
482 if (match_p) {
483 *match_p = rec->val;
484 }
485 return 0;
486 }
487
488 /* some collisions, we need to go through them, too */
489 crec = rec;
490 c = rec->hits;
491 for (i = 1; i < c; ++i) {
492 r = lyht_find_collision(ht, &rec, crec);
493 assert(!r);
494 (void)r;
495
496 /* compare values */
497 if ((rec->hash == hash) && ht->val_equal(val_p, &rec->val, 0, ht->cb_data)) {
498 if (match_p) {
499 *match_p = rec->val;
500 }
501 return 0;
502 }
503 }
504
505 /* not found even in collisions */
506 return 1;
507}
508
509int
510lyht_find_next(struct hash_table *ht, void *val_p, uint32_t hash, void **match_p)
511{
512 struct ht_rec *rec, *crec;
513 uint32_t i, c;
514 int r, found = 0;
515
516 if (lyht_find_first(ht, hash, &rec)) {
517 /* not found, cannot happen */
518 assert(0);
519 }
520
521 if ((rec->hash == hash) && ht->val_equal(val_p, &rec->val, 1, ht->cb_data)) {
522 /* previously returned value */
523 found = 1;
524 }
525
526 if (rec->hits == 1) {
527 /* there are no more similar values */
528 assert(rec->hash == hash);
529 assert(found);
530 return 1;
531 }
532
533 /* go through collisions and find next one after the previous one */
534 crec = rec;
535 c = rec->hits;
536 for (i = 1; i < c; ++i) {
537 r = lyht_find_collision(ht, &rec, crec);
538 assert(!r);
539 (void)r;
540
541 if (rec->hash != hash) {
542 /* a normal collision, we are not interested in those */
543 continue;
544 }
545
546 if (found) {
547 /* next value with equal hash, found our value */
548 if (match_p) {
549 *match_p = rec->val;
550 }
551 return 0;
552 }
553
554 if (!ht->val_equal(val_p, &rec->val, 1, ht->cb_data)) {
555 /* already returned value, skip */
556 continue;
557 }
558
559 /* this one was returned previously, continue looking */
560 found = 1;
561 }
562
563 /* the last equal value was already returned */
564 assert(found);
565 return 1;
566}
567
568LY_ERR
569lyht_insert_with_resize_cb(struct hash_table *ht, void *val_p, uint32_t hash,
570 values_equal_cb resize_val_equal, void **match_p)
571{
572 struct ht_rec *rec, *crec = NULL;
573 int32_t i;
574 int r, ret;
575 values_equal_cb old_val_equal;
576
577 if (!lyht_find_first(ht, hash, &rec)) {
578 /* we found matching shortened hash */
579 if ((rec->hash == hash) && ht->val_equal(val_p, &rec->val, 1, ht->cb_data)) {
580 /* even the value matches */
581 if (match_p) {
582 *match_p = (void *)&rec->val;
583 }
584 return LY_EEXIST;
585 }
586
587 /* some collisions, we need to go through them, too */
588 crec = rec;
589 for (i = 1; i < crec->hits; ++i) {
590 r = lyht_find_collision(ht, &rec, crec);
591 assert(!r);
592
593 /* compare values */
594 if ((rec->hash == hash) && ht->val_equal(val_p, &rec->val, 1, ht->cb_data)) {
595 if (match_p) {
596 *match_p = (void *)&rec->val;
597 }
598 return LY_EEXIST;
599 }
600 }
601
602 /* value not found, get the record where it will be inserted */
603 r = lyht_find_collision(ht, &rec, crec);
604 assert(r);
605 }
606
607 /* insert it into the returned record */
608 assert(rec->hits < 1);
609 rec->hash = hash;
610 rec->hits = 1;
611 memcpy(&rec->val, val_p, ht->rec_size - (sizeof(struct ht_rec) - 1));
612 if (match_p) {
613 *match_p = (void *)&rec->val;
614 }
615
616 if (crec) {
617 /* there was a collision, increase hits */
618 if (crec->hits == INT32_MAX) {
619 LOGINT(NULL);
620 }
621 ++crec->hits;
622 }
623
624 /* check size & enlarge if needed */
625 ret = LY_SUCCESS;
626 ++ht->used;
627 if (ht->resize) {
628 r = (ht->used * 100) / ht->size;
629 if ((ht->resize == 1) && (r >= LYHT_FIRST_SHRINK_PERCENTAGE)) {
630 /* enable shrinking */
631 ht->resize = 2;
632 }
633 if ((ht->resize == 2) && (r >= LYHT_ENLARGE_PERCENTAGE)) {
634 if (resize_val_equal) {
635 old_val_equal = lyht_set_cb(ht, resize_val_equal);
636 }
637
638 /* enlarge */
639 ret = lyht_resize(ht, 1);
640 /* if hash_table was resized, we need to find new matching value */
641 if (ret == LY_SUCCESS && match_p) {
642 lyht_find(ht, val_p, hash, match_p);
643 }
644
645 if (resize_val_equal) {
646 lyht_set_cb(ht, old_val_equal);
647 }
648 }
649 }
650 return ret;
651}
652
653int
654lyht_insert(struct hash_table *ht, void *val_p, uint32_t hash, void **match_p)
655{
656 return lyht_insert_with_resize_cb(ht, val_p, hash, NULL, match_p);
657}
658
659LY_ERR
660lyht_remove(struct hash_table *ht, void *val_p, uint32_t hash)
661{
662 struct ht_rec *rec, *crec;
663 int32_t i;
664 int first_matched = 0, r, ret;
665
666 LY_CHECK_ERR_RETURN(lyht_find_first(ht, hash, &rec),
667 LOGARG(NULL, hash), LY_EINVAL); /* hash not found */
668
669 if ((rec->hash == hash) && ht->val_equal(val_p, &rec->val, 1, ht->cb_data)) {
670 /* even the value matches */
671 first_matched = 1;
672 }
673
674 /* we always need to go through collisions */
675 crec = rec;
676 for (i = 1; i < crec->hits; ++i) {
677 r = lyht_find_collision(ht, &rec, crec);
678 assert(!r);
679
680 /* compare values */
681 if (!first_matched && (rec->hash == hash) && ht->val_equal(val_p, &rec->val, 1, ht->cb_data)) {
682 break;
683 }
684 }
685
686 if (i < crec->hits) {
687 /* one of collisions matched, reduce collision count, remove the record */
688 assert(!first_matched);
689 --crec->hits;
690 rec->hits = -1;
691 } else if (first_matched) {
692 /* the first record matches */
693 if (crec != rec) {
694 /* ... so put the last collision in its place */
695 rec->hits = crec->hits - 1;
696 memcpy(crec, rec, ht->rec_size);
697 }
698 rec->hits = -1;
699 } else {
700 /* value not found even in collisions */
701 LOGINT(NULL);
702 return LY_EINT;
703 }
704
705 /* check size & shrink if needed */
706 ret = LY_SUCCESS;
707 --ht->used;
708 if (ht->resize == 2) {
709 r = (ht->used * 100) / ht->size;
710 if ((r < LYHT_SHRINK_PERCENTAGE) && (ht->size > LYHT_MIN_SIZE)) {
711 /* shrink */
712 ret = lyht_resize(ht, 0);
713 }
714 }
715
716 return ret;
717}