blob: c39b9025db8e4eb4ee33dd4917695113fe7b3d36 [file] [log] [blame]
/**
 * @file hash_table.c
 * @author Radek Krejci <rkrejci@cesnet.cz>
 * @brief libyang dictionary for storing strings and generic hash table
 *
 * Copyright (c) 2015 - 2018 CESNET, z.s.p.o.
 *
 * This source code is licensed under BSD 3-Clause License (the "License").
 * You may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://opensource.org/licenses/BSD-3-Clause
 */
14
15#include <string.h>
16#include <stdint.h>
17#include <stdlib.h>
18#include <pthread.h>
19#include <assert.h>
20
21#include "common.h"
22#include "context.h"
23#include "hash_table.h"
24
25static int
26lydict_val_eq(void *val1_p, void *val2_p, int UNUSED(mod), void *cb_data)
27{
Michal Vaskob3d0d6b2018-09-07 10:17:33 +020028 LY_CHECK_ARG_RET(NULL, val1_p, val2_p, cb_data, 0);
Radek Krejci5aeea3a2018-09-05 13:29:36 +020029
30 const char *str1 = ((struct dict_rec *)val1_p)->value;
31 const char *str2 = ((struct dict_rec *)val2_p)->value;
32
Michal Vaskob3d0d6b2018-09-07 10:17:33 +020033 LY_CHECK_ERR_RET(!str1, LOGARG(NULL, val1_p), 0);
34 LY_CHECK_ERR_RET(!str2, LOGARG(NULL, val2_p), 0);
Radek Krejci5aeea3a2018-09-05 13:29:36 +020035
36 if (strncmp(str1, str2, *(size_t *)cb_data) == 0) {
37 return 1;
38 }
39
40 return 0;
41}
42
43void
44lydict_init(struct dict_table *dict)
45{
Michal Vaskob3d0d6b2018-09-07 10:17:33 +020046 LY_CHECK_ARG_RET(NULL, dict,);
Radek Krejci5aeea3a2018-09-05 13:29:36 +020047
48 dict->hash_tab = lyht_new(1024, sizeof(struct dict_rec), lydict_val_eq, NULL, 1);
Michal Vaskob3d0d6b2018-09-07 10:17:33 +020049 LY_CHECK_ERR_RET(!dict->hash_tab, LOGINT(NULL), );
Radek Krejci5aeea3a2018-09-05 13:29:36 +020050 pthread_mutex_init(&dict->lock, NULL);
51}
52
53void
54lydict_clean(struct dict_table *dict)
55{
56 unsigned int i;
57 struct dict_rec *dict_rec = NULL;
58 struct ht_rec *rec = NULL;
59
Michal Vaskob3d0d6b2018-09-07 10:17:33 +020060 LY_CHECK_ARG_RET(NULL, dict,);
Radek Krejci5aeea3a2018-09-05 13:29:36 +020061
62 for (i = 0; i < dict->hash_tab->size; i++) {
63 /* get ith record */
64 rec = (struct ht_rec *)&dict->hash_tab->recs[i * dict->hash_tab->rec_size];
65 if (rec->hits == 1) {
66 /*
67 * this should not happen, all records inserted into
68 * dictionary are supposed to be removed using lydict_remove()
69 * before calling lydict_clean()
70 */
71 dict_rec = (struct dict_rec *)rec->val;
72 LOGWRN(NULL, "String \"%s\" not freed from the dictionary, refcount %d", dict_rec->value, dict_rec->refcount);
73 /* if record wasn't removed before free string allocated for that record */
74#ifdef NDEBUG
75 free(dict_rec->value);
76#endif
77 }
78 }
79
80 /* free table and destroy mutex */
81 lyht_free(dict->hash_tab);
82 pthread_mutex_destroy(&dict->lock);
83}
84
/*
 * Bob Jenkins' one-at-a-time hash
 * http://www.burtleburtle.net/bob/hash/doobs.html
 *
 * Spooky hash is faster, but it works only for little endian architectures.
 *
 * Hashes the first len bytes of key and returns the finished 32-bit hash.
 */
static uint32_t
dict_hash(const char *key, size_t len)
{
    uint32_t acc = 0;
    uint32_t pos = 0;

    /* mixing step, once per input byte */
    while (pos < len) {
        acc += key[pos];
        acc += acc << 10;
        acc ^= acc >> 6;
        ++pos;
    }

    /* final avalanche */
    acc += acc << 3;
    acc ^= acc >> 11;
    acc += acc << 15;

    return acc;
}
106
/*
 * Incremental variant of the one-at-a-time hash.
 *
 * Usage:
 * - init hash to 0
 * - repeatedly call dict_hash_multi(), provide hash from the last call
 * - call dict_hash_multi() with key_part = NULL to finish the hash
 */
uint32_t
dict_hash_multi(uint32_t hash, const char *key_part, size_t len)
{
    uint32_t pos;

    if (!key_part) {
        /* no more input, apply the final avalanche */
        hash += hash << 3;
        hash ^= hash >> 11;
        hash += hash << 15;
        return hash;
    }

    /* mix another part of the key into the running hash */
    for (pos = 0; pos < len; ++pos) {
        hash += key_part[pos];
        hash += hash << 10;
        hash ^= hash >> 6;
    }

    return hash;
}
132
133API void
134lydict_remove(struct ly_ctx *ctx, const char *value)
135{
136 size_t len;
137 int ret;
138 uint32_t hash;
139 struct dict_rec rec, *match = NULL;
140 char *val_p;
141
Michal Vasko0f6b3e22018-09-07 12:18:12 +0200142 LY_CHECK_ARG_RET(ctx, ctx,);
143
144 if (!value) {
145 return NULL;
146 }
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200147
148 len = strlen(value);
149 hash = dict_hash(value, len);
150
151 /* create record for lyht_find call */
152 rec.value = (char *)value;
153 rec.refcount = 0;
154
155 pthread_mutex_lock(&ctx->dict.lock);
156 /* set len as data for compare callback */
157 lyht_set_cb_data(ctx->dict.hash_tab, (void *)&len);
158 /* check if value is already inserted */
159 ret = lyht_find(ctx->dict.hash_tab, &rec, hash, (void **)&match);
160
161 if (ret == 0) {
162 LY_CHECK_ERR_GOTO(!match, LOGINT(ctx), finish);
163
164 /* if value is already in dictionary, decrement reference counter */
165 match->refcount--;
166 if (match->refcount == 0) {
167 /*
168 * remove record
169 * save pointer to stored string before lyht_remove to
170 * free it after it is removed from hash table
171 */
172 val_p = match->value;
173 ret = lyht_remove(ctx->dict.hash_tab, &rec, hash);
174 free(val_p);
175 LY_CHECK_ERR_GOTO(ret, LOGINT(ctx), finish);
176 }
177 }
178
179finish:
180 pthread_mutex_unlock(&ctx->dict.lock);
181}
182
183static char *
184dict_insert(struct ly_ctx *ctx, char *value, size_t len, int zerocopy)
185{
186 struct dict_rec *match = NULL, rec;
187 int ret = 0;
188 uint32_t hash;
189
190 hash = dict_hash(value, len);
191 /* set len as data for compare callback */
192 lyht_set_cb_data(ctx->dict.hash_tab, (void *)&len);
193 /* create record for lyht_insert */
194 rec.value = value;
195 rec.refcount = 1;
196
197 LOGDBG(LY_LDGDICT, "inserting \"%s\"", rec.value);
198 ret = lyht_insert(ctx->dict.hash_tab, (void *)&rec, hash, (void **)&match);
199 if (ret == 1) {
200 match->refcount++;
201 if (zerocopy) {
202 free(value);
203 }
204 } else if (ret == 0) {
205 if (!zerocopy) {
206 /*
207 * allocate string for new record
208 * record is already inserted in hash table
209 */
210 match->value = malloc(sizeof *match->value * (len + 1));
Michal Vaskob3d0d6b2018-09-07 10:17:33 +0200211 LY_CHECK_ERR_RET(!match->value, LOGMEM(ctx), NULL);
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200212 memcpy(match->value, value, len);
213 match->value[len] = '\0';
214 }
215 } else {
216 /* lyht_insert returned error */
217 LOGINT(ctx);
218 return NULL;
219 }
220
221 return match->value;
222}
223
224API const char *
225lydict_insert(struct ly_ctx *ctx, const char *value, size_t len)
226{
227 const char *result;
228
Michal Vasko0f6b3e22018-09-07 12:18:12 +0200229 LY_CHECK_ARG_RET(ctx, ctx, NULL);
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200230
Michal Vasko0f6b3e22018-09-07 12:18:12 +0200231 if (!value) {
232 return NULL;
233 } else if (!len) {
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200234 len = strlen(value);
235 }
236
237 pthread_mutex_lock(&ctx->dict.lock);
238 result = dict_insert(ctx, (char *)value, len, 0);
239 pthread_mutex_unlock(&ctx->dict.lock);
240
241 return result;
242}
243
244API const char *
245lydict_insert_zc(struct ly_ctx *ctx, char *value)
246{
247 const char *result;
248
Michal Vasko0f6b3e22018-09-07 12:18:12 +0200249 LY_CHECK_ARG_RET(ctx, ctx, NULL);
250
251 if (!value) {
252 return NULL;
253 }
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200254
255 pthread_mutex_lock(&ctx->dict.lock);
256 result = dict_insert(ctx, value, strlen(value), 1);
257 pthread_mutex_unlock(&ctx->dict.lock);
258
259 return result;
260}
261
/**
 * @brief Get the record at a given index of a record array.
 *
 * The byte offset is computed in size_t so that it cannot wrap around in
 * 32-bit arithmetic for very large tables.
 *
 * @param[in] recs Start of the record array.
 * @param[in] rec_size Size of one record in bytes.
 * @param[in] idx Index of the wanted record.
 * @return Pointer to the record.
 */
static struct ht_rec *
lyht_get_rec(unsigned char *recs, uint16_t rec_size, uint32_t idx)
{
    return (struct ht_rec *)(recs + (size_t)idx * rec_size);
}
267
268struct hash_table *
269lyht_new(uint32_t size, uint16_t val_size, values_equal_cb val_equal, void *cb_data, int resize)
270{
271 struct hash_table *ht;
272
273 /* check that 2^x == size (power of 2) */
274 assert(size && !(size & (size - 1)));
275 assert(val_equal && val_size);
276 assert(resize == 0 || resize == 1);
277
278 if (size < LYHT_MIN_SIZE) {
279 size = LYHT_MIN_SIZE;
280 }
281
282 ht = malloc(sizeof *ht);
Michal Vaskob3d0d6b2018-09-07 10:17:33 +0200283 LY_CHECK_ERR_RET(!ht, LOGMEM(NULL), NULL);
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200284
285 ht->used = 0;
286 ht->size = size;
287 ht->val_equal = val_equal;
288 ht->cb_data = cb_data;
289 ht->resize = (uint16_t)resize;
290
291 ht->rec_size = (sizeof(struct ht_rec) - 1) + val_size;
292 /* allocate the records correctly */
293 ht->recs = calloc(size, ht->rec_size);
Michal Vaskob3d0d6b2018-09-07 10:17:33 +0200294 LY_CHECK_ERR_RET(!ht->recs, free(ht); LOGMEM(NULL), NULL);
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200295
296 return ht;
297}
298
299values_equal_cb
300lyht_set_cb(struct hash_table *ht, values_equal_cb new_val_equal)
301{
302 values_equal_cb prev;
303
304 prev = ht->val_equal;
305 ht->val_equal = new_val_equal;
306 return prev;
307}
308
309void *
310lyht_set_cb_data(struct hash_table *ht, void *new_cb_data)
311{
312 void *prev;
313
314 prev = ht->cb_data;
315 ht->cb_data = new_cb_data;
316 return prev;
317}
318
319struct hash_table *
320lyht_dup(const struct hash_table *orig)
321{
322 struct hash_table *ht;
323
Michal Vaskob3d0d6b2018-09-07 10:17:33 +0200324 LY_CHECK_ARG_RET(NULL, orig, NULL);
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200325
326 ht = lyht_new(orig->size, orig->rec_size - (sizeof(struct ht_rec) - 1), orig->val_equal, orig->cb_data, orig->resize ? 1 : 0);
327 if (!ht) {
328 return NULL;
329 }
330
331 memcpy(ht->recs, orig->recs, orig->used * orig->rec_size);
332 ht->used = orig->used;
333 return ht;
334}
335
336void
337lyht_free(struct hash_table *ht)
338{
339 if (ht) {
340 free(ht->recs);
341 free(ht);
342 }
343}
344
345static LY_ERR
346lyht_resize(struct hash_table *ht, int enlarge)
347{
348 struct ht_rec *rec;
349 unsigned char *old_recs;
350 uint32_t i, old_size;
351 int ret;
352
353 old_recs = ht->recs;
354 old_size = ht->size;
355
356 if (enlarge) {
357 /* double the size */
358 ht->size <<= 1;
359 } else {
360 /* half the size */
361 ht->size >>= 1;
362 }
363
364 ht->recs = calloc(ht->size, ht->rec_size);
Michal Vaskob3d0d6b2018-09-07 10:17:33 +0200365 LY_CHECK_ERR_RET(!ht->recs, LOGMEM(NULL); ht->recs = old_recs; ht->size = old_size, LY_EMEM);
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200366
367 /* reset used, it will increase again */
368 ht->used = 0;
369
370 /* add all the old records into the new records array */
371 for (i = 0; i < old_size; ++i) {
372 rec = lyht_get_rec(old_recs, ht->rec_size, i);
373 if (rec->hits > 0) {
374 ret = lyht_insert(ht, rec->val, rec->hash, NULL);
375 assert(!ret);
376 (void)ret;
377 }
378 }
379
380 /* final touches */
381 free(old_recs);
382 return LY_SUCCESS;
383}
384
385/* return: 0 - hash found, returned its record,
386 * 1 - hash not found, returned the record where it would be inserted */
387static int
388lyht_find_first(struct hash_table *ht, uint32_t hash, struct ht_rec **rec_p)
389{
390 struct ht_rec *rec;
391 uint32_t i, idx;
392
393 if (rec_p) {
394 *rec_p = NULL;
395 }
396
397 idx = i = hash & (ht->size - 1);
398 rec = lyht_get_rec(ht->recs, ht->rec_size, idx);
399
400 /* skip through overflow and deleted records */
401 while ((rec->hits != 0) && ((rec->hits == -1) || ((rec->hash & (ht->size - 1)) != idx))) {
402 if ((rec->hits == -1) && rec_p && !(*rec_p)) {
403 /* remember this record for return */
404 *rec_p = rec;
405 }
406 i = (i + 1) % ht->size;
407 if (i == idx) {
408 /* we went through all the records (very unlikely, but possible when many records are invalid),
409 * just return not found */
410 assert(!rec_p || *rec_p);
411 return 1;
412 }
413 rec = lyht_get_rec(ht->recs, ht->rec_size, i);
414 }
415 if (rec->hits == 0) {
416 /* we could not find the value */
417 if (rec_p && !*rec_p) {
418 *rec_p = rec;
419 }
420 return 1;
421 }
422
423 /* we have found a record with equal (shortened) hash */
424 if (rec_p) {
425 *rec_p = rec;
426 }
427 return 0;
428}
429
430/**
431 * @brief Search for the next collision.
432 *
433 * @param[in] ht Hash table to search in.
434 * @param[in,out] last Last returned collision record.
435 * @param[in] first First collision record (hits > 1).
436 * @return 0 when hash collision found, \p last points to this next collision,
437 * 1 when hash collision not found, \p last points to the record where it would be inserted.
438 */
439static int
440lyht_find_collision(struct hash_table *ht, struct ht_rec **last, struct ht_rec *first)
441{
442 struct ht_rec *empty = NULL;
443 uint32_t i, idx;
444
445 assert(last && *last);
446
447 idx = (*last)->hash & (ht->size - 1);
448 i = (((unsigned char *)*last) - ht->recs) / ht->rec_size;
449
450 do {
451 i = (i + 1) % ht->size;
452 *last = lyht_get_rec(ht->recs, ht->rec_size, i);
453 if (*last == first) {
454 /* we went through all the records (very unlikely, but possible when many records are invalid),
455 * just return an invalid record */
456 assert(empty);
457 *last = empty;
458 return 1;
459 }
460
461 if (((*last)->hits == -1) && !empty) {
462 empty = *last;
463 }
464 } while (((*last)->hits != 0) && (((*last)->hits == -1) || (((*last)->hash & (ht->size - 1)) != idx)));
465
466 if ((*last)->hits > 0) {
467 /* we found a collision */
468 assert((*last)->hits == 1);
469 return 0;
470 }
471
472 /* no next collision found, return the record where it would be inserted */
473 if (empty) {
474 *last = empty;
475 } /* else (*last)->hits == 0, it is already correct */
476 return 1;
477}
478
479int
480lyht_find(struct hash_table *ht, void *val_p, uint32_t hash, void **match_p)
481{
482 struct ht_rec *rec, *crec;
483 uint32_t i, c;
484 int r;
485
486 if (lyht_find_first(ht, hash, &rec)) {
487 /* not found */
488 return 1;
489 }
490 if ((rec->hash == hash) && ht->val_equal(val_p, &rec->val, 0, ht->cb_data)) {
491 /* even the value matches */
492 if (match_p) {
493 *match_p = rec->val;
494 }
495 return 0;
496 }
497
498 /* some collisions, we need to go through them, too */
499 crec = rec;
500 c = rec->hits;
501 for (i = 1; i < c; ++i) {
502 r = lyht_find_collision(ht, &rec, crec);
503 assert(!r);
504 (void)r;
505
506 /* compare values */
507 if ((rec->hash == hash) && ht->val_equal(val_p, &rec->val, 0, ht->cb_data)) {
508 if (match_p) {
509 *match_p = rec->val;
510 }
511 return 0;
512 }
513 }
514
515 /* not found even in collisions */
516 return 1;
517}
518
519int
520lyht_find_next(struct hash_table *ht, void *val_p, uint32_t hash, void **match_p)
521{
522 struct ht_rec *rec, *crec;
523 uint32_t i, c;
524 int r, found = 0;
525
526 if (lyht_find_first(ht, hash, &rec)) {
527 /* not found, cannot happen */
528 assert(0);
529 }
530
531 if ((rec->hash == hash) && ht->val_equal(val_p, &rec->val, 1, ht->cb_data)) {
532 /* previously returned value */
533 found = 1;
534 }
535
536 if (rec->hits == 1) {
537 /* there are no more similar values */
538 assert(rec->hash == hash);
539 assert(found);
540 return 1;
541 }
542
543 /* go through collisions and find next one after the previous one */
544 crec = rec;
545 c = rec->hits;
546 for (i = 1; i < c; ++i) {
547 r = lyht_find_collision(ht, &rec, crec);
548 assert(!r);
549 (void)r;
550
551 if (rec->hash != hash) {
552 /* a normal collision, we are not interested in those */
553 continue;
554 }
555
556 if (found) {
557 /* next value with equal hash, found our value */
558 if (match_p) {
559 *match_p = rec->val;
560 }
561 return 0;
562 }
563
564 if (!ht->val_equal(val_p, &rec->val, 1, ht->cb_data)) {
565 /* already returned value, skip */
566 continue;
567 }
568
569 /* this one was returned previously, continue looking */
570 found = 1;
571 }
572
573 /* the last equal value was already returned */
574 assert(found);
575 return 1;
576}
577
578LY_ERR
579lyht_insert_with_resize_cb(struct hash_table *ht, void *val_p, uint32_t hash,
580 values_equal_cb resize_val_equal, void **match_p)
581{
582 struct ht_rec *rec, *crec = NULL;
583 int32_t i;
584 int r, ret;
585 values_equal_cb old_val_equal;
586
587 if (!lyht_find_first(ht, hash, &rec)) {
588 /* we found matching shortened hash */
589 if ((rec->hash == hash) && ht->val_equal(val_p, &rec->val, 1, ht->cb_data)) {
590 /* even the value matches */
591 if (match_p) {
592 *match_p = (void *)&rec->val;
593 }
594 return LY_EEXIST;
595 }
596
597 /* some collisions, we need to go through them, too */
598 crec = rec;
599 for (i = 1; i < crec->hits; ++i) {
600 r = lyht_find_collision(ht, &rec, crec);
601 assert(!r);
602
603 /* compare values */
604 if ((rec->hash == hash) && ht->val_equal(val_p, &rec->val, 1, ht->cb_data)) {
605 if (match_p) {
606 *match_p = (void *)&rec->val;
607 }
608 return LY_EEXIST;
609 }
610 }
611
612 /* value not found, get the record where it will be inserted */
613 r = lyht_find_collision(ht, &rec, crec);
614 assert(r);
615 }
616
617 /* insert it into the returned record */
618 assert(rec->hits < 1);
619 rec->hash = hash;
620 rec->hits = 1;
621 memcpy(&rec->val, val_p, ht->rec_size - (sizeof(struct ht_rec) - 1));
622 if (match_p) {
623 *match_p = (void *)&rec->val;
624 }
625
626 if (crec) {
627 /* there was a collision, increase hits */
628 if (crec->hits == INT32_MAX) {
629 LOGINT(NULL);
630 }
631 ++crec->hits;
632 }
633
634 /* check size & enlarge if needed */
635 ret = LY_SUCCESS;
636 ++ht->used;
637 if (ht->resize) {
638 r = (ht->used * 100) / ht->size;
639 if ((ht->resize == 1) && (r >= LYHT_FIRST_SHRINK_PERCENTAGE)) {
640 /* enable shrinking */
641 ht->resize = 2;
642 }
643 if ((ht->resize == 2) && (r >= LYHT_ENLARGE_PERCENTAGE)) {
644 if (resize_val_equal) {
645 old_val_equal = lyht_set_cb(ht, resize_val_equal);
646 }
647
648 /* enlarge */
649 ret = lyht_resize(ht, 1);
650 /* if hash_table was resized, we need to find new matching value */
651 if (ret == LY_SUCCESS && match_p) {
652 lyht_find(ht, val_p, hash, match_p);
653 }
654
655 if (resize_val_equal) {
656 lyht_set_cb(ht, old_val_equal);
657 }
658 }
659 }
660 return ret;
661}
662
663int
664lyht_insert(struct hash_table *ht, void *val_p, uint32_t hash, void **match_p)
665{
666 return lyht_insert_with_resize_cb(ht, val_p, hash, NULL, match_p);
667}
668
669LY_ERR
670lyht_remove(struct hash_table *ht, void *val_p, uint32_t hash)
671{
672 struct ht_rec *rec, *crec;
673 int32_t i;
674 int first_matched = 0, r, ret;
675
Michal Vaskob3d0d6b2018-09-07 10:17:33 +0200676 LY_CHECK_ERR_RET(lyht_find_first(ht, hash, &rec), LOGARG(NULL, hash), LY_EINVAL); /* hash not found */
Radek Krejci5aeea3a2018-09-05 13:29:36 +0200677
678 if ((rec->hash == hash) && ht->val_equal(val_p, &rec->val, 1, ht->cb_data)) {
679 /* even the value matches */
680 first_matched = 1;
681 }
682
683 /* we always need to go through collisions */
684 crec = rec;
685 for (i = 1; i < crec->hits; ++i) {
686 r = lyht_find_collision(ht, &rec, crec);
687 assert(!r);
688
689 /* compare values */
690 if (!first_matched && (rec->hash == hash) && ht->val_equal(val_p, &rec->val, 1, ht->cb_data)) {
691 break;
692 }
693 }
694
695 if (i < crec->hits) {
696 /* one of collisions matched, reduce collision count, remove the record */
697 assert(!first_matched);
698 --crec->hits;
699 rec->hits = -1;
700 } else if (first_matched) {
701 /* the first record matches */
702 if (crec != rec) {
703 /* ... so put the last collision in its place */
704 rec->hits = crec->hits - 1;
705 memcpy(crec, rec, ht->rec_size);
706 }
707 rec->hits = -1;
708 } else {
709 /* value not found even in collisions */
710 LOGINT(NULL);
711 return LY_EINT;
712 }
713
714 /* check size & shrink if needed */
715 ret = LY_SUCCESS;
716 --ht->used;
717 if (ht->resize == 2) {
718 r = (ht->used * 100) / ht->size;
719 if ((r < LYHT_SHRINK_PERCENTAGE) && (ht->size > LYHT_MIN_SIZE)) {
720 /* shrink */
721 ret = lyht_resize(ht, 0);
722 }
723 }
724
725 return ret;
726}