| /** |
| * @file hash_table.c |
| * @author Radek Krejci <rkrejci@cesnet.cz> |
| * @brief libyang dictionary for storing strings and generic hash table |
| * |
| * Copyright (c) 2015 - 2018 CESNET, z.s.p.o. |
| * |
| * This source code is licensed under BSD 3-Clause License (the "License"). |
| * You may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * https://opensource.org/licenses/BSD-3-Clause |
| */ |
| |
| #include <string.h> |
| #include <stdint.h> |
| #include <stdlib.h> |
| #include <pthread.h> |
| #include <assert.h> |
| |
| #include "common.h" |
| #include "context.h" |
| #include "hash_table.h" |
| |
| static int |
| lydict_val_eq(void *val1_p, void *val2_p, int UNUSED(mod), void *cb_data) |
| { |
| if (!val1_p || !val2_p) { |
| LOGARG; |
| return 0; |
| } |
| |
| const char *str1 = ((struct dict_rec *)val1_p)->value; |
| const char *str2 = ((struct dict_rec *)val2_p)->value; |
| |
| if (!str1 || !str2 || !cb_data) { |
| LOGARG; |
| return 0; |
| } |
| |
| if (strncmp(str1, str2, *(size_t *)cb_data) == 0) { |
| return 1; |
| } |
| |
| return 0; |
| } |
| |
| void |
| lydict_init(struct dict_table *dict) |
| { |
| if (!dict) { |
| LOGARG; |
| return; |
| } |
| |
| dict->hash_tab = lyht_new(1024, sizeof(struct dict_rec), lydict_val_eq, NULL, 1); |
| LY_CHECK_ERR_RETURN(!dict->hash_tab, LOGINT(NULL), ); |
| pthread_mutex_init(&dict->lock, NULL); |
| } |
| |
| void |
| lydict_clean(struct dict_table *dict) |
| { |
| unsigned int i; |
| struct dict_rec *dict_rec = NULL; |
| struct ht_rec *rec = NULL; |
| |
| if (!dict) { |
| LOGARG; |
| return; |
| } |
| |
| for (i = 0; i < dict->hash_tab->size; i++) { |
| /* get ith record */ |
| rec = (struct ht_rec *)&dict->hash_tab->recs[i * dict->hash_tab->rec_size]; |
| if (rec->hits == 1) { |
| /* |
| * this should not happen, all records inserted into |
| * dictionary are supposed to be removed using lydict_remove() |
| * before calling lydict_clean() |
| */ |
| dict_rec = (struct dict_rec *)rec->val; |
| LOGWRN(NULL, "String \"%s\" not freed from the dictionary, refcount %d", dict_rec->value, dict_rec->refcount); |
| /* if record wasn't removed before free string allocated for that record */ |
| #ifdef NDEBUG |
| free(dict_rec->value); |
| #endif |
| } |
| } |
| |
| /* free table and destroy mutex */ |
| lyht_free(dict->hash_tab); |
| pthread_mutex_destroy(&dict->lock); |
| } |
| |
| /* |
| * Bob Jenkin's one-at-a-time hash |
| * http://www.burtleburtle.net/bob/hash/doobs.html |
| * |
| * Spooky hash is faster, but it works only for little endian architectures. |
| */ |
| static uint32_t |
| dict_hash(const char *key, size_t len) |
| { |
| uint32_t hash, i; |
| |
| for (hash = i = 0; i < len; ++i) { |
| hash += key[i]; |
| hash += (hash << 10); |
| hash ^= (hash >> 6); |
| } |
| hash += (hash << 3); |
| hash ^= (hash >> 11); |
| hash += (hash << 15); |
| return hash; |
| } |
| |
| /* |
| * Usage: |
| * - init hash to 0 |
| * - repeatedly call dict_hash_multi(), provide hash from the last call |
| * - call dict_hash_multi() with key_part = NULL to finish the hash |
| */ |
| uint32_t |
| dict_hash_multi(uint32_t hash, const char *key_part, size_t len) |
| { |
| uint32_t i; |
| |
| if (key_part) { |
| for (i = 0; i < len; ++i) { |
| hash += key_part[i]; |
| hash += (hash << 10); |
| hash ^= (hash >> 6); |
| } |
| } else { |
| hash += (hash << 3); |
| hash ^= (hash >> 11); |
| hash += (hash << 15); |
| } |
| |
| return hash; |
| } |
| |
| API void |
| lydict_remove(struct ly_ctx *ctx, const char *value) |
| { |
| size_t len; |
| int ret; |
| uint32_t hash; |
| struct dict_rec rec, *match = NULL; |
| char *val_p; |
| |
| if (!value || !ctx) { |
| return; |
| } |
| |
| len = strlen(value); |
| hash = dict_hash(value, len); |
| |
| /* create record for lyht_find call */ |
| rec.value = (char *)value; |
| rec.refcount = 0; |
| |
| pthread_mutex_lock(&ctx->dict.lock); |
| /* set len as data for compare callback */ |
| lyht_set_cb_data(ctx->dict.hash_tab, (void *)&len); |
| /* check if value is already inserted */ |
| ret = lyht_find(ctx->dict.hash_tab, &rec, hash, (void **)&match); |
| |
| if (ret == 0) { |
| LY_CHECK_ERR_GOTO(!match, LOGINT(ctx), finish); |
| |
| /* if value is already in dictionary, decrement reference counter */ |
| match->refcount--; |
| if (match->refcount == 0) { |
| /* |
| * remove record |
| * save pointer to stored string before lyht_remove to |
| * free it after it is removed from hash table |
| */ |
| val_p = match->value; |
| ret = lyht_remove(ctx->dict.hash_tab, &rec, hash); |
| free(val_p); |
| LY_CHECK_ERR_GOTO(ret, LOGINT(ctx), finish); |
| } |
| } |
| |
| finish: |
| pthread_mutex_unlock(&ctx->dict.lock); |
| } |
| |
| static char * |
| dict_insert(struct ly_ctx *ctx, char *value, size_t len, int zerocopy) |
| { |
| struct dict_rec *match = NULL, rec; |
| int ret = 0; |
| uint32_t hash; |
| |
| hash = dict_hash(value, len); |
| /* set len as data for compare callback */ |
| lyht_set_cb_data(ctx->dict.hash_tab, (void *)&len); |
| /* create record for lyht_insert */ |
| rec.value = value; |
| rec.refcount = 1; |
| |
| LOGDBG(LY_LDGDICT, "inserting \"%s\"", rec.value); |
| ret = lyht_insert(ctx->dict.hash_tab, (void *)&rec, hash, (void **)&match); |
| if (ret == 1) { |
| match->refcount++; |
| if (zerocopy) { |
| free(value); |
| } |
| } else if (ret == 0) { |
| if (!zerocopy) { |
| /* |
| * allocate string for new record |
| * record is already inserted in hash table |
| */ |
| match->value = malloc(sizeof *match->value * (len + 1)); |
| LY_CHECK_ERR_RETURN(!match->value, LOGMEM(ctx), NULL); |
| memcpy(match->value, value, len); |
| match->value[len] = '\0'; |
| } |
| } else { |
| /* lyht_insert returned error */ |
| LOGINT(ctx); |
| return NULL; |
| } |
| |
| return match->value; |
| } |
| |
| API const char * |
| lydict_insert(struct ly_ctx *ctx, const char *value, size_t len) |
| { |
| const char *result; |
| |
| if (!value) { |
| return NULL; |
| } |
| |
| if (!len) { |
| len = strlen(value); |
| } |
| |
| pthread_mutex_lock(&ctx->dict.lock); |
| result = dict_insert(ctx, (char *)value, len, 0); |
| pthread_mutex_unlock(&ctx->dict.lock); |
| |
| return result; |
| } |
| |
| API const char * |
| lydict_insert_zc(struct ly_ctx *ctx, char *value) |
| { |
| const char *result; |
| |
| if (!value) { |
| return NULL; |
| } |
| |
| pthread_mutex_lock(&ctx->dict.lock); |
| result = dict_insert(ctx, value, strlen(value), 1); |
| pthread_mutex_unlock(&ctx->dict.lock); |
| |
| return result; |
| } |
| |
| struct ht_rec * |
| lyht_get_rec(unsigned char *recs, uint16_t rec_size, uint32_t idx) |
| { |
| return (struct ht_rec *)&recs[idx * rec_size]; |
| } |
| |
| struct hash_table * |
| lyht_new(uint32_t size, uint16_t val_size, values_equal_cb val_equal, void *cb_data, int resize) |
| { |
| struct hash_table *ht; |
| |
| /* check that 2^x == size (power of 2) */ |
| assert(size && !(size & (size - 1))); |
| assert(val_equal && val_size); |
| assert(resize == 0 || resize == 1); |
| |
| if (size < LYHT_MIN_SIZE) { |
| size = LYHT_MIN_SIZE; |
| } |
| |
| ht = malloc(sizeof *ht); |
| LY_CHECK_ERR_RETURN(!ht, LOGMEM(NULL), NULL); |
| |
| ht->used = 0; |
| ht->size = size; |
| ht->val_equal = val_equal; |
| ht->cb_data = cb_data; |
| ht->resize = (uint16_t)resize; |
| |
| ht->rec_size = (sizeof(struct ht_rec) - 1) + val_size; |
| /* allocate the records correctly */ |
| ht->recs = calloc(size, ht->rec_size); |
| LY_CHECK_ERR_RETURN(!ht->recs, free(ht); LOGMEM(NULL), NULL); |
| |
| return ht; |
| } |
| |
| values_equal_cb |
| lyht_set_cb(struct hash_table *ht, values_equal_cb new_val_equal) |
| { |
| values_equal_cb prev; |
| |
| prev = ht->val_equal; |
| ht->val_equal = new_val_equal; |
| return prev; |
| } |
| |
| void * |
| lyht_set_cb_data(struct hash_table *ht, void *new_cb_data) |
| { |
| void *prev; |
| |
| prev = ht->cb_data; |
| ht->cb_data = new_cb_data; |
| return prev; |
| } |
| |
| struct hash_table * |
| lyht_dup(const struct hash_table *orig) |
| { |
| struct hash_table *ht; |
| |
| if (!orig) { |
| return NULL; |
| } |
| |
| ht = lyht_new(orig->size, orig->rec_size - (sizeof(struct ht_rec) - 1), orig->val_equal, orig->cb_data, orig->resize ? 1 : 0); |
| if (!ht) { |
| return NULL; |
| } |
| |
| memcpy(ht->recs, orig->recs, orig->used * orig->rec_size); |
| ht->used = orig->used; |
| return ht; |
| } |
| |
| void |
| lyht_free(struct hash_table *ht) |
| { |
| if (ht) { |
| free(ht->recs); |
| free(ht); |
| } |
| } |
| |
| static int |
| lyht_resize(struct hash_table *ht, int enlarge) |
| { |
| struct ht_rec *rec; |
| unsigned char *old_recs; |
| uint32_t i, old_size; |
| int ret; |
| |
| old_recs = ht->recs; |
| old_size = ht->size; |
| |
| if (enlarge) { |
| /* double the size */ |
| ht->size <<= 1; |
| } else { |
| /* half the size */ |
| ht->size >>= 1; |
| } |
| |
| ht->recs = calloc(ht->size, ht->rec_size); |
| LY_CHECK_ERR_RETURN(!ht->recs, LOGMEM(NULL); ht->recs = old_recs; ht->size = old_size, -1); |
| |
| /* reset used, it will increase again */ |
| ht->used = 0; |
| |
| /* add all the old records into the new records array */ |
| for (i = 0; i < old_size; ++i) { |
| rec = lyht_get_rec(old_recs, ht->rec_size, i); |
| if (rec->hits > 0) { |
| ret = lyht_insert(ht, rec->val, rec->hash, NULL); |
| assert(!ret); |
| (void)ret; |
| } |
| } |
| |
| /* final touches */ |
| free(old_recs); |
| return 0; |
| } |
| |
| /* return: 0 - hash found, returned its record, |
| * 1 - hash not found, returned the record where it would be inserted */ |
| static int |
| lyht_find_first(struct hash_table *ht, uint32_t hash, struct ht_rec **rec_p) |
| { |
| struct ht_rec *rec; |
| uint32_t i, idx; |
| |
| if (rec_p) { |
| *rec_p = NULL; |
| } |
| |
| idx = i = hash & (ht->size - 1); |
| rec = lyht_get_rec(ht->recs, ht->rec_size, idx); |
| |
| /* skip through overflow and deleted records */ |
| while ((rec->hits != 0) && ((rec->hits == -1) || ((rec->hash & (ht->size - 1)) != idx))) { |
| if ((rec->hits == -1) && rec_p && !(*rec_p)) { |
| /* remember this record for return */ |
| *rec_p = rec; |
| } |
| i = (i + 1) % ht->size; |
| if (i == idx) { |
| /* we went through all the records (very unlikely, but possible when many records are invalid), |
| * just return not found */ |
| assert(!rec_p || *rec_p); |
| return 1; |
| } |
| rec = lyht_get_rec(ht->recs, ht->rec_size, i); |
| } |
| if (rec->hits == 0) { |
| /* we could not find the value */ |
| if (rec_p && !*rec_p) { |
| *rec_p = rec; |
| } |
| return 1; |
| } |
| |
| /* we have found a record with equal (shortened) hash */ |
| if (rec_p) { |
| *rec_p = rec; |
| } |
| return 0; |
| } |
| |
| /** |
| * @brief Search for the next collision. |
| * |
| * @param[in] ht Hash table to search in. |
| * @param[in,out] last Last returned collision record. |
| * @param[in] first First collision record (hits > 1). |
| * @return 0 when hash collision found, \p last points to this next collision, |
| * 1 when hash collision not found, \p last points to the record where it would be inserted. |
| */ |
| static int |
| lyht_find_collision(struct hash_table *ht, struct ht_rec **last, struct ht_rec *first) |
| { |
| struct ht_rec *empty = NULL; |
| uint32_t i, idx; |
| |
| assert(last && *last); |
| |
| idx = (*last)->hash & (ht->size - 1); |
| i = (((unsigned char *)*last) - ht->recs) / ht->rec_size; |
| |
| do { |
| i = (i + 1) % ht->size; |
| *last = lyht_get_rec(ht->recs, ht->rec_size, i); |
| if (*last == first) { |
| /* we went through all the records (very unlikely, but possible when many records are invalid), |
| * just return an invalid record */ |
| assert(empty); |
| *last = empty; |
| return 1; |
| } |
| |
| if (((*last)->hits == -1) && !empty) { |
| empty = *last; |
| } |
| } while (((*last)->hits != 0) && (((*last)->hits == -1) || (((*last)->hash & (ht->size - 1)) != idx))); |
| |
| if ((*last)->hits > 0) { |
| /* we found a collision */ |
| assert((*last)->hits == 1); |
| return 0; |
| } |
| |
| /* no next collision found, return the record where it would be inserted */ |
| if (empty) { |
| *last = empty; |
| } /* else (*last)->hits == 0, it is already correct */ |
| return 1; |
| } |
| |
| int |
| lyht_find(struct hash_table *ht, void *val_p, uint32_t hash, void **match_p) |
| { |
| struct ht_rec *rec, *crec; |
| uint32_t i, c; |
| int r; |
| |
| if (lyht_find_first(ht, hash, &rec)) { |
| /* not found */ |
| return 1; |
| } |
| if ((rec->hash == hash) && ht->val_equal(val_p, &rec->val, 0, ht->cb_data)) { |
| /* even the value matches */ |
| if (match_p) { |
| *match_p = rec->val; |
| } |
| return 0; |
| } |
| |
| /* some collisions, we need to go through them, too */ |
| crec = rec; |
| c = rec->hits; |
| for (i = 1; i < c; ++i) { |
| r = lyht_find_collision(ht, &rec, crec); |
| assert(!r); |
| (void)r; |
| |
| /* compare values */ |
| if ((rec->hash == hash) && ht->val_equal(val_p, &rec->val, 0, ht->cb_data)) { |
| if (match_p) { |
| *match_p = rec->val; |
| } |
| return 0; |
| } |
| } |
| |
| /* not found even in collisions */ |
| return 1; |
| } |
| |
| int |
| lyht_find_next(struct hash_table *ht, void *val_p, uint32_t hash, void **match_p) |
| { |
| struct ht_rec *rec, *crec; |
| uint32_t i, c; |
| int r, found = 0; |
| |
| if (lyht_find_first(ht, hash, &rec)) { |
| /* not found, cannot happen */ |
| assert(0); |
| } |
| |
| if ((rec->hash == hash) && ht->val_equal(val_p, &rec->val, 1, ht->cb_data)) { |
| /* previously returned value */ |
| found = 1; |
| } |
| |
| if (rec->hits == 1) { |
| /* there are no more similar values */ |
| assert(rec->hash == hash); |
| assert(found); |
| return 1; |
| } |
| |
| /* go through collisions and find next one after the previous one */ |
| crec = rec; |
| c = rec->hits; |
| for (i = 1; i < c; ++i) { |
| r = lyht_find_collision(ht, &rec, crec); |
| assert(!r); |
| (void)r; |
| |
| if (rec->hash != hash) { |
| /* a normal collision, we are not interested in those */ |
| continue; |
| } |
| |
| if (found) { |
| /* next value with equal hash, found our value */ |
| if (match_p) { |
| *match_p = rec->val; |
| } |
| return 0; |
| } |
| |
| if (!ht->val_equal(val_p, &rec->val, 1, ht->cb_data)) { |
| /* already returned value, skip */ |
| continue; |
| } |
| |
| /* this one was returned previously, continue looking */ |
| found = 1; |
| } |
| |
| /* the last equal value was already returned */ |
| assert(found); |
| return 1; |
| } |
| |
| int |
| lyht_insert_with_resize_cb(struct hash_table *ht, void *val_p, uint32_t hash, |
| values_equal_cb resize_val_equal, void **match_p) |
| { |
| struct ht_rec *rec, *crec = NULL; |
| int32_t i; |
| int r, ret; |
| values_equal_cb old_val_equal; |
| |
| if (!lyht_find_first(ht, hash, &rec)) { |
| /* we found matching shortened hash */ |
| if ((rec->hash == hash) && ht->val_equal(val_p, &rec->val, 1, ht->cb_data)) { |
| /* even the value matches */ |
| if (match_p) { |
| *match_p = (void *)&rec->val; |
| } |
| return 1; |
| } |
| |
| /* some collisions, we need to go through them, too */ |
| crec = rec; |
| for (i = 1; i < crec->hits; ++i) { |
| r = lyht_find_collision(ht, &rec, crec); |
| assert(!r); |
| |
| /* compare values */ |
| if ((rec->hash == hash) && ht->val_equal(val_p, &rec->val, 1, ht->cb_data)) { |
| if (match_p) { |
| *match_p = (void *)&rec->val; |
| } |
| return 1; |
| } |
| } |
| |
| /* value not found, get the record where it will be inserted */ |
| r = lyht_find_collision(ht, &rec, crec); |
| assert(r); |
| } |
| |
| /* insert it into the returned record */ |
| assert(rec->hits < 1); |
| rec->hash = hash; |
| rec->hits = 1; |
| memcpy(&rec->val, val_p, ht->rec_size - (sizeof(struct ht_rec) - 1)); |
| if (match_p) { |
| *match_p = (void *)&rec->val; |
| } |
| |
| if (crec) { |
| /* there was a collision, increase hits */ |
| if (crec->hits == INT32_MAX) { |
| LOGINT(NULL); |
| } |
| ++crec->hits; |
| } |
| |
| /* check size & enlarge if needed */ |
| ret = 0; |
| ++ht->used; |
| if (ht->resize) { |
| r = (ht->used * 100) / ht->size; |
| if ((ht->resize == 1) && (r >= LYHT_FIRST_SHRINK_PERCENTAGE)) { |
| /* enable shrinking */ |
| ht->resize = 2; |
| } |
| if ((ht->resize == 2) && (r >= LYHT_ENLARGE_PERCENTAGE)) { |
| if (resize_val_equal) { |
| old_val_equal = lyht_set_cb(ht, resize_val_equal); |
| } |
| |
| /* enlarge */ |
| ret = lyht_resize(ht, 1); |
| /* if hash_table was resized, we need to find new matching value */ |
| if (ret == 0 && match_p) { |
| lyht_find(ht, val_p, hash, match_p); |
| } |
| |
| if (resize_val_equal) { |
| lyht_set_cb(ht, old_val_equal); |
| } |
| } |
| } |
| return ret; |
| } |
| |
| int |
| lyht_insert(struct hash_table *ht, void *val_p, uint32_t hash, void **match_p) |
| { |
| return lyht_insert_with_resize_cb(ht, val_p, hash, NULL, match_p); |
| } |
| |
| int |
| lyht_remove(struct hash_table *ht, void *val_p, uint32_t hash) |
| { |
| struct ht_rec *rec, *crec; |
| int32_t i; |
| int first_matched = 0, r, ret; |
| |
| if (lyht_find_first(ht, hash, &rec)) { |
| /* hash not found */ |
| return 1; |
| } |
| if ((rec->hash == hash) && ht->val_equal(val_p, &rec->val, 1, ht->cb_data)) { |
| /* even the value matches */ |
| first_matched = 1; |
| } |
| |
| /* we always need to go through collisions */ |
| crec = rec; |
| for (i = 1; i < crec->hits; ++i) { |
| r = lyht_find_collision(ht, &rec, crec); |
| assert(!r); |
| |
| /* compare values */ |
| if (!first_matched && (rec->hash == hash) && ht->val_equal(val_p, &rec->val, 1, ht->cb_data)) { |
| break; |
| } |
| } |
| |
| if (i < crec->hits) { |
| /* one of collisions matched, reduce collision count, remove the record */ |
| assert(!first_matched); |
| --crec->hits; |
| rec->hits = -1; |
| } else if (first_matched) { |
| /* the first record matches */ |
| if (crec != rec) { |
| /* ... so put the last collision in its place */ |
| rec->hits = crec->hits - 1; |
| memcpy(crec, rec, ht->rec_size); |
| } |
| rec->hits = -1; |
| } else { |
| /* value not found even in collisions */ |
| assert(!first_matched); |
| return 1; |
| } |
| |
| /* check size & shrink if needed */ |
| ret = 0; |
| --ht->used; |
| if (ht->resize == 2) { |
| r = (ht->used * 100) / ht->size; |
| if ((r < LYHT_SHRINK_PERCENTAGE) && (ht->size > LYHT_MIN_SIZE)) { |
| /* shrink */ |
| ret = lyht_resize(ht, 0); |
| } |
| } |
| |
| return ret; |
| } |