blob: 885322b8cb27e5df875164580be600165f508fee [file] [log] [blame]
Radek Krejciee627582015-04-20 17:39:59 +02001/**
2 * @file dict.c
3 * @author Radek Krejci <rkrejci@cesnet.cz>
4 * @brief libyang dictionary for storing strings
5 *
6 * Copyright (c) 2015 CESNET, z.s.p.o.
7 *
Radek Krejci54f6fb32016-02-24 12:56:39 +01008 * This source code is licensed under BSD 3-Clause License (the "License").
9 * You may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
Michal Vasko8de098c2016-02-26 10:00:25 +010011 *
Radek Krejci54f6fb32016-02-24 12:56:39 +010012 * https://opensource.org/licenses/BSD-3-Clause
Radek Krejciee627582015-04-20 17:39:59 +020013 */
14
15#include <string.h>
16#include <stdint.h>
17#include <stdlib.h>
Radek Krejcifebcbfb2016-02-04 14:58:42 +010018#include <pthread.h>
Radek Krejciee627582015-04-20 17:39:59 +020019
Radek Krejciee627582015-04-20 17:39:59 +020020#include "common.h"
Radek Krejcida04f4a2015-05-21 12:54:09 +020021#include "context.h"
Radek Krejci41912fe2015-10-22 10:22:12 +020022#include "dict_private.h"
Radek Krejciee627582015-04-20 17:39:59 +020023
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020024void
25lydict_init(struct dict_table *dict)
Radek Krejcida04f4a2015-05-21 12:54:09 +020026{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020027 if (!dict) {
28 ly_errno = LY_EINVAL;
29 return;
30 }
Radek Krejciee627582015-04-20 17:39:59 +020031
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020032 dict->hash_mask = DICT_SIZE - 1;
Radek Krejcifebcbfb2016-02-04 14:58:42 +010033 pthread_mutex_init(&dict->lock, NULL);
Radek Krejcida04f4a2015-05-21 12:54:09 +020034}
35
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020036void
37lydict_clean(struct dict_table *dict)
Radek Krejcida04f4a2015-05-21 12:54:09 +020038{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020039 int i;
40 struct dict_rec *chain, *rec;
Radek Krejcida04f4a2015-05-21 12:54:09 +020041
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020042 if (!dict) {
43 ly_errno = LY_EINVAL;
44 return;
45 }
Radek Krejcida04f4a2015-05-21 12:54:09 +020046
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020047 for (i = 0; i < DICT_SIZE; i++) {
48 rec = &dict->recs[i];
49 chain = rec->next;
Radek Krejcida04f4a2015-05-21 12:54:09 +020050
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020051 free(rec->value);
52 while (chain) {
53 rec = chain;
54 chain = rec->next;
Radek Krejcida04f4a2015-05-21 12:54:09 +020055
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020056 free(rec->value);
57 free(rec);
58 }
59 }
Radek Krejcifebcbfb2016-02-04 14:58:42 +010060
61 pthread_mutex_destroy(&dict->lock);
Radek Krejcida04f4a2015-05-21 12:54:09 +020062}
Radek Krejciee627582015-04-20 17:39:59 +020063
64/*
65 * Bob Jenkin's one-at-a-time hash
66 * http://www.burtleburtle.net/bob/hash/doobs.html
67 *
68 * Spooky hash is faster, but it works only for little endian architectures.
69 */
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020070static uint32_t
71dict_hash(const char *key, size_t len)
Radek Krejciee627582015-04-20 17:39:59 +020072{
73 uint32_t hash, i;
74
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020075 for (hash = i = 0; i < len; ++i) {
Radek Krejciee627582015-04-20 17:39:59 +020076 hash += key[i];
77 hash += (hash << 10);
78 hash ^= (hash >> 6);
79 }
80 hash += (hash << 3);
81 hash ^= (hash >> 11);
82 hash += (hash << 15);
83 return hash;
84}
85
Radek Krejci63b79c82016-08-10 10:09:33 +020086/*
87 * Usage:
88 * - init hash to 0
89 * - repeatedly call dict_hash_multi(), provide hash from the last call
90 * - call dict_hash_multi() with key_part = NULL to finish the hash
91 */
92uint32_t
93dict_hash_multi(uint32_t hash, const char *key_part, size_t len)
94{
95 uint32_t i;
96
97 if (key_part) {
98 for (i = 0; i < len; ++i) {
99 hash += key_part[i];
100 hash += (hash << 10);
101 hash ^= (hash >> 6);
102 }
103 } else {
104 hash += (hash << 3);
105 hash ^= (hash >> 11);
106 hash += (hash << 15);
107 }
108
109 return hash;
110}
111
Radek Krejcifcf37bd2015-10-22 10:27:53 +0200112API void
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200113lydict_remove(struct ly_ctx *ctx, const char *value)
Radek Krejciee627582015-04-20 17:39:59 +0200114{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200115 size_t len;
116 uint32_t index;
117 struct dict_rec *record, *prev = NULL;
Radek Krejciee627582015-04-20 17:39:59 +0200118
Radek Krejcic071c542016-01-27 14:57:51 +0100119 if (!value || !ctx || !ctx->dict.used) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200120 return;
121 }
Radek Krejciee627582015-04-20 17:39:59 +0200122
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200123 len = strlen(value);
Radek Krejciee627582015-04-20 17:39:59 +0200124
Radek Krejcifebcbfb2016-02-04 14:58:42 +0100125 pthread_mutex_lock(&ctx->dict.lock);
126
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200127 index = dict_hash(value, len) & ctx->dict.hash_mask;
128 record = &ctx->dict.recs[index];
Radek Krejciee627582015-04-20 17:39:59 +0200129
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200130 while (record && record->value != value) {
131 prev = record;
132 record = record->next;
133 }
Radek Krejciee627582015-04-20 17:39:59 +0200134
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200135 if (!record) {
136 /* record not found */
Radek Krejcifebcbfb2016-02-04 14:58:42 +0100137 pthread_mutex_unlock(&ctx->dict.lock);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200138 return;
139 }
Radek Krejciee627582015-04-20 17:39:59 +0200140
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200141 record->refcount--;
142 if (!record->refcount) {
143 free(record->value);
144 if (record->next) {
145 if (prev) {
146 /* change in dynamically allocated chain */
147 prev->next = record->next;
148 free(record);
149 } else {
150 /* move dynamically allocated record into the static array */
151 prev = record->next; /* temporary storage */
152 memcpy(record, record->next, sizeof *record);
153 free(prev);
154 }
155 } else if (prev) {
156 /* removing last record from the dynamically allocated chain */
157 prev->next = NULL;
158 free(record);
159 } else {
160 /* clean the static record content */
161 memset(record, 0, sizeof *record);
162 }
Radek Krejci6b0a8882016-02-16 17:37:21 +0100163 ctx->dict.used--;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200164 }
Radek Krejcifebcbfb2016-02-04 14:58:42 +0100165
166 pthread_mutex_unlock(&ctx->dict.lock);
Radek Krejciee627582015-04-20 17:39:59 +0200167}
168
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200169static char *
170dict_insert(struct ly_ctx *ctx, char *value, size_t len, int zerocopy)
Radek Krejciee627582015-04-20 17:39:59 +0200171{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200172 uint32_t index;
Radek Krejci592e00d2016-05-19 12:38:08 +0200173 int match = 0;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200174 struct dict_rec *record, *new;
Radek Krejciee627582015-04-20 17:39:59 +0200175
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200176 index = dict_hash(value, len) & ctx->dict.hash_mask;
177 record = &ctx->dict.recs[index];
Radek Krejciee627582015-04-20 17:39:59 +0200178
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200179 if (!record->value) {
180 /* first record with this hash */
181 if (zerocopy) {
182 record->value = value;
183 } else {
184 record->value = malloc((len + 1) * sizeof *record->value);
Michal Vasko253035f2015-12-17 16:58:13 +0100185 if (!record->value) {
186 LOGMEM;
187 return NULL;
188 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200189 memcpy(record->value, value, len);
190 record->value[len] = '\0';
191 }
192 record->refcount = 1;
Radek Krejci592e00d2016-05-19 12:38:08 +0200193 if (len > DICT_REC_MAXLEN) {
194 record->len = 0;
195 } else {
196 record->len = len;
197 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200198 record->next = NULL;
Radek Krejciee627582015-04-20 17:39:59 +0200199
Radek Krejci6b0a8882016-02-16 17:37:21 +0100200 ctx->dict.used++;
201
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200202 LOGDBG("DICT: inserting \"%s\"", record->value);
203 return record->value;
204 }
Radek Krejciee627582015-04-20 17:39:59 +0200205
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200206 /* collision, search if the value is already in dict */
207 while (record) {
Radek Krejci592e00d2016-05-19 12:38:08 +0200208 if (record->len) {
209 /* for strings shorter than DICT_REC_MAXLEN we are able to speed up
210 * recognition of varying strings according to their lengths, and
211 * for strings with the same length it is safe to use faster memcmp()
212 * instead of strncmp() */
213 if ((record->len == len) && !memcmp(value, record->value, len)) {
214 match = 1;
215 }
216 } else {
217 if (!strncmp(value, record->value, len) && record->value[len] == '\0') {
218 match = 1;
219 }
220 }
221 if (match) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200222 /* record found */
Radek Krejci592e00d2016-05-19 12:38:08 +0200223 if (record->refcount == DICT_REC_MAXCOUNT) {
224 LOGWRN("DICT: refcount overflow detected, duplicating record");
225 break;
226 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200227 record->refcount++;
Radek Krejci4ea08382015-04-21 09:41:40 +0200228
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200229 if (zerocopy) {
230 free(value);
231 }
Radek Krejci4ea08382015-04-21 09:41:40 +0200232
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200233 LOGDBG("DICT: inserting (refcount) \"%s\"", record->value);
234 return record->value;
235 }
Radek Krejciee627582015-04-20 17:39:59 +0200236
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200237 if (!record->next) {
238 /* not present, add as a new record in chain */
239 break;
240 }
Radek Krejciee627582015-04-20 17:39:59 +0200241
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200242 record = record->next;
243 }
Radek Krejciee627582015-04-20 17:39:59 +0200244
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200245 /* create new record and add it behind the last record */
246 new = malloc(sizeof *record);
Michal Vasko253035f2015-12-17 16:58:13 +0100247 if (!new) {
248 LOGMEM;
249 return NULL;
250 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200251 if (zerocopy) {
252 new->value = value;
253 } else {
254 new->value = malloc((len + 1) * sizeof *record->value);
Michal Vasko253035f2015-12-17 16:58:13 +0100255 if (!new->value) {
256 LOGMEM;
257 free(new);
258 return NULL;
259 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200260 memcpy(new->value, value, len);
261 new->value[len] = '\0';
262 }
263 new->refcount = 1;
Radek Krejci592e00d2016-05-19 12:38:08 +0200264 if (len > DICT_REC_MAXLEN) {
265 new->len = 0;
266 } else {
267 new->len = len;
268 }
269 new->next = record->next; /* in case of refcount overflow, we are not at the end of chain */
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200270 record->next = new;
Radek Krejciee627582015-04-20 17:39:59 +0200271
Radek Krejci6b0a8882016-02-16 17:37:21 +0100272 ctx->dict.used++;
273
Michal Vasko29e7bce2015-09-11 16:00:15 +0200274 LOGDBG("DICT: inserting \"%s\" with collision ", new->value);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200275 return new->value;
Radek Krejciee627582015-04-20 17:39:59 +0200276}
Radek Krejci4ea08382015-04-21 09:41:40 +0200277
Radek Krejcifcf37bd2015-10-22 10:27:53 +0200278API const char *
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200279lydict_insert(struct ly_ctx *ctx, const char *value, size_t len)
Radek Krejci4ea08382015-04-21 09:41:40 +0200280{
Radek Krejcifebcbfb2016-02-04 14:58:42 +0100281 const char *result;
282
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200283 if (value && !len) {
284 len = strlen(value);
285 }
Radek Krejci8bc9ca02015-06-04 15:52:46 +0200286
Radek Krejcia5269642015-07-20 19:04:11 +0200287 if (!value) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200288 return NULL;
289 }
Radek Krejcifebcbfb2016-02-04 14:58:42 +0100290
291 pthread_mutex_lock(&ctx->dict.lock);
292 result = dict_insert(ctx, (char *)value, len, 0);
293 pthread_mutex_unlock(&ctx->dict.lock);
294
295 return result;
Radek Krejci4ea08382015-04-21 09:41:40 +0200296}
297
Radek Krejcifcf37bd2015-10-22 10:27:53 +0200298API const char *
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200299lydict_insert_zc(struct ly_ctx *ctx, char *value)
Radek Krejci4ea08382015-04-21 09:41:40 +0200300{
Radek Krejcifebcbfb2016-02-04 14:58:42 +0100301 const char *result;
302
Radek Krejcia5269642015-07-20 19:04:11 +0200303 if (!value) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200304 return NULL;
305 }
Radek Krejcifebcbfb2016-02-04 14:58:42 +0100306
307 pthread_mutex_lock(&ctx->dict.lock);
308 result = dict_insert(ctx, value, strlen(value), 1);
309 pthread_mutex_unlock(&ctx->dict.lock);
310
311 return result;
Radek Krejci4ea08382015-04-21 09:41:40 +0200312}