blob: da6bf1bca0402126f916db4e1c59855c26ab64ad [file] [log] [blame]
Radek Krejciee627582015-04-20 17:39:59 +02001/**
2 * @file dict.c
3 * @author Radek Krejci <rkrejci@cesnet.cz>
4 * @brief libyang dictionary for storing strings
5 *
6 * Copyright (c) 2015 CESNET, z.s.p.o.
7 *
Radek Krejci54f6fb32016-02-24 12:56:39 +01008 * This source code is licensed under BSD 3-Clause License (the "License").
9 * You may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
Michal Vasko8de098c2016-02-26 10:00:25 +010011 *
Radek Krejci54f6fb32016-02-24 12:56:39 +010012 * https://opensource.org/licenses/BSD-3-Clause
Radek Krejciee627582015-04-20 17:39:59 +020013 */
14
15#include <string.h>
16#include <stdint.h>
17#include <stdlib.h>
Radek Krejcifebcbfb2016-02-04 14:58:42 +010018#include <pthread.h>
Radek Krejciee627582015-04-20 17:39:59 +020019
Radek Krejciee627582015-04-20 17:39:59 +020020#include "common.h"
Radek Krejcida04f4a2015-05-21 12:54:09 +020021#include "context.h"
Radek Krejci41912fe2015-10-22 10:22:12 +020022#include "dict_private.h"
Radek Krejciee627582015-04-20 17:39:59 +020023
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020024void
25lydict_init(struct dict_table *dict)
Radek Krejcida04f4a2015-05-21 12:54:09 +020026{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020027 if (!dict) {
28 ly_errno = LY_EINVAL;
29 return;
30 }
Radek Krejciee627582015-04-20 17:39:59 +020031
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020032 dict->hash_mask = DICT_SIZE - 1;
Radek Krejcifebcbfb2016-02-04 14:58:42 +010033 pthread_mutex_init(&dict->lock, NULL);
Radek Krejcida04f4a2015-05-21 12:54:09 +020034}
35
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020036void
37lydict_clean(struct dict_table *dict)
Radek Krejcida04f4a2015-05-21 12:54:09 +020038{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020039 int i;
40 struct dict_rec *chain, *rec;
Radek Krejcida04f4a2015-05-21 12:54:09 +020041
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020042 if (!dict) {
43 ly_errno = LY_EINVAL;
44 return;
45 }
Radek Krejcida04f4a2015-05-21 12:54:09 +020046
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020047 for (i = 0; i < DICT_SIZE; i++) {
48 rec = &dict->recs[i];
49 chain = rec->next;
Radek Krejcida04f4a2015-05-21 12:54:09 +020050
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020051 free(rec->value);
52 while (chain) {
53 rec = chain;
54 chain = rec->next;
Radek Krejcida04f4a2015-05-21 12:54:09 +020055
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020056 free(rec->value);
57 free(rec);
58 }
59 }
Radek Krejcifebcbfb2016-02-04 14:58:42 +010060
61 pthread_mutex_destroy(&dict->lock);
Radek Krejcida04f4a2015-05-21 12:54:09 +020062}
Radek Krejciee627582015-04-20 17:39:59 +020063
64/*
65 * Bob Jenkin's one-at-a-time hash
66 * http://www.burtleburtle.net/bob/hash/doobs.html
67 *
68 * Spooky hash is faster, but it works only for little endian architectures.
69 */
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020070static uint32_t
71dict_hash(const char *key, size_t len)
Radek Krejciee627582015-04-20 17:39:59 +020072{
73 uint32_t hash, i;
74
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020075 for (hash = i = 0; i < len; ++i) {
Radek Krejciee627582015-04-20 17:39:59 +020076 hash += key[i];
77 hash += (hash << 10);
78 hash ^= (hash >> 6);
79 }
80 hash += (hash << 3);
81 hash ^= (hash >> 11);
82 hash += (hash << 15);
83 return hash;
84}
85
Radek Krejcifcf37bd2015-10-22 10:27:53 +020086API void
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020087lydict_remove(struct ly_ctx *ctx, const char *value)
Radek Krejciee627582015-04-20 17:39:59 +020088{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020089 size_t len;
90 uint32_t index;
91 struct dict_rec *record, *prev = NULL;
Radek Krejciee627582015-04-20 17:39:59 +020092
Radek Krejcic071c542016-01-27 14:57:51 +010093 if (!value || !ctx || !ctx->dict.used) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020094 return;
95 }
Radek Krejciee627582015-04-20 17:39:59 +020096
Radek Krejci6e4ffbb2015-06-16 10:34:41 +020097 len = strlen(value);
Radek Krejciee627582015-04-20 17:39:59 +020098
Radek Krejcifebcbfb2016-02-04 14:58:42 +010099 pthread_mutex_lock(&ctx->dict.lock);
100
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200101 index = dict_hash(value, len) & ctx->dict.hash_mask;
102 record = &ctx->dict.recs[index];
Radek Krejciee627582015-04-20 17:39:59 +0200103
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200104 while (record && record->value != value) {
105 prev = record;
106 record = record->next;
107 }
Radek Krejciee627582015-04-20 17:39:59 +0200108
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200109 if (!record) {
110 /* record not found */
Radek Krejcifebcbfb2016-02-04 14:58:42 +0100111 pthread_mutex_unlock(&ctx->dict.lock);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200112 return;
113 }
Radek Krejciee627582015-04-20 17:39:59 +0200114
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200115 record->refcount--;
116 if (!record->refcount) {
117 free(record->value);
118 if (record->next) {
119 if (prev) {
120 /* change in dynamically allocated chain */
121 prev->next = record->next;
122 free(record);
123 } else {
124 /* move dynamically allocated record into the static array */
125 prev = record->next; /* temporary storage */
126 memcpy(record, record->next, sizeof *record);
127 free(prev);
128 }
129 } else if (prev) {
130 /* removing last record from the dynamically allocated chain */
131 prev->next = NULL;
132 free(record);
133 } else {
134 /* clean the static record content */
135 memset(record, 0, sizeof *record);
136 }
Radek Krejci6b0a8882016-02-16 17:37:21 +0100137 ctx->dict.used--;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200138 }
Radek Krejcifebcbfb2016-02-04 14:58:42 +0100139
140 pthread_mutex_unlock(&ctx->dict.lock);
Radek Krejciee627582015-04-20 17:39:59 +0200141}
142
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200143static char *
144dict_insert(struct ly_ctx *ctx, char *value, size_t len, int zerocopy)
Radek Krejciee627582015-04-20 17:39:59 +0200145{
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200146 uint32_t index;
Radek Krejci592e00d2016-05-19 12:38:08 +0200147 int match = 0;
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200148 struct dict_rec *record, *new;
Radek Krejciee627582015-04-20 17:39:59 +0200149
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200150 index = dict_hash(value, len) & ctx->dict.hash_mask;
151 record = &ctx->dict.recs[index];
Radek Krejciee627582015-04-20 17:39:59 +0200152
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200153 if (!record->value) {
154 /* first record with this hash */
155 if (zerocopy) {
156 record->value = value;
157 } else {
158 record->value = malloc((len + 1) * sizeof *record->value);
Michal Vasko253035f2015-12-17 16:58:13 +0100159 if (!record->value) {
160 LOGMEM;
161 return NULL;
162 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200163 memcpy(record->value, value, len);
164 record->value[len] = '\0';
165 }
166 record->refcount = 1;
Radek Krejci592e00d2016-05-19 12:38:08 +0200167 if (len > DICT_REC_MAXLEN) {
168 record->len = 0;
169 } else {
170 record->len = len;
171 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200172 record->next = NULL;
Radek Krejciee627582015-04-20 17:39:59 +0200173
Radek Krejci6b0a8882016-02-16 17:37:21 +0100174 ctx->dict.used++;
175
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200176 LOGDBG("DICT: inserting \"%s\"", record->value);
177 return record->value;
178 }
Radek Krejciee627582015-04-20 17:39:59 +0200179
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200180 /* collision, search if the value is already in dict */
181 while (record) {
Radek Krejci592e00d2016-05-19 12:38:08 +0200182 if (record->len) {
183 /* for strings shorter than DICT_REC_MAXLEN we are able to speed up
184 * recognition of varying strings according to their lengths, and
185 * for strings with the same length it is safe to use faster memcmp()
186 * instead of strncmp() */
187 if ((record->len == len) && !memcmp(value, record->value, len)) {
188 match = 1;
189 }
190 } else {
191 if (!strncmp(value, record->value, len) && record->value[len] == '\0') {
192 match = 1;
193 }
194 }
195 if (match) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200196 /* record found */
Radek Krejci592e00d2016-05-19 12:38:08 +0200197 if (record->refcount == DICT_REC_MAXCOUNT) {
198 LOGWRN("DICT: refcount overflow detected, duplicating record");
199 break;
200 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200201 record->refcount++;
Radek Krejci4ea08382015-04-21 09:41:40 +0200202
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200203 if (zerocopy) {
204 free(value);
205 }
Radek Krejci4ea08382015-04-21 09:41:40 +0200206
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200207 LOGDBG("DICT: inserting (refcount) \"%s\"", record->value);
208 return record->value;
209 }
Radek Krejciee627582015-04-20 17:39:59 +0200210
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200211 if (!record->next) {
212 /* not present, add as a new record in chain */
213 break;
214 }
Radek Krejciee627582015-04-20 17:39:59 +0200215
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200216 record = record->next;
217 }
Radek Krejciee627582015-04-20 17:39:59 +0200218
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200219 /* create new record and add it behind the last record */
220 new = malloc(sizeof *record);
Michal Vasko253035f2015-12-17 16:58:13 +0100221 if (!new) {
222 LOGMEM;
223 return NULL;
224 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200225 if (zerocopy) {
226 new->value = value;
227 } else {
228 new->value = malloc((len + 1) * sizeof *record->value);
Michal Vasko253035f2015-12-17 16:58:13 +0100229 if (!new->value) {
230 LOGMEM;
231 free(new);
232 return NULL;
233 }
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200234 memcpy(new->value, value, len);
235 new->value[len] = '\0';
236 }
237 new->refcount = 1;
Radek Krejci592e00d2016-05-19 12:38:08 +0200238 if (len > DICT_REC_MAXLEN) {
239 new->len = 0;
240 } else {
241 new->len = len;
242 }
243 new->next = record->next; /* in case of refcount overflow, we are not at the end of chain */
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200244 record->next = new;
Radek Krejciee627582015-04-20 17:39:59 +0200245
Radek Krejci6b0a8882016-02-16 17:37:21 +0100246 ctx->dict.used++;
247
Michal Vasko29e7bce2015-09-11 16:00:15 +0200248 LOGDBG("DICT: inserting \"%s\" with collision ", new->value);
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200249 return new->value;
Radek Krejciee627582015-04-20 17:39:59 +0200250}
Radek Krejci4ea08382015-04-21 09:41:40 +0200251
Radek Krejcifcf37bd2015-10-22 10:27:53 +0200252API const char *
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200253lydict_insert(struct ly_ctx *ctx, const char *value, size_t len)
Radek Krejci4ea08382015-04-21 09:41:40 +0200254{
Radek Krejcifebcbfb2016-02-04 14:58:42 +0100255 const char *result;
256
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200257 if (value && !len) {
258 len = strlen(value);
259 }
Radek Krejci8bc9ca02015-06-04 15:52:46 +0200260
Radek Krejcia5269642015-07-20 19:04:11 +0200261 if (!value) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200262 return NULL;
263 }
Radek Krejcifebcbfb2016-02-04 14:58:42 +0100264
265 pthread_mutex_lock(&ctx->dict.lock);
266 result = dict_insert(ctx, (char *)value, len, 0);
267 pthread_mutex_unlock(&ctx->dict.lock);
268
269 return result;
Radek Krejci4ea08382015-04-21 09:41:40 +0200270}
271
Radek Krejcifcf37bd2015-10-22 10:27:53 +0200272API const char *
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200273lydict_insert_zc(struct ly_ctx *ctx, char *value)
Radek Krejci4ea08382015-04-21 09:41:40 +0200274{
Radek Krejcifebcbfb2016-02-04 14:58:42 +0100275 const char *result;
276
Radek Krejcia5269642015-07-20 19:04:11 +0200277 if (!value) {
Radek Krejci6e4ffbb2015-06-16 10:34:41 +0200278 return NULL;
279 }
Radek Krejcifebcbfb2016-02-04 14:58:42 +0100280
281 pthread_mutex_lock(&ctx->dict.lock);
282 result = dict_insert(ctx, value, strlen(value), 1);
283 pthread_mutex_unlock(&ctx->dict.lock);
284
285 return result;
Radek Krejci4ea08382015-04-21 09:41:40 +0200286}