dict OPTIMIZE searching in dictionary if possible, use faster memcmp() instead of strncmp()

commit: 592e00d862ce5f9022e9e841b3ec7ade6eeee50e [log] [tgz]
author: Radek Krejci <rkrejci@cesnet.cz> Thu May 19 12:38:08 2016 +0200
committer: Radek Krejci <rkrejci@cesnet.cz> Thu May 19 12:38:08 2016 +0200
tree: f4cea5f0e2064fa33fb47efb198eb2b93e57b71f
parent: 34e8b3479eb943c9817a27ce011db128fd53356f [diff] [blame]
diff --git a/src/dict.c b/src/dict.c
index a59251a..da6bf1b 100644
--- a/src/dict.c
+++ b/src/dict.c

@@ -144,6 +144,7 @@
 dict_insert(struct ly_ctx *ctx, char *value, size_t len, int zerocopy)
 {
     uint32_t index;
+    int match = 0;
     struct dict_rec *record, *new;
 
     index = dict_hash(value, len) & ctx->dict.hash_mask;
@@ -163,6 +164,11 @@
             record->value[len] = '\0';
         }
         record->refcount = 1;
+        if (len > DICT_REC_MAXLEN) {
+            record->len = 0;
+        } else {
+            record->len = len;
+        }
         record->next = NULL;
 
         ctx->dict.used++;
@@ -173,8 +179,25 @@
 
     /* collision, search if the value is already in dict */
     while (record) {
-        if (!strncmp(value, record->value, len) && record->value[len] == '\0') {
+        if (record->len) {
+            /* for strings shorter than DICT_REC_MAXLEN we are able to speed up
+             * recognition of varying strings according to their lengths, and
+             * for strings with the same length it is safe to use faster memcmp()
+             * instead of strncmp() */
+            if ((record->len == len) && !memcmp(value, record->value, len)) {
+                match = 1;
+            }
+        } else {
+            if (!strncmp(value, record->value, len) && record->value[len] == '\0') {
+                match = 1;
+            }
+        }
+        if (match) {
             /* record found */
+            if (record->refcount == DICT_REC_MAXCOUNT) {
+                LOGWRN("DICT: refcount overflow detected, duplicating record");
+                break;
+            }
             record->refcount++;
 
             if (zerocopy) {
@@ -212,8 +235,12 @@
         new->value[len] = '\0';
     }
     new->refcount = 1;
-    new->next = NULL;
-
+    if (len > DICT_REC_MAXLEN) {
+        new->len = 0;
+    } else {
+        new->len = len;
+    }
+    new->next = record->next; /* in case of refcount overflow, we are not at the end of chain */
     record->next = new;
 
     ctx->dict.used++;
commit	592e00d862ce5f9022e9e841b3ec7ade6eeee50e	[log] [tgz]
author	Radek Krejci <rkrejci@cesnet.cz>	Thu May 19 12:38:08 2016 +0200
committer	Radek Krejci <rkrejci@cesnet.cz>	Thu May 19 12:38:08 2016 +0200
tree	f4cea5f0e2064fa33fb47efb198eb2b93e57b71f
parent	34e8b3479eb943c9817a27ce011db128fd53356f [diff] [blame]