1 /*
2 * $Id: linkhash.c,v 1.4 2006/01/26 02:16:28 mclark Exp $
3 *
4 * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
5 * Michael Clark <[email protected]>
6 * Copyright (c) 2009 Hewlett-Packard Development Company, L.P.
7 *
8 * This library is free software; you can redistribute it and/or modify
9 * it under the terms of the MIT license. See COPYING for details.
10 *
11 */
12
13 #include <stdio.h>
14 #include <string.h>
15 #include <stdlib.h>
16 #include <stdarg.h>
17 #include <stddef.h>
18 #include <limits.h>
19
20 #ifdef HAVE_ENDIAN_H
21 # include <endian.h> /* attempt to define endianness */
22 #endif
23
24 #include "random_seed.h"
25 #include "linkhash.h"
26
lh_abort(const char * msg,...)27 void lh_abort(const char *msg, ...)
28 {
29 va_list ap;
30 va_start(ap, msg);
31 vprintf(msg, ap);
32 va_end(ap);
33 exit(1);
34 }
35
lh_ptr_hash(const void * k)36 unsigned long lh_ptr_hash(const void *k)
37 {
38 /* CAW: refactored to be 64bit nice */
39 return (unsigned long)((((ptrdiff_t)k * LH_PRIME) >> 4) & ULONG_MAX);
40 }
41
lh_ptr_equal(const void * k1,const void * k2)42 int lh_ptr_equal(const void *k1, const void *k2)
43 {
44 return (k1 == k2);
45 }
46
47 /*
48 * hashlittle from lookup3.c, by Bob Jenkins, May 2006, Public Domain.
49 * http://burtleburtle.net/bob/c/lookup3.c
50 * minor modifications to make functions static so no symbols are exported
51 * minor mofifications to compile with -Werror
52 */
53
54 /*
55 -------------------------------------------------------------------------------
56 lookup3.c, by Bob Jenkins, May 2006, Public Domain.
57
58 These are functions for producing 32-bit hashes for hash table lookup.
59 hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
60 are externally useful functions. Routines to test the hash are included
61 if SELF_TEST is defined. You can use this free for any purpose. It's in
62 the public domain. It has no warranty.
63
64 You probably want to use hashlittle(). hashlittle() and hashbig()
65 hash byte arrays. hashlittle() is is faster than hashbig() on
66 little-endian machines. Intel and AMD are little-endian machines.
67 On second thought, you probably want hashlittle2(), which is identical to
68 hashlittle() except it returns two 32-bit hashes for the price of one.
69 You could implement hashbig2() if you wanted but I haven't bothered here.
70
71 If you want to find a hash of, say, exactly 7 integers, do
72 a = i1; b = i2; c = i3;
73 mix(a,b,c);
74 a += i4; b += i5; c += i6;
75 mix(a,b,c);
76 a += i7;
77 final(a,b,c);
78 then use c as the hash value. If you have a variable length array of
79 4-byte integers to hash, use hashword(). If you have a byte array (like
80 a character string), use hashlittle(). If you have several byte arrays, or
81 a mix of things, see the comments above hashlittle().
82
83 Why is this so big? I read 12 bytes at a time into 3 4-byte integers,
84 then mix those integers. This is fast (you can do a lot more thorough
85 mixing with 12*3 instructions on 3 integers than you can with 3 instructions
86 on 1 byte), but shoehorning those bytes into integers efficiently is messy.
87 -------------------------------------------------------------------------------
88 */
89
90 /*
91 * My best guess at if you are big-endian or little-endian. This may
92 * need adjustment.
93 */
94 #if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
95 __BYTE_ORDER == __LITTLE_ENDIAN) || \
96 (defined(i386) || defined(__i386__) || defined(__i486__) || \
97 defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
98 # define HASH_LITTLE_ENDIAN 1
99 # define HASH_BIG_ENDIAN 0
100 #elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
101 __BYTE_ORDER == __BIG_ENDIAN) || \
102 (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
103 # define HASH_LITTLE_ENDIAN 0
104 # define HASH_BIG_ENDIAN 1
105 #else
106 # define HASH_LITTLE_ENDIAN 0
107 # define HASH_BIG_ENDIAN 0
108 #endif
109
110 #define hashsize(n) ((uint32_t)1<<(n))
111 #define hashmask(n) (hashsize(n)-1)
112 #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
113
114 /*
115 -------------------------------------------------------------------------------
116 mix -- mix 3 32-bit values reversibly.
117
118 This is reversible, so any information in (a,b,c) before mix() is
119 still in (a,b,c) after mix().
120
121 If four pairs of (a,b,c) inputs are run through mix(), or through
122 mix() in reverse, there are at least 32 bits of the output that
123 are sometimes the same for one pair and different for another pair.
124 This was tested for:
125 * pairs that differed by one bit, by two bits, in any combination
126 of top bits of (a,b,c), or in any combination of bottom bits of
127 (a,b,c).
128 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
129 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
130 is commonly produced by subtraction) look like a single 1-bit
131 difference.
132 * the base values were pseudorandom, all zero but one bit set, or
133 all zero plus a counter that starts at zero.
134
135 Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
136 satisfy this are
137 4 6 8 16 19 4
138 9 15 3 18 27 15
139 14 9 3 7 17 3
140 Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
141 for "differ" defined as + with a one-bit base and a two-bit delta. I
142 used http://burtleburtle.net/bob/hash/avalanche.html to choose
143 the operations, constants, and arrangements of the variables.
144
145 This does not achieve avalanche. There are input bits of (a,b,c)
146 that fail to affect some output bits of (a,b,c), especially of a. The
147 most thoroughly mixed value is c, but it doesn't really even achieve
148 avalanche in c.
149
150 This allows some parallelism. Read-after-writes are good at doubling
151 the number of bits affected, so the goal of mixing pulls in the opposite
152 direction as the goal of parallelism. I did what I could. Rotates
153 seem to cost as much as shifts on every machine I could lay my hands
154 on, and rotates are much kinder to the top and bottom bits, so I used
155 rotates.
156 -------------------------------------------------------------------------------
157 */
158 #define mix(a,b,c) \
159 { \
160 a -= c; a ^= rot(c, 4); c += b; \
161 b -= a; b ^= rot(a, 6); a += c; \
162 c -= b; c ^= rot(b, 8); b += a; \
163 a -= c; a ^= rot(c,16); c += b; \
164 b -= a; b ^= rot(a,19); a += c; \
165 c -= b; c ^= rot(b, 4); b += a; \
166 }
167
168 /*
169 -------------------------------------------------------------------------------
170 final -- final mixing of 3 32-bit values (a,b,c) into c
171
172 Pairs of (a,b,c) values differing in only a few bits will usually
173 produce values of c that look totally different. This was tested for
174 * pairs that differed by one bit, by two bits, in any combination
175 of top bits of (a,b,c), or in any combination of bottom bits of
176 (a,b,c).
177 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
178 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
179 is commonly produced by subtraction) look like a single 1-bit
180 difference.
181 * the base values were pseudorandom, all zero but one bit set, or
182 all zero plus a counter that starts at zero.
183
184 These constants passed:
185 14 11 25 16 4 14 24
186 12 14 25 16 4 14 24
187 and these came close:
188 4 8 15 26 3 22 24
189 10 8 15 26 3 22 24
190 11 8 15 26 3 22 24
191 -------------------------------------------------------------------------------
192 */
193 #define final(a,b,c) \
194 { \
195 c ^= b; c -= rot(b,14); \
196 a ^= c; a -= rot(c,11); \
197 b ^= a; b -= rot(a,25); \
198 c ^= b; c -= rot(b,16); \
199 a ^= c; a -= rot(c,4); \
200 b ^= a; b -= rot(a,14); \
201 c ^= b; c -= rot(b,24); \
202 }
203
204
205 /*
206 -------------------------------------------------------------------------------
207 hashlittle() -- hash a variable-length key into a 32-bit value
208 k : the key (the unaligned variable-length array of bytes)
209 length : the length of the key, counting by bytes
210 initval : can be any 4-byte value
211 Returns a 32-bit value. Every bit of the key affects every bit of
212 the return value. Two keys differing by one or two bits will have
213 totally different hash values.
214
215 The best hash table sizes are powers of 2. There is no need to do
216 mod a prime (mod is sooo slow!). If you need less than 32 bits,
217 use a bitmask. For example, if you need only 10 bits, do
218 h = (h & hashmask(10));
219 In which case, the hash table should have hashsize(10) elements.
220
221 If you are hashing n strings (uint8_t **)k, do it like this:
222 for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
223
224 By Bob Jenkins, 2006. [email protected]. You may use this
225 code any way you wish, private, educational, or commercial. It's free.
226
227 Use for hash table lookup, or anything where one collision in 2^^32 is
228 acceptable. Do NOT use for cryptographic purposes.
229 -------------------------------------------------------------------------------
230 */
231
hashlittle(const void * key,size_t length,uint32_t initval)232 static uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
233 {
234 uint32_t a,b,c; /* internal state */
235 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
236
237 /* Set up the internal state */
238 a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
239
240 u.ptr = key;
241 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
242 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
243
244 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
245 while (length > 12)
246 {
247 a += k[0];
248 b += k[1];
249 c += k[2];
250 mix(a,b,c);
251 length -= 12;
252 k += 3;
253 }
254
255 /*----------------------------- handle the last (probably partial) block */
256 /*
257 * "k[2]&0xffffff" actually reads beyond the end of the string, but
258 * then masks off the part it's not allowed to read. Because the
259 * string is aligned, the masked-off tail is in the same word as the
260 * rest of the string. Every machine with memory protection I've seen
261 * does it on word boundaries, so is OK with this. But VALGRIND will
262 * still catch it and complain. The masking trick does make the hash
263 * noticably faster for short strings (like English words).
264 */
265 #ifndef VALGRIND
266
267 switch(length)
268 {
269 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
270 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
271 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
272 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
273 case 8 : b+=k[1]; a+=k[0]; break;
274 case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
275 case 6 : b+=k[1]&0xffff; a+=k[0]; break;
276 case 5 : b+=k[1]&0xff; a+=k[0]; break;
277 case 4 : a+=k[0]; break;
278 case 3 : a+=k[0]&0xffffff; break;
279 case 2 : a+=k[0]&0xffff; break;
280 case 1 : a+=k[0]&0xff; break;
281 case 0 : return c; /* zero length strings require no mixing */
282 }
283
284 #else /* make valgrind happy */
285
286 const uint8_t *k8 = (const uint8_t *)k;
287 switch(length)
288 {
289 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
290 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
291 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
292 case 9 : c+=k8[8]; /* fall through */
293 case 8 : b+=k[1]; a+=k[0]; break;
294 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
295 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
296 case 5 : b+=k8[4]; /* fall through */
297 case 4 : a+=k[0]; break;
298 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
299 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
300 case 1 : a+=k8[0]; break;
301 case 0 : return c;
302 }
303
304 #endif /* !valgrind */
305
306 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
307 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
308 const uint8_t *k8;
309
310 /*--------------- all but last block: aligned reads and different mixing */
311 while (length > 12)
312 {
313 a += k[0] + (((uint32_t)k[1])<<16);
314 b += k[2] + (((uint32_t)k[3])<<16);
315 c += k[4] + (((uint32_t)k[5])<<16);
316 mix(a,b,c);
317 length -= 12;
318 k += 6;
319 }
320
321 /*----------------------------- handle the last (probably partial) block */
322 k8 = (const uint8_t *)k;
323 switch(length)
324 {
325 case 12: c+=k[4]+(((uint32_t)k[5])<<16);
326 b+=k[2]+(((uint32_t)k[3])<<16);
327 a+=k[0]+(((uint32_t)k[1])<<16);
328 break;
329 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
330 case 10: c+=k[4];
331 b+=k[2]+(((uint32_t)k[3])<<16);
332 a+=k[0]+(((uint32_t)k[1])<<16);
333 break;
334 case 9 : c+=k8[8]; /* fall through */
335 case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
336 a+=k[0]+(((uint32_t)k[1])<<16);
337 break;
338 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
339 case 6 : b+=k[2];
340 a+=k[0]+(((uint32_t)k[1])<<16);
341 break;
342 case 5 : b+=k8[4]; /* fall through */
343 case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
344 break;
345 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
346 case 2 : a+=k[0];
347 break;
348 case 1 : a+=k8[0];
349 break;
350 case 0 : return c; /* zero length requires no mixing */
351 }
352
353 } else { /* need to read the key one byte at a time */
354 const uint8_t *k = (const uint8_t *)key;
355
356 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
357 while (length > 12)
358 {
359 a += k[0];
360 a += ((uint32_t)k[1])<<8;
361 a += ((uint32_t)k[2])<<16;
362 a += ((uint32_t)k[3])<<24;
363 b += k[4];
364 b += ((uint32_t)k[5])<<8;
365 b += ((uint32_t)k[6])<<16;
366 b += ((uint32_t)k[7])<<24;
367 c += k[8];
368 c += ((uint32_t)k[9])<<8;
369 c += ((uint32_t)k[10])<<16;
370 c += ((uint32_t)k[11])<<24;
371 mix(a,b,c);
372 length -= 12;
373 k += 12;
374 }
375
376 /*-------------------------------- last block: affect all 32 bits of (c) */
377 switch(length) /* all the case statements fall through */
378 {
379 case 12: c+=((uint32_t)k[11])<<24;
380 case 11: c+=((uint32_t)k[10])<<16;
381 case 10: c+=((uint32_t)k[9])<<8;
382 case 9 : c+=k[8];
383 case 8 : b+=((uint32_t)k[7])<<24;
384 case 7 : b+=((uint32_t)k[6])<<16;
385 case 6 : b+=((uint32_t)k[5])<<8;
386 case 5 : b+=k[4];
387 case 4 : a+=((uint32_t)k[3])<<24;
388 case 3 : a+=((uint32_t)k[2])<<16;
389 case 2 : a+=((uint32_t)k[1])<<8;
390 case 1 : a+=k[0];
391 break;
392 case 0 : return c;
393 }
394 }
395
396 final(a,b,c);
397 return c;
398 }
399
lh_char_hash(const void * k)400 unsigned long lh_char_hash(const void *k)
401 {
402 static volatile int random_seed = -1;
403
404 if (random_seed == -1) {
405 int seed;
406 /* we can't use -1 as it is the unitialized sentinel */
407 while ((seed = json_c_get_random_seed()) == -1);
408 #if defined __GNUC__
409 __sync_val_compare_and_swap(&random_seed, -1, seed);
410 #elif defined _MSC_VER
411 InterlockedCompareExchange(&random_seed, seed, -1);
412 #else
413 #warning "racy random seed initializtion if used by multiple threads"
414 random_seed = seed; /* potentially racy */
415 #endif
416 }
417
418 return hashlittle((const char*)k, strlen((const char*)k), random_seed);
419 }
420
lh_char_equal(const void * k1,const void * k2)421 int lh_char_equal(const void *k1, const void *k2)
422 {
423 return (strcmp((const char*)k1, (const char*)k2) == 0);
424 }
425
lh_table_new(int size,const char * name,lh_entry_free_fn * free_fn,lh_hash_fn * hash_fn,lh_equal_fn * equal_fn)426 struct lh_table* lh_table_new(int size, const char *name,
427 lh_entry_free_fn *free_fn,
428 lh_hash_fn *hash_fn,
429 lh_equal_fn *equal_fn)
430 {
431 int i;
432 struct lh_table *t;
433
434 t = (struct lh_table*)calloc(1, sizeof(struct lh_table));
435 if(!t) lh_abort("lh_table_new: calloc failed\n");
436 t->count = 0;
437 t->size = size;
438 t->name = name;
439 t->table = (struct lh_entry*)calloc(size, sizeof(struct lh_entry));
440 if(!t->table) lh_abort("lh_table_new: calloc failed\n");
441 t->free_fn = free_fn;
442 t->hash_fn = hash_fn;
443 t->equal_fn = equal_fn;
444 for(i = 0; i < size; i++) t->table[i].k = LH_EMPTY;
445 return t;
446 }
447
lh_kchar_table_new(int size,const char * name,lh_entry_free_fn * free_fn)448 struct lh_table* lh_kchar_table_new(int size, const char *name,
449 lh_entry_free_fn *free_fn)
450 {
451 return lh_table_new(size, name, free_fn, lh_char_hash, lh_char_equal);
452 }
453
lh_kptr_table_new(int size,const char * name,lh_entry_free_fn * free_fn)454 struct lh_table* lh_kptr_table_new(int size, const char *name,
455 lh_entry_free_fn *free_fn)
456 {
457 return lh_table_new(size, name, free_fn, lh_ptr_hash, lh_ptr_equal);
458 }
459
lh_table_resize(struct lh_table * t,int new_size)460 void lh_table_resize(struct lh_table *t, int new_size)
461 {
462 struct lh_table *new_t;
463 struct lh_entry *ent;
464
465 new_t = lh_table_new(new_size, t->name, NULL, t->hash_fn, t->equal_fn);
466 ent = t->head;
467 while(ent) {
468 lh_table_insert(new_t, ent->k, ent->v);
469 ent = ent->next;
470 }
471 free(t->table);
472 t->table = new_t->table;
473 t->size = new_size;
474 t->head = new_t->head;
475 t->tail = new_t->tail;
476 t->resizes++;
477 free(new_t);
478 }
479
lh_table_free(struct lh_table * t)480 void lh_table_free(struct lh_table *t)
481 {
482 struct lh_entry *c;
483 for(c = t->head; c != NULL; c = c->next) {
484 if(t->free_fn) {
485 t->free_fn(c);
486 }
487 }
488 free(t->table);
489 free(t);
490 }
491
492
lh_table_insert(struct lh_table * t,void * k,const void * v)493 int lh_table_insert(struct lh_table *t, void *k, const void *v)
494 {
495 unsigned long h, n;
496
497 t->inserts++;
498 if(t->count >= t->size * LH_LOAD_FACTOR) lh_table_resize(t, t->size * 2);
499
500 h = t->hash_fn(k);
501 n = h % t->size;
502
503 while( 1 ) {
504 if(t->table[n].k == LH_EMPTY || t->table[n].k == LH_FREED) break;
505 t->collisions++;
506 if ((int)++n == t->size) n = 0;
507 }
508
509 t->table[n].k = k;
510 t->table[n].v = v;
511 t->count++;
512
513 if(t->head == NULL) {
514 t->head = t->tail = &t->table[n];
515 t->table[n].next = t->table[n].prev = NULL;
516 } else {
517 t->tail->next = &t->table[n];
518 t->table[n].prev = t->tail;
519 t->table[n].next = NULL;
520 t->tail = &t->table[n];
521 }
522
523 return 0;
524 }
525
526
lh_table_lookup_entry(struct lh_table * t,const void * k)527 struct lh_entry* lh_table_lookup_entry(struct lh_table *t, const void *k)
528 {
529 unsigned long h = t->hash_fn(k);
530 unsigned long n = h % t->size;
531 int count = 0;
532
533 t->lookups++;
534 while( count < t->size ) {
535 if(t->table[n].k == LH_EMPTY) return NULL;
536 if(t->table[n].k != LH_FREED &&
537 t->equal_fn(t->table[n].k, k)) return &t->table[n];
538 if ((int)++n == t->size) n = 0;
539 count++;
540 }
541 return NULL;
542 }
543
544
lh_table_lookup(struct lh_table * t,const void * k)545 const void* lh_table_lookup(struct lh_table *t, const void *k)
546 {
547 void *result;
548 lh_table_lookup_ex(t, k, &result);
549 return result;
550 }
551
lh_table_lookup_ex(struct lh_table * t,const void * k,void ** v)552 json_bool lh_table_lookup_ex(struct lh_table* t, const void* k, void **v)
553 {
554 struct lh_entry *e = lh_table_lookup_entry(t, k);
555 if (e != NULL) {
556 if (v != NULL) *v = (void *)e->v;
557 return TRUE; /* key found */
558 }
559 if (v != NULL) *v = NULL;
560 return FALSE; /* key not found */
561 }
562
lh_table_delete_entry(struct lh_table * t,struct lh_entry * e)563 int lh_table_delete_entry(struct lh_table *t, struct lh_entry *e)
564 {
565 ptrdiff_t n = (ptrdiff_t)(e - t->table); /* CAW: fixed to be 64bit nice, still need the crazy negative case... */
566
567 /* CAW: this is bad, really bad, maybe stack goes other direction on this machine... */
568 if(n < 0) { return -2; }
569
570 if(t->table[n].k == LH_EMPTY || t->table[n].k == LH_FREED) return -1;
571 t->count--;
572 if(t->free_fn) t->free_fn(e);
573 t->table[n].v = NULL;
574 t->table[n].k = LH_FREED;
575 if(t->tail == &t->table[n] && t->head == &t->table[n]) {
576 t->head = t->tail = NULL;
577 } else if (t->head == &t->table[n]) {
578 t->head->next->prev = NULL;
579 t->head = t->head->next;
580 } else if (t->tail == &t->table[n]) {
581 t->tail->prev->next = NULL;
582 t->tail = t->tail->prev;
583 } else {
584 t->table[n].prev->next = t->table[n].next;
585 t->table[n].next->prev = t->table[n].prev;
586 }
587 t->table[n].next = t->table[n].prev = NULL;
588 return 0;
589 }
590
591
lh_table_delete(struct lh_table * t,const void * k)592 int lh_table_delete(struct lh_table *t, const void *k)
593 {
594 struct lh_entry *e = lh_table_lookup_entry(t, k);
595 if(!e) return -1;
596 return lh_table_delete_entry(t, e);
597 }
598
lh_table_length(struct lh_table * t)599 int lh_table_length(struct lh_table *t)
600 {
601 return t->count;
602 }
603