Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

uthash.h @ 8710

Last change on this file since 8710 was 8502, checked in by steve, 12 years ago
Added in uthash so that I can write some C code to do some neighbourhood calculations
File size: 57.5 KB

Line
1	/*
2	Copyright (c) 2003-2012, Troy D. Hanson http://uthash.sourceforge.net
3	All rights reserved.
4
5	Redistribution and use in source and binary forms, with or without
6	modification, are permitted provided that the following conditions are met:
7
8	* Redistributions of source code must retain the above copyright
9	notice, this list of conditions and the following disclaimer.
10
11	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
12	IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
13	TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14	PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
15	OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
16	EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
17	PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
18	PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
19	LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
20	NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21	SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22	*/
23
24	#ifndef UTHASH_H
25	#define UTHASH_H
26
27	#include <string.h> /* memcmp,strlen */
28	#include <stddef.h> /* ptrdiff_t */
29	#include <stdlib.h> /* exit() */
30
31	/* These macros use decltype or the earlier __typeof GNU extension.
32	As decltype is only available in newer compilers (VS2010 or gcc 4.3+
33	when compiling c++ source) this code uses whatever method is needed
34	or, for VS2008 where neither is available, uses casting workarounds. */
35	#ifdef _MSC_VER /* MS compiler */
36	#if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */
37	#define DECLTYPE(x) (decltype(x))
38	#else /* VS2008 or older (or VS2010 in C mode) */
39	#define NO_DECLTYPE
40	#define DECLTYPE(x)
41	#endif
42	#else /* GNU, Sun and other compilers */
43	#define DECLTYPE(x) (__typeof(x))
44	#endif
45
46	#ifdef NO_DECLTYPE
47	#define DECLTYPE_ASSIGN(dst,src) \
48	do { \
49	char _da_dst = (char)(&(dst)); \
50	_da_dst = (char)(src); \
51	} while(0)
52	#else
53	#define DECLTYPE_ASSIGN(dst,src) \
54	do { \
55	(dst) = DECLTYPE(dst)(src); \
56	} while(0)
57	#endif
58
59	/* a number of the hash function use uint32_t which isn't defined on win32 */
60	#ifdef _MSC_VER
61	typedef unsigned int uint32_t;
62	typedef unsigned char uint8_t;
63	#else
64	#include <inttypes.h> /* uint32_t */
65	#endif
66
67	#define UTHASH_VERSION 1.9.6
68
69	#ifndef uthash_fatal
70	#define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */
71	#endif
72	#ifndef uthash_malloc
73	#define uthash_malloc(sz) malloc(sz) /* malloc fcn */
74	#endif
75	#ifndef uthash_free
76	#define uthash_free(ptr,sz) free(ptr) /* free fcn */
77	#endif
78
79	#ifndef uthash_noexpand_fyi
80	#define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */
81	#endif
82	#ifndef uthash_expand_fyi
83	#define uthash_expand_fyi(tbl) /* can be defined to log expands */
84	#endif
85
86	/* initial number of buckets */
87	#define HASH_INITIAL_NUM_BUCKETS 32 /* initial number of buckets */
88	#define HASH_INITIAL_NUM_BUCKETS_LOG2 5 /* lg2 of initial number of buckets */
89	#define HASH_BKT_CAPACITY_THRESH 10 /* expand when bucket count reaches */
90
91	/* calculate the element whose hash handle address is hhe */
92	#define ELMT_FROM_HH(tbl,hhp) ((void)(((char)(hhp)) - ((tbl)->hho)))
93
94	#define HASH_FIND(hh,head,keyptr,keylen,out) \
95	do { \
96	unsigned _hf_bkt,_hf_hashv; \
97	out=NULL; \
98	if (head) { \
99	HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \
100	if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv)) { \
101	HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], \
102	keyptr,keylen,out); \
103	} \
104	} \
105	} while (0)
106
107	#ifdef HASH_BLOOM
108	#define HASH_BLOOM_BITLEN (1ULL << HASH_BLOOM)
109	#define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8) + ((HASH_BLOOM_BITLEN%8) ? 1:0)
110	#define HASH_BLOOM_MAKE(tbl) \
111	do { \
112	(tbl)->bloom_nbits = HASH_BLOOM; \
113	(tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \
114	if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \
115	memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \
116	(tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \
117	} while (0)
118
119	#define HASH_BLOOM_FREE(tbl) \
120	do { \
121	uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \
122	} while (0)
123
124	#define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8] \|= (1U << ((idx)%8)))
125	#define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8] & (1U << ((idx)%8)))
126
127	#define HASH_BLOOM_ADD(tbl,hashv) \
128	HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
129
130	#define HASH_BLOOM_TEST(tbl,hashv) \
131	HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
132
133	#else
134	#define HASH_BLOOM_MAKE(tbl)
135	#define HASH_BLOOM_FREE(tbl)
136	#define HASH_BLOOM_ADD(tbl,hashv)
137	#define HASH_BLOOM_TEST(tbl,hashv) (1)
138	#endif
139
140	#define HASH_MAKE_TABLE(hh,head) \
141	do { \
142	(head)->hh.tbl = (UT_hash_table*)uthash_malloc( \
143	sizeof(UT_hash_table)); \
144	if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \
145	memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \
146	(head)->hh.tbl->tail = &((head)->hh); \
147	(head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \
148	(head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \
149	(head)->hh.tbl->hho = (char)(&(head)->hh) - (char)(head); \
150	(head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \
151	HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \
152	if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \
153	memset((head)->hh.tbl->buckets, 0, \
154	HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \
155	HASH_BLOOM_MAKE((head)->hh.tbl); \
156	(head)->hh.tbl->signature = HASH_SIGNATURE; \
157	} while(0)
158
159	#define HASH_ADD(hh,head,fieldname,keylen_in,add) \
160	HASH_ADD_KEYPTR(hh,head,&((add)->fieldname),keylen_in,add)
161
162	#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \
163	do { \
164	unsigned _ha_bkt; \
165	(add)->hh.next = NULL; \
166	(add)->hh.key = (char*)keyptr; \
167	(add)->hh.keylen = (unsigned)keylen_in; \
168	if (!(head)) { \
169	head = (add); \
170	(head)->hh.prev = NULL; \
171	HASH_MAKE_TABLE(hh,head); \
172	} else { \
173	(head)->hh.tbl->tail->next = (add); \
174	(add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \
175	(head)->hh.tbl->tail = &((add)->hh); \
176	} \
177	(head)->hh.tbl->num_items++; \
178	(add)->hh.tbl = (head)->hh.tbl; \
179	HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets, \
180	(add)->hh.hashv, _ha_bkt); \
181	HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh); \
182	HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv); \
183	HASH_EMIT_KEY(hh,head,keyptr,keylen_in); \
184	HASH_FSCK(hh,head); \
185	} while(0)
186
187	#define HASH_TO_BKT( hashv, num_bkts, bkt ) \
188	do { \
189	bkt = ((hashv) & ((num_bkts) - 1)); \
190	} while(0)
191
192	/* delete "delptr" from the hash table.
193	* "the usual" patch-up process for the app-order doubly-linked-list.
194	* The use of _hd_hh_del below deserves special explanation.
195	* These used to be expressed using (delptr) but that led to a bug
196	* if someone used the same symbol for the head and deletee, like
197	* HASH_DELETE(hh,users,users);
198	* We want that to work, but by changing the head (users) below
199	* we were forfeiting our ability to further refer to the deletee (users)
200	* in the patch-up process. Solution: use scratch space to
201	* copy the deletee pointer, then the latter references are via that
202	* scratch pointer rather than through the repointed (users) symbol.
203	*/
204	#define HASH_DELETE(hh,head,delptr) \
205	do { \
206	unsigned _hd_bkt; \
207	struct UT_hash_handle *_hd_hh_del; \
208	if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \
209	uthash_free((head)->hh.tbl->buckets, \
210	(head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
211	HASH_BLOOM_FREE((head)->hh.tbl); \
212	uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
213	head = NULL; \
214	} else { \
215	_hd_hh_del = &((delptr)->hh); \
216	if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \
217	(head)->hh.tbl->tail = \
218	(UT_hash_handle)((char)((delptr)->hh.prev) + \
219	(head)->hh.tbl->hho); \
220	} \
221	if ((delptr)->hh.prev) { \
222	((UT_hash_handle)((char)((delptr)->hh.prev) + \
223	(head)->hh.tbl->hho))->next = (delptr)->hh.next; \
224	} else { \
225	DECLTYPE_ASSIGN(head,(delptr)->hh.next); \
226	} \
227	if (_hd_hh_del->next) { \
228	((UT_hash_handle)((char)_hd_hh_del->next + \
229	(head)->hh.tbl->hho))->prev = \
230	_hd_hh_del->prev; \
231	} \
232	HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \
233	HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \
234	(head)->hh.tbl->num_items--; \
235	} \
236	HASH_FSCK(hh,head); \
237	} while (0)
238
239
240	/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */
241	#define HASH_FIND_STR(head,findstr,out) \
242	HASH_FIND(hh,head,findstr,strlen(findstr),out)
243	#define HASH_ADD_STR(head,strfield,add) \
244	HASH_ADD(hh,head,strfield,strlen(add->strfield),add)
245	#define HASH_FIND_INT(head,findint,out) \
246	HASH_FIND(hh,head,findint,sizeof(int),out)
247	#define HASH_ADD_INT(head,intfield,add) \
248	HASH_ADD(hh,head,intfield,sizeof(int),add)
249	#define HASH_FIND_PTR(head,findptr,out) \
250	HASH_FIND(hh,head,findptr,sizeof(void *),out)
251	#define HASH_ADD_PTR(head,ptrfield,add) \
252	HASH_ADD(hh,head,ptrfield,sizeof(void *),add)
253	#define HASH_DEL(head,delptr) \
254	HASH_DELETE(hh,head,delptr)
255
256	/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined.
257	* This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined.
258	*/
259	#ifdef HASH_DEBUG
260	#define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0)
261	#define HASH_FSCK(hh,head) \
262	do { \
263	unsigned _bkt_i; \
264	unsigned _count, _bkt_count; \
265	char *_prev; \
266	struct UT_hash_handle *_thh; \
267	if (head) { \
268	_count = 0; \
269	for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \
270	_bkt_count = 0; \
271	_thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \
272	_prev = NULL; \
273	while (_thh) { \
274	if (_prev != (char*)(_thh->hh_prev)) { \
275	HASH_OOPS("invalid hh_prev %p, actual %p\n", \
276	_thh->hh_prev, _prev ); \
277	} \
278	_bkt_count++; \
279	_prev = (char*)(_thh); \
280	_thh = _thh->hh_next; \
281	} \
282	_count += _bkt_count; \
283	if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \
284	HASH_OOPS("invalid bucket count %d, actual %d\n", \
285	(head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \
286	} \
287	} \
288	if (_count != (head)->hh.tbl->num_items) { \
289	HASH_OOPS("invalid hh item count %d, actual %d\n", \
290	(head)->hh.tbl->num_items, _count ); \
291	} \
292	/* traverse hh in app order; check next/prev integrity, count */ \
293	_count = 0; \
294	_prev = NULL; \
295	_thh = &(head)->hh; \
296	while (_thh) { \
297	_count++; \
298	if (_prev !=(char*)(_thh->prev)) { \
299	HASH_OOPS("invalid prev %p, actual %p\n", \
300	_thh->prev, _prev ); \
301	} \
302	_prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \
303	_thh = ( _thh->next ? (UT_hash_handle)((char)(_thh->next) + \
304	(head)->hh.tbl->hho) : NULL ); \
305	} \
306	if (_count != (head)->hh.tbl->num_items) { \
307	HASH_OOPS("invalid app item count %d, actual %d\n", \
308	(head)->hh.tbl->num_items, _count ); \
309	} \
310	} \
311	} while (0)
312	#else
313	#define HASH_FSCK(hh,head)
314	#endif
315
316	/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to
317	* the descriptor to which this macro is defined for tuning the hash function.
318	* The app can #include <unistd.h> to get the prototype for write(2). */
319	#ifdef HASH_EMIT_KEYS
320	#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \
321	do { \
322	unsigned _klen = fieldlen; \
323	write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \
324	write(HASH_EMIT_KEYS, keyptr, fieldlen); \
325	} while (0)
326	#else
327	#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
328	#endif
329
330	/* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */
331	#ifdef HASH_FUNCTION
332	#define HASH_FCN HASH_FUNCTION
333	#else
334	#define HASH_FCN HASH_JEN
335	#endif
336
337	/* The Bernstein hash function, used in Perl prior to v5.6 */
338	#define HASH_BER(key,keylen,num_bkts,hashv,bkt) \
339	do { \
340	unsigned _hb_keylen=keylen; \
341	char _hb_key=(char)(key); \
342	(hashv) = 0; \
343	while (_hb_keylen--) { (hashv) = ((hashv) * 33) + *_hb_key++; } \
344	bkt = (hashv) & (num_bkts-1); \
345	} while (0)
346
347
348	/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at
349	* http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */
350	#define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \
351	do { \
352	unsigned _sx_i; \
353	char _hs_key=(char)(key); \
354	hashv = 0; \
355	for(_sx_i=0; _sx_i < keylen; _sx_i++) \
356	hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \
357	bkt = hashv & (num_bkts-1); \
358	} while (0)
359
360	#define HASH_FNV(key,keylen,num_bkts,hashv,bkt) \
361	do { \
362	unsigned _fn_i; \
363	char _hf_key=(char)(key); \
364	hashv = 2166136261UL; \
365	for(_fn_i=0; _fn_i < keylen; _fn_i++) \
366	hashv = (hashv * 16777619) ^ _hf_key[_fn_i]; \
367	bkt = hashv & (num_bkts-1); \
368	} while(0)
369
370	#define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \
371	do { \
372	unsigned _ho_i; \
373	char _ho_key=(char)(key); \
374	hashv = 0; \
375	for(_ho_i=0; _ho_i < keylen; _ho_i++) { \
376	hashv += _ho_key[_ho_i]; \
377	hashv += (hashv << 10); \
378	hashv ^= (hashv >> 6); \
379	} \
380	hashv += (hashv << 3); \
381	hashv ^= (hashv >> 11); \
382	hashv += (hashv << 15); \
383	bkt = hashv & (num_bkts-1); \
384	} while(0)
385
386	#define HASH_JEN_MIX(a,b,c) \
387	do { \
388	a -= b; a -= c; a ^= ( c >> 13 ); \
389	b -= c; b -= a; b ^= ( a << 8 ); \
390	c -= a; c -= b; c ^= ( b >> 13 ); \
391	a -= b; a -= c; a ^= ( c >> 12 ); \
392	b -= c; b -= a; b ^= ( a << 16 ); \
393	c -= a; c -= b; c ^= ( b >> 5 ); \
394	a -= b; a -= c; a ^= ( c >> 3 ); \
395	b -= c; b -= a; b ^= ( a << 10 ); \
396	c -= a; c -= b; c ^= ( b >> 15 ); \
397	} while (0)
398
399	#define HASH_JEN(key,keylen,num_bkts,hashv,bkt) \
400	do { \
401	unsigned _hj_i,_hj_j,_hj_k; \
402	char _hj_key=(char)(key); \
403	hashv = 0xfeedbeef; \
404	_hj_i = _hj_j = 0x9e3779b9; \
405	_hj_k = (unsigned)keylen; \
406	while (_hj_k >= 12) { \
407	_hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \
408	+ ( (unsigned)_hj_key[2] << 16 ) \
409	+ ( (unsigned)_hj_key[3] << 24 ) ); \
410	_hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \
411	+ ( (unsigned)_hj_key[6] << 16 ) \
412	+ ( (unsigned)_hj_key[7] << 24 ) ); \
413	hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \
414	+ ( (unsigned)_hj_key[10] << 16 ) \
415	+ ( (unsigned)_hj_key[11] << 24 ) ); \
416	\
417	HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
418	\
419	_hj_key += 12; \
420	_hj_k -= 12; \
421	} \
422	hashv += keylen; \
423	switch ( _hj_k ) { \
424	case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); \
425	case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); \
426	case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); \
427	case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); \
428	case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); \
429	case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); \
430	case 5: _hj_j += _hj_key[4]; \
431	case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); \
432	case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); \
433	case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); \
434	case 1: _hj_i += _hj_key[0]; \
435	} \
436	HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
437	bkt = hashv & (num_bkts-1); \
438	} while(0)
439
440	/* The Paul Hsieh hash function */
441	#undef get16bits
442	#if (defined(__GNUC__) && defined(__i386__)) \|\| defined(__WATCOMC__) \
443	\|\| defined(_MSC_VER) \|\| defined (__BORLANDC__) \|\| defined (__TURBOC__)
444	#define get16bits(d) (((const uint16_t ) (d)))
445	#endif
446
447	#if !defined (get16bits)
448	#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \
449	+(uint32_t)(((const uint8_t *)(d))[0]) )
450	#endif
451	#define HASH_SFH(key,keylen,num_bkts,hashv,bkt) \
452	do { \
453	char _sfh_key=(char)(key); \
454	uint32_t _sfh_tmp, _sfh_len = keylen; \
455	\
456	int _sfh_rem = _sfh_len & 3; \
457	_sfh_len >>= 2; \
458	hashv = 0xcafebabe; \
459	\
460	/* Main loop */ \
461	for (;_sfh_len > 0; _sfh_len--) { \
462	hashv += get16bits (_sfh_key); \
463	_sfh_tmp = (get16bits (_sfh_key+2) << 11) ^ hashv; \
464	hashv = (hashv << 16) ^ _sfh_tmp; \
465	_sfh_key += 2*sizeof (uint16_t); \
466	hashv += hashv >> 11; \
467	} \
468	\
469	/* Handle end cases */ \
470	switch (_sfh_rem) { \
471	case 3: hashv += get16bits (_sfh_key); \
472	hashv ^= hashv << 16; \
473	hashv ^= _sfh_key[sizeof (uint16_t)] << 18; \
474	hashv += hashv >> 11; \
475	break; \
476	case 2: hashv += get16bits (_sfh_key); \
477	hashv ^= hashv << 11; \
478	hashv += hashv >> 17; \
479	break; \
480	case 1: hashv += *_sfh_key; \
481	hashv ^= hashv << 10; \
482	hashv += hashv >> 1; \
483	} \
484	\
485	/* Force "avalanching" of final 127 bits */ \
486	hashv ^= hashv << 3; \
487	hashv += hashv >> 5; \
488	hashv ^= hashv << 4; \
489	hashv += hashv >> 17; \
490	hashv ^= hashv << 25; \
491	hashv += hashv >> 6; \
492	bkt = hashv & (num_bkts-1); \
493	} while(0)
494
495	#ifdef HASH_USING_NO_STRICT_ALIASING
496	/* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads.
497	* For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error.
498	* MurmurHash uses the faster approach only on CPU's where we know it's safe.
499	*
500	* Note the preprocessor built-in defines can be emitted using:
501	*
502	* gcc -m64 -dM -E - < /dev/null (on gcc)
503	* cc -## a.c (where a.c is a simple test file) (Sun Studio)
504	*/
505	#if (defined(__i386__) \|\| defined(__x86_64__))
506	#define MUR_GETBLOCK(p,i) p[i]
507	#else /* non intel */
508	#define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 0x3) == 0)
509	#define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 0x3) == 1)
510	#define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 0x3) == 2)
511	#define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 0x3) == 3)
512	#define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL))
513	#if (defined(__BIG_ENDIAN__) \|\| defined(SPARC) \|\| defined(__ppc__) \|\| defined(__ppc64__))
514	#define MUR_THREE_ONE(p) ((((WP(p))&0x00ffffff) << 8) \| ((((WP(p)+1))&0xff000000) >> 24))
515	#define MUR_TWO_TWO(p) ((((WP(p))&0x0000ffff) <<16) \| ((((WP(p)+1))&0xffff0000) >> 16))
516	#define MUR_ONE_THREE(p) ((((WP(p))&0x000000ff) <<24) \| ((((WP(p)+1))&0xffffff00) >> 8))
517	#else /* assume little endian non-intel */
518	#define MUR_THREE_ONE(p) ((((WP(p))&0xffffff00) >> 8) \| ((((WP(p)+1))&0x000000ff) << 24))
519	#define MUR_TWO_TWO(p) ((((WP(p))&0xffff0000) >>16) \| ((((WP(p)+1))&0x0000ffff) << 16))
520	#define MUR_ONE_THREE(p) ((((WP(p))&0xff000000) >>24) \| ((((WP(p)+1))&0x00ffffff) << 8))
521	#endif
522	#define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : \
523	(MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \
524	(MUR_PLUS2_ALIGNED(p) ? MUR_TWO_TWO(p) : \
525	MUR_ONE_THREE(p))))
526	#endif
527	#define MUR_ROTL32(x,r) (((x) << (r)) \| ((x) >> (32 - (r))))
528	#define MUR_FMIX(_h) \
529	do { \
530	_h ^= _h >> 16; \
531	_h *= 0x85ebca6b; \
532	_h ^= _h >> 13; \
533	_h *= 0xc2b2ae35l; \
534	_h ^= _h >> 16; \
535	} while(0)
536
537	#define HASH_MUR(key,keylen,num_bkts,hashv,bkt) \
538	do { \
539	const uint8_t _mur_data = (const uint8_t)(key); \
540	const int _mur_nblocks = (keylen) / 4; \
541	uint32_t _mur_h1 = 0xf88D5353; \
542	uint32_t _mur_c1 = 0xcc9e2d51; \
543	uint32_t _mur_c2 = 0x1b873593; \
544	const uint32_t _mur_blocks = (const uint32_t)(_mur_data+_mur_nblocks*4); \
545	int _mur_i; \
546	for(_mur_i = -_mur_nblocks; _mur_i; _mur_i++) { \
547	uint32_t _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \
548	_mur_k1 *= _mur_c1; \
549	_mur_k1 = MUR_ROTL32(_mur_k1,15); \
550	_mur_k1 *= _mur_c2; \
551	\
552	_mur_h1 ^= _mur_k1; \
553	_mur_h1 = MUR_ROTL32(_mur_h1,13); \
554	_mur_h1 = _mur_h1*5+0xe6546b64; \
555	} \
556	const uint8_t _mur_tail = (const uint8_t)(_mur_data + _mur_nblocks*4); \
557	uint32_t _mur_k1=0; \
558	switch((keylen) & 3) { \
559	case 3: _mur_k1 ^= _mur_tail[2] << 16; \
560	case 2: _mur_k1 ^= _mur_tail[1] << 8; \
561	case 1: _mur_k1 ^= _mur_tail[0]; \
562	_mur_k1 *= _mur_c1; \
563	_mur_k1 = MUR_ROTL32(_mur_k1,15); \
564	_mur_k1 *= _mur_c2; \
565	_mur_h1 ^= _mur_k1; \
566	} \
567	_mur_h1 ^= (keylen); \
568	MUR_FMIX(_mur_h1); \
569	hashv = _mur_h1; \
570	bkt = hashv & (num_bkts-1); \
571	} while(0)
572	#endif /* HASH_USING_NO_STRICT_ALIASING */
573
574	/* key comparison function; return 0 if keys equal */
575	#define HASH_KEYCMP(a,b,len) memcmp(a,b,len)
576
577	/* iterate over items in a known bucket to find desired item */
578	#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \
579	do { \
580	if (head.hh_head) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head)); \
581	else out=NULL; \
582	while (out) { \
583	if ((out)->hh.keylen == keylen_in) { \
584	if ((HASH_KEYCMP((out)->hh.key,keyptr,keylen_in)) == 0) break; \
585	} \
586	if ((out)->hh.hh_next) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,(out)->hh.hh_next)); \
587	else out = NULL; \
588	} \
589	} while(0)
590
591	/* add an item to a bucket */
592	#define HASH_ADD_TO_BKT(head,addhh) \
593	do { \
594	head.count++; \
595	(addhh)->hh_next = head.hh_head; \
596	(addhh)->hh_prev = NULL; \
597	if (head.hh_head) { (head).hh_head->hh_prev = (addhh); } \
598	(head).hh_head=addhh; \
599	if (head.count >= ((head.expand_mult+1) * HASH_BKT_CAPACITY_THRESH) \
600	&& (addhh)->tbl->noexpand != 1) { \
601	HASH_EXPAND_BUCKETS((addhh)->tbl); \
602	} \
603	} while(0)
604
605	/* remove an item from a given bucket */
606	#define HASH_DEL_IN_BKT(hh,head,hh_del) \
607	(head).count--; \
608	if ((head).hh_head == hh_del) { \
609	(head).hh_head = hh_del->hh_next; \
610	} \
611	if (hh_del->hh_prev) { \
612	hh_del->hh_prev->hh_next = hh_del->hh_next; \
613	} \
614	if (hh_del->hh_next) { \
615	hh_del->hh_next->hh_prev = hh_del->hh_prev; \
616	}
617
618	/* Bucket expansion has the effect of doubling the number of buckets
619	* and redistributing the items into the new buckets. Ideally the
620	* items will distribute more or less evenly into the new buckets
621	* (the extent to which this is true is a measure of the quality of
622	* the hash function as it applies to the key domain).
623	*
624	* With the items distributed into more buckets, the chain length
625	* (item count) in each bucket is reduced. Thus by expanding buckets
626	* the hash keeps a bound on the chain length. This bounded chain
627	* length is the essence of how a hash provides constant time lookup.
628	*
629	* The calculation of tbl->ideal_chain_maxlen below deserves some
630	* explanation. First, keep in mind that we're calculating the ideal
631	* maximum chain length based on the new (doubled) bucket count.
632	* In fractions this is just n/b (n=number of items,b=new num buckets).
633	* Since the ideal chain length is an integer, we want to calculate
634	* ceil(n/b). We don't depend on floating point arithmetic in this
635	* hash, so to calculate ceil(n/b) with integers we could write
636	*
637	* ceil(n/b) = (n/b) + ((n%b)?1:0)
638	*
639	* and in fact a previous version of this hash did just that.
640	* But now we have improved things a bit by recognizing that b is
641	* always a power of two. We keep its base 2 log handy (call it lb),
642	* so now we can write this with a bit shift and logical AND:
643	*
644	* ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
645	*
646	*/
647	#define HASH_EXPAND_BUCKETS(tbl) \
648	do { \
649	unsigned _he_bkt; \
650	unsigned _he_bkt_i; \
651	struct UT_hash_handle _he_thh, _he_hh_nxt; \
652	UT_hash_bucket _he_new_buckets, _he_newbkt; \
653	_he_new_buckets = (UT_hash_bucket*)uthash_malloc( \
654	2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
655	if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \
656	memset(_he_new_buckets, 0, \
657	2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
658	tbl->ideal_chain_maxlen = \
659	(tbl->num_items >> (tbl->log2_num_buckets+1)) + \
660	((tbl->num_items & ((tbl->num_buckets*2)-1)) ? 1 : 0); \
661	tbl->nonideal_items = 0; \
662	for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \
663	{ \
664	_he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \
665	while (_he_thh) { \
666	_he_hh_nxt = _he_thh->hh_next; \
667	HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2, _he_bkt); \
668	_he_newbkt = &(_he_new_buckets[ _he_bkt ]); \
669	if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \
670	tbl->nonideal_items++; \
671	_he_newbkt->expand_mult = _he_newbkt->count / \
672	tbl->ideal_chain_maxlen; \
673	} \
674	_he_thh->hh_prev = NULL; \
675	_he_thh->hh_next = _he_newbkt->hh_head; \
676	if (_he_newbkt->hh_head) _he_newbkt->hh_head->hh_prev = \
677	_he_thh; \
678	_he_newbkt->hh_head = _he_thh; \
679	_he_thh = _he_hh_nxt; \
680	} \
681	} \
682	uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
683	tbl->num_buckets *= 2; \
684	tbl->log2_num_buckets++; \
685	tbl->buckets = _he_new_buckets; \
686	tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \
687	(tbl->ineff_expands+1) : 0; \
688	if (tbl->ineff_expands > 1) { \
689	tbl->noexpand=1; \
690	uthash_noexpand_fyi(tbl); \
691	} \
692	uthash_expand_fyi(tbl); \
693	} while(0)
694
695
696	/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */
697	/* Note that HASH_SORT assumes the hash handle name to be hh.
698	* HASH_SRT was added to allow the hash handle name to be passed in. */
699	#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn)
700	#define HASH_SRT(hh,head,cmpfcn) \
701	do { \
702	unsigned _hs_i; \
703	unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \
704	struct UT_hash_handle _hs_p, _hs_q, _hs_e, _hs_list, *_hs_tail; \
705	if (head) { \
706	_hs_insize = 1; \
707	_hs_looping = 1; \
708	_hs_list = &((head)->hh); \
709	while (_hs_looping) { \
710	_hs_p = _hs_list; \
711	_hs_list = NULL; \
712	_hs_tail = NULL; \
713	_hs_nmerges = 0; \
714	while (_hs_p) { \
715	_hs_nmerges++; \
716	_hs_q = _hs_p; \
717	_hs_psize = 0; \
718	for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \
719	_hs_psize++; \
720	_hs_q = (UT_hash_handle*)((_hs_q->next) ? \
721	((void)((char)(_hs_q->next) + \
722	(head)->hh.tbl->hho)) : NULL); \
723	if (! (_hs_q) ) break; \
724	} \
725	_hs_qsize = _hs_insize; \
726	while ((_hs_psize > 0) \|\| ((_hs_qsize > 0) && _hs_q )) { \
727	if (_hs_psize == 0) { \
728	_hs_e = _hs_q; \
729	_hs_q = (UT_hash_handle*)((_hs_q->next) ? \
730	((void)((char)(_hs_q->next) + \
731	(head)->hh.tbl->hho)) : NULL); \
732	_hs_qsize--; \
733	} else if ( (_hs_qsize == 0) \|\| !(_hs_q) ) { \
734	_hs_e = _hs_p; \
735	_hs_p = (UT_hash_handle*)((_hs_p->next) ? \
736	((void)((char)(_hs_p->next) + \
737	(head)->hh.tbl->hho)) : NULL); \
738	_hs_psize--; \
739	} else if (( \
740	cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \
741	DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \
742	) <= 0) { \
743	_hs_e = _hs_p; \
744	_hs_p = (UT_hash_handle*)((_hs_p->next) ? \
745	((void)((char)(_hs_p->next) + \
746	(head)->hh.tbl->hho)) : NULL); \
747	_hs_psize--; \
748	} else { \
749	_hs_e = _hs_q; \
750	_hs_q = (UT_hash_handle*)((_hs_q->next) ? \
751	((void)((char)(_hs_q->next) + \
752	(head)->hh.tbl->hho)) : NULL); \
753	_hs_qsize--; \
754	} \
755	if ( _hs_tail ) { \
756	_hs_tail->next = ((_hs_e) ? \
757	ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \
758	} else { \
759	_hs_list = _hs_e; \
760	} \
761	_hs_e->prev = ((_hs_tail) ? \
762	ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \
763	_hs_tail = _hs_e; \
764	} \
765	_hs_p = _hs_q; \
766	} \
767	_hs_tail->next = NULL; \
768	if ( _hs_nmerges <= 1 ) { \
769	_hs_looping=0; \
770	(head)->hh.tbl->tail = _hs_tail; \
771	DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \
772	} \
773	_hs_insize *= 2; \
774	} \
775	HASH_FSCK(hh,head); \
776	} \
777	} while (0)
778
779	/* This function selects items from one hash into another hash.
780	* The end result is that the selected items have dual presence
781	* in both hashes. There is no copy of the items made; rather
782	* they are added into the new hash through a secondary hash
783	* hash handle that must be present in the structure. */
784	#define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \
785	do { \
786	unsigned _src_bkt, _dst_bkt; \
787	void _last_elt=NULL, _elt; \
788	UT_hash_handle _src_hh, _dst_hh, *_last_elt_hh=NULL; \
789	ptrdiff_t _dst_hho = ((char)(&(dst)->hh_dst) - (char)(dst)); \
790	if (src) { \
791	for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \
792	for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \
793	_src_hh; \
794	_src_hh = _src_hh->hh_next) { \
795	_elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \
796	if (cond(_elt)) { \
797	_dst_hh = (UT_hash_handle)(((char)_elt) + _dst_hho); \
798	_dst_hh->key = _src_hh->key; \
799	_dst_hh->keylen = _src_hh->keylen; \
800	_dst_hh->hashv = _src_hh->hashv; \
801	_dst_hh->prev = _last_elt; \
802	_dst_hh->next = NULL; \
803	if (_last_elt_hh) { _last_elt_hh->next = _elt; } \
804	if (!dst) { \
805	DECLTYPE_ASSIGN(dst,_elt); \
806	HASH_MAKE_TABLE(hh_dst,dst); \
807	} else { \
808	_dst_hh->tbl = (dst)->hh_dst.tbl; \
809	} \
810	HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \
811	HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \
812	(dst)->hh_dst.tbl->num_items++; \
813	_last_elt = _elt; \
814	_last_elt_hh = _dst_hh; \
815	} \
816	} \
817	} \
818	} \
819	HASH_FSCK(hh_dst,dst); \
820	} while (0)
821
822	#define HASH_CLEAR(hh,head) \
823	do { \
824	if (head) { \
825	uthash_free((head)->hh.tbl->buckets, \
826	(head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \
827	HASH_BLOOM_FREE((head)->hh.tbl); \
828	uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
829	(head)=NULL; \
830	} \
831	} while(0)
832
833	#ifdef NO_DECLTYPE
834	#define HASH_ITER(hh,head,el,tmp) \
835	for((el)=(head), ((char)(&(tmp)))=(char)((head)?(head)->hh.next:NULL); \
836	el; (el)=(tmp),((char)(&(tmp)))=(char)((tmp)?(tmp)->hh.next:NULL))
837	#else
838	#define HASH_ITER(hh,head,el,tmp) \
839	for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); \
840	el; (el)=(tmp),(tmp)=DECLTYPE(el)((tmp)?(tmp)->hh.next:NULL))
841	#endif
842
843	/* obtain a count of items in the hash */
844	#define HASH_COUNT(head) HASH_CNT(hh,head)
845	#define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0)
846
847	typedef struct UT_hash_bucket {
848	struct UT_hash_handle *hh_head;
849	unsigned count;
850
851	/* expand_mult is normally set to 0. In this situation, the max chain length
852	* threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
853	* the bucket's chain exceeds this length, bucket expansion is triggered).
854	* However, setting expand_mult to a non-zero value delays bucket expansion
855	* (that would be triggered by additions to this particular bucket)
856	* until its chain length reaches a multiple of HASH_BKT_CAPACITY_THRESH.
857	* (The multiplier is simply expand_mult+1). The whole idea of this
858	* multiplier is to reduce bucket expansions, since they are expensive, in
859	* situations where we know that a particular bucket tends to be overused.
860	* It is better to let its chain length grow to a longer yet-still-bounded
861	* value, than to do an O(n) bucket expansion too often.
862	*/
863	unsigned expand_mult;
864
865	} UT_hash_bucket;
866
867	/* random signature used only to find hash tables in external analysis */
868	#define HASH_SIGNATURE 0xa0111fe1
869	#define HASH_BLOOM_SIGNATURE 0xb12220f2
870
871	typedef struct UT_hash_table {
872	UT_hash_bucket *buckets;
873	unsigned num_buckets, log2_num_buckets;
874	unsigned num_items;
875	struct UT_hash_handle tail; / tail hh in app order, for fast append */
876	ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */
877
878	/* in an ideal situation (all buckets used equally), no bucket would have
879	* more than ceil(#items/#buckets) items. that's the ideal chain length. */
880	unsigned ideal_chain_maxlen;
881
882	/* nonideal_items is the number of items in the hash whose chain position
883	* exceeds the ideal chain maxlen. these items pay the penalty for an uneven
884	* hash distribution; reaching them in a chain traversal takes >ideal steps */
885	unsigned nonideal_items;
886
887	/* ineffective expands occur when a bucket doubling was performed, but
888	* afterward, more than half the items in the hash had nonideal chain
889	* positions. If this happens on two consecutive expansions we inhibit any
890	* further expansion, as it's not helping; this happens when the hash
891	* function isn't a good fit for the key domain. When expansion is inhibited
892	* the hash will still work, albeit no longer in constant time. */
893	unsigned ineff_expands, noexpand;
894
895	uint32_t signature; /* used only to find hash tables in external analysis */
896	#ifdef HASH_BLOOM
897	uint32_t bloom_sig; /* used only to test bloom exists in external analysis */
898	uint8_t *bloom_bv;
899	char bloom_nbits;
900	#endif
901
902	} UT_hash_table;
903
904	typedef struct UT_hash_handle {
905	struct UT_hash_table *tbl;
906	void prev; / prev element in app order */
907	void next; / next element in app order */
908	struct UT_hash_handle hh_prev; / previous hh in bucket order */
909	struct UT_hash_handle hh_next; / next hh in bucket order */
910	void key; / ptr to enclosing struct's key */
911	unsigned keylen; /* enclosing struct's key len */
912	unsigned hashv; /* result of hash-fcn(key) */
913	} UT_hash_handle;
914
915	#endif /* UTHASH_H */

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/anuga_core/source/anuga/utilities/uthash.h @ 8710

Download in other formats: