librsync  2.0.2
hashtable.h
Go to the documentation of this file.
1 /*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*-
2  *
3  * hashtable.h -- a generic open addressing hashtable.
4  *
5  * Copyright (C) 2003 by Donovan Baarda <abo@minkirri.apana.org.au>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU Lesser General Public License as published by
9  * the Free Software Foundation; either version 2.1 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
20 #ifndef _HASHTABLE_H_
21 # define _HASHTABLE_H_
22 
23 # include <assert.h>
24 # include <stdlib.h>
25 
26 /** \file hashtable.h A generic open addressing hashtable.
27  *
28  * This is a minimal hashtable containing pointers to arbitrary entries with
29  * configurable hashtable size and support for custom hash() and cmp() methods.
30  * The cmp() method can either be a simple comparison between two keys, or can
31  * be against a special match object containing additional mutable state. This
32  * allows for things like deferred and cached evaluation of costly comparison
33  * data. The hash() function doesn't need to avoid clustering behaviour.
34  *
35  * It uses open addressing with quadratic probing for collisions. The
36  * MurmurHash3 finalization function is used on the hash() output to avoid
37  * clustering. There is no support for removing entries, only adding them.
38  * Multiple entries with the same key can be added, and you can use a fancy
39  * cmp() function to find particular entries by more than just their key. There
40  * is an iterator for iterating through all entries in the hashtable. There are
41  * optional hashtable_find() find/match/hashcmp/entrycmp stats counters that
42  * can be disabled by defining HASHTABLE_NSTATS.
43  *
44  * The types and methods of the hashtable and its contents are specified by
45  * using \#define parameters set to their basenames (the prefixes for the *_t
46  * type and *_func() methods) before doing \#include "hashtable.h". This
47  * produces static inline type-safe methods that are either application
48  * optimized for speed or wrappers around void* implementation methods for
49  * compactness.
50  *
51  * \param ENTRY - the entry type basename.
52  *
53  * \param KEY - optional key type basename (default: ENTRY).
54  *
55  * \param MATCH - optional match type basename (default: KEY).
56  *
57  * \param NAME - optional hashtable type basename (default: ENTRY_hashtable).
58  *
59  * Example: \code
60  * typedef ... mykey_t;
61  * int mykey_hash(const mykey_t *e);
62  * int mykey_cmp(mykey_t *e, const mykey_t *o);
63  *
64  * typedef struct myentry {
65  * mykey_t key; // Inherit from mykey_t.
66  * ...extra entry value data...
67  * } myentry_t;
68  * void myentry_init(myentry_t *e, ...);
69  *
70  * #define ENTRY myentry
71  * #define KEY mykey
72  * #include "hashtable.h"
73  *
74  * hashtable_t *t;
75  * myentry_t entries[300];
76  * mykey_t k;
77  * myentry_t *e;
78  *
79  * t = myentry_hashtable_new(300);
80  * myentry_init(&entries[5], ...);
81  * myentry_hashtable_add(t, &entries[5]);
82  * k = ...;
83  * e = myentry_hashtable_find(t, &k);
84  *
85  * hashtable_iter_t i;
86  * for (e = myentry_hashtable_iter(&i, t); e != NULL;
87  * e = myentry_hashtable_next(&i))
88  * ...
89  *
90  * myentry_hashtable_free(t);
91  * \endcode
92  *
93  * The mykey_hash() and mykey_cmp() fuctions will typically take pointers to
94  * mykey/myentry instances the same as the pointers stored in the hashtable.
95  * However it is also possible for them to take "match objects" that are a
96  * "subclass" of the entry type that contain additional state for complicated
97  * comparision operations.
98  *
99  * Example: \code
100  * typedef struct mymatch {
101  * mykey_t key; // Inherit from mykey_t;
102  * ...extra match criteria and state data...
103  * } mymatch_t;
104  * int mymatch_cmp(mymatch_t *m, const myentry_t *e);
105  *
106  * #define ENTRY myentry
107  * #define KEY mykey
108  * #define MATCH mymatch
109  * #include "hashtable.h"
110  *
111  * ...
112  * mymatch_t m;
113  *
114  * t = myentry_hashtable_new(300);
115  * ...
116  * m = ...;
117  * e = myentry_hashtable_find(t, &m);
118  * \endcode
119  *
120  * The mymatch_cmp() function is only called for finding hashtable entries and
121  * can mutate the mymatch_t object for doing things like deferred and cached
122  * evaluation of expensive match data. It can also access the whole myentry_t
123  * object to match against more than just the key. */
124 
125 /** The hashtable type. */
126 typedef struct hashtable {
127  int size; /**< Size of allocated hashtable. */
128  int count; /**< Number of entries in hashtable. */
129 # ifndef HASHTABLE_NSTATS
130  /* The following are for accumulating hashtable_find() stats. */
131  long find_count; /**< The count of finds tried. */
132  long match_count; /**< The count of matches found. */
133  long hashcmp_count; /**< The count of hash compares done. */
134  long entrycmp_count; /**< The count of entry compares done. */
135 # endif
136  void **etable; /**< Table of pointers to entries. */
137  unsigned ktable[]; /**< Table of hash keys. */
138 } hashtable_t;
139 
140 /** The hashtable iterator type. */
141 typedef struct hashtable_iter {
142  hashtable_t *htable; /**< The hashtable to iterate over. */
143  int index; /**< The index to scan from next. */
145 
146 /* void* implementations for the type-safe static inline wrappers below. */
147 hashtable_t *_hashtable_new(int size);
148 void _hashtable_free(hashtable_t *t);
149 void *_hashtable_iter(hashtable_iter_t *i, hashtable_t *t);
150 void *_hashtable_next(hashtable_iter_t *i);
151 
152 /** MurmurHash3 finalization mix function. */
153 static inline unsigned mix32(unsigned int h)
154 {
155  h ^= h >> 16;
156  h *= 0x85ebca6b;
157  h ^= h >> 13;
158  h *= 0xc2b2ae35;
159  h ^= h >> 16;
160  return h;
161 }
162 
163 #endif /* _HASHTABLE_H_ */
164 
165 /* If ENTRY is defined, define type-dependent static inline methods. */
166 #ifdef ENTRY
167 
168 # define _JOIN2(x, y) x##y
169 # define _JOIN(x, y) _JOIN2(x, y)
170 
171 # ifndef KEY
172 # define KEY ENTRY
173 # endif
174 
175 # ifndef MATCH
176 # define MATCH KEY
177 # endif
178 
179 # ifndef NAME
180 # define NAME _JOIN(ENTRY, _hashtable)
181 # endif
182 
183 # define ENTRY_T _JOIN(ENTRY, _t) /**< The entry type. */
184 # define KEY_T _JOIN(KEY, _t) /**< The key type. */
185 # define MATCH_T _JOIN(MATCH, _t) /**< The match type. */
186 /** The key hash(k) method. */
187 # define KEY_HASH(k) _JOIN(KEY, _hash)(k)
188 /** The match cmp(m, e) method. */
189 # define MATCH_CMP(m, e) _JOIN(MATCH, _cmp)(m, e)
190 # define _FUNC(f) _JOIN(NAME, f)
191 
192 /* Loop macro for probing table t for key k, setting hk to the hash for k
193  reserving zero for empty buckets, and iterating with index i and entry hash
194  h, terminating at an empty bucket. */
195 # define _for_probe(t, k, hk, i, h) \
196  const unsigned mask = t->size - 1;\
197  unsigned hk = KEY_HASH((KEY_T *)k), i, s, h;\
198  hk = hk ? hk : -1;\
199  for (i = mix32(hk) & mask, s = 0; (h = t->ktable[i]); i = (i + ++s) & mask)
200 
201 /* Conditional macro for incrementing stats counters. */
202 # ifndef HASHTABLE_NSTATS
203 # define _stats_inc(c) (c++)
204 # else
205 # define _stats_inc(c)
206 # endif
207 
208 /** Allocate and initialize a hashtable instance.
209  *
210  * The provided size is used as an indication of the number of entries you wish
211  * to add, but the allocated size will probably be larger depending on the
212  * implementation to enable optimisations or avoid degraded performance. It may
213  * be possible to fill the table beyond the requested size, but performance can
214  * start to degrade badly if it is over filled.
215  *
216  * \param size - The desired minimum size of the hash table.
217  *
218  * \return The initialized hashtable instance or NULL if it failed. */
219 static inline hashtable_t *_FUNC(_new) (int size) {
220  return _hashtable_new(size);
221 }
222 
223 /** Destroy and free a hashtable instance.
224  *
225  * This will free the hashtable, but will not free the entries in the
226  * hashtable. If you want to free the entries too, use a hashtable iterator to
227  * free the the entries first.
228  *
229  * \param *t - The hashtable to destroy and free. */
230 static inline void _FUNC(_free) (hashtable_t *t) {
231  _hashtable_free(t);
232 }
233 
234 /** Initialize hashtable stats counters.
235  *
236  * This will reset all the stats counters for the hashtable,
237  *
238  * \param *t - The hashtable to initializ stats for. */
239 static inline void _FUNC(_stats_init) (hashtable_t *t) {
240 # ifndef HASHTABLE_NSTATS
241  t->find_count = t->match_count = t->hashcmp_count = t->entrycmp_count = 0;
242 # endif
243 }
244 
245 /** Add an entry to a hashtable.
246  *
247  * This doesn't use MATCH_CMP() or do any checks for existing copies or
248  * instances, so it will add duplicates. If you want to avoid adding
249  * duplicates, use hashtable_find() to check for existing entries first.
250  *
251  * \param *t - The hashtable to add to.
252  *
253  * \param *e - The entry object to add.
254  *
255  * \return The added entry, or NULL if the table is full. */
256 static inline ENTRY_T *_FUNC(_add) (hashtable_t *t, ENTRY_T * e) {
257  assert(e != NULL);
258  if (t->count + 1 == t->size)
259  return NULL;
260  _for_probe(t, e, he, i, h);
261  t->count++;
262  t->ktable[i] = he;
263  return t->etable[i] = e;
264 }
265 
266 /** Find an entry in a hashtable.
267  *
268  * Uses MATCH_CMP() to find the first matching entry in the table in the same
269  * hash() bucket.
270  *
271  * \param *t - The hashtable to search.
272  *
273  * \param *m - The key or match object to search for.
274  *
275  * \return The first found entry, or NULL if nothing was found. */
276 static inline ENTRY_T *_FUNC(_find) (hashtable_t *t, MATCH_T * m) {
277  assert(m != NULL);
278  ENTRY_T *e;
279 
280  _stats_inc(t->find_count);
281  _for_probe(t, m, hm, i, he) {
282  _stats_inc(t->hashcmp_count);
283  if (hm == he) {
284  _stats_inc(t->entrycmp_count);
285  if (!MATCH_CMP(m, e = t->etable[i])) {
286  _stats_inc(t->match_count);
287  return e;
288  }
289  }
290  }
291  return NULL;
292 }
293 
294 /** Initialize a hashtable_iter_t and return the first entry.
295  *
296  * This works together with hashtable_next() for iterating through all entries
297  * in a hashtable.
298  *
299  * Example: \code
300  * for (e = hashtable_iter(&i, t); e != NULL; e = hashtable_next(&i))
301  * ...
302  * \endcode
303  *
304  * \param *i - the hashtable iterator to initialize.
305  *
306  * \param *t - the hashtable to iterate over.
307  *
308  * \return The first entry or NULL if the hashtable is empty. */
309 static inline ENTRY_T *_FUNC(_iter) (hashtable_iter_t *i, hashtable_t *t) {
310  return _hashtable_iter(i, t);
311 }
312 
313 /** Get the next entry from a hashtable iterator or NULL when finished.
314  *
315  * \param *i - the hashtable iterator to use.
316  *
317  * \return The next entry or NULL if the iterator is finished. */
318 static inline ENTRY_T *_FUNC(_next) (hashtable_iter_t *i) {
319  return _hashtable_next(i);
320 }
321 
322 # undef ENTRY
323 # undef KEY
324 # undef MATCH
325 # undef NAME
326 # undef ENTRY_T
327 # undef KEY_T
328 # undef MATCH_T
329 # undef KEY_HASH
330 # undef MATCH_CMP
331 # undef _FUNC
332 #endif /* ENTRY */
long hashcmp_count
The count of hash compares done.
Definition: hashtable.h:133
long match_count
The count of matches found.
Definition: hashtable.h:132
#define MATCH_T
The match type.
Definition: hashtable.h:185
The hashtable iterator type.
Definition: hashtable.h:141
static unsigned mix32(unsigned int h)
MurmurHash3 finalization mix function.
Definition: hashtable.h:153
int size
Size of allocated hashtable.
Definition: hashtable.h:127
struct hashtable_iter hashtable_iter_t
The hashtable iterator type.
long entrycmp_count
The count of entry compares done.
Definition: hashtable.h:134
#define ENTRY_T
The entry type.
Definition: hashtable.h:183
unsigned ktable[]
Table of hash keys.
Definition: hashtable.h:137
hashtable_t * htable
The hashtable to iterate over.
Definition: hashtable.h:142
#define MATCH_CMP(m, e)
The match cmp(m, e) method.
Definition: hashtable.h:189
int index
The index to scan from next.
Definition: hashtable.h:143
long find_count
The count of finds tried.
Definition: hashtable.h:131
struct hashtable hashtable_t
The hashtable type.
void ** etable
Table of pointers to entries.
Definition: hashtable.h:136
The hashtable type.
Definition: hashtable.h:126
int count
Number of entries in hashtable.
Definition: hashtable.h:128