/* Extracted from the MobilityDB 1.1 documentation: simplehash.h source listing. */
/*
 * simplehash.h
 *
 * When included this file generates a "templated" (by way of macros)
 * open-addressing hash table implementation specialized to user-defined
 * types.
 *
 * It's probably not worthwhile to generate such a specialized implementation
 * for hash tables that aren't performance or space sensitive.
 *
 * Compared to dynahash, simplehash has the following benefits:
 *
 * - Due to the "templated" code generation has known structure sizes and no
 *   indirect function calls (which show up substantially in dynahash
 *   profiles). These features considerably increase speed for small
 *   entries.
 * - Open addressing has better CPU cache behavior than dynahash's chained
 *   hashtables.
 * - The generated interface is type-safe and easier to use than dynahash,
 *   though at the cost of more complex setup.
 * - Allocates memory in a MemoryContext or another allocator with a
 *   malloc/free style interface (which isn't easily usable in a shared
 *   memory context)
 * - Does not require the overhead of a separate memory context.
 *
 * Usage notes:
 *
 * To generate a hash-table and associated functions for a use case several
 * macros have to be #define'ed before this file is included.  Including
 * the file #undef's all those, so a new hash table can be generated
 * afterwards.
 * The relevant parameters are:
 * - SH_PREFIX - prefix for all symbol names generated. A prefix of 'foo'
 *   will result in hash table type 'foo_hash' and functions like
 *   'foo_insert'/'foo_lookup' and so forth.
 * - SH_ELEMENT_TYPE - type of the contained elements
 * - SH_KEY_TYPE - type of the hashtable's key
 * - SH_DECLARE - if defined function prototypes and type declarations are
 *   generated
 * - SH_DEFINE - if defined function definitions are generated
 * - SH_SCOPE - in which scope (e.g. extern, static inline) do function
 *   declarations reside
 * - SH_RAW_ALLOCATOR - if defined, memory contexts are not used; instead,
 *   use this to allocate bytes. The allocator must zero the returned space.
 * - SH_USE_NONDEFAULT_ALLOCATOR - if defined no element allocator functions
 *   are defined, so you can supply your own
 * The following parameters are only relevant when SH_DEFINE is defined:
 * - SH_KEY - name of the element in SH_ELEMENT_TYPE containing the hash key
 * - SH_EQUAL(table, a, b) - compare two table keys
 * - SH_HASH_KEY(table, key) - generate hash for the key
 * - SH_STORE_HASH - if defined the hash is stored in the elements
 * - SH_GET_HASH(tb, a) - return the field to store the hash in
 *
 * The element type is required to contain a "status" member that can store
 * the range of values defined in the SH_STATUS enum.
 *
 * While SH_STORE_HASH (and subsequently SH_GET_HASH) are optional, because
 * the hash table implementation needs to compare hashes to move elements
 * (particularly when growing the hash), it's preferable, if possible, to
 * store the element's hash in the element's data type. If the hash is so
 * stored, the hash table will also compare hashes before calling SH_EQUAL
 * when comparing two keys.
 *
 * For convenience the hash table create functions accept a void pointer
 * that will be stored in the hash table type's member private_data. This
 * allows callbacks to reference caller provided data.
 *
 * For examples of usage look at tidbitmap.c (file local definition) and
 * execnodes.h/execGrouping.c (exposed declaration, file local
 * implementation).
 *
 * Hash table design:
 *
 * The hash table design chosen is a variant of linear open-addressing. The
 * reason for doing so is that linear addressing is CPU cache & pipeline
 * friendly. The biggest disadvantage of simple linear addressing schemes
 * are highly variable lookup times due to clustering, and deletions
 * leaving a lot of tombstones around.  To address these issues a variant
 * of "robin hood" hashing is employed.  Robin hood hashing optimizes
 * chaining lengths by moving elements close to their optimal bucket
 * ("rich" elements), out of the way if a to-be-inserted element is further
 * away from its optimal position (i.e. it's "poor").  While that can make
 * insertions slower, the average lookup performance is a lot better, and
 * higher fill factors can be used in a still performant manner.  To avoid
 * tombstones - which normally solve the issue that a deleted node's
 * presence is relevant to determine whether a lookup needs to continue
 * looking or is done - buckets following a deleted element are shifted
 * backwards, unless they're empty or already at their optimal position.
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/lib/simplehash.h
 */
95
96#include "port/pg_bitutils.h"
97
98/* helpers */
99#define SH_MAKE_PREFIX(a) CppConcat(a,_)
100#define SH_MAKE_NAME(name) SH_MAKE_NAME_(SH_MAKE_PREFIX(SH_PREFIX),name)
101#define SH_MAKE_NAME_(a,b) CppConcat(a,b)
102
103/* name macros for: */
104
105/* type declarations */
106#define SH_TYPE SH_MAKE_NAME(hash)
107#define SH_STATUS SH_MAKE_NAME(status)
108#define SH_STATUS_EMPTY SH_MAKE_NAME(SH_EMPTY)
109#define SH_STATUS_IN_USE SH_MAKE_NAME(SH_IN_USE)
110#define SH_ITERATOR SH_MAKE_NAME(iterator)
111
112/* function declarations */
113#define SH_CREATE SH_MAKE_NAME(create)
114#define SH_DESTROY SH_MAKE_NAME(destroy)
115#define SH_RESET SH_MAKE_NAME(reset)
116#define SH_INSERT SH_MAKE_NAME(insert)
117#define SH_INSERT_HASH SH_MAKE_NAME(insert_hash)
118#define SH_DELETE_ITEM SH_MAKE_NAME(delete_item)
119#define SH_DELETE SH_MAKE_NAME(delete)
120#define SH_LOOKUP SH_MAKE_NAME(lookup)
121#define SH_LOOKUP_HASH SH_MAKE_NAME(lookup_hash)
122#define SH_GROW SH_MAKE_NAME(grow)
123#define SH_START_ITERATE SH_MAKE_NAME(start_iterate)
124#define SH_START_ITERATE_AT SH_MAKE_NAME(start_iterate_at)
125#define SH_ITERATE SH_MAKE_NAME(iterate)
126#define SH_ALLOCATE SH_MAKE_NAME(allocate)
127#define SH_FREE SH_MAKE_NAME(free)
128#define SH_STAT SH_MAKE_NAME(stat)
129
130/* internal helper functions (no externally visible prototypes) */
131#define SH_COMPUTE_PARAMETERS SH_MAKE_NAME(compute_parameters)
132#define SH_NEXT SH_MAKE_NAME(next)
133#define SH_PREV SH_MAKE_NAME(prev)
134#define SH_DISTANCE_FROM_OPTIMAL SH_MAKE_NAME(distance)
135#define SH_INITIAL_BUCKET SH_MAKE_NAME(initial_bucket)
136#define SH_ENTRY_HASH SH_MAKE_NAME(entry_hash)
137#define SH_INSERT_HASH_INTERNAL SH_MAKE_NAME(insert_hash_internal)
138#define SH_LOOKUP_HASH_INTERNAL SH_MAKE_NAME(lookup_hash_internal)
139
/* generate forward declarations necessary to use the hash table */
#ifdef SH_DECLARE

/* type definitions */
typedef struct SH_TYPE
{
    /*
     * Size of data / bucket array, 64 bits to handle UINT32_MAX sized hash
     * tables.  Note that the maximum number of elements is lower
     * (SH_MAX_FILLFACTOR)
     */
    uint64      size;

    /* how many elements have valid contents */
    uint32      members;

    /* mask for bucket and size calculations, based on size */
    uint32      sizemask;

    /* boundary after which to grow hashtable */
    uint32      grow_threshold;

    /* hash buckets */
    SH_ELEMENT_TYPE *data;

#ifndef SH_RAW_ALLOCATOR
    /* memory context to use for allocations */
    MemoryContext ctx;
#endif

    /* user defined data, useful for callbacks */
    void       *private_data;
} SH_TYPE;

typedef enum SH_STATUS
{
    SH_STATUS_EMPTY = 0x00,
    SH_STATUS_IN_USE = 0x01
} SH_STATUS;

typedef struct SH_ITERATOR
{
    uint32      cur;            /* current element */
    uint32      end;
    bool        done;           /* iterator exhausted? */
} SH_ITERATOR;

/* externally visible function prototypes */
#ifdef SH_RAW_ALLOCATOR
/* <prefix>_hash <prefix>_create(uint32 nelements, void *private_data) */
SH_SCOPE SH_TYPE *SH_CREATE(uint32 nelements, void *private_data);
#else
/*
 * <prefix>_hash <prefix>_create(MemoryContext ctx, uint32 nelements,
 *                               void *private_data)
 */
SH_SCOPE SH_TYPE *SH_CREATE(MemoryContext ctx, uint32 nelements,
                            void *private_data);
#endif

/* void <prefix>_destroy(<prefix>_hash *tb) */
SH_SCOPE void SH_DESTROY(SH_TYPE * tb);

/* void <prefix>_reset(<prefix>_hash *tb) */
SH_SCOPE void SH_RESET(SH_TYPE * tb);

/* void <prefix>_grow(<prefix>_hash *tb, uint64 newsize) */
SH_SCOPE void SH_GROW(SH_TYPE * tb, uint64 newsize);

/* <element> *<prefix>_insert(<prefix>_hash *tb, <key> key, bool *found) */
SH_SCOPE SH_ELEMENT_TYPE *SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found);

/*
 * <element> *<prefix>_insert_hash(<prefix>_hash *tb, <key> key, uint32 hash,
 *                                 bool *found)
 */
SH_SCOPE SH_ELEMENT_TYPE *SH_INSERT_HASH(SH_TYPE * tb, SH_KEY_TYPE key,
                                         uint32 hash, bool *found);

/* <element> *<prefix>_lookup(<prefix>_hash *tb, <key> key) */
SH_SCOPE SH_ELEMENT_TYPE *SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key);

/* <element> *<prefix>_lookup_hash(<prefix>_hash *tb, <key> key, uint32 hash) */
SH_SCOPE SH_ELEMENT_TYPE *SH_LOOKUP_HASH(SH_TYPE * tb, SH_KEY_TYPE key,
                                         uint32 hash);

/* void <prefix>_delete_item(<prefix>_hash *tb, <element> *entry) */
SH_SCOPE void SH_DELETE_ITEM(SH_TYPE * tb, SH_ELEMENT_TYPE * entry);

/* bool <prefix>_delete(<prefix>_hash *tb, <key> key) */
SH_SCOPE bool SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key);

/* void <prefix>_start_iterate(<prefix>_hash *tb, <prefix>_iterator *iter) */
SH_SCOPE void SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);

/*
 * void <prefix>_start_iterate_at(<prefix>_hash *tb, <prefix>_iterator *iter,
 *                                uint32 at)
 */
SH_SCOPE void SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at);

/* <element> *<prefix>_iterate(<prefix>_hash *tb, <prefix>_iterator *iter) */
SH_SCOPE SH_ELEMENT_TYPE *SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);

/* void <prefix>_stat(<prefix>_hash *tb */
SH_SCOPE void SH_STAT(SH_TYPE * tb);

#endif                          /* SH_DECLARE */

250/* generate implementation of the hash table */
251#ifdef SH_DEFINE
252
253#ifndef SH_RAW_ALLOCATOR
254#include "utils/memutils.h"
255#endif
256
257/* max data array size,we allow up to PG_UINT32_MAX buckets, including 0 */
258#define SH_MAX_SIZE (((uint64) PG_UINT32_MAX) + 1)
259
260/* normal fillfactor, unless already close to maximum */
261#ifndef SH_FILLFACTOR
262#define SH_FILLFACTOR (0.9)
263#endif
264/* increase fillfactor if we otherwise would error out */
265#define SH_MAX_FILLFACTOR (0.98)
266/* grow if actual and optimal location bigger than */
267#ifndef SH_GROW_MAX_DIB
268#define SH_GROW_MAX_DIB 25
269#endif
270/* grow if more than elements to move when inserting */
271#ifndef SH_GROW_MAX_MOVE
272#define SH_GROW_MAX_MOVE 150
273#endif
274#ifndef SH_GROW_MIN_FILLFACTOR
275/* but do not grow due to SH_GROW_MAX_* if below */
276#define SH_GROW_MIN_FILLFACTOR 0.1
277#endif
278
279#ifdef SH_STORE_HASH
280#define SH_COMPARE_KEYS(tb, ahash, akey, b) (ahash == SH_GET_HASH(tb, b) && SH_EQUAL(tb, b->SH_KEY, akey))
281#else
282#define SH_COMPARE_KEYS(tb, ahash, akey, b) (SH_EQUAL(tb, b->SH_KEY, akey))
283#endif
284
285/*
286 * Wrap the following definitions in include guards, to avoid multiple
287 * definition errors if this header is included more than once. The rest of
288 * the file deliberately has no include guards, because it can be included
289 * with different parameters to define functions and types with non-colliding
290 * names.
291 */
292#ifndef SIMPLEHASH_H
293#define SIMPLEHASH_H
294
295#ifdef FRONTEND
296#define sh_error(...) pg_fatal(__VA_ARGS__)
297#define sh_log(...) pg_log_info(__VA_ARGS__)
298#else
299#define sh_error(...) elog(ERROR, __VA_ARGS__)
300#define sh_log(...) elog(LOG, __VA_ARGS__)
301#endif
302
303#endif
304
305/*
306 * Compute sizing parameters for hashtable. Called when creating and growing
307 * the hashtable.
308 */
309static inline void
311{
312 uint64 size;
313
314 /* supporting zero sized hashes would complicate matters */
315 size = Max(newsize, 2);
316
317 /* round up size to the next power of 2, that's how bucketing works */
318 size = pg_nextpower2_64(size);
319 Assert(size <= SH_MAX_SIZE);
320
321 /*
322 * Verify that allocation of ->data is possible on this platform, without
323 * overflowing Size.
324 */
325 if (unlikely((((uint64) sizeof(SH_ELEMENT_TYPE)) * size) >= SIZE_MAX / 2))
326 sh_error("hash table too large");
327
328 /* now set size */
329 tb->size = size;
330 tb->sizemask = (uint32) (size - 1);
331
332 /*
333 * Compute the next threshold at which we need to grow the hash table
334 * again.
335 */
336 if (tb->size == SH_MAX_SIZE)
337 tb->grow_threshold = ((double) tb->size) * SH_MAX_FILLFACTOR;
338 else
339 tb->grow_threshold = ((double) tb->size) * SH_FILLFACTOR;
340}
341
342/* return the optimal bucket for the hash */
343static inline uint32
345{
346 return hash & tb->sizemask;
347}
348
349/* return next bucket after the current, handling wraparound */
350static inline uint32
351SH_NEXT(SH_TYPE * tb, uint32 curelem, uint32 startelem)
352{
353 curelem = (curelem + 1) & tb->sizemask;
354
355 Assert(curelem != startelem);
356
357 return curelem;
358}
359
360/* return bucket before the current, handling wraparound */
361static inline uint32
362SH_PREV(SH_TYPE * tb, uint32 curelem, uint32 startelem)
363{
364 curelem = (curelem - 1) & tb->sizemask;
365
366 Assert(curelem != startelem);
367
368 return curelem;
369}
370
371/* return distance between bucket and its optimal position */
372static inline uint32
373SH_DISTANCE_FROM_OPTIMAL(SH_TYPE * tb, uint32 optimal, uint32 bucket)
374{
375 if (optimal <= bucket)
376 return bucket - optimal;
377 else
378 return (tb->size + bucket) - optimal;
379}
380
381static inline uint32
383{
384#ifdef SH_STORE_HASH
385 return SH_GET_HASH(tb, entry);
386#else
387 return SH_HASH_KEY(tb, entry->SH_KEY);
388#endif
389}
390
391/* default memory allocator function */
392static inline void *SH_ALLOCATE(SH_TYPE * type, Size size);
393static inline void SH_FREE(SH_TYPE * type, void *pointer);
394
395#ifndef SH_USE_NONDEFAULT_ALLOCATOR
396
397/* default memory allocator function */
398static inline void *
399SH_ALLOCATE(SH_TYPE * type, Size size)
400{
401#ifdef SH_RAW_ALLOCATOR
402 return SH_RAW_ALLOCATOR(size);
403#else
404 return MemoryContextAllocExtended(type->ctx, size,
405 MCXT_ALLOC_HUGE | MCXT_ALLOC_ZERO);
406#endif
407}
408
409/* default memory free function */
410static inline void
411SH_FREE(SH_TYPE * type, void *pointer)
412{
413 pfree(pointer);
414}
415
416#endif
417
418/*
419 * Create a hash table with enough space for `nelements` distinct members.
420 * Memory for the hash table is allocated from the passed-in context. If
421 * desired, the array of elements can be allocated using a passed-in allocator;
422 * this could be useful in order to place the array of elements in a shared
423 * memory, or in a context that will outlive the rest of the hash table.
424 * Memory other than for the array of elements will still be allocated from
425 * the passed-in context.
426 */
427#ifdef SH_RAW_ALLOCATOR
429SH_CREATE(uint32 nelements, void *private_data)
430#else
432SH_CREATE(MemoryContext ctx, uint32 nelements, void *private_data)
433#endif
434{
435 SH_TYPE *tb;
436 uint64 size;
437
438#ifdef SH_RAW_ALLOCATOR
439 tb = (SH_TYPE *) SH_RAW_ALLOCATOR(sizeof(SH_TYPE));
440#else
441 tb = (SH_TYPE *) MemoryContextAllocZero(ctx, sizeof(SH_TYPE));
442 tb->ctx = ctx;
443#endif
444 tb->private_data = private_data;
445
446 /* increase nelements by fillfactor, want to store nelements elements */
447 size = Min((double) SH_MAX_SIZE, ((double) nelements) / SH_FILLFACTOR);
448
449 SH_COMPUTE_PARAMETERS(tb, size);
450
451 tb->data = (SH_ELEMENT_TYPE *) SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * tb->size);
452
453 return tb;
454}
455
456/* destroy a previously created hash table */
457SH_SCOPE void
459{
460 SH_FREE(tb, tb->data);
461 pfree(tb);
462}
463
464/* reset the contents of a previously created hash table */
465SH_SCOPE void
466SH_RESET(SH_TYPE * tb)
467{
468 memset(tb->data, 0, sizeof(SH_ELEMENT_TYPE) * tb->size);
469 tb->members = 0;
470}
471
472/*
473 * Grow a hash table to at least `newsize` buckets.
474 *
475 * Usually this will automatically be called by insertions/deletions, when
476 * necessary. But resizing to the exact input size can be advantageous
477 * performance-wise, when known at some point.
478 */
479SH_SCOPE void
480SH_GROW(SH_TYPE * tb, uint64 newsize)
481{
482 uint64 oldsize = tb->size;
483 SH_ELEMENT_TYPE *olddata = tb->data;
484 SH_ELEMENT_TYPE *newdata;
485 uint32 i;
486 uint32 startelem = 0;
487 uint32 copyelem;
488
489 Assert(oldsize == pg_nextpower2_64(oldsize));
490 Assert(oldsize != SH_MAX_SIZE);
491 Assert(oldsize < newsize);
492
493 /* compute parameters for new table */
494 SH_COMPUTE_PARAMETERS(tb, newsize);
495
496 tb->data = (SH_ELEMENT_TYPE *) SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * tb->size);
497
498 newdata = tb->data;
499
500 /*
501 * Copy entries from the old data to newdata. We theoretically could use
502 * SH_INSERT here, to avoid code duplication, but that's more general than
503 * we need. We neither want tb->members increased, nor do we need to do
504 * deal with deleted elements, nor do we need to compare keys. So a
505 * special-cased implementation is lot faster. As resizing can be time
506 * consuming and frequent, that's worthwhile to optimize.
507 *
508 * To be able to simply move entries over, we have to start not at the
509 * first bucket (i.e olddata[0]), but find the first bucket that's either
510 * empty, or is occupied by an entry at its optimal position. Such a
511 * bucket has to exist in any table with a load factor under 1, as not all
512 * buckets are occupied, i.e. there always has to be an empty bucket. By
513 * starting at such a bucket we can move the entries to the larger table,
514 * without having to deal with conflicts.
515 */
516
517 /* search for the first element in the hash that's not wrapped around */
518 for (i = 0; i < oldsize; i++)
519 {
520 SH_ELEMENT_TYPE *oldentry = &olddata[i];
521 uint32 hash;
522 uint32 optimal;
523
524 if (oldentry->status != SH_STATUS_IN_USE)
525 {
526 startelem = i;
527 break;
528 }
529
530 hash = SH_ENTRY_HASH(tb, oldentry);
531 optimal = SH_INITIAL_BUCKET(tb, hash);
532
533 if (optimal == i)
534 {
535 startelem = i;
536 break;
537 }
538 }
539
540 /* and copy all elements in the old table */
541 copyelem = startelem;
542 for (i = 0; i < oldsize; i++)
543 {
544 SH_ELEMENT_TYPE *oldentry = &olddata[copyelem];
545
546 if (oldentry->status == SH_STATUS_IN_USE)
547 {
548 uint32 hash;
549 uint32 startelem;
550 uint32 curelem;
551 SH_ELEMENT_TYPE *newentry;
552
553 hash = SH_ENTRY_HASH(tb, oldentry);
554 startelem = SH_INITIAL_BUCKET(tb, hash);
555 curelem = startelem;
556
557 /* find empty element to put data into */
558 while (true)
559 {
560 newentry = &newdata[curelem];
561
562 if (newentry->status == SH_STATUS_EMPTY)
563 {
564 break;
565 }
566
567 curelem = SH_NEXT(tb, curelem, startelem);
568 }
569
570 /* copy entry to new slot */
571 memcpy(newentry, oldentry, sizeof(SH_ELEMENT_TYPE));
572 }
573
574 /* can't use SH_NEXT here, would use new size */
575 copyelem++;
576 if (copyelem >= oldsize)
577 {
578 copyelem = 0;
579 }
580 }
581
582 SH_FREE(tb, olddata);
583}
584
585/*
586 * This is a separate static inline function, so it can be reliably be inlined
587 * into its wrapper functions even if SH_SCOPE is extern.
588 */
589static inline SH_ELEMENT_TYPE *
590SH_INSERT_HASH_INTERNAL(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash, bool *found)
591{
592 uint32 startelem;
593 uint32 curelem;
594 SH_ELEMENT_TYPE *data;
595 uint32 insertdist;
596
597restart:
598 insertdist = 0;
599
600 /*
601 * We do the grow check even if the key is actually present, to avoid
602 * doing the check inside the loop. This also lets us avoid having to
603 * re-find our position in the hashtable after resizing.
604 *
605 * Note that this also reached when resizing the table due to
606 * SH_GROW_MAX_DIB / SH_GROW_MAX_MOVE.
607 */
608 if (unlikely(tb->members >= tb->grow_threshold))
609 {
610 if (unlikely(tb->size == SH_MAX_SIZE))
611 sh_error("hash table size exceeded");
612
613 /*
614 * When optimizing, it can be very useful to print these out.
615 */
616 /* SH_STAT(tb); */
617 SH_GROW(tb, tb->size * 2);
618 /* SH_STAT(tb); */
619 }
620
621 /* perform insert, start bucket search at optimal location */
622 data = tb->data;
623 startelem = SH_INITIAL_BUCKET(tb, hash);
624 curelem = startelem;
625 while (true)
626 {
627 uint32 curdist;
628 uint32 curhash;
629 uint32 curoptimal;
630 SH_ELEMENT_TYPE *entry = &data[curelem];
631
632 /* any empty bucket can directly be used */
633 if (entry->status == SH_STATUS_EMPTY)
634 {
635 tb->members++;
636 entry->SH_KEY = key;
637#ifdef SH_STORE_HASH
638 SH_GET_HASH(tb, entry) = hash;
639#endif
640 entry->status = SH_STATUS_IN_USE;
641 *found = false;
642 return entry;
643 }
644
645 /*
646 * If the bucket is not empty, we either found a match (in which case
647 * we're done), or we have to decide whether to skip over or move the
648 * colliding entry. When the colliding element's distance to its
649 * optimal position is smaller than the to-be-inserted entry's, we
650 * shift the colliding entry (and its followers) forward by one.
651 */
652
653 if (SH_COMPARE_KEYS(tb, hash, key, entry))
654 {
655 Assert(entry->status == SH_STATUS_IN_USE);
656 *found = true;
657 return entry;
658 }
659
660 curhash = SH_ENTRY_HASH(tb, entry);
661 curoptimal = SH_INITIAL_BUCKET(tb, curhash);
662 curdist = SH_DISTANCE_FROM_OPTIMAL(tb, curoptimal, curelem);
663
664 if (insertdist > curdist)
665 {
666 SH_ELEMENT_TYPE *lastentry = entry;
667 uint32 emptyelem = curelem;
668 uint32 moveelem;
669 int32 emptydist = 0;
670
671 /* find next empty bucket */
672 while (true)
673 {
674 SH_ELEMENT_TYPE *emptyentry;
675
676 emptyelem = SH_NEXT(tb, emptyelem, startelem);
677 emptyentry = &data[emptyelem];
678
679 if (emptyentry->status == SH_STATUS_EMPTY)
680 {
681 lastentry = emptyentry;
682 break;
683 }
684
685 /*
686 * To avoid negative consequences from overly imbalanced
687 * hashtables, grow the hashtable if collisions would require
688 * us to move a lot of entries. The most likely cause of such
689 * imbalance is filling a (currently) small table, from a
690 * currently big one, in hash-table order. Don't grow if the
691 * hashtable would be too empty, to prevent quick space
692 * explosion for some weird edge cases.
693 */
694 if (unlikely(++emptydist > SH_GROW_MAX_MOVE) &&
695 ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR)
696 {
697 tb->grow_threshold = 0;
698 goto restart;
699 }
700 }
701
702 /* shift forward, starting at last occupied element */
703
704 /*
705 * TODO: This could be optimized to be one memcpy in many cases,
706 * excepting wrapping around at the end of ->data. Hasn't shown up
707 * in profiles so far though.
708 */
709 moveelem = emptyelem;
710 while (moveelem != curelem)
711 {
712 SH_ELEMENT_TYPE *moveentry;
713
714 moveelem = SH_PREV(tb, moveelem, startelem);
715 moveentry = &data[moveelem];
716
717 memcpy(lastentry, moveentry, sizeof(SH_ELEMENT_TYPE));
718 lastentry = moveentry;
719 }
720
721 /* and fill the now empty spot */
722 tb->members++;
723
724 entry->SH_KEY = key;
725#ifdef SH_STORE_HASH
726 SH_GET_HASH(tb, entry) = hash;
727#endif
728 entry->status = SH_STATUS_IN_USE;
729 *found = false;
730 return entry;
731 }
732
733 curelem = SH_NEXT(tb, curelem, startelem);
734 insertdist++;
735
736 /*
737 * To avoid negative consequences from overly imbalanced hashtables,
738 * grow the hashtable if collisions lead to large runs. The most
739 * likely cause of such imbalance is filling a (currently) small
740 * table, from a currently big one, in hash-table order. Don't grow
741 * if the hashtable would be too empty, to prevent quick space
742 * explosion for some weird edge cases.
743 */
744 if (unlikely(insertdist > SH_GROW_MAX_DIB) &&
745 ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR)
746 {
747 tb->grow_threshold = 0;
748 goto restart;
749 }
750 }
751}
752
753/*
754 * Insert the key key into the hash-table, set *found to true if the key
755 * already exists, false otherwise. Returns the hash-table entry in either
756 * case.
757 */
759SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found)
760{
761 uint32 hash = SH_HASH_KEY(tb, key);
762
763 return SH_INSERT_HASH_INTERNAL(tb, key, hash, found);
764}
765
766/*
767 * Insert the key key into the hash-table using an already-calculated
768 * hash. Set *found to true if the key already exists, false
769 * otherwise. Returns the hash-table entry in either case.
770 */
772SH_INSERT_HASH(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash, bool *found)
773{
774 return SH_INSERT_HASH_INTERNAL(tb, key, hash, found);
775}
776
777/*
778 * This is a separate static inline function, so it can be reliably be inlined
779 * into its wrapper functions even if SH_SCOPE is extern.
780 */
781static inline SH_ELEMENT_TYPE *
783{
784 const uint32 startelem = SH_INITIAL_BUCKET(tb, hash);
785 uint32 curelem = startelem;
786
787 while (true)
788 {
789 SH_ELEMENT_TYPE *entry = &tb->data[curelem];
790
791 if (entry->status == SH_STATUS_EMPTY)
792 {
793 return NULL;
794 }
795
796 Assert(entry->status == SH_STATUS_IN_USE);
797
798 if (SH_COMPARE_KEYS(tb, hash, key, entry))
799 return entry;
800
801 /*
802 * TODO: we could stop search based on distance. If the current
803 * buckets's distance-from-optimal is smaller than what we've skipped
804 * already, the entry doesn't exist. Probably only do so if
805 * SH_STORE_HASH is defined, to avoid re-computing hashes?
806 */
807
808 curelem = SH_NEXT(tb, curelem, startelem);
809 }
810}
811
812/*
813 * Lookup up entry in hash table. Returns NULL if key not present.
814 */
817{
818 uint32 hash = SH_HASH_KEY(tb, key);
819
820 return SH_LOOKUP_HASH_INTERNAL(tb, key, hash);
821}
822
823/*
824 * Lookup up entry in hash table using an already-calculated hash.
825 *
826 * Returns NULL if key not present.
827 */
830{
831 return SH_LOOKUP_HASH_INTERNAL(tb, key, hash);
832}
833
834/*
835 * Delete entry from hash table by key. Returns whether to-be-deleted key was
836 * present.
837 */
838SH_SCOPE bool
840{
841 uint32 hash = SH_HASH_KEY(tb, key);
842 uint32 startelem = SH_INITIAL_BUCKET(tb, hash);
843 uint32 curelem = startelem;
844
845 while (true)
846 {
847 SH_ELEMENT_TYPE *entry = &tb->data[curelem];
848
849 if (entry->status == SH_STATUS_EMPTY)
850 return false;
851
852 if (entry->status == SH_STATUS_IN_USE &&
853 SH_COMPARE_KEYS(tb, hash, key, entry))
854 {
855 SH_ELEMENT_TYPE *lastentry = entry;
856
857 tb->members--;
858
859 /*
860 * Backward shift following elements till either an empty element
861 * or an element at its optimal position is encountered.
862 *
863 * While that sounds expensive, the average chain length is short,
864 * and deletions would otherwise require tombstones.
865 */
866 while (true)
867 {
868 SH_ELEMENT_TYPE *curentry;
869 uint32 curhash;
870 uint32 curoptimal;
871
872 curelem = SH_NEXT(tb, curelem, startelem);
873 curentry = &tb->data[curelem];
874
875 if (curentry->status != SH_STATUS_IN_USE)
876 {
877 lastentry->status = SH_STATUS_EMPTY;
878 break;
879 }
880
881 curhash = SH_ENTRY_HASH(tb, curentry);
882 curoptimal = SH_INITIAL_BUCKET(tb, curhash);
883
884 /* current is at optimal position, done */
885 if (curoptimal == curelem)
886 {
887 lastentry->status = SH_STATUS_EMPTY;
888 break;
889 }
890
891 /* shift */
892 memcpy(lastentry, curentry, sizeof(SH_ELEMENT_TYPE));
893
894 lastentry = curentry;
895 }
896
897 return true;
898 }
899
900 /* TODO: return false; if distance too big */
901
902 curelem = SH_NEXT(tb, curelem, startelem);
903 }
904}
905
906/*
907 * Delete entry from hash table by entry pointer
908 */
909SH_SCOPE void
911{
912 SH_ELEMENT_TYPE *lastentry = entry;
913 uint32 hash = SH_ENTRY_HASH(tb, entry);
914 uint32 startelem = SH_INITIAL_BUCKET(tb, hash);
915 uint32 curelem;
916
917 /* Calculate the index of 'entry' */
918 curelem = entry - &tb->data[0];
919
920 tb->members--;
921
922 /*
923 * Backward shift following elements till either an empty element or an
924 * element at its optimal position is encountered.
925 *
926 * While that sounds expensive, the average chain length is short, and
927 * deletions would otherwise require tombstones.
928 */
929 while (true)
930 {
931 SH_ELEMENT_TYPE *curentry;
932 uint32 curhash;
933 uint32 curoptimal;
934
935 curelem = SH_NEXT(tb, curelem, startelem);
936 curentry = &tb->data[curelem];
937
938 if (curentry->status != SH_STATUS_IN_USE)
939 {
940 lastentry->status = SH_STATUS_EMPTY;
941 break;
942 }
943
944 curhash = SH_ENTRY_HASH(tb, curentry);
945 curoptimal = SH_INITIAL_BUCKET(tb, curhash);
946
947 /* current is at optimal position, done */
948 if (curoptimal == curelem)
949 {
950 lastentry->status = SH_STATUS_EMPTY;
951 break;
952 }
953
954 /* shift */
955 memcpy(lastentry, curentry, sizeof(SH_ELEMENT_TYPE));
956
957 lastentry = curentry;
958 }
959}
960
961/*
962 * Initialize iterator.
963 */
964SH_SCOPE void
966{
967 int i;
968 uint64 startelem = PG_UINT64_MAX;
969
970 /*
971 * Search for the first empty element. As deletions during iterations are
972 * supported, we want to start/end at an element that cannot be affected
973 * by elements being shifted.
974 */
975 for (i = 0; i < tb->size; i++)
976 {
977 SH_ELEMENT_TYPE *entry = &tb->data[i];
978
979 if (entry->status != SH_STATUS_IN_USE)
980 {
981 startelem = i;
982 break;
983 }
984 }
985
986 Assert(startelem < SH_MAX_SIZE);
987
988 /*
989 * Iterate backwards, that allows the current element to be deleted, even
990 * if there are backward shifts
991 */
992 iter->cur = startelem;
993 iter->end = iter->cur;
994 iter->done = false;
995}
996
997/*
998 * Initialize iterator to a specific bucket. That's really only useful for
999 * cases where callers are partially iterating over the hashspace, and that
1000 * iteration deletes and inserts elements based on visited entries. Doing that
1001 * repeatedly could lead to an unbalanced keyspace when always starting at the
1002 * same position.
1003 */
1004SH_SCOPE void
1006{
1007 /*
1008 * Iterate backwards, that allows the current element to be deleted, even
1009 * if there are backward shifts.
1010 */
1011 iter->cur = at & tb->sizemask; /* ensure at is within a valid range */
1012 iter->end = iter->cur;
1013 iter->done = false;
1014}
1015
1016/*
1017 * Iterate over all entries in the hash-table. Return the next occupied entry,
1018 * or NULL if done.
1019 *
1020 * During iteration the current entry in the hash table may be deleted,
1021 * without leading to elements being skipped or returned twice. Additionally
1022 * the rest of the table may be modified (i.e. there can be insertions or
1023 * deletions), but if so, there's neither a guarantee that all nodes are
1024 * visited at least once, nor a guarantee that a node is visited at most once.
1025 */
1027SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
1028{
1029 while (!iter->done)
1030 {
1031 SH_ELEMENT_TYPE *elem;
1032
1033 elem = &tb->data[iter->cur];
1034
1035 /* next element in backward direction */
1036 iter->cur = (iter->cur - 1) & tb->sizemask;
1037
1038 if ((iter->cur & tb->sizemask) == (iter->end & tb->sizemask))
1039 iter->done = true;
1040 if (elem->status == SH_STATUS_IN_USE)
1041 {
1042 return elem;
1043 }
1044 }
1045
1046 return NULL;
1047}
1048
1049/*
1050 * Report some statistics about the state of the hashtable. For
1051 * debugging/profiling purposes only.
1052 */
1053SH_SCOPE void
1054SH_STAT(SH_TYPE * tb)
1055{
1056 uint32 max_chain_length = 0;
1057 uint32 total_chain_length = 0;
1058 double avg_chain_length;
1059 double fillfactor;
1060 uint32 i;
1061
1062 uint32 *collisions = (uint32 *) palloc0(tb->size * sizeof(uint32));
1063 uint32 total_collisions = 0;
1064 uint32 max_collisions = 0;
1065 double avg_collisions;
1066
1067 for (i = 0; i < tb->size; i++)
1068 {
1069 uint32 hash;
1070 uint32 optimal;
1071 uint32 dist;
1072 SH_ELEMENT_TYPE *elem;
1073
1074 elem = &tb->data[i];
1075
1076 if (elem->status != SH_STATUS_IN_USE)
1077 continue;
1078
1079 hash = SH_ENTRY_HASH(tb, elem);
1080 optimal = SH_INITIAL_BUCKET(tb, hash);
1081 dist = SH_DISTANCE_FROM_OPTIMAL(tb, optimal, i);
1082
1083 if (dist > max_chain_length)
1084 max_chain_length = dist;
1085 total_chain_length += dist;
1086
1087 collisions[optimal]++;
1088 }
1089
1090 for (i = 0; i < tb->size; i++)
1091 {
1092 uint32 curcoll = collisions[i];
1093
1094 if (curcoll == 0)
1095 continue;
1096
1097 /* single contained element is not a collision */
1098 curcoll--;
1099 total_collisions += curcoll;
1100 if (curcoll > max_collisions)
1101 max_collisions = curcoll;
1102 }
1103
1104 if (tb->members > 0)
1105 {
1106 fillfactor = tb->members / ((double) tb->size);
1107 avg_chain_length = ((double) total_chain_length) / tb->members;
1108 avg_collisions = ((double) total_collisions) / tb->members;
1109 }
1110 else
1111 {
1112 fillfactor = 0;
1113 avg_chain_length = 0;
1114 avg_collisions = 0;
1115 }
1116
1117 sh_log("size: " UINT64_FORMAT ", members: %u, filled: %f, total chain: %u, max chain: %u, avg chain: %f, total_collisions: %u, max_collisions: %u, avg_collisions: %f",
1118 tb->size, tb->members, fillfactor, total_chain_length, max_chain_length, avg_chain_length,
1119 total_collisions, max_collisions, avg_collisions);
1120}
1121
1122#endif /* SH_DEFINE */
1123
1124
/* undefine external parameters, so next hash table can be defined */
#undef SH_PREFIX
#undef SH_KEY_TYPE
#undef SH_KEY
#undef SH_ELEMENT_TYPE
#undef SH_HASH_KEY
#undef SH_SCOPE
#undef SH_DECLARE
#undef SH_DEFINE
#undef SH_GET_HASH
#undef SH_STORE_HASH
#undef SH_USE_NONDEFAULT_ALLOCATOR
#undef SH_EQUAL

/* undefine locally declared macros */
#undef SH_MAKE_PREFIX
#undef SH_MAKE_NAME
#undef SH_MAKE_NAME_
#undef SH_FILLFACTOR
#undef SH_MAX_FILLFACTOR
#undef SH_GROW_MAX_DIB
#undef SH_GROW_MAX_MOVE
#undef SH_GROW_MIN_FILLFACTOR
#undef SH_MAX_SIZE

/* types */
#undef SH_TYPE
#undef SH_STATUS
#undef SH_STATUS_EMPTY
#undef SH_STATUS_IN_USE
#undef SH_ITERATOR

/* external function names */
#undef SH_CREATE
#undef SH_DESTROY
#undef SH_RESET
#undef SH_INSERT
#undef SH_INSERT_HASH
#undef SH_DELETE_ITEM
#undef SH_DELETE
#undef SH_LOOKUP
#undef SH_LOOKUP_HASH
#undef SH_GROW
#undef SH_START_ITERATE
#undef SH_START_ITERATE_AT
#undef SH_ITERATE
#undef SH_ALLOCATE
#undef SH_FREE
#undef SH_STAT

/* internal function names */
#undef SH_COMPUTE_PARAMETERS
#undef SH_COMPARE_KEYS
#undef SH_INITIAL_BUCKET
#undef SH_NEXT
#undef SH_PREV
#undef SH_DISTANCE_FROM_OPTIMAL
#undef SH_ENTRY_HASH
#undef SH_INSERT_HASH_INTERNAL
#undef SH_LOOKUP_HASH_INTERNAL
#define Min(x, y)
Definition: c.h:993
#define Max(x, y)
Definition: c.h:987
#define Assert(condition)
Definition: c.h:811
#define UINT64_FORMAT
Definition: c.h:489
#define unlikely(x)
Definition: c.h:274
#define PG_UINT64_MAX
Definition: c.h:533
size_t Size
Definition: c.h:545
if(${POSTGRESQL_VERSION_NUMBER} GREATER_EQUAL 110000) set(072_tpoint_spgist 072_tpoint_spgist) endif() SET(LOCAL_FILES 050_geoset 050_stbox 051_tpoint 052_tpoint_inout 054_tpoint_compops 055_geography_functions 056_tpoint_spatialfuncs 057_tpoint_tile 058_tpoint_boxops 060_tpoint_posops 062_tpoint_distance 063_tpoint_similarity 064_tpoint_aggfuncs 066_tpoint_spatialrels 068_tpoint_tempspatialrels 070_tpoint_gist $
Definition: CMakeLists.txt:3
#define SH_HASH_KEY(tb, key)
Definition: meos_catalog.c:98
#define SH_ELEMENT_TYPE
Definition: meos_catalog.c:95
#define SH_KEY_TYPE
Definition: meos_catalog.c:96
#define SH_SCOPE
Definition: meos_catalog.c:100
static uint64 pg_nextpower2_64(uint64 num)
Definition: pg_bitutils.h:169
unsigned int uint32
Definition: pg_ext_defs.in.h:13
signed int int32
Definition: pg_ext_defs.in.h:8
unsigned long int uint64
Definition: pg_ext_defs.in.h:14
#define palloc0(X)
Definition: postgres.h:62
#define pfree
Definition: postgres.h:65
#define SH_GROW
Definition: simplehash.h:122
#define SH_STAT
Definition: simplehash.h:128
#define SH_INITIAL_BUCKET
Definition: simplehash.h:135
#define SH_INSERT_HASH
Definition: simplehash.h:117
#define SH_PREV
Definition: simplehash.h:133
#define SH_STATUS
Definition: simplehash.h:107
#define SH_CREATE
Definition: simplehash.h:113
#define SH_LOOKUP_HASH
Definition: simplehash.h:121
#define SH_START_ITERATE
Definition: simplehash.h:123
#define SH_COMPUTE_PARAMETERS
Definition: simplehash.h:131
#define SH_FREE
Definition: simplehash.h:127
#define SH_STATUS_IN_USE
Definition: simplehash.h:109
#define SH_DISTANCE_FROM_OPTIMAL
Definition: simplehash.h:134
#define SH_LOOKUP_HASH_INTERNAL
Definition: simplehash.h:138
#define SH_ITERATOR
Definition: simplehash.h:110
#define SH_NEXT
Definition: simplehash.h:132
#define SH_ITERATE
Definition: simplehash.h:125
#define SH_DELETE
Definition: simplehash.h:119
#define SH_INSERT
Definition: simplehash.h:116
#define SH_INSERT_HASH_INTERNAL
Definition: simplehash.h:137
#define SH_RESET
Definition: simplehash.h:115
#define SH_ENTRY_HASH
Definition: simplehash.h:136
#define SH_DELETE_ITEM
Definition: simplehash.h:118
#define SH_ALLOCATE
Definition: simplehash.h:126
#define SH_LOOKUP
Definition: simplehash.h:120
#define SH_TYPE
Definition: simplehash.h:106
#define SH_START_ITERATE_AT
Definition: simplehash.h:124
#define SH_STATUS_EMPTY
Definition: simplehash.h:108
#define SH_DESTROY
Definition: simplehash.h:114