The In-Memory Hash Table Structure in Postgres
A hash table is a structure for locating an element quickly. It supports two essential operations: a put operation, which stores an element in the table, and a get operation, which quickly finds and retrieves an element.
(1) Entry: in the implementation, an element stored in the hash table is called an entry.
The structure of an entry is shown below.
/*
 * HASHELEMENT is the private part of a hashtable entry.  The caller's data
 * follows the HASHELEMENT structure (on a MAXALIGN'd boundary).  The hash key
 * is expected to be at the start of the caller's hash entry data structure.
 */
The header of each entry in the hash table is defined as:
typedef struct HASHELEMENT
{
    struct HASHELEMENT *link;   /* link to next entry in same bucket */
    uint32      hashvalue;      /* hash function result for this entry */
} HASHELEMENT;
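The caller's own data begins immediately after this header, aligned on a MAXALIGN boundary, with the hash key at its start. For reference, dynahash.c converts between the header and the caller's data roughly like this (a sketch based on the source's ELEMENTKEY macro):

/* from the entry header to the caller's data (the key sits at its start) */
#define ELEMENTKEY(helem)  (((char *) (helem)) + MAXALIGN(sizeof(HASHELEMENT)))
/* and back: from the caller's data pointer to the entry header */
#define ELEMENT_FROM_KEY(key)  \
    ((HASHELEMENT *) (((char *) (key)) - MAXALIGN(sizeof(HASHELEMENT))))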
(2) Bucket: every entry in the hash table is hashed into a bucket. Here a bucket is simply a linked list of the entries that fall into it.
In other words, the value computed by the hash function (after reduction) is a bucket number. Entries that map to the same bucket are said to collide; collisions are resolved by chaining the colliding entries together in the bucket's linked list.
/* A hash bucket is a linked list of HASHELEMENTs */
typedef HASHELEMENT *HASHBUCKET;    /* a pointer to the head of the entry chain */
(3) Segment: a segment can be thought of as the second-level index of the hash table. All of the table's buckets are distributed across segments, and each segment is an array of buckets.
/* A hash segment is an array of bucket headers */
typedef HASHBUCKET *HASHSEGMENT;    /* a pointer to an array of buckets */
(4) Directory: the directory is the first-level index of the hash table. It is an array of pointers to segments.
HASHSEGMENT *dir;           /* directory of segment starts */
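Putting the directory, segments and buckets together, reaching the bucket for a given bucket number takes two array lookups. A minimal sketch of that addressing, using the ssize and sshift fields of the HASHHDR header described below (variable names are illustrative; hash_search further down does exactly this, using the MOD macro since ssize is a power of two):

segment_num = bucket >> hctl->sshift;         /* bucket / ssize */
segment_ndx = bucket & (hctl->ssize - 1);     /* bucket % ssize */
segp = hashp->dir[segment_num];               /* pick the segment */
bucketPtr = &segp[segment_ndx];               /* head of the entry chain */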
(5) Hash table structure: the hash table structure (HTAB) stores the table's control information; it describes everything needed while operating on the table.
/*
 * Top control structure for a hashtable --- need not be shared, since
 * no fields change at runtime
 */
typedef struct HTAB
{
    HASHHDR    *hctl;           /* shared control information */
    HASHSEGMENT *dir;           /* directory of segment starts */
    // the functions used to operate on the hash table:
    HashValueFunc hash;         /* hash function */
    HashCompareFunc match;      /* key comparison function */
    HashCopyFunc keycopy;       /* key copying function */
    HashAllocFunc alloc;        /* memory allocator */
    char       *tabname;        /* table name (for error messages) */
} HTAB;
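The four function pointers give the table its hashing, comparison, key-copy and allocation behavior. For reference, their types are declared in utils/hsearch.h roughly as follows (a sketch from memory; exact signatures may differ slightly across PostgreSQL versions):

typedef uint32 (*HashValueFunc) (const void *key, Size keysize);
typedef int (*HashCompareFunc) (const void *key1, const void *key2, Size keysize);
typedef void *(*HashCopyFunc) (void *dest, const void *src, Size keysize);
typedef void *(*HashAllocFunc) (Size request);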
/* Header structure for a hash table --- contains all changeable info */
typedef struct HASHHDR
{
    // directory size: the number of segment slots in the directory
    long        dsize;          /* Directory Size */
    // segment size: the number of buckets per segment; must be a power of 2
    long        ssize;          /* Segment Size --- must be power of 2 */
    /*************************************************************************
     * Given a bucket number, we first locate its segment and then the offset
     * within that segment.  The segment number is simply the bucket number
     * divided by ssize (integer division); since ssize is a power of two,
     * the division can be done more cheaply as a right shift by log2(ssize)
     * bits.  Segment numbers start at 0.
     *************************************************************************/
    int         sshift;         /* Segment shift = log2(ssize) */
    // highest bucket number currently in use
    uint32      max_bucket;     /* ID of Maximum bucket in use */
    /*************************************************************************
     * These two masks work together with the hash function.  The hash
     * function returns a full uint32 value, but the table normally has far
     * fewer buckets than 2^32, so the value must be reduced to a valid
     * bucket number.  high_mask keeps just the low-order bits covering the
     * current table size rounded up to a power of two; if the masked value
     * exceeds max_bucket, low_mask (covering the lower half of that range)
     * is applied instead, so the result always names an existing bucket.
     * For example, with 2^16 buckets only the low 16 bits of the hash value
     * are kept: hash value & mask = bucket number.
     *************************************************************************/
    uint32      high_mask;      /* Mask to modulo into entire table */
    uint32      low_mask;       /* Mask to modulo into lower half of table */
    long        ffactor;        /* Fill factor */
    long        nentries;       /* Number of entries in hash table */
    long        nsegs;          /* Number of allocated segments */
    Size        keysize;        /* hash key length in bytes */
    Size        entrysize;      /* total user element size in bytes */
    long        max_dsize;      /* 'dsize' limit if directory is fixed size */
    int         nelem_alloc;    /* number of entries to allocate at once */
    HASHELEMENT *freeList;      /* linked list of free elements */
    /*************************************************************************
     * When an entry is removed, its element is not freed; instead it is
     * pushed onto this free list so it can be reused.  When a new entry is
     * needed and the free list is empty, a fresh batch of elements is
     * allocated, linked onto the free list, and one of them is taken.
     *************************************************************************/
#ifdef HASH_STATISTICS
long accesses;
long collisions;
#endif
} HASHHDR;
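To make max_bucket, high_mask and low_mask concrete, here is a sketch of the bucket calculation used by hash_search() below, modeled on calc_bucket() in dynahash.c (treat it as an illustration rather than the exact source):

static uint32
calc_bucket(HASHHDR *hctl, uint32 hash_val)
{
    uint32      bucket;

    /* keep only the low-order bits that cover the current table range */
    bucket = hash_val & hctl->high_mask;

    /* if that names a bucket that does not exist yet, fold into the lower half */
    if (bucket > hctl->max_bucket)
        bucket = bucket & hctl->low_mask;

    return bucket;
}

When the table grows (expand_table() below), max_bucket advances one bucket at a time; once it passes high_mask, low_mask takes the old high_mask value and high_mask doubles, which is the usual linear-hashing scheme.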
The storage layout of the hash table, then, is: a directory of segment pointers, each segment an array of bucket heads, and each bucket a chain of entries.
Below is one of its core functions, hash_search, which implements insertion, deletion, and lookup:
/*----------
 * hash_search -- look up key in table and perform action
 *
 * action is one of:
 *      HASH_FIND: look up key in table
 *      HASH_ENTER: look up key in table, creating entry if not present
 *      HASH_ENTER_NULL: same, but return NULL if out of memory
 *      HASH_REMOVE: look up key in table, remove entry if present
 *
 * Return value is a pointer to the element found/entered/removed if any,
 * or NULL if no match was found.  (NB: in the case of the REMOVE action,
 * the result is a dangling pointer that shouldn't be dereferenced!)
 *
 * HASH_ENTER will normally ereport a generic "out of memory" error if
 * it is unable to create a new entry.  The HASH_ENTER_NULL operation is
 * the same except it will return NULL if out of memory.  Note that
 * HASH_ENTER_NULL cannot be used with the default palloc-based allocator,
 * since palloc internally ereports on out-of-memory.
 *
 * If foundPtr isn't NULL, then *foundPtr is set TRUE if we found an
 * existing entry in the table, FALSE otherwise.  This is needed in the
 * HASH_ENTER case, but is redundant with the return value otherwise.
 *----------
 */
void *
hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
{
    HASHHDR    *hctl = hashp->hctl;
    Size        keysize = hctl->keysize;
    uint32      hashvalue;      /* hash value of the key */
    uint32      bucket;         /* bucket number the key maps to */
    long        segment_num;    /* segment number */
    long        segment_ndx;    /* offset within the segment */
    HASHSEGMENT segp;
    HASHBUCKET  currBucket;
    HASHBUCKET *prevBucketPtr;
    HashCompareFunc match;

#if HASH_STATISTICS
    hash_accesses++;
    hctl->accesses++;
#endif
    /*
     * Do the initial lookup
     */
    hashvalue = hashp->hash(keyPtr, keysize);
    bucket = calc_bucket(hctl, hashvalue);      /* reduce the hash value to a bucket number */
    segment_num = bucket >> hctl->sshift;       /* segment number */
    segment_ndx = MOD(bucket, hctl->ssize);     /* offset within the segment */
    segp = hashp->dir[segment_num];

    if (segp == NULL)
        hash_corrupted(hashp);

    prevBucketPtr = &segp[segment_ndx];
    currBucket = *prevBucketPtr;
    /*
     * Follow collision chain looking for matching key
     */
    match = hashp->match;       /* save one fetch in inner loop */
    while (currBucket != NULL)
    {
        if (currBucket->hashvalue == hashvalue &&
            match(ELEMENTKEY(currBucket), keyPtr, keysize) == 0)
            break;
        prevBucketPtr = &(currBucket->link);
        currBucket = *prevBucketPtr;
#if HASH_STATISTICS
        hash_collisions++;
        hctl->collisions++;
#endif
    }

    if (foundPtr)
        *foundPtr = (bool) (currBucket != NULL);
/*
*OK, now what?
*/
    switch (action)
    {
        case HASH_FIND:
            if (currBucket != NULL)
                return (void *) ELEMENTKEY(currBucket);
            return NULL;

        case HASH_REMOVE:
            if (currBucket != NULL)
            {
                Assert(hctl->nentries > 0);
                hctl->nentries--;

                /* remove record from hash bucket's chain. */
                *prevBucketPtr = currBucket->link;

                /* add the record to the freelist for this table. */
                currBucket->link = hctl->freeList;
                hctl->freeList = currBucket;

                /*
                 * better hope the caller is synchronizing access to this
                 * element, because someone else is going to reuse it the next
                 * time something is added to the table
                 */
                return (void *) ELEMENTKEY(currBucket);
            }
            return NULL;

        case HASH_ENTER_NULL:
            /* ENTER_NULL does not work with palloc-based allocator */
            Assert(hashp->alloc != DynaHashAlloc);
            /* FALL THRU */

        case HASH_ENTER:
            /* Return existing element if found, else create one */
            if (currBucket != NULL)
                return (void *) ELEMENTKEY(currBucket);

            /* disallow inserts if frozen */
            if (hashp->frozen)
                elog(ERROR, "cannot insert into a frozen hashtable");

            /* get the next free element */
            currBucket = hctl->freeList;
            if (currBucket == NULL)
            {
                /* no free elements.  allocate another chunk of buckets */
                if (!element_alloc(hashp, hctl->nelem_alloc))
                {
                    /* out of memory */
                    if (action == HASH_ENTER_NULL)
                        return NULL;
                    /* report a generic message */
                    if (hashp->isshared)
                        ereport(ERROR,
                                (errcode(ERRCODE_OUT_OF_MEMORY),
                                 errmsg("out of shared memory")));
                    else
                        ereport(ERROR,
                                (errcode(ERRCODE_OUT_OF_MEMORY),
                                 errmsg("out of memory")));
                }
                currBucket = hctl->freeList;
                Assert(currBucket != NULL);
            }
            hctl->freeList = currBucket->link;

            /* link into hashbucket chain */
            *prevBucketPtr = currBucket;
            currBucket->link = NULL;

            /* copy key into record */
            currBucket->hashvalue = hashvalue;
            hashp->keycopy(ELEMENTKEY(currBucket), keyPtr, keysize);

            /* caller is expected to fill the data field on return */

            /*
             * Check if it is time to split a bucket.  Can't split if table
             * is the subject of any active hash_seq_search scans.
             */
            if (++hctl->nentries / (long) (hctl->max_bucket + 1) >= hctl->ffactor &&
                !has_seq_scans(hashp))
            {
                /*
                 * NOTE: failure to expand table is not a fatal error, it just
                 * means we have to run at higher fill factor than we wanted.
                 */
                expand_table(hashp);
            }

            return (void *) ELEMENTKEY(currBucket);
    }
    elog(ERROR, "unrecognized hash action code: %d", (int) action);
return NULL; /* keep compiler quiet */
}
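For context, here is a minimal sketch of how a caller uses this API, with the put/get operations from the start of the article expressed as HASH_ENTER and HASH_FIND. The MyEntry layout and the flag choices are illustrative assumptions, not taken from the code above (HASH_BLOBS, which selects a built-in hash for fixed-size binary keys, exists in newer PostgreSQL versions; older versions set ctl.hash explicitly):

#include "postgres.h"
#include "utils/hsearch.h"

/* caller-defined entry: the hash key must be the first field */
typedef struct MyEntry
{
    int32       key;            /* hash key */
    int64       counter;        /* caller's payload */
} MyEntry;

static void
example(void)
{
    HASHCTL     ctl;
    HTAB       *htab;
    MyEntry    *entry;
    bool        found;
    int32       key = 42;

    memset(&ctl, 0, sizeof(ctl));
    ctl.keysize = sizeof(int32);
    ctl.entrysize = sizeof(MyEntry);
    htab = hash_create("example table", 128, &ctl, HASH_ELEM | HASH_BLOBS);

    /* "put": create the entry if it is not present yet */
    entry = (MyEntry *) hash_search(htab, &key, HASH_ENTER, &found);
    if (!found)
        entry->counter = 0;     /* initialize payload of a freshly created entry */
    entry->counter++;

    /* "get": look the entry up again */
    entry = (MyEntry *) hash_search(htab, &key, HASH_FIND, NULL);

    /* remove it; the returned pointer must not be dereferenced */
    (void) hash_search(htab, &key, HASH_REMOVE, NULL);
}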