The In-Memory Hash Table Structure in Postgres
A hash table is a structure for locating an element quickly. It supports two essential operations: a put operation, which stores an element in the table, and a get operation, which quickly finds and retrieves an element.
(1) Entry: in the implementation, an element stored in the hash table is called an entry.
The structure of an entry is shown below.
/*
 * HASHELEMENT is the private part of a hashtable entry.  The caller's data
 * follows the HASHELEMENT structure (on a MAXALIGN'd boundary).  The hash key
 * is expected to be at the start of the caller's hash entry data structure.
 */
The header of each entry in the hash table is defined as:
typedef struct HASHELEMENT
{
    struct HASHELEMENT *link;   /* link to next entry in same bucket */
    uint32      hashvalue;      /* hash function result for this entry */
} HASHELEMENT;
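The caller's own data begins immediately after this header, aligned on a MAXALIGN boundary, with the hash key at its start. For reference, dynahash.c converts between the header and the caller's data roughly like this (a sketch based on the source's ELEMENTKEY macro):

/* from the entry header to the caller's data (the key sits at its start) */
#define ELEMENTKEY(helem)  (((char *) (helem)) + MAXALIGN(sizeof(HASHELEMENT)))
/* and back: from the caller's data pointer to the entry header */
#define ELEMENT_FROM_KEY(key)  \
    ((HASHELEMENT *) (((char *) (key)) - MAXALIGN(sizeof(HASHELEMENT))))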
(2) Bucket: every entry in the hash table is hashed into a bucket. Here a bucket is simply a linked list of the entries that fall into it.
In other words, the value computed by the hash function (after reduction) is a bucket number. Entries that map to the same bucket are said to collide; collisions are resolved by chaining the colliding entries together in the bucket's linked list.
/* A hash bucket is a linked list of HASHELEMENTs */
typedef HASHELEMENT *HASHBUCKET;    /* a pointer to the head of the entry chain */
(3) Segment: a segment can be thought of as the second-level index of the hash table. All of the table's buckets are distributed across segments, and each segment is an array of buckets.
/* A hash segment is an array of bucket headers */
typedef HASHBUCKET *HASHSEGMENT;    /* a pointer to an array of buckets */
(4) Directory: the directory is the first-level index of the hash table. It is an array of pointers to segments.
HASHSEGMENT *dir;           /* directory of segment starts */
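Putting the directory, segments and buckets together, reaching the bucket for a given bucket number takes two array lookups. A minimal sketch of that addressing, using the ssize and sshift fields of the HASHHDR header described below (variable names are illustrative; hash_search further down does exactly this, using the MOD macro since ssize is a power of two):

segment_num = bucket >> hctl->sshift;         /* bucket / ssize */
segment_ndx = bucket & (hctl->ssize - 1);     /* bucket % ssize */
segp = hashp->dir[segment_num];               /* pick the segment */
bucketPtr = &segp[segment_ndx];               /* head of the entry chain */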
(5) Hash table structure: the hash table structure (HTAB) stores the table's control information; it describes everything needed while operating on the table.
/*
 * Top control structure for a hashtable --- need not be shared, since
 * no fields change at runtime
 */
typedef struct HTAB
{
    HASHHDR    *hctl;           /* shared control information */
    HASHSEGMENT *dir;           /* directory of segment starts */
    // the functions used to operate on the hash table:
    HashValueFunc hash;         /* hash function */
    HashCompareFunc match;      /* key comparison function */
    HashCopyFunc keycopy;       /* key copying function */
    HashAllocFunc alloc;        /* memory allocator */
    char       *tabname;        /* table name (for error messages) */
} HTAB;
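The four function pointers give the table its hashing, comparison, key-copy and allocation behavior. For reference, their types are declared in utils/hsearch.h roughly as follows (a sketch from memory; exact signatures may differ slightly across PostgreSQL versions):

typedef uint32 (*HashValueFunc) (const void *key, Size keysize);
typedef int (*HashCompareFunc) (const void *key1, const void *key2, Size keysize);
typedef void *(*HashCopyFunc) (void *dest, const void *src, Size keysize);
typedef void *(*HashAllocFunc) (Size request);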
/* Header structure for a hash table --- contains all changeable info */
typedef struct HASHHDR
{
    // directory size: the number of segment slots in the directory
    long        dsize;          /* Directory Size */
    // segment size: the number of buckets per segment; must be a power of 2
    long        ssize;          /* Segment Size --- must be power of 2 */
    /*************************************************************************
     * Given a bucket number, we first locate its segment and then the offset
     * within that segment.  The segment number is simply the bucket number
     * divided by ssize (integer division); since ssize is a power of two,
     * the division can be done more cheaply as a right shift by log2(ssize)
     * bits.  Segment numbers start at 0.
     *************************************************************************/
    int         sshift;         /* Segment shift = log2(ssize) */
    // highest bucket number currently in use
    uint32      max_bucket;     /* ID of Maximum bucket in use */
    /*************************************************************************
     * These two masks work together with the hash function.  The hash
     * function returns a full uint32 value, but the table normally has far
     * fewer buckets than 2^32, so the value must be reduced to a valid
     * bucket number.  high_mask keeps just the low-order bits covering the
     * current table size rounded up to a power of two; if the masked value
     * exceeds max_bucket, low_mask (covering the lower half of that range)
     * is applied instead, so the result always names an existing bucket.
     * For example, with 2^16 buckets only the low 16 bits of the hash value
     * are kept: hash value & mask = bucket number.
     *************************************************************************/
    uint32      high_mask;      /* Mask to modulo into entire table */
    uint32      low_mask;       /* Mask to modulo into lower half of table */
    long        ffactor;        /* Fill factor */
    long        nentries;       /* Number of entries in hash table */
    long        nsegs;          /* Number of allocated segments */
    Size        keysize;        /* hash key length in bytes */
    Size        entrysize;      /* total user element size in bytes */
    long        max_dsize;      /* 'dsize' limit if directory is fixed size */
    int         nelem_alloc;    /* number of entries to allocate at once */
    HASHELEMENT *freeList;      /* linked list of free elements */
    /*************************************************************************
     * When an entry is removed, its element is not freed; instead it is
     * pushed onto this free list so it can be reused.  When a new entry is
     * needed and the free list is empty, a fresh batch of elements is
     * allocated, linked onto the free list, and one of them is taken.
     *************************************************************************/
#ifdef HASH_STATISTICS
long accesses;
long collisions;
#endif
} HASHHDR;
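To make max_bucket, high_mask and low_mask concrete, here is a sketch of the bucket calculation used by hash_search() below, modeled on calc_bucket() in dynahash.c (treat it as an illustration rather than the exact source):

static uint32
calc_bucket(HASHHDR *hctl, uint32 hash_val)
{
    uint32      bucket;

    /* keep only the low-order bits that cover the current table range */
    bucket = hash_val & hctl->high_mask;

    /* if that names a bucket that does not exist yet, fold into the lower half */
    if (bucket > hctl->max_bucket)
        bucket = bucket & hctl->low_mask;

    return bucket;
}

When the table grows (expand_table() below), max_bucket advances one bucket at a time; once it passes high_mask, low_mask takes the old high_mask value and high_mask doubles, which is the usual linear-hashing scheme.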
The storage layout of the hash table, then, is: a directory of segment pointers, each segment an array of bucket heads, and each bucket a chain of entries.
Below is one of its core functions, hash_search, which implements insertion, deletion, and lookup:
/*----------
 * hash_search -- look up key in table and perform action
 *
 * action is one of:
 *      HASH_FIND: look up key in table
 *      HASH_ENTER: look up key in table, creating entry if not present
 *      HASH_ENTER_NULL: same, but return NULL if out of memory
 *      HASH_REMOVE: look up key in table, remove entry if present
 *
 * Return value is a pointer to the element found/entered/removed if any,
 * or NULL if no match was found.  (NB: in the case of the REMOVE action,
 * the result is a dangling pointer that shouldn't be dereferenced!)
 *
 * HASH_ENTER will normally ereport a generic "out of memory" error if
 * it is unable to create a new entry.  The HASH_ENTER_NULL operation is
 * the same except it will return NULL if out of memory.  Note that
 * HASH_ENTER_NULL cannot be used with the default palloc-based allocator,
 * since palloc internally ereports on out-of-memory.
 *
 * If foundPtr isn't NULL, then *foundPtr is set TRUE if we found an
 * existing entry in the table, FALSE otherwise.  This is needed in the
 * HASH_ENTER case, but is redundant with the return value otherwise.
 *----------
 */
void *
hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
{
    HASHHDR    *hctl = hashp->hctl;
    Size        keysize = hctl->keysize;
    uint32      hashvalue;      /* hash value of the key */
    uint32      bucket;         /* bucket number the key maps to */
    long        segment_num;    /* segment number */
    long        segment_ndx;    /* offset within the segment */
    HASHSEGMENT segp;
    HASHBUCKET  currBucket;
    HASHBUCKET *prevBucketPtr;
    HashCompareFunc match;

#if HASH_STATISTICS
    hash_accesses++;
    hctl->accesses++;
#endif
    /*
     * Do the initial lookup
     */
    hashvalue = hashp->hash(keyPtr, keysize);
    bucket = calc_bucket(hctl, hashvalue);      /* reduce the hash value to a bucket number */
    segment_num = bucket >> hctl->sshift;       /* segment number */
    segment_ndx = MOD(bucket, hctl->ssize);     /* offset within the segment */
    segp = hashp->dir[segment_num];

    if (segp == NULL)
        hash_corrupted(hashp);

    prevBucketPtr = &segp[segment_ndx];
    currBucket = *prevBucketPtr;
    /*
     * Follow collision chain looking for matching key
     */
    match = hashp->match;       /* save one fetch in inner loop */
    while (currBucket != NULL)
    {
        if (currBucket->hashvalue == hashvalue &&
            match(ELEMENTKEY(currBucket), keyPtr, keysize) == 0)
            break;
        prevBucketPtr = &(currBucket->link);
        currBucket = *prevBucketPtr;
#if HASH_STATISTICS
        hash_collisions++;
        hctl->collisions++;
#endif
    }

    if (foundPtr)
        *foundPtr = (bool) (currBucket != NULL);
/*
*OK, now what?
*/
    switch (action)
    {
        case HASH_FIND:
            if (currBucket != NULL)
                return (void *) ELEMENTKEY(currBucket);
            return NULL;

        case HASH_REMOVE:
            if (currBucket != NULL)
            {
                Assert(hctl->nentries > 0);
                hctl->nentries--;

                /* remove record from hash bucket's chain. */
                *prevBucketPtr = currBucket->link;

                /* add the record to the freelist for this table. */
                currBucket->link = hctl->freeList;
                hctl->freeList = currBucket;

                /*
                 * better hope the caller is synchronizing access to this
                 * element, because someone else is going to reuse it the next
                 * time something is added to the table
                 */
                return (void *) ELEMENTKEY(currBucket);
            }
            return NULL;

        case HASH_ENTER_NULL:
            /* ENTER_NULL does not work with palloc-based allocator */
            Assert(hashp->alloc != DynaHashAlloc);
            /* FALL THRU */

        case HASH_ENTER:
            /* Return existing element if found, else create one */
            if (currBucket != NULL)
                return (void *) ELEMENTKEY(currBucket);

            /* disallow inserts if frozen */
            if (hashp->frozen)
                elog(ERROR, "cannot insert into a frozen hashtable");

            /* get the next free element */
            currBucket = hctl->freeList;
            if (currBucket == NULL)
            {
                /* no free elements.  allocate another chunk of buckets */
                if (!element_alloc(hashp, hctl->nelem_alloc))
                {
                    /* out of memory */
                    if (action == HASH_ENTER_NULL)
                        return NULL;
                    /* report a generic message */
                    if (hashp->isshared)
                        ereport(ERROR,
                                (errcode(ERRCODE_OUT_OF_MEMORY),
                                 errmsg("out of shared memory")));
                    else
                        ereport(ERROR,
                                (errcode(ERRCODE_OUT_OF_MEMORY),
                                 errmsg("out of memory")));
                }
                currBucket = hctl->freeList;
                Assert(currBucket != NULL);
            }
            hctl->freeList = currBucket->link;

            /* link into hashbucket chain */
            *prevBucketPtr = currBucket;
            currBucket->link = NULL;

            /* copy key into record */
            currBucket->hashvalue = hashvalue;
            hashp->keycopy(ELEMENTKEY(currBucket), keyPtr, keysize);

            /* caller is expected to fill the data field on return */

            /*
             * Check if it is time to split a bucket.  Can't split if table
             * is the subject of any active hash_seq_search scans.
             */
            if (++hctl->nentries / (long) (hctl->max_bucket + 1) >= hctl->ffactor &&
                !has_seq_scans(hashp))
            {
                /*
                 * NOTE: failure to expand table is not a fatal error, it just
                 * means we have to run at higher fill factor than we wanted.
                 */
                expand_table(hashp);
            }

            return (void *) ELEMENTKEY(currBucket);
    }
    elog(ERROR, "unrecognized hash action code: %d", (int) action);
return NULL; /* keep compiler quiet */
}
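For context, here is a minimal sketch of how a caller uses this API, with the put/get operations from the start of the article expressed as HASH_ENTER and HASH_FIND. The MyEntry layout and the flag choices are illustrative assumptions, not taken from the code above (HASH_BLOBS, which selects a built-in hash for fixed-size binary keys, exists in newer PostgreSQL versions; older versions set ctl.hash explicitly):

#include "postgres.h"
#include "utils/hsearch.h"

/* caller-defined entry: the hash key must be the first field */
typedef struct MyEntry
{
    int32       key;            /* hash key */
    int64       counter;        /* caller's payload */
} MyEntry;

static void
example(void)
{
    HASHCTL     ctl;
    HTAB       *htab;
    MyEntry    *entry;
    bool        found;
    int32       key = 42;

    memset(&ctl, 0, sizeof(ctl));
    ctl.keysize = sizeof(int32);
    ctl.entrysize = sizeof(MyEntry);
    htab = hash_create("example table", 128, &ctl, HASH_ELEM | HASH_BLOBS);

    /* "put": create the entry if it is not present yet */
    entry = (MyEntry *) hash_search(htab, &key, HASH_ENTER, &found);
    if (!found)
        entry->counter = 0;     /* initialize payload of a freshly created entry */
    entry->counter++;

    /* "get": look the entry up again */
    entry = (MyEntry *) hash_search(htab, &key, HASH_FIND, NULL);

    /* remove it; the returned pointer must not be dereferenced */
    (void) hash_search(htab, &key, HASH_REMOVE, NULL);
}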