Redis5.0源码探索之跳跃表
- redis跳跃表结构探索
- 1.zset结构体的定义
- 2.zadd指令
本文基于Redis 5.0.3源码,对Redis中跳跃表的实现进行探索。Redis的有序集合有两种编码形式(OBJ_ENCODING_ZIPLIST和OBJ_ENCODING_SKIPLIST),其中OBJ_ENCODING_SKIPLIST即为跳跃表,因此研究Redis底层的跳跃表只需阅读与OBJ_ENCODING_SKIPLIST相关的源码。接下来将通过跳跃表的定义和有序集合的几个命令,探索跳跃表在有序集合中的实现。
redis跳跃表结构探索
1.zset结构体的定义
Redis有序集合zset及其底层跳跃表的结构体定义在server.h中,其源码如下:
/* ZSETs use a specialized version of Skiplists */
/* One skiplist node: an element, its score, a backward link and a
 * variable number of per-level forward links. */
typedef struct zskiplistNode {
    sds ele;                            /* element string referenced by this node */
    double score;                       /* score the node is ordered by (ties broken by comparing ele in zslInsert) */
    struct zskiplistNode *backward;     // backward pointer
    struct zskiplistLevel {
        struct zskiplistNode *forward;  // forward pointer at this level
        unsigned long span;             /* span covered by the forward link; zslInsert sums these to compute ranks */
    } level[]; // per-level array (flexible array member; zslCreateNode allocates 'level' entries)
} zskiplistNode;
/* Skiplist structure. */
typedef struct zskiplist {
    /* header: node allocated by zslCreate with ZSKIPLIST_MAXLEVEL levels;
     * tail: set by zslInsert to the node that has no level-0 successor,
     * NULL right after zslCreate. */
    struct zskiplistNode *header, *tail;
    unsigned long length;   /* 0 after zslCreate, incremented by each zslInsert */
    int level;              /* 1 after zslCreate, raised by zslInsert when a taller node is inserted */
} zskiplist;
/* Sorted set: a dict and a skiplist kept side by side. */
typedef struct zset {
    dict *dict;         /* element -> pointer to its score (zsetAdd stores &znode->score as the value) */
    zskiplist *zsl;     /* skiplist holding the same elements */
} zset;
2.zadd指令
zadd指令对应的处理函数为zaddCommand(client *c),zaddCommand直接调用zaddGenericCommand函数,该函数中有如下代码:
/* Lookup the key and create the sorted set if does not exist. */
zobj = lookupKeyWrite(c->db,key);
if (zobj == NULL) {
    if (xx) goto reply_to_client; /* No key + XX option: nothing to do. */
    // zset_max_ziplist_entries defaults to 128, zset_max_ziplist_value defaults to 64
    /* The skiplist encoding is chosen up front when the ziplist entry limit
     * is 0 or when the first element is longer than the ziplist value
     * limit; otherwise the ziplist encoding is used. */
    if (server.zset_max_ziplist_entries == 0 ||
        server.zset_max_ziplist_value < sdslen(c->argv[scoreidx+1]->ptr))
    {
        zobj = createZsetObject(); // create the skiplist-encoded sorted set
    } else {
        zobj = createZsetZiplistObject(); // create the ziplist-encoded sorted set
    }
    dbAdd(c->db,key,zobj);
} else {
    // The key exists in the db but its type is not zset: reply with an error
    if (zobj->type != OBJ_ZSET) {
        addReply(c,shared.wrongtypeerr);
        goto cleanup;
    }
}
当zadd的key在db中不存在时,会进行有序集合的创建,由于此处探讨的是跳跃表,故跟踪createZsetObject函数创建有序集合对象。createZsetObject源码如下:
/* Build a sorted set object backed by a skiplist.
 *
 * Allocates a zset, gives it a dict created with zsetDictType and a fresh
 * skiplist from zslCreate(), then wraps it in an object of type OBJ_ZSET
 * whose encoding is set to OBJ_ENCODING_SKIPLIST. Returns that object. */
robj *createZsetObject(void) {
    zset *set = zmalloc(sizeof(*set));  /* storage for the dict + skiplist pair */

    set->dict = dictCreate(&zsetDictType,NULL);
    set->zsl = zslCreate();

    robj *obj = createObject(OBJ_ZSET,set);
    obj->encoding = OBJ_ENCODING_SKIPLIST;
    return obj;
}
接着进入创建跳跃表的函数zslCreate,其源码如下:
/* Create a new skiplist. */
zskiplist *zslCreate(void) {
int j;
zskiplist *zsl;
zsl = zmalloc(sizeof(*zsl));
zsl->level = 1;
zsl->length = 0;
// ZSKIPLIST_MAXLEVEL=64
zsl->header = zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL);
for (j = 0; j < ZSKIPLIST_MAXLEVEL; j++) {
zsl->header->level[j].forward = NULL;
zsl->header->level[j].span = 0;
}
zsl->header->backward = NULL;
zsl->tail = NULL;
return zsl;
}
继续往zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL)阅读,我们可以看到第一个参数ZSKIPLIST_MAXLEVEL在server.h中有如下定义:
#define ZSKIPLIST_MAXLEVEL 64 /* Should be enough for 2^64 elements */
而zslCreateNode源码如下:
/* Allocate a skiplist node with room for 'level' entries in its level[]
 * array and store 'score' and 'ele' in it.
 *
 * The node keeps the 'ele' pointer as-is (no copy is made here), so the
 * SDS string is referenced by the node after the call. Only 'score' and
 * 'ele' are written: the backward pointer and the level[] entries are left
 * for the caller to fill in. */
zskiplistNode *zslCreateNode(int level, double score, sds ele) {
    zskiplistNode *node;

    /* fixed part of the node plus one zskiplistLevel per requested level */
    node = zmalloc(sizeof(zskiplistNode) + sizeof(struct zskiplistLevel)*level);
    node->ele = ele;
    node->score = score;
    return node;
}
由此我们可以看出,在zadd时,当key在db中不存在且选用的编码为OBJ_ENCODING_SKIPLIST时,会调用函数createZsetObject创建有序集合对象,根据源码大致可以画出此时创建出的跳跃表结构为:
此时就已经完成zset的创建(或者db中找到了对应key存在的对象),接下来,在函数zaddGenericCommand中会进行值的存放,具体源码为:
/* Process each (score, element) pair: scores come from scores[j], element
 * strings from every second argument starting at scoreidx+1. */
for (j = 0; j < elements; j++) {
    double newscore;
    score = scores[j];
    int retflags = flags; /* per-element copy; zsetAdd reports its outcome through it */
    ele = c->argv[scoreidx+1+j*2]->ptr;
    // Add a new element to the sorted set, or update the score of an existing element
    int retval = zsetAdd(zobj, score, ele, &retflags, &newscore);
    if (retval == 0) {
        /* zsetAdd returns 0 when the resulting score is NaN */
        addReplyError(c,nanerr);
        goto cleanup;
    }
    /* Tally the outcome reported in retflags. */
    if (retflags & ZADD_ADDED) added++;
    if (retflags & ZADD_UPDATED) updated++;
    if (!(retflags & ZADD_NOP)) processed++;
    score = newscore;
}
其中,函数zsetAdd负责向有序集合中添加一个新元素,或者更新一个已存在元素的score。当zobj->encoding == OBJ_ENCODING_SKIPLIST时,其源码如下:
zset *zs = zobj->ptr;
zskiplistNode *znode;
dictEntry *de;
// Look up ele in the dict
de = dictFind(zs->dict,ele);
// The element already exists in the dict
if (de != NULL) {
    /* NX? Return, same element already exists. */
    if (nx) {
        *flags |= ZADD_NOP;
        return 1;
    }
    /* The dict value is a pointer to the score stored in the skiplist node. */
    curscore = *(double*)dictGetVal(de);
    /* Prepare the score for the increment if needed. */
    if (incr) {
        score += curscore;
        if (isnan(score)) {
            *flags |= ZADD_NAN;
            return 0;
        }
        if (newscore) *newscore = score;
    }
    /* Remove and re-insert when score changes. */
    if (score != curscore) {
        // Update the score in the skiplist
        znode = zslUpdateScore(zs->zsl,curscore,ele,score);
        /* Note that we did not removed the original element from
         * the hash table representing the sorted set, so we just
         * update the score. */
        dictGetVal(de) = &znode->score; /* Update score ptr. */
        *flags |= ZADD_UPDATED;
    }
    return 1;
} else if (!xx) {
    /* New element and XX not given: the skiplist and the dict both
     * reference the duplicated string made here. */
    ele = sdsdup(ele);
    // Insert into the skiplist
    znode = zslInsert(zs->zsl,score,ele);
    // dictAdd associates the new element with its score pointer in the dict
    serverAssert(dictAdd(zs->dict,ele,&znode->score) == DICT_OK);
    *flags |= ZADD_ADDED;
    if (newscore) *newscore = score;
    return 1;
} else {
    /* Element missing and XX given: nothing is done. */
    *flags |= ZADD_NOP;
    return 1;
}
其中zslInsert函数即为在跳跃表中插入一个新的节点,继续跟踪zslInsert的源码:
/* Insert a new node with the given 'score' and 'ele' into the skiplist and
 * return it.
 *
 * 'score' must not be NaN (asserted). The node keeps the 'ele' pointer that
 * is passed in. Nodes are ordered by score, with ties broken by sdscmp() on
 * the element. The caller must make sure the element is not already in the
 * list (see the comment before the level is chosen).
 *
 * update[i] is the last node at level i that sorts before the new node;
 * rank[i] is the sum of the spans crossed from the header to reach
 * update[i]. */
zskiplistNode *zslInsert(zskiplist *zsl, double score, sds ele) {
    zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
    /* rank[] accumulates 'span' values, which are unsigned long, so it uses
     * the same type. Redis 5.0.3 declares it as unsigned int, which would
     * truncate the running rank once it exceeds UINT_MAX on platforms where
     * unsigned long is wider than unsigned int. */
    unsigned long rank[ZSKIPLIST_MAXLEVEL];
    int i, level;

    serverAssert(!isnan(score));
    x = zsl->header;
    /* Walk from the highest level in use down to level 0.
     * For a freshly created zset the level is 1. */
    for (i = zsl->level-1; i >= 0; i--) {
        /* store rank that is crossed to reach the insert position */
        rank[i] = i == (zsl->level-1) ? 0 : rank[i+1];
        while (x->level[i].forward &&
                (x->level[i].forward->score < score ||
                    (x->level[i].forward->score == score &&
                    sdscmp(x->level[i].forward->ele,ele) < 0)))
        {
            rank[i] += x->level[i].span;
            x = x->level[i].forward;
        }
        update[i] = x;
    }
    /* we assume the element is not already inside, since we allow duplicated
     * scores, reinserting the same element should never happen since the
     * caller of zslInsert() should test in the hash table if the element is
     * already inside or not. */
    level = zslRandomLevel(); /* pick a random level for the new node */
    if (level > zsl->level) {
        /* Levels that were unused so far start at the header, whose span at
         * those levels is set to the current list length. */
        for (i = zsl->level; i < level; i++) {
            rank[i] = 0;
            update[i] = zsl->header;
            update[i]->level[i].span = zsl->length;
        }
        zsl->level = level;
    }
    x = zslCreateNode(level,score,ele);
    for (i = 0; i < level; i++) {
        /* splice x in after update[i] at level i */
        x->level[i].forward = update[i]->level[i].forward;
        update[i]->level[i].forward = x;

        /* update span covered by update[i] as x is inserted here */
        x->level[i].span = update[i]->level[i].span - (rank[0] - rank[i]);
        update[i]->level[i].span = (rank[0] - rank[i]) + 1;
    }

    /* increment span for untouched levels */
    for (i = level; i < zsl->level; i++) {
        update[i]->level[i].span++;
    }

    /* The header is never exposed through a backward pointer. */
    x->backward = (update[0] == zsl->header) ? NULL : update[0];
    if (x->level[0].forward)
        x->level[0].forward->backward = x;
    else
        zsl->tail = x; /* no successor at level 0: x is the new tail */
    zsl->length++;
    return x;
}
根据该段源码,假如zadd时该key此前在数据库中并不存在,且创建的编码为OBJ_ENCODING_SKIPLIST,那么可以大致推算出第一次插入数据后的结构为(假设随机出来的level为3):
继续回到zaddGenericCommand函数,循环会再次调用zsetAdd,即第二次向该跳跃表插入数据。假设第二次插入时随机出的level为2,根据zsetAdd所调用的zslInsert函数,我们可以推算出第二次向该跳跃表插入数据后跳跃表的结构为:
以上都是调用zslInsert函数将新元素添加到跳跃表,而在zslInsert将新元素插入跳跃表后紧接着执行了函数dictAdd(即dictAdd(zs->dict,ele,&znode->score)),该函数的源码为:
/* Add the pair (key, val) to dict 'd'.
 *
 * The entry is obtained from dictAddRaw(); when that returns NULL
 * (presumably because the key is already present - confirm against
 * dictAddRaw) nothing is stored and DICT_ERR is returned. Otherwise the
 * value is set on the new entry and DICT_OK is returned. */
int dictAdd(dict *d, void *key, void *val)
{
    dictEntry *added = dictAddRaw(d,key,NULL);

    if (added != NULL) {
        dictSetVal(d, added, val);
        return DICT_OK;
    }
    return DICT_ERR;
}
此处进行了dict的处理,根据源代码,大致可以画出完整的zset结构图:
综上所述,大致了解到zadd在创建编码为OBJ_ENCODING_SKIPLIST的有序集合时,生成的zset数据结构。zset结构体中包含了一个dict和一个zskiplist。