一、什么是VM
virtual memory(VM)虚拟内存,在进程视角看到的内存空间,主要是使用磁盘文件扩展内存,使其实际使用的内存空间超过实际的物理空间。原理是在实际物理内存空间不足的情况下,将内存中最近最久未使用的数据(冷数据)序列化到文件中,然后释放这部分数据占用的空间,以腾出空间给其他数据分配空间使用。
二、redis的VM
Linux中已经有VM,为什么redis还要自己实现一套VM呢?
主要还是考虑使用方面,Linux在换出数据时将整个页(Page)的数据换出,整个页上可能有很多数据,可能包含key或者value,不方便管理。(可能不正确,后续继续学习Linux)
只将value换出,key保留在内存中,简单可控。
redis从2.0.0引入VM,而在2.6.0中已经消失了,真是昙花一现。
三、redis的vm原理
3.1 配置
#开关,是否启用vm
vm-enabled no
#swap文件
vm-swap-file /tmp/redis.swap
#最大内存限制,超过则淘汰某些value到文件
vm-max-memory 102400
#swap文件中每页的大小
vm-page-size 32
#swap文件中页的总数
vm-pages 134217728
3.2 VM初始化
分配一个page_size * pages 的文件, 以及一个(pages+7)/8大小的位图,用于标记swap文件中哪些page已经使用,哪些没有使用
/* Initialize the Virtual Memory subsystem.
 *
 * Opens the swap file (creating it when it does not exist yet), resets the
 * VM counters, sizes the swap file to vm_pages * vm_page_size bytes,
 * allocates a zeroed bitmap of (vm_pages+7)/8 bytes tracking page usage,
 * creates the job lists and mutexes used by the threaded I/O code, and
 * registers the read end of a pipe with the event loop so that
 * vmThreadedIOCompletedJob() runs whenever that end becomes readable.
 *
 * The "..." lines are portions elided from this excerpt. */
static void vmInit(void) {
    off_t totsize;
    int pipefds[2];
    size_t stacksize;
    struct flock fl;
    ...
    /* Try to open the old swap file, otherwise create it */
    if ((server.vm_fp = fopen(server.vm_swap_file,"r+b")) == NULL) {
        server.vm_fp = fopen(server.vm_swap_file,"w+b");
    }
    ...
    server.vm_fd = fileno(server.vm_fp);
    ...
    /* Initialize */
    server.vm_next_page = 0;
    server.vm_near_pages = 0;
    server.vm_stats_used_pages = 0;
    server.vm_stats_swapped_objects = 0;
    server.vm_stats_swapouts = 0;
    server.vm_stats_swapins = 0;
    /* Total size of the swap file in bytes. */
    totsize = server.vm_pages*server.vm_page_size;
    /* Size the swap file. A failure here would leave a swap file of the
     * wrong size, so treat it as fatal just like the pipe(2) failure
     * below. */
    if (ftruncate(server.vm_fd,totsize) == -1) {
        redisLog(REDIS_WARNING,"Can't ftruncate swap file: %s. Exiting.",
            strerror(errno));
        exit(1);
    }
    /* Allocate the page bitmap, one bit per page, all initially zero. */
    server.vm_bitmap = zmalloc((server.vm_pages+7)/8);
    memset(server.vm_bitmap,0,(server.vm_pages+7)/8);
    /* Job lists shared between the main thread and the I/O threads. */
    /* Initialize threaded I/O (used by Virtual Memory) */
    server.io_newjobs = listCreate();
    server.io_processing = listCreate();
    server.io_processed = listCreate();
    server.io_ready_clients = listCreate();
    pthread_mutex_init(&server.io_mutex,NULL);
    pthread_mutex_init(&server.obj_freelist_mutex,NULL);
    pthread_mutex_init(&server.io_swapfile_mutex,NULL);
    server.io_active_threads = 0;
    /* Pipe used by the I/O threads to tell the main thread that a job
     * has been completed. */
    if (pipe(pipefds) == -1) {
        redisLog(REDIS_WARNING,"Unable to intialized VM: pipe(2): %s. Exiting."
            ,strerror(errno));
        exit(1);
    }
    server.io_ready_pipe_read = pipefds[0];
    server.io_ready_pipe_write = pipefds[1];
    redisAssert(anetNonBlock(NULL,server.io_ready_pipe_read) != ANET_ERR);
    ...
    /* Register the completion callback on the read end of the pipe. */
    /* Listen for events in the threaded I/O pipe */
    if (aeCreateFileEvent(server.el, server.io_ready_pipe_read, AE_READABLE,
        vmThreadedIOCompletedJob, NULL) == AE_ERR)
        oom("creating file event");
}
3.3 value换出
将对象序列化到文件对应位置,并且设置位图,标志相应页已经使用
/* Swap the value 'val' associated with 'key' out to the swap file.
 *
 * The number of pages needed is computed with rdbSavedObjectPages(), a run
 * of contiguous free pages is looked up, the object is written there, and
 * the location is recorded inside the key object before the in-memory value
 * is released.
 *
 * Returns REDIS_OK on success, REDIS_ERR when no contiguous pages are found
 * or when writing the object to the swap file fails. */
static int vmSwapObjectBlocking(robj *key, robj *val) {
off_t pages = rdbSavedObjectPages(val,NULL);
off_t page;
assert(key->storage == REDIS_VM_MEMORY);
assert(key->refcount == 1);
// Look for 'pages' contiguous free pages in the swap file
if (vmFindContiguousPages(&page,pages) == REDIS_ERR) return REDIS_ERR;
// Serialize the object into the swap file starting at 'page'
if (vmWriteObjectOnSwap(val,page) == REDIS_ERR) return REDIS_ERR;
// Record in the key where the value lives and what type it has
key->vm.page = page;
key->vm.usedpages = pages;
key->storage = REDIS_VM_SWAPPED;
key->vtype = val->type;
// Drop the reference to the in-memory value
decrRefCount(val); /* Deallocate the object from memory. */
// Mark the pages as used in the bitmap
vmMarkPagesUsed(page,pages);
...
server.vm_stats_swapped_objects++;
server.vm_stats_swapouts++;
return REDIS_OK;
}
3.4 value换入
从文件中读取对象后,修改位图清空标志,并未对文件对应页清空
/* Abridged excerpt of the swap-in path: read the value back from the swap
 * file, flag the key as living in memory again, and release its pages in
 * the bitmap. Only the bitmap is updated here; nothing in this excerpt
 * rewrites the swap file itself. */
vmGenericLoadObject(..)
{
...
val = vmReadObjectFromSwap(key->vm.page,key->vtype);
// The value is back in memory: update the key state and access time
key->storage = REDIS_VM_MEMORY;
key->vm.atime = server.unixtime;
// Mark the pages the value occupied as free in the bitmap
vmMarkPagesFree(key->vm.page,key->vm.usedpages);
...
return val;
}
/* Load an object of the given 'type' from the swap file, starting at page
 * 'page', and return it. Error handling is elided in this excerpt. */
static robj *vmReadObjectFromSwap(off_t page, int type) {
robj *o;
...
// Seek to the byte offset of the requested page
if (fseeko(server.vm_fp,page*server.vm_page_size,SEEK_SET) == -1) {
...
}
// Deserialize the object from the swap file
o = rdbLoadObject(type,server.vm_fp);
if (o == NULL) {
...
}
...
return o;
}
四、redis的vm的交换过程
4.1 如何选择value进行交换
- 遍历所有数据库
- 在每一个数据库中随机选择五个key
- 计算候选分数,最终选择分数最大的一个作为淘汰对象
4.1.1 如何随机选择key
整个redis数据库就是一个巨大的hash字典,并且通过链表形式进行冲突处理,所以选择key过程如下
step1. random()生成一个随机值,使用随机值对hash大小进行取余,生成一个hash下标
step2. 如果此下标没有对象,则回到step1
step3. 计算此下标下的对象链表长度
step4. 再根据长度进行random(), 随机取一个位置
step5. 最终根据随机到的位置,取此对象
/* Return a random entry from the dictionary, or NULL when it is empty.
 *
 * A random non-empty bucket is picked first, then a random element of the
 * chain hanging off that bucket. Part of the function is elided ("...")
 * in this excerpt. */
dictEntry *dictGetRandomKey(dict *d)
{
dictEntry *he, *orighe;
unsigned int h;
int listlen, listele;
if (dictSize(d) == 0) return NULL;
...
// Pick random bucket indexes until a non-empty bucket is found
do {
h = random() & d->ht[0].sizemask;
he = d->ht[0].table[h];
} while(he == NULL);
// Count the entries chained in this bucket
listlen = 0;
orighe = he;
while(he) {
he = he->next;
listlen++;
}
// Pick a random position inside the chain
listele = random() % listlen;
// Walk the chain up to that position
he = orighe;
while(listele--) he = he->next;
return he;
}
4.1.2 如何计算候选分数
step1. 计算此对象多久未被访问age(当前时间 - 最后一次访问时间)
step2. 计算对象大小asize
step3. 未被访问时间 乘以 对象大小的对数值 age*log(1+asize)
/* Compute a score for object 'o': the higher the score, the better a
 * candidate the object is for being swapped out.
 *
 * The score is age * log(1 + asize), where age is the time elapsed since
 * the last access and asize is an estimate of the object size that depends
 * on the object type. The score is 0 when age is not positive. The size
 * computation for the aggregate types is elided in this excerpt. */
static double computeObjectSwappability(robj *o) {
time_t age = server.unixtime - o->vm.atime; // time elapsed since the last access
long asize = 0;
...
// Just accessed: score 0 so that it is unlikely to be swapped out
if (age <= 0) return 0;
// Estimate the object size according to its type
switch(o->type) {
case REDIS_STRING:
if (o->encoding != REDIS_ENCODING_RAW) {
asize = sizeof(*o);
} else {
asize = sdslen(o->ptr)+sizeof(*o)+sizeof(long)*2;
}
break;
case REDIS_LIST:
l = o->ptr;
...
break;
case REDIS_SET:
case REDIS_ZSET:
...
break;
case REDIS_HASH:
...
break;
}
// Final score
return (double)age*log(1+asize);
}
4.2 如何交换
4.2.1 换出
- 计算需要换出的对象需要页数n
- 遍历位图,查找连续n个页
- 将对象写入找到的页
- 将页位置及页数保存到key中
- 释放value对象
- 设置位图
/* Return how many swap pages are needed to store object 'o' once
 * serialized. The serialized length reported by rdbSavedObjectLen() is
 * rounded up to a whole number of pages of server.vm_page_size bytes.
 * 'fp' is passed through to rdbSavedObjectLen() untouched. */
static off_t rdbSavedObjectPages(robj *o, FILE *fp) {
    const off_t payload = rdbSavedObjectLen(o,fp);

    /* Ceiling division: add (page size - 1) before dividing. */
    return (payload + (server.vm_page_size-1)) / server.vm_page_size;
}
/* Search the page bitmap for 'n' contiguous free pages.
 *
 * On success the index of the first page of the run is stored in '*first',
 * server.vm_next_page is moved right after the run, and REDIS_OK is
 * returned. REDIS_ERR is returned when the scan covers server.vm_pages
 * offsets without finding a suitable run.
 *
 * The scan starts at server.vm_next_page, which is reset to zero once
 * every REDIS_VM_MAX_NEAR_PAGES calls, and wraps around at the end of the
 * swap file. While no free page is being accumulated, the scan jumps
 * ahead by a random amount after REDIS_VM_MAX_RANDOM_JUMP/4 probes. */
static int vmFindContiguousPages(off_t *first, off_t n) {
    off_t start, scanned = 0, probes = 0, run = 0;

    if (server.vm_near_pages == REDIS_VM_MAX_NEAR_PAGES) {
        server.vm_near_pages = 0;
        server.vm_next_page = 0;
    }
    server.vm_near_pages++; /* One more attempt near the previous pages. */
    start = server.vm_next_page;

    while (scanned < server.vm_pages) {
        off_t cur = start + scanned;

        /* Past the end of the swap file: wrap to the beginning. */
        if (cur >= server.vm_pages) {
            cur -= server.vm_pages;
            /* A run found at the tail is not contiguous with page 0. */
            if (cur == 0) run = 0;
        }

        if (!vmFreePage(cur)) {
            /* Used page: the current run is broken. */
            run = 0;
        } else if (++run == n) {
            /* Enough free pages in a row: report the run to the caller. */
            *first = cur - (n-1);
            server.vm_next_page = cur + 1;
            redisLog(REDIS_DEBUG, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n, (long long) *first);
            return REDIS_OK;
        }

        probes++;
        if (run != 0 || probes < REDIS_VM_MAX_RANDOM_JUMP/4) {
            /* Keep scanning linearly. */
            scanned++;
            continue;
        }
        /* Nothing free here and we probed this area long enough: jump
         * forward. The run counter is already zero on this path, so it
         * does not need to be reset even if the jump wraps around. */
        scanned += random() % REDIS_VM_MAX_RANDOM_JUMP;
        probes = 0;
    }
    return REDIS_ERR;
}
/* Serialize object 'o' into the swap file starting at page 'page'.
 * Returns REDIS_ERR when seeking to the page fails, REDIS_OK otherwise.
 * Parts of the function are elided in this excerpt. */
static int vmWriteObjectOnSwap(robj *o, off_t page) {
...
// Seek to the byte offset of the target page
if (fseeko(server.vm_fp,page*server.vm_page_size,SEEK_SET) == -1) {
...
return REDIS_ERR;
}
// Serialize the object into the swap file
rdbSaveObject(server.vm_fp,o);
// Flush the stdio buffer of the swap file
fflush(server.vm_fp);
...
return REDIS_OK;
}
// Record in the key where the value was stored (first page and page count),
// flag it as swapped and remember the value type. These are the same
// statements that appear in vmSwapObjectBlocking().
key->vm.page = page;
key->vm.usedpages = pages;
key->storage = REDIS_VM_SWAPPED;
key->vtype = val->type;
decrRefCount(val); /* Deallocate the object from memory. */
/* Mark 'count' consecutive pages starting at 'page' as used by calling
 * vmMarkPageUsed() on each of them. The rest is elided in this excerpt. */
static void vmMarkPagesUsed(off_t page, off_t count) {
off_t j;
for (j = 0; j < count; j++)
vmMarkPageUsed(page+j);
...
}
4.2.2 换入
- 根据页偏移读取对象,反序列化成对象
- 清理位图
/* Load an object of the given 'type' from the swap file, starting at page
 * 'page', and return it. The process exits via _exit(1) when seeking or
 * loading fails. Parts of the function are elided in this excerpt. */
static robj *vmReadObjectFromSwap(off_t page, int type) {
robj *o;
...
// Seek to the byte offset of the requested page
if (fseeko(server.vm_fp,page*server.vm_page_size,SEEK_SET) == -1) {
...
_exit(1);
}
// Deserialize the object
o = rdbLoadObject(type,server.vm_fp);
if (o == NULL) {
...
_exit(1);
}
...
return o;
}
/* Mark 'count' consecutive pages starting at 'page' as free by calling
 * vmMarkPageFree() on each of them. The rest is elided in this excerpt. */
static void vmMarkPagesFree(off_t page, off_t count) {
off_t j;
for (j = 0; j < count; j++)
vmMarkPageFree(page+j);
...
}
五、持久化过程如何处理
当发生了swap时,内存的数据是不完整的,如何持久化呢?
在持久化过程中,发现某个value在swap时,将swap中的内容加载到内存,然后再序列化到持久化文件中,所以最终的持久化文件内容是完整的数据。
六、多线程处理
因redis使用单线程进行命令处理,所以在进行swap时,将阻塞其他客户端的请求响应,因此引入了多线程处理,将阻塞所有客户端转为阻塞某些客户端。
6.1 配置
# 0则表示不用线程,阻塞操作
vm-max-threads 4
6.2 整体流程
- 创建换入、换出任务,加入到任务队列中(只有一个队列,所以需要加锁)
- 如果线程数没有超过最大限制,则创建新线程
- 线程从任务队列中获取任务(多个线程操作一个队列,需要加锁),放入正在处理队列中,然后进行处理
- 每个任务处理完后,将放入完成队列
- 当队列中所有任务都处理完时,线程将退出
- 线程每处理完一个任务,将通知主线程,主线程从完成队列中取任务进行后续的处理
/* Schedule the swap-out of 'val' (the value stored at 'key' in 'db') on the
 * I/O job queue instead of performing it inline.
 *
 * A REDIS_IOJOB_PREPARE_SWAP job is created, the job takes an extra
 * reference on the value, the key is flagged as REDIS_VM_SWAPPING, and the
 * job is queued while holding the threaded I/O lock.
 * Always returns REDIS_OK. */
static int vmSwapObjectThreaded(robj *key, robj *val, redisDb *db) {
    iojob *job;

    assert(key->storage == REDIS_VM_MEMORY);
    assert(key->refcount == 1);

    /* Build the job description. */
    job = zmalloc(sizeof(*job));
    job->type = REDIS_IOJOB_PREPARE_SWAP;
    job->canceled = 0;
    job->thread = (pthread_t) -1;
    job->db = db;
    job->key = key;
    job->val = val;
    incrRefCount(val);

    key->storage = REDIS_VM_SWAPPING;

    /* Hand the job over to the I/O queue. */
    lockThreadedIO();
    queueIOJob(job);
    unlockThreadedIO();

    return REDIS_OK;
}
/* Entry point of the VM I/O threads.
 *
 * The thread detaches itself and then loops: it takes the first job from
 * server.io_newjobs (exiting when that list is empty), moves it to
 * server.io_processing, executes it according to its type, moves it to
 * server.io_processed, and finally writes one byte on
 * server.io_ready_pipe_write to signal the completion.
 *
 * All list manipulation happens between lockThreadedIO() and
 * unlockThreadedIO(); the job itself is executed outside the lock.
 * The "..." line is a portion elided from this excerpt. */
static void *IOThreadEntryPoint(void *arg) {
    iojob *j;
    listNode *ln;
    REDIS_NOTUSED(arg);

    pthread_detach(pthread_self());
    while(1) {
        ssize_t nwritten;

        /* Get a new job to process */
        lockThreadedIO();
        /* No queued jobs: this thread exits. */
        if (listLength(server.io_newjobs) == 0) {
            ...
            server.io_active_threads--;
            unlockThreadedIO();
            return NULL;
        }
        /* Take the first job from the queue of new jobs. */
        ln = listFirst(server.io_newjobs);
        j = ln->value;
        listDelNode(server.io_newjobs,ln);
        /* Add the job in the processing queue */
        j->thread = pthread_self();
        listAddNodeTail(server.io_processing,j);
        ln = listLast(server.io_processing); /* We use ln later to remove it */
        unlockThreadedIO();
        redisLog(REDIS_DEBUG,"Thread %ld got a new job (type %d): %p about key '%s'",
            (long) pthread_self(), j->type, (void*)j, (char*)j->key->ptr);

        /* Process the Job */
        if (j->type == REDIS_IOJOB_LOAD) {
            j->val = vmReadObjectFromSwap(j->page,j->key->vtype);
        } else if (j->type == REDIS_IOJOB_PREPARE_SWAP) {
            FILE *fp = fopen("/dev/null","w+");
            j->pages = rdbSavedObjectPages(j->val,fp);
            fclose(fp);
        } else if (j->type == REDIS_IOJOB_DO_SWAP) {
            if (vmWriteObjectOnSwap(j->val,j->page) == REDIS_ERR)
                j->canceled = 1;
        }

        /* Done: insert the job into the processed queue */
        redisLog(REDIS_DEBUG,"Thread %ld completed the job: %p (key %s)",
            (long) pthread_self(), (void*)j, (char*)j->key->ptr);
        /* Move the job from the processing queue to the processed queue. */
        lockThreadedIO();
        listDelNode(server.io_processing,ln);
        listAddNodeTail(server.io_processed,j);
        unlockThreadedIO();

        /* Signal the main thread there is new stuff to process.
         * The write(2) call is kept outside assert(): with NDEBUG defined
         * the assert expression is not evaluated at all, and the
         * completion byte would never be written. */
        nwritten = write(server.io_ready_pipe_write,"x",1);
        assert(nwritten == 1);
        (void) nwritten; /* Silence "unused" warnings under NDEBUG. */
    }
    return NULL; /* never reached */
}
6.3 刚换出又要换入
因为多线程处理后,换入换出过程就成了异步过程,可能ClientA写请求导致key1的value要被换出,然后ClientB读请求刚好读取key1的value,此时value已经被换出,又需要将value换入。
- 如果换出请求还在任务队列中未处理,则直接取消,并将位图对应位置清理
- 如果任务正在处理,等待处理完成后,进行重试
- 如果任务已经完成,则设置canceled标志,主线程的回调函数在处理时,如果是canceled的任务,直接忽略
/* Cancel the pending I/O job that refers to key object 'o', whose storage
 * must be REDIS_VM_LOADING or REDIS_VM_SWAPPING.
 *
 * The three job lists (new, processing, processed) are searched under the
 * threaded I/O lock for a non-canceled job whose key is 'o':
 * - found in io_newjobs: the job is freed and removed from the list;
 * - found in io_processing: the lock is released, the function sleeps
 *   briefly and restarts the search from scratch;
 * - found in io_processed: the job is only flagged as canceled.
 * Outside the processing list, the pages of a DO_SWAP job are marked free
 * again. Finally the storage state of 'o' is rolled back
 * (LOADING -> SWAPPED, SWAPPING -> MEMORY).
 *
 * Part of the function is elided ("...") in this excerpt. */
static void vmCancelThreadedIOJob(robj *o) {
list *lists[3] = {
server.io_newjobs, /* 0 */
server.io_processing, /* 1 */
server.io_processed /* 2 */
};
int i;
assert(o->storage == REDIS_VM_LOADING || o->storage == REDIS_VM_SWAPPING);
again:
lockThreadedIO();
/* Search for a matching key in one of the queues */
for (i = 0; i < 3; i++) {
listNode *ln;
listIter li;
listRewind(lists[i],&li);
while ((ln = listNext(&li)) != NULL) {
iojob *job = ln->value;
if (job->canceled) continue; /* Skip this, already canceled. */
if (job->key == o) {
...
// Release the pages reserved for the swap in the bitmap
/* Mark the pages as free since the swap didn't happened
* or happened but is now discarded. */
if (i != 1 && job->type == REDIS_IOJOB_DO_SWAP)
vmMarkPagesFree(job->page,job->pages);
/* Cancel the job. It depends on the list the job is
* living in. */
switch(i) {
case 0: /* io_newjobs */
/* If the job was yet not processed the best thing to do
* is to remove it from the queue at all */
freeIOJob(job);
listDelNode(lists[i],ln);
break;
case 1: /* io_processing */
/* Oh Shi- the thread is messing with the Job:
*
* Probably it's accessing the object if this is a
* PREPARE_SWAP or DO_SWAP job.
* If it's a LOAD job it may be reading from disk and
* if we don't wait for the job to terminate before to
* cancel it, maybe in a few microseconds data can be
* corrupted in this pages. So the short story is:
*
* Better to wait for the job to move into the
* next queue (processed)... */
/* We try again and again until the job is completed. */
unlockThreadedIO();
/* But let's wait some time for the I/O thread
* to finish with this job. After all this condition
* should be very rare. */
usleep(1);
goto again;
case 2: /* io_processed */
/* The job was already processed, that's easy...
* just mark it as canceled so that we'll ignore it
* when processing completed jobs. */
job->canceled = 1;
break;
}
// Roll back the storage state of the key
/* Finally we have to adjust the storage type of the object
* in order to "UNDO" the operaiton. */
if (o->storage == REDIS_VM_LOADING)
o->storage = REDIS_VM_SWAPPED;
else if (o->storage == REDIS_VM_SWAPPING)
o->storage = REDIS_VM_MEMORY;
unlockThreadedIO();
return;
}
}
}
unlockThreadedIO();
assert(1 != 1); /* We should never reach this */
}
/* Event loop callback registered on the read end of the I/O completion
 * pipe.
 *
 * For every byte read from 'fd' one job is taken from server.io_processed
 * and post-processed according to its type:
 * - REDIS_IOJOB_LOAD: the loaded value is stored in the dictionary entry,
 *   the key is flagged as in memory, its pages are freed and the clients
 *   blocked on the key are handled;
 * - REDIS_IOJOB_PREPARE_SWAP: the pages are marked as used and the job is
 *   queued again as REDIS_IOJOB_DO_SWAP;
 * - REDIS_IOJOB_DO_SWAP: the swap location is recorded in the key and the
 *   in-memory value is released.
 * Canceled jobs are freed and skipped. The function returns once the
 * number of jobs computed at the first iteration has been processed.
 *
 * Parts of the function are elided ("...") in this excerpt. */
static void vmThreadedIOCompletedJob(aeEventLoop *el, int fd, void *privdata,
int mask)
{
char buf[1];
int retval, processed = 0, toprocess = -1, trytoswap = 1;
REDIS_NOTUSED(el);
REDIS_NOTUSED(mask);
REDIS_NOTUSED(privdata);
if (privdata != NULL) trytoswap = 0; /* check the comments above... */
/* For every byte we read in the read side of the pipe, there is one
* I/O job completed to process. */
while((retval = read(fd,buf,1)) == 1) {
iojob *j;
listNode *ln;
robj *key;
struct dictEntry *de;
redisLog(REDIS_DEBUG,"Processing I/O completed job");
/* Get the processed element (the oldest one) */
lockThreadedIO();
assert(listLength(server.io_processed) != 0);
// Decide how many jobs to handle in this call. This callback runs in the
// main thread, so it does not drain the whole list: only a percentage of
// it (REDIS_MAX_COMPLETED_JOBS_PROCESSED), and at least one job.
if (toprocess == -1) {
toprocess = (listLength(server.io_processed)*REDIS_MAX_COMPLETED_JOBS_PROCESSED)/100;
if (toprocess <= 0) toprocess = 1;
}
ln = listFirst(server.io_processed);
j = ln->value;
listDelNode(server.io_processed,ln);
unlockThreadedIO();
// Jobs flagged as canceled get no post-processing
/* If this job is marked as canceled, just ignore it */
if (j->canceled) {
freeIOJob(j);
continue;
}
/* Post process it in the main thread, as there are things we
* can do just here to avoid race conditions and/or invasive locks */
...
de = dictFind(j->db->dict,j->key);
assert(de != NULL);
key = dictGetEntryKey(de);
if (j->type == REDIS_IOJOB_LOAD) { // load job
redisDb *db;
/* Key loaded, bring it at home */
key->storage = REDIS_VM_MEMORY;
key->vm.atime = server.unixtime;
vmMarkPagesFree(key->vm.page,key->vm.usedpages);
redisLog(REDIS_DEBUG, "VM: object %s loaded from disk (threaded)",
(unsigned char*) key->ptr);
server.vm_stats_swapped_objects--;
server.vm_stats_swapins++;
dictGetEntryVal(de) = j->val;
incrRefCount(j->val);
db = j->db;
freeIOJob(j);
// Wake up the clients waiting for this key
/* Handle clients waiting for this key to be loaded. */
handleClientsBlockedOnSwappedKey(db,key);
} else if (j->type == REDIS_IOJOB_PREPARE_SWAP) {// prepare-swap job
/* Now we know the amount of pages required to swap this object.
* Let's find some space for it, and queue this task again
* rebranded as REDIS_IOJOB_DO_SWAP. */
...
/* Note that we need to mark this pages as used now,
* if the job will be canceled, we'll mark them as freed
* again. */
vmMarkPagesUsed(j->page,j->pages);
j->type = REDIS_IOJOB_DO_SWAP;
lockThreadedIO();
queueIOJob(j);
unlockThreadedIO();
...
} else if (j->type == REDIS_IOJOB_DO_SWAP) { // swap-out job
robj *val;
/* Key swapped. We can finally free some memory. */
if (key->storage != REDIS_VM_SWAPPING) {
printf("key->storage: %d\n",key->storage);
printf("key->name: %s\n",(char*)key->ptr);
printf("key->refcount: %d\n",key->refcount);
printf("val: %p\n",(void*)j->val);
printf("val->type: %d\n",j->val->type);
printf("val->ptr: %s\n",(char*)j->val->ptr);
}
redisAssert(key->storage == REDIS_VM_SWAPPING);
val = dictGetEntryVal(de);
key->vm.page = j->page;
key->vm.usedpages = j->pages;
key->storage = REDIS_VM_SWAPPED;
key->vtype = j->val->type;
// The in-memory value is released only here, after the swap succeeded
decrRefCount(val); /* Deallocate the object from memory. */
dictGetEntryVal(de) = NULL;
...
server.vm_stats_swapped_objects++;
server.vm_stats_swapouts++;
freeIOJob(j);
...
}
processed++;
if (processed == toprocess) return; // quota reached: return so the main thread can go on with its other work
}
...
}
6.4 如何知道哪些client在等待哪些key的值
当某个client访问某个key时,此key的value已经交换出去了,则将key,client加入到io_keys字典中,并且多个client访问相同key时,将串成一个链表。
每个client自己维护了自己正在等待key的一个链表。
当某个client的等待队列空了的时候,将恢复读。
/* Abridged excerpt of the command dispatcher showing only the VM hook:
 * the check on swapped keys is performed right before call(). */
static int processCommand(redisClient *c) {
...
// With VM enabled and I/O threads in use, check whether the command needs
// keys that are not in memory; if the client got blocked, return without
// executing the command
if (server.vm_enabled && server.vm_max_threads > 0 &&
blockClientOnSwappedKeys(c,cmd)) return 1;
call(c,cmd);
...
}
/* Request the loading of the swapped keys needed by command 'cmd' and
 * block client 'c' if it has to wait for at least one of them.
 *
 * Commands providing a vm_preload_proc use it to declare the keys they
 * need; every other command goes through waitForMultipleSwappedKeys().
 *
 * Returns 1 if the client was blocked: it is flagged with REDIS_IO_WAIT,
 * its readable event is removed from the event loop and
 * server.vm_blocked_clients is incremented. Returns 0 otherwise. */
static int blockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd) {
    if (cmd->vm_preload_proc == NULL)
        waitForMultipleSwappedKeys(c,cmd,c->argc,c->argv);
    else
        cmd->vm_preload_proc(c,cmd,c->argc,c->argv);

    /* Nothing to wait for: the command can be executed right away. */
    if (listLength(c->io_keys) == 0) return 0;

    /* At least one key is not in memory: block the client. */
    c->flags |= REDIS_IO_WAIT;
    aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
    server.vm_blocked_clients++;
    return 1;
}
/* Call waitForSwappedKey() for every key argument of the command, as
 * described by the vm_firstkey / vm_lastkey / vm_keystep fields of 'cmd'.
 * A negative vm_lastkey is counted from the end of the argument vector.
 * Nothing is done when vm_firstkey is 0. */
static void waitForMultipleSwappedKeys(redisClient *c,
        struct redisCommand *cmd,int argc, robj **argv) {
    int idx, final;

    if (cmd->vm_firstkey == 0) return;

    final = cmd->vm_lastkey;
    if (final < 0) final += argc;

    idx = cmd->vm_firstkey;
    while (idx <= final) {
        redisAssert(idx < argc);
        waitForSwappedKey(c,argv[idx]);
        idx += cmd->vm_keystep;
    }
}
/* Make client 'c' wait for 'key' if its value is not in memory.
 *
 * Returns 0 without blocking when the key does not exist, when it is
 * already in memory, or when it was being swapped out (the swap job is
 * canceled in that case). Otherwise the key is appended to the list of
 * keys the client is waiting for, the client is appended to the list of
 * clients waiting for that key in c->db->io_keys, a load job is queued
 * unless the key is already being loaded, and 1 is returned. */
static int waitForSwappedKey(redisClient *c, robj *key) {
    struct dictEntry *entry;
    robj *stored;
    list *waiters;
    iojob *job;

    /* Missing keys and keys already in RAM never block the client. */
    entry = dictFind(c->db->dict,key);
    if (entry == NULL) return 0;
    stored = dictGetEntryKey(entry);
    if (stored->storage == REDIS_VM_MEMORY) return 0;
    if (stored->storage == REDIS_VM_SWAPPING) {
        /* A swap out was in progress: undo it. */
        vmCancelThreadedIOJob(stored);
        return 0;
    }

    /* From here on the key is either swapped or currently being loaded. */

    /* Client => keys it is waiting for. */
    listAddNodeTail(c->io_keys,key);
    incrRefCount(key);

    /* Key => clients waiting for it. One list of clients per key. */
    entry = dictFind(c->db->io_keys,key);
    if (entry != NULL) {
        waiters = dictGetEntryVal(entry);
    } else {
        int added;

        waiters = listCreate();
        added = dictAdd(c->db->io_keys,key,waiters);
        incrRefCount(key);
        assert(added == DICT_OK);
    }
    listAddNodeTail(waiters,c);

    /* A load is already in progress: no new job is needed. */
    if (stored->storage != REDIS_VM_SWAPPED) return 1;

    /* Queue a job loading the value from the swap file. */
    stored->storage = REDIS_VM_LOADING;
    job = zmalloc(sizeof(*job));
    job->type = REDIS_IOJOB_LOAD;
    job->db = c->db;
    job->key = stored;
    job->key->vtype = stored->vtype;
    job->page = stored->vm.page;
    job->val = NULL;
    job->canceled = 0;
    job->thread = (pthread_t) -1;
    lockThreadedIO();
    queueIOJob(job);
    unlockThreadedIO();
    return 1;
}
/* Called once 'key' has been loaded in memory: every client of 'db' that
 * was waiting for it stops waiting for that key, and the clients left with
 * no other key to wait for are appended to server.io_ready_clients. */
static void handleClientsBlockedOnSwappedKey(redisDb *db, robj *key) {
    struct dictEntry *entry = dictFind(db->io_keys,key);
    list *waiters;
    int pending;

    if (entry == NULL) return;
    waiters = dictGetEntryVal(entry);

    /* The number of iterations is fixed up front: the list cannot be
     * queried in the loop condition, since it can be freed by the called
     * function when its last element is removed. */
    pending = listLength(waiters);
    while (pending--) {
        listNode *head = listFirst(waiters);
        redisClient *client = head->value;

        /* A zero return means the client still waits for other keys. */
        if (!dontWaitForSwappedKey(client,key)) continue;

        /* All the keys of this client are loaded: it is ready to go. */
        listAddNodeTail(server.io_ready_clients,client);
    }
}
/* Remove 'key' from the keys client 'c' is waiting for, and remove 'c'
 * from the clients waiting for 'key' in c->db->io_keys, deleting the
 * dictionary entry when no client is left.
 *
 * Returns non-zero when the client is no longer waiting for any key. */
static int dontWaitForSwappedKey(redisClient *c, robj *key) {
    list *waiters;
    listNode *node;
    listIter iter;
    struct dictEntry *entry;

    /* 'key' may be physically the same object stored in c->io_keys, and
     * deleting the list node could destroy it: hold a reference for the
     * duration of this function. */
    incrRefCount(key);

    /* Client side: drop the key from the list of awaited keys. */
    listRewind(c->io_keys,&iter);
    for (node = listNext(&iter); node != NULL; node = listNext(&iter)) {
        if (!equalStringObjects(node->value,key)) continue;
        listDelNode(c->io_keys,node);
        break;
    }
    redisAssert(node != NULL);

    /* Key side: drop the client from the list of waiting clients. */
    entry = dictFind(c->db->io_keys,key);
    redisAssert(entry != NULL);
    waiters = dictGetEntryVal(entry);
    node = listSearchKey(waiters,c);
    redisAssert(node != NULL);
    listDelNode(waiters,node);

    /* Nobody waits for this key anymore: remove the dictionary entry. */
    if (listLength(waiters) == 0)
        dictDelete(c->db->io_keys,key);

    decrRefCount(key);
    return listLength(c->io_keys) == 0;
}
七、其他
7.1 当某个key的值被换出后,后续再次给此key设置新值
- 如果已经换出成功,则直接将位图对应位置清空
- 如果正在换出、换入,将任务取消,取消成功后,将位图对应位置清空