缓存淘汰算法 LRU 和 LFU

原创

幽夜落雨 2022-12-15 10:12:29 博主文章分类：Algorithm ©著作权

©著作权归作者所有：来自51CTO博客作者幽夜落雨的原创作品，请联系作者获取转载授权，否则将追究法律责任

缓存是一个计算机思维，对于重复的计算，缓存其结果，下次再算这个任务的时候，不去真正的计算，而是直接返回结果，能加快处理速度。当然有些会随时间改变的东西，缓存会失效，得重新计算。

比如缓存空间只有2个，要缓存的数据有很多，1，2，3，4，5，那么当缓存空间满了，需要淘汰一个缓存出去，其中淘汰算法有 LRU，LFU，FIFO，SC二次机会，老化算法，时钟工作集算法等等。

算法流程

LRU，最近最少使用，把数据加入一个链表中，按访问时间排序，发生淘汰的时候，把访问时间最旧的淘汰掉。
比如有数据 1，2，1，3，2
此时缓存中已有（1，2）
当3加入的时候，得把后面的2淘汰，变成（3，1）

LFU（Least Frequently Used），最不经常使用，把数据加入到链表中，按访问频次排序，一个数据每被访问一次，就把它的频次+1，发生淘汰的时候，把频次最低的淘汰掉。
比如有数据 1，1，1，2，2，3
缓存中有（1(3次)，2(2次)）
当3加入的时候，得把后面的2淘汰，变成（1(3次)，3(1次)）
区别：LRU 是得把 1 淘汰。

显然
LRU对于循环出现的数据，缓存命中不高
比如，这样的数据，1，1，1，2，2，2，3，4，1，1，1，2，2，2.....
当走到3，4的时候，1，2会被淘汰掉，但是后面还有很多1，2

LFU对于交替出现的数据，缓存命中不高
比如，1，1，1，2，2，3，4，3，4，3，4，3，4，3，4，3，4......
由于前面的访问已经让缓存变成（1(3次)，2(2次)），
3加入把2淘汰，4加入把3淘汰，3加入把4淘汰，然而3，4才是最需要缓存的，1去到了3次，谁也淘汰不了它了。

实现

leetcode上有两个题目
LRU：https://leetcode.com/problems/lru-cache/description/ LFU：https://leetcode.com/problems/lfu-cache/description/

要求是缓存的加入put()，缓存读取get()，都要在O(1)内实现。

LRU的一个实现方法：
用一个双向链表记录访问时间，因为链表插入删除高效，时间新的在前面，旧的在后面。
用一个哈希表记录缓存(key, value)，哈希查找近似O(1)，发生哈希冲突时最坏O(n)，同时哈希表中得记录 (key, (value, key_ptr))，key_ptr 是key在链表中的地址，为了能在O(1)时间内找到该节点，并把节点提升到表头。
链表中的key，能快速找到hash中的value，并删除。

LFU的一个实现方法：
用一个主双向链表记录（访问次数，从链表头），从链表中按时间顺序记录着（key）
用一个哈希表记录（key，(value, 主链表ptr，从链表ptr)）ptr表示该key在链表中的地址
然后，get，put都在哈希表中操作，近似O(1)，哈希表中记录了节点在链表中的地址，能在O(1)内找到该节点并提高它的访问频次，链表的插入删除也都是O(1)。

-------------------- 最后贴个AC的代码：--------------------
代码性能：1000000次加入，读取用时
LRU: 480ms
LFU: 510ms
NSCache: 2000ms
YYCache: 1400ms

LRU：

#include <list>
#include <unordered_map>

using namespace std;

/// Fixed-capacity int -> int cache with least-recently-used eviction.
///
/// Entries live in a doubly linked list ordered from most recently used
/// (front) to least recently used (back). A hash map stores, for every key,
/// the iterator of its list node so that both get() and put() can locate and
/// re-order an entry without scanning the list.
///
/// A capacity of zero or less yields a cache that never stores anything.
class LRUCache {
public:
    LRUCache(int capacity);
    // The index holds iterators into m_list, so a member-wise copy would leave
    // the new object pointing into the source's list. Copies rebuild the index.
    LRUCache(const LRUCache& other);
    LRUCache& operator=(const LRUCache& other);
    ~LRUCache();

    /// Returns the value stored for `key` and marks it most recently used,
    /// or -1 when the key is not cached.
    int get(int key);

    /// Inserts or overwrites `key` and marks it most recently used. When the
    /// cache is full the least recently used entry is evicted first.
    void put(int key, int value);

private:
    typedef std::list<std::pair<int, int> > EntryList;   // pair<key, value>

    /// Re-creates u_map so that every iterator refers to this object's m_list.
    void rebuild_index();

    int max_capacity;
    EntryList m_list;                                     // front = newest
    std::unordered_map<int, EntryList::iterator> u_map;   // key -> list node
};

LRUCache::LRUCache(int capacity) : max_capacity(capacity) {}

LRUCache::LRUCache(const LRUCache& other)
    : max_capacity(other.max_capacity), m_list(other.m_list) {
    rebuild_index();
}

LRUCache& LRUCache::operator=(const LRUCache& other) {
    if (this != &other) {
        max_capacity = other.max_capacity;
        m_list = other.m_list;
        rebuild_index();
    }
    return *this;
}

// The containers release their own storage; nothing to do by hand.
LRUCache::~LRUCache() = default;

void LRUCache::rebuild_index() {
    u_map.clear();
    for (EntryList::iterator node = m_list.begin(); node != m_list.end(); ++node) {
        u_map[node->first] = node;
    }
}

int LRUCache::get(int key) {
    auto hit = u_map.find(key);
    if (hit == u_map.end()) {
        return -1;
    }
    // splice() relinks the node to the front without copying it, and the
    // stored iterator stays valid.
    m_list.splice(m_list.begin(), m_list, hit->second);
    return hit->second->second;
}

void LRUCache::put(int key, int value) {
    // Without this guard a zero capacity would evict from an empty list
    // (undefined behaviour) and a negative one would never evict at all.
    if (max_capacity <= 0) {
        return;
    }

    auto hit = u_map.find(key);
    if (hit != u_map.end()) {
        // Existing key: overwrite the value and promote the node.
        hit->second->second = value;
        m_list.splice(m_list.begin(), m_list, hit->second);
        return;
    }

    // New key: make room by dropping the least recently used entry.
    if (m_list.size() >= static_cast<EntryList::size_type>(max_capacity)) {
        u_map.erase(m_list.back().first);
        m_list.pop_back();
    }

    // emplace_front builds the pair directly inside the new list node.
    m_list.emplace_front(key, value);
    u_map[key] = m_list.begin();
}

LFU：

#include <list>
#include <unordered_map>

using namespace std;

// map value 结构
/// Per-key record kept in the hash map of LFUCache.
struct LFUMapValue {
    int value;                                                      // cached value
    std::list<std::pair<int, std::list<int> > >::iterator main_it;  // bucket holding the key
    std::list<int>::iterator sub_it;                                // key's node inside that bucket
};

/// Fixed-capacity int -> int cache with least-frequently-used eviction;
/// ties between equally used keys are broken by evicting the least recently
/// used one.
///
/// m_list is a list of buckets `pair<count, keys>` sorted by ascending access
/// count. Inside a bucket the keys are ordered newest first. u_map maps every
/// key to its value and to the iterators of its bucket and key node, so
/// get()/put() never scan a list. Buckets are removed as soon as they become
/// empty, so a non-empty cache always has its eviction victim at
/// m_list.front().second.back().
///
/// A capacity of zero or less yields a cache that never stores anything.
class LFUCache {
public:
    LFUCache(int capacity);
    // u_map stores iterators into m_list; a member-wise copy would alias the
    // source object's lists, so copies re-link the iterators.
    LFUCache(const LFUCache& other);
    LFUCache& operator=(const LFUCache& other);
    ~LFUCache();

    /// Returns the value of `key` and raises its access count by one,
    /// or returns -1 when the key is not cached.
    int get(int key);

    /// Inserts `key`, or overwrites its value and raises its access count.
    /// When inserting into a full cache one key is evicted first.
    void put(int key, int value);

    /// Moves the key described by `value` (which must point at an element of
    /// this cache's map) to the bucket for "current count + 1".
    void right_move(LFUMapValue *value);

private:
    typedef std::list<int> KeyList;
    typedef std::list<std::pair<int, KeyList> > BucketList;

    /// Points every map entry at the nodes of this object's own m_list.
    void relink();

    int max_cap;
    int cur_cap;                                 // number of keys currently stored
    BucketList m_list;                           // buckets, ascending count
    std::unordered_map<int, LFUMapValue> u_map;  // key -> value + position
};

LFUCache::LFUCache(int capacity) : max_cap(capacity), cur_cap(0) {}

LFUCache::LFUCache(const LFUCache& other)
    : max_cap(other.max_cap),
      cur_cap(other.cur_cap),
      m_list(other.m_list),
      u_map(other.u_map) {
    relink();
}

LFUCache& LFUCache::operator=(const LFUCache& other) {
    if (this != &other) {
        max_cap = other.max_cap;
        cur_cap = other.cur_cap;
        m_list = other.m_list;
        u_map = other.u_map;
        relink();
    }
    return *this;
}

// The containers release their own storage; nothing to do by hand.
LFUCache::~LFUCache() = default;

void LFUCache::relink() {
    for (BucketList::iterator bucket = m_list.begin(); bucket != m_list.end(); ++bucket) {
        for (KeyList::iterator node = bucket->second.begin(); node != bucket->second.end(); ++node) {
            LFUMapValue& entry = u_map[*node];
            entry.main_it = bucket;
            entry.sub_it = node;
        }
    }
}

void LFUCache::right_move(LFUMapValue *value) {
    BucketList::iterator current = value->main_it;
    BucketList::iterator target = current;
    ++target;

    const int bumped = current->first + 1;
    const bool target_matches = (target != m_list.end() && target->first == bumped);

    if (!target_matches && current->second.size() == 1) {
        // The key is alone in its bucket and no bucket exists for the new
        // count: relabel the bucket instead of allocating a new one. Ordering
        // is preserved because the following bucket's count is greater than
        // `bumped`.
        current->first = bumped;
        return;
    }

    if (!target_matches) {
        // Create the bucket for the new count right after the current one.
        target = m_list.emplace(target, bumped, KeyList());
    }

    // Relink the key node to the front (newest position) of the target
    // bucket. splice() moves the node itself, so no key is copied.
    target->second.splice(target->second.begin(), current->second, value->sub_it);
    value->main_it = target;
    value->sub_it = target->second.begin();

    if (current->second.empty()) {
        m_list.erase(current);
    }
}

int LFUCache::get(int key) {
    auto hit = u_map.find(key);
    if (hit == u_map.end()) {
        return -1;
    }
    right_move(&hit->second);
    return hit->second.value;
}

void LFUCache::put(int key, int value) {
    // A negative capacity would otherwise never compare equal to cur_cap and
    // the cache would grow without bound.
    if (max_cap <= 0) {
        return;
    }

    auto hit = u_map.find(key);
    if (hit != u_map.end()) {
        // Existing key: overwrite and count the access.
        hit->second.value = value;
        right_move(&hit->second);
        return;
    }

    if (cur_cap >= max_cap && !m_list.empty()) {
        // Evict the oldest key of the lowest-count bucket.
        BucketList::iterator coldest = m_list.begin();
        if (!coldest->second.empty()) {
            u_map.erase(coldest->second.back());
            coldest->second.pop_back();
            --cur_cap;
        }
        // Drop the bucket once it is empty so stale buckets do not pile up.
        if (coldest->second.empty()) {
            m_list.erase(coldest);
        }
    }

    // New keys start with an access count of 1.
    if (m_list.empty() || m_list.front().first != 1) {
        m_list.emplace_front(1, KeyList());
    }
    BucketList::iterator first = m_list.begin();
    first->second.push_front(key);

    LFUMapValue entry;
    entry.value = value;
    entry.main_it = first;
    entry.sub_it = first->second.begin();
    u_map[key] = entry;
    ++cur_cap;
}