In large-model system architecture, the cache layer is a key component for improving response latency and reducing backend load. Drawing on practical deployment experience, this article compares the LRU and LFU caching strategies and the scenarios each is suited to.
LRU Cache Implementation
```python
from collections import OrderedDict

class LRUCache:
    def __init__(self, capacity: int):
        self.capacity = capacity
        self.cache = OrderedDict()

    def get(self, key: int) -> int:
        if key not in self.cache:
            return -1
        # Update recency: mark this key as the most recently used
        self.cache.move_to_end(key)
        return self.cache[key]

    def put(self, key: int, value: int) -> None:
        if key in self.cache:
            self.cache.move_to_end(key)
        elif len(self.cache) >= self.capacity:
            # Evict the least recently used entry (front of the OrderedDict)
            self.cache.popitem(last=False)
        self.cache[key] = value
```
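A minimal usage sketch of the class above; the capacity and key/value pairs are arbitrary examples chosen to show the eviction order:

```python
cache = LRUCache(2)
cache.put(1, 100)
cache.put(2, 200)
cache.get(1)          # returns 100; key 1 becomes the most recently used
cache.put(3, 300)     # evicts key 2, the least recently used entry
print(cache.get(2))   # -1 (evicted)
print(cache.get(1))   # 100 (still cached)
```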
LFU Cache Implementation
```python
from collections import OrderedDict

class LFUCache:
    def __init__(self, capacity: int):
        self.capacity = capacity
        self.cache = {}     # key -> (value, frequency)
        self.freq_map = {}  # frequency -> OrderedDict of keys at that frequency
        self.min_freq = 0

    def get(self, key: int) -> int:
        if key not in self.cache:
            return -1
        value, freq = self.cache[key]
        # Promote the key to the next frequency bucket
        self.freq_map[freq].pop(key)
        if not self.freq_map[freq]:
            del self.freq_map[freq]
            if freq == self.min_freq:
                self.min_freq += 1
        self._update_frequency(key, value, freq + 1)
        return value

    def put(self, key: int, value: int) -> None:
        if self.capacity <= 0:
            return
        if key in self.cache:
            self.get(key)  # bump the access frequency
            freq = self.cache[key][1]
            self.cache[key] = (value, freq)
            self.freq_map[freq][key] = value  # keep the bucket's value in sync
        else:
            if len(self.cache) >= self.capacity:
                # Evict the least frequently used key; ties are broken by
                # insertion order within the bucket (front of the OrderedDict)
                key_to_remove = self.freq_map[self.min_freq].popitem(last=False)[0]
                del self.cache[key_to_remove]
            self.min_freq = 1
            self._update_frequency(key, value, 1)

    def _update_frequency(self, key: int, value: int, freq: int):
        if freq not in self.freq_map:
            self.freq_map[freq] = OrderedDict()
        self.freq_map[freq][key] = value
        self.cache[key] = (value, freq)
```
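A matching usage sketch for the LFU class, again with illustrative keys and capacity, showing that the lower-frequency entry is the one evicted:

```python
cache = LFUCache(2)
cache.put(1, 100)
cache.put(2, 200)
cache.get(1)          # key 1 now has frequency 2, key 2 stays at frequency 1
cache.put(3, 300)     # evicts key 2, the least frequently used entry
print(cache.get(2))   # -1 (evicted)
print(cache.get(1))   # 100 (kept because of its higher access frequency)
print(cache.get(3))   # 300
```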
In actual deployments, we found that:
- LRU suits workloads where access patterns are relatively uniform; it is simple to implement and delivers stable performance
- LFU works better when access frequencies differ sharply between keys, but it needs extra space to maintain frequency counts
- For large-model serving, we recommend the LRU strategy, combined with a warm-up mechanism and a cache invalidation policy to improve real-world hit rates (see the sketch after this list)
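As a rough illustration of the last point, the sketch below layers a per-entry TTL (one possible invalidation policy) on top of the LRU structure shown earlier, plus a simple warm-up helper. The names `TTLLRUCache`, `ttl_seconds`, `warm_up`, `hot_prompts`, and `compute_fn` are illustrative placeholders, not part of any specific framework, and the whole snippet is a minimal sketch rather than a production design:

```python
import time
from collections import OrderedDict

class TTLLRUCache:
    """LRU cache with a per-entry time-to-live (hypothetical sketch)."""

    def __init__(self, capacity: int, ttl_seconds: float):
        self.capacity = capacity
        self.ttl_seconds = ttl_seconds
        self.cache = OrderedDict()  # key -> (value, expires_at)

    def get(self, key):
        if key not in self.cache:
            return None
        value, expires_at = self.cache[key]
        if time.monotonic() > expires_at:
            # Expired entry: treat as a miss and drop it
            del self.cache[key]
            return None
        self.cache.move_to_end(key)
        return value

    def put(self, key, value):
        if key in self.cache:
            self.cache.move_to_end(key)
        elif len(self.cache) >= self.capacity:
            self.cache.popitem(last=False)
        self.cache[key] = (value, time.monotonic() + self.ttl_seconds)

def warm_up(cache: TTLLRUCache, hot_prompts, compute_fn):
    """Pre-populate the cache with known hot requests before taking traffic."""
    for prompt in hot_prompts:
        cache.put(prompt, compute_fn(prompt))
```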

Discussion