Refactor of LFU/LRU code for modularity (#2857)

General cleanup on LRU/LFU code. Improve modularity and maintainability.

Specifically:
* Consolidates the mathematical logic for LRU/LFU into `lrulfu.c`, with
an API in `lrulfu.h`. Knowledge of the LRU/LFU implementation was
previously spread out across `db.c`, `evict.c`, `object.c`, `server.c`,
and `server.h`.
* Separates knowledge of the LRU from knowledge of the object containing
the LRU value. `lrulfu.c` knows about the LRU/LFU algorithms, without
knowing about the `robj`. `object.c` knows about the `robj` without
knowing about the details of the LRU/LFU algorithms.
* Eliminated `server.lruclock`, instead using `server.unixtime`. This
also eliminates the periodic need to call `mstime()` to maintain the lru
clock.
* Fixed a minor computation bug in the old `LFUTimeElapsed` function
(off by 1 after rollover).
* Eliminates explicit `if` checks for rollover, relying on the well-defined
wrap-around of unsigned arithmetic instead.
* Fixed a bug in `debug.c` which would perform LFU modification on an
LRU value.

---------

Signed-off-by: Jim Brunner <brunnerj@amazon.com>
Co-authored-by: Ran Shidlansik <ranshid@amazon.com>
This commit is contained in:
Jim Brunner 2025-12-02 10:14:33 -08:00 committed by GitHub
parent 853d111f47
commit 1b5f245eae
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 296 additions and 218 deletions

View File

@ -72,6 +72,7 @@ set(VALKEY_SERVER_SRCS
${CMAKE_SOURCE_DIR}/src/geo.c
${CMAKE_SOURCE_DIR}/src/lazyfree.c
${CMAKE_SOURCE_DIR}/src/module.c
${CMAKE_SOURCE_DIR}/src/lrulfu.c
${CMAKE_SOURCE_DIR}/src/evict.c
${CMAKE_SOURCE_DIR}/src/expire.c
${CMAKE_SOURCE_DIR}/src/geohash.c

View File

@ -423,7 +423,7 @@ ENGINE_NAME=valkey
SERVER_NAME=$(ENGINE_NAME)-server$(PROG_SUFFIX)
ENGINE_SENTINEL_NAME=$(ENGINE_NAME)-sentinel$(PROG_SUFFIX)
ENGINE_TRACE_OBJ=trace/trace.o trace/trace_commands.o trace/trace_db.o trace/trace_cluster.o trace/trace_server.o trace/trace_rdb.o trace/trace_aof.o
ENGINE_SERVER_OBJ=threads_mngr.o adlist.o vector.o quicklist.o ae.o anet.o dict.o hashtable.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o memory_prefetch.o io_threads.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o cluster_slot_stats.o crc16.o cluster_migrateslots.o endianconv.o commandlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o valkey-check-rdb.o valkey-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o allocator_defrag.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o lolwut9.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script.o functions.o commands.o strl.o connection.o unix.o logreqres.o rdma.o scripting_engine.o entry.o vset.o lua/script_lua.o lua/function_lua.o lua/engine_lua.o lua/debug_lua.o
ENGINE_SERVER_OBJ=threads_mngr.o adlist.o vector.o quicklist.o ae.o anet.o dict.o hashtable.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o memory_prefetch.o io_threads.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o cluster_slot_stats.o crc16.o cluster_migrateslots.o endianconv.o commandlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o valkey-check-rdb.o valkey-check-aof.o geo.o lazyfree.o module.o lrulfu.o evict.o expire.o geohash.o geohash_helper.o childinfo.o allocator_defrag.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o lolwut9.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script.o functions.o commands.o strl.o connection.o unix.o logreqres.o rdma.o scripting_engine.o entry.o vset.o lua/script_lua.o lua/function_lua.o lua/engine_lua.o lua/debug_lua.o
ENGINE_SERVER_OBJ+=$(ENGINE_TRACE_OBJ)
ENGINE_CLI_NAME=$(ENGINE_NAME)-cli$(PROG_SUFFIX)
ENGINE_CLI_OBJ=anet.o adlist.o dict.o valkey-cli.o zmalloc.o release.o ae.o serverassert.o crcspeed.o crccombine.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o sds.o util.o sha256.o

View File

@ -198,7 +198,7 @@ void dumpCommand(client *c) {
/* RESTORE key ttl serialized-value [REPLACE] [ABSTTL] [IDLETIME seconds] [FREQ frequency] */
void restoreCommand(client *c) {
long long ttl, lfu_freq = -1, lru_idle = -1, lru_clock = -1;
long long ttl, lfu_freq = -1, lru_idle = -1;
uint16_t rdbver = 0;
rio payload;
int j, type, replace = 0, absttl = 0;
@ -217,7 +217,6 @@ void restoreCommand(client *c) {
addReplyError(c, "Invalid IDLETIME value, must be >= 0");
return;
}
lru_clock = LRU_CLOCK();
j++; /* Consume additional arg. */
} else if (!strcasecmp(c->argv[j]->ptr, "freq") && additional >= 1 && lru_idle == -1) {
if (getLongLongFromObjectOrReply(c, c->argv[j + 1], &lfu_freq, NULL) != C_OK) return;
@ -305,7 +304,7 @@ void restoreCommand(client *c) {
rewriteClientCommandArgument(c, c->argc, shared.absttl);
}
}
objectSetLRUOrLFU(obj, lfu_freq, lru_idle, lru_clock, 1000);
objectSetLRUOrLFU(obj, lfu_freq, lru_idle);
signalModifiedKey(c, c->db, key);
notifyKeyspaceEvent(NOTIFY_GENERIC, "restore", key, c->db->id);
addReply(c, shared.ok);

View File

@ -49,14 +49,6 @@ static int objectIsExpired(robj *val);
static void dbSetValue(serverDb *db, robj *key, robj **valref, int overwrite, void **oldref);
static robj *dbFindWithDictIndex(serverDb *db, sds key, int dict_index);
/* Update LFU when an object is accessed.
* Firstly, decrement the counter if the decrement time is reached.
* Then logarithmically increment the counter, and update the access time. */
void updateLFU(robj *val) {
unsigned long counter = LFUDecrAndReturn(val);
counter = LFULogIncr(counter);
val->lru = (LFUGetTimeInMinutes() << 8) | counter;
}
/* Lookup a key for read or write operations, or return NULL if the key is not
* found in the specified DB. This function implements the functionality of
@ -118,11 +110,7 @@ robj *lookupKey(serverDb *db, robj *key, int flags) {
if (!hasActiveChildProcess() && !(flags & LOOKUP_NOTOUCH)) {
/* Shared objects can't be stored in the database. */
serverAssert(val->refcount != OBJ_SHARED_REFCOUNT);
if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
updateLFU(val);
} else {
val->lru = LRU_CLOCK();
}
val->lru = lrulfu_touch(val->lru);
}
if (!(flags & (LOOKUP_NOSTATS | LOOKUP_WRITE))) server.stat_keyspace_hits++;

View File

@ -689,8 +689,11 @@ void debugCommand(client *c) {
s = sdscatprintf(s, "Value at:%p refcount:%d encoding:%s", (void *)val, val->refcount, strenc);
if (!fast) s = sdscatprintf(s, " serializedlength:%zu", rdbSavedObjectLen(val, c->argv[2], c->db->id));
/* Either lru or lfu field could work correctly which depends on server.maxmemory_policy. */
s = sdscatprintf(s, " lru:%d lru_seconds_idle:%llu", val->lru, estimateObjectIdleTime(val) / 1000);
s = sdscatprintf(s, " lfu_freq:%lu lfu_access_time_minutes:%u", LFUDecrAndReturn(val), val->lru >> 8);
if (lrulfu_isUsingLFU()) {
s = sdscatprintf(s, " lfu_freq:%u lfu_access_time_minutes:%u", objectGetLFUFrequency(val), val->lru >> 8);
} else {
s = sdscatprintf(s, " lru:%d lru_seconds_idle:%u", val->lru, lru_getIdleSecs(val->lru));
}
s = sdscatprintf(s, "%s", extra);
addReplyStatusLength(c, s, sdslen(s));
sdsfree(s);

View File

@ -67,38 +67,6 @@ static struct evictionPoolEntry *EvictionPoolLRU;
* Implementation of eviction, aging and LRU
* --------------------------------------------------------------------------*/
/* Return the LRU clock, based on the clock resolution. This is a time
* in a reduced-bits format that can be used to set and check the
* object->lru field of serverObject structures. */
unsigned int getLRUClock(void) {
return (mstime() / LRU_CLOCK_RESOLUTION) & LRU_CLOCK_MAX;
}
/* This function is used to obtain the current LRU clock.
* If the current resolution is lower than the frequency we refresh the
* LRU clock (as it should be in production servers) we return the
* precomputed value, otherwise we need to resort to a system call. */
unsigned int LRU_CLOCK(void) {
unsigned int lruclock;
if (1000 / server.hz <= LRU_CLOCK_RESOLUTION) {
lruclock = server.lruclock;
} else {
lruclock = getLRUClock();
}
return lruclock;
}
/* Given an object returns the min number of milliseconds the object was never
* requested, using an approximated LRU algorithm. */
unsigned long long estimateObjectIdleTime(robj *o) {
unsigned long long lruclock = LRU_CLOCK();
if (lruclock >= o->lru) {
return (lruclock - o->lru) * LRU_CLOCK_RESOLUTION;
} else {
return (lruclock + (LRU_CLOCK_MAX - o->lru)) * LRU_CLOCK_RESOLUTION;
}
}
/* LRU approximation algorithm
*
* The server uses an approximation of the LRU algorithm that runs in constant
@ -158,17 +126,8 @@ int evictionPoolPopulate(serverDb *db, kvstore *samplekvs, struct evictionPoolEn
/* Calculate the idle time according to the policy. This is called
* idle just because the code initially handled LRU, but is in fact
* just a score where a higher score means better candidate. */
if (server.maxmemory_policy & MAXMEMORY_FLAG_LRU) {
idle = estimateObjectIdleTime(o);
} else if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
/* When we use an LRU policy, we sort the keys by idle time
* so that we expire keys starting from greater idle time.
* However when the policy is an LFU one, we have a frequency
* estimation, and we want to evict keys with lower frequency
* first. So inside the pool we put objects using the inverted
* frequency subtracting the actual frequency to the maximum
* frequency of 255. */
idle = 255 - LFUDecrAndReturn(o);
if (server.maxmemory_policy & (MAXMEMORY_FLAG_LRU | MAXMEMORY_FLAG_LFU)) {
idle = objectGetIdleness(o);
} else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) {
/* In this case the sooner the expire the better. */
idle = ULLONG_MAX - objectGetExpire(o);
@ -230,88 +189,6 @@ int evictionPoolPopulate(serverDb *db, kvstore *samplekvs, struct evictionPoolEn
return count;
}
/* ----------------------------------------------------------------------------
* LFU (Least Frequently Used) implementation.
* We have 24 total bits of space in each object in order to implement
* an LFU (Least Frequently Used) eviction policy, since we re-use the
* LRU field for this purpose.
*
* We split the 24 bits into two fields:
*
* 16 bits 8 bits
* +------------------+--------+
* + Last access time | LOG_C |
* +------------------+--------+
*
* LOG_C is a logarithmic counter that provides an indication of the access
* frequency. However this field must also be decremented otherwise what used
* to be a frequently accessed key in the past, will remain ranked like that
* forever, while we want the algorithm to adapt to access pattern changes.
*
* So the remaining 16 bits are used in order to store the "access time",
* a reduced-precision Unix time (we take 16 bits of the time converted
* in minutes since we don't care about wrapping around) where the LOG_C
* counter decays every minute by default (depends on lfu-decay-time).
*
* New keys don't start at zero, in order to have the ability to collect
* some accesses before being trashed away, so they start at LFU_INIT_VAL.
* The logarithmic increment performed on LOG_C takes care of LFU_INIT_VAL
* when incrementing the key, so that keys starting at LFU_INIT_VAL
* (or having a smaller value) have a very high chance of being incremented
* on access. (The chance depends on counter and lfu-log-factor.)
*
* During decrement, the value of the logarithmic counter is decremented by
* one when lfu-decay-time minutes elapsed.
* --------------------------------------------------------------------------*/
/* Return the current time in minutes, just taking the least significant
* 16 bits. The returned time is suitable to be stored as LDT (last access
* time) for the LFU implementation. */
unsigned long LFUGetTimeInMinutes(void) {
return (server.unixtime / 60) & 65535;
}
/* Given an object ldt (last access time), compute the minimum number of minutes
* that elapsed since the last access. Handle overflow (ldt greater than
* the current 16 bits minutes time) considering the time as wrapping
* exactly once. */
unsigned long LFUTimeElapsed(unsigned long ldt) {
unsigned long now = LFUGetTimeInMinutes();
if (now >= ldt) return now - ldt;
return 65535 - ldt + now;
}
/* Logarithmically increment a counter. The greater is the current counter value
* the less likely is that it gets really incremented. Saturate it at 255. */
uint8_t LFULogIncr(uint8_t counter) {
if (counter == 255) return 255;
double r = (double)rand() / RAND_MAX;
double baseval = counter - LFU_INIT_VAL;
if (baseval < 0) baseval = 0;
double p = 1.0 / (baseval * server.lfu_log_factor + 1);
if (r < p) counter++;
return counter;
}
/* If the object's ldt (last access time) is reached, decrement the LFU counter but
* do not update LFU fields of the object, we update the access time
* and counter in an explicit way when the object is really accessed.
* And we will decrement the counter according to the times of
* elapsed time than server.lfu_decay_time.
* Return the object frequency counter.
*
* This function is used in order to scan the dataset for the best object
* to fit: as we check for the candidate, we incrementally decrement the
* counter of the scanned objects if needed. */
unsigned long LFUDecrAndReturn(robj *o) {
unsigned long ldt = o->lru >> 8;
unsigned long counter = o->lru & 255;
unsigned long num_periods = server.lfu_decay_time ? LFUTimeElapsed(ldt) / server.lfu_decay_time : 0;
if (num_periods) counter = (num_periods > counter) ? 0 : counter - num_periods;
return counter;
}
/* We don't want to count AOF buffers and replicas output buffers as
* used memory: the eviction should use mostly data size, because
* it can cause feedback-loop when we push DELs into them, putting

170
src/lrulfu.c Normal file
View File

@ -0,0 +1,170 @@
#include "lrulfu.h"
#include "server.h"
#define LRULFU_MASK ((1 << LRULFU_BITS) - 1) /* Mask for LRU/LFU value */
/**************** LRU ****************/
/* LRU uses a 24 bit timestamp of the last access time (in seconds)
* The LRU value needs to be "touched" within 194 days, or the value will wrap,
* and the last access time will appear to be recent.
*/
/* The LRU_CLOCK_RESOLUTION is used to support an older ruby program which tests
* the LRU behavior. This should be set to 1 if building Valkey to support this
* ruby test. Otherwise, the default of 1000 is expected. */
#define LRU_CLOCK_RESOLUTION 1000 /* LRU clock resolution in ms */
/* Return the wrapping LRU clock: the low LRULFU_BITS bits of the server time,
 * counted in units of LRU_CLOCK_RESOLUTION milliseconds (whole seconds with the
 * default resolution of 1000). The value is designed to roll over. */
static uint32_t LRUGetClockTime(void) {
#if LRU_CLOCK_RESOLUTION != 1000
    /* Non-default resolution: derive the tick count from the millisecond clock. */
    return (uint32_t)((server.mstime / LRU_CLOCK_RESOLUTION) & LRULFU_MASK);
#else
    /* Default resolution: one tick per second, taken from server.unixtime. */
    return (uint32_t)(server.unixtime & LRULFU_MASK);
#endif
}
/* Build an LRU value for an object that has already been idle for `idle_secs`
 * seconds, i.e. "now minus idle" on the wrapping LRU clock.
 *
 * The idle time is saturated at the largest span the LRULFU_BITS-wide clock can
 * express. Without the clamp an idle time of 2^24 ticks or more would be reduced
 * modulo 2^24, so a very idle key would be imported as if recently used.
 *
 * Returns a value that fits in LRULFU_BITS bits. */
uint32_t lru_import(uint32_t idle_secs) {
    uint64_t idle_ticks = idle_secs;
#if LRU_CLOCK_RESOLUTION != 1000
    /* Convert seconds to clock ticks. Done in 64 bits so the product cannot
     * overflow on platforms where `long` is 32 bits wide. */
    idle_ticks = idle_ticks * 1000 / LRU_CLOCK_RESOLUTION;
#endif
    if (idle_ticks > (uint64_t)LRULFU_MASK) idle_ticks = LRULFU_MASK;
    /* The unsigned subtraction may wrap below zero; that is expected, and the
     * mask folds the result back into the clock's range. */
    return (LRUGetClockTime() - (uint32_t)idle_ticks) & LRULFU_MASK;
}
/* Return how many seconds have passed between the given LRU value and the
 * current LRU clock. */
uint32_t lru_getIdleSecs(uint32_t lru) {
    uint32_t now = LRUGetClockTime();
    /* Unsigned wrap-around is expected when the clock has rolled over since
     * `lru` was recorded; masking yields the distance on the wrapping clock. */
    uint32_t idle = (now - lru) & LRULFU_MASK;
#if LRU_CLOCK_RESOLUTION != 1000
    /* Convert clock ticks back to seconds. */
    idle = (uint32_t)((long)idle * LRU_CLOCK_RESOLUTION / 1000);
#endif
    return idle;
}
/**************** LFU ****************/
/* ----------------------------------------------------------------------------
* LFU (Least Frequently Used) implementation.
*
* We split the 24 bits into two fields:
*
* 16 bits 8 bits
* +-----------------------+--------+
* + Last access (minutes) | LOG_C |
* +-----------------------+--------+
*
* LOG_C is a logarithmic counter that provides an indication of the access
* frequency. However this field must also be decremented otherwise what used
* to be a frequently accessed key in the past, will remain ranked like that
* forever, while we want the algorithm to adapt to access pattern changes.
*
* So the remaining 16 bits are used in order to store the "access time",
* a reduced-precision Unix time (we take 16 bits of the time converted
* in minutes since we don't care about wrapping around) where the LOG_C
* counter decays every minute by default (depends on lfu-decay-time).
*
* New keys don't start at zero, in order to have the ability to collect
* some accesses before being trashed away, so they start at LFU_INIT_VAL.
* The logarithmic increment performed on LOG_C takes care of LFU_INIT_VAL
* when incrementing the key, so that keys starting at LFU_INIT_VAL
* (or having a smaller value) have a very high chance of being incremented
* on access. (The chance depends on counter and lfu-log-factor.)
*
* During decrement, the value of the logarithmic counter is decremented by
* one when lfu-decay-time minutes elapsed.
* --------------------------------------------------------------------------*/
#define LFU_INIT_VAL 5
/* Return server.unixtime expressed in minutes, truncated to its 16 least
 * significant bits. The truncation is the intended roll-over behaviour. */
static uint16_t LFUGetTimeInMinutes(void) {
    uint16_t minutes = (uint16_t)(server.unixtime / 60);
    return minutes;
}
/* Build an LFU value from a frequency counter: the current time in minutes goes
 * in the upper 16 bits and `freq` in the low 8 bits. */
uint32_t lfu_import(uint8_t freq) {
    uint32_t minutes = LFUGetTimeInMinutes();
    return (minutes << 8) | freq;
}
/* Apply any pending decay to an LFU value, without adding a "touch".
 *
 * The counter is decremented once per whole `lfu_decay_time` period that has
 * elapsed since the stored timestamp. The returned timestamp is advanced only by
 * the periods that were actually applied, so the leftover fraction of a period is
 * kept. If the timestamp were reset to "now" instead, a value that is evaluated
 * (and written back) more often than once per decay period would never decay
 * when lfu_decay_time > 1.
 *
 * When decay is disabled (lfu_decay_time is 0) the counter is left alone and the
 * timestamp is refreshed to the current time. */
static uint32_t LFUDecay(uint32_t lfu) {
    uint16_t now = LFUGetTimeInMinutes();
    uint8_t freq = (uint8_t)lfu;
    if (server.lfu_decay_time <= 0) {
        return ((uint32_t)now << 8) | freq;
    }
    uint16_t prev_time = (uint16_t)(lfu >> 8);
    uint16_t elapsed = (uint16_t)(now - prev_time); // Wrap-around expected/valid
    uint32_t num_periods = (uint32_t)(elapsed / server.lfu_decay_time);
    uint16_t leftover = (uint16_t)(elapsed % server.lfu_decay_time);
    freq = (num_periods > freq) ? 0 : (uint8_t)(freq - num_periods);
    /* Back-date the stamp by the minutes that did not make up a full period. */
    uint16_t stamp = (uint16_t)(now - leftover);
    return ((uint32_t)stamp << 8) | freq;
}
/* Increment the freq counter with logarithmic probability.
 * Values closer to 0 are more likely to increment.
 * Values closer to 255 are logarithmically less likely to increment.
 * The counter saturates at 255. */
static uint8_t LFULogIncr(uint8_t freq) {
    if (freq == 255) return freq;
    double r = (double)rand() / RAND_MAX;
    /* Counters at or below LFU_INIT_VAL use a base of 0, which makes p == 1. */
    double baseval = (int)freq - LFU_INIT_VAL;
    if (baseval < 0) baseval = 0;
    double p = 1.0 / (baseval * server.lfu_log_factor + 1);
    if (r < p) freq++;
    return freq;
}
/* Touch an LFU value: decay the old counter, probabilistically increment it,
 * and stamp the result with the current time in minutes (an access restarts the
 * decay period). */
uint32_t lfu_touch(uint32_t lfu) {
    uint8_t freq = LFULogIncr((uint8_t)LFUDecay(lfu));
    return ((uint32_t)LFUGetTimeInMinutes() << 8) | freq;
}
/* Store the decayed frequency counter of `lfu` in `*freq`, without adding a
 * touch. Returns the decayed LFU value so the caller can keep it. */
uint32_t lfu_getFrequency(uint32_t lfu, uint8_t *freq) {
    uint32_t decayed = LFUDecay(lfu);
    *freq = (uint8_t)(decayed & UINT8_MAX);
    return decayed;
}
/**************** Generic API ****************/
/* Report whether server.maxmemory_policy has the LFU flag set. */
bool lrulfu_isUsingLFU(void) {
    if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
        return true;
    }
    return false;
}
/* Initial LRU/LFU value for a new object: an LFU value carrying LFU_INIT_VAL
 * under an LFU policy, otherwise an LRU value with zero idle time. */
uint32_t lrulfu_init(void) {
    return lrulfu_isUsingLFU() ? lfu_import(LFU_INIT_VAL) : lru_import(0);
}
/* Compute a relative idleness score for `lrulfu` and store it in `*idleness`
 * (a larger score means more idle).
 *
 * Under LRU the score is the idle time in seconds and the value is returned
 * unchanged. Under LFU the score is UINT8_MAX minus the decayed frequency, and
 * the decayed LFU value is returned. */
uint32_t lrulfu_getIdleness(uint32_t lrulfu, uint32_t *idleness) {
    if (!lrulfu_isUsingLFU()) {
        *idleness = lru_getIdleSecs(lrulfu);
        return lrulfu;
    }
    uint8_t freq;
    uint32_t updated = lfu_getFrequency(lrulfu, &freq);
    *idleness = (uint32_t)(UINT8_MAX - freq);
    return updated;
}
/* Record an access. Under LFU the value is touched via lfu_touch(); under LRU
 * the previous value is discarded and replaced by one with zero idle time. */
uint32_t lrulfu_touch(uint32_t lrulfu) {
    return lrulfu_isUsingLFU() ? lfu_touch(lrulfu) : lru_import(0);
}

71
src/lrulfu.h Normal file
View File

@ -0,0 +1,71 @@
#ifndef __LRULFU_H__
#define __LRULFU_H__
#include <stdint.h>
#include <stdbool.h>
/* LRU (Least Recently Used) and LFU (Least Frequently Used) numeric logic.
*
* Implementation of a 24 bit value which may either be an LRU or LFU value as indicated by the
* server's maxmemory_policy.
*
* LRU - the value consists of a 24-bit time in seconds. This value will roll over after 194 days.
* (If a value is not touched for 194 days, it will appear as recent.)
*
* LFU - maintains an approximate logarithmic value indicating the frequency of access. The first
* 16 bits maintain the last evaluation time in minutes. The remaining 8 bits maintain an
* approximate frequency of use. The time value will roll over after 45 days. If a value
* is not evaluated in this time, it may not show as decayed after this time.
*
* Returned values are guaranteed to fit in an unsigned 24-bit region. They can safely be packed
* like:
* struct {
* uint32_t lru : LRULFU_BITS;
* }
*/
#define LRULFU_BITS 24
/**************** LRU ****************/
/* Import a given LRU idleness to the current time. */
uint32_t lru_import(uint32_t idle_secs);
/* Get the current idle secs from the given LRU value. */
uint32_t lru_getIdleSecs(uint32_t lru);
/**************** LFU ****************/
/* Import a given LFU frequency to the current time. */
uint32_t lfu_import(uint8_t freq);
/* Update/Touch an LFU value, decays the old value and adds a "touch". */
uint32_t lfu_touch(uint32_t lfu);
/* Return the LFU frequency, without adding a touch.
* An updated LFU is returned which maintains the decay on the LFU. */
uint32_t lfu_getFrequency(uint32_t lfu, uint8_t *freq);
/**************** Generic API ****************/
/* These API functions can be used interchangeably between LRU and LFU, depending on the setting of
* server.maxmemory_policy. It is preferred to use these functions rather than directly accessing
* the LRU/LFU API functions if the use case permits. Note that if the server's policy is changed,
* LRU <-> LFU, evaluations will be incorrect until values have had time to be touched/updated.*/
/* Is the server using LFU policy? */
bool lrulfu_isUsingLFU(void);
/* Provide an initial value for LRU or LFU */
uint32_t lrulfu_init(void);
/* Return a relative indication of idleness, used for comparison between LRU or LFU values.
* A greater number indicates a greater degree of idleness.
*
* Returns an updated LRU/LFU value, maintaining the data, without a "touch". */
uint32_t lrulfu_getIdleness(uint32_t lrulfu, uint32_t *idleness);
/* Add a touch to the LRU or LFU value, returning the updated LRU/LFU. */
uint32_t lrulfu_touch(uint32_t lrulfu);
#endif

View File

@ -13739,7 +13739,7 @@ size_t moduleCount(void) {
* returns VALKEYMODULE_OK if the LRU was updated, VALKEYMODULE_ERR otherwise. */
int VM_SetLRU(ValkeyModuleKey *key, mstime_t lru_idle) {
    if (!key->value) return VALKEYMODULE_ERR;
    if (objectSetLRUOrLFU(key->value, -1, lru_idle, lru_idle >= 0 ? LRU_CLOCK() : 0, 1)) return VALKEYMODULE_OK;
    /* lru_idle is in milliseconds while objectSetLRUOrLFU() takes seconds, so
     * divide by 1000. Negative input is mapped to -1 explicitly: integer
     * division would turn e.g. -500 into 0, which would be accepted as a valid
     * idle time instead of "nothing set". */
    if (objectSetLRUOrLFU(key->value, -1, lru_idle >= 0 ? lru_idle / 1000 : -1)) return VALKEYMODULE_OK;
    return VALKEYMODULE_ERR;
}
@ -13751,7 +13751,7 @@ int VM_GetLRU(ValkeyModuleKey *key, mstime_t *lru_idle) {
*lru_idle = -1;
if (!key->value) return VALKEYMODULE_ERR;
if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) return VALKEYMODULE_OK;
*lru_idle = estimateObjectIdleTime(key->value);
*lru_idle = objectGetLRUIdleSecs(key->value) * 1000;
return VALKEYMODULE_OK;
}
@ -13762,7 +13762,7 @@ int VM_GetLRU(ValkeyModuleKey *key, mstime_t *lru_idle) {
* returns VALKEYMODULE_OK if the LFU was updated, VALKEYMODULE_ERR otherwise. */
int VM_SetLFU(ValkeyModuleKey *key, long long lfu_freq) {
if (!key->value) return VALKEYMODULE_ERR;
if (objectSetLRUOrLFU(key->value, lfu_freq, -1, 0, 1)) return VALKEYMODULE_OK;
if (objectSetLRUOrLFU(key->value, lfu_freq, -1)) return VALKEYMODULE_OK;
return VALKEYMODULE_ERR;
}
@ -13772,7 +13772,7 @@ int VM_SetLFU(ValkeyModuleKey *key, long long lfu_freq) {
int VM_GetLFU(ValkeyModuleKey *key, long long *lfu_freq) {
*lfu_freq = -1;
if (!key->value) return VALKEYMODULE_ERR;
if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) *lfu_freq = LFUDecrAndReturn(key->value);
if (lrulfu_isUsingLFU()) *lfu_freq = objectGetLFUFrequency(key->value);
return VALKEYMODULE_OK;
}

View File

@ -110,14 +110,7 @@ robj *createObject(int type, void *ptr) {
void initObjectLRUOrLFU(robj *o) {
if (o->refcount == OBJ_SHARED_REFCOUNT) return;
/* Set the LRU to the current lruclock (minutes resolution), or
* alternatively the LFU counter. */
if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
o->lru = (LFUGetTimeInMinutes() << 8) | LFU_INIT_VAL;
} else {
o->lru = LRU_CLOCK();
}
return;
o->lru = lrulfu_init();
}
/* Set a special refcount in the object to make it "shared":
@ -1582,32 +1575,39 @@ sds getMemoryDoctorReport(void) {
return s;
}
/* Return the object's LFU frequency counter. The LFU value handed back by
 * lfu_getFrequency() is stored into o->lru. */
uint8_t objectGetLFUFrequency(robj *o) {
    uint8_t frequency = 0;
    o->lru = lfu_getFrequency(o->lru, &frequency);
    return frequency;
}
/* Return the object's LRU idle time in seconds. The object is not modified. */
uint32_t objectGetLRUIdleSecs(robj *o) {
    uint32_t idle_secs = lru_getIdleSecs(o->lru);
    return idle_secs;
}
/* Return a relative idleness score for the object; larger numbers are more
 * idle. The value handed back by lrulfu_getIdleness() is stored into o->lru. */
uint32_t objectGetIdleness(robj *o) {
    uint32_t score = 0;
    o->lru = lrulfu_getIdleness(o->lru, &score);
    return score;
}
/* Set the object LRU/LFU depending on server.maxmemory_policy.
* The lfu_freq arg is only relevant if policy is MAXMEMORY_FLAG_LFU.
* The lru_idle and lru_clock args are only relevant if policy
* is MAXMEMORY_FLAG_LRU.
* Either or both of them may be <0, in that case, nothing is set. */
int objectSetLRUOrLFU(robj *val, long long lfu_freq, long long lru_idle, long long lru_clock, int lru_multiplier) {
if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
int objectSetLRUOrLFU(robj *val, long long lfu_freq, long long lru_idle_secs) {
if (lrulfu_isUsingLFU()) {
if (lfu_freq >= 0) {
serverAssert(lfu_freq <= 255);
val->lru = (LFUGetTimeInMinutes() << 8) | lfu_freq;
serverAssert(lfu_freq <= UINT8_MAX);
val->lru = lfu_import((uint8_t)lfu_freq);
return 1;
}
} else if (lru_idle >= 0) {
/* Provided LRU idle time is in seconds. Scale
* according to the LRU clock resolution this
* instance was compiled with (normally 1000 ms, so the
* below statement will expand to lru_idle*1000/1000. */
lru_idle = lru_idle * lru_multiplier / LRU_CLOCK_RESOLUTION;
long lru_abs = lru_clock - lru_idle; /* Absolute access time. */
/* If the LRU field underflows (since lru_clock is a wrapping clock),
* we need to make it positive again. This will be handled by the unwrapping
* code in estimateObjectIdleTime. I.e. imagine a day when lru_clock
* wrap arounds (happens once in some 6 months), and becomes a low
* value, like 10, an lru_idle of 1000 should be near LRU_CLOCK_MAX. */
if (lru_abs < 0) lru_abs += LRU_CLOCK_MAX;
val->lru = lru_abs;
} else if (lru_idle_secs >= 0) {
val->lru = lru_import(lru_idle_secs);
return 1;
}
return 0;
@ -1660,7 +1660,7 @@ void objectCommand(client *c) {
"switching between policies at runtime LRU and LFU data will take some time to adjust.");
return;
}
addReplyLongLong(c, estimateObjectIdleTime(o) / 1000);
addReplyLongLong(c, lru_getIdleSecs(o->lru));
} else if (!strcasecmp(c->argv[1]->ptr, "freq") && c->argc == 3) {
if ((o = objectCommandLookupOrReply(c, c->argv[2], shared.null[c->resp])) == NULL) return;
if (!(server.maxmemory_policy & MAXMEMORY_FLAG_LFU)) {
@ -1669,11 +1669,7 @@ void objectCommand(client *c) {
"when switching between policies at runtime LRU and LFU data will take some time to adjust.");
return;
}
/* LFUDecrAndReturn should be called
* in case of the key has not been accessed for a long time,
* because we update the access time only
* when the key is read or overwritten. */
addReplyLongLong(c, LFUDecrAndReturn(o));
addReplyLongLong(c, objectGetLFUFrequency(o));
} else {
addReplySubcommandSyntaxError(c);
}

View File

@ -1186,22 +1186,20 @@ int rdbSaveKeyValuePair(rio *rdb, robj *key, robj *val, long long expiretime, in
/* Save the LRU info. */
if (savelru) {
uint64_t idletime = estimateObjectIdleTime(val);
idletime /= 1000; /* Using seconds is enough and requires less space.*/
uint64_t idletime = objectGetLRUIdleSecs(val);
if (rdbSaveType(rdb, RDB_OPCODE_IDLE) == -1) return -1;
if (rdbSaveLen(rdb, idletime) == -1) return -1;
}
/* Save the LFU info. */
if (savelfu) {
uint8_t buf[1];
buf[0] = LFUDecrAndReturn(val);
uint8_t freq = objectGetLFUFrequency(val);
/* We can encode this in exactly two bytes: the opcode and an 8
* bit counter, since the frequency is logarithmic with a 0-255 range.
* Note that we do not store the halving time because to reset it
* a single time when loading does not affect the frequency much. */
if (rdbSaveType(rdb, RDB_OPCODE_FREQ) == -1) return -1;
if (rdbWriteRaw(rdb, buf, 1) == -1) return -1;
if (rdbWriteRaw(rdb, &freq, 1) == -1) return -1;
}
/* Save type, key, value */
@ -3136,7 +3134,6 @@ int rdbLoadRioWithLoadingCtx(rio *rdb, int rdbflags, rdbSaveInfo *rsi, rdbLoadin
/* Key-specific attributes, set by opcodes before the key type. */
long long lru_idle = -1, lfu_freq = -1, expiretime = -1, now = mstime();
long long lru_clock = LRU_CLOCK();
while (1) {
sds key;
@ -3481,7 +3478,7 @@ int rdbLoadRioWithLoadingCtx(rio *rdb, int rdbflags, rdbSaveInfo *rsi, rdbLoadin
}
/* Set usage information (for eviction). */
objectSetLRUOrLFU(val, lfu_freq, lru_idle, lru_clock, 1000);
objectSetLRUOrLFU(val, lfu_freq, lru_idle);
/* call key space notification on key loaded for modules only */
moduleNotifyKeyspaceEvent(NOTIFY_LOADED, "loaded", &keyobj, db->id);

View File

@ -1513,19 +1513,6 @@ long long serverCron(struct aeEventLoop *eventLoop, long long id, void *clientDa
server.duration_stats[EL_DURATION_TYPE_EL].cnt, 1);
}
/* We have just LRU_BITS bits per object for LRU information.
* So we use an (eventually wrapping) LRU clock.
*
* Note that even if the counter wraps it's not a big problem,
* everything will still work but some object will appear younger
* to the server. However for this to happen a given object should never be
* touched for all the time needed to the counter to wrap, which is
* not likely.
*
* Note that you can change the resolution altering the
* LRU_CLOCK_RESOLUTION define. */
server.lruclock = getLRUClock();
cronUpdateMemoryStats();
/* We received a SIGTERM or SIGINT, shutting down here in a safe way, as it is
@ -2288,7 +2275,6 @@ void initServerConfig(void) {
server.latency_tracking_info_percentiles[1] = 99.0; /* p99 */
server.latency_tracking_info_percentiles[2] = 99.9; /* p999 */
server.lruclock = getLRUClock();
resetServerSaveParams();
appendServerSaveParams(60 * 60, 1); /* save after 1 hour and 1 change */
@ -5917,7 +5903,7 @@ sds genValkeyInfoString(dict *section_dict, int all_sections, int everything) {
"hz:%i\r\n", server.hz,
"configured_hz:%i\r\n", server.hz,
"clients_hz:%i\r\n", server.clients_hz,
"lru_clock:%u\r\n", server.lruclock,
"lru_clock:%u\r\n", server.unixtime & ((1 << LRULFU_BITS) - 1),
"executable:%s\r\n", server.executable ? server.executable : "",
"config_file:%s\r\n", server.configfile ? server.configfile : "",
"io_threads_active:%i\r\n", server.active_io_threads_num > 1,

View File

@ -82,6 +82,7 @@
#include "vset.h"
#include "trace/trace.h"
#include "entry.h"
#include "lrulfu.h"
#ifdef USE_LTTNG
#define valkey_fork() do_fork()
@ -784,10 +785,6 @@ typedef struct ValkeyModuleType moduleType;
#define OBJ_ENCODING_STREAM 10 /* Encoded as a radix tree of listpacks */
#define OBJ_ENCODING_LISTPACK 11 /* Encoded as a listpack */
#define LRU_BITS 24
#define LRU_CLOCK_MAX ((1 << LRU_BITS) - 1) /* Max value of obj->lru */
#define LRU_CLOCK_RESOLUTION 1000 /* LRU clock resolution in ms */
#define OBJ_REFCOUNT_BITS 30
#define OBJ_SHARED_REFCOUNT ((1 << OBJ_REFCOUNT_BITS) - 1) /* Global object never destroyed. */
#define OBJ_STATIC_REFCOUNT ((1 << OBJ_REFCOUNT_BITS) - 2) /* Object allocated in the stack. */
@ -795,9 +792,7 @@ typedef struct ValkeyModuleType moduleType;
struct serverObject {
unsigned type : 4;
unsigned encoding : 4;
unsigned lru : LRU_BITS; /* LRU time (relative to global lru_clock) or
* LFU data (least significant 8 bits frequency
* and most significant 16 bits access time). */
unsigned lru : LRULFU_BITS;
unsigned hasexpire : 1;
unsigned hasembkey : 1;
unsigned refcount : OBJ_REFCOUNT_BITS;
@ -1682,7 +1677,6 @@ struct valkeyServer {
_Atomic AeIoState io_poll_state; /* Indicates the state of the IO polling. */
int io_ae_fired_events; /* Number of poll events received by the IO thread. */
rax *errors; /* Errors table */
unsigned int lruclock; /* Clock for LRU eviction */
volatile sig_atomic_t shutdown_asap; /* Shutdown ordered by signal handler. */
mstime_t shutdown_mstime; /* Timestamp to limit graceful shutdown. */
int last_sig_received; /* Indicates the last SIGNAL received, if any (e.g., SIGINT or SIGTERM). */
@ -3056,7 +3050,6 @@ char *strEncoding(int encoding);
int compareStringObjects(const robj *a, const robj *b);
int collateStringObjects(const robj *a, const robj *b);
int equalStringObjects(robj *a, robj *b);
unsigned long long estimateObjectIdleTime(robj *o);
void trimStringObjectIfNeeded(robj *o, int trim_small_values);
#define sdsEncodedObject(objptr) (objptr->encoding == OBJ_ENCODING_RAW || objptr->encoding == OBJ_ENCODING_EMBSTR)
@ -3066,6 +3059,9 @@ robj *objectSetKeyAndExpire(robj *val, sds key, long long expire);
robj *objectSetExpire(robj *val, long long expire);
sds objectGetKey(const robj *val);
long long objectGetExpire(const robj *val);
uint8_t objectGetLFUFrequency(robj *o);
uint32_t objectGetLRUIdleSecs(robj *o);
uint32_t objectGetIdleness(robj *o);
/* Synchronous I/O with timeout */
ssize_t syncWrite(int fd, char *ptr, ssize_t size, long long timeout);
@ -3380,8 +3376,6 @@ void exitExecutionUnit(void);
void resetServerStats(void);
void monitorActiveDefrag(void);
void defragWhileBlocked(void);
unsigned int getLRUClock(void);
unsigned int LRU_CLOCK(void);
const char *evictPolicyToString(void);
struct serverMemOverhead *getMemoryOverheadData(void);
void freeMemoryOverheadData(struct serverMemOverhead *mh);
@ -3590,7 +3584,7 @@ robj *lookupKeyReadWithFlags(serverDb *db, robj *key, int flags);
robj *lookupKeyWriteWithFlags(serverDb *db, robj *key, int flags);
robj *objectCommandLookup(client *c, robj *key);
robj *objectCommandLookupOrReply(client *c, robj *key, robj *reply);
int objectSetLRUOrLFU(robj *val, long long lfu_freq, long long lru_idle, long long lru_clock, int lru_multiplier);
int objectSetLRUOrLFU(robj *val, long long lfu_freq, long long lru_idle_secs);
#define LOOKUP_NONE 0
#define LOOKUP_NOTOUCH (1 << 0) /* Don't update LRU. */
#define LOOKUP_NONOTIFY (1 << 1) /* Don't trigger keyspace event on key misses. */
@ -3750,10 +3744,6 @@ int clientsCronHandleTimeout(client *c, mstime_t now_ms);
/* evict.c -- maxmemory handling and LRU eviction. */
void evictionPoolAlloc(void);
#define LFU_INIT_VAL 5
unsigned long LFUGetTimeInMinutes(void);
uint8_t LFULogIncr(uint8_t value);
unsigned long LFUDecrAndReturn(robj *o);
#define EVICT_OK 0
#define EVICT_RUNNING 1
#define EVICT_FAIL 2