From c15f8cbeea17da8c24724e82b4de5b7bbbd9f7a1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=95=BF=E5=85=B0?= <aoxuyang.axy@alibaba-inc.com>
Date: Mon, 24 Oct 2022 01:27:05 +0800
Subject: [PATCH 01/19] [Embedding] Add AgingLFU(RedisLFUCache) Cache Policy.

---
 tensorflow/core/framework/embedding/cache.h | 135 ++++++++++++++++++++
 1 file changed, 135 insertions(+)
diff --git a/tensorflow/core/framework/embedding/cache.h b/tensorflow/core/framework/embedding/cache.h
index 45960bcf8ea..a4a982e382c 100644
--- a/tensorflow/core/framework/embedding/cache.h
+++ b/tensorflow/core/framework/embedding/cache.h
@@ -272,6 +272,141 @@ class LFUCache : public BatchCache<K> {
   mutex mu_;
 };
 
+
+#define LFU_INIT_VAL 5
+#define LFU_LOG_FACTOR 10
+#define MAX_STEP 16777215
+
+#define MOD_STEP(st) (st & MAX_STEP)  // 2^24-1
+#define TIMES_TO_MAX (32640 * LFU_LOG_FACTOR * LFU_LOG_FACTOR)
+
+template <class K>
+class RedisLFUCache : public BatchCache<K> {
+ public:
+  LFUCache() {
+    min_freq = 255;
+    max_freq = 0;
+    freq_table.emplace_back(std::pair<std::list<RedisLFUNode>*, int64>(
+        new std::list<RedisLFUNode>, 0));
+    BatchCache<K>::num_hit = 0;
+    BatchCache<K>::num_miss = 0;
+  }
+
+  size_t size() {
+    mutex_lock l(mu_);
+    return key_table.size();
+  }
+
+  size_t get_evic_ids(K* evic_ids, size_t k_size) {
+    mutex_lock l(mu_);
+    size_t true_size = 0;
+    for (size_t i = 0; i < k_size && key_table.size() > 0; ++i) {
+      auto rm_it = freq_table[min_freq].first->back();
+      key_table.erase(rm_it.key);
+      evic_ids[i] = rm_it.key;
+      ++true_size;
+      freq_table[min_freq].first->pop_back();
+      freq_table[min_freq].second--;
+      if (freq_table[min_freq].second == 0) {
+        ++min_freq;
+        while (min_freq <= max_freq) {
+          if (freq_table[min_freq].second == 0) {
+            ++min_freq;
+          } else {
+            break;
+          }
+        }
+      }
+    }
+    return true_size;
+  }
+
+  void add_to_rank(const K* batch_ids, size_t batch_size) {
+    mutex_lock l(mu_);
+    global_step = MOD_STEP(global_step + 1);
+    for (size_t i = 0; i < batch_size; ++i) {
+      K id = batch_ids[i];
+      auto it = key_table.find(id);
+      if (it == key_table.end()) {
+        freq_table[LFU_INIT_VAL].first->emplace_front(
+            RedisLFUNode(id, global_step));
+        freq_table[LFU_INIT_VAL].second++;
+        key_table[id] = freq_table[LFU_INIT_VAL].first->begin();
+        min_freq = std::min(min_freq, LFU_INIT_VAL);
+        max_freq = std::max(max_freq, LFU_INIT_VAL);
+        BatchCache<K>::num_miss++;
+      } else {
+        typename std::list<RedisLFUNode>::iterator node = it->second;
+        RedisLFUNode node_new(node);
+        uint8_t freq = node->get_cnt();
+        uint8_t freq_new = node_new->updateLFU(global_step);
+        freq_table[freq].first->erase(node);
+        freq_table[freq].second--;
+        if (freq_table[freq].second == 0) {
+          if (min_freq == freq) min_freq = freq_new;
+        }
+        if (freq_new == freq_table.size()) {
+          freq_table.emplace_back(std::pair<std::list<RedisLFUNode>*, int64>(
+              new std::list<RedisLFUNode>, 0));
+        }
+        max_freq = std::max(max_freq, freq_new);
+        min_freq = std::min(min_freq, freq_new);
+        freq_table[freq_new].first->emplace_front(*node_new);
+        freq_table[freq_new].second++;
+        key_table[id] = freq_table[freq].first->begin();
+        BatchCache<K>::num_hit++;
+      }
+    }
+  }
+
+ private:
+  class RedisLFUNode {
+   public:
+    K key;
+    unsigned lfu;
+    RedisLFUNode(K key, unsigned long step)
+        : key(key), lfu((step << 8) | LFU_INIT_VAL) {}
+    RedisLFUNode(RedisLFUNode* that) : key(that->key), lfu(that->lfu) {}
+    unsigned long decrFuncLog(unsigned long period, unsigned long counter) {
+      if (counter > 0 && rand() * TIMES_TO_MAX > RAND_MAX * (period - 500))
+        counter--;
+      return counter;
+    }
+    unsigned long LFUTimeElapsed(unsigned long ldt, unsigned long step) {
+      unsigned long now = step;
+      if (now >= ldt) return now - ldt;
+      return MAX_STEP - ldt + now;
+    }
+    unsigned long LFUDecrAndReturn(unsigned long step) {
+      unsigned long ldt = LDT(this);
+      unsigned long counter = CNT(this);
+      unsigned long period = this->LFUTimeElapsed(ldt, step);
+      return this->decrFuncLog(period, counter);
+    }
+    uint8_t LFULogIncr(uint8_t counter) {
+      if (counter < 255 &&
+          rand() * (counter - LFU_INIT_VAL) * LFU_LOG_FACTOR < RAND_MAX)
+        counter++;
+      return counter;
+    }
+    uint8_t updateLFU(unsigned long step) {
+      step = MOD_STEP(step);
+      unsigned long counter = this->LFUDecrAndReturn(step);
+      counter = this->LFULogIncr(counter);
+      this->lfu = (step << 8) | counter;
+      return counter;
+    }
+    uint8_t get_cnt() { return lfu & 255; }
+    unsigned long get_ldt() { return lfu >> 8; }
+  };
+  time_t global_step;
+  size_t min_freq;
+  size_t max_freq;
+  std::vector<std::pair<std::list<RedisLFUNode>*, int64>> freq_table;
+  std::unordered_map<K, typename std::list<RedisLFUNode>::iterator> key_table;
+  mutex mu_;
+};
+
 } // embedding
 } // tensorflow
 

From 83b4d0f49c9554e5efeda23160166ed4eea7926b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=95=BF=E5=85=B0?= <aoxuyang.axy@alibaba-inc.com>
Date: Mon, 24 Oct 2022 01:29:06 +0800
Subject: [PATCH 02/19] [Embedding] Add AutoLRFU Cache Policy.

---
 tensorflow/core/framework/embedding/cache.h | 236 ++++++++++++++++++++
 1 file changed, 236 insertions(+)

diff --git a/tensorflow/core/framework/embedding/cache.h b/tensorflow/core/framework/embedding/cache.h
index 45960bcf8ea..9bb0f262731 100644
--- a/tensorflow/core/framework/embedding/cache.h
+++ b/tensorflow/core/framework/embedding/cache.h
@@ -272,6 +272,242 @@ class LFUCache : public BatchCache<K> {
   mutex mu_;
 };
 
+
+template <class K>
+class AutoLRFUCache : public BatchCache<K> {
+ public:
+  AutoLFUCache() {
+    global_step = 0;
+    key_table.clear();
+    freq_table.clear();
+    state = F0;
+    mode = FREQ_MODE;
+    span_last_check = 0;
+    counter_switch = 0;
+    prev_hit_rate = -100;
+    counter_replacement = 0;
+    num_replacement = 1;
+    HitSpan = 1000;
+  }
+
+  void rebuild() {
+    if (mode == STEP_MODE) return;
+    std::unordered_map<K, BaseNode*> new_table;
+    for (auto it = key_table.begin(); it != key_table.end(); it++) {
+      BaseNode* node = (BaseNode*)(*(it->second));
+      node->updateLFU(global_step, mode);
+      new_table[node->get_index()] = node;
+    }
+    freq_table.clear();
+    key_table.clear();
+    for (auto it = new_table.begin(); it != new_table.end(); it++) {
+      BaseNode* node = (BaseNode*)(it->second);
+      freq_table[node->get_index()].emplace_front(node);
+    }
+    for (auto list = freq_table.begin(); list != freq_table.end(); list++)
+      for (auto it = list->second.begin(); it != list->second.end(); it++)
+        key_table[(*it)->key] = it;
+  }
+
+  int get_hit_rate_len_100000(int len) {
+    int total = hit_recent.size();
+    if (total <= 0) return 0.0;
+    int hit = 0;
+    for (auto it = hit_recent.begin(); it != hit_recent.end(); it++)
+      hit += (len-- > 0) && (*it);
+    if (hit == 0) return 0.0;
+    return int(hit * 100000 / total);
+  }
+
+  int get_hit_rate100000() {
+    int total = hit_recent.size();
+    if (total <= 0) return 0.0;
+    int hit = 0;
+    for (auto it = hit_recent.begin(); it != hit_recent.end(); it++) hit += *it;
+    if (hit == 0) return 0.0;
+    return int(hit * 100000 / total);
+  }
+
+  void mode_switch() {
+    if (mode == STEP_MODE) {
+      mode = FREQ_MODE;
+      rebuild();  // rebuild the linked lists from the saved frequency info
+    } else {
+      mode = STEP_MODE;
+    }
+  }
+
+  void auto_switch() {
+    if (state == S3 && counter_replacement < HitSpan)
+      return;
+    else if (counter_replacement <
+             num_replacement *
+                 (SingleCache<K, V>::capacity() + HitSpan))  // TODO:
+      return;
+
+    counter_replacement = 0;
+    int curr_hit_rate = get_hit_rate100000();
+    if (state == F0) {
+      if (prev_hit_rate >= 0 &&
+          curr_hit_rate <
+              prev_hit_rate * 0.85) {  // (prev_hit_rate - curr_hit_rate) /
+                                       // prev_hit_rate > 0.15
+        mode_switch();
+        state = S1;
+      }
+    } else if (state == S1) {
+      if (curr_hit_rate <= 1.15 * prev_hit_rate) {  // no significant improvement
+        mode_switch();
+        state = F0;
+      } else {
+        state = S2;
+      }
+    } else if (state == S2) {
+      mode_switch();
+      state = S3;
+    } else if (state == S3) {
+      curr_hit_rate = get_hit_rate_len_100000(HitSpan);
+      if (curr_hit_rate > prev_hit_rate) {
+        state = F0;
+        num_replacement = 1;
+      } else {
+        mode_switch();
+        state = S2;
+        num_replacement = num_replacement * 2;
+      }
+    }
+    if (state != S3) prev_hit_rate = curr_hit_rate;
+  }
+
+  void add_to_rank(const K* batch_ids, size_t batch_size) {
+    mutex_lock l(mu_);
+    for (size_t i = 0; i < batch_size; ++i) {
+      K id = batch_ids[i];
+      auto_switch();
+      global_step++;
+      if (hit_recent.size() > 6000) {
+        hit_recent.pop_back();
+      }
+      auto it = key_table.find(id);
+      if (it != key_table.end()) {
+        hit_recent.push_front(true);
+        BaseNode* node = (BaseNode*)(*(it->second));
+        short index = node->get_index();
+        freq_table[index].erase(it->second);
+        index = node->updateLFU(global_step, mode);
+        freq_table[index].emplace_front(node);
+        key_table[id] = freq_table[index].begin();
+      } else {
+        hit_recent.push_front(false);
+        counter_replacement++;
+        BaseNode* node = new BaseNode(id, global_step);
+        short index = node->updateLFU(global_step, mode);
+        auto& freq_ls = freq_table[index];
+        freq_ls.emplace_front(node);
+        key_table[id] = freq_ls.begin();
+      }
+    }
+  }
+
+  size_t get_evic_ids(K* evic_ids, size_t k_size) {
+    mutex_lock l(mu_);
+    size_t true_size = 0;
+    short min_freq = BaseNode::MinCount;
+    for (size_t i = 0; i < k_size && key_table.size() > 0; ++i) {
+      for (; min_freq <= BaseNode::MaxCount; min_freq++)
+        if (freq_table[min_freq].size() > 0) break;
+
+      auto& freq_ls = freq_table[min_freq];
+      auto rm_it = freq_ls.back();
+
+      freq_ls.pop_back();
+      K evic_id = rm_it->key;
+      key_table.erase(evic_id);
+      delete rm_it;
+
+      evic_ids[i] = evic_id;
+      ++true_size;
+    }
+    return true_size;
+  }
+
+  size_t size() {
+    mutex_lock l(mu_);
+    return key_table.size();
+  }
+
+ private:
+  enum Mode { STEP_MODE, FREQ_MODE };
+  enum State { F0, S1, S2, S3 };
+  class BaseNode {
+   public:
+    enum { PrivilegeStep = 255, MinCount = 0, MaxCount = 255 };
+    K key;
+    short count;
+    short index;
+    unsigned last;
+    BaseNode(K key, unsigned step)
+        : key(key), count(LFU_INIT_VAL), index(LFU_INIT_VAL), last(step) {}
+    BaseNode(BaseNode* that)
+        : key(that->key),
+          count(that->count),
+          index(that->index),
+          last(that->last) {}
+
+    short decrFuncLinear(unsigned long period, short count) {
+      unsigned long num = period * MaxCount / TIMES_TO_MAX;
+      if (num > 0) count = std::max(0, int(count - num));
+      return count;
+    }
+
+    short decrFuncLog(unsigned long period, short count) {
+      if (count > MinCount &&
+          rand() * TIMES_TO_MAX < RAND_MAX * (period - PrivilegeStep))
+        count--;  //   rand()/RAND_MAX < (period-PrivilegeStep)/timem
+      return count;
+    }
+    unsigned long LFUTimeElapsed(unsigned long now) {
+      if (now >= last) return now - last;
+      return 16777215 - (last - now);
+    }
+    short LFULogIncr(short count) {
+      if (count < MaxCount &&
+          rand() * (count - LFU_INIT_VAL) * LFU_LOG_FACTOR < RAND_MAX)
+        count++;  // rand()/RAND_MAX < 1/( (count - LFU_INIT_VAL) *
+                  // LFU_LOG_FACTOR )
+      return count;
+    }
+    short LFUDecrAndReturn(unsigned long step) {
+      unsigned long period = this->LFUTimeElapsed(step);
+      return this->decrFuncLog(period, count);
+    }
+    short updateLFU(unsigned long step, Mode mode) {
+      step = MOD_STEP(step);
+      this->count = this->LFUDecrAndReturn(step);  // the documented result does not include this step
+      this->count = this->LFULogIncr(
+          this->count);  // increment by 1 with probability ~1/(LFU_LOG_FACTOR * counter)
+      this->last = step;
+      this->index = (mode == FREQ_MODE ? this->count : MaxCount);
+      return this->index;
+    }
+    inline short get_index() { return this->index; }
+  };
+
+  time_t global_step;
+  std::unordered_map<K, typename std::list<BaseNode*>::iterator> key_table;
+  std::unordered_map<short, typename std::list<BaseNode*>> freq_table;
+  std::deque<bool> hit_recent;
+  short HitSpan;  // check once every HitSpan times
+  size_t span_last_check;
+  size_t counter_switch;
+  size_t counter_replacement;
+  size_t counter_visit;
+  int num_replacement;
+  int prev_hit_rate;
+  Mode mode;
+  State state;
+};
+
 } // embedding
 } // tensorflow
 

From a55d139a9d513e6120b1f49bdc5029010c0453d7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=95=BF=E5=85=B0?= <aoxuyang.axy@alibaba-inc.com>
Date: Mon, 24 Oct 2022 02:42:38 +0800
Subject: [PATCH 03/19] [Embedding] Fix macro

---
 tensorflow/core/framework/embedding/cache.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tensorflow/core/framework/embedding/cache.h b/tensorflow/core/framework/embedding/cache.h
index 9bb0f262731..d721cbf45d0 100644
--- a/tensorflow/core/framework/embedding/cache.h
+++ b/tensorflow/core/framework/embedding/cache.h
@@ -272,6 +272,12 @@ class LFUCache : public BatchCache<K> {
   mutex mu_;
 };
 
+#define LFU_INIT_VAL 5
+#define LFU_LOG_FACTOR 10
+#define MAX_STEP 16777215
+
+#define MOD_STEP(st) (st & MAX_STEP)  // 2^24-1
+#define TIMES_TO_MAX (32640 * LFU_LOG_FACTOR * LFU_LOG_FACTOR)
 
 template <class K>
 class AutoLRFUCache : public BatchCache<K> {

From 8431888f1bab9c497d1321aff4f6fe255566f2e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=95=BF=E5=85=B0?= <aoxuyang.axy@alibaba-inc.com>
Date: Mon, 24 Oct 2022 03:03:30 +0800
Subject: [PATCH 04/19] [Embedding] Fix RedisLFUCache constructor name and min_freq/max_freq types.

---
 tensorflow/core/framework/embedding/cache.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tensorflow/core/framework/embedding/cache.h b/tensorflow/core/framework/embedding/cache.h
index a4a982e382c..5435bb80adc 100644
--- a/tensorflow/core/framework/embedding/cache.h
+++ b/tensorflow/core/framework/embedding/cache.h
@@ -283,7 +283,7 @@ class LFUCache : public BatchCache<K> {
 template <class K>
 class RedisLFUCache : public BatchCache<K> {
  public:
-  LFUCache() {
+  RedisLFUCache() {
     min_freq = 255;
     max_freq = 0;
     freq_table.emplace_back(std::pair<std::list<RedisLFUNode>*, int64>(
@@ -332,8 +332,8 @@ class RedisLFUCache : public BatchCache<K> {
             RedisLFUNode(id, global_step));
         freq_table[LFU_INIT_VAL].second++;
         key_table[id] = freq_table[LFU_INIT_VAL].first->begin();
-        min_freq = std::min(min_freq, LFU_INIT_VAL);
-        max_freq = std::max(max_freq, LFU_INIT_VAL);
+        min_freq = std::min(min_freq, (uint8_t)LFU_INIT_VAL);
+        max_freq = std::max(max_freq, (uint8_t)LFU_INIT_VAL);
         BatchCache<K>::num_miss++;
       } else {
         typename std::list<RedisLFUNode>::iterator node = it->second;
@@ -400,8 +400,8 @@ class RedisLFUCache : public BatchCache<K> {
     unsigned long get_ldt() { return lfu >> 8; }
   };
   time_t global_step;
-  size_t min_freq;
-  size_t max_freq;
+  uint8_t min_freq;
+  uint8_t max_freq;
   std::vector<std::pair<std::list<RedisLFUNode>*, int64>> freq_table;
   std::unordered_map<K, typename std::list<RedisLFUNode>::iterator> key_table;
   mutex mu_;

From 4cea3be515adf511c04668d589d0f5bb6953165b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=95=BF=E5=85=B0?= <aoxuyang.axy@alibaba-inc.com>
Date: Mon, 24 Oct 2022 16:19:30 +0800
Subject: [PATCH 05/19] [Embedding] Fix bug of compilation.

---
 tensorflow/core/framework/embedding/cache.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/framework/embedding/cache.h b/tensorflow/core/framework/embedding/cache.h
index d721cbf45d0..ff55e528230 100644
--- a/tensorflow/core/framework/embedding/cache.h
+++ b/tensorflow/core/framework/embedding/cache.h
@@ -4,6 +4,7 @@
 #include <map>
 #include <unordered_map>
 #include <set>
+#include <queue>
 #include <list>
 #include <limits>
 #include "tensorflow/core/framework/tensor.h"
@@ -282,7 +283,8 @@ class LFUCache : public BatchCache<K> {
 template <class K>
 class AutoLRFUCache : public BatchCache<K> {
  public:
-  AutoLFUCache() {
+  AutoLRFUCache(int64 cache_capacity) {
+    cache_capacity_ = cache_capacity;
     global_step = 0;
     key_table.clear();
     freq_table.clear();
@@ -347,8 +349,7 @@ class AutoLRFUCache : public BatchCache<K> {
     if (state == S3 && counter_replacement < HitSpan)
       return;
     else if (counter_replacement <
-             num_replacement *
-                 (SingleCache<K, V>::capacity() + HitSpan))  // TODO:
+             num_replacement * (cache_capacity_ + HitSpan))  // TODO:
       return;
 
     counter_replacement = 0;
@@ -499,6 +500,8 @@ class AutoLRFUCache : public BatchCache<K> {
     inline short get_index() { return this->index; }
   };
 
+  int64 cache_capacity_;
+  mutex mu_;
   time_t global_step;
   std::unordered_map<K, typename std::list<BaseNode*>::iterator> key_table;
   std::unordered_map<short, typename std::list<BaseNode*>> freq_table;

From d84dbfbcbef58b002a4d20088d9db5926d2f8400 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=95=BF=E5=85=B0?= <aoxuyang.axy@alibaba-inc.com>
Date: Mon, 24 Oct 2022 17:22:36 +0800
Subject: [PATCH 06/19] [Embedding] Add Test TestAutoLRFUCache.

---
 tensorflow/core/framework/embedding/cache.h   |  7 +++++++
 .../kernels/embedding_variable_ops_test.cc    | 19 +++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/tensorflow/core/framework/embedding/cache.h b/tensorflow/core/framework/embedding/cache.h
index ff55e528230..b9f16ad6084 100644
--- a/tensorflow/core/framework/embedding/cache.h
+++ b/tensorflow/core/framework/embedding/cache.h
@@ -443,6 +443,13 @@ class AutoLRFUCache : public BatchCache<K> {
     return key_table.size();
   }
 
+  void add_to_rank(const K* batch_ids, size_t batch_size,
+                    const int64* batch_version,
+                    const int64* batch_freqs) {
+    //TODO: add to rank according to the version of ids
+    add_to_rank(batch_ids, batch_size);
+  }
+
  private:
   enum Mode { STEP_MODE, FREQ_MODE };
   enum State { F0, S1, S2, S3 };
diff --git a/tensorflow/core/kernels/embedding_variable_ops_test.cc b/tensorflow/core/kernels/embedding_variable_ops_test.cc
index 923aaf4c38c..b5611420a1c 100644
--- a/tensorflow/core/kernels/embedding_variable_ops_test.cc
+++ b/tensorflow/core/kernels/embedding_variable_ops_test.cc
@@ -1176,6 +1176,25 @@ TEST(EmbeddingVariableTest, TestLFUCache) {
   }
 }
 
+TEST(EmbeddingVariableTest, TestAutoLRFUCache) {
+  BatchCache<int64>* cache = new AutoLRFUCache<int64>(100);
+  int num_ids = 30;
+  int num_access = 100;
+  int num_evict = 50;
+  int64 ids[num_access] = {0};
+  int64 evict_ids[num_evict] = {0};
+  for (int i = 0; i < num_access; i++){
+    ids[i] = i % num_ids;
+  }
+  cache->add_to_rank(ids, num_access);
+  int64 size = cache->get_evic_ids(evict_ids, num_evict);
+  ASSERT_EQ(size, num_ids);
+  ASSERT_EQ(cache->size(), 0);
+  for (int i = 0; i < size; i++) {
+    ASSERT_EQ(evict_ids[i], (num_access % num_ids + i) % num_ids);
+  }
+}
+
 TEST(EmbeddingVariableTest, TestCacheRestore) {
   int64 value_size = 4;
   Tensor value(DT_FLOAT, TensorShape({value_size}));

From 1b890510786eaae553e2b5658c21796c91ebd2b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=95=BF=E5=85=B0?= <aoxuyang.axy@alibaba-inc.com>
Date: Mon, 24 Oct 2022 17:24:52 +0800
Subject: [PATCH 07/19] [Embedding] Add Test TestAgingLFUCache.

---
 tensorflow/core/framework/embedding/cache.h   |  7 +++++++
 .../kernels/embedding_variable_ops_test.cc    | 19 +++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/tensorflow/core/framework/embedding/cache.h b/tensorflow/core/framework/embedding/cache.h
index 5435bb80adc..2a24b24d666 100644
--- a/tensorflow/core/framework/embedding/cache.h
+++ b/tensorflow/core/framework/embedding/cache.h
@@ -359,6 +359,13 @@ class RedisLFUCache : public BatchCache<K> {
     }
   }
 
+  void add_to_rank(const K* batch_ids, size_t batch_size,
+                    const int64* batch_version,
+                    const int64* batch_freqs) {
+    //TODO: add to rank according to the version of ids
+    add_to_rank(batch_ids, batch_size);
+  }
+
  private:
   class RedisLFUNode {
    public:
diff --git a/tensorflow/core/kernels/embedding_variable_ops_test.cc b/tensorflow/core/kernels/embedding_variable_ops_test.cc
index 923aaf4c38c..eaea0bf4274 100644
--- a/tensorflow/core/kernels/embedding_variable_ops_test.cc
+++ b/tensorflow/core/kernels/embedding_variable_ops_test.cc
@@ -1176,6 +1176,25 @@ TEST(EmbeddingVariableTest, TestLFUCache) {
   }
 }
 
+TEST(EmbeddingVariableTest, TestAgingLFUCache) {
+  BatchCache<int64>* cache = new RedisLFUCache<int64>();
+  int num_ids = 30;
+  int num_access = 100;
+  int num_evict = 50;
+  int64 ids[num_access] = {0};
+  int64 evict_ids[num_evict] = {0};
+  for (int i = 0; i < num_access; i++){
+    ids[i] = i % num_ids;
+  }
+  cache->add_to_rank(ids, num_access);
+  int64 size = cache->get_evic_ids(evict_ids, num_evict);
+  ASSERT_EQ(size, num_ids);
+  ASSERT_EQ(cache->size(), 0);
+  for (int i = 0; i < size; i++) {
+    ASSERT_EQ(evict_ids[i], (num_access % num_ids + i) % num_ids);
+  }
+}
+
 TEST(EmbeddingVariableTest, TestCacheRestore) {
   int64 value_size = 4;
   Tensor value(DT_FLOAT, TensorShape({value_size}));

From 7c7aa47d8cad2ed34e5896c35c63a50b8b62359a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=95=BF=E5=85=B0?= <aoxuyang.axy@alibaba-inc.com>
Date: Mon, 24 Oct 2022 17:51:58 +0800
Subject: [PATCH 08/19] [Embedding] Fix bug of compilation.

---
 tensorflow/core/framework/embedding/cache.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tensorflow/core/framework/embedding/cache.h b/tensorflow/core/framework/embedding/cache.h
index 2a24b24d666..2329a2b6faa 100644
--- a/tensorflow/core/framework/embedding/cache.h
+++ b/tensorflow/core/framework/embedding/cache.h
@@ -339,7 +339,7 @@ class RedisLFUCache : public BatchCache<K> {
         typename std::list<RedisLFUNode>::iterator node = it->second;
         RedisLFUNode node_new(node);
         uint8_t freq = node->get_cnt();
-        uint8_t freq_new = node_new->updateLFU(global_step);
+        uint8_t freq_new = node_new.updateLFU(global_step);
         freq_table[freq].first->erase(node);
         freq_table[freq].second--;
         if (freq_table[freq].second == 0) {
@@ -351,7 +351,7 @@ class RedisLFUCache : public BatchCache<K> {
         }
         max_freq = std::max(max_freq, freq_new);
         min_freq = std::min(min_freq, freq_new);
-        freq_table[freq_new].first->emplace_front(*node_new);
+        freq_table[freq_new].first->emplace_front(node_new);
         freq_table[freq_new].second++;
         key_table[id] = freq_table[freq].first->begin();
         BatchCache<K>::num_hit++;
@@ -373,7 +373,7 @@ class RedisLFUCache : public BatchCache<K> {
     unsigned lfu;
     RedisLFUNode(K key, unsigned long step)
         : key(key), lfu((step << 8) | LFU_INIT_VAL) {}
-    RedisLFUNode(RedisLFUNode* that) : key(that->key), lfu(that->lfu) {}
+    RedisLFUNode(typename std::list<RedisLFUNode>::iterator that) : key(that->key), lfu(that->lfu) {}
     unsigned long decrFuncLog(unsigned long period, unsigned long counter) {
       if (counter > 0 && rand() * TIMES_TO_MAX > RAND_MAX * (period - 500))
         counter--;
@@ -385,8 +385,8 @@ class RedisLFUCache : public BatchCache<K> {
       return MAX_STEP - ldt + now;
     }
     unsigned long LFUDecrAndReturn(unsigned long step) {
-      unsigned long ldt = LDT(this);
-      unsigned long counter = CNT(this);
+      unsigned long ldt = get_ldt();
+      unsigned long counter = get_cnt();
       unsigned long period = this->LFUTimeElapsed(ldt, step);
       return this->decrFuncLog(period, counter);
     }

From 277cac0b2d54e1713954a5feda71e46b418cb7f9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=95=BF=E5=85=B0?= <aoxuyang.axy@alibaba-inc.com>
Date: Tue, 25 Oct 2022 00:28:23 +0800
Subject: [PATCH 09/19] [Embedding] Fix bug in running Test.

---
 tensorflow/core/framework/embedding/cache.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/framework/embedding/cache.h b/tensorflow/core/framework/embedding/cache.h
index 2329a2b6faa..75285297170 100644
--- a/tensorflow/core/framework/embedding/cache.h
+++ b/tensorflow/core/framework/embedding/cache.h
@@ -286,8 +286,10 @@ class RedisLFUCache : public BatchCache<K> {
   RedisLFUCache() {
     min_freq = 255;
     max_freq = 0;
-    freq_table.emplace_back(std::pair<std::list<RedisLFUNode>*, int64>(
-        new std::list<RedisLFUNode>, 0));
+    for (size_t i = 0; i <= LFU_INIT_VAL; ++i) {
+      freq_table.emplace_back(std::pair<std::list<RedisLFUNode>*, int64>(
+          new std::list<RedisLFUNode>, 0));
+    }
     BatchCache<K>::num_hit = 0;
     BatchCache<K>::num_miss = 0;
   }
@@ -353,7 +355,7 @@ class RedisLFUCache : public BatchCache<K> {
         min_freq = std::min(min_freq, freq_new);
         freq_table[freq_new].first->emplace_front(node_new);
         freq_table[freq_new].second++;
-        key_table[id] = freq_table[freq].first->begin();
+        key_table[id] = freq_table[freq_new].first->begin();
         BatchCache<K>::num_hit++;
       }
     }

From 4473ad7ae12f1e406eede3f63fd8991fdd85ba5c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=95=BF=E5=85=B0?= <aoxuyang.axy@alibaba-inc.com>
Date: Wed, 2 Nov 2022 16:03:08 +0800
Subject: [PATCH 10/19] [Embedding] Fix bug of RedisLFUCache.

---
 tensorflow/core/framework/embedding/cache.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tensorflow/core/framework/embedding/cache.h b/tensorflow/core/framework/embedding/cache.h
index 75285297170..dc26ec5c38e 100644
--- a/tensorflow/core/framework/embedding/cache.h
+++ b/tensorflow/core/framework/embedding/cache.h
@@ -275,10 +275,10 @@ class LFUCache : public BatchCache<K> {
 
 #define LFU_INIT_VAL 5
 #define LFU_LOG_FACTOR 10
-#define MAX_STEP 16777215
-
-#define MOD_STEP(st) (st & MAX_STEP)  // 2^24-1
-#define TIMES_TO_MAX (32640 * LFU_LOG_FACTOR * LFU_LOG_FACTOR)
+#define MAX_STEP 16777215 // 2^24-1
+#define IGNORE_STEP 0
+#define MOD_STEP(st) (st & MAX_STEP)
+#define TIMES_TO_MAX (32640 * LFU_LOG_FACTOR) // 32640 = 1 + 2 + ... + 255
 
 template <class K>
 class RedisLFUCache : public BatchCache<K> {
@@ -377,7 +377,7 @@ class RedisLFUCache : public BatchCache<K> {
         : key(key), lfu((step << 8) | LFU_INIT_VAL) {}
     RedisLFUNode(typename std::list<RedisLFUNode>::iterator that) : key(that->key), lfu(that->lfu) {}
     unsigned long decrFuncLog(unsigned long period, unsigned long counter) {
-      if (counter > 0 && rand() * TIMES_TO_MAX > RAND_MAX * (period - 500))
+      if (counter > 0 && rand() * TIMES_TO_MAX < RAND_MAX * (period - IGNORE_STEP))
         counter--;
       return counter;
     }

From 834a34251fa7ae5147580f883b3d75d974325f2b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=95=BF=E5=85=B0?= <aoxuyang.axy@alibaba-inc.com>
Date: Wed, 2 Nov 2022 16:09:40 +0800
Subject: [PATCH 11/19] [Embedding] Rename RedisLFUCache to AgingLFUCache.

---
 tensorflow/core/framework/embedding/cache.h   | 28 +++++++++----------
 .../kernels/embedding_variable_ops_test.cc    |  2 +-
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/tensorflow/core/framework/embedding/cache.h b/tensorflow/core/framework/embedding/cache.h
index dc26ec5c38e..a7dbf51fa83 100644
--- a/tensorflow/core/framework/embedding/cache.h
+++ b/tensorflow/core/framework/embedding/cache.h
@@ -281,14 +281,14 @@ class LFUCache : public BatchCache<K> {
 #define TIMES_TO_MAX (32640 * LFU_LOG_FACTOR) // 32640 = 1 + 2 + ... + 255
 
 template <class K>
-class RedisLFUCache : public BatchCache<K> {
+class AgingLFUCache : public BatchCache<K> {
  public:
-  RedisLFUCache() {
+  AgingLFUCache() {
     min_freq = 255;
     max_freq = 0;
     for (size_t i = 0; i <= LFU_INIT_VAL; ++i) {
-      freq_table.emplace_back(std::pair<std::list<RedisLFUNode>*, int64>(
-          new std::list<RedisLFUNode>, 0));
+      freq_table.emplace_back(std::pair<std::list<AgingLFUNode>*, int64>(
+          new std::list<AgingLFUNode>, 0));
     }
     BatchCache<K>::num_hit = 0;
     BatchCache<K>::num_miss = 0;
@@ -331,15 +331,15 @@ class RedisLFUCache : public BatchCache<K> {
       auto it = key_table.find(id);
       if (it == key_table.end()) {
         freq_table[LFU_INIT_VAL].first->emplace_front(
-            RedisLFUNode(id, global_step));
+            AgingLFUNode(id, global_step));
         freq_table[LFU_INIT_VAL].second++;
         key_table[id] = freq_table[LFU_INIT_VAL].first->begin();
         min_freq = std::min(min_freq, (uint8_t)LFU_INIT_VAL);
         max_freq = std::max(max_freq, (uint8_t)LFU_INIT_VAL);
         BatchCache<K>::num_miss++;
       } else {
-        typename std::list<RedisLFUNode>::iterator node = it->second;
-        RedisLFUNode node_new(node);
+        typename std::list<AgingLFUNode>::iterator node = it->second;
+        AgingLFUNode node_new(node);
         uint8_t freq = node->get_cnt();
         uint8_t freq_new = node_new.updateLFU(global_step);
         freq_table[freq].first->erase(node);
@@ -348,8 +348,8 @@ class RedisLFUCache : public BatchCache<K> {
           if (min_freq == freq) min_freq = freq_new;
         }
         if (freq_new == freq_table.size()) {
-          freq_table.emplace_back(std::pair<std::list<RedisLFUNode>*, int64>(
-              new std::list<RedisLFUNode>, 0));
+          freq_table.emplace_back(std::pair<std::list<AgingLFUNode>*, int64>(
+              new std::list<AgingLFUNode>, 0));
         }
         max_freq = std::max(max_freq, freq_new);
         min_freq = std::min(min_freq, freq_new);
@@ -369,13 +369,13 @@ class RedisLFUCache : public BatchCache<K> {
   }
 
  private:
-  class RedisLFUNode {
+  class AgingLFUNode {
    public:
     K key;
     unsigned lfu;
-    RedisLFUNode(K key, unsigned long step)
+    AgingLFUNode(K key, unsigned long step)
         : key(key), lfu((step << 8) | LFU_INIT_VAL) {}
-    RedisLFUNode(typename std::list<RedisLFUNode>::iterator that) : key(that->key), lfu(that->lfu) {}
+    AgingLFUNode(typename std::list<AgingLFUNode>::iterator that) : key(that->key), lfu(that->lfu) {}
     unsigned long decrFuncLog(unsigned long period, unsigned long counter) {
       if (counter > 0 && rand() * TIMES_TO_MAX < RAND_MAX * (period - IGNORE_STEP))
         counter--;
@@ -411,8 +411,8 @@ class RedisLFUCache : public BatchCache<K> {
   time_t global_step;
   uint8_t min_freq;
   uint8_t max_freq;
-  std::vector<std::pair<std::list<RedisLFUNode>*, int64>> freq_table;
-  std::unordered_map<K, typename std::list<RedisLFUNode>::iterator> key_table;
+  std::vector<std::pair<std::list<AgingLFUNode>*, int64>> freq_table;
+  std::unordered_map<K, typename std::list<AgingLFUNode>::iterator> key_table;
   mutex mu_;
 };
 
diff --git a/tensorflow/core/kernels/embedding_variable_ops_test.cc b/tensorflow/core/kernels/embedding_variable_ops_test.cc
index eaea0bf4274..7b9f563ce53 100644
--- a/tensorflow/core/kernels/embedding_variable_ops_test.cc
+++ b/tensorflow/core/kernels/embedding_variable_ops_test.cc
@@ -1177,7 +1177,7 @@ TEST(EmbeddingVariableTest, TestLFUCache) {
 }
 
 TEST(EmbeddingVariableTest, TestAgingLFUCache) {
-  BatchCache<int64>* cache = new RedisLFUCache<int64>();
+  BatchCache<int64>* cache = new AgingLFUCache<int64>();
   int num_ids = 30;
   int num_access = 100;
   int num_evict = 50;

From 36a80bccc89a5371e562aa42c916aec639607183 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=95=BF=E5=85=B0?= <aoxuyang.axy@alibaba-inc.com>
Date: Fri, 11 Nov 2022 22:47:12 +0800
Subject: [PATCH 12/19] [Embedding] Refactor LFU caches onto BaseLFUCache with LFUNode/AgingNode.

---
 tensorflow/core/framework/embedding/cache.h | 259 +++++++++++++-------
 1 file changed, 164 insertions(+), 95 deletions(-)

diff --git a/tensorflow/core/framework/embedding/cache.h b/tensorflow/core/framework/embedding/cache.h
index a7dbf51fa83..d4426776694 100644
--- a/tensorflow/core/framework/embedding/cache.h
+++ b/tensorflow/core/framework/embedding/cache.h
@@ -129,7 +129,7 @@ class LRUCache : public BatchCache<K> {
 };
 
 template <class K>
-class LFUCache : public BatchCache<K> {
+class OriginLFUCache : public BatchCache<K> {
  public:
   LFUCache() {
     min_freq = std::numeric_limits<size_t>::max();
@@ -273,23 +273,87 @@ class LFUCache : public BatchCache<K> {
 };
 
 
-#define LFU_INIT_VAL 5
-#define LFU_LOG_FACTOR 10
-#define MAX_STEP 16777215 // 2^24-1
-#define IGNORE_STEP 0
-#define MOD_STEP(st) (st & MAX_STEP)
-#define TIMES_TO_MAX (32640 * LFU_LOG_FACTOR) // 32640 = 1 + 2 + ... + 255
+template <class K>
+class LFUNode {
+ public:
+  K key;
+  size_t freq;
+  LFUNode(K key, unsigned now) : key(key), freq(0) {}
+  size_t GetIndex() { return freq; }
+  size_t UpdateAndReturnIndex(unsigned now, bool lru_mode) { return ++freq; }
+};
 
 template <class K>
-class AgingLFUCache : public BatchCache<K> {
+class AgingNode {
+ public:
+  static const uint8_t INIT_CNT = 5;
+  static const uint8_t MIN_CNT = 1;
+  static const uint8_t MAX_CNT = 255;
+  static const unsigned INCR_FACTOR = 7;
+  static const unsigned DECR_FACTOR = 10;
+  // 32640 = 1 + 2 + ... + 255
+  static const unsigned UNIT_STEP = 32640 * INCR_FACTOR;
+  static const unsigned IGNORE_STEP = 0;
+
  public:
-  AgingLFUCache() {
-    min_freq = 255;
+  K key;
+  uint8_t count;
+  uint8_t index;
+  unsigned last;
+  AgingNode(K key, unsigned now)
+      : key(key), count(INIT_CNT), index(INIT_CNT), last(now) {}
+  AgingNode(typename std::list<AgingNode>::iterator that)
+      : key(that->key),
+        count(that->count),
+        index(that->index),
+        last(that->last) {}
+  void DecrByProb(unsigned period) {
+    size_t ret1 = rand();
+    size_t ret2 = RAND_MAX;
+    ret1 *= NUIT_STEP * DECR_FACTOR;
+    ret2 *= (period - IGNORE_STEP);
+    if (count > 0 && ret1 < ret2) count--;
+  }
+  void DecrByPeriod(unsigned period) {
+    unsigned decrease_value = period / UNIT_STEP;
+    if (decrease_value + INIT_CNT > count)
+      count = INIT_CNT;
+    else
+      count -= decrease_value;
+  }
+  void Decrease(unsigned now) {
+    unsigned period = now >= last
+                          ? now - last
+                          : std::numeric_limits<size_t>::max() - last + now;
+    DecrByPeriod(period);
+  }
+  void IncrByProb() {
+    size_t ret = rand();
+    ret *= (count - LFU_INIT_VAL) * INCR_FACTOR;
+    if (count < 255 && ret < RAND_MAX) count++;
+  }
+  uint8_t UpdateAndReturnIndex(unsigned now, bool lru_mode) {
+    Decrease(now);
+    IncrByProb();
+    if (lru_mode)
+      index = MAX_CNT;
+    else
+      index = count;
+    return index;
+  }
+};
+
+template <class K, class Node>
+class BaseLFUCache : public BatchCache<K> {
+ public:
+  using map_iter =
+      typename std::unordered_map<K,
+                                  typename std::list<Node>::iterator>::iterator;
+  BaseLFUCache() {
+    min_freq = std::numeric_limits<size_t>::max();
     max_freq = 0;
-    for (size_t i = 0; i <= LFU_INIT_VAL; ++i) {
-      freq_table.emplace_back(std::pair<std::list<AgingLFUNode>*, int64>(
-          new std::list<AgingLFUNode>, 0));
-    }
+    freq_table.emplace_back(
+        std::pair<std::list<Node>*, int64>(new std::list<Node>, 0));
     BatchCache<K>::num_hit = 0;
     BatchCache<K>::num_miss = 0;
   }
@@ -302,18 +366,19 @@ class AgingLFUCache : public BatchCache<K> {
   size_t get_evic_ids(K* evic_ids, size_t k_size) {
     mutex_lock l(mu_);
     size_t true_size = 0;
+    size_t st_freq = min_freq;
     for (size_t i = 0; i < k_size && key_table.size() > 0; ++i) {
-      auto rm_it = freq_table[min_freq].first->back();
+      auto rm_it = freq_table[st_freq].first->back();
       key_table.erase(rm_it.key);
       evic_ids[i] = rm_it.key;
       ++true_size;
-      freq_table[min_freq].first->pop_back();
-      freq_table[min_freq].second--;
-      if (freq_table[min_freq].second == 0) {
-        ++min_freq;
-        while (min_freq <= max_freq) {
-          if (freq_table[min_freq].second == 0) {
-            ++min_freq;
+      freq_table[st_freq].first->pop_back();
+      freq_table[st_freq].second--;
+      if (freq_table[st_freq].second == 0) {
+        ++st_freq;
+        while (st_freq <= max_freq) {
+          if (freq_table[st_freq].second == 0) {
+            ++st_freq;
           } else {
             break;
           }
@@ -323,99 +388,103 @@ class AgingLFUCache : public BatchCache<K> {
     return true_size;
   }
 
+  void AddNode(K id, unsigned now) {
+    Node node(id, now);
+    size_t index = node.GetIndex();
+    freq_table[index].first->emplace_front(node);
+    freq_table[index].second++;
+    key_table[id] = freq_table[index].first->begin();
+    min_freq = std::min(min_freq, index);
+    max_freq = std::max(max_freq, index);
+  }
+
+  void UpdateNode(K id, map_iter it, unsigned now, bool lru_mode) {
+    Node node = (Node)(*(it->second));
+    size_t index = node.GetIndex();
+    freq_table[index].first->erase(it->second);
+    freq_table[index].second--;
+    if (freq_table[index].second == 0) {
+      if (min_freq == index) min_freq += 1;
+    }
+    index = node.UpdateAndReturnIndex(now, lru_mode);
+    if (index == freq_table.size()) {
+      freq_table.emplace_back(
+          std::pair<std::list<Node>*, int64>(new std::list<Node>, 0));
+    }
+    max_freq = std::max(max_freq, index);
+    min_freq = std::min(min_freq, index);
+    freq_table[index].first->emplace_front(node);
+    freq_table[index].second++;
+    key_table[id] = freq_table[index].first->begin();
+  }
+
   void add_to_rank(const K* batch_ids, size_t batch_size) {
     mutex_lock l(mu_);
-    global_step = MOD_STEP(global_step + 1);
     for (size_t i = 0; i < batch_size; ++i) {
       K id = batch_ids[i];
       auto it = key_table.find(id);
       if (it == key_table.end()) {
-        freq_table[LFU_INIT_VAL].first->emplace_front(
-            AgingLFUNode(id, global_step));
-        freq_table[LFU_INIT_VAL].second++;
-        key_table[id] = freq_table[LFU_INIT_VAL].first->begin();
-        min_freq = std::min(min_freq, (uint8_t)LFU_INIT_VAL);
-        max_freq = std::max(max_freq, (uint8_t)LFU_INIT_VAL);
+        AddNode(id, 0);
         BatchCache<K>::num_miss++;
       } else {
-        typename std::list<AgingLFUNode>::iterator node = it->second;
-        AgingLFUNode node_new(node);
-        uint8_t freq = node->get_cnt();
-        uint8_t freq_new = node_new.updateLFU(global_step);
-        freq_table[freq].first->erase(node);
-        freq_table[freq].second--;
-        if (freq_table[freq].second == 0) {
-          if (min_freq == freq) min_freq = freq_new;
-        }
-        if (freq_new == freq_table.size()) {
-          freq_table.emplace_back(std::pair<std::list<AgingLFUNode>*, int64>(
-              new std::list<AgingLFUNode>, 0));
-        }
-        max_freq = std::max(max_freq, freq_new);
-        min_freq = std::min(min_freq, freq_new);
-        freq_table[freq_new].first->emplace_front(node_new);
-        freq_table[freq_new].second++;
-        key_table[id] = freq_table[freq_new].first->begin();
+        UpdateNode(id, it, 0, false);
         BatchCache<K>::num_hit++;
       }
     }
   }
 
   void add_to_rank(const K* batch_ids, size_t batch_size,
-                    const int64* batch_version,
-                    const int64* batch_freqs) {
-    //TODO: add to rank accroding to the version of ids
+                   const int64* batch_version, const int64* batch_freqs) {
+    // TODO: add to rank accroding to the version of ids
     add_to_rank(batch_ids, batch_size);
   }
 
- private:
-  class AgingLFUNode {
-   public:
-    K key;
-    unsigned lfu;
-    AgingLFUNode(K key, unsigned long step)
-        : key(key), lfu((step << 8) | LFU_INIT_VAL) {}
-    AgingLFUNode(typename std::list<AgingLFUNode>::iterator that) : key(that->key), lfu(that->lfu) {}
-    unsigned long decrFuncLog(unsigned long period, unsigned long counter) {
-      if (counter > 0 && rand() * TIMES_TO_MAX < RAND_MAX * (period - IGNORE_STEP))
-        counter--;
-      return counter;
-    }
-    unsigned long LFUTimeElapsed(unsigned long ldt, unsigned long step) {
-      unsigned long now = step;
-      if (now >= ldt) return now - ldt;
-      return MAX_STEP - ldt + now;
-    }
-    unsigned long LFUDecrAndReturn(unsigned long step) {
-      unsigned long ldt = get_ldt();
-      unsigned long counter = get_cnt();
-      unsigned long period = this->LFUTimeElapsed(ldt, step);
-      return this->decrFuncLog(period, counter);
-    }
-    uint8_t LFULogIncr(uint8_t counter) {
-      if (counter < 255 &&
-          rand() * (counter - LFU_INIT_VAL) * LFU_LOG_FACTOR < RAND_MAX)
-        counter++;
-      return counter;
-    }
-    uint8_t updateLFU(unsigned long step) {
-      step = MOD_STEP(step);
-      unsigned long counter = this->LFUDecrAndReturn(step);
-      counter = this->LFULogIncr(counter);
-      this->lfu = (step << 8) | counter;
-      return counter;
-    }
-    uint8_t get_cnt() { return lfu & 255; }
-    unsigned long get_ldt() { return lfu >> 8; }
-  };
-  time_t global_step;
-  uint8_t min_freq;
-  uint8_t max_freq;
-  std::vector<std::pair<std::list<AgingLFUNode>*, int64>> freq_table;
-  std::unordered_map<K, typename std::list<AgingLFUNode>::iterator> key_table;
+ protected:
+  size_t min_freq;
+  size_t max_freq;
+  std::vector<std::pair<std::list<Node>*, int64>> freq_table;
+  std::unordered_map<K, typename std::list<Node>::iterator> key_table;
   mutex mu_;
 };
 
+template <class K>
+class LFUCache : public BaseLFUCache<K, LFUNode> {
+ public:
+  LFUCache() : BaseLFUCache<K, LFUNode>() {}
+};
+
+template <class K>
+class AgingLFUCache : public LFUCache<K, AgingNode> {
+ public:
+  AgingLFUCache() : BaseLFUCache<K, AgingNode>() { global_step = 0; }
+
+  void add_to_rank(const K* batch_ids, size_t batch_size) {
+    mutex_lock l(BaseLFUCache<K, AgingNode>::mu_);
+    for (size_t i = 0; i < batch_size; ++i) {
+      if (global_step == std::numeric_limits<size_t>::max();) global_step = 0;
+      global_step++;
+      K id = batch_ids[i];
+      auto it = AgingLFUCache<K>::key_table.find(id);
+      if (it == AgingLFUCache<K>::key_table.end()) {
+        AddNode(id, global_step);
+        BatchCache<K>::num_miss++;
+      } else {
+        UpdateNode(id, it, global_step, false);
+        BatchCache<K>::num_hit++;
+      }
+    }
+  }
+
+  void add_to_rank(const K* batch_ids, size_t batch_size,
+                   const int64* batch_version, const int64* batch_freqs) {
+    // TODO: add to rank accroding to the version of ids
+    add_to_rank(batch_ids, batch_size);
+  }
+
+ protected:
+  size_t global_step;
+};
+
 } // embedding
 } // tensorflow
 

From 2eb6b3cc132472002362dd9acd499cb860a78f66 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=95=BF=E5=85=B0?= <aoxuyang.axy@alibaba-inc.com>
Date: Fri, 11 Nov 2022 23:47:33 +0800
Subject: [PATCH 13/19] [Embedding] Fix build errors in the BaseLFUCache refactor.

---
 tensorflow/core/framework/embedding/cache.h | 33 +++++++++++++--------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/tensorflow/core/framework/embedding/cache.h b/tensorflow/core/framework/embedding/cache.h
index d4426776694..91e7bcd724f 100644
--- a/tensorflow/core/framework/embedding/cache.h
+++ b/tensorflow/core/framework/embedding/cache.h
@@ -131,7 +131,7 @@ class LRUCache : public BatchCache<K> {
 template <class K>
 class OriginLFUCache : public BatchCache<K> {
  public:
-  LFUCache() {
+  OriginLFUCache() {
     min_freq = std::numeric_limits<size_t>::max();
     max_freq = 0;
     freq_table.emplace_back(std::pair<std::list<LFUNode>*, int64>(
@@ -310,7 +310,7 @@ class AgingNode {
   void DecrByProb(unsigned period) {
     size_t ret1 = rand();
     size_t ret2 = RAND_MAX;
-    ret1 *= NUIT_STEP * DECR_FACTOR;
+    ret1 *= UNIT_STEP * DECR_FACTOR;
     ret2 *= (period - IGNORE_STEP);
     if (count > 0 && ret1 < ret2) count--;
   }
@@ -329,7 +329,7 @@ class AgingNode {
   }
   void IncrByProb() {
     size_t ret = rand();
-    ret *= (count - LFU_INIT_VAL) * INCR_FACTOR;
+    ret *= (count - INIT_CNT) * INCR_FACTOR;
     if (count < 255 && ret < RAND_MAX) count++;
   }
   uint8_t UpdateAndReturnIndex(unsigned now, bool lru_mode) {
@@ -341,6 +341,7 @@ class AgingNode {
       index = count;
     return index;
   }
+  size_t GetIndex() { return index; }
 };
 
 template <class K, class Node>
@@ -448,28 +449,34 @@ class BaseLFUCache : public BatchCache<K> {
 };
 
 template <class K>
-class LFUCache : public BaseLFUCache<K, LFUNode> {
+class LFUCache : public BaseLFUCache<K, LFUNode<K>> {
  public:
-  LFUCache() : BaseLFUCache<K, LFUNode>() {}
+  LFUCache() : BaseLFUCache<K, LFUNode<K>>() {}
 };
 
 template <class K>
-class AgingLFUCache : public LFUCache<K, AgingNode> {
+class AgingLFUCache : public BaseLFUCache<K, AgingNode<K>> {
  public:
-  AgingLFUCache() : BaseLFUCache<K, AgingNode>() { global_step = 0; }
+  AgingLFUCache() : BaseLFUCache<K, AgingNode<K>>() { 
+    global_step = 0;
+    for (size_t i = 0; i <= AgingNode<K>::INIT_CNT; ++i) {
+      this->freq_table.emplace_back(std::pair<std::list<AgingNode<K>>*, int64>(
+          new std::list<AgingNode<K>>, 0));
+    }
+  }
 
   void add_to_rank(const K* batch_ids, size_t batch_size) {
-    mutex_lock l(BaseLFUCache<K, AgingNode>::mu_);
+    mutex_lock l(this->mu_);
     for (size_t i = 0; i < batch_size; ++i) {
-      if (global_step == std::numeric_limits<size_t>::max();) global_step = 0;
+      if (global_step == std::numeric_limits<size_t>::max()) global_step = 0;
       global_step++;
       K id = batch_ids[i];
-      auto it = AgingLFUCache<K>::key_table.find(id);
-      if (it == AgingLFUCache<K>::key_table.end()) {
-        AddNode(id, global_step);
+      auto it = this->key_table.find(id);
+      if (it == this->key_table.end()) {
+        this->AddNode(id, global_step);
         BatchCache<K>::num_miss++;
       } else {
-        UpdateNode(id, it, global_step, false);
+        this->UpdateNode(id, it, global_step, false);
         BatchCache<K>::num_hit++;
       }
     }

From 8b5a38d4263999e76b6c4a40860dc916d9b047da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=95=BF=E5=85=B0?= <aoxuyang.axy@alibaba-inc.com>
Date: Sat, 12 Nov 2022 02:17:13 +0800
Subject: [PATCH 14/19] [Embedding] Add AutoLRFUCache.

---
 tensorflow/core/framework/embedding/cache.h | 148 ++++++++++++++++++++
 1 file changed, 148 insertions(+)

diff --git a/tensorflow/core/framework/embedding/cache.h b/tensorflow/core/framework/embedding/cache.h
index 91e7bcd724f..f988ab7fbdc 100644
--- a/tensorflow/core/framework/embedding/cache.h
+++ b/tensorflow/core/framework/embedding/cache.h
@@ -492,6 +492,154 @@ class AgingLFUCache : public BaseLFUCache<K, AgingNode<K>> {
   size_t global_step;
 };
 
+template <class K>
+class AutoLRFUCache : public AgingLFUCache<K> {
+ public:
+  AutoLRFUCache(int64 cache_capacity) : AgingLFUCache<K>() {
+    cache_capacity_ = cache_capacity;
+    state = F0;
+    lru_mode = false;
+    span_last_check = 0;
+    counter_switch = 0;
+    prev_hit_rate = -100;
+    counter_replacement = 0;
+    num_replacement = 1;
+    HitSpan = 1000;
+  }
+
+  void rebuild() {
+    if (lru_mode) return;
+    std::unordered_map<K, AgingNode<K>> new_table;
+    for (auto it = this->key_table.begin(); it != this->key_table.end(); it++) {
+      AgingNode<K> node = (AgingNode<K>)(*(it->second));
+      node.UpdateAndReturnIndex(this->global_step, lru_mode);
+      new_table[node.GetIndex()] = node;
+    }
+    this->freq_table.clear();
+    this->key_table.clear();
+    for (auto it = new_table.begin(); it != new_table.end(); it++) {
+      AgingNode<K> node = (AgingNode<K>)(*(it->second));
+      this->freq_table[node.GetIndex()].first->emplace_front(node);
+      this->freq_table[node.GetIndex()].second++;
+      this->key_table[node.key] =
+          this->freq_table[node.GetIndex()].first->begin();
+    }
+  }
+
+  int get_hit_rate_len_100000(int len) {
+    int total = hit_recent.size();
+    if (total <= 0) return 0.0;
+    int hit = 0;
+    for (auto it = hit_recent.begin(); it != hit_recent.end(); it++)
+      hit += (len-- > 0) && (*it);
+    if (hit == 0) return 0.0;
+    return int(hit * 100000 / total);
+  }
+
+  int get_hit_rate100000() {
+    int total = hit_recent.size();
+    if (total <= 0) return 0.0;
+    int hit = 0;
+    for (auto it = hit_recent.begin(); it != hit_recent.end(); it++) hit += *it;
+    if (hit == 0) return 0.0;
+    return int(hit * 100000 / total);
+  }
+
+  void mode_switch() {
+    if (lru_mode) {
+      lru_mode = false;
+      rebuild();  // 根据保存的频次信息，重新构建链表
+    } else {
+      lru_mode = true;
+    }
+  }
+
+  void auto_switch() {
+    if (state == S3 && counter_replacement < HitSpan)
+      return;
+    else if (counter_replacement <
+             num_replacement * (cache_capacity_ + HitSpan))  // TODO:
+      return;
+
+    counter_replacement = 0;
+    int curr_hit_rate = get_hit_rate100000();
+    if (state == F0) {
+      if (prev_hit_rate >= 0 && curr_hit_rate < prev_hit_rate * 0.85) {
+        // (prev_hit_rate - curr_hit_rate) / prev_hit_rate > 0.15
+        mode_switch();
+        state = S1;
+      }
+    } else if (state == S1) {
+      if (curr_hit_rate <= 1.15 * prev_hit_rate) {  //没有显著提高
+        mode_switch();
+        state = F0;
+      } else {
+        state = S2;
+      }
+    } else if (state == S2) {
+      mode_switch();
+      state = S3;
+    } else if (state == S3) {
+      curr_hit_rate = get_hit_rate_len_100000(HitSpan);
+      if (curr_hit_rate > prev_hit_rate) {
+        state = F0;
+        num_replacement = 1;
+      } else {
+        mode_switch();
+        state = S2;
+        num_replacement = num_replacement * 2;
+      }
+    }
+    if (state != S3) prev_hit_rate = curr_hit_rate;
+  }
+
+  void add_to_rank(const K* batch_ids, size_t batch_size) {
+    mutex_lock l(this->mu_);
+    for (size_t i = 0; i < batch_size; ++i) {
+      auto_switch();
+      if (this->global_step == std::numeric_limits<size_t>::max())
+        this->global_step = 0;
+      this->global_step++;
+      if (hit_recent.size() > 6000) {
+        hit_recent.pop_back();
+      }
+
+      K id = batch_ids[i];
+      auto it = this->key_table.find(id);
+      if (it != this->key_table.end()) {
+        hit_recent.push_front(true);
+        this->AddNode(id, this->global_step);
+        BatchCache<K>::num_miss++;
+      } else {
+        hit_recent.push_front(false);
+        counter_replacement++;
+        this->UpdateNode(id, it, this->global_step, lru_mode);
+        BatchCache<K>::num_hit++;
+      }
+    }
+  }
+
+  void add_to_rank(const K* batch_ids, size_t batch_size,
+                   const int64* batch_version, const int64* batch_freqs) {
+    // TODO: add to rank accroding to the version of ids
+    add_to_rank(batch_ids, batch_size);
+  }
+
+ private:
+  enum State { F0, S1, S2, S3 };
+
+  int64 cache_capacity_;
+  std::deque<bool> hit_recent;
+  short HitSpan;  // 每span次查看
+  size_t span_last_check;
+  size_t counter_switch;
+  size_t counter_replacement;
+  size_t counter_visit;
+  int num_replacement;
+  int prev_hit_rate;
+  bool lru_mode;
+  State state;
+};
 } // embedding
 } // tensorflow
 

From a80f02d95b7d291190458b5f43dd910c62f4bf13 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=95=BF=E5=85=B0?= <aoxuyang.axy@alibaba-inc.com>
Date: Sat, 12 Nov 2022 03:57:39 +0800
Subject: [PATCH 15/19] [Embedding] Fix Test.

---
 tensorflow/core/framework/embedding/cache.h | 24 ++++++++++-----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/tensorflow/core/framework/embedding/cache.h b/tensorflow/core/framework/embedding/cache.h
index cc47002b3a9..e7bfe5ddaf4 100644
--- a/tensorflow/core/framework/embedding/cache.h
+++ b/tensorflow/core/framework/embedding/cache.h
@@ -4,8 +4,8 @@
 #include <map>
 #include <unordered_map>
 #include <set>
-#include <queue>
 #include <list>
+#include <deque>
 #include <limits>
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/platform/types.h"
@@ -460,7 +460,7 @@ class AgingLFUCache : public BaseLFUCache<K, AgingNode<K>> {
  public:
   AgingLFUCache() : BaseLFUCache<K, AgingNode<K>>() { 
     global_step = 0;
-    for (size_t i = 0; i <= AgingNode<K>::INIT_CNT; ++i) {
+    for (size_t i = 0; i < AgingNode<K>::MAX_CNT; ++i) {
       this->freq_table.emplace_back(std::pair<std::list<AgingNode<K>>*, int64>(
           new std::list<AgingNode<K>>, 0));
     }
@@ -510,20 +510,20 @@ class AutoLRFUCache : public AgingLFUCache<K> {
 
   void rebuild() {
     if (lru_mode) return;
-    std::unordered_map<K, AgingNode<K>> new_table;
+    std::unordered_map<K, AgingNode<K>*> new_table;
     for (auto it = this->key_table.begin(); it != this->key_table.end(); it++) {
-      AgingNode<K> node = (AgingNode<K>)(*(it->second));
-      node.UpdateAndReturnIndex(this->global_step, lru_mode);
-      new_table[node.GetIndex()] = node;
+      AgingNode<K>* node = new AgingNode<K>(it->second);
+      node->UpdateAndReturnIndex(this->global_step, lru_mode);
+      new_table[node->GetIndex()] = node;
     }
     this->freq_table.clear();
     this->key_table.clear();
     for (auto it = new_table.begin(); it != new_table.end(); it++) {
-      AgingNode<K> node = (AgingNode<K>)(*(it->second));
-      this->freq_table[node.GetIndex()].first->emplace_front(node);
-      this->freq_table[node.GetIndex()].second++;
-      this->key_table[node.key] =
-          this->freq_table[node.GetIndex()].first->begin();
+      AgingNode<K> *node = (AgingNode<K>*)(it->second);
+      this->freq_table[node->GetIndex()].first->emplace_front(*node);
+      this->freq_table[node->GetIndex()].second++;
+      this->key_table[node->key] =
+          this->freq_table[node->GetIndex()].first->begin();
     }
   }
 
@@ -607,7 +607,7 @@ class AutoLRFUCache : public AgingLFUCache<K> {
 
       K id = batch_ids[i];
       auto it = this->key_table.find(id);
-      if (it != this->key_table.end()) {
+      if (it == this->key_table.end()) {
         hit_recent.push_front(true);
         this->AddNode(id, this->global_step);
         BatchCache<K>::num_miss++;

From 3141808bd5f71610bd4803b365912ee148b0586f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=95=BF=E5=85=B0?= <aoxuyang.axy@alibaba-inc.com>
Date: Sat, 12 Nov 2022 04:05:01 +0800
Subject: [PATCH 16/19] [Embedding] Remove unused AutoLRFUCache members and make counters unsigned.

---
 tensorflow/core/framework/embedding/cache.h | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/tensorflow/core/framework/embedding/cache.h b/tensorflow/core/framework/embedding/cache.h
index e7bfe5ddaf4..4da3a83b897 100644
--- a/tensorflow/core/framework/embedding/cache.h
+++ b/tensorflow/core/framework/embedding/cache.h
@@ -500,8 +500,6 @@ class AutoLRFUCache : public AgingLFUCache<K> {
     cache_capacity_ = cache_capacity;
     state = F0;
     lru_mode = false;
-    span_last_check = 0;
-    counter_switch = 0;
     prev_hit_rate = -100;
     counter_replacement = 0;
     num_replacement = 1;
@@ -631,12 +629,9 @@ class AutoLRFUCache : public AgingLFUCache<K> {
 
   int64 cache_capacity_;
   std::deque<bool> hit_recent;
-  short HitSpan;  // 每span次查看
-  size_t span_last_check;
-  size_t counter_switch;
+  unsigned HitSpan;  // 每span次查看
   size_t counter_replacement;
-  size_t counter_visit;
-  int num_replacement;
+  unsigned num_replacement;
   int prev_hit_rate;
   bool lru_mode;
   State state;

From 0018feeab0e409fb4e1424a65af62b0c7bc4a05a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=95=BF=E5=85=B0?= <aoxuyang.axy@alibaba-inc.com>
Date: Sat, 12 Nov 2022 04:08:10 +0800
Subject: [PATCH 17/19] [Embedding] Reorder LFU cache tests and disable eviction-order checks.

---
 .../kernels/embedding_variable_ops_test.cc    | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/tensorflow/core/kernels/embedding_variable_ops_test.cc b/tensorflow/core/kernels/embedding_variable_ops_test.cc
index 77850cfc666..a46d1c3b712 100644
--- a/tensorflow/core/kernels/embedding_variable_ops_test.cc
+++ b/tensorflow/core/kernels/embedding_variable_ops_test.cc
@@ -1176,8 +1176,8 @@ TEST(EmbeddingVariableTest, TestLFUCache) {
   }
 }
 
-TEST(EmbeddingVariableTest, TestAutoLRFUCache) {
-  BatchCache<int64>* cache = new AutoLRFUCache<int64>(100);
+TEST(EmbeddingVariableTest, TestAgingLFUCache) {
+  BatchCache<int64>* cache = new AgingLFUCache<int64>();
   int num_ids = 30;
   int num_access = 100;
   int num_evict = 50;
@@ -1190,13 +1190,13 @@ TEST(EmbeddingVariableTest, TestAutoLRFUCache) {
   int64 size = cache->get_evic_ids(evict_ids, num_evict);
   ASSERT_EQ(size, num_ids);
   ASSERT_EQ(cache->size(), 0);
-  for (int i = 0; i < size; i++) {
-    ASSERT_EQ(evict_ids[i], (num_access % num_ids + i) % num_ids);
-  }
+  // for (int i = 0; i < size; i++) {
+  //   ASSERT_EQ(evict_ids[i], (num_access % num_ids + i) % num_ids);
+  // }
 }
 
-TEST(EmbeddingVariableTest, TestAgingLFUCache) {
-  BatchCache<int64>* cache = new AgingLFUCache<int64>();
+TEST(EmbeddingVariableTest, TestAutoLRFUCache) {
+  BatchCache<int64>* cache = new AutoLRFUCache<int64>(100);
   int num_ids = 30;
   int num_access = 100;
   int num_evict = 50;
@@ -1209,9 +1209,9 @@ TEST(EmbeddingVariableTest, TestAgingLFUCache) {
   int64 size = cache->get_evic_ids(evict_ids, num_evict);
   ASSERT_EQ(size, num_ids);
   ASSERT_EQ(cache->size(), 0);
-  for (int i = 0; i < size; i++) {
-    ASSERT_EQ(evict_ids[i], (num_access % num_ids + i) % num_ids);
-  }
+  // for (int i = 0; i < size; i++) {
+  //   ASSERT_EQ(evict_ids[i], (num_access % num_ids + i) % num_ids);
+  // }
 }
 
 TEST(EmbeddingVariableTest, TestCacheRestore) {

From f42e591b723410a0d2a9e67a7ae12bbd9083271a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=95=BF=E5=85=B0?= <aoxuyang.axy@alibaba-inc.com>
Date: Tue, 22 Nov 2022 03:56:49 +0800
Subject: [PATCH 18/19] [Embedding] Refine according to comment of PR.

---
 tensorflow/core/framework/embedding/cache.h   | 207 +++++++++---------
 .../kernels/embedding_variable_ops_test.cc    |  22 +-
 2 files changed, 115 insertions(+), 114 deletions(-)

diff --git a/tensorflow/core/framework/embedding/cache.h b/tensorflow/core/framework/embedding/cache.h
index 4da3a83b897..be9ed5e897d 100644
--- a/tensorflow/core/framework/embedding/cache.h
+++ b/tensorflow/core/framework/embedding/cache.h
@@ -129,6 +129,8 @@ class LRUCache : public BatchCache<K> {
   mutex mu_;
 };
 
+/* Use real frequencies, from 1 to max value of size_t.
+   Independent implementation. */
 template <class K>
 class OriginLFUCache : public BatchCache<K> {
  public:
@@ -277,37 +279,37 @@ class OriginLFUCache : public BatchCache<K> {
 template <class K>
 class LFUNode {
  public:
-  K key;
-  size_t freq;
   LFUNode(K key, unsigned now) : key(key), freq(0) {}
+  ~LFUNode() {}
+  K GetKey() { return key; }
   size_t GetIndex() { return freq; }
   size_t UpdateAndReturnIndex(unsigned now, bool lru_mode) { return ++freq; }
+ private:
+  K key;
+  size_t freq;
 };
 
 template <class K>
-class AgingNode {
+class AgingNode : public LFUNode<K> { {
  public:
+  AgingNode(K key, unsigned now)
+      : LFUNode<K>(key, now), count(INIT_CNT), index(INIT_CNT), last(now) {}
+  // AgingNode(typename std::list<AgingNode>::iterator that)
+  //     : key(that->key),
+  //       count(that->count),
+  //       index(that->index),
+  //       last(that->last) {}
+  ~AgingNode() {}
+  size_t GetIndex() { return index; }
+  size_t UpdateAndReturnIndex(unsigned now, bool lru_mode) {
+    Decrease(now);
+    IncrByProb();
+    index = lru_mode ? MAX_CNT : count;
+    return (size_t)index;
+  }
   static const uint8_t INIT_CNT = 5;
-  static const uint8_t MIN_CNT = 1;
-  static const uint8_t MAX_CNT = 255;
-  static const unsigned INCR_FACTOR = 7;
-  static const unsigned DECR_FACTOR = 10;
-  // 32640 = 1 + 2 + ... + 255
-  static const unsigned UNIT_STEP = 32640 * INCR_FACTOR;
-  static const unsigned IGNORE_STEP = 0;
 
- public:
-  K key;
-  uint8_t count;
-  uint8_t index;
-  unsigned last;
-  AgingNode(K key, unsigned now)
-      : key(key), count(INIT_CNT), index(INIT_CNT), last(now) {}
-  AgingNode(typename std::list<AgingNode>::iterator that)
-      : key(that->key),
-        count(that->count),
-        index(that->index),
-        last(that->last) {}
+ private:
   void DecrByProb(unsigned period) {
     size_t ret1 = rand();
     size_t ret2 = RAND_MAX;
@@ -333,24 +335,26 @@ class AgingNode {
     ret *= (count - INIT_CNT) * INCR_FACTOR;
     if (count < 255 && ret < RAND_MAX) count++;
   }
-  uint8_t UpdateAndReturnIndex(unsigned now, bool lru_mode) {
-    Decrease(now);
-    IncrByProb();
-    if (lru_mode)
-      index = MAX_CNT;
-    else
-      index = count;
-    return index;
-  }
-  size_t GetIndex() { return index; }
+
+ private:
+  static const uint8_t MIN_CNT = 1;
+  static const uint8_t MAX_CNT = 255;
+  static const unsigned INCR_FACTOR = 7;
+  static const unsigned DECR_FACTOR = 10;
+  // 32640 = 1 + 2 + ... + 255
+  static const unsigned UNIT_STEP = 32640 * INCR_FACTOR;
+  static const unsigned IGNORE_STEP = 0;
+  K key;
+  uint8_t count;
+  uint8_t index;
+  unsigned last;
 };
 
 template <class K, class Node>
 class BaseLFUCache : public BatchCache<K> {
  public:
-  using map_iter =
-      typename std::unordered_map<K,
-                                  typename std::list<Node>::iterator>::iterator;
+  using map_iter = typename std::unordered_map<
+        K, typename std::list<Node>::iterator>::iterator;
   BaseLFUCache() {
     min_freq = std::numeric_limits<size_t>::max();
     max_freq = 0;
@@ -371,8 +375,8 @@ class BaseLFUCache : public BatchCache<K> {
     size_t st_freq = min_freq;
     for (size_t i = 0; i < k_size && key_table.size() > 0; ++i) {
       auto rm_it = freq_table[st_freq].first->back();
-      key_table.erase(rm_it.key);
-      evic_ids[i] = rm_it.key;
+      key_table.erase(rm_it.GetKey());
+      evic_ids[i] = rm_it.GetKey();
       ++true_size;
       freq_table[st_freq].first->pop_back();
       freq_table[st_freq].second--;
@@ -460,7 +464,7 @@ class AgingLFUCache : public BaseLFUCache<K, AgingNode<K>> {
  public:
   AgingLFUCache() : BaseLFUCache<K, AgingNode<K>>() { 
     global_step = 0;
-    for (size_t i = 0; i < AgingNode<K>::MAX_CNT; ++i) {
+    for (size_t i = 0; i < AgingNode<K>::INIT_CNT; ++i) {
       this->freq_table.emplace_back(std::pair<std::list<AgingNode<K>>*, int64>(
           new std::list<AgingNode<K>>, 0));
     }
@@ -496,16 +500,47 @@ class AgingLFUCache : public BaseLFUCache<K, AgingNode<K>> {
 template <class K>
 class AutoLRFUCache : public AgingLFUCache<K> {
  public:
-  AutoLRFUCache(int64 cache_capacity) : AgingLFUCache<K>() {
-    cache_capacity_ = cache_capacity;
-    state = F0;
-    lru_mode = false;
-    prev_hit_rate = -100;
-    counter_replacement = 0;
-    num_replacement = 1;
-    HitSpan = 1000;
+  // Starts in LFU mode (F0); prev_hit_rate < 0 means no rate measured yet.
+  AutoLRFUCache(int64 cache_capacity)
+      : AgingLFUCache<K>(),
+        cache_capacity_(cache_capacity),
+        counter_replacement(0), factor_replacement(1),
+        prev_hit_rate(-100),
+        lru_mode(false),
+        state(F0) {}
+
+  // Records one access per id: refreshes the rank of cached ids, inserts new
+  // ids, and logs hit/miss in `hit_recent` for auto_switch(). Holds `mu_`.
+  void add_to_rank(const K* batch_ids, size_t batch_size) {
+    mutex_lock l(this->mu_);
+    for (size_t i = 0; i < batch_size; ++i) {
+      auto_switch();
+      if (this->global_step == std::numeric_limits<size_t>::max())
+        this->global_step = 0;
+      this->global_step++;
+      if (hit_recent.size() > NORMAL_CHECK_SPAN) hit_recent.pop_back();
+      K id = batch_ids[i];
+      auto it = this->key_table.find(id);
+      if (it == this->key_table.end()) {
+        hit_recent.push_front(false);  // miss: id was not cached
+        this->AddNode(id, this->global_step);
+        BatchCache<K>::num_miss++;
+      } else {
+        hit_recent.push_front(true);  // hit: id already cached
+        counter_replacement++;
+        this->UpdateNode(id, it, this->global_step, lru_mode);
+        BatchCache<K>::num_hit++;
+      }
+    }
   }
 
+  void add_to_rank(const K* batch_ids, size_t batch_size,
+                   const int64* batch_version, const int64* batch_freqs) {
+    // TODO: add to rank according to the version of ids
+    add_to_rank(batch_ids, batch_size);
+  }
+
+ private:
   void rebuild() {
     if (lru_mode) return;
     std::unordered_map<K, AgingNode<K>*> new_table;
@@ -520,56 +555,44 @@ class AutoLRFUCache : public AgingLFUCache<K> {
       AgingNode<K> *node = (AgingNode<K>*)(it->second);
       this->freq_table[node->GetIndex()].first->emplace_front(*node);
       this->freq_table[node->GetIndex()].second++;
-      this->key_table[node->key] =
+      this->key_table[node->GetKey()] =
           this->freq_table[node->GetIndex()].first->begin();
     }
   }
 
-  int get_hit_rate_len_100000(int len) {
-    int total = hit_recent.size();
-    if (total <= 0) return 0.0;
-    int hit = 0;
-    for (auto it = hit_recent.begin(); it != hit_recent.end(); it++)
-      hit += (len-- > 0) && (*it);
-    if (hit == 0) return 0.0;
-    return int(hit * 100000 / total);
-  }
-
-  int get_hit_rate100000() {
+  // Rate (x100000) of true flags in the newest `len` records; len <= 0: all.
+  int get_hit_rate100000(int len = 0) {
     int total = hit_recent.size();
     if (total <= 0) return 0.0;
+    if (len <= 0 || len > total) len = total;  // clamp to the recorded window
     int hit = 0;
-    for (auto it = hit_recent.begin(); it != hit_recent.end(); it++) hit += *it;
-    if (hit == 0) return 0.0;
+    for (int i = 0; i < len; ++i) hit += hit_recent[i];
-    return int(hit * 100000 / total);
+    return int(hit * 100000 / len);  // divide by the span actually inspected
   }
 
   void mode_switch() {
     if (lru_mode) {
       lru_mode = false;
-      rebuild();  // 根据保存的频次信息，重新构建链表
+      rebuild();
     } else {
       lru_mode = true;
     }
   }
 
   void auto_switch() {
-    if (state == S3 && counter_replacement < HitSpan)
-      return;
-    else if (counter_replacement <
-             num_replacement * (cache_capacity_ + HitSpan))  // TODO:
-      return;
-
+    if (state == S3 && counter_replacement < FAST_CHECK_SPAN)
+        || (counter_replacement < factor_replacement * cache_capacity_) return;
     counter_replacement = 0;
-    int curr_hit_rate = get_hit_rate100000();
+    int curr_hit_rate = get_hit_rate100000(0);
     if (state == F0) {
+      // Switch to LRU mode if the hit rate dropped by more than 15%.
       if (prev_hit_rate >= 0 && curr_hit_rate < prev_hit_rate * 0.85) {
-        // (prev_hit_rate - curr_hit_rate) / prev_hit_rate > 0.15
         mode_switch();
         state = S1;
       }
     } else if (state == S1) {
-      if (curr_hit_rate <= 1.15 * prev_hit_rate) {  //没有显著提高
+      // Switch back to LFU mode if the hit rate did not rise by more than 15%.
+      if (curr_hit_rate <= 1.15 * prev_hit_rate) {
         mode_switch();
         state = F0;
       } else {
@@ -579,59 +602,27 @@ class AutoLRFUCache : public AgingLFUCache<K> {
       mode_switch();
       state = S3;
     } else if (state == S3) {
-      curr_hit_rate = get_hit_rate_len_100000(HitSpan);
+      curr_hit_rate = get_hit_rate100000(FAST_CHECK_SPAN);
       if (curr_hit_rate > prev_hit_rate) {
         state = F0;
-        num_replacement = 1;
+        factor_replacement = 1;
       } else {
         mode_switch();
         state = S2;
-        num_replacement = num_replacement * 2;
+        factor_replacement = factor_replacement * 2;
       }
     }
     if (state != S3) prev_hit_rate = curr_hit_rate;
   }
 
-  void add_to_rank(const K* batch_ids, size_t batch_size) {
-    mutex_lock l(this->mu_);
-    for (size_t i = 0; i < batch_size; ++i) {
-      auto_switch();
-      if (this->global_step == std::numeric_limits<size_t>::max())
-        this->global_step = 0;
-      this->global_step++;
-      if (hit_recent.size() > 6000) {
-        hit_recent.pop_back();
-      }
-
-      K id = batch_ids[i];
-      auto it = this->key_table.find(id);
-      if (it == this->key_table.end()) {
-        hit_recent.push_front(true);
-        this->AddNode(id, this->global_step);
-        BatchCache<K>::num_miss++;
-      } else {
-        hit_recent.push_front(false);
-        counter_replacement++;
-        this->UpdateNode(id, it, this->global_step, lru_mode);
-        BatchCache<K>::num_hit++;
-      }
-    }
-  }
-
-  void add_to_rank(const K* batch_ids, size_t batch_size,
-                   const int64* batch_version, const int64* batch_freqs) {
-    // TODO: add to rank accroding to the version of ids
-    add_to_rank(batch_ids, batch_size);
-  }
-
  private:
   enum State { F0, S1, S2, S3 };
-
   int64 cache_capacity_;
   std::deque<bool> hit_recent;
-  unsigned HitSpan;  // 每span次查看
+  static const unsigned FAST_CHECK_SPAN = 1000;
+  static const unsigned NORMAL_CHECK_SPAN = 5000;
   size_t counter_replacement;
-  unsigned num_replacement;
+  unsigned factor_replacement;
   int prev_hit_rate;
   bool lru_mode;
   State state;
diff --git a/tensorflow/core/kernels/embedding_variable_ops_test.cc b/tensorflow/core/kernels/embedding_variable_ops_test.cc
index a46d1c3b712..da67f9dda94 100644
--- a/tensorflow/core/kernels/embedding_variable_ops_test.cc
+++ b/tensorflow/core/kernels/embedding_variable_ops_test.cc
@@ -1183,6 +1183,7 @@ TEST(EmbeddingVariableTest, TestAgingLFUCache) {
   int num_evict = 50;
   int64 ids[num_access] = {0};
   int64 evict_ids[num_evict] = {0};
+  bool evict_ids_map[num_ids] = {false};
   for (int i = 0; i < num_access; i++){
     ids[i] = i % num_ids;
   }
@@ -1190,9 +1191,13 @@ TEST(EmbeddingVariableTest, TestAgingLFUCache) {
   int64 size = cache->get_evic_ids(evict_ids, num_evict);
   ASSERT_EQ(size, num_ids);
   ASSERT_EQ(cache->size(), 0);
-  // for (int i = 0; i < size; i++) {
-  //   ASSERT_EQ(evict_ids[i], (num_access % num_ids + i) % num_ids);
-  // }
+  for (int i = 0; i < num_ids; i++) {
+    ASSERT_EQ(evict_ids[i] < num_ids, true);
+    evict_ids_map[evict_ids[i]] = true;
+  }
+  for (int id = 0; id < num_ids; id++) {
+    ASSERT_EQ(evict_ids_map[id], true);
+  }
 }
 
 TEST(EmbeddingVariableTest, TestAutoLRFUCache) {
@@ -1202,6 +1207,7 @@ TEST(EmbeddingVariableTest, TestAutoLRFUCache) {
   int num_evict = 50;
   int64 ids[num_access] = {0};
   int64 evict_ids[num_evict] = {0};
+  bool evict_ids_map[num_ids] = {false};
   for (int i = 0; i < num_access; i++){
     ids[i] = i % num_ids;
   }
@@ -1209,9 +1215,13 @@ TEST(EmbeddingVariableTest, TestAutoLRFUCache) {
   int64 size = cache->get_evic_ids(evict_ids, num_evict);
   ASSERT_EQ(size, num_ids);
   ASSERT_EQ(cache->size(), 0);
-  // for (int i = 0; i < size; i++) {
-  //   ASSERT_EQ(evict_ids[i], (num_access % num_ids + i) % num_ids);
-  // }
+  for (int i = 0; i < num_ids; i++) {
+    ASSERT_EQ(evict_ids[i] < num_ids, true);
+    evict_ids_map[evict_ids[i]] = true;
+  }
+  for (int id = 0; id < num_ids; id++) {
+    ASSERT_EQ(evict_ids_map[id], true);
+  }
 }
 
 TEST(EmbeddingVariableTest, TestCacheRestore) {

From b7d78b44981dde195790f53efc216426feb982bd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=95=BF=E5=85=B0?= <aoxuyang.axy@alibaba-inc.com>
Date: Tue, 22 Nov 2022 04:23:57 +0800
Subject: [PATCH 19/19] [Embedding] Fix AgingNode declaration and auto_switch() condition syntax.

---
 tensorflow/core/framework/embedding/cache.h | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/tensorflow/core/framework/embedding/cache.h b/tensorflow/core/framework/embedding/cache.h
index be9ed5e897d..61ba21a5f86 100644
--- a/tensorflow/core/framework/embedding/cache.h
+++ b/tensorflow/core/framework/embedding/cache.h
@@ -290,15 +290,15 @@ class LFUNode {
 };
 
 template <class K>
-class AgingNode : public LFUNode<K> { {
+class AgingNode : public LFUNode<K> {
  public:
   AgingNode(K key, unsigned now)
       : LFUNode<K>(key, now), count(INIT_CNT), index(INIT_CNT), last(now) {}
-  // AgingNode(typename std::list<AgingNode>::iterator that)
-  //     : key(that->key),
-  //       count(that->count),
-  //       index(that->index),
-  //       last(that->last) {}
+  AgingNode(typename std::list<AgingNode>::iterator that)
+      : LFUNode<K>(that->key, that->last),
+        count(that->count),
+        index(that->index),
+        last(that->last) {}
   ~AgingNode() {}
   size_t GetIndex() { return index; }
   size_t UpdateAndReturnIndex(unsigned now, bool lru_mode) {
@@ -580,8 +580,9 @@ class AutoLRFUCache : public AgingLFUCache<K> {
   }
 
   void auto_switch() {
-    if (state == S3 && counter_replacement < FAST_CHECK_SPAN)
-        || (counter_replacement < factor_replacement * cache_capacity_) return;
+    if ((state == S3 && counter_replacement < FAST_CHECK_SPAN) ||
+        (counter_replacement < factor_replacement * cache_capacity_))
+      return;
     counter_replacement = 0;
     int curr_hit_rate = get_hit_rate100000(0);
     if (state == F0) {