diff --git a/dev-support/spotbugs-exclude.xml b/dev-support/spotbugs-exclude.xml
index 2f0684eff4d7..17b8d2cbdedd 100644
--- a/dev-support/spotbugs-exclude.xml
+++ b/dev-support/spotbugs-exclude.xml
@@ -271,4 +271,16 @@
+
+
+
+
+
+
+
+
+
diff --git a/hbase-protocol-shaded/src/main/protobuf/BucketCacheEntry.proto b/hbase-protocol-shaded/src/main/protobuf/BucketCacheEntry.proto
index 80fc10ada786..c1f7b496776f 100644
--- a/hbase-protocol-shaded/src/main/protobuf/BucketCacheEntry.proto
+++ b/hbase-protocol-shaded/src/main/protobuf/BucketCacheEntry.proto
@@ -49,6 +49,9 @@ message BlockCacheKey {
required int64 offset = 2;
required BlockType block_type = 3;
required bool primary_replica_block = 4;
+ optional string region_name = 5;
+ optional string family_name = 6;
+ optional bool archived = 7;
}
enum BlockType {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
index 0ae65b9a0084..4a2fddf8866c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
@@ -85,14 +85,6 @@ Cacheable getBlock(BlockCacheKey cacheKey, boolean caching, boolean repeat,
*/
int evictBlocksByHfileName(String hfileName);
- /**
- * Evicts all blocks for the given HFile by path.
- * @return the number of blocks evicted
- */
- default int evictBlocksByHfilePath(Path hfilePath) {
- return evictBlocksByHfileName(hfilePath.getName());
- }
-
/**
* Get the statistics for this block cache.
*/
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java
index f87b456c29bf..2142de7053f7 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java
@@ -19,7 +19,9 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.HeapSize;
+import org.apache.hadoop.hbase.io.hfile.bucket.FilePathStringPool;
import org.apache.hadoop.hbase.util.ClassSize;
+import org.apache.hadoop.hbase.util.HFileArchiveUtil;
import org.apache.yetus.audience.InterfaceAudience;
/**
@@ -27,75 +29,146 @@
*/
@InterfaceAudience.Private
public class BlockCacheKey implements HeapSize, java.io.Serializable {
- private static final long serialVersionUID = -5199992013113130534L;
- private final String hfileName;
+ private static final long serialVersionUID = -5199992013113130535L; // Changed due to format
+ // change
+
+ // New compressed format using integer file ID (when codec is available)
+ private final int hfileNameId;
+
+ private transient final FilePathStringPool stringPool;
+
+ private final int regionId;
+
+ private final int cfId;
+
private final long offset;
+
private BlockType blockType;
+
private final boolean isPrimaryReplicaBlock;
- private Path filePath;
+ private final boolean archived;
/**
- * Construct a new BlockCacheKey
+ * Constructs a new BlockCacheKey with the file name and offset only. To be used for cache lookups
+ * only; DO NOT use this for creating keys when inserting into the cache. Use one of the
+ * overloaded constructors that take the path parameter or the region and cf parameters,
+ * otherwise region cache metrics won't be recorded properly.
* @param hfileName The name of the HFile this block belongs to.
* @param offset Offset of the block into the file
*/
public BlockCacheKey(String hfileName, long offset) {
- this(hfileName, offset, true, BlockType.DATA);
+ this(hfileName, null, null, offset, true, BlockType.DATA, false);
}
+ /**
+ * Constructs a new BlockCacheKey with the file name, offset, replica and type only. To be used
+ * for cache lookups only; DO NOT use this for creating keys when inserting into the cache. Use
+ * one of the overloaded constructors that take the path parameter or the region and cf
+ * parameters, otherwise region cache metrics won't be recorded properly.
+ * @param hfileName The name of the HFile this block belongs to.
+ * @param offset Offset of the block into the file
+ * @param isPrimaryReplica Whether this is from primary replica
+ * @param blockType Type of block
+ */
public BlockCacheKey(String hfileName, long offset, boolean isPrimaryReplica,
BlockType blockType) {
+ this(hfileName, null, null, offset, isPrimaryReplica, blockType, false);
+ }
+
+ /**
+ * Construct a new BlockCacheKey, with file, column family and region information. This should be
+ * used when inserting keys into the cache, so that region cache metrics are recorded properly.
+ * @param hfileName The name of the HFile this block belongs to.
+ * @param cfName The column family name
+ * @param regionName The region name
+ * @param offset Offset of the block into the file
+ * @param isPrimaryReplica Whether this is from primary replica
+ * @param blockType Type of block
+ */
+ public BlockCacheKey(String hfileName, String cfName, String regionName, long offset,
+ boolean isPrimaryReplica, BlockType blockType, boolean archived) {
this.isPrimaryReplicaBlock = isPrimaryReplica;
- this.hfileName = hfileName;
this.offset = offset;
this.blockType = blockType;
+ this.stringPool = FilePathStringPool.getInstance();
+ // Use string pool for file, region and cf values
+ this.hfileNameId = stringPool.encode(hfileName);
+ this.regionId = (regionName != null) ? stringPool.encode(regionName) : -1;
+ this.cfId = (cfName != null) ? stringPool.encode(cfName) : -1;
+ this.archived = archived;
}
+ /**
+ * Construct a new BlockCacheKey using a file path. File, column family and region information
+ * will be extracted from the passed path. This should be used when inserting keys into the cache,
+ * so that region cache metrics are recorded properly.
+ * @param hfilePath The path to the HFile
+ * @param offset Offset of the block into the file
+ * @param isPrimaryReplica Whether this is from primary replica
+ * @param blockType Type of block
+ */
public BlockCacheKey(Path hfilePath, long offset, boolean isPrimaryReplica, BlockType blockType) {
- this.filePath = hfilePath;
- this.isPrimaryReplicaBlock = isPrimaryReplica;
- this.hfileName = hfilePath.getName();
- this.offset = offset;
- this.blockType = blockType;
+ this(hfilePath.getName(), hfilePath.getParent().getName(),
+ hfilePath.getParent().getParent().getName(), offset, isPrimaryReplica, blockType,
+ HFileArchiveUtil.isHFileArchived(hfilePath));
}
@Override
public int hashCode() {
- return hfileName.hashCode() * 127 + (int) (offset ^ (offset >>> 32));
+ return hfileNameId * 127 + (int) (offset ^ (offset >>> 32));
}
@Override
public boolean equals(Object o) {
if (o instanceof BlockCacheKey) {
BlockCacheKey k = (BlockCacheKey) o;
- return offset == k.offset
- && (hfileName == null ? k.hfileName == null : hfileName.equals(k.hfileName));
- } else {
- return false;
+ if (offset != k.offset) {
+ return false;
+ }
+ return getHfileName().equals(k.getHfileName());
}
+ return false;
}
@Override
public String toString() {
- return this.hfileName + '_' + this.offset;
+ return getHfileName() + '_' + this.offset;
}
public static final long FIXED_OVERHEAD = ClassSize.estimateBase(BlockCacheKey.class, false);
/**
- * Strings have two bytes per character due to default Java Unicode encoding (hence length times
- * 2).
+ * With the compressed format using integer file IDs, the heap size is significantly reduced. We
+ * now only store a 4-byte integer instead of the full file name string.
*/
@Override
public long heapSize() {
- return ClassSize.align(FIXED_OVERHEAD + ClassSize.STRING + 2 * hfileName.length());
+ return ClassSize.align(FIXED_OVERHEAD);
}
- // can't avoid this unfortunately
- /** Returns The hfileName portion of this cache key */
+ /**
+ * Returns the hfileName portion of this cache key.
+ * @return The file name
+ */
public String getHfileName() {
- return hfileName;
+ return stringPool.decode(hfileNameId);
+ }
+
+ /**
+ * Returns the region name portion of this cache key.
+ * @return The region name
+ */
+ public String getRegionName() {
+ return stringPool.decode(regionId);
+ }
+
+ /**
+ * Returns the column family name portion of this cache key.
+ * @return The column family name
+ */
+ public String getCfName() {
+ return stringPool.decode(cfId);
}
public boolean isPrimary() {
@@ -114,12 +187,8 @@ public void setBlockType(BlockType blockType) {
this.blockType = blockType;
}
- public Path getFilePath() {
- return filePath;
- }
-
- public void setFilePath(Path filePath) {
- this.filePath = filePath;
+ public boolean isArchived() {
+ return archived;
}
}
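
As a rough usage sketch of the new key contract (illustrative only, not part of the patch; the path below is hypothetical):

  // Insertion: build the key from the full path, so file, CF and region names (and the
  // archived flag) are resolved through the string pool and region cache metrics can be
  // attributed correctly.
  Path hfilePath = new Path("/hbase/data/default/t1/1588230740/cf1/f7b3c2d1e0a9");
  BlockCacheKey insertKey = new BlockCacheKey(hfilePath, 0L, true, BlockType.DATA);

  // Lookup: file name plus offset is enough, because hashCode()/equals() only use the pooled
  // file name and the offset, so this key still matches the one inserted above.
  BlockCacheKey lookupKey = new BlockCacheKey(hfilePath.getName(), 0L);
  assert insertKey.equals(lookupKey) && insertKey.hashCode() == lookupKey.hashCode();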
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
index ba1a5ed9ae9f..6a9c0c778f44 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
@@ -149,11 +149,6 @@ public int evictBlocksByHfileName(String hfileName) {
return l1Cache.evictBlocksByHfileName(hfileName) + l2Cache.evictBlocksByHfileName(hfileName);
}
- @Override
- public int evictBlocksByHfilePath(Path hfilePath) {
- return l1Cache.evictBlocksByHfilePath(hfilePath) + l2Cache.evictBlocksByHfilePath(hfilePath);
- }
-
@Override
public CacheStats getStats() {
return this.combinedCacheStats;
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePreadReader.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePreadReader.java
index 39af3585112b..147e2598ef9b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePreadReader.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePreadReader.java
@@ -185,7 +185,7 @@ public void close(boolean evictOnClose) throws IOException {
// Deallocate data blocks
cacheConf.getBlockCache().ifPresent(cache -> {
if (evictOnClose) {
- int numEvicted = cache.evictBlocksByHfilePath(path);
+ int numEvicted = cache.evictBlocksByHfileName(name);
if (LOG.isTraceEnabled()) {
LOG.trace("On close, file= {} evicted= {} block(s)", name, numEvicted);
}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
index db183bb7177c..7d865d6fdc13 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
@@ -1256,7 +1256,7 @@ public HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock) throws
// Check cache for block. If found return.
long metaBlockOffset = metaBlockIndexReader.getRootBlockOffset(block);
BlockCacheKey cacheKey =
- new BlockCacheKey(name, metaBlockOffset, this.isPrimaryReplicaReader(), BlockType.META);
+ new BlockCacheKey(path, metaBlockOffset, this.isPrimaryReplicaReader(), BlockType.META);
cacheBlock &=
cacheConf.shouldCacheBlockOnRead(BlockType.META.getCategory(), getHFileInfo(), conf);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
index 0b6ea4c6e66c..6667af0de258 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
@@ -88,7 +88,6 @@
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
-import org.apache.hadoop.hbase.util.HFileArchiveUtil;
import org.apache.hadoop.hbase.util.IdReadWriteLock;
import org.apache.hadoop.hbase.util.IdReadWriteLock.ReferenceType;
import org.apache.hadoop.hbase.util.Pair;
@@ -398,6 +397,7 @@ private void startPersistenceRetriever(int[] bucketSizes, long capacity) {
LOG.warn("Can't restore from file[{}]. The bucket cache will be reset and rebuilt."
+ " Exception seen: ", persistencePath, ex);
backingMap.clear();
+ FilePathStringPool.getInstance().clear();
fullyCachedFiles.clear();
backingMapValidated.set(true);
regionCachedSize.clear();
@@ -586,7 +586,8 @@ protected void cacheBlockWithWaitInternal(BlockCacheKey cacheKey, Cacheable cach
if (cacheKey.getBlockType() == null && cachedItem.getBlockType() != null) {
cacheKey.setBlockType(cachedItem.getBlockType());
}
- LOG.trace("Caching key={}, item={}", cacheKey, cachedItem);
+ LOG.debug("Caching key={}, item={}, key heap size={}", cacheKey, cachedItem,
+ cacheKey.heapSize());
// Stuff the entry into the RAM cache so it can get drained to the persistent store
RAMQueueEntry re = new RAMQueueEntry(cacheKey, cachedItem, accessCount.incrementAndGet(),
inMemory, isCachePersistent() && ioEngine instanceof FileIOEngine, wait);
@@ -639,6 +640,7 @@ public BucketEntry getBlockForReference(BlockCacheKey key) {
referredFileName = StoreFileInfo.getReferredToRegionAndFile(key.getHfileName()).getSecond();
}
if (referredFileName != null) {
+ // Since we just need this key for a lookup, it's enough to use only name and offset
BlockCacheKey convertedCacheKey = new BlockCacheKey(referredFileName, key.getOffset());
foundEntry = backingMap.get(convertedCacheKey);
LOG.debug("Got a link/ref: {}. Related cacheKey: {}. Found entry: {}", key.getHfileName(),
@@ -671,6 +673,8 @@ public Cacheable getBlock(BlockCacheKey key, boolean caching, boolean repeat,
return re.getData();
}
BucketEntry bucketEntry = backingMap.get(key);
+ LOG.debug("bucket entry for key {}: {}", key,
+ bucketEntry == null ? null : bucketEntry.offset());
if (bucketEntry == null) {
bucketEntry = getBlockForReference(key);
}
@@ -749,7 +753,7 @@ void blockEvicted(BlockCacheKey cacheKey, BucketEntry bucketEntry, boolean decre
private void fileNotFullyCached(BlockCacheKey key, BucketEntry entry) {
// Update the updateRegionCachedSize before removing the file from fullyCachedFiles.
// This computation should happen even if the file is not in fullyCachedFiles map.
- updateRegionCachedSize(key.getFilePath(), (entry.getLength() * -1));
+ updateRegionCachedSize(key, (entry.getLength() * -1));
fullyCachedFiles.remove(key.getHfileName());
}
@@ -762,12 +766,13 @@ public void fileCacheCompleted(Path filePath, long size) {
fullyCachedFiles.put(filePath.getName(), pair);
}
- private void updateRegionCachedSize(Path filePath, long cachedSize) {
- if (filePath != null) {
- if (HFileArchiveUtil.isHFileArchived(filePath)) {
- LOG.trace("Skipping region cached size update for archived file: {}", filePath);
+ private void updateRegionCachedSize(BlockCacheKey key, long cachedSize) {
+ if (key.getRegionName() != null) {
+ if (key.isArchived()) {
+ LOG.trace("Skipping region cached size update for archived file:{} from region: {}",
+ key.getHfileName(), key.getRegionName());
} else {
- String regionName = filePath.getParent().getParent().getName();
+ String regionName = key.getRegionName();
regionCachedSize.merge(regionName, cachedSize,
(previousSize, newBlockSize) -> previousSize + newBlockSize);
LOG.trace("Updating region cached size for region: {}", regionName);
@@ -775,6 +780,7 @@ private void updateRegionCachedSize(Path filePath, long cachedSize) {
// remove the entry for that region from regionCachedSize map.
if (regionCachedSize.get(regionName) <= 0) {
regionCachedSize.remove(regionName);
+ FilePathStringPool.getInstance().remove(regionName);
}
}
}
@@ -834,6 +840,7 @@ private boolean doEvictBlock(BlockCacheKey cacheKey, BucketEntry bucketEntry,
if (existedInRamCache && evictedByEvictionProcess) {
cacheStats.evicted(0, cacheKey.isPrimary());
}
+ LOG.debug("Entry for key {} was not found in backing map", cacheKey);
return existedInRamCache;
} else {
return bucketEntryToUse.withWriteLock(offsetLock, () -> {
@@ -843,6 +850,9 @@ private boolean doEvictBlock(BlockCacheKey cacheKey, BucketEntry bucketEntry,
blockEvicted(cacheKey, bucketEntryToUse, !existedInRamCache, evictedByEvictionProcess);
return true;
}
+ LOG.debug("Failed to remove key {} from map. Maybe entries in the map now differ? "
+ + "Original found entry: {}, what's in the map now: {}", cacheKey,
+ bucketEntryToUse, backingMap.get(cacheKey));
return false;
});
}
@@ -994,6 +1004,7 @@ public void logStats() {
+ cacheStats.getEvictedCount() + ", " + "evictedPerRun=" + cacheStats.evictedPerEviction()
+ ", " + "allocationFailCount=" + cacheStats.getAllocationFailCount() + ", blocksCount="
+ backingMap.size());
+ LOG.info(FilePathStringPool.getInstance().getPoolStats());
cacheStats.reset();
bucketAllocator.logDebugStatistics();
@@ -1338,7 +1349,7 @@ public void run() {
protected void putIntoBackingMap(BlockCacheKey key, BucketEntry bucketEntry) {
BucketEntry previousEntry = backingMap.put(key, bucketEntry);
blocksByHFile.add(key);
- updateRegionCachedSize(key.getFilePath(), bucketEntry.getLength());
+ updateRegionCachedSize(key, bucketEntry.getLength());
if (previousEntry != null && previousEntry != bucketEntry) {
previousEntry.withWriteLock(offsetLock, () -> {
blockEvicted(key, previousEntry, false, false);
@@ -1748,6 +1759,7 @@ private void retrieveChunkedBackingMap(FileInputStream in) throws IOException {
backingMap.clear();
blocksByHFile.clear();
+ FilePathStringPool.getInstance().clear();
// Read the backing map entries in batches.
int numChunks = 0;
@@ -1803,6 +1815,7 @@ private void disableCache() {
this.blocksByHFile.clear();
this.fullyCachedFiles.clear();
this.regionCachedSize.clear();
+ FilePathStringPool.getInstance().clear();
}
if (cacheStats.getMetricsRollerScheduler() != null) {
cacheStats.getMetricsRollerScheduler().shutdownNow();
@@ -1816,24 +1829,27 @@ private void join() throws InterruptedException {
@Override
public void shutdown() {
- disableCache();
- LOG.info("Shutdown bucket cache: IO persistent=" + ioEngine.isPersistent() + "; path to write="
- + persistencePath);
- if (ioEngine.isPersistent() && persistencePath != null) {
- try {
- join();
- if (cachePersister != null) {
- LOG.info("Shutting down cache persister thread.");
- cachePersister.shutdown();
- while (cachePersister.isAlive()) {
- Thread.sleep(10);
+ if (isCacheEnabled()) {
+ disableCache();
+ LOG.info("Shutdown bucket cache: IO persistent=" + ioEngine.isPersistent()
+ + "; path to write=" + persistencePath);
+ if (ioEngine.isPersistent() && persistencePath != null) {
+ try {
+ join();
+ if (cachePersister != null) {
+ LOG.info("Shutting down cache persister thread.");
+ cachePersister.shutdown();
+ while (cachePersister.isAlive()) {
+ Thread.sleep(10);
+ }
}
+ persistToFile();
+ FilePathStringPool.getInstance().clear();
+ } catch (IOException ex) {
+ LOG.error("Unable to persist data on exit: " + ex.toString(), ex);
+ } catch (InterruptedException e) {
+ LOG.warn("Failed to persist data on exit", e);
}
- persistToFile();
- } catch (IOException ex) {
- LOG.error("Unable to persist data on exit: " + ex.toString(), ex);
- } catch (InterruptedException e) {
- LOG.warn("Failed to persist data on exit", e);
}
}
}
@@ -1915,33 +1931,31 @@ public int evictBlocksByHfileName(String hfileName) {
}
@Override
- public int evictBlocksByHfilePath(Path hfilePath) {
- return evictBlocksRangeByHfileName(hfilePath.getName(), hfilePath, 0, Long.MAX_VALUE);
- }
-
- public int evictBlocksRangeByHfileName(String hfileName, Path filePath, long initOffset,
- long endOffset) {
+ public int evictBlocksRangeByHfileName(String hfileName, long initOffset, long endOffset) {
Set<BlockCacheKey> keySet = getAllCacheKeysForFile(hfileName, initOffset, endOffset);
+ // We need to check whether we are evicting all blocks for this file. In the case of split
+ // references, we might be evicting just half of the blocks.
+ int totalFileKeys = (endOffset == Long.MAX_VALUE)
+ ? keySet.size()
+ : getAllCacheKeysForFile(hfileName, 0, Long.MAX_VALUE).size();
LOG.debug("found {} blocks for file {}, starting offset: {}, end offset: {}", keySet.size(),
hfileName, initOffset, endOffset);
int numEvicted = 0;
for (BlockCacheKey key : keySet) {
- if (filePath != null) {
- key.setFilePath(filePath);
- }
if (evictBlock(key)) {
++numEvicted;
}
}
+ if (numEvicted > 0) {
+ if (totalFileKeys == numEvicted) {
+ FilePathStringPool.getInstance().remove(hfileName);
+ }
+ }
return numEvicted;
}
- @Override
- public int evictBlocksRangeByHfileName(String hfileName, long initOffset, long endOffset) {
- return evictBlocksRangeByHfileName(hfileName, null, initOffset, endOffset);
- }
-
private Set<BlockCacheKey> getAllCacheKeysForFile(String hfileName, long init, long end) {
+ // These keys are just for comparison and are short lived, so we need only file name and offset
return blocksByHFile.subSet(new BlockCacheKey(hfileName, init), true,
new BlockCacheKey(hfileName, end), true);
}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketProtoUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketProtoUtils.java
index eb9c2cb5de88..b87e0e0dd62a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketProtoUtils.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketProtoUtils.java
@@ -165,8 +165,9 @@ static Pair<ConcurrentHashMap<BlockCacheKey, BucketEntry>, NavigableSet<BlockCacheKey>>
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FilePathStringPool.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FilePathStringPool.java
new file mode 100644
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FilePathStringPool.java
+/**
+ * Thread-safe implementation that maintains bidirectional mappings between strings and IDs.
+ */
+@InterfaceAudience.Private
+public class FilePathStringPool {
+ private static final Logger LOG = LoggerFactory.getLogger(FilePathStringPool.class);
+
+ // Bidirectional mappings for string objects re-use
+ private final ConcurrentHashMap<String, Integer> stringToId = new ConcurrentHashMap<>();
+ private final ConcurrentHashMap<Integer, String> idToString = new ConcurrentHashMap<>();
+ private final AtomicInteger nextId = new AtomicInteger(0);
+
+ private static FilePathStringPool instance;
+
+ public static FilePathStringPool getInstance() {
+ synchronized (FilePathStringPool.class) {
+ if (instance == null) {
+ instance = new FilePathStringPool();
+ }
+ }
+ return instance;
+ }
+
+ private FilePathStringPool() {
+ // Private constructor for singleton
+ }
+
+ /**
+ * Gets or creates an integer ID for the given String.
+ * @param string value for the file/region/CF name.
+ * @return the integer ID encoding this string in the pool.
+ */
+ public int encode(String string) {
+ if (string == null) {
+ throw new IllegalArgumentException("string cannot be null");
+ }
+ return stringToId.computeIfAbsent(string, name -> {
+ if (stringToId.size() == Integer.MAX_VALUE) {
+ throw new IllegalStateException(
+ "String pool has reached maximum capacity of " + Integer.MAX_VALUE + " unique strings.");
+ }
+ int id = nextId.getAndIncrement();
+ while (idToString.containsKey(id)) {
+ id = nextId.getAndIncrement();
+ if (id == Integer.MAX_VALUE) {
+ nextId.set(0);
+ LOG.info("Id values reached Integer.MAX_VALUE, restarting from 0");
+ }
+ }
+ idToString.put(id, name);
+ LOG.trace("Encoded new string to ID {}: {}", id, name);
+ return id;
+ });
+ }
+
+ /**
+ * Decodes an integer ID back to its original file name.
+ * @param id the integer ID
+ * @return the original file name, or null if not found
+ */
+ public String decode(int id) {
+ return idToString.get(id);
+ }
+
+ /**
+ * Checks if a given string ID is already being used.
+ * @param id the integer ID to check
+ * @return true if the ID exists
+ */
+ public boolean contains(int id) {
+ return idToString.containsKey(id);
+ }
+
+ /**
+ * Checks if a given string has been encoded.
+ * @param string the value to check
+ * @return true if the string value has been encoded
+ */
+ public boolean contains(String string) {
+ return stringToId.containsKey(string);
+ }
+
+ /**
+ * Gets the number of unique strings currently tracked.
+ * @return the number of entries in the pool
+ */
+ public int size() {
+ return stringToId.size();
+ }
+
+ /**
+ * Removes a string value and its ID from the pool. This should only be called when all blocks for
+ * a file have been evicted from the cache.
+ * @param string the file name to remove
+ * @return true if the file name was removed, false if it wasn't present
+ */
+ public boolean remove(String string) {
+ if (string == null) {
+ return false;
+ }
+ Integer id = stringToId.remove(string);
+ if (id != null) {
+ idToString.remove(id);
+ LOG.debug("Removed string value from pool: {} (ID: {})", string, id);
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Clears all mappings from the pool.
+ */
+ public void clear() {
+ stringToId.clear();
+ idToString.clear();
+ nextId.set(0);
+ LOG.info("Cleared all file name mappings from codec");
+ }
+
+ /**
+ * Gets statistics about memory savings from string pooling.
+ * @return a formatted string with compression statistics
+ */
+ public String getPoolStats() {
+ long uniqueStrings = stringToId.size();
+ if (uniqueStrings == 0) {
+ return "No strings encoded";
+ }
+ // Calculate average string length
+ long totalChars = stringToId.keySet().stream().mapToLong(String::length).sum();
+ double avgLength = (double) totalChars / uniqueStrings;
+ return String.format("FilePathStringPool stats: %d unique strings, avg length: %.1f chars, ",
+ uniqueStrings, avgLength);
+ }
+}
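
A short usage sketch of the pool (illustrative only; the file name below is hypothetical):

  FilePathStringPool pool = FilePathStringPool.getInstance();
  int id = pool.encode("f7b3c2d1e0a94b5c");      // interns the name, returns its int id
  assert id == pool.encode("f7b3c2d1e0a94b5c");  // the same string always maps to the same id
  String name = pool.decode(id);                 // resolves back to the original string
  pool.remove(name);                             // only once no cached block references it
  assert pool.decode(id) == null;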
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java
index 6638fd2049c6..cb02c04e9e3f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java
@@ -17,7 +17,6 @@
*/
package org.apache.hadoop.hbase.regionserver;
-import java.io.IOException;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
@@ -96,11 +95,14 @@ public static synchronized DataTieringManager getInstance() {
* @throws DataTieringException if there is an error retrieving the HFile path or configuration
*/
public boolean isDataTieringEnabled(BlockCacheKey key) throws DataTieringException {
- Path hFilePath = key.getFilePath();
- if (hFilePath == null) {
- throw new DataTieringException("BlockCacheKey Doesn't Contain HFile Path");
+ if (key.getCfName() == null || key.getRegionName() == null) {
+ throw new DataTieringException(
+ "BlockCacheKey doesn't contain Column Family Name or Region Name");
}
- return isDataTieringEnabled(hFilePath);
+ Configuration configuration =
+ getHStore(key.getRegionName(), key.getCfName()).getReadOnlyConfiguration();
+ DataTieringType dataTieringType = getDataTieringType(configuration);
+ return !dataTieringType.equals(DataTieringType.NONE);
}
/**
@@ -125,11 +127,16 @@ public boolean isDataTieringEnabled(Path hFilePath) throws DataTieringException
* @throws DataTieringException if there is an error retrieving data tiering information
*/
public boolean isHotData(BlockCacheKey key) throws DataTieringException {
- Path hFilePath = key.getFilePath();
- if (hFilePath == null) {
- throw new DataTieringException("BlockCacheKey Doesn't Contain HFile Path");
+ if (key.getRegionName() == null) {
+ throw new DataTieringException("BlockCacheKey doesn't contain Region Name");
+ }
+ if (key.getCfName() == null) {
+ throw new DataTieringException("BlockCacheKey doesn't contain CF Name");
}
- return isHotData(hFilePath);
+ if (key.getHfileName() == null) {
+ throw new DataTieringException("BlockCacheKey doesn't contain File Name");
+ }
+ return isHotData(key.getRegionName(), key.getCfName(), key.getHfileName());
}
/**
@@ -157,24 +164,14 @@ public boolean isHotData(long maxTimestamp, Configuration conf) {
return true;
}
- /**
- * Determines whether the data in the HFile at the given path is considered hot based on the
- * configured data tiering type and hot data age. If the data tiering type is set to
- * {@link DataTieringType#TIME_RANGE} and maximum timestamp is not present, it considers
- * {@code Long.MAX_VALUE} as the maximum timestamp, making the data hot by default.
- * @param hFilePath the path to the HFile
- * @return {@code true} if the data is hot, {@code false} otherwise
- * @throws DataTieringException if there is an error retrieving data tiering information
- */
- public boolean isHotData(Path hFilePath) throws DataTieringException {
- Configuration configuration = getConfiguration(hFilePath);
+ private boolean isHotData(String region, String cf, String fileName) throws DataTieringException {
+ Configuration configuration = getHStore(region, cf).getReadOnlyConfiguration();
DataTieringType dataTieringType = getDataTieringType(configuration);
-
if (!dataTieringType.equals(DataTieringType.NONE)) {
- HStoreFile hStoreFile = getHStoreFile(hFilePath);
+ HStoreFile hStoreFile = getHStoreFile(region, cf, fileName);
if (hStoreFile == null) {
throw new DataTieringException(
- "Store file corresponding to " + hFilePath + " doesn't exist");
+ "Store file corresponding to " + region + "/" + cf + "/" + fileName + " doesn't exist");
}
long maxTimestamp = dataTieringType.getInstance().getTimestamp(hStoreFile);
if (isWithinGracePeriod(maxTimestamp, configuration)) {
@@ -244,34 +241,29 @@ public Set getColdDataFiles(Set allCachedBlocks)
return coldHFiles;
}
- private HRegion getHRegion(Path hFilePath) throws DataTieringException {
- String regionId;
- try {
- regionId = HRegionFileSystem.getRegionId(hFilePath);
- } catch (IOException e) {
- throw new DataTieringException(e.getMessage());
- }
- HRegion hRegion = this.onlineRegions.get(regionId);
+ private HRegion getHRegion(String region) throws DataTieringException {
+ HRegion hRegion = this.onlineRegions.get(region);
if (hRegion == null) {
- throw new DataTieringException("HRegion corresponding to " + hFilePath + " doesn't exist");
+ throw new DataTieringException("HRegion corresponding to " + region + " doesn't exist");
}
return hRegion;
}
- private HStore getHStore(Path hFilePath) throws DataTieringException {
- HRegion hRegion = getHRegion(hFilePath);
- String columnFamily = hFilePath.getParent().getName();
- HStore hStore = hRegion.getStore(Bytes.toBytes(columnFamily));
+ private HStore getHStore(String region, String cf) throws DataTieringException {
+ HRegion hRegion = getHRegion(region);
+ HStore hStore = hRegion.getStore(Bytes.toBytes(cf));
if (hStore == null) {
- throw new DataTieringException("HStore corresponding to " + hFilePath + " doesn't exist");
+ throw new DataTieringException(
+ "HStore corresponding to " + region + "/" + cf + " doesn't exist");
}
return hStore;
}
- private HStoreFile getHStoreFile(Path hFilePath) throws DataTieringException {
- HStore hStore = getHStore(hFilePath);
+ private HStoreFile getHStoreFile(String region, String cf, String fileName)
+ throws DataTieringException {
+ HStore hStore = getHStore(region, cf);
for (HStoreFile file : hStore.getStorefiles()) {
- if (file.getPath().toUri().getPath().toString().equals(hFilePath.toString())) {
+ if (file.getPath().getName().equals(fileName)) {
return file;
}
}
@@ -279,7 +271,18 @@ private HStoreFile getHStoreFile(Path hFilePath) throws DataTieringException {
}
private Configuration getConfiguration(Path hFilePath) throws DataTieringException {
- HStore hStore = getHStore(hFilePath);
+ String regionName = null;
+ String cfName = null;
+ try {
+ regionName = hFilePath.getParent().getParent().getName();
+ cfName = hFilePath.getParent().getName();
+ } catch (Exception e) {
+ throw new DataTieringException("Incorrect HFile Path: " + hFilePath);
+ }
+ if (regionName == null || cfName == null) {
+ throw new DataTieringException("Incorrect HFile Path: " + hFilePath);
+ }
+ HStore hStore = getHStore(regionName, cfName);
return hStore.getReadOnlyConfiguration();
}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java
index e66dd3454181..501a0347026e 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java
@@ -280,7 +280,24 @@ public BlockType getBlockType() {
}
public static HFileBlockPair[] generateHFileBlocks(int blockSize, int numBlocks) {
- return generateBlocksForPath(blockSize, numBlocks, null);
+ return generateBlocksForPath(blockSize, numBlocks, null, false);
+ }
+
+ public static String[] getHFileNames(HFileBlockPair[] blocks) {
+ String[] names = new String[blocks.length];
+ for (int i = 0; i < blocks.length; i++) {
+ names[i] = blocks[i].blockName.getHfileName();
+ }
+ return names;
+ }
+
+ public static BlockCacheKey[] regenerateKeys(HFileBlockPair[] blocks, String[] names) {
+ BlockCacheKey[] keys = new BlockCacheKey[blocks.length];
+ for (int i = 0; i < blocks.length; i++) {
+ keys[i] = new BlockCacheKey(names[i], blocks[i].blockName.getOffset(), true,
+ blocks[i].blockName.getBlockType());
+ }
+ return keys;
}
public static HFileBlockPair[] generateBlocksForPath(int blockSize, int numBlocks, Path path,
@@ -312,28 +329,25 @@ public static HFileBlockPair[] generateBlocksForPath(int blockSize, int numBlock
ByteBuffAllocator.HEAP);
String key = null;
long offset = 0;
+ returnedBlocks[i] = new HFileBlockPair();
if (path != null) {
- key = path.getName();
offset = i * blockSize;
+ returnedBlocks[i].blockName =
+ new BlockCacheKey(path, offset, true, encoded ? BlockType.ENCODED_DATA : BlockType.DATA);
} else {
/* No conflicting keys */
key = Long.toString(rand.nextLong());
while (!usedStrings.add(key)) {
key = Long.toString(rand.nextLong());
}
+ returnedBlocks[i].blockName =
+ new BlockCacheKey(key, offset, true, encoded ? BlockType.ENCODED_DATA : BlockType.DATA);
}
- returnedBlocks[i] = new HFileBlockPair();
- returnedBlocks[i].blockName =
- new BlockCacheKey(key, offset, true, encoded ? BlockType.ENCODED_DATA : BlockType.DATA);
returnedBlocks[i].block = generated;
}
return returnedBlocks;
}
- public static HFileBlockPair[] generateBlocksForPath(int blockSize, int numBlocks, Path path) {
- return generateBlocksForPath(blockSize, numBlocks, path, false);
- }
-
public static class HFileBlockPair {
BlockCacheKey blockName;
HFileBlock block;
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestBlockEvictionOnRegionMovement.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestBlockEvictionOnRegionMovement.java
index baacb3c18a26..5001113f2488 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestBlockEvictionOnRegionMovement.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestBlockEvictionOnRegionMovement.java
@@ -177,7 +177,13 @@ public TableName writeDataToTable(String testName) throws IOException, Interrupt
@After
public void tearDown() throws Exception {
- TEST_UTIL.shutdownMiniCluster();
+ try {
+ TEST_UTIL.shutdownMiniCluster();
+ } catch (NullPointerException e) {
+ // shutdown clears the FilePathStringPool. Since it's a singleton, the second RS shutting down
+ // might try to persist the bucket cache after the string pool is cleared and an NPE is thrown.
+ // This won't happen in real clusters, since there will be only one BucketCache instance per JVM.
+ }
TEST_UTIL.cleanupDataTestDirOnTestFS(String.valueOf(testDir));
if (zkCluster != null) {
zkCluster.shutdown();
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
index 3c8a807fcb15..09f4091dfe8d 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
@@ -48,6 +48,7 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.Random;
import java.util.Set;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.locks.ReentrantReadWriteLock;
@@ -658,6 +659,7 @@ public void testEvictionCount() throws InterruptedException {
assertEquals(0, cache.getStats().getEvictionCount());
// add back
+ key = new BlockCacheKey("testEvictionCount", 0);
CacheTestUtils.getBlockAndAssertEquals(cache, key, blockWithNextBlockMetadata, actualBuffer,
block1Buffer);
waitUntilFlushedToBucket(cache, key);
@@ -676,6 +678,37 @@ public void testEvictionCount() throws InterruptedException {
assertEquals(1, cache.getStats().getEvictionCount());
}
+ @Test
+ public void testStringPool() throws Exception {
+ HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+ Path testDir = TEST_UTIL.getDataTestDir();
+ TEST_UTIL.getTestFileSystem().mkdirs(testDir);
+ BucketCache bucketCache =
+ new BucketCache("file:" + testDir + "/bucket.cache", capacitySize, constructedBlockSize,
+ constructedBlockSizes, writeThreads, writerQLen, testDir + "/bucket.persistence");
+ assertTrue(bucketCache.waitForCacheInitialization(10000));
+ long usedSize = bucketCache.getAllocator().getUsedSize();
+ assertEquals(0, usedSize);
+ Random rand = ThreadLocalRandom.current();
+ Path filePath = new Path(testDir, Long.toString(rand.nextLong()));
+ CacheTestUtils.HFileBlockPair[] blocks =
+ CacheTestUtils.generateBlocksForPath(constructedBlockSize, 1, filePath, false);
+ String name = blocks[0].getBlockName().getHfileName();
+ assertEquals(name, filePath.getName());
+ assertNotNull(blocks[0].getBlockName().getRegionName());
+ bucketCache.cacheBlock(blocks[0].getBlockName(), blocks[0].getBlock());
+ waitUntilFlushedToBucket(bucketCache, blocks[0].getBlockName());
+ assertTrue(FilePathStringPool.getInstance().size() > 0);
+ bucketCache.evictBlock(blocks[0].getBlockName());
+ assertTrue(FilePathStringPool.getInstance().size() > 0);
+ bucketCache.cacheBlock(blocks[0].getBlockName(), blocks[0].getBlock());
+ waitUntilFlushedToBucket(bucketCache, blocks[0].getBlockName());
+ bucketCache.fileCacheCompleted(filePath,
+ bucketCache.backingMap.get(blocks[0].getBlockName()).getLength());
+ bucketCache.evictBlocksByHfileName(name);
+ assertEquals(1, FilePathStringPool.getInstance().size());
+ }
+
@Test
public void testCacheBlockNextBlockMetadataMissing() throws Exception {
int size = 100;
@@ -884,6 +917,7 @@ public void testBlockAdditionWaitWhenCache() throws Exception {
HFileBlockPair[] hfileBlockPairs =
CacheTestUtils.generateHFileBlocks(constructedBlockSize, 10);
+ String[] names = CacheTestUtils.getHFileNames(hfileBlockPairs);
// Add blocks
for (HFileBlockPair hfileBlockPair : hfileBlockPairs) {
bucketCache.cacheBlock(hfileBlockPair.getBlockName(), hfileBlockPair.getBlock(), false,
@@ -912,10 +946,9 @@ public void testBlockAdditionWaitWhenCache() throws Exception {
constructedBlockSizes, writeThreads, writerQLen, persistencePath);
assertTrue(bucketCache.waitForCacheInitialization(10000));
assertEquals(usedByteSize, bucketCache.getAllocator().getUsedSize());
-
- for (HFileBlockPair hfileBlockPair : hfileBlockPairs) {
- BlockCacheKey blockCacheKey = hfileBlockPair.getBlockName();
- bucketCache.evictBlock(blockCacheKey);
+ BlockCacheKey[] newKeys = CacheTestUtils.regenerateKeys(hfileBlockPairs, names);
+ for (BlockCacheKey key : newKeys) {
+ bucketCache.evictBlock(key);
}
assertEquals(0, bucketCache.getAllocator().getUsedSize());
assertEquals(0, bucketCache.backingMap.size());
@@ -1098,9 +1131,9 @@ private BucketCache testEvictOrphans(long orphanEvictionGracePeriod) throws Exce
constructedBlockSize, new int[] { constructedBlockSize + 1024 }, 1, 1, null, 60 * 1000,
HBASE_TESTING_UTILITY.getConfiguration(), onlineRegions);
HFileBlockPair[] validBlockPairs =
- CacheTestUtils.generateBlocksForPath(constructedBlockSize, 10, validFile);
+ CacheTestUtils.generateBlocksForPath(constructedBlockSize, 10, validFile, false);
HFileBlockPair[] orphanBlockPairs =
- CacheTestUtils.generateBlocksForPath(constructedBlockSize, 10, orphanFile);
+ CacheTestUtils.generateBlocksForPath(constructedBlockSize, 10, orphanFile, false);
for (HFileBlockPair pair : validBlockPairs) {
bucketCache.cacheBlockWithWait(pair.getBlockName(), pair.getBlock(), false, true);
}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFilePathStringPool.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFilePathStringPool.java
new file mode 100644
index 000000000000..a42a61c93fbd
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFilePathStringPool.java
@@ -0,0 +1,326 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile.bucket;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.junit.Before;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Tests for {@link FilePathStringPool}
+ */
+@Category({ SmallTests.class })
+public class TestFilePathStringPool {
+
+ @ClassRule
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestFilePathStringPool.class);
+
+ private FilePathStringPool pool;
+
+ @Before
+ public void setUp() {
+ pool = FilePathStringPool.getInstance();
+ pool.clear();
+ }
+
+ @Test
+ public void testSingletonPattern() {
+ FilePathStringPool instance1 = FilePathStringPool.getInstance();
+ FilePathStringPool instance2 = FilePathStringPool.getInstance();
+ assertNotNull(instance1);
+ assertNotNull(instance2);
+ assertEquals(instance1, instance2);
+ }
+
+ @Test
+ public void testBasicEncodeDecodeRoundTrip() {
+ String testString = "/hbase/data/default/test-table/region1/cf1/file1.hfile";
+ int id = pool.encode(testString);
+ String decoded = pool.decode(id);
+ assertEquals(testString, decoded);
+ }
+
+ @Test
+ public void testEncodeReturnsSameIdForSameString() {
+ String testString = "/hbase/data/file.hfile";
+ int id1 = pool.encode(testString);
+ int id2 = pool.encode(testString);
+ assertEquals(id1, id2);
+ assertEquals(1, pool.size());
+ }
+
+ @Test
+ public void testEncodeDifferentStringsGetDifferentIds() {
+ String string1 = "/path/to/file1.hfile";
+ String string2 = "/path/to/file2.hfile";
+ int id1 = pool.encode(string1);
+ int id2 = pool.encode(string2);
+ assertNotEquals(id1, id2);
+ assertEquals(2, pool.size());
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testEncodeNullStringThrowsException() {
+ pool.encode(null);
+ }
+
+ @Test
+ public void testDecodeNonExistentIdReturnsNull() {
+ String decoded = pool.decode(999999);
+ assertNull(decoded);
+ }
+
+ @Test
+ public void testContainsWithId() {
+ String testString = "/hbase/file.hfile";
+ int id = pool.encode(testString);
+ assertTrue(pool.contains(id));
+ assertFalse(pool.contains(id + 1));
+ }
+
+ @Test
+ public void testContainsWithString() {
+ String testString = "/hbase/file.hfile";
+ pool.encode(testString);
+ assertTrue(pool.contains(testString));
+ assertFalse(pool.contains("/hbase/other-file.hfile"));
+ }
+
+ @Test
+ public void testRemoveExistingString() {
+ String testString = "/hbase/file.hfile";
+ int id = pool.encode(testString);
+ assertEquals(1, pool.size());
+ assertTrue(pool.contains(testString));
+ boolean removed = pool.remove(testString);
+ assertTrue(removed);
+ assertEquals(0, pool.size());
+ assertFalse(pool.contains(testString));
+ assertFalse(pool.contains(id));
+ assertNull(pool.decode(id));
+ }
+
+ @Test
+ public void testRemoveNonExistentStringReturnsFalse() {
+ boolean removed = pool.remove("/non/existent/file.hfile");
+ assertFalse(removed);
+ }
+
+ @Test
+ public void testRemoveNullStringReturnsFalse() {
+ boolean removed = pool.remove(null);
+ assertFalse(removed);
+ }
+
+ @Test
+ public void testClear() {
+ pool.encode("/file1.hfile");
+ pool.encode("/file2.hfile");
+ pool.encode("/file3.hfile");
+ assertEquals(3, pool.size());
+ pool.clear();
+ assertEquals(0, pool.size());
+ }
+
+ @Test
+ public void testSizeTracking() {
+ assertEquals(0, pool.size());
+ pool.encode("/file1.hfile");
+ assertEquals(1, pool.size());
+ pool.encode("/file2.hfile");
+ assertEquals(2, pool.size());
+ // Encoding same string should not increase size
+ pool.encode("/file1.hfile");
+ assertEquals(2, pool.size());
+ pool.remove("/file1.hfile");
+ assertEquals(1, pool.size());
+ pool.clear();
+ assertEquals(0, pool.size());
+ }
+
+ @Test
+ public void testGetPoolStats() {
+ String stats = pool.getPoolStats();
+ assertEquals("No strings encoded", stats);
+ pool.encode("/hbase/data/table1/file1.hfile");
+ pool.encode("/hbase/data/table2/file2.hfile");
+ stats = pool.getPoolStats();
+ assertNotNull(stats);
+ assertTrue(stats.contains("2 unique strings"));
+ assertTrue(stats.contains("avg length:"));
+ }
+
+ @Test
+ public void testConcurrentEncoding() throws InterruptedException {
+ int numThreads = 10;
+ int stringsPerThread = 100;
+ ExecutorService executor = Executors.newFixedThreadPool(numThreads);
+ CountDownLatch doneLatch = new CountDownLatch(numThreads);
+ ConcurrentHashMap<String, Integer> results = new ConcurrentHashMap<>();
+ AtomicInteger errorCount = new AtomicInteger(0);
+
+ for (int t = 0; t < numThreads; t++) {
+ final int threadId = t;
+ executor.submit(() -> {
+ try {
+ for (int i = 0; i < stringsPerThread; i++) {
+ String string = "/thread" + threadId + "/file" + i + ".hfile";
+ int id = pool.encode(string);
+ results.put(string, id);
+ }
+ } catch (Exception e) {
+ errorCount.incrementAndGet();
+ } finally {
+ doneLatch.countDown();
+ }
+ });
+ }
+
+ assertTrue(doneLatch.await(30, TimeUnit.SECONDS));
+ executor.shutdown();
+
+ assertEquals(0, errorCount.get());
+ assertEquals(numThreads * stringsPerThread, pool.size());
+ assertEquals(numThreads * stringsPerThread, results.size());
+
+ // Verify all strings can be decoded correctly
+ for (Map.Entry<String, Integer> entry : results.entrySet()) {
+ String decoded = pool.decode(entry.getValue());
+ assertEquals(entry.getKey(), decoded);
+ }
+ }
+
+ @Test
+ public void testConcurrentEncodingSameStrings() throws InterruptedException {
+ int numThreads = 20;
+ String sharedString = "/shared/file.hfile";
+ ExecutorService executor = Executors.newFixedThreadPool(numThreads);
+ CountDownLatch doneLatch = new CountDownLatch(numThreads);
+ Set<Integer> ids = ConcurrentHashMap.newKeySet();
+ AtomicInteger errorCount = new AtomicInteger(0);
+
+ for (int i = 0; i < numThreads; i++) {
+ executor.submit(() -> {
+ try {
+ int id = pool.encode(sharedString);
+ ids.add(id);
+ } catch (Exception e) {
+ errorCount.incrementAndGet();
+ } finally {
+ doneLatch.countDown();
+ }
+ });
+ }
+
+ doneLatch.await(10, TimeUnit.SECONDS);
+ executor.shutdown();
+
+ assertEquals(0, errorCount.get());
+ // All threads should get the same ID
+ assertEquals(1, ids.size());
+ assertEquals(1, pool.size());
+ }
+
+ @Test
+ public void testConcurrentRemoval() throws InterruptedException {
+ // Pre-populate with strings
+ List<String> strings = new ArrayList<>();
+ for (int i = 0; i < 100; i++) {
+ String string = "/file" + i + ".hfile";
+ strings.add(string);
+ pool.encode(string);
+ }
+ assertEquals(100, pool.size());
+
+ int numThreads = 10;
+ ExecutorService executor = Executors.newFixedThreadPool(numThreads);
+ CountDownLatch doneLatch = new CountDownLatch(numThreads);
+ AtomicInteger successfulRemovals = new AtomicInteger(0);
+
+ for (int t = 0; t < numThreads; t++) {
+ final int threadId = t;
+ executor.submit(() -> {
+ try {
+ for (int i = threadId * 10; i < (threadId + 1) * 10; i++) {
+ if (pool.remove(strings.get(i))) {
+ successfulRemovals.incrementAndGet();
+ }
+ }
+ } catch (Exception e) {
+ // Ignore
+ } finally {
+ doneLatch.countDown();
+ }
+ });
+ }
+
+ doneLatch.await(10, TimeUnit.SECONDS);
+ executor.shutdown();
+
+ assertEquals(100, successfulRemovals.get());
+ assertEquals(0, pool.size());
+ }
+
+ @Test
+ public void testBidirectionalMappingConsistency() {
+ // Verify that both mappings stay consistent
+ List<String> strings = new ArrayList<>();
+ List<Integer> ids = new ArrayList<>();
+
+ for (int i = 0; i < 50; i++) {
+ String string = "/region" + (i % 5) + "/file" + i + ".hfile";
+ strings.add(string);
+ ids.add(pool.encode(string));
+ }
+
+ // Verify forward mapping (string -> id)
+ for (int i = 0; i < strings.size(); i++) {
+ int expectedId = ids.get(i);
+ int actualId = pool.encode(strings.get(i));
+ assertEquals(expectedId, actualId);
+ }
+
+ // Verify reverse mapping (id -> string)
+ for (int i = 0; i < ids.size(); i++) {
+ String expectedString = strings.get(i);
+ String actualString = pool.decode(ids.get(i));
+ assertEquals(expectedString, actualString);
+ }
+ }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestRecoveryPersistentBucketCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestRecoveryPersistentBucketCache.java
index b71660b88d81..5ae3343d21e6 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestRecoveryPersistentBucketCache.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestRecoveryPersistentBucketCache.java
@@ -70,8 +70,10 @@ public void testBucketCacheRecovery() throws Exception {
bucketCache.isCacheInitialized("testBucketCacheRecovery") && bucketCache.isCacheEnabled());
CacheTestUtils.HFileBlockPair[] blocks = CacheTestUtils.generateHFileBlocks(8192, 4);
+ String[] names = CacheTestUtils.getHFileNames(blocks);
CacheTestUtils.HFileBlockPair[] smallerBlocks = CacheTestUtils.generateHFileBlocks(4096, 1);
+ String[] smallerNames = CacheTestUtils.getHFileNames(smallerBlocks);
// Add four blocks
cacheAndWaitUntilFlushedToBucket(bucketCache, blocks[0].getBlockName(), blocks[0].getBlock());
cacheAndWaitUntilFlushedToBucket(bucketCache, blocks[1].getBlockName(), blocks[1].getBlock());
@@ -104,16 +106,18 @@ public void testBucketCacheRecovery() throws Exception {
8192, bucketSizes, writeThreads, writerQLen, testDir + "/bucket.persistence",
DEFAULT_ERROR_TOLERATION_DURATION, conf);
assertTrue(newBucketCache.waitForCacheInitialization(1000));
-
- assertEquals(3, newBucketCache.backingMap.size());
- assertNull(newBucketCache.getBlock(blocks[3].getBlockName(), false, false, false));
- assertNull(newBucketCache.getBlock(smallerBlocks[0].getBlockName(), false, false, false));
- assertEquals(blocks[0].getBlock(),
- newBucketCache.getBlock(blocks[0].getBlockName(), false, false, false));
- assertEquals(blocks[1].getBlock(),
- newBucketCache.getBlock(blocks[1].getBlockName(), false, false, false));
- assertEquals(blocks[2].getBlock(),
- newBucketCache.getBlock(blocks[2].getBlockName(), false, false, false));
+ BlockCacheKey[] newKeys = CacheTestUtils.regenerateKeys(blocks, names);
+ BlockCacheKey[] newKeysSmaller = CacheTestUtils.regenerateKeys(smallerBlocks, smallerNames);
+ // The new bucket cache would have only the first three blocks. Although we persisted the
+ // cache state when it had the first four blocks, the 4th block was evicted and then we
+ // added a 5th block, which overwrites part of the 4th block in the cache. This causes a
+ // checksum failure for that block offset when we try to read it from the cache, so we
+ // consider the block invalid and its offset available in the cache.
+ assertNull(newBucketCache.getBlock(newKeys[3], false, false, false));
+ assertNull(newBucketCache.getBlock(newKeysSmaller[0], false, false, false));
+ assertEquals(blocks[0].getBlock(), newBucketCache.getBlock(newKeys[0], false, false, false));
+ assertEquals(blocks[1].getBlock(), newBucketCache.getBlock(newKeys[1], false, false, false));
+ assertEquals(blocks[2].getBlock(), newBucketCache.getBlock(newKeys[2], false, false, false));
TEST_UTIL.cleanupTestDir();
}
@@ -138,6 +142,9 @@ public void testBucketCacheEvictByHFileAfterRecovery() throws Exception {
cacheAndWaitUntilFlushedToBucket(bucketCache, blocks[1].getBlockName(), blocks[1].getBlock());
cacheAndWaitUntilFlushedToBucket(bucketCache, blocks[2].getBlockName(), blocks[2].getBlock());
cacheAndWaitUntilFlushedToBucket(bucketCache, blocks[3].getBlockName(), blocks[3].getBlock());
+
+ String firstFileName = blocks[0].getBlockName().getHfileName();
+
// saves the current state of the cache
bucketCache.persistToFile();
@@ -146,7 +153,8 @@ public void testBucketCacheEvictByHFileAfterRecovery() throws Exception {
DEFAULT_ERROR_TOLERATION_DURATION, conf);
assertTrue(newBucketCache.waitForCacheInitialization(10000));
assertEquals(4, newBucketCache.backingMap.size());
- newBucketCache.evictBlocksByHfileName(blocks[0].getBlockName().getHfileName());
+
+ newBucketCache.evictBlocksByHfileName(firstFileName);
assertEquals(3, newBucketCache.backingMap.size());
TEST_UTIL.cleanupTestDir();
}
@@ -222,6 +230,7 @@ public void testBucketCacheRecoveryWithAllocationInconsistencies() throws Except
bucketCache.isCacheInitialized("testBucketCacheRecovery") && bucketCache.isCacheEnabled());
CacheTestUtils.HFileBlockPair[] blocks = CacheTestUtils.generateHFileBlocks(8192, 5);
+ String[] names = CacheTestUtils.getHFileNames(blocks);
// Add four blocks
cacheAndWaitUntilFlushedToBucket(bucketCache, blocks[0].getBlockName(), blocks[0].getBlock());
@@ -249,16 +258,15 @@ public void testBucketCacheRecoveryWithAllocationInconsistencies() throws Except
Thread.sleep(10);
}
- assertNull(newBucketCache.getBlock(blocks[4].getBlockName(), false, false, false));
+ BlockCacheKey[] newKeys = CacheTestUtils.regenerateKeys(blocks, names);
+
+ assertNull(newBucketCache.getBlock(newKeys[4], false, false, false));
// The backing map entry with key blocks[0].getBlockName() may point to a valid entry
// or to null, depending on the ordering of the keys in the backing map.
// Hence, skipping the check for that key.
- assertEquals(blocks[1].getBlock(),
- newBucketCache.getBlock(blocks[1].getBlockName(), false, false, false));
- assertEquals(blocks[2].getBlock(),
- newBucketCache.getBlock(blocks[2].getBlockName(), false, false, false));
- assertEquals(blocks[3].getBlock(),
- newBucketCache.getBlock(blocks[3].getBlockName(), false, false, false));
+ assertEquals(blocks[1].getBlock(), newBucketCache.getBlock(newKeys[1], false, false, false));
+ assertEquals(blocks[2].getBlock(), newBucketCache.getBlock(newKeys[2], false, false, false));
+ assertEquals(blocks[3].getBlock(), newBucketCache.getBlock(newKeys[3], false, false, false));
assertEquals(4, newBucketCache.backingMap.size());
TEST_UTIL.cleanupTestDir();
}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestVerifyBucketCacheFile.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestVerifyBucketCacheFile.java
index e35db698cca5..9cf677b77f1b 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestVerifyBucketCacheFile.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestVerifyBucketCacheFile.java
@@ -48,8 +48,10 @@
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.Pair;
import org.junit.ClassRule;
+import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
@@ -72,6 +74,9 @@ public static Iterable