From 4473423c26f18104c0fcb507a9f76b4f9eafbe7d Mon Sep 17 00:00:00 2001 From: xlfe Date: Wed, 19 Nov 2025 06:41:57 +1100 Subject: [PATCH 1/6] building, but scanner not working? --- pom.xml | 13 +-- .../java/org/hbase/async/AppendRequest.java | 4 +- .../java/org/hbase/async/CompareFilter.java | 1 + .../java/org/hbase/async/DeleteRequest.java | 18 +++ .../java/org/hbase/async/HBaseClient.java | 24 +++- .../java/org/hbase/async/KeyRegexpFilter.java | 27 +++-- src/main/java/org/hbase/async/PutRequest.java | 12 +- .../hbase/async/RegexStringComparator.java | 4 +- src/main/java/org/hbase/async/Scanner.java | 108 ++++++++++++++++-- 9 files changed, 175 insertions(+), 36 deletions(-) diff --git a/pom.xml b/pom.xml index aaefbdd..99fe3c1 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ com.pythian.opentsdb asyncbigtable - 0.4.3 + 0.4.4 jar Async Bigtable library @@ -246,24 +246,17 @@ com.google.guava guava - 18.0 + 27.0-jre com.google.cloud.bigtable bigtable-hbase-2.x-hadoop - 1.23.0 + 2.15.5 - - org.slf4j - log4j-over-slf4j - 1.7.7 - runtime - - commons-lang commons-lang diff --git a/src/main/java/org/hbase/async/AppendRequest.java b/src/main/java/org/hbase/async/AppendRequest.java index 7efbd90..0a19c74 100644 --- a/src/main/java/org/hbase/async/AppendRequest.java +++ b/src/main/java/org/hbase/async/AppendRequest.java @@ -174,6 +174,7 @@ public AppendRequest(final byte[] table, * @param table The table to edit. * @param kv The {@link KeyValue} to store. */ + @SuppressWarnings("deprecation") public AppendRequest(final byte[] table, final KeyValue kv) { super(table, kv.key(), kv.family(), kv.timestamp(), RowLock.NO_LOCK); @@ -182,6 +183,7 @@ public AppendRequest(final byte[] table, } /** Private constructor. */ + @SuppressWarnings("deprecation") private AppendRequest(final byte[] table, final byte[] key, final byte[] family, @@ -271,4 +273,4 @@ public boolean returnResult() { return return_result; } -} \ No newline at end of file +} diff --git a/src/main/java/org/hbase/async/CompareFilter.java b/src/main/java/org/hbase/async/CompareFilter.java index 9684f4d..313517f 100644 --- a/src/main/java/org/hbase/async/CompareFilter.java +++ b/src/main/java/org/hbase/async/CompareFilter.java @@ -31,6 +31,7 @@ * operator (equal, greater, not equal, etc) and a filter comparator. * @since 1.6 */ +@SuppressWarnings("deprecation") public abstract class CompareFilter extends ScanFilter { /** Comparison operators. */ diff --git a/src/main/java/org/hbase/async/DeleteRequest.java b/src/main/java/org/hbase/async/DeleteRequest.java index 2a9567b..8d6cff4 100644 --- a/src/main/java/org/hbase/async/DeleteRequest.java +++ b/src/main/java/org/hbase/async/DeleteRequest.java @@ -66,6 +66,7 @@ public final class DeleteRequest extends BatchableRpc * @param key The key of the row to edit in that table. * @throws IllegalArgumentException if any argument is malformed. */ + @SuppressWarnings("deprecation") public DeleteRequest(final byte[] table, final byte[] key) { this(table, key, null, null, KeyValue.TIMESTAMP_NOW, RowLock.NO_LOCK); } @@ -78,6 +79,7 @@ public DeleteRequest(final byte[] table, final byte[] key) { * @param timestamp The timestamp to set on this edit. * @throws IllegalArgumentException if any argument is malformed. 
*/ + @SuppressWarnings("deprecation") public DeleteRequest(final byte[] table, final byte[] key, final long timestamp) { this(table, key, null, null, timestamp, RowLock.NO_LOCK); @@ -91,6 +93,7 @@ public DeleteRequest(final byte[] table, final byte[] key, * @param family The column family to edit in that table. * @throws IllegalArgumentException if any argument is malformed. */ + @SuppressWarnings("deprecation") public DeleteRequest(final byte[] table, final byte[] key, final byte[] family) { @@ -106,6 +109,7 @@ public DeleteRequest(final byte[] table, * @param timestamp The timestamp to set on this edit. * @throws IllegalArgumentException if any argument is malformed. */ + @SuppressWarnings("deprecation") public DeleteRequest(final byte[] table, final byte[] key, final byte[] family, @@ -123,6 +127,7 @@ public DeleteRequest(final byte[] table, * Can be {@code null} since version 1.1. * @throws IllegalArgumentException if any argument is malformed. */ + @SuppressWarnings("deprecation") public DeleteRequest(final byte[] table, final byte[] key, final byte[] family, @@ -143,6 +148,7 @@ public DeleteRequest(final byte[] table, * @param timestamp The timestamp to set on this edit. * @throws IllegalArgumentException if any argument is malformed. */ + @SuppressWarnings("deprecation") public DeleteRequest(final byte[] table, final byte[] key, final byte[] family, @@ -162,6 +168,7 @@ public DeleteRequest(final byte[] table, * @param qualifiers The column qualifiers to delete in that family. * @throws IllegalArgumentException if any argument is malformed. */ + @SuppressWarnings("deprecation") public DeleteRequest(final byte[] table, final byte[] key, final byte[] family, @@ -180,6 +187,7 @@ public DeleteRequest(final byte[] table, * @param timestamp The timestamp to set on this edit. * @throws IllegalArgumentException if any argument is malformed. */ + @SuppressWarnings("deprecation") public DeleteRequest(final byte[] table, final byte[] key, final byte[] family, @@ -199,6 +207,7 @@ public DeleteRequest(final byte[] table, * @deprecated * @throws IllegalArgumentException if any argument is malformed. */ + @SuppressWarnings("deprecation") public DeleteRequest(final byte[] table, final byte[] key, final byte[] family, @@ -221,6 +230,7 @@ public DeleteRequest(final byte[] table, * @deprecated * @throws IllegalArgumentException if any argument is malformed. */ + @SuppressWarnings("deprecation") public DeleteRequest(final byte[] table, final byte[] key, final byte[] family, @@ -244,6 +254,7 @@ public DeleteRequest(final byte[] table, * @deprecated * @throws IllegalArgumentException if any argument is malformed. */ + @SuppressWarnings("deprecation") public DeleteRequest(final byte[] table, final byte[] key, final byte[] family, @@ -265,6 +276,7 @@ public DeleteRequest(final byte[] table, * @deprecated * @throws IllegalArgumentException if any argument is malformed. */ + @SuppressWarnings("deprecation") public DeleteRequest(final byte[] table, final byte[] key, final byte[] family, @@ -280,6 +292,7 @@ public DeleteRequest(final byte[] table, * @param key The key of the row to edit in that table. * @throws IllegalArgumentException if any argument is malformed. */ + @SuppressWarnings("deprecation") public DeleteRequest(final String table, final String key) { this(table.getBytes(), key.getBytes(), null, null, KeyValue.TIMESTAMP_NOW, RowLock.NO_LOCK); @@ -292,6 +305,7 @@ public DeleteRequest(final String table, final String key) { * @param family The column family to edit in that table. 
* @throws IllegalArgumentException if any argument is malformed. */ + @SuppressWarnings("deprecation") public DeleteRequest(final String table, final String key, final String family) { @@ -308,6 +322,7 @@ public DeleteRequest(final String table, * Can be {@code null} since version 1.1. * @throws IllegalArgumentException if any argument is malformed. */ + @SuppressWarnings("deprecation") public DeleteRequest(final String table, final String key, final String family, @@ -328,6 +343,7 @@ public DeleteRequest(final String table, * @deprecated * @throws IllegalArgumentException if any argument is malformed. */ + @SuppressWarnings("deprecation") public DeleteRequest(final String table, final String key, final String family, @@ -345,6 +361,7 @@ public DeleteRequest(final String table, * {@link KeyValue} specifies a timestamp, then this specific timestamp only * will be deleted. */ + @SuppressWarnings("deprecation") public DeleteRequest(final byte[] table, final KeyValue kv) { this(table, kv.key(), kv.family(), new byte[][] { kv.qualifier() }, kv.timestamp(), RowLock.NO_LOCK); @@ -359,6 +376,7 @@ public DeleteRequest(final byte[] table, final KeyValue kv) { * @param lock Ignored * @deprecated */ + @SuppressWarnings("deprecation") public DeleteRequest(final byte[] table, final KeyValue kv, final RowLock lock) { diff --git a/src/main/java/org/hbase/async/HBaseClient.java b/src/main/java/org/hbase/async/HBaseClient.java index fd25d97..24c6f72 100644 --- a/src/main/java/org/hbase/async/HBaseClient.java +++ b/src/main/java/org/hbase/async/HBaseClient.java @@ -71,6 +71,7 @@ import org.slf4j.LoggerFactory; import com.google.cloud.bigtable.hbase.BigtableConfiguration; +import com.google.common.util.concurrent.ThreadFactoryBuilder; import com.google.common.cache.LoadingCache; import com.google.common.collect.Lists; import com.stumbleupon.async.Callback; @@ -99,8 +100,7 @@ public final class HBaseClient { * TODO(tsuna): Make the tick duration configurable? */ private final HashedWheelTimer - timer = new HashedWheelTimer(Threads.newDaemonThreadFactory("Flush-Timer"), 20, MILLISECONDS); - + timer = new HashedWheelTimer(new ThreadFactoryBuilder().setDaemon(true).setNameFormat("HBaseClient-Timer-%d").build(),20, MILLISECONDS); /** Up to how many milliseconds can we buffer an edit on the client side. 
*/ private volatile short flush_interval = 1000; // ms @@ -287,7 +287,21 @@ public HBaseClient(final Config config, final ExecutorService executor) { public HBaseClient(final Configuration configuration, final ExecutorService executor) { this.executor = executor; num_connections_created.increment(); - hbase_config = BigtableConfiguration.asyncConfigure(configuration); + + // --- FIX: Extract project/instance IDs and call the correct 'configure' method --- + final String projectId = configuration.get("google.bigtable.project.id"); + final String instanceId = configuration.get("google.bigtable.instance.id"); + + if (projectId == null || projectId.isEmpty()) { + throw new IllegalArgumentException("Missing required configuration: google.bigtable.project.id"); + } + if (instanceId == null || instanceId.isEmpty()) { + throw new IllegalArgumentException("Missing required configuration: google.bigtable.instance.id"); + } + + hbase_config = BigtableConfiguration.configure(configuration, projectId, instanceId); + // --- END FIX --- + LOG.info("BigTable API: Connecting with config: {}", hbase_config); try { @@ -609,6 +623,7 @@ public Deferred ensureTableFamilyExists(final String table, * @throws TableNotFoundException (deferred) if the table doesn't exist. * @throws NoSuchColumnFamilyException (deferred) if the family doesn't exist. */ + @SuppressWarnings("unchecked") public Deferred ensureTableFamilyExists(final byte[] table, final byte[] family) { if (LOG.isDebugEnabled()) { LOG.debug("BigTable API: Checking if table [{}] and family [{}] exist", Bytes.pretty(table), @@ -1177,6 +1192,7 @@ public Deferred put(final PutRequest request) { * (think of it as {@code Deferred}). But you probably want to attach * at least an errback to this {@code Deferred} to handle failures. */ + @SuppressWarnings("deprecation") public Deferred append(final AppendRequest request) { num_appends.increment(); @@ -1217,6 +1233,7 @@ public Deferred append(final AppendRequest request) { * the CAS failed because the value in BigTable didn't match the expected value * of the CAS request. */ + @SuppressWarnings("deprecation") public Deferred compareAndSet(final PutRequest edit, final byte[] expected) { @@ -1271,6 +1288,7 @@ public Deferred compareAndSet(final PutRequest edit, * inserted in BigTable, {@code false} if there was already a value in the * given cell. */ + @SuppressWarnings("deprecation") public Deferred atomicCreate(final PutRequest edit) { return compareAndSet(edit, EMPTY_ARRAY); } diff --git a/src/main/java/org/hbase/async/KeyRegexpFilter.java b/src/main/java/org/hbase/async/KeyRegexpFilter.java index 68847c2..0df53d1 100644 --- a/src/main/java/org/hbase/async/KeyRegexpFilter.java +++ b/src/main/java/org/hbase/async/KeyRegexpFilter.java @@ -29,6 +29,8 @@ import org.apache.hadoop.hbase.filter.ByteArrayComparable; import org.apache.hadoop.hbase.filter.CompareFilter; import org.apache.hadoop.hbase.filter.Filter; +// Import the correct comparator for raw byte regex +import org.apache.hadoop.hbase.filter.BinaryRegexComparator; import org.apache.hadoop.hbase.filter.RegexStringComparator; import org.apache.hadoop.hbase.filter.RowFilter; import org.apache.hbase.thirdparty.io.netty.util.CharsetUtil; @@ -147,17 +149,21 @@ public Charset getCharset() { @Override Filter getBigtableFilter() { + /* + * PHASE 3 FIX: The previous implementation used RegexStringComparator and a fragile + * reflection hack (shown below, commented out) to inject raw bytes, bypassing charset issues. 
+ * + * We replace this with BinaryRegexComparator, which operates directly on byte arrays. + * This is stable and correct for OpenTSDB's raw byte key structure. + */ + + // Original implementation using reflection: + /* RegexStringComparator comparator = new RegexStringComparator(regexp_string); comparator.setCharset(charset_object); try { // WARNING: This is some ugly ass code. It WILL break at some point. - // Bigtable uses RE2 and runs in raw byte mode. TSDB writes regex with - // byte values but when passing it through the HTable APIs it's converted - // to UTF and serializes differently than the old AsyncHBase client. The - // native BigTable client will pass the regex along properly BUT we need - // to bypass the RegexStringComparator methods and inject our ASCII regex - // directly into the underlying comparator object. Hopefully this is - // temporary (famous last words) until we get to a native Bigtable wrapper. + // ... final Field field = ByteArrayComparable.class.getDeclaredField("value"); field.setAccessible(true); field.set(comparator, regexp_string.getBytes(HBaseClient.ASCII)); @@ -170,7 +176,12 @@ Filter getBigtableFilter() { throw new RuntimeException("Access denied when hacking the " + "regex comparator field", e); } + */ + + // New robust implementation: + // We use the raw 'regexp' bytes provided during construction. + BinaryRegexComparator comparator = new BinaryRegexComparator(this.regexp); + return new RowFilter(CompareFilter.CompareOp.EQUAL, comparator); } } - diff --git a/src/main/java/org/hbase/async/PutRequest.java b/src/main/java/org/hbase/async/PutRequest.java index fbcf072..34446e6 100644 --- a/src/main/java/org/hbase/async/PutRequest.java +++ b/src/main/java/org/hbase/async/PutRequest.java @@ -95,6 +95,7 @@ public final class PutRequest extends BatchableRpc * @param qualifier The column qualifier to edit in that family. * @param value The value to store. */ + @SuppressWarnings("deprecation") public PutRequest(final byte[] table, final byte[] key, final byte[] family, @@ -121,6 +122,7 @@ public PutRequest(final byte[] table, * @throws IllegalArgumentException if {@code qualifiers.length == 0} * or if {@code qualifiers.length != values.length} */ + @SuppressWarnings("deprecation") public PutRequest(final byte[] table, final byte[] key, final byte[] family, @@ -140,6 +142,7 @@ public PutRequest(final byte[] table, * @param value The value to store. * @param timestamp The timestamp to set on this edit. 
*/ + @SuppressWarnings("deprecation") public PutRequest(final byte[] table, final byte[] key, final byte[] family, @@ -161,7 +164,8 @@ public PutRequest(final byte[] table, * @throws IllegalArgumentException if {@code qualifiers.length == 0} * or if {@code qualifiers.length != values.length} */ - public PutRequest(final byte[] table, + @SuppressWarnings("deprecation") + public PutRequest(final byte[] table, final byte[] key, final byte[] family, final byte[][] qualifiers, @@ -187,6 +191,7 @@ public PutRequest(final byte[] table, * @param lock Ignored * @deprecated */ + @SuppressWarnings("deprecation") public PutRequest(final byte[] table, final byte[] key, final byte[] family, @@ -233,6 +238,7 @@ public PutRequest(final byte[] table, * @throws IllegalArgumentException if {@code qualifiers.length == 0} * or if {@code qualifiers.length != values.length} */ + @SuppressWarnings("deprecation") public PutRequest(final byte[] table, final byte[] key, final byte[] family, @@ -257,6 +263,7 @@ public PutRequest(final byte[] table, * @param qualifier The column qualifier to edit in that family. * @param value The value to store. */ + @SuppressWarnings("deprecation") public PutRequest(final String table, final String key, final String family, @@ -283,6 +290,7 @@ public PutRequest(final String table, * @param lock Ignored * @deprecated */ + @SuppressWarnings("deprecation") public PutRequest(final String table, final String key, final String family, @@ -299,6 +307,7 @@ public PutRequest(final String table, * @param table The table to edit. * @param kv The {@link KeyValue} to store. */ + @SuppressWarnings("deprecation") public PutRequest(final byte[] table, final KeyValue kv) { this(table, kv, RowLock.NO_LOCK); @@ -311,6 +320,7 @@ public PutRequest(final byte[] table, * @param lock Ignored * @deprecated */ + @SuppressWarnings("deprecation") public PutRequest(final byte[] table, final KeyValue kv, final RowLock lock) { diff --git a/src/main/java/org/hbase/async/RegexStringComparator.java b/src/main/java/org/hbase/async/RegexStringComparator.java index b80368e..e7b85bc 100644 --- a/src/main/java/org/hbase/async/RegexStringComparator.java +++ b/src/main/java/org/hbase/async/RegexStringComparator.java @@ -30,7 +30,7 @@ import org.apache.hadoop.hbase.filter.ByteArrayComparable; -import com.google.common.base.Charsets; +import java.nio.charset.StandardCharsets; /** * A regular expression comparator used in comparison filters such as RowFilter, @@ -68,7 +68,7 @@ public final class RegexStringComparator extends FilterComparator { * @param expr The regular expression with which to filter. 
*/ public RegexStringComparator(String expr) { - this(expr, Charsets.UTF_8); + this(expr, StandardCharsets.UTF_8); } /** diff --git a/src/main/java/org/hbase/async/Scanner.java b/src/main/java/org/hbase/async/Scanner.java index 479593e..e18eefc 100644 --- a/src/main/java/org/hbase/async/Scanner.java +++ b/src/main/java/org/hbase/async/Scanner.java @@ -33,6 +33,11 @@ import java.util.ArrayList; import java.util.NavigableMap; +// Imports required for Phase 2 fix (Explicit RowRange construction) +import com.google.bigtable.v2.RowRange; +import com.google.protobuf.ByteString; + +import com.google.cloud.bigtable.hbase.BigtableExtendedScan; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; @@ -82,7 +87,7 @@ public final class Scanner { private static final Logger LOG = LoggerFactory.getLogger(Scanner.class); /** HBase API scan instance */ - private final Scan hbase_scan; + private final BigtableExtendedScan hbase_scan; /** * HBase API ResultScanner. After the scan is submitted is must not be null. @@ -188,7 +193,7 @@ public Scanner(final HBaseClient client, final byte[] table) { this.client = client; this.table = table; - hbase_scan = new Scan(); + hbase_scan = new BigtableExtendedScan(); } /** @@ -210,8 +215,6 @@ public void setStartKey(final byte[] start_key) { KeyValue.checkKey(start_key); checkScanningNotStarted(); this.start_key = start_key; - - hbase_scan.setStartRow(start_key); } /** @@ -235,8 +238,6 @@ public void setStopKey(final byte[] stop_key) { KeyValue.checkKey(stop_key); checkScanningNotStarted(); this.stop_key = stop_key; - - hbase_scan.setStopRow(stop_key); } /** @@ -522,7 +523,7 @@ public void setMaxVersions(final int versions) { checkScanningNotStarted(); this.versions = versions; - hbase_scan.setMaxVersions(versions); + hbase_scan.readVersions(versions); } /** @@ -671,11 +672,70 @@ public void setTimeRange(final long min_timestamp, final long max_timestamp) { throw new IllegalArgumentException("Invalid time range", e); } } - - /** @return the HTable scan object */ - Scan getHbaseScan() { + +/** @return the HTable scan object */ + BigtableExtendedScan getHbaseScan() { + // ---[ PHASE 1: Whitebox Logging - Inputs ]--- + // Log the inputs provided to the shim. + LOG.info("DEBUG-SHIM: Preparing Scan. StartKey(Hex)={}, StopKey(Hex)={}, Filter={}", + Bytes.hex(this.start_key), Bytes.hex(this.stop_key), this.filter); + // ---[ End Logging ]--- + + // ---[ PHASE 2 FIX: Explicit Row Range Construction ]--- + // The previous logic used: hbase_scan.addRange(this.start_key, this.stop_key); + // This convenience wrapper might not handle EMPTY_ARRAY correctly for unbounded ranges + // in the new driver. We explicitly use the Protobuf builder instead for robustness. + + final RowRange.Builder rangeBuilder = RowRange.newBuilder(); + boolean rangeSet = false; + + if (this.start_key != null && this.start_key.length > 0) { + // Inclusive start key + rangeBuilder.setStartKeyClosed(ByteString.copyFrom(this.start_key)); + rangeSet = true; + } else { + LOG.info("DEBUG-SHIM: Start key empty, treating as start of table."); + } + + if (this.stop_key != null && this.stop_key.length > 0) { + // Exclusive stop key + rangeBuilder.setEndKeyOpen(ByteString.copyFrom(this.stop_key)); + rangeSet = true; + } else { + LOG.info("DEBUG-SHIM: Stop key empty, treating as end of table."); + } + + // If boundaries were set, add the range. If neither, Bigtable defaults to a full scan implicitly. 
+ if (rangeSet) { + hbase_scan.addRange(rangeBuilder.build()); + } + // ---[ End Phase 2 Fix ]--- + + // ---[ PHASE 1: Whitebox Logging - Outputs (Ranges) ]--- + try { + // 1. Log Legacy HBase API fields (should be empty/null if using BigtableExtendedScan correctly) + byte[] legacyStart = hbase_scan.getStartRow(); + byte[] legacyStop = hbase_scan.getStopRow(); + LOG.info("DEBUG-SHIM: Driver Legacy StartRow(Hex)={}, StopRow(Hex)={}", + legacyStart == null ? "NULL" : Bytes.hex(legacyStart), + legacyStop == null ? "NULL" : Bytes.hex(legacyStop)); + + // 2. Log the Bigtable RowSet (CRITICAL: This defines the actual ranges sent to Bigtable) + if (hbase_scan.getRowSet() != null) { + // This logs the Protobuf structure (e.g., start_key_closed, end_key_open). + LOG.info("DEBUG-SHIM: Driver RowSet={}", hbase_scan.getRowSet().toString()); + } else { + // This is expected ONLY if rangeSet was false (full table scan). + LOG.info("DEBUG-SHIM: Driver RowSet is NULL (Indicates Full Table Scan)."); + } + } catch (Exception e) { + LOG.error("DEBUG-SHIM: Exception during post-addRange introspection logging", e); + } + // ---[ End Logging ]--- + // setup the filters if (filter == null) { + LOG.info("DEBUG-SHIM: No filter attached."); return hbase_scan; } @@ -693,7 +753,33 @@ Scan getHbaseScan() { // hbase_scan.setFilter(((KeyRegexpFilter)filter).getRegexFilterForBigtable()); // return hbase_scan; // } - hbase_scan.setFilter(filter.getBigtableFilter()); + + // ---[ PHASE 3: Filter Attachment (Relies on KeyRegexpFilter fix) ]--- + try { + // This calls filter.getBigtableFilter(). With the Phase 3 fix applied (BinaryRegexComparator), + // this should now correctly generate the filter without reflection errors. + hbase_scan.setFilter(filter.getBigtableFilter()); + + // ---[ PHASE 1: Whitebox Logging - Outputs (Filters) ]--- + if (hbase_scan.getFilter() != null) { + LOG.info("DEBUG-SHIM: Driver Filter Attached={}", hbase_scan.getFilter().toString()); + } else { + // This might happen if filter.getBigtableFilter() returned null unexpectedly. + LOG.error("DEBUG-SHIM: CRITICAL: Driver Filter is NULL after attachment attempt."); + } + // ---[ End Logging ]--- + + } catch (Exception e) { + // Catch failures during filter creation (if Phase 3 fix was missed or incomplete). + LOG.error("DEBUG-SHIM: CRITICAL: Failed to create or attach filter. Check KeyRegexpFilter implementation.", e); + // Rethrow to ensure failure is visible during testing. + if (e instanceof RuntimeException) { + throw (RuntimeException) e; + } + throw new RuntimeException("Failed during scan filter attachment.", e); + } + // ---[ End Phase 3 ]--- + return hbase_scan; } From 2f3dc32f85cd0c9cd7fa0a194c651ae83b37424d Mon Sep 17 00:00:00 2001 From: xlfe Date: Wed, 19 Nov 2025 08:19:04 +1100 Subject: [PATCH 2/6] 1. Dependency Update (`pom.xml`): * Switched from the shaded bigtable-hbase-2.x-hadoop artifact to bigtable-hbase-2.x. This exposed the necessary transitive dependencies (like com.google.bigtable.v2.RowRange and com.google.protobuf) required by Scanner.java to implement the "Phase 2 Fix". 2. Removed Incompatible Code (`jsr166e`): * Deleted the org.hbase.async.jsr166e package. These classes (LongAdder, Striped64) contained Unsafe and AccessController usage that is restricted in modern JDKs (Java 21). * Updated src/main/java/org/hbase/async/Counter.java to use the standard java.util.concurrent.atomic.LongAdder available since Java 8. 3. 
Fixed Compilation Errors in `Scanner.java`: * Replaced calls to the non-existent Bytes.hex(byte[]) method with Bytes.pretty(byte[]) in the debug logging statements. * Resolved package com.google.bigtable.v2 does not exist and package RowRange does not exist errors via the pom.xml dependency switch. 4. Fixed `KeyRegexpFilter.java`: * Replaced the missing BinaryRegexComparator with a robust fallback using RegexStringComparator. * Configured it to use ISO-8859-1 charset to safely perform "binary" regex matching on raw byte keys, mimicking the intended behavior of the missing comparator. 1. Added Zookeeper Dependency: * Added org.apache.zookeeper:zookeeper:3.4.14 to pom.xml. * Included exclusions for slf4j-log4j12 and log4j to prevent the "circular logging" conflicts mentioned in the project history. --- pom.xml | 18 +- src/main/java/org/hbase/async/Counter.java | 2 +- .../java/org/hbase/async/KeyRegexpFilter.java | 10 +- src/main/java/org/hbase/async/Scanner.java | 6 +- .../org/hbase/async/jsr166e/LongAdder.java | 202 ----------- src/main/java/org/hbase/async/jsr166e/README | 14 - .../org/hbase/async/jsr166e/Striped64.java | 340 ------------------ .../org/hbase/async/jsr166e/package-info.java | 32 -- 8 files changed, 26 insertions(+), 598 deletions(-) delete mode 100644 src/main/java/org/hbase/async/jsr166e/LongAdder.java delete mode 100644 src/main/java/org/hbase/async/jsr166e/README delete mode 100644 src/main/java/org/hbase/async/jsr166e/Striped64.java delete mode 100644 src/main/java/org/hbase/async/jsr166e/package-info.java diff --git a/pom.xml b/pom.xml index 99fe3c1..08ae9cb 100644 --- a/pom.xml +++ b/pom.xml @@ -251,10 +251,26 @@ com.google.cloud.bigtable - bigtable-hbase-2.x-hadoop + bigtable-hbase-2.x 2.15.5 + + org.apache.zookeeper + zookeeper + 3.4.14 + + + org.slf4j + slf4j-log4j12 + + + log4j + log4j + + + + diff --git a/src/main/java/org/hbase/async/Counter.java b/src/main/java/org/hbase/async/Counter.java index d29875c..2d24a61 100644 --- a/src/main/java/org/hbase/async/Counter.java +++ b/src/main/java/org/hbase/async/Counter.java @@ -26,7 +26,7 @@ */ package org.hbase.async; -import org.hbase.async.jsr166e.LongAdder; +import java.util.concurrent.atomic.LongAdder; /** * An atomic counter to replace {@link java.util.concurrent.atomic.AtomicLong}. diff --git a/src/main/java/org/hbase/async/KeyRegexpFilter.java b/src/main/java/org/hbase/async/KeyRegexpFilter.java index 0df53d1..aa68cf4 100644 --- a/src/main/java/org/hbase/async/KeyRegexpFilter.java +++ b/src/main/java/org/hbase/async/KeyRegexpFilter.java @@ -29,8 +29,6 @@ import org.apache.hadoop.hbase.filter.ByteArrayComparable; import org.apache.hadoop.hbase.filter.CompareFilter; import org.apache.hadoop.hbase.filter.Filter; -// Import the correct comparator for raw byte regex -import org.apache.hadoop.hbase.filter.BinaryRegexComparator; import org.apache.hadoop.hbase.filter.RegexStringComparator; import org.apache.hadoop.hbase.filter.RowFilter; import org.apache.hbase.thirdparty.io.netty.util.CharsetUtil; @@ -178,9 +176,11 @@ Filter getBigtableFilter() { } */ - // New robust implementation: - // We use the raw 'regexp' bytes provided during construction. - BinaryRegexComparator comparator = new BinaryRegexComparator(this.regexp); + // Fallback implementation: + // Use RegexStringComparator with ISO-8859-1 (Latin-1) encoding. + // This maps bytes 0-255 to chars 0-255 1:1, allowing "binary" regex matching. 
+ RegexStringComparator comparator = new RegexStringComparator(new String(this.regexp, CharsetUtil.ISO_8859_1)); + comparator.setCharset(CharsetUtil.ISO_8859_1); // Ensure server side uses same charset return new RowFilter(CompareFilter.CompareOp.EQUAL, comparator); } diff --git a/src/main/java/org/hbase/async/Scanner.java b/src/main/java/org/hbase/async/Scanner.java index e18eefc..0516a92 100644 --- a/src/main/java/org/hbase/async/Scanner.java +++ b/src/main/java/org/hbase/async/Scanner.java @@ -678,7 +678,7 @@ BigtableExtendedScan getHbaseScan() { // ---[ PHASE 1: Whitebox Logging - Inputs ]--- // Log the inputs provided to the shim. LOG.info("DEBUG-SHIM: Preparing Scan. StartKey(Hex)={}, StopKey(Hex)={}, Filter={}", - Bytes.hex(this.start_key), Bytes.hex(this.stop_key), this.filter); + Bytes.pretty(this.start_key), Bytes.pretty(this.stop_key), this.filter); // ---[ End Logging ]--- // ---[ PHASE 2 FIX: Explicit Row Range Construction ]--- @@ -717,8 +717,8 @@ BigtableExtendedScan getHbaseScan() { byte[] legacyStart = hbase_scan.getStartRow(); byte[] legacyStop = hbase_scan.getStopRow(); LOG.info("DEBUG-SHIM: Driver Legacy StartRow(Hex)={}, StopRow(Hex)={}", - legacyStart == null ? "NULL" : Bytes.hex(legacyStart), - legacyStop == null ? "NULL" : Bytes.hex(legacyStop)); + legacyStart == null ? "NULL" : Bytes.pretty(legacyStart), + legacyStop == null ? "NULL" : Bytes.pretty(legacyStop)); // 2. Log the Bigtable RowSet (CRITICAL: This defines the actual ranges sent to Bigtable) if (hbase_scan.getRowSet() != null) { diff --git a/src/main/java/org/hbase/async/jsr166e/LongAdder.java b/src/main/java/org/hbase/async/jsr166e/LongAdder.java deleted file mode 100644 index 6ce53b6..0000000 --- a/src/main/java/org/hbase/async/jsr166e/LongAdder.java +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Written by Doug Lea with assistance from members of JCP JSR-166 - * Expert Group and released to the public domain, as explained at - * http://creativecommons.org/publicdomain/zero/1.0/ - */ - -package org.hbase.async.jsr166e; -import java.util.concurrent.atomic.AtomicLong; -import java.io.IOException; -import java.io.Serializable; -import java.io.ObjectInputStream; -import java.io.ObjectOutputStream; - -/** - * One or more variables that together maintain an initially zero - * {@code long} sum. When updates (method {@link #add}) are contended - * across threads, the set of variables may grow dynamically to reduce - * contention. Method {@link #sum} (or, equivalently, {@link - * #longValue}) returns the current total combined across the - * variables maintaining the sum. - * - *
- * <p>This class is usually preferable to {@link AtomicLong} when
- * multiple threads update a common sum that is used for purposes such
- * as collecting statistics, not for fine-grained synchronization
- * control. Under low update contention, the two classes have similar
- * characteristics. But under high contention, expected throughput of
- * this class is significantly higher, at the expense of higher space
- * consumption.
- *
- * <p>This class extends {@link Number}, but does not define
- * methods such as {@code hashCode} and {@code compareTo} because
- * instances are expected to be mutated, and so are not useful as
- * collection keys.
- *
- * <p>
jsr166e note: This class is targeted to be placed in - * java.util.concurrent.atomic - * - * @since 1.8 - * @author Doug Lea - */ -public class LongAdder extends Striped64 implements Serializable { - private static final long serialVersionUID = 7249069246863182397L; - - /** - * Version of plus for use in retryUpdate - */ - final long fn(long v, long x) { return v + x; } - - /** - * Creates a new adder with initial sum of zero. - */ - public LongAdder() { - } - - /** - * Adds the given value. - * - * @param x the value to add - */ - public void add(long x) { - Cell[] as; long b, v; HashCode hc; Cell a; int n; - if ((as = cells) != null || !casBase(b = base, b + x)) { - boolean uncontended = true; - int h = (hc = threadHashCode.get()).code; - if (as == null || (n = as.length) < 1 || - (a = as[(n - 1) & h]) == null || - !(uncontended = a.cas(v = a.value, v + x))) - retryUpdate(x, hc, uncontended); - } - } - - /** - * Equivalent to {@code add(1)}. - */ - public void increment() { - add(1L); - } - - /** - * Equivalent to {@code add(-1)}. - */ - public void decrement() { - add(-1L); - } - - /** - * Returns the current sum. The returned value is NOT an - * atomic snapshot: Invocation in the absence of concurrent - * updates returns an accurate result, but concurrent updates that - * occur while the sum is being calculated might not be - * incorporated. - * - * @return the sum - */ - public long sum() { - long sum = base; - Cell[] as = cells; - if (as != null) { - int n = as.length; - for (int i = 0; i < n; ++i) { - Cell a = as[i]; - if (a != null) - sum += a.value; - } - } - return sum; - } - - /** - * Resets variables maintaining the sum to zero. This method may - * be a useful alternative to creating a new adder, but is only - * effective if there are no concurrent updates. Because this - * method is intrinsically racy, it should only be used when it is - * known that no threads are concurrently updating. - */ - public void reset() { - internalReset(0L); - } - - /** - * Equivalent in effect to {@link #sum} followed by {@link - * #reset}. This method may apply for example during quiescent - * points between multithreaded computations. If there are - * updates concurrent with this method, the returned value is - * not guaranteed to be the final value occurring before - * the reset. - * - * @return the sum - */ - public long sumThenReset() { - long sum = base; - Cell[] as = cells; - base = 0L; - if (as != null) { - int n = as.length; - for (int i = 0; i < n; ++i) { - Cell a = as[i]; - if (a != null) { - sum += a.value; - a.value = 0L; - } - } - } - return sum; - } - - /** - * Returns the String representation of the {@link #sum}. - * @return the String representation of the {@link #sum} - */ - public String toString() { - return Long.toString(sum()); - } - - /** - * Equivalent to {@link #sum}. - * - * @return the sum - */ - public long longValue() { - return sum(); - } - - /** - * Returns the {@link #sum} as an {@code int} after a narrowing - * primitive conversion. - */ - public int intValue() { - return (int)sum(); - } - - /** - * Returns the {@link #sum} as a {@code float} - * after a widening primitive conversion. - */ - public float floatValue() { - return (float)sum(); - } - - /** - * Returns the {@link #sum} as a {@code double} after a widening - * primitive conversion. 
- */ - public double doubleValue() { - return (double)sum(); - } - - private void writeObject(java.io.ObjectOutputStream s) - throws java.io.IOException { - s.defaultWriteObject(); - s.writeLong(sum()); - } - - private void readObject(ObjectInputStream s) - throws IOException, ClassNotFoundException { - s.defaultReadObject(); - busy = 0; - cells = null; - base = s.readLong(); - } - -} diff --git a/src/main/java/org/hbase/async/jsr166e/README b/src/main/java/org/hbase/async/jsr166e/README deleted file mode 100644 index c1449fb..0000000 --- a/src/main/java/org/hbase/async/jsr166e/README +++ /dev/null @@ -1,14 +0,0 @@ -The contents of this directory contains code from JSR 166e. -** THIS IS NOT PART OF THE PUBLIC INTERFACE OF ASYNCHBASE ** - -Code was downloaded from: - http://gee.cs.oswego.edu/cgi-bin/viewcvs.cgi/jsr166/src/jsr166e/ - -The code was released to the public domain, as explained at - http://creativecommons.org/publicdomain/zero/1.0/ - -The code is bundled in asynchbase as it is not available until JDK8 -becomes a reality, which is expected to take years (at time of writing). -Ideally another library, such as Google Guava, would provide this -code until JDK8 becomes the norm. But in the mean time it's here -for asynchbase's internal use. diff --git a/src/main/java/org/hbase/async/jsr166e/Striped64.java b/src/main/java/org/hbase/async/jsr166e/Striped64.java deleted file mode 100644 index e5a6dee..0000000 --- a/src/main/java/org/hbase/async/jsr166e/Striped64.java +++ /dev/null @@ -1,340 +0,0 @@ -/* - * Written by Doug Lea with assistance from members of JCP JSR-166 - * Expert Group and released to the public domain, as explained at - * http://creativecommons.org/publicdomain/zero/1.0/ - */ - -package org.hbase.async.jsr166e; -import java.util.Random; - -/** - * A package-local class holding common representation and mechanics - * for classes supporting dynamic striping on 64bit values. The class - * extends Number so that concrete subclasses must publicly do so. - */ -abstract class Striped64 extends Number { - /* - * This class maintains a lazily-initialized table of atomically - * updated variables, plus an extra "base" field. The table size - * is a power of two. Indexing uses masked per-thread hash codes. - * Nearly all declarations in this class are package-private, - * accessed directly by subclasses. - * - * Table entries are of class Cell; a variant of AtomicLong padded - * to reduce cache contention on most processors. Padding is - * overkill for most Atomics because they are usually irregularly - * scattered in memory and thus don't interfere much with each - * other. But Atomic objects residing in arrays will tend to be - * placed adjacent to each other, and so will most often share - * cache lines (with a huge negative performance impact) without - * this precaution. - * - * In part because Cells are relatively large, we avoid creating - * them until they are needed. When there is no contention, all - * updates are made to the base field. Upon first contention (a - * failed CAS on base update), the table is initialized to size 2. - * The table size is doubled upon further contention until - * reaching the nearest power of two greater than or equal to the - * number of CPUS. Table slots remain empty (null) until they are - * needed. - * - * A single spinlock ("busy") is used for initializing and - * resizing the table, as well as populating slots with new Cells. 
- * There is no need for a blocking lock: When the lock is not - * available, threads try other slots (or the base). During these - * retries, there is increased contention and reduced locality, - * which is still better than alternatives. - * - * Per-thread hash codes are initialized to random values. - * Contention and/or table collisions are indicated by failed - * CASes when performing an update operation (see method - * retryUpdate). Upon a collision, if the table size is less than - * the capacity, it is doubled in size unless some other thread - * holds the lock. If a hashed slot is empty, and lock is - * available, a new Cell is created. Otherwise, if the slot - * exists, a CAS is tried. Retries proceed by "double hashing", - * using a secondary hash (Marsaglia XorShift) to try to find a - * free slot. - * - * The table size is capped because, when there are more threads - * than CPUs, supposing that each thread were bound to a CPU, - * there would exist a perfect hash function mapping threads to - * slots that eliminates collisions. When we reach capacity, we - * search for this mapping by randomly varying the hash codes of - * colliding threads. Because search is random, and collisions - * only become known via CAS failures, convergence can be slow, - * and because threads are typically not bound to CPUS forever, - * may not occur at all. However, despite these limitations, - * observed contention rates are typically low in these cases. - * - * It is possible for a Cell to become unused when threads that - * once hashed to it terminate, as well as in the case where - * doubling the table causes no thread to hash to it under - * expanded mask. We do not try to detect or remove such cells, - * under the assumption that for long-running instances, observed - * contention levels will recur, so the cells will eventually be - * needed again; and for short-lived ones, it does not matter. - */ - - /** - * Padded variant of AtomicLong supporting only raw accesses plus CAS. - * The value field is placed between pads, hoping that the JVM doesn't - * reorder them. - * - * JVM intrinsics note: It would be possible to use a release-only - * form of CAS here, if it were provided. - */ - static final class Cell { - volatile long p0, p1, p2, p3, p4, p5, p6; - volatile long value; - volatile long q0, q1, q2, q3, q4, q5, q6; - Cell(long x) { value = x; } - - final boolean cas(long cmp, long val) { - return UNSAFE.compareAndSwapLong(this, valueOffset, cmp, val); - } - - // Unsafe mechanics - private static final sun.misc.Unsafe UNSAFE; - private static final long valueOffset; - static { - try { - UNSAFE = getUnsafe(); - Class ak = Cell.class; - valueOffset = UNSAFE.objectFieldOffset - (ak.getDeclaredField("value")); - } catch (Exception e) { - throw new Error(e); - } - } - - } - - /** - * Holder for the thread-local hash code. The code is initially - * random, but may be set to a different value upon collisions. - */ - static final class HashCode { - static final Random rng = new Random(); - int code; - HashCode() { - int h = rng.nextInt(); // Avoid zero to allow xorShift rehash - code = (h == 0) ? 1 : h; - } - } - - /** - * The corresponding ThreadLocal class - */ - static final class ThreadHashCode extends ThreadLocal { - public HashCode initialValue() { return new HashCode(); } - } - - /** - * Static per-thread hash codes. Shared across all instances to - * reduce ThreadLocal pollution and because adjustments due to - * collisions in one table are likely to be appropriate for - * others. 
- */ - static final ThreadHashCode threadHashCode = new ThreadHashCode(); - - /** Number of CPUS, to place bound on table size */ - static final int NCPU = Runtime.getRuntime().availableProcessors(); - - /** - * Table of cells. When non-null, size is a power of 2. - */ - transient volatile Cell[] cells; - - /** - * Base value, used mainly when there is no contention, but also as - * a fallback during table initialization races. Updated via CAS. - */ - transient volatile long base; - - /** - * Spinlock (locked via CAS) used when resizing and/or creating Cells. - */ - transient volatile int busy; - - /** - * Package-private default constructor - */ - Striped64() { - } - - /** - * CASes the base field. - */ - final boolean casBase(long cmp, long val) { - return UNSAFE.compareAndSwapLong(this, baseOffset, cmp, val); - } - - /** - * CASes the busy field from 0 to 1 to acquire lock. - */ - final boolean casBusy() { - return UNSAFE.compareAndSwapInt(this, busyOffset, 0, 1); - } - - /** - * Computes the function of current and new value. Subclasses - * should open-code this update function for most uses, but the - * virtualized form is needed within retryUpdate. - * - * @param currentValue the current value (of either base or a cell) - * @param newValue the argument from a user update call - * @return result of the update function - */ - abstract long fn(long currentValue, long newValue); - - /** - * Handles cases of updates involving initialization, resizing, - * creating new Cells, and/or contention. See above for - * explanation. This method suffers the usual non-modularity - * problems of optimistic retry code, relying on rechecked sets of - * reads. - * - * @param x the value - * @param hc the hash code holder - * @param wasUncontended false if CAS failed before call - */ - final void retryUpdate(long x, HashCode hc, boolean wasUncontended) { - int h = hc.code; - boolean collide = false; // True if last slot nonempty - for (;;) { - Cell[] as; Cell a; int n; long v; - if ((as = cells) != null && (n = as.length) > 0) { - if ((a = as[(n - 1) & h]) == null) { - if (busy == 0) { // Try to attach new Cell - Cell r = new Cell(x); // Optimistically create - if (busy == 0 && casBusy()) { - boolean created = false; - try { // Recheck under lock - Cell[] rs; int m, j; - if ((rs = cells) != null && - (m = rs.length) > 0 && - rs[j = (m - 1) & h] == null) { - rs[j] = r; - created = true; - } - } finally { - busy = 0; - } - if (created) - break; - continue; // Slot is now non-empty - } - } - collide = false; - } - else if (!wasUncontended) // CAS already known to fail - wasUncontended = true; // Continue after rehash - else if (a.cas(v = a.value, fn(v, x))) - break; - else if (n >= NCPU || cells != as) - collide = false; // At max size or stale - else if (!collide) - collide = true; - else if (busy == 0 && casBusy()) { - try { - if (cells == as) { // Expand table unless stale - Cell[] rs = new Cell[n << 1]; - for (int i = 0; i < n; ++i) - rs[i] = as[i]; - cells = rs; - } - } finally { - busy = 0; - } - collide = false; - continue; // Retry with expanded table - } - h ^= h << 13; // Rehash - h ^= h >>> 17; - h ^= h << 5; - } - else if (busy == 0 && cells == as && casBusy()) { - boolean init = false; - try { // Initialize table - if (cells == as) { - Cell[] rs = new Cell[2]; - rs[h & 1] = new Cell(x); - cells = rs; - init = true; - } - } finally { - busy = 0; - } - if (init) - break; - } - else if (casBase(v = base, fn(v, x))) - break; // Fall back on using base - } - hc.code = h; // Record index for next 
time - } - - - /** - * Sets base and all cells to the given value. - */ - final void internalReset(long initialValue) { - Cell[] as = cells; - base = initialValue; - if (as != null) { - int n = as.length; - for (int i = 0; i < n; ++i) { - Cell a = as[i]; - if (a != null) - a.value = initialValue; - } - } - } - - // Unsafe mechanics - private static final sun.misc.Unsafe UNSAFE; - private static final long baseOffset; - private static final long busyOffset; - static { - try { - UNSAFE = getUnsafe(); - Class sk = Striped64.class; - baseOffset = UNSAFE.objectFieldOffset - (sk.getDeclaredField("base")); - busyOffset = UNSAFE.objectFieldOffset - (sk.getDeclaredField("busy")); - } catch (Exception e) { - throw new Error(e); - } - } - - /** - * Returns a sun.misc.Unsafe. Suitable for use in a 3rd party package. - * Replace with a simple call to Unsafe.getUnsafe when integrating - * into a jdk. - * - * @return a sun.misc.Unsafe - */ - private static sun.misc.Unsafe getUnsafe() { - try { - return sun.misc.Unsafe.getUnsafe(); - } catch (SecurityException se) { - try { - return java.security.AccessController.doPrivileged - (new java.security - .PrivilegedExceptionAction() { - public sun.misc.Unsafe run() throws Exception { - java.lang.reflect.Field f = sun.misc - .Unsafe.class.getDeclaredField("theUnsafe"); - f.setAccessible(true); - return (sun.misc.Unsafe) f.get(null); - }}); - } catch (java.security.PrivilegedActionException e) { - throw new RuntimeException("Could not initialize intrinsics", - e.getCause()); - } - } - } - -} diff --git a/src/main/java/org/hbase/async/jsr166e/package-info.java b/src/main/java/org/hbase/async/jsr166e/package-info.java deleted file mode 100644 index 4538865..0000000 --- a/src/main/java/org/hbase/async/jsr166e/package-info.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (C) 2010-2012 The Async HBase Authors. All rights reserved. - * This file is part of Async HBase. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - Neither the name of the StumbleUpon nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** - *
- * <p>This package is not part of asynchbase's public interface.</p>
- * Use {@link org.hbase.async.Counter} instead. - */ -package org.hbase.async.jsr166e; From 3b531f55938f4227d547427ef9c6699ce35fdb2d Mon Sep 17 00:00:00 2001 From: xlfe Date: Thu, 20 Nov 2025 10:23:58 +1100 Subject: [PATCH 3/6] runs, but scanner still not working --- pom.xml | 29 ++++++++++++++++++---- src/main/java/org/hbase/async/Scanner.java | 4 +-- 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/pom.xml b/pom.xml index 08ae9cb..81f41cd 100644 --- a/pom.xml +++ b/pom.xml @@ -33,6 +33,24 @@ https://github.com/OpenTSDB/asyncbigtable/issues + + + + io.opentelemetry + opentelemetry-bom + 1.34.1 pom + import + + + com.google.cloud + libraries-bom + 26.27.0 + pom + import + + + + tsuna @@ -230,6 +248,10 @@ + + io.opentelemetry + opentelemetry-api + com.stumbleupon async @@ -242,19 +264,16 @@ 1.7.7 - com.google.guava guava 27.0-jre - com.google.cloud.bigtable - bigtable-hbase-2.x + bigtable-hbase-2.x-hadoop 2.15.5 - - +
org.apache.zookeeper zookeeper diff --git a/src/main/java/org/hbase/async/Scanner.java b/src/main/java/org/hbase/async/Scanner.java index 0516a92..3fbd0b0 100644 --- a/src/main/java/org/hbase/async/Scanner.java +++ b/src/main/java/org/hbase/async/Scanner.java @@ -34,8 +34,8 @@ import java.util.NavigableMap; // Imports required for Phase 2 fix (Explicit RowRange construction) -import com.google.bigtable.v2.RowRange; -import com.google.protobuf.ByteString; +import com.google.bigtable.repackaged.com.google.bigtable.v2.RowRange; +import com.google.bigtable.repackaged.com.google.protobuf.ByteString; import com.google.cloud.bigtable.hbase.BigtableExtendedScan; import org.apache.hadoop.hbase.client.Result; From 1f425e4f271f5cc8c45ff6149cca5a2d3ec64e17 Mon Sep 17 00:00:00 2001 From: xlfe Date: Thu, 20 Nov 2025 13:37:19 +1100 Subject: [PATCH 4/6] some data returned, but not all of it --- pom.xml | 50 +++++++++------- .../java/org/hbase/async/KeyRegexpFilter.java | 57 ++++++++----------- src/main/java/org/hbase/async/Scanner.java | 49 ++++------------ 3 files changed, 64 insertions(+), 92 deletions(-) diff --git a/pom.xml b/pom.xml index 81f41cd..cf3a87c 100644 --- a/pom.xml +++ b/pom.xml @@ -33,23 +33,24 @@ https://github.com/OpenTSDB/asyncbigtable/issues - - - - io.opentelemetry - opentelemetry-bom - 1.34.1 pom - import - - - com.google.cloud - libraries-bom - 26.27.0 - pom - import - - - + + + + io.opentelemetry + opentelemetry-bom + 1.34.1 + pom + import + + + com.google.cloud + libraries-bom + 26.27.0 + pom + import + + + @@ -248,6 +249,7 @@ + io.opentelemetry opentelemetry-api @@ -269,11 +271,19 @@ guava 27.0-jre + - com.google.cloud.bigtable - bigtable-hbase-2.x-hadoop - 2.15.5 + com.google.cloud.bigtable + bigtable-hbase-2.x-hadoop + 2.15.5 + + + com.google.cloud.bigtable + bigtable-client-core + + + org.apache.zookeeper zookeeper diff --git a/src/main/java/org/hbase/async/KeyRegexpFilter.java b/src/main/java/org/hbase/async/KeyRegexpFilter.java index aa68cf4..4903353 100644 --- a/src/main/java/org/hbase/async/KeyRegexpFilter.java +++ b/src/main/java/org/hbase/async/KeyRegexpFilter.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hbase.filter.ByteArrayComparable; import org.apache.hadoop.hbase.filter.CompareFilter; +import org.apache.hadoop.hbase.CompareOperator; import org.apache.hadoop.hbase.filter.Filter; import org.apache.hadoop.hbase.filter.RegexStringComparator; import org.apache.hadoop.hbase.filter.RowFilter; @@ -147,41 +148,31 @@ public Charset getCharset() { @Override Filter getBigtableFilter() { - /* - * PHASE 3 FIX: The previous implementation used RegexStringComparator and a fragile - * reflection hack (shown below, commented out) to inject raw bytes, bypassing charset issues. - * - * We replace this with BinaryRegexComparator, which operates directly on byte arrays. - * This is stable and correct for OpenTSDB's raw byte key structure. - */ - - // Original implementation using reflection: - /* - RegexStringComparator comparator = new RegexStringComparator(regexp_string); - comparator.setCharset(charset_object); - try { - // WARNING: This is some ugly ass code. It WILL break at some point. - // ... 
- final Field field = ByteArrayComparable.class.getDeclaredField("value"); - field.setAccessible(true); - field.set(comparator, regexp_string.getBytes(HBaseClient.ASCII)); - field.setAccessible(false); - return new RowFilter(CompareFilter.CompareOp.EQUAL, comparator); - } catch (NoSuchFieldException e) { - throw new RuntimeException("ByteArrayComparator must have changed, " - + "can't find the field", e); - } catch (IllegalAccessException e) { - throw new RuntimeException("Access denied when hacking the " - + "regex comparator field", e); + // SANITIZER: Convert raw binary pattern to RE2 Hex-Escaped ASCII string. + // This survives the Bigtable Client 2.x UTF-8 encoding enforcement. + StringBuilder sanitizer = new StringBuilder(this.regexp.length * 2); + + for (byte b : this.regexp) { + int unsigned = b & 0xFF; + // Pass printable ASCII through directly for readability/performance + // (ASCII 32-126, except backslash which needs escaping) + if (unsigned >= 32 && unsigned < 127 && unsigned != '\\') { + sanitizer.append((char) unsigned); + } else { + // Escape everything else (High-bit, Control chars, Backslashes) + // Format: \xHH + sanitizer.append(String.format("\\x%02x", unsigned)); + } } - */ - // Fallback implementation: - // Use RegexStringComparator with ISO-8859-1 (Latin-1) encoding. - // This maps bytes 0-255 to chars 0-255 1:1, allowing "binary" regex matching. - RegexStringComparator comparator = new RegexStringComparator(new String(this.regexp, CharsetUtil.ISO_8859_1)); - comparator.setCharset(CharsetUtil.ISO_8859_1); // Ensure server side uses same charset - return new RowFilter(CompareFilter.CompareOp.EQUAL, comparator); + // Create the comparator using the sanitized, transport-safe string + RegexStringComparator comparator = new RegexStringComparator(sanitizer.toString()); + + // Charset is technically irrelevant now since we are pure 7-bit ASCII, + // but UTF-8 is the safe default for v2. + comparator.setCharset(CharsetUtil.UTF_8); + + return new RowFilter(CompareOperator.EQUAL, comparator); } } diff --git a/src/main/java/org/hbase/async/Scanner.java b/src/main/java/org/hbase/async/Scanner.java index 3fbd0b0..23947fc 100644 --- a/src/main/java/org/hbase/async/Scanner.java +++ b/src/main/java/org/hbase/async/Scanner.java @@ -33,14 +33,9 @@ import java.util.ArrayList; import java.util.NavigableMap; -// Imports required for Phase 2 fix (Explicit RowRange construction) -import com.google.bigtable.repackaged.com.google.bigtable.v2.RowRange; -import com.google.bigtable.repackaged.com.google.protobuf.ByteString; - import com.google.cloud.bigtable.hbase.BigtableExtendedScan; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; -import org.apache.hadoop.hbase.client.Scan; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -675,41 +670,16 @@ public void setTimeRange(final long min_timestamp, final long max_timestamp) { /** @return the HTable scan object */ BigtableExtendedScan getHbaseScan() { - // ---[ PHASE 1: Whitebox Logging - Inputs ]--- - // Log the inputs provided to the shim. + + // ---[ Whitebox Logging - Inputs ]--- LOG.info("DEBUG-SHIM: Preparing Scan. 
StartKey(Hex)={}, StopKey(Hex)={}, Filter={}", Bytes.pretty(this.start_key), Bytes.pretty(this.stop_key), this.filter); - // ---[ End Logging ]--- - - // ---[ PHASE 2 FIX: Explicit Row Range Construction ]--- - // The previous logic used: hbase_scan.addRange(this.start_key, this.stop_key); - // This convenience wrapper might not handle EMPTY_ARRAY correctly for unbounded ranges - // in the new driver. We explicitly use the Protobuf builder instead for robustness. - final RowRange.Builder rangeBuilder = RowRange.newBuilder(); - boolean rangeSet = false; - - if (this.start_key != null && this.start_key.length > 0) { - // Inclusive start key - rangeBuilder.setStartKeyClosed(ByteString.copyFrom(this.start_key)); - rangeSet = true; - } else { - LOG.info("DEBUG-SHIM: Start key empty, treating as start of table."); - } - - if (this.stop_key != null && this.stop_key.length > 0) { - // Exclusive stop key - rangeBuilder.setEndKeyOpen(ByteString.copyFrom(this.stop_key)); - rangeSet = true; - } else { - LOG.info("DEBUG-SHIM: Stop key empty, treating as end of table."); - } - - // If boundaries were set, add the range. If neither, Bigtable defaults to a full scan implicitly. - if (rangeSet) { - hbase_scan.addRange(rangeBuilder.build()); - } - // ---[ End Phase 2 Fix ]--- + // ---[ RANGES ]--- + // BigtableExtendedScan requires addRange instead of setStartRow/setStopRow. + // this.start_key and this.stop_key default to EMPTY_ARRAY, which + // Bigtable interprets correctly as "start/end of table" (unbounded). + hbase_scan.addRange(this.start_key, this.stop_key); // ---[ PHASE 1: Whitebox Logging - Outputs (Ranges) ]--- try { @@ -756,8 +726,9 @@ BigtableExtendedScan getHbaseScan() { // ---[ PHASE 3: Filter Attachment (Relies on KeyRegexpFilter fix) ]--- try { - // This calls filter.getBigtableFilter(). With the Phase 3 fix applied (BinaryRegexComparator), - // this should now correctly generate the filter without reflection errors. + + // Standard HBase API attachment. Relies on KeyRegexpFilter returning a + // correctly charset-configured RegexStringComparator. hbase_scan.setFilter(filter.getBigtableFilter()); // ---[ PHASE 1: Whitebox Logging - Outputs (Filters) ]--- From 656dc5b6ed1166f324b8f342d8b40f69519ea699 Mon Sep 17 00:00:00 2001 From: xlfe Date: Thu, 20 Nov 2025 15:22:13 +1100 Subject: [PATCH 5/6] still not working --- .../java/org/hbase/async/KeyRegexpFilter.java | 74 ++++-- .../java/org/hbase/async/FiltersTest.java | 213 ++++++++++++++---- 2 files changed, 233 insertions(+), 54 deletions(-) diff --git a/src/main/java/org/hbase/async/KeyRegexpFilter.java b/src/main/java/org/hbase/async/KeyRegexpFilter.java index 4903353..4f7757c 100644 --- a/src/main/java/org/hbase/async/KeyRegexpFilter.java +++ b/src/main/java/org/hbase/async/KeyRegexpFilter.java @@ -145,32 +145,74 @@ public byte[] getRegexp() { public Charset getCharset() { return Charset.forName(new String(charset)); } - + + /** + * Checks if a byte corresponds to a Regex Metacharacter that requires escaping + * when found inside a Literal (\Q...\E) block. + */ + private boolean isMetaCharacter(int b) { + // Metacharacters: \ * + ? | { [ ( ) ^ $ . # + return b == 92 || b == 42 || b == 43 || b == 63 || b == 124 || + b == 123 || b == 91 || b == 40 || b == 41 || b == 94 || + b == 36 || b == 46 || b == 35; + } + @Override Filter getBigtableFilter() { - // SANITIZER: Convert raw binary pattern to RE2 Hex-Escaped ASCII string. - // This survives the Bigtable Client 2.x UTF-8 encoding enforcement. 
- StringBuilder sanitizer = new StringBuilder(this.regexp.length * 2); + // SANITIZER V4: "Strip, Escape Meta, and ASCII-Safe" + // 1. Strip \Q and \E. + // 2. Inside quotes: Escape metacharacters (e.g. '.') with backslash. + // 3. Inside quotes: Hex-escape non-printables. + // 4. Outside quotes: Pass structure through. - for (byte b : this.regexp) { + StringBuilder sanitizer = new StringBuilder(this.regexp.length * 2); + boolean inQuote = false; + + for (int i = 0; i < this.regexp.length; i++) { + byte b = this.regexp[i]; int unsigned = b & 0xFF; - // Pass printable ASCII through directly for readability/performance - // (ASCII 32-126, except backslash which needs escaping) - if (unsigned >= 32 && unsigned < 127 && unsigned != '\\') { - sanitizer.append((char) unsigned); + + // CHECK FOR \Q (Start Quote) -> byte 92 followed by 81 + if (unsigned == 92 && i + 1 < this.regexp.length && (this.regexp[i+1] & 0xFF) == 81) { + inQuote = true; + i++; // Skip the 'Q' + continue; // Skip the '\' + } + + // CHECK FOR \E (End Quote) -> byte 92 followed by 69 + if (unsigned == 92 && i + 1 < this.regexp.length && (this.regexp[i+1] & 0xFF) == 69) { + inQuote = false; + i++; // Skip the 'E' + continue; // Skip the '\' + } + + if (inQuote) { + // Inside "Literal" block. + // CRITICAL FIX: If we Hex-Escape a metacharacter (like '.' -> \x2e), + // RE2 treats it as the wildcard '.'. We must escape it with '\'. + if (isMetaCharacter(unsigned)) { + sanitizer.append('\\').append((char) unsigned); + } else { + // For non-meta chars, use Hex Escape to be safe against UTF-8 clients + sanitizer.append(String.format("\\x%02x", unsigned)); + } } else { - // Escape everything else (High-bit, Control chars, Backslashes) - // Format: \xHH - sanitizer.append(String.format("\\x%02x", unsigned)); + // Structural Regex (e.g. ^, ., *, {8}) + // Pass standard printable ASCII through as-is to preserve regex logic. + if (unsigned >= 32 && unsigned < 127) { + sanitizer.append((char) unsigned); + } else { + sanitizer.append(String.format("\\x%02x", unsigned)); + } } } - // Create the comparator using the sanitized, transport-safe string RegexStringComparator comparator = new RegexStringComparator(sanitizer.toString()); - // Charset is technically irrelevant now since we are pure 7-bit ASCII, - // but UTF-8 is the safe default for v2. - comparator.setCharset(CharsetUtil.UTF_8); + // CRITICAL FIX 2: Use ISO-8859-1. + // This tells Bigtable/HBase to treat the Row Key bytes as raw 1:1 characters + // rather than trying to decode them as UTF-8 (which fails on binary IDs). + comparator.setCharset(CharsetUtil.ISO_8859_1); return new RowFilter(CompareOperator.EQUAL, comparator); } diff --git a/src/test/java/org/hbase/async/FiltersTest.java b/src/test/java/org/hbase/async/FiltersTest.java index 88f0d0e..b2a4212 100644 --- a/src/test/java/org/hbase/async/FiltersTest.java +++ b/src/test/java/org/hbase/async/FiltersTest.java @@ -4,14 +4,14 @@ * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: - * - Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. 
- * - Neither the name of the StumbleUpon nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the StumbleUpon nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE @@ -26,6 +26,11 @@ */ package org.hbase.async; +import java.lang.reflect.Field; +import org.apache.hadoop.hbase.filter.ByteArrayComparable; +import org.apache.hadoop.hbase.filter.CompareFilter; +import org.apache.hadoop.hbase.filter.RegexStringComparator; +import org.apache.hadoop.hbase.filter.RowFilter; import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos; import org.junit.Assert; import org.junit.Test; @@ -35,35 +40,167 @@ @RunWith(JUnit4.class) public class FiltersTest { - @Test - public void ensureKeyOnlyFilterIsCorrectlyCreated() { - KeyOnlyFilter keyOnlyFilter = new KeyOnlyFilter(); - org.apache.hadoop.hbase.filter.KeyOnlyFilter filter = (org.apache.hadoop.hbase.filter.KeyOnlyFilter) keyOnlyFilter.getBigtableFilter(); - Assert.assertNotNull(filter); - Assert.assertArrayEquals(filter.toByteArray(), keyOnlyToByteArray(false)); - } - - @Test - public void ensureKeyOnlyFilterIsCorrectlyCreatedWithArgs() { - KeyOnlyFilter keyOnlyFilter = new KeyOnlyFilter(true); - org.apache.hadoop.hbase.filter.KeyOnlyFilter filter = (org.apache.hadoop.hbase.filter.KeyOnlyFilter) keyOnlyFilter.getBigtableFilter(); - Assert.assertNotNull(filter); - Assert.assertArrayEquals(filter.toByteArray(), keyOnlyToByteArray(true)); - } - - @Test - public void ensureColumnPrefixFilterIsCorrectlyCreated() { - final byte[] prefix = Bytes.UTF8("aoeu"); - ColumnPrefixFilter columnPrefixFilter = new ColumnPrefixFilter(prefix); - org.apache.hadoop.hbase.filter.ColumnPrefixFilter filter = (org.apache.hadoop.hbase.filter.ColumnPrefixFilter) columnPrefixFilter.getBigtableFilter(); - Assert.assertNotNull(filter); - Assert.assertArrayEquals(filter.getPrefix(), prefix); - } - - private byte[] keyOnlyToByteArray(boolean value) { - FilterProtos.KeyOnlyFilter.Builder builder = org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos.KeyOnlyFilter.newBuilder(); - builder.setLenAsVal(value); - return builder.build().toByteArray(); - } + @Test + public void ensureKeyOnlyFilterIsCorrectlyCreated() { + KeyOnlyFilter keyOnlyFilter = new KeyOnlyFilter(); + org.apache.hadoop.hbase.filter.KeyOnlyFilter filter = (org.apache.hadoop.hbase.filter.KeyOnlyFilter) keyOnlyFilter.getBigtableFilter(); + Assert.assertNotNull(filter); + Assert.assertArrayEquals(filter.toByteArray(), keyOnlyToByteArray(false)); + } + @Test + public void ensureKeyOnlyFilterIsCorrectlyCreatedWithArgs() { + KeyOnlyFilter keyOnlyFilter = new KeyOnlyFilter(true); + org.apache.hadoop.hbase.filter.KeyOnlyFilter filter = (org.apache.hadoop.hbase.filter.KeyOnlyFilter) 
keyOnlyFilter.getBigtableFilter(); + Assert.assertNotNull(filter); + Assert.assertArrayEquals(filter.toByteArray(), keyOnlyToByteArray(true)); + } + + @Test + public void ensureColumnPrefixFilterIsCorrectlyCreated() { + final byte[] prefix = Bytes.UTF8("aoeu"); + ColumnPrefixFilter columnPrefixFilter = new ColumnPrefixFilter(prefix); + org.apache.hadoop.hbase.filter.ColumnPrefixFilter filter = (org.apache.hadoop.hbase.filter.ColumnPrefixFilter) columnPrefixFilter.getBigtableFilter(); + Assert.assertNotNull(filter); + Assert.assertArrayEquals(filter.getPrefix(), prefix); + } + + // ---------------------------------------------------------------------- + // RE2/Bigtable Sanitization Tests + // ---------------------------------------------------------------------- + + @Test + public void ensureKeyRegexpFilterSanitization() throws Exception { + // 1. Basic Structural Regex (No Quotes) + // Should pass ASCII through as-is. + byte[] regex = Bytes.UTF8("^sys\\.cpu"); + KeyRegexpFilter keyRegex = new KeyRegexpFilter(regex); + + RowFilter rowFilter = (RowFilter) keyRegex.getBigtableFilter(); + String pattern = extractRegexPattern(rowFilter); + + // Expect: ^sys\.cpu + Assert.assertEquals("^sys\\.cpu", pattern); + } + + @Test + public void ensureKeyRegexpFilterStripAndEscape() throws Exception { + // 2. The OpenTSDB "Quoting" Conflict Scenario + // Input: \Q\E + // Bytes: [92, 81, 0, 1, 92, 69] + // Logic should strip \Q(92,81) and \E(92,69) and hex-escape the contents. + + byte[] quotedBinary = new byte[] { + 92, 81, // \Q + 0, 1, // Binary 0x00, 0x01 + 92, 69 // \E + }; + + KeyRegexpFilter keyRegex = new KeyRegexpFilter(quotedBinary); + RowFilter rowFilter = (RowFilter) keyRegex.getBigtableFilter(); + String pattern = extractRegexPattern(rowFilter); + + // Expect: \x00\x01 + // Explanation: + // - \Q removed + // - 0 becomes \x00 + // - 1 becomes \x01 + // - \E removed + Assert.assertEquals("\\x00\\x01", pattern); + } + + @Test + public void ensureKeyRegexpFilterMixedContent() throws Exception { + // 3. Mixed Structural and Binary + // Input: ^metric\Q<0x00><0xFF>\E$ + // This simulates a real UID query: Start Anchor + Metric Name + Binary ID + End Anchor + + byte[] mixed = new byte[14]; + int p = 0; + mixed[p++] = '^'; + mixed[p++] = 'm'; + mixed[p++] = 'e'; + mixed[p++] = 't'; + mixed[p++] = 'r'; + mixed[p++] = 'i'; + mixed[p++] = 'c'; + mixed[p++] = 92; mixed[p++] = 81; // \Q + mixed[p++] = 0; // 0x00 + mixed[p++] = (byte) 0xFF; // 0xFF (High bit char) + mixed[p++] = 92; mixed[p++] = 69; // \E + mixed[p++] = '$'; + + KeyRegexpFilter keyRegex = new KeyRegexpFilter(mixed); + RowFilter rowFilter = (RowFilter) keyRegex.getBigtableFilter(); + String pattern = extractRegexPattern(rowFilter); + + // Expect: ^metric\x00\xff$ + // - ^metric passed as ASCII + // - \Q stripped + // - 0 -> \x00 + // - 0xFF -> \xff + // - \E stripped + // - $ passed as ASCII + Assert.assertEquals("^metric\\x00\\xff$", pattern); + } + + @Test + public void ensureDoubleEscapingLogic() throws Exception { + // 4. Edge Case: Escaped Quote literal inside a quote? + // Note: The current logic is simple state-stripping. + // If OpenTSDB sends \Q.\E, we want \. (literal dot), NOT \x2e. + // RE2 interprets \x2e as a regex wildcard "." (match any), but \. as a literal dot. + + byte[] input = new byte[] { + 92, 81, // \Q + 46, // '.' 
(dot) - normally a regex wildcard + 92, 69 // \E + }; + + KeyRegexpFilter keyRegex = new KeyRegexpFilter(input); + RowFilter rowFilter = (RowFilter) keyRegex.getBigtableFilter(); + String pattern = extractRegexPattern(rowFilter); + + // Expect: \. + // We stripped \Q and \E. + // We detected '.' is a metacharacter. + // We escaped it with a backslash. + Assert.assertEquals("\\.", pattern); + } + + // ---------------------------------------------------------------------- + // Helpers + // ---------------------------------------------------------------------- + + private byte[] keyOnlyToByteArray(boolean value) { + FilterProtos.KeyOnlyFilter.Builder builder = org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos.KeyOnlyFilter.newBuilder(); + builder.setLenAsVal(value); + return builder.build().toByteArray(); + } + + /** + * Uses reflection to extract the internal Regex pattern string from the RowFilter. + * Corrected to use ByteArrayComparable.getValue() which works across HBase versions. + */ + private String extractRegexPattern(RowFilter rowFilter) throws Exception { + // 1. RowFilter contains a comparator. In some HBase versions this field is protected, + // so we use reflection to ensure access. + Field comparatorField = CompareFilter.class.getDeclaredField("comparator"); + comparatorField.setAccessible(true); + + // 2. The comparator is an instance of RegexStringComparator. + // RegexStringComparator extends ByteArrayComparable. + // ByteArrayComparable stores the raw bytes of the comparator value (the regex string) + // in a field accessible via the public getValue() method. + ByteArrayComparable comparator = (ByteArrayComparable) comparatorField.get(rowFilter); + + if (comparator == null) { + throw new RuntimeException("Filter comparator was null"); + } + + // 3. Convert the raw bytes back to a UTF-8 string for verification. + // We use UTF-8 here just to read the regex string back into Java for the assertion. + return new String(comparator.getValue(), "UTF-8"); + } } + From 54bc7f364f0a9c33b6cb7b5ebea0e044f99221bd Mon Sep 17 00:00:00 2001 From: xlfe Date: Thu, 20 Nov 2025 17:04:50 +1100 Subject: [PATCH 6/6] more radical change, still didn't work --- src/main/java/org/hbase/async/Scanner.java | 144 +++++---------------- 1 file changed, 32 insertions(+), 112 deletions(-) diff --git a/src/main/java/org/hbase/async/Scanner.java b/src/main/java/org/hbase/async/Scanner.java index 23947fc..1c8883f 100644 --- a/src/main/java/org/hbase/async/Scanner.java +++ b/src/main/java/org/hbase/async/Scanner.java @@ -81,9 +81,6 @@ public final class Scanner { private static final Logger LOG = LoggerFactory.getLogger(Scanner.class); - /** HBase API scan instance */ - private final BigtableExtendedScan hbase_scan; - /** * HBase API ResultScanner. After the scan is submitted is must not be null. */ @@ -187,8 +184,6 @@ public Scanner(final HBaseClient client, final byte[] table) { KeyValue.checkTable(table); this.client = client; this.table = table; - - hbase_scan = new BigtableExtendedScan(); } /** @@ -254,8 +249,6 @@ public void setFamily(final byte[] family) { KeyValue.checkFamily(family); checkScanningNotStarted(); families = new byte[][] { family }; - - hbase_scan.addFamily(family); } /** Specifies a particular column family to scan. 
*/ @@ -288,14 +281,6 @@ public void setFamilies(byte[][] families, byte[][][] qualifiers) { this.families = families; this.qualifiers = qualifiers; - for (int i = 0; i < families.length; i++) { - KeyValue.checkFamily(families[i]); - if (qualifiers != null && qualifiers[i] != null) { - for (byte[] qualifier : qualifiers[i]) { - hbase_scan.addColumn(families[i], qualifier); - } - } - } } /** @@ -309,7 +294,6 @@ public void setFamilies(final String... families) { KeyValue.checkFamily(this.families[i]); qualifiers[i] = null; - hbase_scan.addFamily(this.families[i]); } } @@ -327,8 +311,6 @@ public void setQualifier(final byte[] qualifier) { checkScanningNotStarted(); this.qualifiers = new byte[][][] { { qualifier } }; - if (families.length > 0) - hbase_scan.addColumn(families[0], qualifier); } /** Specifies a particular column qualifier to scan. */ @@ -375,7 +357,6 @@ public ScanFilter getFilter() { */ public void clearFilter() { filter = null; - hbase_scan.setFilter(null); } /** @@ -423,7 +404,6 @@ public void setKeyRegexp(final String regexp, final Charset charset) { public void setServerBlockCache(final boolean populate_blockcache) { checkScanningNotStarted(); this.populate_blockcache = populate_blockcache; - hbase_scan.setCacheBlocks(populate_blockcache); } /** @@ -489,7 +469,6 @@ public void setMaxNumKeyValues(final int max_num_kvs) { } checkScanningNotStarted(); this.max_num_kvs = max_num_kvs; - hbase_scan.setBatch(max_num_kvs); } /** @@ -518,7 +497,6 @@ public void setMaxVersions(final int versions) { checkScanningNotStarted(); this.versions = versions; - hbase_scan.readVersions(versions); } /** @@ -549,7 +527,6 @@ public void setMaxNumBytes(final long max_num_bytes) { checkScanningNotStarted(); this.max_num_bytes = max_num_bytes; - hbase_scan.setMaxResultSize(max_num_bytes); } /** @@ -582,11 +559,6 @@ public void setMinTimestamp(final long timestamp) { checkScanningNotStarted(); min_timestamp = timestamp; - try { - hbase_scan.setTimeRange(getMinTimestamp(), getMaxTimestamp()); - } catch (IOException e) { - throw new IllegalArgumentException("Invalid timestamp: " + timestamp, e); - } } /** @@ -619,11 +591,6 @@ public void setMaxTimestamp(final long timestamp) { checkScanningNotStarted(); max_timestamp = timestamp; - try { - hbase_scan.setTimeRange(getMinTimestamp(), getMaxTimestamp()); - } catch (IOException e) { - throw new IllegalArgumentException("Invalid timestamp: " + timestamp, e); - } } /** @@ -661,97 +628,50 @@ public void setTimeRange(final long min_timestamp, final long max_timestamp) { this.min_timestamp = min_timestamp; this.max_timestamp = max_timestamp; - try { - hbase_scan.setTimeRange(getMinTimestamp(), getMaxTimestamp()); - } catch (IOException e) { - throw new IllegalArgumentException("Invalid time range", e); - } } -/** @return the HTable scan object */ - BigtableExtendedScan getHbaseScan() { - // ---[ Whitebox Logging - Inputs ]--- - LOG.info("DEBUG-SHIM: Preparing Scan. StartKey(Hex)={}, StopKey(Hex)={}, Filter={}", - Bytes.pretty(this.start_key), Bytes.pretty(this.stop_key), this.filter); + BigtableExtendedScan getHbaseScan() { + // Instantiate a NEW object to guarantee a clean state + BigtableExtendedScan scan = new BigtableExtendedScan(); - // ---[ RANGES ]--- - // BigtableExtendedScan requires addRange instead of setStartRow/setStopRow. - // this.start_key and this.stop_key default to EMPTY_ARRAY, which - // Bigtable interprets correctly as "start/end of table" (unbounded). 
- hbase_scan.addRange(this.start_key, this.stop_key); + // Apply Ranges (BigtableExtendedScan treats empty byte[] as unbounded) + scan.addRange(this.start_key, this.stop_key); - // ---[ PHASE 1: Whitebox Logging - Outputs (Ranges) ]--- + // Apply Configuration from local Scanner fields try { - // 1. Log Legacy HBase API fields (should be empty/null if using BigtableExtendedScan correctly) - byte[] legacyStart = hbase_scan.getStartRow(); - byte[] legacyStop = hbase_scan.getStopRow(); - LOG.info("DEBUG-SHIM: Driver Legacy StartRow(Hex)={}, StopRow(Hex)={}", - legacyStart == null ? "NULL" : Bytes.pretty(legacyStart), - legacyStop == null ? "NULL" : Bytes.pretty(legacyStop)); - - // 2. Log the Bigtable RowSet (CRITICAL: This defines the actual ranges sent to Bigtable) - if (hbase_scan.getRowSet() != null) { - // This logs the Protobuf structure (e.g., start_key_closed, end_key_open). - LOG.info("DEBUG-SHIM: Driver RowSet={}", hbase_scan.getRowSet().toString()); - } else { - // This is expected ONLY if rangeSet was false (full table scan). - LOG.info("DEBUG-SHIM: Driver RowSet is NULL (Indicates Full Table Scan)."); - } - } catch (Exception e) { - LOG.error("DEBUG-SHIM: Exception during post-addRange introspection logging", e); + scan.setTimeRange(this.min_timestamp, this.max_timestamp); + } catch (IOException e) { + // This technically shouldn't happen as setters validate, + // but we must handle the checked exception. + throw new RuntimeException("Invalid time range configuration", e); } - // ---[ End Logging ]--- - // setup the filters - if (filter == null) { - LOG.info("DEBUG-SHIM: No filter attached."); - return hbase_scan; + scan.readVersions(this.versions); + scan.setBatch(this.max_num_kvs); + scan.setMaxResultSize(this.max_num_bytes); + scan.setCacheBlocks(this.populate_blockcache); + + // Apply Families and Qualifiers + if (this.families != null) { + for (int i = 0; i < this.families.length; i++) { + byte[] family = this.families[i]; + if (this.qualifiers != null && i < this.qualifiers.length && this.qualifiers[i] != null) { + for (byte[] qualifier : this.qualifiers[i]) { + scan.addColumn(family, qualifier); + } + } else { + scan.addFamily(family); + } + } } - - // TODO - right now we ONLY push the regex filter to Bigtable. The fuzzy - // filter isn't setup properly yet and we're not using column filters at this - // time. -// if (filter instanceof FilterList) { -// for (final ScanFilter sf : ((FilterList)filter).getFilters()) { -// if (sf instanceof KeyRegexpFilter) { -// hbase_scan.setFilter(((KeyRegexpFilter)sf).getRegexFilterForBigtable()); -// return hbase_scan; -// } -// } -// } else if (filter instanceof KeyRegexpFilter) { -// hbase_scan.setFilter(((KeyRegexpFilter)filter).getRegexFilterForBigtable()); -// return hbase_scan; -// } - - // ---[ PHASE 3: Filter Attachment (Relies on KeyRegexpFilter fix) ]--- - try { - // Standard HBase API attachment. Relies on KeyRegexpFilter returning a - // correctly charset-configured RegexStringComparator. - hbase_scan.setFilter(filter.getBigtableFilter()); - - // ---[ PHASE 1: Whitebox Logging - Outputs (Filters) ]--- - if (hbase_scan.getFilter() != null) { - LOG.info("DEBUG-SHIM: Driver Filter Attached={}", hbase_scan.getFilter().toString()); - } else { - // This might happen if filter.getBigtableFilter() returned null unexpectedly. 
-        LOG.error("DEBUG-SHIM: CRITICAL: Driver Filter is NULL after attachment attempt.");
-      }
-      // ---[ End Logging ]---
-      
-    } catch (Exception e) {
-      // Catch failures during filter creation (if Phase 3 fix was missed or incomplete).
-      LOG.error("DEBUG-SHIM: CRITICAL: Failed to create or attach filter. Check KeyRegexpFilter implementation.", e);
-      // Rethrow to ensure failure is visible during testing.
-      if (e instanceof RuntimeException) {
-        throw (RuntimeException) e;
-      }
-      throw new RuntimeException("Failed during scan filter attachment.", e);
+    // Apply Filter - this invokes the new KeyRegexpFilter sanitization logic
+    if (this.filter != null) {
+      scan.setFilter(this.filter.getBigtableFilter());
     }
-    // ---[ End Phase 3 ]---
 
-    return hbase_scan;
+    return scan;
   }
 
   /** @return the scanner results to work with */
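
For reference, the sanitizer that patch 5 adds to KeyRegexpFilter.getBigtableFilter() can be exercised in isolation. The following is a minimal standalone sketch of that strip-\Q/\E, escape-metacharacter, hex-escape logic, fed the same mixed-content vector as the new ensureKeyRegexpFilterMixedContent test; the class and method names here are illustrative only and are not part of the patch.

// Standalone sketch of the sanitizer logic introduced in KeyRegexpFilter
// (strip \Q/\E, backslash-escape metacharacters inside the quoted span,
// hex-escape other quoted bytes, pass structural ASCII through unchanged).
// Class/method names are illustrative; this file is not part of the patch.
public final class RegexpSanitizerSketch {

  // Same metacharacter set the patch checks for: \ * + ? | { [ ( ) ^ $ . #
  private static boolean isMetaCharacter(int b) {
    return b == '\\' || b == '*' || b == '+' || b == '?' || b == '|'
        || b == '{' || b == '[' || b == '(' || b == ')' || b == '^'
        || b == '$' || b == '.' || b == '#';
  }

  static String sanitize(final byte[] regexp) {
    final StringBuilder out = new StringBuilder(regexp.length * 2);
    boolean inQuote = false;
    for (int i = 0; i < regexp.length; i++) {
      final int u = regexp[i] & 0xFF;
      // Strip \Q (start of literal block) and \E (end of literal block).
      if (u == '\\' && i + 1 < regexp.length && (regexp[i + 1] & 0xFF) == 'Q') {
        inQuote = true;
        i++;
        continue;
      }
      if (u == '\\' && i + 1 < regexp.length && (regexp[i + 1] & 0xFF) == 'E') {
        inQuote = false;
        i++;
        continue;
      }
      if (inQuote) {
        if (isMetaCharacter(u)) {
          out.append('\\').append((char) u);        // e.g. '.' -> \.
        } else {
          out.append(String.format("\\x%02x", u));  // e.g. 0x00 -> \x00
        }
      } else if (u >= 32 && u < 127) {
        out.append((char) u);                       // structural ASCII as-is
      } else {
        out.append(String.format("\\x%02x", u));
      }
    }
    return out.toString();
  }

  public static void main(final String[] args) {
    // Same vector as ensureKeyRegexpFilterMixedContent: ^metric\Q<0x00><0xFF>\E$
    final byte[] mixed = {
      '^', 'm', 'e', 't', 'r', 'i', 'c',
      '\\', 'Q', 0, (byte) 0xFF, '\\', 'E',
      '$'
    };
    System.out.println(sanitize(mixed));  // prints: ^metric\x00\xff$
  }
}

Running the sketch prints ^metric\x00\xff$, which matches the pattern asserted in FiltersTest for the mixed structural/binary case.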