diff --git a/examples/hll_usage.rs b/examples/hll_usage.rs index ea639e1..2ea346f 100644 --- a/examples/hll_usage.rs +++ b/examples/hll_usage.rs @@ -15,7 +15,8 @@ // specific language governing permissions and limitations // under the License. -use datasketches::hll::{HllSketch, HllType}; +use datasketches::hll::HllSketch; +use datasketches::hll::HllType; fn main() { // Create a new HLL sketch diff --git a/rustfmt.toml b/rustfmt.toml index 887de93..05b916c 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -17,3 +17,9 @@ edition = "2024" reorder_imports = true + +comment_width = 120 +format_code_in_doc_comments = true +group_imports = "StdExternalCrate" +imports_granularity = "Item" +wrap_comments = true diff --git a/src/hll/array4.rs b/src/hll/array4.rs index 4224381..44707b6 100644 --- a/src/hll/array4.rs +++ b/src/hll/array4.rs @@ -22,8 +22,10 @@ use super::aux_map::AuxMap; use crate::error::SerdeError; +use crate::hll::NumStdDev; use crate::hll::estimator::HipEstimator; -use crate::hll::{NumStdDev, get_slot, get_value}; +use crate::hll::get_slot; +use crate::hll::get_value; const AUX_TOKEN: u8 = 15; @@ -288,8 +290,9 @@ impl Array4 { compact: bool, ooo: bool, ) -> Result { + use crate::hll::get_slot; + use crate::hll::get_value; use crate::hll::serialization::*; - use crate::hll::{get_slot, get_value}; if bytes.len() < HLL_PREAMBLE_SIZE { return Err(SerdeError::InsufficientData(format!( @@ -436,7 +439,8 @@ impl Array4 { #[cfg(test)] mod tests { use super::*; - use crate::hll::{coupon, pack_coupon}; + use crate::hll::coupon; + use crate::hll::pack_coupon; #[test] fn test_get_set_raw() { diff --git a/src/hll/array6.rs b/src/hll/array6.rs index 3edd17a..8c7138b 100644 --- a/src/hll/array6.rs +++ b/src/hll/array6.rs @@ -22,8 +22,10 @@ //! cur_min optimization like Array4. use crate::error::SerdeError; +use crate::hll::NumStdDev; use crate::hll::estimator::HipEstimator; -use crate::hll::{NumStdDev, get_slot, get_value}; +use crate::hll::get_slot; +use crate::hll::get_value; const VAL_MASK_6: u16 = 0x3F; // 6 bits: 0b0011_1111 @@ -278,7 +280,8 @@ fn num_bytes_for_k(k: u32) -> usize { #[cfg(test)] mod tests { use super::*; - use crate::hll::{coupon, pack_coupon}; + use crate::hll::coupon; + use crate::hll::pack_coupon; #[test] fn test_num_bytes_calculation() { diff --git a/src/hll/array8.rs b/src/hll/array8.rs index c329d21..33e2cca 100644 --- a/src/hll/array8.rs +++ b/src/hll/array8.rs @@ -21,8 +21,10 @@ //! This provides the maximum value range (0-255) with no bit-packing complexity. use crate::error::SerdeError; +use crate::hll::NumStdDev; use crate::hll::estimator::HipEstimator; -use crate::hll::{NumStdDev, get_slot, get_value}; +use crate::hll::get_slot; +use crate::hll::get_value; /// Core Array8 data structure - one byte per slot, no packing #[derive(Debug, Clone, PartialEq)] @@ -218,7 +220,8 @@ impl Array8 { self.num_zeros = self.bytes.iter().filter(|&&v| v == 0).count() as u32; // Recompute kxq values from actual register values - // This is essential after bulk merges where registers change but estimator isn't updated incrementally + // This is essential after bulk merges where registers change but estimator isn't updated + // incrementally let mut kxq0_sum = 0.0; let mut kxq1_sum = 0.0; @@ -342,7 +345,8 @@ impl Array8 { #[cfg(test)] mod tests { use super::*; - use crate::hll::{coupon, pack_coupon}; + use crate::hll::coupon; + use crate::hll::pack_coupon; #[test] fn test_array8_basic() { diff --git a/src/hll/aux_map.rs b/src/hll/aux_map.rs index 8eb1ef8..f3e5b5d 100644 --- a/src/hll/aux_map.rs +++ b/src/hll/aux_map.rs @@ -20,7 +20,11 @@ //! Stores slot-value pairs for values that don't fit in the 4-bit main array. //! Uses open addressing with stride-based probing for collision resolution. -use crate::hll::{RESIZE_DENOMINATOR, RESIZE_NUMERATOR, get_slot, get_value, pack_coupon}; +use crate::hll::RESIZE_DENOMINATOR; +use crate::hll::RESIZE_NUMERATOR; +use crate::hll::get_slot; +use crate::hll::get_value; +use crate::hll::pack_coupon; const ENTRY_EMPTY: u32 = 0; diff --git a/src/hll/container.rs b/src/hll/container.rs index 4d358b1..5ce5777 100644 --- a/src/hll/container.rs +++ b/src/hll/container.rs @@ -20,9 +20,11 @@ //! Provides a simple array-based storage for coupons (hash values) with //! cubic interpolation-based cardinality estimation and confidence bounds. -use crate::hll::coupon_mapping::{X_ARR, Y_ARR}; +use crate::hll::COUPON_RSE; +use crate::hll::NumStdDev; +use crate::hll::coupon_mapping::X_ARR; +use crate::hll::coupon_mapping::Y_ARR; use crate::hll::cubic_interpolation::using_x_and_y_tables; -use crate::hll::{COUPON_RSE, NumStdDev}; /// Sentinel value indicating an empty coupon slot pub const COUPON_EMPTY: u32 = 0; diff --git a/src/hll/estimator.rs b/src/hll/estimator.rs index fefee91..b48393a 100644 --- a/src/hll/estimator.rs +++ b/src/hll/estimator.rs @@ -22,7 +22,9 @@ //! This is more accurate than the standard HLL estimator, especially for //! moderate cardinalities. -use crate::hll::{composite_interpolation, cubic_interpolation, harmonic_numbers}; +use crate::hll::composite_interpolation; +use crate::hll::cubic_interpolation; +use crate::hll::harmonic_numbers; /// HIP estimator with KxQ registers for improved cardinality estimation /// @@ -30,10 +32,10 @@ use crate::hll::{composite_interpolation, cubic_interpolation, harmonic_numbers} /// allowing it to be composed into Array4, Array6, and Array8. /// /// The estimator supports two modes: -/// - **In-order mode**: Uses HIP (Historical Inverse Probability) accumulator -/// for accurate sequential updates -/// - **Out-of-order mode**: Uses composite estimator (raw HLL + linear counting) -/// after deserialization or merging +/// - **In-order mode**: Uses HIP (Historical Inverse Probability) accumulator for accurate +/// sequential updates +/// - **Out-of-order mode**: Uses composite estimator (raw HLL + linear counting) after +/// deserialization or merging #[derive(Debug, Clone, PartialEq)] pub struct HipEstimator { /// HIP estimator accumulator diff --git a/src/hll/hash_set.rs b/src/hll/hash_set.rs index 681fe02..05f5ad2 100644 --- a/src/hll/hash_set.rs +++ b/src/hll/hash_set.rs @@ -21,9 +21,11 @@ //! Provides better performance than List when many coupons are stored. use crate::error::SerdeError; -use crate::hll::container::{COUPON_EMPTY, Container}; +use crate::hll::HllType; +use crate::hll::KEY_MASK_26; +use crate::hll::container::COUPON_EMPTY; +use crate::hll::container::Container; use crate::hll::serialization::*; -use crate::hll::{HllType, KEY_MASK_26}; /// Hash set for efficient coupon storage with collision handling #[derive(Debug, Clone, PartialEq)] diff --git a/src/hll/list.rs b/src/hll/list.rs index 8ef726a..d01a9b7 100644 --- a/src/hll/list.rs +++ b/src/hll/list.rs @@ -22,7 +22,8 @@ use crate::error::SerdeError; use crate::hll::HllType; -use crate::hll::container::{COUPON_EMPTY, Container}; +use crate::hll::container::COUPON_EMPTY; +use crate::hll::container::Container; use crate::hll::serialization::*; /// List for sequential coupon storage with duplicate detection diff --git a/src/hll/mod.rs b/src/hll/mod.rs index 40d1b31..9037be1 100644 --- a/src/hll/mod.rs +++ b/src/hll/mod.rs @@ -163,7 +163,9 @@ fn coupon(v: H) -> u32 { #[cfg(test)] mod tests { - use crate::hll::{get_slot, get_value, pack_coupon}; + use crate::hll::get_slot; + use crate::hll::get_value; + use crate::hll::pack_coupon; #[test] fn test_pack_unpack_coupon() { diff --git a/src/hll/sketch.rs b/src/hll/sketch.rs index f4ca064..6887338 100644 --- a/src/hll/sketch.rs +++ b/src/hll/sketch.rs @@ -23,15 +23,19 @@ use std::hash::Hash; use crate::error::SerdeError; +use crate::hll::HllType; +use crate::hll::NumStdDev; +use crate::hll::RESIZE_DENOMINATOR; +use crate::hll::RESIZE_NUMERATOR; use crate::hll::array4::Array4; use crate::hll::array6::Array6; use crate::hll::array8::Array8; use crate::hll::container::Container; +use crate::hll::coupon; use crate::hll::hash_set::HashSet; use crate::hll::list::List; use crate::hll::mode::Mode; use crate::hll::serialization::*; -use crate::hll::{HllType, NumStdDev, RESIZE_DENOMINATOR, RESIZE_NUMERATOR, coupon}; /// A HyperLogLog sketch. /// diff --git a/src/hll/union.rs b/src/hll/union.rs index 8b9b666..1d6c215 100644 --- a/src/hll/union.rs +++ b/src/hll/union.rs @@ -28,12 +28,16 @@ //! - Different modes (List, Set, Array4/6/8) //! - Different target HLL types +use std::hash::Hash; + +use crate::hll::HllSketch; +use crate::hll::HllType; +use crate::hll::NumStdDev; use crate::hll::array4::Array4; use crate::hll::array6::Array6; use crate::hll::array8::Array8; use crate::hll::mode::Mode; -use crate::hll::{HllSketch, HllType, NumStdDev, pack_coupon}; -use std::hash::Hash; +use crate::hll::pack_coupon; /// An HLL Union for combining multiple HLL sketches. /// @@ -55,9 +59,9 @@ impl HllUnion { /// /// # Arguments /// - /// * `lg_max_k` - Maximum log2 of the number of buckets. Must be in [4, 21]. - /// This determines the maximum precision the union can handle. Input sketches - /// with larger lg_k will be down-sampled. + /// * `lg_max_k` - Maximum log2 of the number of buckets. Must be in [4, 21]. This determines + /// the maximum precision the union can handle. Input sketches with larger lg_k will be + /// down-sampled. /// /// # Panics /// diff --git a/src/tdigest/sketch.rs b/src/tdigest/sketch.rs index 7f125d9..13aa6ca 100644 --- a/src/tdigest/sketch.rs +++ b/src/tdigest/sketch.rs @@ -15,14 +15,18 @@ // specific language governing permissions and limitations // under the License. -use crate::error::SerdeError; -use crate::tdigest::serialization::*; -use byteorder::{BE, LE, ReadBytesExt}; use std::cmp::Ordering; use std::convert::identity; use std::io::Cursor; use std::num::NonZeroU64; +use byteorder::BE; +use byteorder::LE; +use byteorder::ReadBytesExt; + +use crate::error::SerdeError; +use crate::tdigest::serialization::*; + /// The default value of K if one is not specified. const DEFAULT_K: u16 = 200; /// Multiplier for buffer size relative to centroids capacity. diff --git a/tests/hll_union_test.rs b/tests/hll_union_test.rs index 1df4165..c92122e 100644 --- a/tests/hll_union_test.rs +++ b/tests/hll_union_test.rs @@ -27,7 +27,10 @@ //! //! This mirrors the testing strategy used in hll_update_test.rs -use datasketches::hll::{HllSketch, HllType, HllUnion, NumStdDev}; +use datasketches::hll::HllSketch; +use datasketches::hll::HllType; +use datasketches::hll::HllUnion; +use datasketches::hll::NumStdDev; #[test] fn test_union_basic_operations() { diff --git a/tests/hll_update_test.rs b/tests/hll_update_test.rs index 5126564..918a928 100644 --- a/tests/hll_update_test.rs +++ b/tests/hll_update_test.rs @@ -15,7 +15,9 @@ // specific language governing permissions and limitations // under the License. -use datasketches::hll::{HllSketch, HllType, NumStdDev}; +use datasketches::hll::HllSketch; +use datasketches::hll::HllType; +use datasketches::hll::NumStdDev; #[test] fn test_basic_update() { diff --git a/tests/tdigest_serialization_test.rs b/tests/tdigest_serialization_test.rs index 0ad68e4..18f58ce 100644 --- a/tests/tdigest_serialization_test.rs +++ b/tests/tdigest_serialization_test.rs @@ -24,7 +24,8 @@ use common::serialization_test_data; use common::test_data; use datasketches::tdigest::TDigestMut; use googletest::assert_that; -use googletest::prelude::{eq, near}; +use googletest::prelude::eq; +use googletest::prelude::near; fn test_sketch_file(path: PathBuf, n: u64, with_buffer: bool, is_f32: bool) { let bytes = fs::read(&path).unwrap(); diff --git a/tests/tdigest_test.rs b/tests/tdigest_test.rs index 1ae1ae3..870f3de 100644 --- a/tests/tdigest_test.rs +++ b/tests/tdigest_test.rs @@ -17,7 +17,8 @@ use datasketches::tdigest::TDigestMut; use googletest::assert_that; -use googletest::prelude::{eq, near}; +use googletest::prelude::eq; +use googletest::prelude::near; #[test] fn test_empty() {