From b6423af8bd6f9cb2c930a0064761fe7d4d9fb297 Mon Sep 17 00:00:00 2001 From: William Lin <63217833+Maillew@users.noreply.github.com> Date: Fri, 5 Dec 2025 11:36:29 -0500 Subject: [PATCH 01/21] fix: fix unbounded trace row generation for rv32im_hintbuffer (#2289) Closes INT-4830 --- .../cuda/include/primitives/constants.h | 104 +++++++++--------- crates/toolchain/openvm/src/io/mod.rs | 4 +- crates/toolchain/openvm/src/io/read.rs | 6 +- crates/toolchain/openvm/src/pal_abi.rs | 4 +- crates/vm/src/arch/execution.rs | 6 + docs/vocs/docs/pages/specs/openvm/isa.mdx | 5 +- extensions/algebra/moduli-macros/src/lib.rs | 12 +- .../rv32im/circuit/cuda/src/hintstore.cu | 18 ++- .../rv32im/circuit/src/hintstore/execution.rs | 10 ++ .../rv32im/circuit/src/hintstore/mod.rs | 46 ++++++-- .../rv32im/circuit/src/hintstore/tests.rs | 93 +++++++++++++++- extensions/rv32im/guest/src/io.rs | 18 ++- extensions/rv32im/guest/src/lib.rs | 10 ++ .../programs/examples/hint_large_buffer.rs | 25 +++++ extensions/rv32im/tests/src/lib.rs | 33 +++++- extensions/rv32im/transpiler/src/lib.rs | 1 + guest-libs/pairing/src/bls12_381/pairing.rs | 8 +- guest-libs/pairing/src/bn254/pairing.rs | 8 +- 18 files changed, 327 insertions(+), 84 deletions(-) create mode 100644 extensions/rv32im/tests/programs/examples/hint_large_buffer.rs diff --git a/crates/circuits/primitives/cuda/include/primitives/constants.h b/crates/circuits/primitives/cuda/include/primitives/constants.h index dec26b5f41..16396d41ec 100644 --- a/crates/circuits/primitives/cuda/include/primitives/constants.h +++ b/crates/circuits/primitives/cuda/include/primitives/constants.h @@ -3,91 +3,97 @@ #include namespace riscv { -static const size_t RV32_REGISTER_NUM_LIMBS = 4; -static const size_t RV32_CELL_BITS = 8; -static const size_t RV_J_TYPE_IMM_BITS = 21; +inline constexpr size_t RV32_REGISTER_NUM_LIMBS = 4; +inline constexpr size_t RV32_CELL_BITS = 8; +inline constexpr size_t RV_J_TYPE_IMM_BITS = 21; -static const size_t RV32_IMM_AS = 0; 
+inline constexpr size_t RV32_IMM_AS = 0; } // namespace riscv namespace program { -static const size_t PC_BITS = 30; -static const size_t DEFAULT_PC_STEP = 4; +inline constexpr size_t PC_BITS = 30; +inline constexpr size_t DEFAULT_PC_STEP = 4; } // namespace program namespace native { -static const size_t AS_IMMEDIATE = 0; -static const size_t AS_NATIVE = 4; -static const size_t EXT_DEG = 4; -static const size_t BETA = 11; +inline constexpr size_t AS_IMMEDIATE = 0; +inline constexpr size_t AS_NATIVE = 4; +inline constexpr size_t EXT_DEG = 4; +inline constexpr size_t BETA = 11; } // namespace native namespace poseidon2 { -static const size_t CHUNK = 8; +inline constexpr size_t CHUNK = 8; } // namespace poseidon2 namespace p3_keccak_air { -static const size_t NUM_ROUNDS = 24; -static const size_t BITS_PER_LIMB = 16; -static const size_t U64_LIMBS = 64 / BITS_PER_LIMB; -static const size_t RATE_BITS = 1088; -static const size_t RATE_LIMBS = RATE_BITS / BITS_PER_LIMB; +inline constexpr size_t NUM_ROUNDS = 24; +inline constexpr size_t BITS_PER_LIMB = 16; +inline constexpr size_t U64_LIMBS = 64 / BITS_PER_LIMB; +inline constexpr size_t RATE_BITS = 1088; +inline constexpr size_t RATE_LIMBS = RATE_BITS / BITS_PER_LIMB; } // namespace p3_keccak_air namespace keccak256 { /// Total number of sponge bytes: number of rate bytes + number of capacity bytes. -static const size_t KECCAK_WIDTH_BYTES = 200; +inline constexpr size_t KECCAK_WIDTH_BYTES = 200; /// Total number of 16-bit limbs in the sponge. -static const size_t KECCAK_WIDTH_U16S = KECCAK_WIDTH_BYTES / 2; +inline constexpr size_t KECCAK_WIDTH_U16S = KECCAK_WIDTH_BYTES / 2; /// Number of rate bytes. -static const size_t KECCAK_RATE_BYTES = 136; +inline constexpr size_t KECCAK_RATE_BYTES = 136; /// Number of 16-bit rate limbs. -static const size_t KECCAK_RATE_U16S = KECCAK_RATE_BYTES / 2; +inline constexpr size_t KECCAK_RATE_U16S = KECCAK_RATE_BYTES / 2; /// Number of absorb rounds, equal to rate in u64s. 
-static const size_t NUM_ABSORB_ROUNDS = KECCAK_RATE_BYTES / 8; +inline constexpr size_t NUM_ABSORB_ROUNDS = KECCAK_RATE_BYTES / 8; /// Number of capacity bytes. -static const size_t KECCAK_CAPACITY_BYTES = 64; +inline constexpr size_t KECCAK_CAPACITY_BYTES = 64; /// Number of 16-bit capacity limbs. -static const size_t KECCAK_CAPACITY_U16S = KECCAK_CAPACITY_BYTES / 2; +inline constexpr size_t KECCAK_CAPACITY_U16S = KECCAK_CAPACITY_BYTES / 2; /// Number of output digest bytes used during the squeezing phase. -static const size_t KECCAK_DIGEST_BYTES = 32; +inline constexpr size_t KECCAK_DIGEST_BYTES = 32; /// Number of 64-bit digest limbs. -static const size_t KECCAK_DIGEST_U64S = KECCAK_DIGEST_BYTES / 8; +inline constexpr size_t KECCAK_DIGEST_U64S = KECCAK_DIGEST_BYTES / 8; // ==== Constants for register/memory adapter ==== /// Register reads to get dst, src, len -static const size_t KECCAK_REGISTER_READS = 3; +inline constexpr size_t KECCAK_REGISTER_READS = 3; /// Number of cells to read/write in a single memory access -static const size_t KECCAK_WORD_SIZE = 4; +inline constexpr size_t KECCAK_WORD_SIZE = 4; /// Memory reads for absorb per row -static const size_t KECCAK_ABSORB_READS = KECCAK_RATE_BYTES / KECCAK_WORD_SIZE; +inline constexpr size_t KECCAK_ABSORB_READS = KECCAK_RATE_BYTES / KECCAK_WORD_SIZE; /// Memory writes for digest per row -static const size_t KECCAK_DIGEST_WRITES = KECCAK_DIGEST_BYTES / KECCAK_WORD_SIZE; +inline constexpr size_t KECCAK_DIGEST_WRITES = KECCAK_DIGEST_BYTES / KECCAK_WORD_SIZE; /// keccakf parameters -static const size_t KECCAK_ROUND = 24; -static const size_t KECCAK_STATE_SIZE = 25; -static const size_t KECCAK_Q_SIZE = 192; +inline constexpr size_t KECCAK_ROUND = 24; +inline constexpr size_t KECCAK_STATE_SIZE = 25; +inline constexpr size_t KECCAK_Q_SIZE = 192; /// From memory config -static const size_t KECCAK_POINTER_MAX_BITS = 29; +inline constexpr size_t KECCAK_POINTER_MAX_BITS = 29; } // namespace keccak256 namespace 
mod_builder { -static const size_t MAX_LIMBS = 97; +inline constexpr size_t MAX_LIMBS = 97; } // namespace mod_builder namespace sha256 { -static const size_t SHA256_BLOCK_BITS = 512; -static const size_t SHA256_BLOCK_U8S = 64; -static const size_t SHA256_BLOCK_WORDS = 16; -static const size_t SHA256_WORD_U8S = 4; -static const size_t SHA256_WORD_BITS = 32; -static const size_t SHA256_WORD_U16S = 2; -static const size_t SHA256_HASH_WORDS = 8; -static const size_t SHA256_NUM_READ_ROWS = 4; -static const size_t SHA256_ROWS_PER_BLOCK = 17; -static const size_t SHA256_ROUNDS_PER_ROW = 4; -static const size_t SHA256_ROW_VAR_CNT = 5; -static const size_t SHA256_REGISTER_READS = 3; -static const size_t SHA256_READ_SIZE = 16; -static const size_t SHA256_WRITE_SIZE = 32; -} // namespace sha256 \ No newline at end of file +inline constexpr size_t SHA256_BLOCK_BITS = 512; +inline constexpr size_t SHA256_BLOCK_U8S = 64; +inline constexpr size_t SHA256_BLOCK_WORDS = 16; +inline constexpr size_t SHA256_WORD_U8S = 4; +inline constexpr size_t SHA256_WORD_BITS = 32; +inline constexpr size_t SHA256_WORD_U16S = 2; +inline constexpr size_t SHA256_HASH_WORDS = 8; +inline constexpr size_t SHA256_NUM_READ_ROWS = 4; +inline constexpr size_t SHA256_ROWS_PER_BLOCK = 17; +inline constexpr size_t SHA256_ROUNDS_PER_ROW = 4; +inline constexpr size_t SHA256_ROW_VAR_CNT = 5; +inline constexpr size_t SHA256_REGISTER_READS = 3; +inline constexpr size_t SHA256_READ_SIZE = 16; +inline constexpr size_t SHA256_WRITE_SIZE = 32; +} // namespace sha256 + +namespace hintstore { +// Must match MAX_HINT_BUFFER_WORDS_BITS in openvm_rv32im_guest::lib.rs +inline constexpr size_t MAX_HINT_BUFFER_WORDS_BITS = 18; +inline constexpr size_t MAX_HINT_BUFFER_WORDS = (1 << MAX_HINT_BUFFER_WORDS_BITS) - 1; +} // namespace hintstore diff --git a/crates/toolchain/openvm/src/io/mod.rs b/crates/toolchain/openvm/src/io/mod.rs index eb00a9d3cd..05f073073e 100644 --- a/crates/toolchain/openvm/src/io/mod.rs +++ 
b/crates/toolchain/openvm/src/io/mod.rs @@ -6,7 +6,7 @@ use core::alloc::Layout; use core::fmt::Write; #[cfg(target_os = "zkvm")] -use openvm_rv32im_guest::{hint_buffer_u32, hint_input, hint_store_u32}; +use openvm_rv32im_guest::{hint_buffer_chunked, hint_input, hint_store_u32}; use serde::de::DeserializeOwned; #[cfg(not(target_os = "zkvm"))] @@ -83,7 +83,7 @@ pub(crate) fn read_vec_by_len(len: usize) -> Vec { // The heap-embedded-alloc uses linked list allocator, which has a minimum alignment of // `sizeof(usize) * 2 = 8` on 32-bit architectures: https://github.com/rust-osdev/linked-list-allocator/blob/b5caf3271259ddda60927752fa26527e0ccd2d56/src/hole.rs#L429 let mut bytes = Vec::with_capacity(capacity); - hint_buffer_u32!(bytes.as_mut_ptr(), num_words); + hint_buffer_chunked(bytes.as_mut_ptr(), num_words as usize); // SAFETY: We populate a `Vec` by hintstore-ing `num_words` 4 byte words. We set the // length to `len` and don't care about the extra `capacity - len` bytes stored. unsafe { diff --git a/crates/toolchain/openvm/src/io/read.rs b/crates/toolchain/openvm/src/io/read.rs index 39b2166e39..f2eff6cfa5 100644 --- a/crates/toolchain/openvm/src/io/read.rs +++ b/crates/toolchain/openvm/src/io/read.rs @@ -2,7 +2,7 @@ use core::mem::MaybeUninit; use openvm_platform::WORD_SIZE; #[cfg(target_os = "zkvm")] -use openvm_rv32im_guest::hint_buffer_u32; +use openvm_rv32im_guest::hint_buffer_chunked; use super::hint_store_word; use crate::serde::WordRead; @@ -31,7 +31,7 @@ impl WordRead for Reader { let num_words = words.len(); if let Some(new_remaining) = self.bytes_remaining.checked_sub(num_words * WORD_SIZE) { #[cfg(target_os = "zkvm")] - hint_buffer_u32!(words.as_mut_ptr(), words.len()); + hint_buffer_chunked(words.as_mut_ptr() as *mut u8, words.len()); #[cfg(not(target_os = "zkvm"))] { for w in words.iter_mut() { @@ -51,7 +51,7 @@ impl WordRead for Reader { } let mut num_padded_bytes = bytes.len(); #[cfg(target_os = "zkvm")] - hint_buffer_u32!(bytes as *mut [u8] as 
*mut u32, num_padded_bytes / WORD_SIZE); + hint_buffer_chunked(bytes.as_mut_ptr(), num_padded_bytes / WORD_SIZE); #[cfg(not(target_os = "zkvm"))] { let mut words = bytes.chunks_exact_mut(WORD_SIZE); diff --git a/crates/toolchain/openvm/src/pal_abi.rs b/crates/toolchain/openvm/src/pal_abi.rs index 0ab3d3f386..3797998bb8 100644 --- a/crates/toolchain/openvm/src/pal_abi.rs +++ b/crates/toolchain/openvm/src/pal_abi.rs @@ -5,7 +5,7 @@ /// system operations in the same way: there is no operating system and even the standard /// library should be directly handled with intrinsics. use openvm_platform::{fileno::*, memory::sys_alloc_aligned, rust_rt::terminate, WORD_SIZE}; -use openvm_rv32im_guest::{hint_buffer_u32, hint_random, raw_print_str_from_bytes}; +use openvm_rv32im_guest::{hint_buffer_chunked, hint_random, raw_print_str_from_bytes}; const DIGEST_WORDS: usize = 8; @@ -73,7 +73,7 @@ pub unsafe extern "C" fn sys_sha_buffer( #[no_mangle] pub unsafe extern "C" fn sys_rand(recv_buf: *mut u32, words: usize) { hint_random(words); - hint_buffer_u32!(recv_buf, words); + hint_buffer_chunked(recv_buf as *mut u8, words); } /// # Safety diff --git a/crates/vm/src/arch/execution.rs b/crates/vm/src/arch/execution.rs index 234dfbd5b9..0b7a13bfe9 100644 --- a/crates/vm/src/arch/execution.rs +++ b/crates/vm/src/arch/execution.rs @@ -38,6 +38,12 @@ pub enum ExecutionError { DisabledOperation { pc: u32, opcode: VmOpcode }, #[error("at pc = {pc}")] HintOutOfBounds { pc: u32 }, + #[error("at pc {pc}, hint buffer num_words {num_words} exceeds MAX_HINT_BUFFER_WORDS {max_hint_buffer_words}")] + HintBufferTooLarge { + pc: u32, + num_words: u32, + max_hint_buffer_words: u32, + }, #[error("at pc {pc}, tried to publish into index {public_value_index} when num_public_values = {num_public_values}")] PublicValueIndexOutOfBounds { pc: u32, diff --git a/docs/vocs/docs/pages/specs/openvm/isa.mdx b/docs/vocs/docs/pages/specs/openvm/isa.mdx index 14b71fa05c..a1e8223540 100644 --- 
a/docs/vocs/docs/pages/specs/openvm/isa.mdx +++ b/docs/vocs/docs/pages/specs/openvm/isa.mdx @@ -35,6 +35,7 @@ OpenVM depends on the following parameters, some of which are fixed and some of | `addr_space_height` | The base 2 log of the number of writable address spaces supported. | Configurable, must satisfy `addr_space_height <= F::bits() - 2` | | `pointer_max_bits` | The maximum number of bits in a pointer. | Configurable, must satisfy `pointer_max_bits <= F::bits() - 2` | | `num_public_values` | The number of user public values. | Configurable. If continuation is enabled, it must equal `8` times a power of two(which is nonzero). | +| `MAX_HINT_BUFFER_WORDS_BITS` | The maximum number of bits for hint buffer word count. This determines `MAX_HINT_BUFFER_WORDS = 2^MAX_HINT_BUFFER_WORDS_BITS - 1` = 262,143 words (≈1MB), the maximum words per `HINT_BUFFER_RV32` instruction. | Fixed to 18. | We explain these parameters in subsequent sections. @@ -428,9 +429,11 @@ with user input-output. | Name | Operands | Description | | ---------------- | --------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | HINT_STOREW_RV32 | `_,b,_,1,2` | `[r32{0}(b):4]_2 = next 4 bytes from hint stream`. Only valid if next 4 values in hint stream are bytes. | -| HINT_BUFFER_RV32 | `a,b,_,1,2` | `[r32{0}(b):4 * l]_2 = next 4 * l bytes from hint stream` where `l = r32{0}(a)`. Only valid if next `4 * l` values in hint stream are bytes. Very important: `l` should not be 0. The pointer address `r32{0}(b)` does not need to be a multiple of `4`. | +| HINT_BUFFER_RV32 | `a,b,_,1,2` | `[r32{0}(b):4 * l]_2 = next 4 * l bytes from hint stream` where `l = r32{0}(a)`. Only valid if next `4 * l` values in hint stream are bytes. `l` must be non-zero and <= `MAX_HINT_BUFFER_WORDS` (262,143 words ≈ 1MB). 
The pointer address `r32{0}(b)` does not need to be a multiple of `4`. | | REVEAL_RV32 | `a,b,c,1,3,_,g` | Pseudo-instruction for `STOREW_RV32 a,b,c,1,3,_,g` writing to the user IO address space `3`. Only valid when continuations are enabled. | +> **Note:** The `MAX_HINT_BUFFER_WORDS` bound on `HINT_BUFFER_RV32` is enforced by both the executor and AIR constraints. The SDK's `hint_buffer_chunked` function automatically splits larger reads into multiple `HINT_BUFFER_RV32` instructions. + #### Phantom Sub-Instructions The RV32IM extension defines the following phantom sub-instructions. diff --git a/extensions/algebra/moduli-macros/src/lib.rs b/extensions/algebra/moduli-macros/src/lib.rs index 4ea8af0211..0266b7468e 100644 --- a/extensions/algebra/moduli-macros/src/lib.rs +++ b/extensions/algebra/moduli-macros/src/lib.rs @@ -875,15 +875,15 @@ pub fn moduli_declare(input: TokenStream) -> TokenStream { } #[cfg(target_os = "zkvm")] { - use ::openvm_algebra_guest::{openvm_custom_insn, openvm_rv32im_guest}; // needed for hint_store_u32! and hint_buffer_u32! + use ::openvm_algebra_guest::{openvm_custom_insn, openvm_rv32im_guest}; // needed for hint_store_u32! 
and hint_buffer_chunked let is_square = core::mem::MaybeUninit::::uninit(); - let sqrt = core::mem::MaybeUninit::<#struct_name>::uninit(); + let mut sqrt = core::mem::MaybeUninit::<#struct_name>::uninit(); unsafe { #hint_sqrt_extern_func(self as *const #struct_name as usize); let is_square_ptr = is_square.as_ptr() as *const u32; openvm_rv32im_guest::hint_store_u32!(is_square_ptr); - openvm_rv32im_guest::hint_buffer_u32!(sqrt.as_ptr() as *const u8, <#struct_name as ::openvm_algebra_guest::IntMod>::NUM_LIMBS / 4); + openvm_rv32im_guest::hint_buffer_chunked(sqrt.as_mut_ptr() as *mut u8, <#struct_name as ::openvm_algebra_guest::IntMod>::NUM_LIMBS / 4 as usize); let is_square = is_square.assume_init(); if is_square == 0 || is_square == 1 { Some((is_square == 1, sqrt.assume_init())) @@ -902,14 +902,14 @@ pub fn moduli_declare(input: TokenStream) -> TokenStream { } #[cfg(target_os = "zkvm")] { - use ::openvm_algebra_guest::{openvm_custom_insn, openvm_rv32im_guest}; // needed for hint_buffer_u32! 
+ use ::openvm_algebra_guest::{openvm_custom_insn, openvm_rv32im_guest}; // needed for hint_buffer_chunked let mut non_qr_uninit = core::mem::MaybeUninit::::uninit(); let mut non_qr; unsafe { #hint_non_qr_extern_func(); - let ptr = non_qr_uninit.as_ptr() as *const u8; - openvm_rv32im_guest::hint_buffer_u32!(ptr, ::NUM_LIMBS / 4); + let ptr = non_qr_uninit.as_mut_ptr() as *mut u8; + openvm_rv32im_guest::hint_buffer_chunked(ptr, ::NUM_LIMBS / 4 as usize); non_qr = non_qr_uninit.assume_init(); } // ensure non_qr < modulus diff --git a/extensions/rv32im/circuit/cuda/src/hintstore.cu b/extensions/rv32im/circuit/cuda/src/hintstore.cu index ce09a22477..b4e3a1b607 100644 --- a/extensions/rv32im/circuit/cuda/src/hintstore.cu +++ b/extensions/rv32im/circuit/cuda/src/hintstore.cu @@ -6,6 +6,8 @@ using namespace riscv; using namespace program; +using hintstore::MAX_HINT_BUFFER_WORDS; +using hintstore::MAX_HINT_BUFFER_WORDS_BITS; template struct Rv32HintStoreCols { // common @@ -87,11 +89,25 @@ struct Rv32HintStore { COL_WRITE_ARRAY(row, Rv32HintStoreCols, mem_ptr_limbs, mem_ptr_limbs); if (local_idx == 0) { + // The overflow check for mem_ptr + num_words * 4 is not needed because + // 4 * MAX_HINT_BUFFER_WORDS < 2^pointer_max_bits guarantees no overflow + assert(MAX_HINT_BUFFER_WORDS_BITS + 2 < pointer_max_bits); + + // Range check for mem_ptr (using pointer_max_bits) uint32_t msl_rshift = (RV32_REGISTER_NUM_LIMBS - 1) * RV32_CELL_BITS; uint32_t msl_lshift = RV32_REGISTER_NUM_LIMBS * RV32_CELL_BITS - pointer_max_bits; + + // Range check for num_words (using MAX_HINT_BUFFER_WORDS_BITS) + // These constraints only work for MAX_HINT_BUFFER_WORDS_BITS in [16, 23] + assert(MAX_HINT_BUFFER_WORDS_BITS >= 16 && MAX_HINT_BUFFER_WORDS_BITS <= 23); + + assert(record.num_words <= MAX_HINT_BUFFER_WORDS); + uint32_t rem_words_limb2_lshift = (RV32_REGISTER_NUM_LIMBS - 1) * RV32_CELL_BITS - MAX_HINT_BUFFER_WORDS_BITS; + + // Combined range check for mem_ptr and num_words 
bitwise_lookup.add_range( (record.mem_ptr >> msl_rshift) << msl_lshift, - (record.num_words >> msl_rshift) << msl_lshift + ((record.num_words >> 16) & 0xFF) << rem_words_limb2_lshift ); mem_helper.fill( row.slice_from(COL_INDEX(Rv32HintStoreCols, mem_ptr_aux_cols)), diff --git a/extensions/rv32im/circuit/src/hintstore/execution.rs b/extensions/rv32im/circuit/src/hintstore/execution.rs index 47e68e3084..631cdb79a1 100644 --- a/extensions/rv32im/circuit/src/hintstore/execution.rs +++ b/extensions/rv32im/circuit/src/hintstore/execution.rs @@ -14,6 +14,7 @@ use openvm_instructions::{ use openvm_rv32im_transpiler::{ Rv32HintStoreOpcode, Rv32HintStoreOpcode::{HINT_BUFFER, HINT_STOREW}, + MAX_HINT_BUFFER_WORDS, }; use openvm_stark_backend::p3_field::PrimeField32; @@ -172,6 +173,15 @@ unsafe fn execute_e12_impl MAX_HINT_BUFFER_WORDS as u32 { + return Err(ExecutionError::HintBufferTooLarge { + pc, + num_words, + max_hint_buffer_words: MAX_HINT_BUFFER_WORDS as u32, + }); + } + if exec_state.streams.hint_stream.len() < RV32_REGISTER_NUM_LIMBS * num_words as usize { let err = ExecutionError::HintOutOfBounds { pc }; return Err(err); diff --git a/extensions/rv32im/circuit/src/hintstore/mod.rs b/extensions/rv32im/circuit/src/hintstore/mod.rs index 35955bb979..b9cac88249 100644 --- a/extensions/rv32im/circuit/src/hintstore/mod.rs +++ b/extensions/rv32im/circuit/src/hintstore/mod.rs @@ -25,6 +25,7 @@ use openvm_instructions::{ use openvm_rv32im_transpiler::{ Rv32HintStoreOpcode, Rv32HintStoreOpcode::{HINT_BUFFER, HINT_STOREW}, + MAX_HINT_BUFFER_WORDS, MAX_HINT_BUFFER_WORDS_BITS, }; use openvm_stark_backend::{ interaction::InteractionBuilder, @@ -202,19 +203,29 @@ impl Air for Rv32HintStoreAir { ) .eval(builder, is_start.clone()); - // Preventing mem_ptr and rem_words overflow - // Constraining mem_ptr_limbs[RV32_REGISTER_NUM_LIMBS - 1] < 2^(pointer_max_bits - - // (RV32_REGISTER_NUM_LIMBS - 1)*RV32_CELL_BITS) which implies mem_ptr <= - // 2^pointer_max_bits Similarly for rem_words 
<= 2^pointer_max_bits + // Preventing rem_words overflow: rem_words < 2^MAX_HINT_BUFFER_WORDS_BITS + // These constraints only work for MAX_HINT_BUFFER_WORDS_BITS in [16, 23] + debug_assert!( + (16..=23).contains(&MAX_HINT_BUFFER_WORDS_BITS), + "MAX_HINT_BUFFER_WORDS_BITS must be in [16, 23] for these constraints to work" + ); + // For MAX_HINT_BUFFER_WORDS_BITS = 18, this requires: + // - limbs[3] = 0 (since 2^18 < 2^24) + // - limbs[2] < 4 (since 2^18 = 4 * 2^16) + builder.assert_zero(local_cols.rem_words_limbs[RV32_REGISTER_NUM_LIMBS - 1]); + + // Preventing mem_ptr overflow: mem_ptr < 2^pointer_max_bits + // (rem_words: limbs[3] is asserted zero above; limbs[2] is range checked below) self.bitwise_operation_lookup_bus .send_range( local_cols.mem_ptr_limbs[RV32_REGISTER_NUM_LIMBS - 1] * AB::F::from_canonical_usize( 1 << (RV32_REGISTER_NUM_LIMBS * RV32_CELL_BITS - self.pointer_max_bits), ), - local_cols.rem_words_limbs[RV32_REGISTER_NUM_LIMBS - 1] + local_cols.rem_words_limbs[RV32_REGISTER_NUM_LIMBS - 2] * AB::F::from_canonical_usize( - 1 << (RV32_REGISTER_NUM_LIMBS * RV32_CELL_BITS - self.pointer_max_bits), + 1 << ((RV32_REGISTER_NUM_LIMBS - 1) * RV32_CELL_BITS + - MAX_HINT_BUFFER_WORDS_BITS), ), ) .eval(builder, is_start.clone()); @@ -409,6 +420,15 @@ where read_rv32_register(state.memory.data(), a) }; + // Bounds check: num_words must not exceed MAX_HINT_BUFFER_WORDS + if num_words > MAX_HINT_BUFFER_WORDS as u32 { + return Err(ExecutionError::HintBufferTooLarge { + pc: *state.pc, + num_words, + max_hint_buffer_words: MAX_HINT_BUFFER_WORDS as u32, + }); + } + let record = state.ctx.alloc(MultiRowLayout::new(Rv32HintStoreMetadata { num_words: num_words as usize, })); @@ -508,6 +528,10 @@ impl TraceFiller for Rv32HintStoreFiller { let msl_lshift: u32 = (RV32_REGISTER_NUM_LIMBS * RV32_CELL_BITS - self.pointer_max_bits) as u32; + // Scale factors for rem_words range check (using MAX_HINT_BUFFER_WORDS_BITS) + let rem_words_limb2_lshift: u32 = + 
((RV32_REGISTER_NUM_LIMBS - 1) * RV32_CELL_BITS - MAX_HINT_BUFFER_WORDS_BITS) as u32; + chunks .par_iter_mut() .zip(sizes.par_iter()) @@ -526,9 +550,17 @@ impl TraceFiller for Rv32HintStoreFiller { }), ) }; + // Range check for mem_ptr (using pointer_max_bits) + // (num_words overflow check is handled below with the stricter + // MAX_HINT_BUFFER_WORDS_BITS bound) + // Range check for num_words (using MAX_HINT_BUFFER_WORDS_BITS) + debug_assert!( + num_words <= MAX_HINT_BUFFER_WORDS as u32, + "num_words must be <= MAX_HINT_BUFFER_WORDS" + ); self.bitwise_lookup_chip.request_range( (record.inner.mem_ptr >> msl_rshift) << msl_lshift, - (num_words >> msl_rshift) << msl_lshift, + ((num_words >> 16) & 0xFF) << rem_words_limb2_lshift, ); let mut timestamp = record.inner.timestamp + num_words * 3; diff --git a/extensions/rv32im/circuit/src/hintstore/tests.rs b/extensions/rv32im/circuit/src/hintstore/tests.rs index e79066aae6..61019a1104 100644 --- a/extensions/rv32im/circuit/src/hintstore/tests.rs +++ b/extensions/rv32im/circuit/src/hintstore/tests.rs @@ -19,7 +19,10 @@ use openvm_instructions::{ riscv::{RV32_CELL_BITS, RV32_MEMORY_AS, RV32_REGISTER_AS, RV32_REGISTER_NUM_LIMBS}, LocalOpcode, }; -use openvm_rv32im_transpiler::Rv32HintStoreOpcode::{self, *}; +use openvm_rv32im_transpiler::{ + Rv32HintStoreOpcode::{self, *}, + MAX_HINT_BUFFER_WORDS, +}; use openvm_stark_backend::{ p3_field::FieldAlgebra, p3_matrix::{ @@ -194,6 +197,94 @@ fn rand_hintstore_test() { // part of the trace and check that the chip throws the expected error. 
////////////////////////////////////////////////////////////////////////////////////// +#[test] +#[should_panic(expected = "HintBufferTooLarge")] +fn test_hint_buffer_exceeds_max_words() { + let mut rng = create_seeded_rng(); + let mut tester = VmChipTestBuilder::default(); + + let (mut harness, _bitwise) = create_harness::>(&mut tester); + + let num_words = (MAX_HINT_BUFFER_WORDS + 1) as u32; + + let a = gen_pointer(&mut rng, RV32_REGISTER_NUM_LIMBS); + tester.write( + RV32_REGISTER_AS as usize, + a, + num_words.to_le_bytes().map(F::from_canonical_u8), + ); + + let mem_ptr = gen_pointer(&mut rng, 4) as u32; + let b = gen_pointer(&mut rng, RV32_REGISTER_NUM_LIMBS); + tester.write(1, b, mem_ptr.to_le_bytes().map(F::from_canonical_u8)); + + for _ in 0..num_words { + let data = rng.next_u32().to_le_bytes().map(F::from_canonical_u8); + tester.streams_mut().hint_stream.extend(data); + } + + tester.execute( + &mut harness.executor, + &mut harness.arena, + &Instruction::from_usize( + HINT_BUFFER.global_opcode(), + [a, b, 0, RV32_REGISTER_AS as usize, RV32_MEMORY_AS as usize], + ), + ); +} + +#[test] +fn test_hint_buffer_rem_words_range_check() { + let mut rng = create_seeded_rng(); + let mut tester = VmChipTestBuilder::default(); + + let (mut harness, bitwise) = create_harness(&mut tester); + + // Build a small, valid buffer instruction with 1 word so trace has 1 row. 
+ let num_words: u32 = 1; + let a = gen_pointer(&mut rng, RV32_REGISTER_NUM_LIMBS); + tester.write( + RV32_REGISTER_AS as usize, + a, + num_words.to_le_bytes().map(F::from_canonical_u8), + ); + + let mem_ptr = gen_pointer(&mut rng, 4) as u32; + let b = gen_pointer(&mut rng, RV32_REGISTER_NUM_LIMBS); + tester.write(1, b, mem_ptr.to_le_bytes().map(F::from_canonical_u8)); + + for _ in 0..num_words { + let data = rng.next_u32().to_le_bytes().map(F::from_canonical_u8); + tester.streams_mut().hint_stream.extend(data); + } + + tester.execute( + &mut harness.executor, + &mut harness.arena, + &Instruction::from_usize( + HINT_BUFFER.global_opcode(), + [a, b, 0, RV32_REGISTER_AS as usize, RV32_MEMORY_AS as usize], + ), + ); + + let modify_trace = |trace: &mut DenseMatrix| { + let mut trace_row = trace.row_slice(0).to_vec(); + let cols: &mut Rv32HintStoreCols = trace_row.as_mut_slice().borrow_mut(); + // Force `rem_words` to overflow MAX_HINT_BUFFER_WORDS_BITS on the start row. + cols.rem_words_limbs = [F::ZERO, F::ZERO, F::ZERO, F::from_canonical_u8(1)]; + *trace = RowMajorMatrix::new(trace_row, trace.width()); + }; + + disable_debug_builder(); + let tester = tester + .build() + .load_and_prank_trace(harness, modify_trace) + .load_periphery(bitwise) + .finalize(); + + tester.simple_test_with_expected_error(get_verification_error(false)); +} + #[allow(clippy::too_many_arguments)] fn run_negative_hintstore_test( opcode: Rv32HintStoreOpcode, diff --git a/extensions/rv32im/guest/src/io.rs b/extensions/rv32im/guest/src/io.rs index 664b9b1117..535959d4cc 100644 --- a/extensions/rv32im/guest/src/io.rs +++ b/extensions/rv32im/guest/src/io.rs @@ -1,5 +1,5 @@ #![allow(unused_imports)] -use crate::{PhantomImm, PHANTOM_FUNCT3, SYSTEM_OPCODE}; +use crate::{PhantomImm, MAX_HINT_BUFFER_WORDS, PHANTOM_FUNCT3, SYSTEM_OPCODE}; /// Store the next 4 bytes from the hint stream to [[rd]_1]_2. #[macro_export] @@ -21,8 +21,8 @@ macro_rules! 
hint_buffer_u32 { ($x:expr, $len:expr) => { if $len != 0 { openvm_custom_insn::custom_insn_i!( - opcode = openvm_rv32im_guest::SYSTEM_OPCODE, - funct3 = openvm_rv32im_guest::HINT_FUNCT3, + opcode = $crate::SYSTEM_OPCODE, + funct3 = $crate::HINT_FUNCT3, rd = In $x, rs1 = In $len, imm = Const 1, @@ -31,6 +31,18 @@ macro_rules! hint_buffer_u32 { }; } +/// Read hint buffer with automatic chunking for large reads. +/// Splits reads larger than MAX_HINT_BUFFER_WORDS into multiple instructions. +#[inline(always)] +pub fn hint_buffer_chunked(mut ptr: *mut u8, mut num_words: usize) { + while num_words > 0 { + let chunk = core::cmp::min(num_words, MAX_HINT_BUFFER_WORDS); + hint_buffer_u32!(ptr, chunk); + ptr = ptr.wrapping_add(chunk * 4); + num_words -= chunk; + } +} + /// Reset the hint stream with the next hint. #[inline(always)] pub fn hint_input() { diff --git a/extensions/rv32im/guest/src/lib.rs b/extensions/rv32im/guest/src/lib.rs index 99f1a6f97f..cea29068e2 100644 --- a/extensions/rv32im/guest/src/lib.rs +++ b/extensions/rv32im/guest/src/lib.rs @@ -25,6 +25,16 @@ pub const REVEAL_FUNCT3: u8 = 0b010; pub const PHANTOM_FUNCT3: u8 = 0b011; pub const CSRRW_FUNCT3: u8 = 0b001; +/// Maximum number of bits for the hint buffer word count. +/// IMPORTANT: Must be kept in sync with the MAX_HINT_BUFFER_WORDS_BITS constant for CUDA in +/// `crates/circuits/primitives/cuda/include/primitives/constants.h` +// The AIR and CUDA range-check constraints are only valid for MAX_HINT_BUFFER_WORDS_BITS in +// [16, 23]. +pub const MAX_HINT_BUFFER_WORDS_BITS: usize = 18; +/// Maximum number of words that can be read in a single HINT_BUFFER instruction. 
+/// AIR constraint requires rem_words < 2^MAX_HINT_BUFFER_WORDS_BITS, so max is one less +pub const MAX_HINT_BUFFER_WORDS: usize = (1 << MAX_HINT_BUFFER_WORDS_BITS) - 1; // 262,143 words ≈ 1MB + /// imm options for system phantom instructions #[derive(Debug, Copy, Clone, PartialEq, Eq, FromRepr)] #[repr(u16)] diff --git a/extensions/rv32im/tests/programs/examples/hint_large_buffer.rs b/extensions/rv32im/tests/programs/examples/hint_large_buffer.rs new file mode 100644 index 0000000000..64472b0f25 --- /dev/null +++ b/extensions/rv32im/tests/programs/examples/hint_large_buffer.rs @@ -0,0 +1,25 @@ +#![cfg_attr(not(feature = "std"), no_main)] +#![cfg_attr(not(feature = "std"), no_std)] + +use openvm::io::read_vec; +use openvm_rv32im_guest::MAX_HINT_BUFFER_WORDS; + +openvm::entry!(main); + +pub fn main() { + let vec = read_vec(); + + // Create a hint buffer larger than MAX_HINT_BUFFER_WORDS, to test chunking + let expected_words = MAX_HINT_BUFFER_WORDS + 100; + let expected_len = expected_words * 4; + + if vec.len() != expected_len { + openvm::process::panic(); + } + + for (i, item) in vec.iter().enumerate() { + if *item != (i as u8) { + openvm::process::panic(); + } + } +} diff --git a/extensions/rv32im/tests/src/lib.rs b/extensions/rv32im/tests/src/lib.rs index ff141398f5..c4302ae808 100644 --- a/extensions/rv32im/tests/src/lib.rs +++ b/extensions/rv32im/tests/src/lib.rs @@ -13,7 +13,7 @@ mod tests { }; use openvm_instructions::{exe::VmExe, instruction::Instruction, LocalOpcode, SystemOpcode}; use openvm_rv32im_circuit::{Rv32IBuilder, Rv32IConfig, Rv32ImBuilder, Rv32ImConfig}; - use openvm_rv32im_guest::hint_load_by_key_encode; + use openvm_rv32im_guest::{hint_load_by_key_encode, MAX_HINT_BUFFER_WORDS}; use openvm_rv32im_transpiler::{ DivRemOpcode, MulHOpcode, MulOpcode, Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension, @@ -169,6 +169,37 @@ mod tests { Ok(()) } + /// NOTE: This test is slow because it processes > 1MB of data. 
It is marked #[ignore] + /// and can be run with: cargo test -p openvm-rv32im-integration-tests test_hint_buffer_chunking + /// -- --ignored + #[test] + #[ignore = "slow test: processes >1MB of data"] + fn test_hint_buffer_chunking() -> Result<()> { + let config = test_rv32im_config(); + let elf = build_example_program_at_path(get_programs_dir!(), "hint_large_buffer", &config)?; + let exe = VmExe::from_elf( + elf, + Transpiler::::default() + .with_extension(Rv32ITranspilerExtension) + .with_extension(Rv32MTranspilerExtension) + .with_extension(Rv32IoTranspilerExtension), + )?; + + // Create input buffer larger than MAX_HINT_BUFFER_WORDS + // This will require chunking to succeed + let expected_words = MAX_HINT_BUFFER_WORDS + 100; + let expected_len = expected_words * 4; + + // Create data with a pattern that can be verified + let data: Vec = (0..expected_len) + .map(|i| F::from_canonical_u8((i % 256) as u8)) + .collect(); + + let input = vec![data]; + air_test_with_min_segments(Rv32ImBuilder, config, exe, input, 1); + Ok(()) + } + #[test] fn test_read() -> Result<()> { let config = test_rv32im_config(); diff --git a/extensions/rv32im/transpiler/src/lib.rs b/extensions/rv32im/transpiler/src/lib.rs index 03a354517e..218202c369 100644 --- a/extensions/rv32im/transpiler/src/lib.rs +++ b/extensions/rv32im/transpiler/src/lib.rs @@ -9,6 +9,7 @@ use openvm_rv32im_guest::{ NATIVE_STOREW_FUNCT3, NATIVE_STOREW_FUNCT7, PHANTOM_FUNCT3, REVEAL_FUNCT3, RV32M_FUNCT7, RV32_ALU_OPCODE, SYSTEM_OPCODE, TERMINATE_FUNCT3, }; +pub use openvm_rv32im_guest::{MAX_HINT_BUFFER_WORDS, MAX_HINT_BUFFER_WORDS_BITS}; use openvm_stark_backend::p3_field::PrimeField32; use openvm_transpiler::{ util::{nop, unimp}, diff --git a/guest-libs/pairing/src/bls12_381/pairing.rs b/guest-libs/pairing/src/bls12_381/pairing.rs index db13c785e1..8ed9df2f68 100644 --- a/guest-libs/pairing/src/bls12_381/pairing.rs +++ b/guest-libs/pairing/src/bls12_381/pairing.rs @@ -25,7 +25,7 @@ use { 
openvm_pairing_guest::{PairingBaseFunct7, OPCODE, PAIRING_FUNCT3}, openvm_platform::custom_insn_r, openvm_rv32im_guest, - openvm_rv32im_guest::hint_buffer_u32, + openvm_rv32im_guest::hint_buffer_chunked, }; use super::{Bls12_381, Fp, Fp12, Fp2}; @@ -280,7 +280,7 @@ impl PairingCheck for Bls12_381 { } #[cfg(target_os = "zkvm")] { - let hint = MaybeUninit::<(Fp12, Fp12)>::uninit(); + let mut hint = MaybeUninit::<(Fp12, Fp12)>::uninit(); // We do not rely on the slice P's memory layout since rust does not guarantee it across // compiler versions. let p_fat_ptr = (P.as_ptr() as u32, P.len() as u32); @@ -294,8 +294,8 @@ impl PairingCheck for Bls12_381 { rs1 = In &p_fat_ptr, rs2 = In &q_fat_ptr ); - let ptr = hint.as_ptr() as *const u8; - hint_buffer_u32!(ptr, (48 * 12 * 2) / 4); + let ptr = hint.as_mut_ptr() as *mut u8; + hint_buffer_chunked(ptr, (48 * 12 * 2) / 4 as usize); hint.assume_init() } } diff --git a/guest-libs/pairing/src/bn254/pairing.rs b/guest-libs/pairing/src/bn254/pairing.rs index c0f1cc35f2..9fb160511b 100644 --- a/guest-libs/pairing/src/bn254/pairing.rs +++ b/guest-libs/pairing/src/bn254/pairing.rs @@ -21,7 +21,7 @@ use { core::mem::MaybeUninit, openvm_pairing_guest::{PairingBaseFunct7, OPCODE, PAIRING_FUNCT3}, openvm_platform::custom_insn_r, - openvm_rv32im_guest::hint_buffer_u32, + openvm_rv32im_guest::hint_buffer_chunked, }; use super::{Bn254, Fp, Fp12, Fp2}; @@ -314,7 +314,7 @@ impl PairingCheck for Bn254 { } #[cfg(target_os = "zkvm")] { - let hint = MaybeUninit::<(Fp12, Fp12)>::uninit(); + let mut hint = MaybeUninit::<(Fp12, Fp12)>::uninit(); // We do not rely on the slice P's memory layout since rust does not guarantee it across // compiler versions. 
let p_fat_ptr = (P.as_ptr() as u32, P.len() as u32); @@ -328,8 +328,8 @@ impl PairingCheck for Bn254 { rs1 = In &p_fat_ptr, rs2 = In &q_fat_ptr ); - let ptr = hint.as_ptr() as *const u8; - hint_buffer_u32!(ptr, (32 * 12 * 2) / 4); + let ptr = hint.as_mut_ptr() as *mut u8; + hint_buffer_chunked(ptr, (32 * 12 * 2) / 4 as usize); hint.assume_init() } } From 2f307f9ca3f7cd6c87bf634a949d5bb9985b0865 Mon Sep 17 00:00:00 2001 From: Maillew Date: Thu, 11 Dec 2025 19:14:46 +0000 Subject: [PATCH 02/21] access adapter boolean flag --- .gitignore | 8 ++ crates/vm/src/arch/config.rs | 78 ++++++++++++++++++- .../arch/execution_mode/metered/memory_ctx.rs | 7 ++ .../src/arch/execution_mode/metered_cost.rs | 7 ++ crates/vm/src/system/memory/adapter/mod.rs | 35 +++++---- crates/vm/src/system/memory/mod.rs | 47 +++++++---- 6 files changed, 149 insertions(+), 33 deletions(-) diff --git a/.gitignore b/.gitignore index c6e6aa2049..87e918c19e 100644 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,11 @@ profile.json.gz # test fixtures benchmarks/fixtures + +#TODO: Remove this +crates/toolchain/tests/rv32im-test-vectors/tests/* +*.o +*.a +*.s +*.txt +riscv/* \ No newline at end of file diff --git a/crates/vm/src/arch/config.rs b/crates/vm/src/arch/config.rs index 3ffbfb74e0..17f7d228eb 100644 --- a/crates/vm/src/arch/config.rs +++ b/crates/vm/src/arch/config.rs @@ -123,6 +123,11 @@ pub const OPENVM_DEFAULT_INIT_FILE_NAME: &str = "openvm_init.rs"; const DEFAULT_U8_BLOCK_SIZE: usize = 4; const DEFAULT_NATIVE_BLOCK_SIZE: usize = 1; +/// The constant block size used for memory accesses when access adapters are disabled. +/// All memory accesses for address spaces 1-3 must use this block size. +/// This is also the block size used by the Boundary AIR for memory bus interactions. +pub const CONST_BLOCK_SIZE: usize = 4; + /// Trait for generating a init.rs file that contains a call to moduli_init!, /// complex_init!, sw_init! with the supported moduli and curves. 
/// Should be implemented by all VM config structs. @@ -183,6 +188,11 @@ pub struct MemoryConfig { pub decomp: usize, /// Maximum N AccessAdapter AIR to support. pub max_access_adapter_n: usize, + /// Whether access adapters are enabled. When disabled, all memory accesses must be of the + /// standard block size (e.g., 4 for address spaces 1-3). This removes the need for access + /// adapter AIRs and simplifies the memory system. + #[new(value = "true")] + pub access_adapters_enabled: bool, } impl Default for MemoryConfig { @@ -194,7 +204,15 @@ impl Default for MemoryConfig { addr_spaces[RV32_MEMORY_AS as usize].num_cells = MAX_CELLS; addr_spaces[PUBLIC_VALUES_AS as usize].num_cells = DEFAULT_MAX_NUM_PUBLIC_VALUES; addr_spaces[NATIVE_AS as usize].num_cells = MAX_CELLS; - Self::new(3, addr_spaces, POINTER_MAX_BITS, 29, 17, 32) + Self { + addr_space_height: 3, + addr_spaces, + pointer_max_bits: POINTER_MAX_BITS, + timestamp_max_bits: 29, + decomp: 17, + max_access_adapter_n: 32, + access_adapters_enabled: true, + } } } @@ -245,6 +263,36 @@ impl MemoryConfig { .map(|addr_sp| log2_strict_usize(addr_sp.min_block_size) as u8) .collect() } + + /// Returns true if the Native address space (AS 4) is used. + /// Native AS is considered "used" if it has any allocated cells. + pub fn is_native_as_used(&self) -> bool { + self.addr_spaces + .get(NATIVE_AS as usize) + .is_some_and(|config| config.num_cells > 0) + } + + /// Disables access adapters. When disabled, all memory accesses for address spaces 1-3 + /// must use the constant block size (4). Access adapters will only be used for + /// address space 4 (Native) if it is enabled. + pub fn without_access_adapters(mut self) -> Self { + self.access_adapters_enabled = false; + self + } + + /// Enables access adapters. This is the default behavior. 
+ pub fn with_access_adapters(mut self) -> Self { + self.access_adapters_enabled = true; + self + } + + /// Automatically sets `access_adapters_enabled` based on whether Native AS is used. + /// If Native AS is not used, access adapters are disabled since all other address spaces + /// use a fixed block size of 4. + pub fn with_auto_access_adapters(mut self) -> Self { + self.access_adapters_enabled = self.is_native_as_used(); + self + } } /// System-level configuration for the virtual machine. Contains all configuration parameters that @@ -375,6 +423,7 @@ impl SystemConfig { + num_memory_airs( self.continuation_enabled, self.memory_config.max_access_adapter_n, + self.memory_config.access_adapters_enabled, ) } @@ -384,6 +433,33 @@ impl SystemConfig { false => 1, } } + + /// Disables access adapters. When disabled, all memory accesses for address spaces 1-3 + /// must use the constant block size (4). This simplifies the memory system by removing + /// access adapter AIRs. + pub fn without_access_adapters(mut self) -> Self { + self.memory_config.access_adapters_enabled = false; + self + } + + /// Enables access adapters. This is the default behavior. + pub fn with_access_adapters(mut self) -> Self { + self.memory_config.access_adapters_enabled = true; + self + } + + /// Automatically sets `access_adapters_enabled` based on whether Native AS is used. + /// If Native AS is not used, access adapters are disabled since all other address spaces + /// use a fixed block size of 4. + pub fn with_auto_access_adapters(mut self) -> Self { + self.memory_config = self.memory_config.with_auto_access_adapters(); + self + } + + /// Returns true if access adapters are enabled. 
+ pub fn access_adapters_enabled(&self) -> bool { + self.memory_config.access_adapters_enabled + } } impl Default for SystemConfig { diff --git a/crates/vm/src/arch/execution_mode/metered/memory_ctx.rs b/crates/vm/src/arch/execution_mode/metered/memory_ctx.rs index 3429177d11..d75dc2c46b 100644 --- a/crates/vm/src/arch/execution_mode/metered/memory_ctx.rs +++ b/crates/vm/src/arch/execution_mode/metered/memory_ctx.rs @@ -105,6 +105,7 @@ pub struct MemoryCtx { pub boundary_idx: usize, pub merkle_tree_index: Option, pub adapter_offset: usize, + access_adapters_enabled: bool, continuations_enabled: bool, chunk: u32, chunk_bits: u32, @@ -128,6 +129,7 @@ impl MemoryCtx { boundary_idx: config.memory_boundary_air_id(), merkle_tree_index: config.memory_merkle_air_id(), adapter_offset: config.access_adapter_air_id_offset(), + access_adapters_enabled: config.memory_config.access_adapters_enabled, chunk, chunk_bits, memory_dimensions, @@ -210,6 +212,11 @@ impl MemoryCtx { size_bits: u32, num: u32, ) { + // Skip if access adapters are disabled + if !self.access_adapters_enabled { + return; + } + debug_assert!((address_space as usize) < self.min_block_size_bits.len()); // SAFETY: address_space passed is usually a hardcoded constant or derived from an diff --git a/crates/vm/src/arch/execution_mode/metered_cost.rs b/crates/vm/src/arch/execution_mode/metered_cost.rs index 925bd25af2..c92965ad3f 100644 --- a/crates/vm/src/arch/execution_mode/metered_cost.rs +++ b/crates/vm/src/arch/execution_mode/metered_cost.rs @@ -18,6 +18,7 @@ pub const DEFAULT_MAX_COST: u64 = DEFAULT_MAX_SEGMENTS * DEFAULT_SEGMENT_MAX_CEL pub struct AccessAdapterCtx { min_block_size_bits: Vec, idx_offset: usize, + enabled: bool, } impl AccessAdapterCtx { @@ -25,6 +26,7 @@ impl AccessAdapterCtx { Self { min_block_size_bits: config.memory_config.min_block_size_bits(), idx_offset: config.access_adapter_air_id_offset(), + enabled: config.memory_config.access_adapters_enabled, } } @@ -36,6 +38,11 @@ impl 
AccessAdapterCtx { size_bits: u32, widths: &[usize], ) { + // Skip if access adapters are disabled + if !self.enabled { + return; + } + debug_assert!((address_space as usize) < self.min_block_size_bits.len()); // SAFETY: address_space passed is usually a hardcoded constant or derived from an diff --git a/crates/vm/src/system/memory/adapter/mod.rs b/crates/vm/src/system/memory/adapter/mod.rs index 8b0797dcf6..a9c89fc2ea 100644 --- a/crates/vm/src/system/memory/adapter/mod.rs +++ b/crates/vm/src/system/memory/adapter/mod.rs @@ -58,21 +58,26 @@ impl AccessAdapterInventory { memory_bus: MemoryBus, memory_config: MemoryConfig, ) -> Self { - let rc = range_checker; - let mb = memory_bus; - let tmb = memory_config.timestamp_max_bits; - let maan = memory_config.max_access_adapter_n; - assert!(matches!(maan, 2 | 4 | 8 | 16 | 32)); - let chips: Vec<_> = [ - Self::create_access_adapter_chip::<2>(rc.clone(), mb, tmb, maan), - Self::create_access_adapter_chip::<4>(rc.clone(), mb, tmb, maan), - Self::create_access_adapter_chip::<8>(rc.clone(), mb, tmb, maan), - Self::create_access_adapter_chip::<16>(rc.clone(), mb, tmb, maan), - Self::create_access_adapter_chip::<32>(rc.clone(), mb, tmb, maan), - ] - .into_iter() - .flatten() - .collect(); + // Only create adapter chips if access adapters are enabled + let chips: Vec<_> = if memory_config.access_adapters_enabled { + let rc = range_checker; + let mb = memory_bus; + let tmb = memory_config.timestamp_max_bits; + let maan = memory_config.max_access_adapter_n; + assert!(matches!(maan, 2 | 4 | 8 | 16 | 32)); + [ + Self::create_access_adapter_chip::<2>(rc.clone(), mb, tmb, maan), + Self::create_access_adapter_chip::<4>(rc.clone(), mb, tmb, maan), + Self::create_access_adapter_chip::<8>(rc.clone(), mb, tmb, maan), + Self::create_access_adapter_chip::<16>(rc.clone(), mb, tmb, maan), + Self::create_access_adapter_chip::<32>(rc.clone(), mb, tmb, maan), + ] + .into_iter() + .flatten() + .collect() + } else { + Vec::new() + }; Self { 
memory_config, chips, diff --git a/crates/vm/src/system/memory/mod.rs b/crates/vm/src/system/memory/mod.rs index 411e7a5473..8c3f48c7f0 100644 --- a/crates/vm/src/system/memory/mod.rs +++ b/crates/vm/src/system/memory/mod.rs @@ -118,20 +118,24 @@ impl MemoryAirInventory { ); MemoryInterfaceAirs::Volatile { boundary } }; - // Memory access adapters - let lt_air = IsLtSubAir::new(range_bus, mem_config.timestamp_max_bits); - let maan = mem_config.max_access_adapter_n; - assert!(matches!(maan, 2 | 4 | 8 | 16 | 32)); - let access_adapters: Vec> = [ - Arc::new(AccessAdapterAir::<2> { memory_bus, lt_air }) as AirRef, - Arc::new(AccessAdapterAir::<4> { memory_bus, lt_air }) as AirRef, - Arc::new(AccessAdapterAir::<8> { memory_bus, lt_air }) as AirRef, - Arc::new(AccessAdapterAir::<16> { memory_bus, lt_air }) as AirRef, - Arc::new(AccessAdapterAir::<32> { memory_bus, lt_air }) as AirRef, - ] - .into_iter() - .take(log2_strict_usize(maan)) - .collect(); + // Memory access adapters - only create if enabled + let access_adapters: Vec> = if mem_config.access_adapters_enabled { + let lt_air = IsLtSubAir::new(range_bus, mem_config.timestamp_max_bits); + let maan = mem_config.max_access_adapter_n; + assert!(matches!(maan, 2 | 4 | 8 | 16 | 32)); + [ + Arc::new(AccessAdapterAir::<2> { memory_bus, lt_air }) as AirRef, + Arc::new(AccessAdapterAir::<4> { memory_bus, lt_air }) as AirRef, + Arc::new(AccessAdapterAir::<8> { memory_bus, lt_air }) as AirRef, + Arc::new(AccessAdapterAir::<16> { memory_bus, lt_air }) as AirRef, + Arc::new(AccessAdapterAir::<32> { memory_bus, lt_air }) as AirRef, + ] + .into_iter() + .take(log2_strict_usize(maan)) + .collect() + } else { + Vec::new() + }; Self { bridge, @@ -159,7 +163,16 @@ impl MemoryAirInventory { /// This is O(1) and returns the length of /// [`MemoryAirInventory::into_airs`]. 
-pub fn num_memory_airs(is_persistent: bool, max_access_adapter_n: usize) -> usize { - // boundary + { merkle if is_persistent } + access_adapters - 1 + usize::from(is_persistent) + log2_strict_usize(max_access_adapter_n) +pub fn num_memory_airs( + is_persistent: bool, + max_access_adapter_n: usize, + access_adapters_enabled: bool, +) -> usize { + // boundary + { merkle if is_persistent } + access_adapters (if enabled) + let num_adapters = if access_adapters_enabled { + log2_strict_usize(max_access_adapter_n) + } else { + 0 + }; + 1 + usize::from(is_persistent) + num_adapters } From 8416b4c3304a5fbc3d3cb29e5f5ee35b4b39f928 Mon Sep 17 00:00:00 2001 From: Maillew Date: Thu, 11 Dec 2025 20:03:46 +0000 Subject: [PATCH 03/21] finalize memory --- crates/vm/src/system/memory/online.rs | 108 +++++++++++++++++++------- 1 file changed, 81 insertions(+), 27 deletions(-) diff --git a/crates/vm/src/system/memory/online.rs b/crates/vm/src/system/memory/online.rs index 6a16e0d12b..3c5ac45f45 100644 --- a/crates/vm/src/system/memory/online.rs +++ b/crates/vm/src/system/memory/online.rs @@ -13,7 +13,7 @@ use tracing::instrument; use crate::{ arch::{ AddressSpaceHostConfig, AddressSpaceHostLayout, DenseRecordArena, MemoryConfig, - RecordArena, MAX_CELL_BYTE_SIZE, + RecordArena, CONST_BLOCK_SIZE, MAX_CELL_BYTE_SIZE, }, system::{ memory::{ @@ -941,10 +941,10 @@ impl TracingMemory { match is_persistent { false => TouchedMemory::Volatile( - self.touched_blocks_to_equipartition::(touched_blocks), + self.touched_blocks_to_equipartition::(touched_blocks), ), true => TouchedMemory::Persistent( - self.touched_blocks_to_equipartition::(touched_blocks), + self.touched_blocks_to_equipartition::(touched_blocks), ), } } @@ -974,29 +974,37 @@ impl TracingMemory { /// Returns the equipartition of the touched blocks. /// Modifies records and adds new to account for the initial/final segments. 
- fn touched_blocks_to_equipartition( + fn touched_blocks_to_equipartition< + F: Field, + const PARTITION_SIZE: usize, + const OUTPUT_SIZE: usize, + >( &mut self, touched_blocks: Vec<((u32, u32), AccessMetadata)>, - ) -> TimestampedEquipartition { + ) -> TimestampedEquipartition { + assert!( + OUTPUT_SIZE % PARTITION_SIZE == 0, + "Output size must be a multiple of the partition size" + ); // [perf] We can `.with_capacity()` if we keep track of the number of segments we initialize - let mut final_memory = Vec::new(); + let mut partitioned_memory = Vec::new(); debug_assert!(touched_blocks.is_sorted_by_key(|(addr, _)| addr)); - self.handle_touched_blocks::(&mut final_memory, touched_blocks); + self.handle_touched_blocks::(&mut partitioned_memory, touched_blocks); - debug_assert!(final_memory.is_sorted_by_key(|(key, _)| *key)); - final_memory + debug_assert!(partitioned_memory.is_sorted_by_key(|(key, _)| *key)); + Self::rechunk_final_memory::(partitioned_memory) } - fn handle_touched_blocks( + fn handle_touched_blocks( &mut self, - final_memory: &mut Vec<((u32, u32), TimestampedValues)>, + final_memory: &mut Vec<((u32, u32), TimestampedValues)>, touched_blocks: Vec<((u32, u32), AccessMetadata)>, ) { - let mut current_values = vec![0u8; MAX_CELL_BYTE_SIZE * CHUNK]; + let mut current_values = vec![0u8; MAX_CELL_BYTE_SIZE * PARTITION_SIZE]; let mut current_cnt = 0; let mut current_address = MemoryAddress::new(0, 0); - let mut current_timestamps = vec![0; CHUNK]; + let mut current_timestamps = vec![0; PARTITION_SIZE]; for ((addr_space, ptr), access_metadata) in touched_blocks { // SAFETY: addr_space of touched blocks are all in bounds let addr_space_config = @@ -1009,16 +1017,16 @@ impl TracingMemory { current_cnt == 0 || (current_address.address_space == addr_space && current_address.pointer + current_cnt as u32 == ptr), - "The union of all touched blocks must consist of blocks with sizes divisible by `CHUNK`" + "The union of all touched blocks must consist of blocks with 
sizes divisible by the partition size" ); debug_assert!(block_size >= min_block_size as u8); debug_assert!(ptr % min_block_size as u32 == 0); if current_cnt == 0 { assert_eq!( - ptr & (CHUNK as u32 - 1), + ptr & (PARTITION_SIZE as u32 - 1), 0, - "The union of all touched blocks must consist of `CHUNK`-aligned blocks" + "The union of all touched blocks must consist of partition-aligned blocks" ); current_address = MemoryAddress::new(addr_space, ptr); } @@ -1033,7 +1041,7 @@ impl TracingMemory { type_size: cell_size as u32, }); } - if min_block_size > CHUNK { + if min_block_size > PARTITION_SIZE { assert_eq!(current_cnt, 0); for i in (0..block_size as u32).step_by(min_block_size) { self.add_split_record(AccessRecordHeader { @@ -1041,7 +1049,7 @@ impl TracingMemory { address_space: addr_space, pointer: ptr + i, block_size: min_block_size as u32, - lowest_block_size: CHUNK as u32, + lowest_block_size: PARTITION_SIZE as u32, type_size: cell_size as u32, }); } @@ -1053,14 +1061,14 @@ impl TracingMemory { block_size as usize * cell_size, ) }; - for i in (0..block_size as u32).step_by(CHUNK) { + for i in (0..block_size as u32).step_by(PARTITION_SIZE) { final_memory.push(( (addr_space, ptr + i), TimestampedValues { timestamp, values: from_fn(|j| { let byte_idx = (i as usize + j) * cell_size; - // SAFETY: block_size is multiple of CHUNK and we are reading chunks + // SAFETY: block_size is multiple of PARTITION_SIZE and we are reading chunks // of cells within bounds unsafe { addr_space_config @@ -1084,15 +1092,15 @@ impl TracingMemory { current_values[current_cnt * cell_size..current_cnt * cell_size + cell_size] .copy_from_slice(cell_data); if current_cnt & (min_block_size - 1) == 0 { - // SAFETY: current_cnt / min_block_size < CHUNK / min_block_size <= CHUNK + // SAFETY: current_cnt / min_block_size < PARTITION_SIZE / min_block_size <= PARTITION_SIZE unsafe { *current_timestamps.get_unchecked_mut(current_cnt / min_block_size) = timestamp; } } current_cnt += 1; - if 
current_cnt == CHUNK { - let timestamp = *current_timestamps[..CHUNK / min_block_size] + if current_cnt == PARTITION_SIZE { + let timestamp = *current_timestamps[..PARTITION_SIZE / min_block_size] .iter() .max() .unwrap(); @@ -1101,12 +1109,12 @@ impl TracingMemory { timestamp_and_mask: timestamp, address_space: addr_space, pointer: current_address.pointer, - block_size: CHUNK as u32, + block_size: PARTITION_SIZE as u32, lowest_block_size: min_block_size as u32, type_size: cell_size as u32, }, - ¤t_values[..CHUNK * cell_size], - ¤t_timestamps[..CHUNK / min_block_size], + ¤t_values[..PARTITION_SIZE * cell_size], + ¤t_timestamps[..PARTITION_SIZE / min_block_size], ); final_memory.push(( (current_address.address_space, current_address.pointer), @@ -1126,7 +1134,53 @@ impl TracingMemory { } } } - assert_eq!(current_cnt, 0, "The union of all touched blocks must consist of blocks with sizes divisible by `CHUNK`"); + assert_eq!( + current_cnt, 0, + "The union of all touched blocks must consist of blocks with sizes divisible by the partition size" + ); + } + + fn rechunk_final_memory( + partitioned_memory: Vec<((u32, u32), TimestampedValues)>, + ) -> TimestampedEquipartition { + debug_assert!(OUTPUT_SIZE % PARTITION_SIZE == 0); + let merge_factor = OUTPUT_SIZE / PARTITION_SIZE; + let mut final_memory = + Vec::with_capacity(partitioned_memory.len().saturating_div(merge_factor)); + let mut idx = 0; + while idx < partitioned_memory.len() { + debug_assert!(idx + merge_factor <= partitioned_memory.len()); + + let group = &partitioned_memory[idx..idx + merge_factor]; + let ((addr_space, base_ptr), _) = group[0]; + debug_assert_eq!(base_ptr % OUTPUT_SIZE as u32, 0); + + for (j, ((curr_addr_space, ptr), _)) in group.iter().enumerate() { + debug_assert_eq!(*curr_addr_space, addr_space); + debug_assert_eq!(*ptr, base_ptr + (j * PARTITION_SIZE) as u32); + } + + let timestamp = group + .iter() + .map(|(_, ts_values)| ts_values.timestamp) + .max() + .expect("Group is non-empty"); + let 
values = from_fn(|i| { + let group_idx = i / PARTITION_SIZE; + let within_group_idx = i % PARTITION_SIZE; + group[group_idx].1.values[within_group_idx] + }); + + final_memory.push(( + (addr_space, base_ptr), + TimestampedValues { timestamp, values }, + )); + + idx += merge_factor; + } + + debug_assert!(final_memory.is_sorted_by_key(|(key, _)| *key)); + final_memory } pub fn address_space_alignment(&self) -> Vec { From cf065e60235211fdf06c8721cfffd86abdaeb4fb Mon Sep 17 00:00:00 2001 From: Maillew Date: Fri, 12 Dec 2025 19:08:21 +0000 Subject: [PATCH 04/21] wip --- crates/vm/src/arch/config.rs | 2 +- crates/vm/src/arch/state.rs | 15 ++++++++- crates/vm/src/arch/testing/cpu.rs | 11 +++++-- crates/vm/src/system/memory/adapter/mod.rs | 8 +++++ crates/vm/src/system/memory/online.rs | 36 +++++++++++++++++++--- crates/vm/src/system/memory/persistent.rs | 29 ++++++++++------- extensions/rv32im/tests/src/lib.rs | 4 +-- 7 files changed, 84 insertions(+), 21 deletions(-) diff --git a/crates/vm/src/arch/config.rs b/crates/vm/src/arch/config.rs index 17f7d228eb..50467de631 100644 --- a/crates/vm/src/arch/config.rs +++ b/crates/vm/src/arch/config.rs @@ -429,7 +429,7 @@ impl SystemConfig { pub fn initial_block_size(&self) -> usize { match self.continuation_enabled { - true => CHUNK, + true => CONST_BLOCK_SIZE, false => 1, } } diff --git a/crates/vm/src/arch/state.rs b/crates/vm/src/arch/state.rs index 6e79677541..1da2a4d392 100644 --- a/crates/vm/src/arch/state.rs +++ b/crates/vm/src/arch/state.rs @@ -1,4 +1,5 @@ use std::{ + backtrace::Backtrace, fmt::Debug, ops::{Deref, DerefMut}, }; @@ -13,7 +14,7 @@ use super::{create_memory_image, ExecutionError, Streams}; #[cfg(feature = "metrics")] use crate::metrics::VmMetrics; use crate::{ - arch::{execution_mode::ExecutionCtxTrait, SystemConfig, VmStateMut}, + arch::{execution_mode::ExecutionCtxTrait, SystemConfig, VmStateMut, CONST_BLOCK_SIZE}, system::memory::online::GuestMemory, }; @@ -187,6 +188,12 @@ where addr_space: u32, ptr: 
u32, ) -> [T; BLOCK_SIZE] { + if BLOCK_SIZE != CONST_BLOCK_SIZE { + println!( + "vm_read: addr_space = {}, ptr = {}, BLOCK_SIZE = {}", + addr_space, ptr, BLOCK_SIZE + ); + } self.ctx .on_memory_operation(addr_space, ptr, BLOCK_SIZE as u32); self.host_read(addr_space, ptr) @@ -200,6 +207,12 @@ where ptr: u32, data: &[T; BLOCK_SIZE], ) { + if BLOCK_SIZE != CONST_BLOCK_SIZE { + println!( + "vm_read: addr_space = {}, ptr = {}, BLOCK_SIZE = {}", + addr_space, ptr, BLOCK_SIZE + ); + } self.ctx .on_memory_operation(addr_space, ptr, BLOCK_SIZE as u32); self.host_write(addr_space, ptr, data) diff --git a/crates/vm/src/arch/testing/cpu.rs b/crates/vm/src/arch/testing/cpu.rs index 105962bc11..4c574dda73 100644 --- a/crates/vm/src/arch/testing/cpu.rs +++ b/crates/vm/src/arch/testing/cpu.rs @@ -1,6 +1,7 @@ use std::sync::Arc; use itertools::zip_eq; +use openvm_circuit::arch::CONST_BLOCK_SIZE; use openvm_circuit_primitives::var_range::{ SharedVariableRangeCheckerChip, VariableRangeCheckerBus, VariableRangeCheckerChip, }; @@ -332,7 +333,7 @@ impl VmChipTestBuilder { fn range_checker_and_memory( mem_config: &MemoryConfig, - init_block_size: usize, + init_block_size: usize, // modify this to CONST_BLOCK_SIZE ) -> (SharedVariableRangeCheckerChip, TracingMemory) { let range_checker = Arc::new(VariableRangeCheckerChip::new(VariableRangeCheckerBus::new( RANGE_CHECKER_BUS, @@ -347,7 +348,12 @@ impl VmChipTestBuilder { pub fn persistent(mem_config: MemoryConfig) -> Self { setup_tracing_with_log_level(Level::INFO); - let (range_checker, memory) = Self::range_checker_and_memory(&mem_config, CHUNK); + /// ERRRMMM WHAT THE SIGMA not testing here + println!( + "PERSISTENT MEMORY TESTING, CONST_BLOCK_SIZE = {}", + CONST_BLOCK_SIZE + ); + let (range_checker, memory) = Self::range_checker_and_memory(&mem_config, CONST_BLOCK_SIZE); let hasher_chip = Arc::new(Poseidon2PeripheryChip::new( vm_poseidon2_config(), POSEIDON2_DIRECT_BUS, @@ -470,6 +476,7 @@ where let mut memory_controller = 
memory_tester.controller; let is_persistent = memory_controller.continuation_enabled(); let mut memory = memory_tester.memory; + // here? pass in initial memory for chunking let touched_memory = memory.finalize::>(is_persistent); // Balance memory boundaries let range_checker = memory_controller.range_checker.clone(); diff --git a/crates/vm/src/system/memory/adapter/mod.rs b/crates/vm/src/system/memory/adapter/mod.rs index a9c89fc2ea..2ae0fc28eb 100644 --- a/crates/vm/src/system/memory/adapter/mod.rs +++ b/crates/vm/src/system/memory/adapter/mod.rs @@ -1,4 +1,5 @@ use std::{ + backtrace::Backtrace, borrow::{Borrow, BorrowMut}, marker::PhantomData, ptr::copy_nonoverlapping, @@ -132,6 +133,13 @@ impl AccessAdapterInventory { while ptr < bytes.len() { let bytes_slice = &bytes[ptr..]; let header: &AccessRecordHeader = bytes_slice.borrow(); + + if header.block_size == 8 { + println!("Found size 8 access:"); + println!(" Address space: {}", header.address_space); + println!(" Pointer: {}", header.pointer); + println!(" Timestamp: {}", header.timestamp_and_mask); + } // SAFETY: // - bytes[ptr..] is a valid starting pointer to a previously allocated record // - The record contains self-describing layout information diff --git a/crates/vm/src/system/memory/online.rs b/crates/vm/src/system/memory/online.rs index 3c5ac45f45..308bd58a06 100644 --- a/crates/vm/src/system/memory/online.rs +++ b/crates/vm/src/system/memory/online.rs @@ -1,4 +1,4 @@ -use std::{array::from_fn, fmt::Debug, num::NonZero}; +use std::{array::from_fn, backtrace::Backtrace, fmt::Debug, num::NonZero}; use getset::Getters; use itertools::zip_eq; @@ -440,7 +440,7 @@ pub struct TracingMemory { initial_block_size: usize, /// The underlying data memory, with memory cells typed by address space: see [AddressMap]. 
#[getset(get = "pub")] - pub data: GuestMemory, + pub data: GuestMemory, // is this "initial memory" /// Maps addr_space to (ptr / min_block_size[addr_space] -> AccessMetadata) for latest access /// metadata. Uses paged storage for memory efficiency. AccessMetadata stores offset_to_start /// (in ALIGN units), block_size, and timestamp (latter two only valid at offset_to_start == @@ -577,6 +577,16 @@ impl TracingMemory { } pub(crate) fn add_split_record(&mut self, header: AccessRecordHeader) { + if header.block_size == 8 { + println!("-----SPLIT-----"); + println!("Adding split record for size 8:"); + println!(" Address space: {}", header.address_space); + println!(" Pointer: {}", header.pointer); + println!(" Timestamp: {}", header.timestamp_and_mask); + + let bt = Backtrace::capture(); + println!("{bt}"); + } if header.block_size == header.lowest_block_size { return; } @@ -602,6 +612,8 @@ impl TracingMemory { // we don't mind garbage values in prev_* } + //appears that we are initially still splitting, and merging memory + // is this from merkle tree? /// `data_slice` is the underlying data of the record in raw host memory format. 
pub(crate) fn add_merge_record( &mut self, @@ -609,6 +621,16 @@ impl TracingMemory { data_slice: &[u8], prev_ts: &[u32], ) { + if header.block_size == 8 { + println!("-----MERGE-----"); + println!("Adding merge record for size 8:"); + println!(" Address space: {}", header.address_space); + println!(" Pointer: {}", header.pointer); + println!(" Timestamp: {}", header.timestamp_and_mask); + + let bt = Backtrace::capture(); + println!("{bt}"); + } if header.block_size == header.lowest_block_size { return; } @@ -695,6 +717,10 @@ impl TracingMemory { AccessMetadata::new(timestamp, MIN_BLOCK_SIZE as u8, 0), ); } + println!( + "BLOCK SIZE: {}, MIN_BLOCK_SIZE: {}", + block_size, MIN_BLOCK_SIZE + ); self.add_split_record(AccessRecordHeader { timestamp_and_mask: timestamp, address_space: address_space as u32, @@ -935,6 +961,7 @@ impl TracingMemory { } /// Finalize the boundary and merkle chips. + /// pass in initial memory,for rechunking #[instrument(name = "memory_finalize", skip_all)] pub fn finalize(&mut self, is_persistent: bool) -> TouchedMemory { let touched_blocks = self.touched_blocks(); @@ -1148,9 +1175,10 @@ impl TracingMemory { let mut final_memory = Vec::with_capacity(partitioned_memory.len().saturating_div(merge_factor)); let mut idx = 0; - while idx < partitioned_memory.len() { - debug_assert!(idx + merge_factor <= partitioned_memory.len()); + //currently naively merging, but we need to consider incomplete blocks of PARTITION_SIZE, and make it into CHUNK + // with an initial PARTITION, keep on adding ind until it matches OUTPUT_SIZE; look at it mod OUTPUT_SIZE + while idx < partitioned_memory.len() { let group = &partitioned_memory[idx..idx + merge_factor]; let ((addr_space, base_ptr), _) = group[0]; debug_assert_eq!(base_ptr % OUTPUT_SIZE as u32, 0); diff --git a/crates/vm/src/system/memory/persistent.rs b/crates/vm/src/system/memory/persistent.rs index eeb22cbfd6..b5e8f5c1b3 100644 --- a/crates/vm/src/system/memory/persistent.rs +++ 
b/crates/vm/src/system/memory/persistent.rs @@ -22,7 +22,7 @@ use tracing::instrument; use super::{merkle::SerialReceiver, online::INITIAL_TIMESTAMP, TimestampedValues}; use crate::{ - arch::{hasher::Hasher, ADDR_SPACE_OFFSET}, + arch::{hasher::Hasher, ADDR_SPACE_OFFSET, CONST_BLOCK_SIZE}, system::memory::{ dimensions::MemoryDimensions, offline_checker::MemoryBus, MemoryAddress, MemoryImage, TimestampedEquipartition, @@ -109,16 +109,23 @@ impl Air for PersistentBoundaryA local.expand_direction * local.expand_direction, ); - self.memory_bus - .send( - MemoryAddress::new( - local.address_space, - local.leaf_label * AB::F::from_canonical_usize(CHUNK), - ), - local.values.to_vec(), - local.timestamp, - ) - .eval(builder, local.expand_direction); + debug_assert_eq!(CHUNK % CONST_BLOCK_SIZE, 0); + let chunk_size_f = AB::F::from_canonical_usize(CHUNK); + for block_idx in 0..(CHUNK / CONST_BLOCK_SIZE) { + let offset = AB::F::from_canonical_usize(block_idx * CONST_BLOCK_SIZE); + // Split the 1xCHUNK leaf into CONST_BLOCK_SIZE-sized bus messages. 
+ self.memory_bus + .send( + MemoryAddress::new( + local.address_space, + local.leaf_label * chunk_size_f + offset, + ), + local.values[block_idx * CONST_BLOCK_SIZE..(block_idx + 1) * CONST_BLOCK_SIZE] + .to_vec(), + local.timestamp, + ) + .eval(builder, local.expand_direction); + } } } diff --git a/extensions/rv32im/tests/src/lib.rs b/extensions/rv32im/tests/src/lib.rs index c4302ae808..f4f0efd0d3 100644 --- a/extensions/rv32im/tests/src/lib.rs +++ b/extensions/rv32im/tests/src/lib.rs @@ -93,9 +93,9 @@ mod tests { Ok(()) } - #[test_case("fibonacci", 1)] + // #[test_case("fibonacci", 1)] #[test_case("collatz", 1)] - fn test_rv32im(example_name: &str, min_segments: usize) -> Result<()> { + fn test_rv32m(example_name: &str, min_segments: usize) -> Result<()> { let config = test_rv32im_config(); let elf = build_example_program_at_path(get_programs_dir!(), example_name, &config)?; let exe = VmExe::from_elf( From 6226faaff3d080e0b4c0b090d3e5df25298d7566 Mon Sep 17 00:00:00 2001 From: Maillew Date: Fri, 12 Dec 2025 21:38:13 +0000 Subject: [PATCH 05/21] rv32im --- crates/vm/src/arch/config.rs | 4 +- crates/vm/src/system/memory/controller/mod.rs | 32 ++++-- crates/vm/src/system/memory/online.rs | 55 ++--------- crates/vm/src/system/memory/persistent.rs | 97 ++++++++++++++----- crates/vm/src/system/mod.rs | 5 +- 5 files changed, 111 insertions(+), 82 deletions(-) diff --git a/crates/vm/src/arch/config.rs b/crates/vm/src/arch/config.rs index 50467de631..02e790a932 100644 --- a/crates/vm/src/arch/config.rs +++ b/crates/vm/src/arch/config.rs @@ -26,9 +26,7 @@ use crate::{ Arena, ChipInventoryError, ExecutorInventory, ExecutorInventoryError, }, system::{ - memory::{ - merkle::public_values::PUBLIC_VALUES_AS, num_memory_airs, CHUNK, POINTER_MAX_BITS, - }, + memory::{merkle::public_values::PUBLIC_VALUES_AS, num_memory_airs, POINTER_MAX_BITS}, SystemChipComplex, }, }; diff --git a/crates/vm/src/system/memory/controller/mod.rs b/crates/vm/src/system/memory/controller/mod.rs index 
aabe4df08d..bd770d163e 100644 --- a/crates/vm/src/system/memory/controller/mod.rs +++ b/crates/vm/src/system/memory/controller/mod.rs @@ -14,7 +14,6 @@ use openvm_stark_backend::{ interaction::PermutationCheckBus, p3_commit::PolynomialSpace, p3_field::{Field, PrimeField32}, - p3_maybe_rayon::prelude::{IntoParallelIterator, ParallelIterator}, p3_util::{log2_ceil_usize, log2_strict_usize}, prover::{cpu::CpuBackend, types::AirProvingContext}, Chip, @@ -24,7 +23,7 @@ use serde::{Deserialize, Serialize}; use self::interface::MemoryInterface; use super::{volatile::VolatileBoundaryChip, AddressMap}; use crate::{ - arch::{DenseRecordArena, MemoryConfig, ADDR_SPACE_OFFSET}, + arch::{DenseRecordArena, MemoryConfig, ADDR_SPACE_OFFSET, CONST_BLOCK_SIZE}, system::{ memory::{ adapter::AccessAdapterInventory, @@ -290,11 +289,32 @@ impl MemoryController { TouchedMemory::Persistent(final_memory), ) => { let hasher = self.hasher_chip.as_ref().unwrap(); + // boundary_chip.finalize takes CONST_BLOCK_SIZE granularity and rechunks internally boundary_chip.finalize(initial_memory, &final_memory, hasher.as_ref()); - let final_memory_values = final_memory - .into_par_iter() - .map(|(key, value)| (key, value.values)) - .collect(); + + // Rechunk CONST_BLOCK_SIZE blocks into CHUNK-sized blocks for merkle_chip + // Note: Equipartition key is (addr_space, ptr) where ptr is the starting pointer + let final_memory_values: Equipartition = { + use std::collections::BTreeMap; + let mut chunk_map: BTreeMap<(u32, u32), [F; CHUNK]> = BTreeMap::new(); + for ((addr_space, ptr), ts_values) in final_memory.into_iter() { + // Align to CHUNK boundary to get the chunk's starting pointer + let chunk_ptr = (ptr / CHUNK as u32) * CHUNK as u32; + let block_idx_in_chunk = + ((ptr % CHUNK as u32) / CONST_BLOCK_SIZE as u32) as usize; + let entry = chunk_map.entry((addr_space, chunk_ptr)).or_insert_with(|| { + // Initialize with values from initial memory + std::array::from_fn(|i| unsafe { + 
initial_memory.get_f::(addr_space, chunk_ptr + i as u32) + }) + }); + // Copy values for this block + for (i, val) in ts_values.values.into_iter().enumerate() { + entry[block_idx_in_chunk * CONST_BLOCK_SIZE + i] = val; + } + } + chunk_map + }; merkle_chip.finalize(initial_memory, &final_memory_values, hasher.as_ref()); } _ => panic!("TouchedMemory incorrect type"), diff --git a/crates/vm/src/system/memory/online.rs b/crates/vm/src/system/memory/online.rs index 308bd58a06..021863c54b 100644 --- a/crates/vm/src/system/memory/online.rs +++ b/crates/vm/src/system/memory/online.rs @@ -18,7 +18,7 @@ use crate::{ system::{ memory::{ adapter::records::{AccessLayout, AccessRecordHeader, MERGE_AND_NOT_SPLIT_FLAG}, - MemoryAddress, TimestampedEquipartition, TimestampedValues, CHUNK, + MemoryAddress, TimestampedEquipartition, TimestampedValues, }, TouchedMemory, }, @@ -971,7 +971,9 @@ impl TracingMemory { self.touched_blocks_to_equipartition::(touched_blocks), ), true => TouchedMemory::Persistent( - self.touched_blocks_to_equipartition::(touched_blocks), + self.touched_blocks_to_equipartition::( + touched_blocks, + ), ), } } @@ -1017,10 +1019,11 @@ impl TracingMemory { let mut partitioned_memory = Vec::new(); debug_assert!(touched_blocks.is_sorted_by_key(|(addr, _)| addr)); - self.handle_touched_blocks::(&mut partitioned_memory, touched_blocks); + self.handle_touched_blocks::(&mut partitioned_memory, touched_blocks); debug_assert!(partitioned_memory.is_sorted_by_key(|(key, _)| *key)); - Self::rechunk_final_memory::(partitioned_memory) + partitioned_memory + // self.rechunk_final_memory::(partitioned_memory) } fn handle_touched_blocks( @@ -1167,50 +1170,6 @@ impl TracingMemory { ); } - fn rechunk_final_memory( - partitioned_memory: Vec<((u32, u32), TimestampedValues)>, - ) -> TimestampedEquipartition { - debug_assert!(OUTPUT_SIZE % PARTITION_SIZE == 0); - let merge_factor = OUTPUT_SIZE / PARTITION_SIZE; - let mut final_memory = - 
Vec::with_capacity(partitioned_memory.len().saturating_div(merge_factor)); - let mut idx = 0; - //currently naively merging, but we need to consider incomplete blocks of PARTITION_SIZE, and make it into CHUNK - // with an initial PARTITION, keep on adding ind until it matches OUTPUT_SIZE; look at it mod OUTPUT_SIZE - - while idx < partitioned_memory.len() { - let group = &partitioned_memory[idx..idx + merge_factor]; - let ((addr_space, base_ptr), _) = group[0]; - debug_assert_eq!(base_ptr % OUTPUT_SIZE as u32, 0); - - for (j, ((curr_addr_space, ptr), _)) in group.iter().enumerate() { - debug_assert_eq!(*curr_addr_space, addr_space); - debug_assert_eq!(*ptr, base_ptr + (j * PARTITION_SIZE) as u32); - } - - let timestamp = group - .iter() - .map(|(_, ts_values)| ts_values.timestamp) - .max() - .expect("Group is non-empty"); - let values = from_fn(|i| { - let group_idx = i / PARTITION_SIZE; - let within_group_idx = i % PARTITION_SIZE; - group[group_idx].1.values[within_group_idx] - }); - - final_memory.push(( - (addr_space, base_ptr), - TimestampedValues { timestamp, values }, - )); - - idx += merge_factor; - } - - debug_assert!(final_memory.is_sorted_by_key(|(key, _)| *key)); - final_memory - } - pub fn address_space_alignment(&self) -> Vec { self.min_block_size .iter() diff --git a/crates/vm/src/system/memory/persistent.rs b/crates/vm/src/system/memory/persistent.rs index b5e8f5c1b3..f3f650fbe0 100644 --- a/crates/vm/src/system/memory/persistent.rs +++ b/crates/vm/src/system/memory/persistent.rs @@ -20,7 +20,7 @@ use openvm_stark_backend::{ use rustc_hash::FxHashSet; use tracing::instrument; -use super::{merkle::SerialReceiver, online::INITIAL_TIMESTAMP, TimestampedValues}; +use super::{merkle::SerialReceiver, online::INITIAL_TIMESTAMP}; use crate::{ arch::{hasher::Hasher, ADDR_SPACE_OFFSET, CONST_BLOCK_SIZE}, system::memory::{ @@ -29,6 +29,11 @@ use crate::{ }, }; +/// Number of CONST_BLOCK_SIZE blocks per CHUNK (e.g., 2 for 8/4). 
+/// Blocks are on the same row only for Merkle tree hashing (8 bytes at a time). +/// Memory bus interactions use per-block timestamps. +pub const BLOCKS_PER_CHUNK: usize = 2; + /// The values describe aligned chunk of memory of size `CHUNK`---the data together with the last /// accessed timestamp---in either the initial or final memory state. #[repr(C)] @@ -42,7 +47,9 @@ pub struct PersistentBoundaryCols { pub leaf_label: T, pub values: [T; CHUNK], pub hash: [T; CHUNK], - pub timestamp: T, + /// Per-block timestamps. Each CONST_BLOCK_SIZE block within the chunk has its own timestamp. + /// For untouched blocks, timestamp stays at 0 (balances: boundary sends at t=0 init, receives at t=0 final). + pub timestamps: [T; BLOCKS_PER_CHUNK], } /// Imposes the following constraints: @@ -81,12 +88,14 @@ impl Air for PersistentBoundaryA local.expand_direction * local.expand_direction * local.expand_direction, ); - // Constrain that an "initial" row has timestamp zero. + // Constrain that an "initial" row has all timestamp zero. // Since `direction` is constrained to be in {-1, 0, 1}, we can select `direction == 1` // with the constraint below. - builder - .when(local.expand_direction * (local.expand_direction + AB::F::ONE)) - .assert_zero(local.timestamp); + let mut when_initial = + builder.when(local.expand_direction * (local.expand_direction + AB::F::ONE)); + for i in 0..BLOCKS_PER_CHUNK { + when_initial.assert_zero(local.timestamps[i]); + } let mut expand_fields = vec![ // direction = 1 => is_final = 0 @@ -110,10 +119,12 @@ impl Air for PersistentBoundaryA ); debug_assert_eq!(CHUNK % CONST_BLOCK_SIZE, 0); + debug_assert_eq!(CHUNK / CONST_BLOCK_SIZE, BLOCKS_PER_CHUNK); let chunk_size_f = AB::F::from_canonical_usize(CHUNK); - for block_idx in 0..(CHUNK / CONST_BLOCK_SIZE) { + for block_idx in 0..BLOCKS_PER_CHUNK { let offset = AB::F::from_canonical_usize(block_idx * CONST_BLOCK_SIZE); // Split the 1xCHUNK leaf into CONST_BLOCK_SIZE-sized bus messages. 
+ // Each block uses its own timestamp - untouched blocks stay at t=0. self.memory_bus .send( MemoryAddress::new( @@ -122,7 +133,7 @@ impl Air for PersistentBoundaryA ), local.values[block_idx * CONST_BLOCK_SIZE..(block_idx + 1) * CONST_BLOCK_SIZE] .to_vec(), - local.timestamp, + local.timestamps[block_idx], ) .eval(builder, local.expand_direction); } @@ -149,7 +160,8 @@ pub struct FinalTouchedLabel { final_values: [F; CHUNK], init_hash: [F; CHUNK], final_hash: [F; CHUNK], - final_timestamp: u32, + /// Per-block timestamps. Each CONST_BLOCK_SIZE block has its own timestamp. + final_timestamps: [u32; BLOCKS_PER_CHUNK], } impl Default for TouchedLabels { @@ -214,34 +226,68 @@ impl PersistentBoundaryChip { } } + /// Finalize the boundary chip with per-block timestamped memory. + /// + /// `final_memory` is at CONST_BLOCK_SIZE granularity (4 bytes per entry, single timestamp each). + /// This function rechunks into CHUNK-sized (8 bytes) groups with per-block timestamps. + /// Untouched blocks within a touched chunk get values from initial_memory and timestamp 0. 
#[instrument(name = "boundary_finalize", level = "debug", skip_all)] pub(crate) fn finalize( &mut self, initial_memory: &MemoryImage, - // Only touched stuff - final_memory: &TimestampedEquipartition, + // Touched stuff at CONST_BLOCK_SIZE granularity + final_memory: &TimestampedEquipartition, hasher: &H, ) where H: Hasher + Sync + for<'a> SerialReceiver<&'a [F]>, { - let final_touched_labels: Vec<_> = final_memory - .par_iter() - .map(|&((addr_space, ptr), ts_values)| { - let TimestampedValues { timestamp, values } = ts_values; + // Group CONST_BLOCK_SIZE blocks into CHUNK-sized groups + // Key: (addr_space, chunk_label), Value: per-block timestamps and values + use std::collections::BTreeMap; + let mut chunk_map: BTreeMap<(u32, u32), ([u32; BLOCKS_PER_CHUNK], [F; CHUNK])> = + BTreeMap::new(); + + for &((addr_space, ptr), ts_values) in final_memory.iter() { + let chunk_label = ptr / CHUNK as u32; + let block_idx_in_chunk = ((ptr % CHUNK as u32) / CONST_BLOCK_SIZE as u32) as usize; + + let entry = chunk_map + .entry((addr_space, chunk_label)) + .or_insert_with(|| { + // Initialize with values from initial memory and timestamps at 0 + let chunk_ptr = chunk_label * CHUNK as u32; + let init_values: [F; CHUNK] = array::from_fn(|i| unsafe { + initial_memory.get_f::(addr_space, chunk_ptr + i as u32) + }); + ([0u32; BLOCKS_PER_CHUNK], init_values) + }); + + // Set per-block timestamp + entry.0[block_idx_in_chunk] = ts_values.timestamp; + // Copy values for this block + for (i, &val) in ts_values.values.iter().enumerate() { + entry.1[block_idx_in_chunk * CONST_BLOCK_SIZE + i] = val; + } + } + + let final_touched_labels: Vec<_> = chunk_map + .into_par_iter() + .map(|((addr_space, chunk_label), (timestamps, final_values))| { + let chunk_ptr = chunk_label * CHUNK as u32; // SAFETY: addr_space from `final_memory` are all in bounds - let init_values = array::from_fn(|i| unsafe { - initial_memory.get_f::(addr_space, ptr + i as u32) + let init_values: [F; CHUNK] = 
array::from_fn(|i| unsafe { + initial_memory.get_f::(addr_space, chunk_ptr + i as u32) }); let initial_hash = hasher.hash(&init_values); - let final_hash = hasher.hash(&values); + let final_hash = hasher.hash(&final_values); FinalTouchedLabel { address_space: addr_space, - label: ptr / CHUNK as u32, + label: chunk_label, init_values, - final_values: values, + final_values, init_hash: initial_hash, final_hash, - final_timestamp: timestamp, + final_timestamps: timestamps, } }) .collect(); @@ -288,7 +334,9 @@ where leaf_label: Val::::from_canonical_u32(touched_label.label), values: touched_label.init_values, hash: touched_label.init_hash, - timestamp: Val::::from_canonical_u32(INITIAL_TIMESTAMP), + // Initial timestamps are all 0 (INITIAL_TIMESTAMP) + timestamps: [Val::::from_canonical_u32(INITIAL_TIMESTAMP); + BLOCKS_PER_CHUNK], }; *final_row.borrow_mut() = PersistentBoundaryCols { @@ -297,7 +345,10 @@ where leaf_label: Val::::from_canonical_u32(touched_label.label), values: touched_label.final_values, hash: touched_label.final_hash, - timestamp: Val::::from_canonical_u32(touched_label.final_timestamp), + // Per-block timestamps - untouched blocks stay at 0 + timestamps: touched_label + .final_timestamps + .map(Val::::from_canonical_u32), }; }); Arc::new(RowMajorMatrix::new(rows, width)) diff --git a/crates/vm/src/system/mod.rs b/crates/vm/src/system/mod.rs index d1ecb2daf1..195fa6a701 100644 --- a/crates/vm/src/system/mod.rs +++ b/crates/vm/src/system/mod.rs @@ -32,7 +32,8 @@ use crate::{ ChipInventoryError, DenseRecordArena, ExecutionBridge, ExecutionBus, ExecutionState, ExecutorInventory, ExecutorInventoryError, MatrixRecordArena, PhantomSubExecutor, RowMajorMatrixArena, SystemConfig, VmAirWrapper, VmBuilder, VmChipComplex, VmChipWrapper, - VmCircuitConfig, VmExecutionConfig, CONNECTOR_AIR_ID, PROGRAM_AIR_ID, PUBLIC_VALUES_AIR_ID, + VmCircuitConfig, VmExecutionConfig, CONNECTOR_AIR_ID, CONST_BLOCK_SIZE, PROGRAM_AIR_ID, + PUBLIC_VALUES_AIR_ID, }, system::{ 
connector::VmConnectorChip, @@ -145,7 +146,7 @@ pub struct SystemRecords { } pub enum TouchedMemory { - Persistent(TimestampedEquipartition), + Persistent(TimestampedEquipartition), Volatile(TimestampedEquipartition), } From 6a46bd4d59936eecc81d37e3c7efafd19009612b Mon Sep 17 00:00:00 2001 From: Maillew Date: Fri, 12 Dec 2025 21:38:57 +0000 Subject: [PATCH 06/21] clean up redundant fn signature --- crates/vm/src/system/memory/online.rs | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/crates/vm/src/system/memory/online.rs b/crates/vm/src/system/memory/online.rs index 021863c54b..a18d97fa74 100644 --- a/crates/vm/src/system/memory/online.rs +++ b/crates/vm/src/system/memory/online.rs @@ -968,12 +968,10 @@ impl TracingMemory { match is_persistent { false => TouchedMemory::Volatile( - self.touched_blocks_to_equipartition::(touched_blocks), + self.touched_blocks_to_equipartition::(touched_blocks), ), true => TouchedMemory::Persistent( - self.touched_blocks_to_equipartition::( - touched_blocks, - ), + self.touched_blocks_to_equipartition::(touched_blocks), ), } } @@ -1003,23 +1001,15 @@ impl TracingMemory { /// Returns the equipartition of the touched blocks. /// Modifies records and adds new to account for the initial/final segments. 
- fn touched_blocks_to_equipartition< - F: Field, - const PARTITION_SIZE: usize, - const OUTPUT_SIZE: usize, - >( + fn touched_blocks_to_equipartition( &mut self, touched_blocks: Vec<((u32, u32), AccessMetadata)>, - ) -> TimestampedEquipartition { - assert!( - OUTPUT_SIZE % PARTITION_SIZE == 0, - "Output size must be a multiple of the partition size" - ); + ) -> TimestampedEquipartition { // [perf] We can `.with_capacity()` if we keep track of the number of segments we initialize let mut partitioned_memory = Vec::new(); debug_assert!(touched_blocks.is_sorted_by_key(|(addr, _)| addr)); - self.handle_touched_blocks::(&mut partitioned_memory, touched_blocks); + self.handle_touched_blocks::(&mut partitioned_memory, touched_blocks); debug_assert!(partitioned_memory.is_sorted_by_key(|(key, _)| *key)); partitioned_memory From 9d0bea8de6fbb70e02890c8eb53db4a338767efe Mon Sep 17 00:00:00 2001 From: Maillew Date: Fri, 12 Dec 2025 22:35:29 +0000 Subject: [PATCH 07/21] aot constant fix --- extensions/rv32im/circuit/src/common/mod.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/extensions/rv32im/circuit/src/common/mod.rs b/extensions/rv32im/circuit/src/common/mod.rs index 0a58b7310b..5bac384f4b 100644 --- a/extensions/rv32im/circuit/src/common/mod.rs +++ b/extensions/rv32im/circuit/src/common/mod.rs @@ -9,7 +9,7 @@ mod aot { use openvm_circuit::{ arch::{ execution_mode::{metered::memory_ctx::MemoryCtx, MeteredCtx}, - AotError, SystemConfig, VmExecState, ADDR_SPACE_OFFSET, + AotError, SystemConfig, VmExecState, ADDR_SPACE_OFFSET, CONST_BLOCK_SIZE, }, system::memory::{merkle::public_values::PUBLIC_VALUES_AS, online::GuestMemory, CHUNK}, }; @@ -148,7 +148,7 @@ mod aot { // } // } // ``` - // + // // For a specific RV32 instruction, the variables can be treated as constants at AOT // compilation time: // - `address_space`: always a constant because it is derived from an Instruction @@ -225,7 +225,7 @@ mod aot { // } // } // ``` - // + // // For a 
specific RV32 instruction, the variables can be treated as constants at AOT compilation time: // Inputs: // - `chunk`: always 8(CHUNK) because we only support when continuation is enabled. @@ -244,12 +244,12 @@ mod aot { // Therefore the loop only iterates once for `page_id = start_page_id`. let initial_block_size: usize = config.initial_block_size(); - if initial_block_size != CHUNK { + if initial_block_size != CONST_BLOCK_SIZE { return Err(AotError::Other(format!( - "initial_block_size must be {CHUNK}, got {initial_block_size}" + "initial_block_size must be {CONST_BLOCK_SIZE}, got {initial_block_size}" ))); } - let chunk_bits = CHUNK.ilog2(); + let chunk_bits = CONST_BLOCK_SIZE.ilog2(); let as_offset = ((address_space - ADDR_SPACE_OFFSET) as u64) << (config.memory_config.memory_dimensions().address_height); From df72623aede19581cfd05b41d216dda9b585c8da Mon Sep 17 00:00:00 2001 From: Maillew Date: Fri, 12 Dec 2025 22:55:12 +0000 Subject: [PATCH 08/21] first commit --- .gitignore | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.gitignore b/.gitignore index c6e6aa2049..87e918c19e 100644 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,11 @@ profile.json.gz # test fixtures benchmarks/fixtures + +#TODO: Remove this +crates/toolchain/tests/rv32im-test-vectors/tests/* +*.o +*.a +*.s +*.txt +riscv/* \ No newline at end of file From e61739c045619184f80d0ece31fc8014feb32707 Mon Sep 17 00:00:00 2001 From: Maillew Date: Fri, 12 Dec 2025 23:03:53 +0000 Subject: [PATCH 09/21] LINTA --- crates/vm/src/arch/config.rs | 3 +- .../arch/execution_mode/metered/memory_ctx.rs | 1 - .../src/arch/execution_mode/metered_cost.rs | 1 - crates/vm/src/arch/state.rs | 13 ------- crates/vm/src/arch/testing/cpu.rs | 6 --- crates/vm/src/system/memory/adapter/mod.rs | 7 ---- crates/vm/src/system/memory/online.rs | 38 +++---------------- crates/vm/src/system/memory/persistent.rs | 10 +++-- extensions/rv32im/tests/src/lib.rs | 4 +- 9 files changed, 15 insertions(+), 68 deletions(-) diff --git 
a/crates/vm/src/arch/config.rs b/crates/vm/src/arch/config.rs index 02e790a932..b780021c41 100644 --- a/crates/vm/src/arch/config.rs +++ b/crates/vm/src/arch/config.rs @@ -187,8 +187,7 @@ pub struct MemoryConfig { /// Maximum N AccessAdapter AIR to support. pub max_access_adapter_n: usize, /// Whether access adapters are enabled. When disabled, all memory accesses must be of the - /// standard block size (e.g., 4 for address spaces 1-3). This removes the need for access - /// adapter AIRs and simplifies the memory system. + /// standard block size (e.g., 4 for address spaces 1-3). #[new(value = "true")] pub access_adapters_enabled: bool, } diff --git a/crates/vm/src/arch/execution_mode/metered/memory_ctx.rs b/crates/vm/src/arch/execution_mode/metered/memory_ctx.rs index d75dc2c46b..d755d73140 100644 --- a/crates/vm/src/arch/execution_mode/metered/memory_ctx.rs +++ b/crates/vm/src/arch/execution_mode/metered/memory_ctx.rs @@ -212,7 +212,6 @@ impl MemoryCtx { size_bits: u32, num: u32, ) { - // Skip if access adapters are disabled if !self.access_adapters_enabled { return; } diff --git a/crates/vm/src/arch/execution_mode/metered_cost.rs b/crates/vm/src/arch/execution_mode/metered_cost.rs index c92965ad3f..69bfd6fe69 100644 --- a/crates/vm/src/arch/execution_mode/metered_cost.rs +++ b/crates/vm/src/arch/execution_mode/metered_cost.rs @@ -38,7 +38,6 @@ impl AccessAdapterCtx { size_bits: u32, widths: &[usize], ) { - // Skip if access adapters are disabled if !self.enabled { return; } diff --git a/crates/vm/src/arch/state.rs b/crates/vm/src/arch/state.rs index 1da2a4d392..42751d1e6a 100644 --- a/crates/vm/src/arch/state.rs +++ b/crates/vm/src/arch/state.rs @@ -1,5 +1,4 @@ use std::{ - backtrace::Backtrace, fmt::Debug, ops::{Deref, DerefMut}, }; @@ -188,12 +187,6 @@ where addr_space: u32, ptr: u32, ) -> [T; BLOCK_SIZE] { - if BLOCK_SIZE != CONST_BLOCK_SIZE { - println!( - "vm_read: addr_space = {}, ptr = {}, BLOCK_SIZE = {}", - addr_space, ptr, BLOCK_SIZE - ); - } self.ctx 
.on_memory_operation(addr_space, ptr, BLOCK_SIZE as u32); self.host_read(addr_space, ptr) @@ -207,12 +200,6 @@ where ptr: u32, data: &[T; BLOCK_SIZE], ) { - if BLOCK_SIZE != CONST_BLOCK_SIZE { - println!( - "vm_read: addr_space = {}, ptr = {}, BLOCK_SIZE = {}", - addr_space, ptr, BLOCK_SIZE - ); - } self.ctx .on_memory_operation(addr_space, ptr, BLOCK_SIZE as u32); self.host_write(addr_space, ptr, data) diff --git a/crates/vm/src/arch/testing/cpu.rs b/crates/vm/src/arch/testing/cpu.rs index 4c574dda73..14af328845 100644 --- a/crates/vm/src/arch/testing/cpu.rs +++ b/crates/vm/src/arch/testing/cpu.rs @@ -348,11 +348,6 @@ impl VmChipTestBuilder { pub fn persistent(mem_config: MemoryConfig) -> Self { setup_tracing_with_log_level(Level::INFO); - /// ERRRMMM WHAT THE SIGMA not testing here - println!( - "PERSISTENT MEMORY TESTING, CONST_BLOCK_SIZE = {}", - CONST_BLOCK_SIZE - ); let (range_checker, memory) = Self::range_checker_and_memory(&mem_config, CONST_BLOCK_SIZE); let hasher_chip = Arc::new(Poseidon2PeripheryChip::new( vm_poseidon2_config(), @@ -476,7 +471,6 @@ where let mut memory_controller = memory_tester.controller; let is_persistent = memory_controller.continuation_enabled(); let mut memory = memory_tester.memory; - // here? 
pass in initial memory for chunking let touched_memory = memory.finalize::>(is_persistent); // Balance memory boundaries let range_checker = memory_controller.range_checker.clone(); diff --git a/crates/vm/src/system/memory/adapter/mod.rs b/crates/vm/src/system/memory/adapter/mod.rs index 2ae0fc28eb..adad5ca60f 100644 --- a/crates/vm/src/system/memory/adapter/mod.rs +++ b/crates/vm/src/system/memory/adapter/mod.rs @@ -1,5 +1,4 @@ use std::{ - backtrace::Backtrace, borrow::{Borrow, BorrowMut}, marker::PhantomData, ptr::copy_nonoverlapping, @@ -134,12 +133,6 @@ impl AccessAdapterInventory { let bytes_slice = &bytes[ptr..]; let header: &AccessRecordHeader = bytes_slice.borrow(); - if header.block_size == 8 { - println!("Found size 8 access:"); - println!(" Address space: {}", header.address_space); - println!(" Pointer: {}", header.pointer); - println!(" Timestamp: {}", header.timestamp_and_mask); - } // SAFETY: // - bytes[ptr..] is a valid starting pointer to a previously allocated record // - The record contains self-describing layout information diff --git a/crates/vm/src/system/memory/online.rs b/crates/vm/src/system/memory/online.rs index a18d97fa74..b53eb92d51 100644 --- a/crates/vm/src/system/memory/online.rs +++ b/crates/vm/src/system/memory/online.rs @@ -440,7 +440,7 @@ pub struct TracingMemory { initial_block_size: usize, /// The underlying data memory, with memory cells typed by address space: see [AddressMap]. #[getset(get = "pub")] - pub data: GuestMemory, // is this "initial memory" + pub data: GuestMemory, /// Maps addr_space to (ptr / min_block_size[addr_space] -> AccessMetadata) for latest access /// metadata. Uses paged storage for memory efficiency. 
AccessMetadata stores offset_to_start /// (in ALIGN units), block_size, and timestamp (latter two only valid at offset_to_start == @@ -577,16 +577,6 @@ impl TracingMemory { } pub(crate) fn add_split_record(&mut self, header: AccessRecordHeader) { - if header.block_size == 8 { - println!("-----SPLIT-----"); - println!("Adding split record for size 8:"); - println!(" Address space: {}", header.address_space); - println!(" Pointer: {}", header.pointer); - println!(" Timestamp: {}", header.timestamp_and_mask); - - let bt = Backtrace::capture(); - println!("{bt}"); - } if header.block_size == header.lowest_block_size { return; } @@ -612,8 +602,6 @@ impl TracingMemory { // we don't mind garbage values in prev_* } - //appears that we are initially still splitting, and merging memory - // is this from merkle tree? /// `data_slice` is the underlying data of the record in raw host memory format. pub(crate) fn add_merge_record( &mut self, @@ -621,16 +609,6 @@ impl TracingMemory { data_slice: &[u8], prev_ts: &[u32], ) { - if header.block_size == 8 { - println!("-----MERGE-----"); - println!("Adding merge record for size 8:"); - println!(" Address space: {}", header.address_space); - println!(" Pointer: {}", header.pointer); - println!(" Timestamp: {}", header.timestamp_and_mask); - - let bt = Backtrace::capture(); - println!("{bt}"); - } if header.block_size == header.lowest_block_size { return; } @@ -717,10 +695,6 @@ impl TracingMemory { AccessMetadata::new(timestamp, MIN_BLOCK_SIZE as u8, 0), ); } - println!( - "BLOCK SIZE: {}, MIN_BLOCK_SIZE: {}", - block_size, MIN_BLOCK_SIZE - ); self.add_split_record(AccessRecordHeader { timestamp_and_mask: timestamp, address_space: address_space as u32, @@ -961,7 +935,6 @@ impl TracingMemory { } /// Finalize the boundary and merkle chips. 
- /// pass in initial memory,for rechunking #[instrument(name = "memory_finalize", skip_all)] pub fn finalize(&mut self, is_persistent: bool) -> TouchedMemory { let touched_blocks = self.touched_blocks(); @@ -1013,7 +986,6 @@ impl TracingMemory { debug_assert!(partitioned_memory.is_sorted_by_key(|(key, _)| *key)); partitioned_memory - // self.rechunk_final_memory::(partitioned_memory) } fn handle_touched_blocks( @@ -1088,8 +1060,9 @@ impl TracingMemory { timestamp, values: from_fn(|j| { let byte_idx = (i as usize + j) * cell_size; - // SAFETY: block_size is multiple of PARTITION_SIZE and we are reading chunks - // of cells within bounds + // SAFETY: block_size is multiple of PARTITION_SIZE and we are + // reading chunks of cells within + // bounds unsafe { addr_space_config .layout @@ -1112,7 +1085,8 @@ impl TracingMemory { current_values[current_cnt * cell_size..current_cnt * cell_size + cell_size] .copy_from_slice(cell_data); if current_cnt & (min_block_size - 1) == 0 { - // SAFETY: current_cnt / min_block_size < PARTITION_SIZE / min_block_size <= PARTITION_SIZE + // SAFETY: current_cnt / min_block_size < PARTITION_SIZE / min_block_size <= + // PARTITION_SIZE unsafe { *current_timestamps.get_unchecked_mut(current_cnt / min_block_size) = timestamp; diff --git a/crates/vm/src/system/memory/persistent.rs b/crates/vm/src/system/memory/persistent.rs index f3f650fbe0..c30c80895e 100644 --- a/crates/vm/src/system/memory/persistent.rs +++ b/crates/vm/src/system/memory/persistent.rs @@ -48,7 +48,8 @@ pub struct PersistentBoundaryCols { pub values: [T; CHUNK], pub hash: [T; CHUNK], /// Per-block timestamps. Each CONST_BLOCK_SIZE block within the chunk has its own timestamp. - /// For untouched blocks, timestamp stays at 0 (balances: boundary sends at t=0 init, receives at t=0 final). + /// For untouched blocks, timestamp stays at 0 (balances: boundary sends at t=0 init, receives + /// at t=0 final). 
pub timestamps: [T; BLOCKS_PER_CHUNK], } @@ -228,9 +229,10 @@ impl PersistentBoundaryChip { /// Finalize the boundary chip with per-block timestamped memory. /// - /// `final_memory` is at CONST_BLOCK_SIZE granularity (4 bytes per entry, single timestamp each). - /// This function rechunks into CHUNK-sized (8 bytes) groups with per-block timestamps. - /// Untouched blocks within a touched chunk get values from initial_memory and timestamp 0. + /// `final_memory` is at CONST_BLOCK_SIZE granularity (4 bytes per entry, single timestamp + /// each). This function rechunks into CHUNK-sized (8 bytes) groups with per-block + /// timestamps. Untouched blocks within a touched chunk get values from initial_memory and + /// timestamp 0. #[instrument(name = "boundary_finalize", level = "debug", skip_all)] pub(crate) fn finalize( &mut self, diff --git a/extensions/rv32im/tests/src/lib.rs b/extensions/rv32im/tests/src/lib.rs index f4f0efd0d3..c4302ae808 100644 --- a/extensions/rv32im/tests/src/lib.rs +++ b/extensions/rv32im/tests/src/lib.rs @@ -93,9 +93,9 @@ mod tests { Ok(()) } - // #[test_case("fibonacci", 1)] + #[test_case("fibonacci", 1)] #[test_case("collatz", 1)] - fn test_rv32m(example_name: &str, min_segments: usize) -> Result<()> { + fn test_rv32im(example_name: &str, min_segments: usize) -> Result<()> { let config = test_rv32im_config(); let elf = build_example_program_at_path(get_programs_dir!(), example_name, &config)?; let exe = VmExe::from_elf( From 85f45feb9dc48c84e3c0d17420668175b4629b75 Mon Sep 17 00:00:00 2001 From: Maillew Date: Fri, 12 Dec 2025 23:04:03 +0000 Subject: [PATCH 10/21] LINTA --- extensions/rv32im/circuit/src/common/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extensions/rv32im/circuit/src/common/mod.rs b/extensions/rv32im/circuit/src/common/mod.rs index 5bac384f4b..20855af15d 100644 --- a/extensions/rv32im/circuit/src/common/mod.rs +++ b/extensions/rv32im/circuit/src/common/mod.rs @@ -148,7 +148,7 @@ mod aot { // } 
// } // ``` - // + // // For a specific RV32 instruction, the variables can be treated as constants at AOT // compilation time: // - `address_space`: always a constant because it is derived from an Instruction @@ -225,7 +225,7 @@ mod aot { // } // } // ``` - // + // // For a specific RV32 instruction, the variables can be treated as constants at AOT compilation time: // Inputs: // - `chunk`: always 8(CHUNK) because we only support when continuation is enabled. From 9c302373e28c0102b76057dede3297f4c2233ced Mon Sep 17 00:00:00 2001 From: Maillew Date: Fri, 12 Dec 2025 23:15:27 +0000 Subject: [PATCH 11/21] gpu? --- crates/vm/src/system/cuda/memory.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/crates/vm/src/system/cuda/memory.rs b/crates/vm/src/system/cuda/memory.rs index 51d7b3677e..448ec85200 100644 --- a/crates/vm/src/system/cuda/memory.rs +++ b/crates/vm/src/system/cuda/memory.rs @@ -1,7 +1,9 @@ use std::sync::Arc; use openvm_circuit::{ - arch::{AddressSpaceHostLayout, DenseRecordArena, MemoryConfig, ADDR_SPACE_OFFSET}, + arch::{ + AddressSpaceHostLayout, DenseRecordArena, MemoryConfig, ADDR_SPACE_OFFSET, CONST_BLOCK_SIZE, + }, system::{ memory::{online::LinearMemory, AddressMap, TimestampedValues}, TouchedMemory, @@ -150,8 +152,9 @@ impl MemoryInventoryGPU { mem.tracing_info("boundary finalize"); let (touched_memory, empty) = if partition.is_empty() { + // Create a dummy touched memory entry with CONST_BLOCK_SIZE values let leftmost_values = 'left: { - let mut res = [F::ZERO; DIGEST_WIDTH]; + let mut res = [F::ZERO; CONST_BLOCK_SIZE]; if persistent.initial_memory[ADDR_SPACE_OFFSET as usize].is_empty() { break 'left res; } @@ -159,7 +162,7 @@ impl MemoryInventoryGPU { [ADDR_SPACE_OFFSET as usize] .layout; let one_cell_size = layout.size(); - let values = vec![0u8; one_cell_size * DIGEST_WIDTH]; + let values = vec![0u8; one_cell_size * CONST_BLOCK_SIZE]; unsafe { cuda_memcpy::( values.as_ptr() as *mut std::ffi::c_void, @@ -168,7 
+171,7 @@ impl MemoryInventoryGPU { values.len(), ) .unwrap(); - for i in 0..DIGEST_WIDTH { + for i in 0..CONST_BLOCK_SIZE { res[i] = layout.to_field::(&values[i * one_cell_size..]); } } From 2e57807b605821e96c8e5cc5768e11c505734bce Mon Sep 17 00:00:00 2001 From: Maillew Date: Fri, 12 Dec 2025 23:18:25 +0000 Subject: [PATCH 12/21] Revert "gpu?" This reverts commit 9c302373e28c0102b76057dede3297f4c2233ced. --- crates/vm/src/system/cuda/memory.rs | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/crates/vm/src/system/cuda/memory.rs b/crates/vm/src/system/cuda/memory.rs index 448ec85200..51d7b3677e 100644 --- a/crates/vm/src/system/cuda/memory.rs +++ b/crates/vm/src/system/cuda/memory.rs @@ -1,9 +1,7 @@ use std::sync::Arc; use openvm_circuit::{ - arch::{ - AddressSpaceHostLayout, DenseRecordArena, MemoryConfig, ADDR_SPACE_OFFSET, CONST_BLOCK_SIZE, - }, + arch::{AddressSpaceHostLayout, DenseRecordArena, MemoryConfig, ADDR_SPACE_OFFSET}, system::{ memory::{online::LinearMemory, AddressMap, TimestampedValues}, TouchedMemory, @@ -152,9 +150,8 @@ impl MemoryInventoryGPU { mem.tracing_info("boundary finalize"); let (touched_memory, empty) = if partition.is_empty() { - // Create a dummy touched memory entry with CONST_BLOCK_SIZE values let leftmost_values = 'left: { - let mut res = [F::ZERO; CONST_BLOCK_SIZE]; + let mut res = [F::ZERO; DIGEST_WIDTH]; if persistent.initial_memory[ADDR_SPACE_OFFSET as usize].is_empty() { break 'left res; } @@ -162,7 +159,7 @@ impl MemoryInventoryGPU { [ADDR_SPACE_OFFSET as usize] .layout; let one_cell_size = layout.size(); - let values = vec![0u8; one_cell_size * CONST_BLOCK_SIZE]; + let values = vec![0u8; one_cell_size * DIGEST_WIDTH]; unsafe { cuda_memcpy::( values.as_ptr() as *mut std::ffi::c_void, @@ -171,7 +168,7 @@ impl MemoryInventoryGPU { values.len(), ) .unwrap(); - for i in 0..CONST_BLOCK_SIZE { + for i in 0..DIGEST_WIDTH { res[i] = layout.to_field::(&values[i * one_cell_size..]); } } From 
9e66a3b5e5f47500cf1fc6a12aef08289e0081b9 Mon Sep 17 00:00:00 2001 From: Maillew Date: Fri, 12 Dec 2025 23:26:34 +0000 Subject: [PATCH 13/21] LINTA --- crates/vm/src/arch/testing/cpu.rs | 2 +- crates/vm/src/system/memory/online.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/vm/src/arch/testing/cpu.rs b/crates/vm/src/arch/testing/cpu.rs index 14af328845..03ef71440b 100644 --- a/crates/vm/src/arch/testing/cpu.rs +++ b/crates/vm/src/arch/testing/cpu.rs @@ -49,7 +49,7 @@ use crate::{ adapter::records::arena_size_bound, offline_checker::{MemoryBridge, MemoryBus}, online::TracingMemory, - MemoryAirInventory, MemoryController, SharedMemoryHelper, CHUNK, + MemoryAirInventory, MemoryController, SharedMemoryHelper, }, poseidon2::Poseidon2PeripheryChip, program::ProgramBus, diff --git a/crates/vm/src/system/memory/online.rs b/crates/vm/src/system/memory/online.rs index b53eb92d51..4ba48d9408 100644 --- a/crates/vm/src/system/memory/online.rs +++ b/crates/vm/src/system/memory/online.rs @@ -1,4 +1,4 @@ -use std::{array::from_fn, backtrace::Backtrace, fmt::Debug, num::NonZero}; +use std::{array::from_fn, fmt::Debug, num::NonZero}; use getset::Getters; use itertools::zip_eq; From 2e6d2306f9ca3ca8f0fd60def0fb207fdfd05353 Mon Sep 17 00:00:00 2001 From: Maillew Date: Fri, 12 Dec 2025 23:35:05 +0000 Subject: [PATCH 14/21] revert gitignore --- .gitignore | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.gitignore b/.gitignore index 87e918c19e..a252b637e6 100644 --- a/.gitignore +++ b/.gitignore @@ -48,10 +48,3 @@ profile.json.gz # test fixtures benchmarks/fixtures -#TODO: Remove this -crates/toolchain/tests/rv32im-test-vectors/tests/* -*.o -*.a -*.s -*.txt -riscv/* \ No newline at end of file From 1482bc6d3b7007e8cbf482c6a9f04c78d38381fb Mon Sep 17 00:00:00 2001 From: Maillew Date: Thu, 18 Dec 2025 17:03:56 +0000 Subject: [PATCH 15/21] linta + cleanup --- crates/vm/src/arch/config.rs | 13 +++--- crates/vm/src/arch/state.rs | 2 +- 
crates/vm/src/arch/testing/cpu.rs | 2 +- crates/vm/src/system/memory/controller/mod.rs | 1 - crates/vm/src/system/memory/online.rs | 46 +++++++++---------- 5 files changed, 31 insertions(+), 33 deletions(-) diff --git a/crates/vm/src/arch/config.rs b/crates/vm/src/arch/config.rs index b780021c41..f84a73a79b 100644 --- a/crates/vm/src/arch/config.rs +++ b/crates/vm/src/arch/config.rs @@ -187,7 +187,7 @@ pub struct MemoryConfig { /// Maximum N AccessAdapter AIR to support. pub max_access_adapter_n: usize, /// Whether access adapters are enabled. When disabled, all memory accesses must be of the - /// standard block size (e.g., 4 for address spaces 1-3). + /// standard block size (ie, 4 for address spaces 1-3). #[new(value = "true")] pub access_adapters_enabled: bool, } @@ -261,8 +261,8 @@ impl MemoryConfig { .collect() } - /// Returns true if the Native address space (AS 4) is used. - /// Native AS is considered "used" if it has any allocated cells. + /// Returns true if the Native address space (AS 4) is used + /// Native AS is considered "used" if it has any allocated cells pub fn is_native_as_used(&self) -> bool { self.addr_spaces .get(NATIVE_AS as usize) @@ -277,7 +277,7 @@ impl MemoryConfig { self } - /// Enables access adapters. This is the default behavior. + /// Enables access adapters. This is the default behavior pub fn with_access_adapters(mut self) -> Self { self.access_adapters_enabled = true; self @@ -285,7 +285,7 @@ impl MemoryConfig { /// Automatically sets `access_adapters_enabled` based on whether Native AS is used. /// If Native AS is not used, access adapters are disabled since all other address spaces - /// use a fixed block size of 4. + /// use a fixed block size of 4 pub fn with_auto_access_adapters(mut self) -> Self { self.access_adapters_enabled = self.is_native_as_used(); self @@ -432,8 +432,7 @@ impl SystemConfig { } /// Disables access adapters. 
When disabled, all memory accesses for address spaces 1-3 - /// must use the constant block size (4). This simplifies the memory system by removing - /// access adapter AIRs. + /// must use the constant block size (4) pub fn without_access_adapters(mut self) -> Self { self.memory_config.access_adapters_enabled = false; self diff --git a/crates/vm/src/arch/state.rs b/crates/vm/src/arch/state.rs index 42751d1e6a..6e79677541 100644 --- a/crates/vm/src/arch/state.rs +++ b/crates/vm/src/arch/state.rs @@ -13,7 +13,7 @@ use super::{create_memory_image, ExecutionError, Streams}; #[cfg(feature = "metrics")] use crate::metrics::VmMetrics; use crate::{ - arch::{execution_mode::ExecutionCtxTrait, SystemConfig, VmStateMut, CONST_BLOCK_SIZE}, + arch::{execution_mode::ExecutionCtxTrait, SystemConfig, VmStateMut}, system::memory::online::GuestMemory, }; diff --git a/crates/vm/src/arch/testing/cpu.rs b/crates/vm/src/arch/testing/cpu.rs index 03ef71440b..d818dee460 100644 --- a/crates/vm/src/arch/testing/cpu.rs +++ b/crates/vm/src/arch/testing/cpu.rs @@ -333,7 +333,7 @@ impl VmChipTestBuilder { fn range_checker_and_memory( mem_config: &MemoryConfig, - init_block_size: usize, // modify this to CONST_BLOCK_SIZE + init_block_size: usize, ) -> (SharedVariableRangeCheckerChip, TracingMemory) { let range_checker = Arc::new(VariableRangeCheckerChip::new(VariableRangeCheckerBus::new( RANGE_CHECKER_BUS, diff --git a/crates/vm/src/system/memory/controller/mod.rs b/crates/vm/src/system/memory/controller/mod.rs index bd770d163e..e4733ccdf4 100644 --- a/crates/vm/src/system/memory/controller/mod.rs +++ b/crates/vm/src/system/memory/controller/mod.rs @@ -289,7 +289,6 @@ impl MemoryController { TouchedMemory::Persistent(final_memory), ) => { let hasher = self.hasher_chip.as_ref().unwrap(); - // boundary_chip.finalize takes CONST_BLOCK_SIZE granularity and rechunks internally boundary_chip.finalize(initial_memory, &final_memory, hasher.as_ref()); // Rechunk CONST_BLOCK_SIZE blocks into CHUNK-sized 
blocks for merkle_chip diff --git a/crates/vm/src/system/memory/online.rs b/crates/vm/src/system/memory/online.rs index 4ba48d9408..44d8256ab1 100644 --- a/crates/vm/src/system/memory/online.rs +++ b/crates/vm/src/system/memory/online.rs @@ -979,24 +979,24 @@ impl TracingMemory { touched_blocks: Vec<((u32, u32), AccessMetadata)>, ) -> TimestampedEquipartition { // [perf] We can `.with_capacity()` if we keep track of the number of segments we initialize - let mut partitioned_memory = Vec::new(); + let mut final_memory = Vec::new(); debug_assert!(touched_blocks.is_sorted_by_key(|(addr, _)| addr)); - self.handle_touched_blocks::(&mut partitioned_memory, touched_blocks); + self.handle_touched_blocks::(&mut final_memory, touched_blocks); - debug_assert!(partitioned_memory.is_sorted_by_key(|(key, _)| *key)); - partitioned_memory + debug_assert!(final_memory.is_sorted_by_key(|(key, _)| *key)); + final_memory } - fn handle_touched_blocks( + fn handle_touched_blocks( &mut self, - final_memory: &mut Vec<((u32, u32), TimestampedValues)>, + final_memory: &mut Vec<((u32, u32), TimestampedValues)>, touched_blocks: Vec<((u32, u32), AccessMetadata)>, ) { - let mut current_values = vec![0u8; MAX_CELL_BYTE_SIZE * PARTITION_SIZE]; + let mut current_values = vec![0u8; MAX_CELL_BYTE_SIZE * CHUNK]; let mut current_cnt = 0; let mut current_address = MemoryAddress::new(0, 0); - let mut current_timestamps = vec![0; PARTITION_SIZE]; + let mut current_timestamps = vec![0; CHUNK]; for ((addr_space, ptr), access_metadata) in touched_blocks { // SAFETY: addr_space of touched blocks are all in bounds let addr_space_config = @@ -1009,16 +1009,16 @@ impl TracingMemory { current_cnt == 0 || (current_address.address_space == addr_space && current_address.pointer + current_cnt as u32 == ptr), - "The union of all touched blocks must consist of blocks with sizes divisible by the partition size" + "The union of all touched blocks must consist of blocks with sizes divisible by the `CHUNK`" ); 
debug_assert!(block_size >= min_block_size as u8); debug_assert!(ptr % min_block_size as u32 == 0); if current_cnt == 0 { assert_eq!( - ptr & (PARTITION_SIZE as u32 - 1), + ptr & (CHUNK as u32 - 1), 0, - "The union of all touched blocks must consist of partition-aligned blocks" + "The union of all touched blocks must consist of `CHUNK`-aligned blocks" ); current_address = MemoryAddress::new(addr_space, ptr); } @@ -1033,7 +1033,7 @@ impl TracingMemory { type_size: cell_size as u32, }); } - if min_block_size > PARTITION_SIZE { + if min_block_size > CHUNK { assert_eq!(current_cnt, 0); for i in (0..block_size as u32).step_by(min_block_size) { self.add_split_record(AccessRecordHeader { @@ -1041,7 +1041,7 @@ impl TracingMemory { address_space: addr_space, pointer: ptr + i, block_size: min_block_size as u32, - lowest_block_size: PARTITION_SIZE as u32, + lowest_block_size: CHUNK as u32, type_size: cell_size as u32, }); } @@ -1053,14 +1053,14 @@ impl TracingMemory { block_size as usize * cell_size, ) }; - for i in (0..block_size as u32).step_by(PARTITION_SIZE) { + for i in (0..block_size as u32).step_by(CHUNK) { final_memory.push(( (addr_space, ptr + i), TimestampedValues { timestamp, values: from_fn(|j| { let byte_idx = (i as usize + j) * cell_size; - // SAFETY: block_size is multiple of PARTITION_SIZE and we are + // SAFETY: block_size is multiple of CHUNK and we are // reading chunks of cells within // bounds unsafe { @@ -1085,16 +1085,16 @@ impl TracingMemory { current_values[current_cnt * cell_size..current_cnt * cell_size + cell_size] .copy_from_slice(cell_data); if current_cnt & (min_block_size - 1) == 0 { - // SAFETY: current_cnt / min_block_size < PARTITION_SIZE / min_block_size <= - // PARTITION_SIZE + // SAFETY: current_cnt / min_block_size < CHUNK / min_block_size <= + // CHUNKs unsafe { *current_timestamps.get_unchecked_mut(current_cnt / min_block_size) = timestamp; } } current_cnt += 1; - if current_cnt == PARTITION_SIZE { - let timestamp = 
*current_timestamps[..PARTITION_SIZE / min_block_size] + if current_cnt == CHUNK { + let timestamp = *current_timestamps[..CHUNK / min_block_size] .iter() .max() .unwrap(); @@ -1103,12 +1103,12 @@ impl TracingMemory { timestamp_and_mask: timestamp, address_space: addr_space, pointer: current_address.pointer, - block_size: PARTITION_SIZE as u32, + block_size: CHUNK as u32, lowest_block_size: min_block_size as u32, type_size: cell_size as u32, }, - &current_values[..PARTITION_SIZE * cell_size], - &current_timestamps[..PARTITION_SIZE / min_block_size], + &current_values[..CHUNK * cell_size], + &current_timestamps[..CHUNK / min_block_size], ); final_memory.push(( (current_address.address_space, current_address.pointer), @@ -1130,7 +1130,7 @@ impl TracingMemory { } assert_eq!( current_cnt, 0, - "The union of all touched blocks must consist of blocks with sizes divisible by the partition size" + "The union of all touched blocks must consist of blocks with sizes divisible by the `CHUNK`" ); } From beee07ae7fc57e20f2174ba4be15314775132f15 Mon Sep 17 00:00:00 2001 From: Maillew Date: Thu, 18 Dec 2025 17:07:17 +0000 Subject: [PATCH 16/21] gitignore --- .gitignore | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.gitignore b/.gitignore index a252b637e6..87e918c19e 100644 --- a/.gitignore +++ b/.gitignore @@ -48,3 +48,10 @@ profile.json.gz # test fixtures benchmarks/fixtures +#TODO: Remove this +crates/toolchain/tests/rv32im-test-vectors/tests/* +*.o +*.a +*.s +*.txt +riscv/* \ No newline at end of file From 3706b09e6c00b5978ca974fb11c788ee7e189d85 Mon Sep 17 00:00:00 2001 From: Maillew Date: Thu, 18 Dec 2025 18:44:48 +0000 Subject: [PATCH 17/21] testing rv32 with access_adapters disabled --- crates/vm/src/arch/execution_mode/metered/ctx.rs | 12 +++++++----- .../src/arch/execution_mode/metered/memory_ctx.rs | 2 +- crates/vm/src/arch/testing/cpu.rs | 2 +- crates/vm/src/arch/vm.rs | 8 +++++++- crates/vm/src/system/memory/online.rs | 3 ++- extensions/rv32im/circuit/src/base_alu/tests.rs | 14
++++++++++---- extensions/rv32im/circuit/src/loadstore/tests.rs | 14 ++++++++++---- extensions/rv32im/tests/src/lib.rs | 2 +- 8 files changed, 39 insertions(+), 18 deletions(-) diff --git a/crates/vm/src/arch/execution_mode/metered/ctx.rs b/crates/vm/src/arch/execution_mode/metered/ctx.rs index 8428438ca7..0b67a3b92d 100644 --- a/crates/vm/src/arch/execution_mode/metered/ctx.rs +++ b/crates/vm/src/arch/execution_mode/metered/ctx.rs @@ -64,11 +64,13 @@ impl MeteredCtx { air_names[merkle_tree_index] ); } - debug_assert!( - air_names[memory_ctx.adapter_offset].contains("AccessAdapterAir<2>"), - "air_name={}", - air_names[memory_ctx.adapter_offset] - ); + if memory_ctx.access_adapters_enabled { + debug_assert!( + air_names[memory_ctx.adapter_offset].contains("AccessAdapterAir<2>"), + "air_name={}", + air_names[memory_ctx.adapter_offset] + ); + } let segmentation_ctx = SegmentationCtx::new(air_names, widths, interactions, config.segmentation_limits); diff --git a/crates/vm/src/arch/execution_mode/metered/memory_ctx.rs b/crates/vm/src/arch/execution_mode/metered/memory_ctx.rs index d755d73140..2397b1b12e 100644 --- a/crates/vm/src/arch/execution_mode/metered/memory_ctx.rs +++ b/crates/vm/src/arch/execution_mode/metered/memory_ctx.rs @@ -105,7 +105,7 @@ pub struct MemoryCtx { pub boundary_idx: usize, pub merkle_tree_index: Option, pub adapter_offset: usize, - access_adapters_enabled: bool, + pub access_adapters_enabled: bool, continuations_enabled: bool, chunk: u32, chunk_bits: u32, diff --git a/crates/vm/src/arch/testing/cpu.rs b/crates/vm/src/arch/testing/cpu.rs index d818dee460..e5579fd22c 100644 --- a/crates/vm/src/arch/testing/cpu.rs +++ b/crates/vm/src/arch/testing/cpu.rs @@ -404,7 +404,7 @@ impl Default for VmChipTestBuilder { // removed when tests are updated. 
mem_config.addr_spaces[RV32_REGISTER_AS as usize].num_cells = 1 << 29; mem_config.addr_spaces[NATIVE_AS as usize].num_cells = 0; - Self::volatile(mem_config) + Self::persistent(mem_config) } } diff --git a/crates/vm/src/arch/vm.rs b/crates/vm/src/arch/vm.rs index 68555050fe..23db960d6d 100644 --- a/crates/vm/src/arch/vm.rs +++ b/crates/vm/src/arch/vm.rs @@ -622,7 +622,13 @@ where let system_config: &SystemConfig = self.config().as_ref(); let adapter_offset = system_config.access_adapter_air_id_offset(); // ATTENTION: this must agree with `num_memory_airs` - let num_adapters = log2_strict_usize(system_config.memory_config.max_access_adapter_n); + + let num_adapters = if system_config.memory_config.access_adapters_enabled { + log2_strict_usize(system_config.memory_config.max_access_adapter_n) + } else { + 0 + }; + assert_eq!(adapter_offset + num_adapters, system_config.num_airs()); let access_adapter_arena_size_bound = records::arena_size_bound( &trace_heights[adapter_offset..adapter_offset + num_adapters], diff --git a/crates/vm/src/system/memory/online.rs b/crates/vm/src/system/memory/online.rs index 44d8256ab1..d04e0cbac1 100644 --- a/crates/vm/src/system/memory/online.rs +++ b/crates/vm/src/system/memory/online.rs @@ -580,6 +580,7 @@ impl TracingMemory { if header.block_size == header.lowest_block_size { return; } + assert_eq!(1, 0); // SAFETY: // - header.address_space is validated during instruction decoding and within bounds // - header.pointer and header.type_size define valid memory bounds within the address space @@ -612,7 +613,7 @@ impl TracingMemory { if header.block_size == header.lowest_block_size { return; } - + assert_eq!(1, 0); let record_mut = self .access_adapter_records .alloc(AccessLayout::from_record_header(&header)); diff --git a/extensions/rv32im/circuit/src/base_alu/tests.rs b/extensions/rv32im/circuit/src/base_alu/tests.rs index 8f38dea1f5..7e42b38989 100644 --- a/extensions/rv32im/circuit/src/base_alu/tests.rs +++ 
b/extensions/rv32im/circuit/src/base_alu/tests.rs @@ -163,8 +163,11 @@ fn rand_rv32_alu_test(opcode: BaseAluOpcode, num_ops: usize) { // TODO(AG): make a more meaningful test for memory accesses tester.write(2, 1024, [F::ONE; 4]); tester.write(2, 1028, [F::ONE; 4]); - let sm = tester.read(2, 1024); - assert_eq!(sm, [F::ONE; 8]); + // Avoid wider-than-min-block accesses when access adapters are disabled + let sm1 = tester.read(2, 1024); + let sm2 = tester.read(2, 1028); + assert_eq!(sm1, [F::ONE; 4]); + assert_eq!(sm2, [F::ONE; 4]); for _ in 0..num_ops { set_and_execute( @@ -201,8 +204,11 @@ fn rand_rv32_alu_test_persistent(opcode: BaseAluOpcode, num_ops: usize) { // TODO(AG): make a more meaningful test for memory accesses tester.write(2, 1024, [F::ONE; 4]); tester.write(2, 1028, [F::ONE; 4]); - let sm = tester.read(2, 1024); - assert_eq!(sm, [F::ONE; 8]); + // Avoid wider-than-min-block accesses when access adapters are disabled + let sm1 = tester.read(2, 1024); + let sm2 = tester.read(2, 1028); + assert_eq!(sm1, [F::ONE; 4]); + assert_eq!(sm2, [F::ONE; 4]); for _ in 0..num_ops { set_and_execute( diff --git a/extensions/rv32im/circuit/src/loadstore/tests.rs b/extensions/rv32im/circuit/src/loadstore/tests.rs index 240da983d0..9d348e9fe5 100644 --- a/extensions/rv32im/circuit/src/loadstore/tests.rs +++ b/extensions/rv32im/circuit/src/loadstore/tests.rs @@ -10,7 +10,7 @@ use openvm_circuit::{ }, }; use openvm_circuit_primitives::var_range::VariableRangeCheckerChip; -use openvm_instructions::{instruction::Instruction, riscv::RV32_REGISTER_AS, LocalOpcode}; +use openvm_instructions::{instruction::Instruction, riscv::RV32_REGISTER_AS, LocalOpcode, NATIVE_AS}; use openvm_rv32im_transpiler::Rv32LoadStoreOpcode::{self, *}; use openvm_stark_backend::{ p3_air::BaseAir, @@ -131,7 +131,8 @@ fn set_and_execute>( let mem_as = mem_as.unwrap_or(if is_load { 2 } else { - *[2, 3, 4].choose(rng).unwrap() + // Avoid Native AS while access adapters are disabled. 
+ *[2, 3].choose(rng).unwrap() }); let shift_amount = ptr_val % 4; @@ -215,10 +216,13 @@ fn rand_loadstore_test(opcode: Rv32LoadStoreOpcode, num_ops: usize) { let mut rng = create_seeded_rng(); let mut mem_config = MemoryConfig::default(); mem_config.addr_spaces[RV32_REGISTER_AS as usize].num_cells = 1 << 29; + mem_config.addr_spaces[NATIVE_AS as usize].num_cells = 0; if [STOREW, STOREB, STOREH].contains(&opcode) { mem_config.addr_spaces[PUBLIC_VALUES_AS as usize].num_cells = 1 << 29; } - let mut tester = VmChipTestBuilder::volatile(mem_config); + // Use persistent memory so initial block size matches the 4-byte alignment and + // avoids access-adapter split/merge paths when adapters are disabled. + let mut tester = VmChipTestBuilder::persistent(mem_config); let mut harness = create_harness(&mut tester); for _ in 0..num_ops { @@ -268,10 +272,12 @@ fn run_negative_loadstore_test( let mut rng = create_seeded_rng(); let mut mem_config = MemoryConfig::default(); mem_config.addr_spaces[RV32_REGISTER_AS as usize].num_cells = 1 << 29; + mem_config.addr_spaces[NATIVE_AS as usize].num_cells = 0; if [STOREW, STOREB, STOREH].contains(&opcode) { mem_config.addr_spaces[PUBLIC_VALUES_AS as usize].num_cells = 1 << 29; } - let mut tester = VmChipTestBuilder::volatile(mem_config); + // Use persistent memory so the min block size matches alignment without needing adapters. 
+ let mut tester = VmChipTestBuilder::persistent(mem_config); let mut harness = create_harness(&mut tester); set_and_execute( diff --git a/extensions/rv32im/tests/src/lib.rs b/extensions/rv32im/tests/src/lib.rs index c4302ae808..32afb14a65 100644 --- a/extensions/rv32im/tests/src/lib.rs +++ b/extensions/rv32im/tests/src/lib.rs @@ -33,7 +33,7 @@ mod tests { fn test_rv32im_config() -> Rv32ImConfig { Rv32ImConfig { rv32i: Rv32IConfig { - system: test_system_config(), + system: test_system_config().without_access_adapters(), ..Default::default() }, ..Default::default() From 97f41e49c930ed30d641f40864c82a956f0a73b5 Mon Sep 17 00:00:00 2001 From: Maillew Date: Thu, 18 Dec 2025 19:23:40 +0000 Subject: [PATCH 18/21] clean up, remove assert(false) --- crates/vm/src/system/memory/online.rs | 17 +++++------------ extensions/rv32im/tests/src/lib.rs | 2 +- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/crates/vm/src/system/memory/online.rs b/crates/vm/src/system/memory/online.rs index d04e0cbac1..800d97c214 100644 --- a/crates/vm/src/system/memory/online.rs +++ b/crates/vm/src/system/memory/online.rs @@ -580,7 +580,6 @@ impl TracingMemory { if header.block_size == header.lowest_block_size { return; } - assert_eq!(1, 0); // SAFETY: // - header.address_space is validated during instruction decoding and within bounds // - header.pointer and header.type_size define valid memory bounds within the address space @@ -613,7 +612,6 @@ impl TracingMemory { if header.block_size == header.lowest_block_size { return; } - assert_eq!(1, 0); let record_mut = self .access_adapter_records .alloc(AccessLayout::from_record_header(&header)); @@ -1010,7 +1008,7 @@ impl TracingMemory { current_cnt == 0 || (current_address.address_space == addr_space && current_address.pointer + current_cnt as u32 == ptr), - "The union of all touched blocks must consist of blocks with sizes divisible by the `CHUNK`" + "The union of all touched blocks must consist of blocks with sizes divisible by 
`CHUNK`" ); debug_assert!(block_size >= min_block_size as u8); debug_assert!(ptr % min_block_size as u32 == 0); @@ -1061,9 +1059,8 @@ impl TracingMemory { timestamp, values: from_fn(|j| { let byte_idx = (i as usize + j) * cell_size; - // SAFETY: block_size is multiple of CHUNK and we are - // reading chunks of cells within - // bounds + // SAFETY: block_size is multiple of CHUNK and we are reading chunks + // of cells within bounds unsafe { addr_space_config .layout @@ -1086,8 +1083,7 @@ impl TracingMemory { current_values[current_cnt * cell_size..current_cnt * cell_size + cell_size] .copy_from_slice(cell_data); if current_cnt & (min_block_size - 1) == 0 { - // SAFETY: current_cnt / min_block_size < CHUNK / min_block_size <= - // CHUNKs + // SAFETY: current_cnt / min_block_size < CHUNK / min_block_size <= CHUNK unsafe { *current_timestamps.get_unchecked_mut(current_cnt / min_block_size) = timestamp; @@ -1129,10 +1125,7 @@ impl TracingMemory { } } } - assert_eq!( - current_cnt, 0, - "The union of all touched blocks must consist of blocks with sizes divisible by the `CHUNK`" - ); + assert_eq!(current_cnt, 0, "The union of all touched blocks must consist of blocks with sizes divisible by `CHUNK`"); } pub fn address_space_alignment(&self) -> Vec { diff --git a/extensions/rv32im/tests/src/lib.rs b/extensions/rv32im/tests/src/lib.rs index 32afb14a65..c4302ae808 100644 --- a/extensions/rv32im/tests/src/lib.rs +++ b/extensions/rv32im/tests/src/lib.rs @@ -33,7 +33,7 @@ mod tests { fn test_rv32im_config() -> Rv32ImConfig { Rv32ImConfig { rv32i: Rv32IConfig { - system: test_system_config().without_access_adapters(), + system: test_system_config(), ..Default::default() }, ..Default::default() From be30749f51c8359668d51151dfe0d7b02f2b6086 Mon Sep 17 00:00:00 2001 From: Maillew Date: Thu, 18 Dec 2025 19:26:30 +0000 Subject: [PATCH 19/21] whitespace --- crates/vm/src/system/memory/adapter/mod.rs | 1 - crates/vm/src/system/memory/online.rs | 1 + 2 files changed, 1 insertion(+), 1 
deletion(-) diff --git a/crates/vm/src/system/memory/adapter/mod.rs b/crates/vm/src/system/memory/adapter/mod.rs index adad5ca60f..a9c89fc2ea 100644 --- a/crates/vm/src/system/memory/adapter/mod.rs +++ b/crates/vm/src/system/memory/adapter/mod.rs @@ -132,7 +132,6 @@ impl AccessAdapterInventory { while ptr < bytes.len() { let bytes_slice = &bytes[ptr..]; let header: &AccessRecordHeader = bytes_slice.borrow(); - // SAFETY: // - bytes[ptr..] is a valid starting pointer to a previously allocated record // - The record contains self-describing layout information diff --git a/crates/vm/src/system/memory/online.rs b/crates/vm/src/system/memory/online.rs index 800d97c214..fb66845c0b 100644 --- a/crates/vm/src/system/memory/online.rs +++ b/crates/vm/src/system/memory/online.rs @@ -612,6 +612,7 @@ impl TracingMemory { if header.block_size == header.lowest_block_size { return; } + let record_mut = self .access_adapter_records .alloc(AccessLayout::from_record_header(&header)); From 20516242a0836babd73ba5312f0ee36db7f58708 Mon Sep 17 00:00:00 2001 From: Maillew Date: Thu, 18 Dec 2025 19:32:46 +0000 Subject: [PATCH 20/21] linta --- extensions/rv32im/circuit/src/loadstore/tests.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/extensions/rv32im/circuit/src/loadstore/tests.rs b/extensions/rv32im/circuit/src/loadstore/tests.rs index 9d348e9fe5..978c0cd856 100644 --- a/extensions/rv32im/circuit/src/loadstore/tests.rs +++ b/extensions/rv32im/circuit/src/loadstore/tests.rs @@ -10,7 +10,9 @@ use openvm_circuit::{ }, }; use openvm_circuit_primitives::var_range::VariableRangeCheckerChip; -use openvm_instructions::{instruction::Instruction, riscv::RV32_REGISTER_AS, LocalOpcode, NATIVE_AS}; +use openvm_instructions::{ + instruction::Instruction, riscv::RV32_REGISTER_AS, LocalOpcode, NATIVE_AS, +}; use openvm_rv32im_transpiler::Rv32LoadStoreOpcode::{self, *}; use openvm_stark_backend::{ p3_air::BaseAir, From db268c325da5abb2eba38f6839b8611f0a6e66a0 Mon Sep 17 00:00:00 
2001 From: Maillew Date: Fri, 19 Dec 2025 23:04:13 +0000 Subject: [PATCH 21/21] bigint wip --- crates/vm/src/arch/testing/memory/mod.rs | 60 ++- crates/vm/src/system/memory/online.rs | 3 +- extensions/bigint/circuit/src/base_alu.rs | 15 +- extensions/bigint/circuit/src/branch_eq.rs | 14 +- extensions/bigint/circuit/src/branch_lt.rs | 13 +- extensions/bigint/circuit/src/common.rs | 50 ++ .../bigint/circuit/src/extension/mod.rs | 36 +- extensions/bigint/circuit/src/less_than.rs | 16 +- extensions/bigint/circuit/src/lib.rs | 456 +++++++++++++++++- extensions/bigint/circuit/src/mult.rs | 15 +- extensions/bigint/circuit/src/shift.rs | 15 +- extensions/bigint/circuit/src/tests.rs | 95 ++-- extensions/rv32-adapters/src/heap_branch.rs | 174 +++++-- extensions/rv32-adapters/src/vec_heap.rs | 2 +- 14 files changed, 775 insertions(+), 189 deletions(-) diff --git a/crates/vm/src/arch/testing/memory/mod.rs b/crates/vm/src/arch/testing/memory/mod.rs index c5bea140ae..6ffd50cccf 100644 --- a/crates/vm/src/arch/testing/memory/mod.rs +++ b/crates/vm/src/arch/testing/memory/mod.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use std::{collections::HashMap, convert::TryInto}; use air::{MemoryDummyAir, MemoryDummyChip}; use openvm_stark_backend::p3_field::{Field, PrimeField32}; @@ -42,6 +42,32 @@ impl MemoryTester { pub fn read(&mut self, addr_space: usize, ptr: usize) -> [F; N] { let memory = &mut self.memory; + // For AS 1-3, force CONST_BLOCK_SIZE (4-byte) accesses to avoid adapters. 
+ if addr_space <= 3 && N > 4 { + let mut out = [F::ZERO; N]; + for (chunk_idx, chunk) in out.chunks_exact_mut(4).enumerate() { + let chunk_ptr = ptr + chunk_idx * 4; + let t = memory.timestamp(); + let (t_prev, data) = + unsafe { memory.read::(addr_space as u32, chunk_ptr as u32) }; + let data_f = data.map(F::from_canonical_u8); + chunk.copy_from_slice(&data_f); + self.chip_for_block.get_mut(&4).unwrap().receive( + addr_space as u32, + chunk_ptr as u32, + &data_f, + t_prev, + ); + self.chip_for_block.get_mut(&4).unwrap().send( + addr_space as u32, + chunk_ptr as u32, + &data_f, + t, + ); + } + return out; + } + let t = memory.timestamp(); // TODO: this could be improved if we added a TracingMemory::get_f function let (t_prev, data) = if addr_space <= 3 { @@ -66,6 +92,38 @@ impl MemoryTester { pub fn write(&mut self, addr_space: usize, ptr: usize, data: [F; N]) { let memory = &mut self.memory; + // For AS 1-3, force CONST_BLOCK_SIZE (4-byte) accesses to avoid adapters. + if addr_space <= 3 && N > 4 { + for (chunk_idx, chunk) in data.chunks_exact(4).enumerate() { + let chunk_ptr = ptr + chunk_idx * 4; + let t = memory.timestamp(); + let chunk_u8: [u8; 4] = chunk + .iter() + .map(|x| x.as_canonical_u32() as u8) + .collect::>() + .try_into() + .unwrap(); + let (t_prev, data_prev) = unsafe { + memory.write::(addr_space as u32, chunk_ptr as u32, chunk_u8) + }; + let data_prev_f = data_prev.map(F::from_canonical_u8); + let chunk_f: [F; 4] = chunk.try_into().unwrap(); + self.chip_for_block.get_mut(&4).unwrap().receive( + addr_space as u32, + chunk_ptr as u32, + &data_prev_f, + t_prev, + ); + self.chip_for_block.get_mut(&4).unwrap().send( + addr_space as u32, + chunk_ptr as u32, + &chunk_f, + t, + ); + } + return; + } + let t = memory.timestamp(); // TODO: this could be improved if we added a TracingMemory::write_f function let (t_prev, data_prev) = if addr_space <= 3 { diff --git a/crates/vm/src/system/memory/online.rs b/crates/vm/src/system/memory/online.rs index 
fb66845c0b..74e0e33d5c 100644 --- a/crates/vm/src/system/memory/online.rs +++ b/crates/vm/src/system/memory/online.rs @@ -580,6 +580,7 @@ impl TracingMemory { if header.block_size == header.lowest_block_size { return; } + assert_eq!(1, 0); // SAFETY: // - header.address_space is validated during instruction decoding and within bounds // - header.pointer and header.type_size define valid memory bounds within the address space @@ -612,7 +613,7 @@ impl TracingMemory { if header.block_size == header.lowest_block_size { return; } - + assert_eq!(1, 0); let record_mut = self .access_adapter_records .alloc(AccessLayout::from_record_header(&header)); diff --git a/extensions/bigint/circuit/src/base_alu.rs b/extensions/bigint/circuit/src/base_alu.rs index 6a8e49a239..2c9da56d0b 100644 --- a/extensions/bigint/circuit/src/base_alu.rs +++ b/extensions/bigint/circuit/src/base_alu.rs @@ -12,17 +12,16 @@ use openvm_instructions::{ riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS}, LocalOpcode, }; -use openvm_rv32_adapters::Rv32HeapAdapterExecutor; use openvm_rv32im_circuit::BaseAluExecutor; use openvm_rv32im_transpiler::BaseAluOpcode; use openvm_stark_backend::p3_field::PrimeField32; use crate::{ - common::{bytes_to_u64_array, u64_array_to_bytes}, - Rv32BaseAlu256Executor, INT256_NUM_LIMBS, + common::{bytes_to_u64_array, u64_array_to_bytes, vm_read_int256, vm_write_int256}, + BigintHeapAdapterExecutor, Rv32BaseAlu256Executor, INT256_NUM_LIMBS, }; -type AdapterExecutor = Rv32HeapAdapterExecutor<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>; +type AdapterExecutor = BigintHeapAdapterExecutor; impl Rv32BaseAlu256Executor { pub fn new(adapter: AdapterExecutor, offset: usize) -> Self { @@ -142,12 +141,10 @@ unsafe fn execute_e12_impl( let rs1_ptr = exec_state.vm_read::(RV32_REGISTER_AS, pre_compute.b as u32); let rs2_ptr = exec_state.vm_read::(RV32_REGISTER_AS, pre_compute.c as u32); let rd_ptr = exec_state.vm_read::(RV32_REGISTER_AS, pre_compute.a as u32); - let rs1 = - 
exec_state.vm_read::(RV32_MEMORY_AS, u32::from_le_bytes(rs1_ptr)); - let rs2 = - exec_state.vm_read::(RV32_MEMORY_AS, u32::from_le_bytes(rs2_ptr)); + let rs1 = vm_read_int256(exec_state, u32::from_le_bytes(rs1_ptr)); + let rs2 = vm_read_int256(exec_state, u32::from_le_bytes(rs2_ptr)); let rd = ::compute(rs1, rs2); - exec_state.vm_write(RV32_MEMORY_AS, u32::from_le_bytes(rd_ptr), &rd); + vm_write_int256(exec_state, u32::from_le_bytes(rd_ptr), &rd); let pc = exec_state.pc(); exec_state.set_pc(pc.wrapping_add(DEFAULT_PC_STEP)); } diff --git a/extensions/bigint/circuit/src/branch_eq.rs b/extensions/bigint/circuit/src/branch_eq.rs index 4732f6f9a7..ba8562cab8 100644 --- a/extensions/bigint/circuit/src/branch_eq.rs +++ b/extensions/bigint/circuit/src/branch_eq.rs @@ -9,14 +9,16 @@ use openvm_instructions::{ riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS}, LocalOpcode, }; -use openvm_rv32_adapters::Rv32HeapBranchAdapterExecutor; use openvm_rv32im_circuit::BranchEqualExecutor; use openvm_rv32im_transpiler::BranchEqualOpcode; use openvm_stark_backend::p3_field::PrimeField32; -use crate::{common::bytes_to_u64_array, Rv32BranchEqual256Executor, INT256_NUM_LIMBS}; +use crate::{ + common::{bytes_to_u64_array, vm_read_int256}, + BigintBranchAdapterExecutor, Rv32BranchEqual256Executor, INT256_NUM_LIMBS, +}; -type AdapterExecutor = Rv32HeapBranchAdapterExecutor<2, INT256_NUM_LIMBS>; +type AdapterExecutor = BigintBranchAdapterExecutor; impl Rv32BranchEqual256Executor { pub fn new(adapter_step: AdapterExecutor, offset: usize, pc_step: u32) -> Self { @@ -131,10 +133,8 @@ unsafe fn execute_e12_impl(RV32_REGISTER_AS, pre_compute.a as u32); let rs2_ptr = exec_state.vm_read::(RV32_REGISTER_AS, pre_compute.b as u32); - let rs1 = - exec_state.vm_read::(RV32_MEMORY_AS, u32::from_le_bytes(rs1_ptr)); - let rs2 = - exec_state.vm_read::(RV32_MEMORY_AS, u32::from_le_bytes(rs2_ptr)); + let rs1 = vm_read_int256(exec_state, u32::from_le_bytes(rs1_ptr)); + let rs2 = vm_read_int256(exec_state, 
u32::from_le_bytes(rs2_ptr)); let cmp_result = u256_eq(rs1, rs2); if cmp_result ^ IS_NE { pc = (pc as isize + pre_compute.imm) as u32; diff --git a/extensions/bigint/circuit/src/branch_lt.rs b/extensions/bigint/circuit/src/branch_lt.rs index 8dc294d70d..0e777dc24f 100644 --- a/extensions/bigint/circuit/src/branch_lt.rs +++ b/extensions/bigint/circuit/src/branch_lt.rs @@ -12,17 +12,16 @@ use openvm_instructions::{ riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS}, LocalOpcode, }; -use openvm_rv32_adapters::Rv32HeapBranchAdapterExecutor; use openvm_rv32im_circuit::BranchLessThanExecutor; use openvm_rv32im_transpiler::BranchLessThanOpcode; use openvm_stark_backend::p3_field::PrimeField32; use crate::{ - common::{i256_lt, u256_lt}, - Rv32BranchLessThan256Executor, INT256_NUM_LIMBS, + common::{i256_lt, u256_lt, vm_read_int256}, + BigintBranchAdapterExecutor, Rv32BranchLessThan256Executor, INT256_NUM_LIMBS, }; -type AdapterExecutor = Rv32HeapBranchAdapterExecutor<2, INT256_NUM_LIMBS>; +type AdapterExecutor = BigintBranchAdapterExecutor; impl Rv32BranchLessThan256Executor { pub fn new(adapter: AdapterExecutor, offset: usize) -> Self { @@ -139,10 +138,8 @@ unsafe fn execute_e12_impl(RV32_REGISTER_AS, pre_compute.a as u32); let rs2_ptr = exec_state.vm_read::(RV32_REGISTER_AS, pre_compute.b as u32); - let rs1 = - exec_state.vm_read::(RV32_MEMORY_AS, u32::from_le_bytes(rs1_ptr)); - let rs2 = - exec_state.vm_read::(RV32_MEMORY_AS, u32::from_le_bytes(rs2_ptr)); + let rs1 = vm_read_int256(exec_state, u32::from_le_bytes(rs1_ptr)); + let rs2 = vm_read_int256(exec_state, u32::from_le_bytes(rs2_ptr)); let cmp_result = OP::compute(rs1, rs2); if cmp_result { pc = (pc as isize + pre_compute.imm) as u32; diff --git a/extensions/bigint/circuit/src/common.rs b/extensions/bigint/circuit/src/common.rs index 329cf1d479..062c2a77a6 100644 --- a/extensions/bigint/circuit/src/common.rs +++ b/extensions/bigint/circuit/src/common.rs @@ -1,5 +1,55 @@ +use std::convert::TryInto; + +use openvm_circuit::{ + 
arch::{ExecutionCtxTrait, VmExecState, CONST_BLOCK_SIZE}, + system::memory::online::GuestMemory, +}; +use openvm_instructions::riscv::RV32_MEMORY_AS; +use openvm_stark_backend::p3_field::PrimeField32; + use crate::{INT256_NUM_LIMBS, RV32_CELL_BITS}; +pub const INT256_CHUNK_BYTES: usize = CONST_BLOCK_SIZE; +pub const INT256_BLOCKS_PER_ACCESS: usize = INT256_NUM_LIMBS / INT256_CHUNK_BYTES; + +#[inline(always)] +pub fn vm_read_int256( + exec_state: &mut VmExecState, + ptr: u32, +) -> [u8; INT256_NUM_LIMBS] { + let mut out = [0u8; INT256_NUM_LIMBS]; + for (i, chunk) in out + .chunks_exact_mut(INT256_CHUNK_BYTES) + .enumerate() + { + let data = exec_state.vm_read::( + RV32_MEMORY_AS, + ptr + (i * INT256_CHUNK_BYTES) as u32, + ); + chunk.copy_from_slice(&data); + } + out +} + +#[inline(always)] +pub fn vm_write_int256( + exec_state: &mut VmExecState, + ptr: u32, + data: &[u8; INT256_NUM_LIMBS], +) { + for (i, chunk) in data + .chunks_exact(INT256_CHUNK_BYTES) + .enumerate() + { + let chunk: &[u8; INT256_CHUNK_BYTES] = chunk.try_into().expect("chunk size"); + exec_state.vm_write::( + RV32_MEMORY_AS, + ptr + (i * INT256_CHUNK_BYTES) as u32, + chunk, + ); + } +} + #[inline(always)] pub fn bytes_to_u64_array(bytes: [u8; INT256_NUM_LIMBS]) -> [u64; 4] { // SAFETY: [u8; 32] to [u64; 4] transmute is safe - same size and compatible alignment diff --git a/extensions/bigint/circuit/src/extension/mod.rs b/extensions/bigint/circuit/src/extension/mod.rs index 0adb7fc595..fc4ebf81bd 100644 --- a/extensions/bigint/circuit/src/extension/mod.rs +++ b/extensions/bigint/circuit/src/extension/mod.rs @@ -98,19 +98,19 @@ impl VmExecutionExtension for Int256 { let pointer_max_bits = inventory.pointer_max_bits(); let alu = Rv32BaseAlu256Executor::new( - Rv32HeapAdapterExecutor::new(pointer_max_bits), + BigintHeapAdapterExecutor::new(pointer_max_bits), Rv32BaseAlu256Opcode::CLASS_OFFSET, ); inventory.add_executor(alu, Rv32BaseAlu256Opcode::iter().map(|x| x.global_opcode()))?; let lt = 
Rv32LessThan256Executor::new( - Rv32HeapAdapterExecutor::new(pointer_max_bits), + BigintHeapAdapterExecutor::new(pointer_max_bits), Rv32LessThan256Opcode::CLASS_OFFSET, ); inventory.add_executor(lt, Rv32LessThan256Opcode::iter().map(|x| x.global_opcode()))?; let beq = Rv32BranchEqual256Executor::new( - Rv32HeapBranchAdapterExecutor::new(pointer_max_bits), + BigintBranchAdapterExecutor::new(pointer_max_bits), Rv32BranchEqual256Opcode::CLASS_OFFSET, DEFAULT_PC_STEP, ); @@ -120,7 +120,7 @@ impl VmExecutionExtension for Int256 { )?; let blt = Rv32BranchLessThan256Executor::new( - Rv32HeapBranchAdapterExecutor::new(pointer_max_bits), + BigintBranchAdapterExecutor::new(pointer_max_bits), Rv32BranchLessThan256Opcode::CLASS_OFFSET, ); inventory.add_executor( @@ -129,13 +129,13 @@ impl VmExecutionExtension for Int256 { )?; let mult = Rv32Multiplication256Executor::new( - Rv32HeapAdapterExecutor::new(pointer_max_bits), + BigintHeapAdapterExecutor::new(pointer_max_bits), Rv32Mul256Opcode::CLASS_OFFSET, ); inventory.add_executor(mult, Rv32Mul256Opcode::iter().map(|x| x.global_opcode()))?; let shift = Rv32Shift256Executor::new( - Rv32HeapAdapterExecutor::new(pointer_max_bits), + BigintHeapAdapterExecutor::new(pointer_max_bits), Rv32Shift256Opcode::CLASS_OFFSET, ); inventory.add_executor(shift, Rv32Shift256Opcode::iter().map(|x| x.global_opcode()))?; @@ -188,37 +188,37 @@ impl VmCircuitExtension for Int256 { }; let alu = Rv32BaseAlu256Air::new( - Rv32HeapAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu, pointer_max_bits), + BigintHeapAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu, pointer_max_bits), BaseAluCoreAir::new(bitwise_lu, Rv32BaseAlu256Opcode::CLASS_OFFSET), ); inventory.add_air(alu); let lt = Rv32LessThan256Air::new( - Rv32HeapAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu, pointer_max_bits), + BigintHeapAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu, pointer_max_bits), LessThanCoreAir::new(bitwise_lu, Rv32LessThan256Opcode::CLASS_OFFSET), 
); inventory.add_air(lt); let beq = Rv32BranchEqual256Air::new( - Rv32HeapBranchAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu, pointer_max_bits), + BigintBranchAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu, pointer_max_bits), BranchEqualCoreAir::new(Rv32BranchEqual256Opcode::CLASS_OFFSET, DEFAULT_PC_STEP), ); inventory.add_air(beq); let blt = Rv32BranchLessThan256Air::new( - Rv32HeapBranchAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu, pointer_max_bits), + BigintBranchAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu, pointer_max_bits), BranchLessThanCoreAir::new(bitwise_lu, Rv32BranchLessThan256Opcode::CLASS_OFFSET), ); inventory.add_air(blt); let mult = Rv32Multiplication256Air::new( - Rv32HeapAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu, pointer_max_bits), + BigintHeapAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu, pointer_max_bits), MultiplicationCoreAir::new(range_tuple_checker, Rv32Mul256Opcode::CLASS_OFFSET), ); inventory.add_air(mult); let shift = Rv32Shift256Air::new( - Rv32HeapAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu, pointer_max_bits), + BigintHeapAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu, pointer_max_bits), ShiftCoreAir::new(bitwise_lu, range_checker, Rv32Shift256Opcode::CLASS_OFFSET), ); inventory.add_air(shift); @@ -281,7 +281,7 @@ where inventory.next_air::()?; let alu = Rv32BaseAlu256Chip::new( BaseAluFiller::new( - Rv32HeapAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()), + BigintHeapAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()), bitwise_lu.clone(), Rv32BaseAlu256Opcode::CLASS_OFFSET, ), @@ -292,7 +292,7 @@ where inventory.next_air::()?; let lt = Rv32LessThan256Chip::new( LessThanFiller::new( - Rv32HeapAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()), + BigintHeapAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()), bitwise_lu.clone(), Rv32LessThan256Opcode::CLASS_OFFSET, ), @@ -303,7 +303,7 @@ where inventory.next_air::()?; let beq = 
Rv32BranchEqual256Chip::new( BranchEqualFiller::new( - Rv32HeapBranchAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()), + BigintBranchAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()), Rv32BranchEqual256Opcode::CLASS_OFFSET, DEFAULT_PC_STEP, ), @@ -314,7 +314,7 @@ where inventory.next_air::()?; let blt = Rv32BranchLessThan256Chip::new( BranchLessThanFiller::new( - Rv32HeapBranchAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()), + BigintBranchAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()), bitwise_lu.clone(), Rv32BranchLessThan256Opcode::CLASS_OFFSET, ), @@ -325,7 +325,7 @@ where inventory.next_air::()?; let mult = Rv32Multiplication256Chip::new( MultiplicationFiller::new( - Rv32HeapAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()), + BigintHeapAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()), range_tuple_checker.clone(), Rv32Mul256Opcode::CLASS_OFFSET, ), @@ -336,7 +336,7 @@ where inventory.next_air::()?; let shift = Rv32Shift256Chip::new( ShiftFiller::new( - Rv32HeapAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()), + BigintHeapAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()), bitwise_lu.clone(), range_checker.clone(), Rv32Shift256Opcode::CLASS_OFFSET, diff --git a/extensions/bigint/circuit/src/less_than.rs b/extensions/bigint/circuit/src/less_than.rs index 68861d8ba0..9819e88a4b 100644 --- a/extensions/bigint/circuit/src/less_than.rs +++ b/extensions/bigint/circuit/src/less_than.rs @@ -12,14 +12,16 @@ use openvm_instructions::{ riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS}, LocalOpcode, }; -use openvm_rv32_adapters::Rv32HeapAdapterExecutor; use openvm_rv32im_circuit::LessThanExecutor; use openvm_rv32im_transpiler::LessThanOpcode; use openvm_stark_backend::p3_field::PrimeField32; -use crate::{common, Rv32LessThan256Executor, INT256_NUM_LIMBS}; +use crate::{ + common::{self, vm_read_int256, vm_write_int256}, + BigintHeapAdapterExecutor, Rv32LessThan256Executor, INT256_NUM_LIMBS, +}; -type AdapterExecutor = 
Rv32HeapAdapterExecutor<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>; +type AdapterExecutor = BigintHeapAdapterExecutor; impl Rv32LessThan256Executor { pub fn new(adapter: AdapterExecutor, offset: usize) -> Self { @@ -134,10 +136,8 @@ unsafe fn execute_e12_impl(RV32_REGISTER_AS, pre_compute.b as u32); let rs2_ptr = exec_state.vm_read::(RV32_REGISTER_AS, pre_compute.c as u32); let rd_ptr = exec_state.vm_read::(RV32_REGISTER_AS, pre_compute.a as u32); - let rs1 = - exec_state.vm_read::(RV32_MEMORY_AS, u32::from_le_bytes(rs1_ptr)); - let rs2 = - exec_state.vm_read::(RV32_MEMORY_AS, u32::from_le_bytes(rs2_ptr)); + let rs1 = vm_read_int256(exec_state, u32::from_le_bytes(rs1_ptr)); + let rs2 = vm_read_int256(exec_state, u32::from_le_bytes(rs2_ptr)); let cmp_result = if IS_U256 { common::u256_lt(rs1, rs2) } else { @@ -145,7 +145,7 @@ unsafe fn execute_e12_impl; +type BigintHeapAdapterFillerInner = + Rv32VecHeapAdapterFiller<2, INT256_BLOCKS_PER_ACCESS, INT256_BLOCKS_PER_ACCESS, INT256_CHUNK_BYTES, INT256_CHUNK_BYTES>; + +type BigintBranchAdapterInner = + Rv32HeapBranchAdapterExecutorGeneric<2, INT256_BLOCKS_PER_ACCESS, INT256_CHUNK_BYTES>; +type BigintBranchAdapterFillerInner = + Rv32HeapBranchAdapterFillerGeneric<2, INT256_BLOCKS_PER_ACCESS, INT256_CHUNK_BYTES>; + +fn chunk_expr( + word: [T; INT256_NUM_LIMBS], +) -> [[T; INT256_CHUNK_BYTES]; INT256_BLOCKS_PER_ACCESS] { + let mut iter = word.into_iter(); + std::array::from_fn(|_| std::array::from_fn(|_| iter.next().expect("chunk size"))) +} + +#[inline(always)] +fn flatten_int256( + chunks: [[u8; INT256_CHUNK_BYTES]; INT256_BLOCKS_PER_ACCESS], +) -> [u8; INT256_NUM_LIMBS] { + let mut word = [0u8; INT256_NUM_LIMBS]; + for (block_idx, block) in chunks.into_iter().enumerate() { + let start = block_idx * INT256_CHUNK_BYTES; + word[start..start + INT256_CHUNK_BYTES].copy_from_slice(&block); + } + word +} + +#[inline(always)] +fn chunk_int256( + word: [u8; INT256_NUM_LIMBS], +) -> [[u8; INT256_CHUNK_BYTES]; INT256_BLOCKS_PER_ACCESS] 
{ + let mut chunks = [[0u8; INT256_CHUNK_BYTES]; INT256_BLOCKS_PER_ACCESS]; + for (block_idx, chunk) in chunks.iter_mut().enumerate() { + let start = block_idx * INT256_CHUNK_BYTES; + chunk.copy_from_slice(&word[start..start + INT256_CHUNK_BYTES]); + } + chunks +} + +#[derive(Clone)] +pub struct BigintAccessReads( + pub [[[T; INT256_CHUNK_BYTES]; INT256_BLOCKS_PER_ACCESS]; 2], +); + +#[derive(Clone)] +pub struct BigintAccessWrites(pub [[T; INT256_CHUNK_BYTES]; INT256_BLOCKS_PER_ACCESS]); + +impl BigintAccessReads { + fn into_inner(self) -> [[[T; INT256_CHUNK_BYTES]; INT256_BLOCKS_PER_ACCESS]; 2] { + self.0 + } +} + +impl BigintAccessWrites { + fn into_inner(self) -> [[T; INT256_CHUNK_BYTES]; INT256_BLOCKS_PER_ACCESS] { + self.0 + } +} + +impl From<[[T; INT256_NUM_LIMBS]; 2]> for BigintAccessReads { + fn from(value: [[T; INT256_NUM_LIMBS]; 2]) -> Self { + let [first, second] = value; + Self([chunk_expr(first), chunk_expr(second)]) + } +} + +impl From<[[T; INT256_NUM_LIMBS]; 1]> for BigintAccessWrites { + fn from(value: [[T; INT256_NUM_LIMBS]; 1]) -> Self { + let [word] = value; + Self(chunk_expr(word)) + } +} + +pub struct BigintHeapAdapterInterface(PhantomData); + +impl VmAdapterInterface for BigintHeapAdapterInterface { + type Reads = BigintAccessReads; + type Writes = BigintAccessWrites; + type ProcessedInstruction = MinimalInstruction; +} + +pub struct BigintBranchAdapterInterface(PhantomData); + +impl VmAdapterInterface for BigintBranchAdapterInterface { + type Reads = BigintAccessReads; + type Writes = (); + type ProcessedInstruction = ImmInstruction; +} + +#[derive(Clone, Copy)] +pub struct BigintHeapAdapterAir { + inner: Rv32VecHeapAdapterAir< + 2, + INT256_BLOCKS_PER_ACCESS, + INT256_BLOCKS_PER_ACCESS, + INT256_CHUNK_BYTES, + INT256_CHUNK_BYTES, + >, +} + +impl BigintHeapAdapterAir { + pub fn new( + execution_bridge: ExecutionBridge, + memory_bridge: MemoryBridge, + bus: BitwiseOperationLookupBus, + address_bits: usize, + ) -> Self { + Self { + inner: 
Rv32VecHeapAdapterAir::new( + execution_bridge, + memory_bridge, + bus, + address_bits, + ), + } + } +} + +impl BaseAir for BigintHeapAdapterAir { + fn width(&self) -> usize { + as BaseAir>::width(&self.inner) + } +} + +impl VmAdapterAir for BigintHeapAdapterAir +where + AB: InteractionBuilder, +{ + type Interface = BigintHeapAdapterInterface; + + fn eval( + &self, + builder: &mut AB, + local: &[AB::Var], + ctx: AdapterAirContext, + ) { + let inner_ctx = AdapterAirContext { + to_pc: ctx.to_pc, + reads: ctx.reads.into_inner(), + writes: ctx.writes.into_inner(), + instruction: ctx.instruction, + }; + self.inner.eval(builder, local, inner_ctx); + } + + fn get_from_pc(&self, local: &[AB::Var]) -> AB::Var { + as VmAdapterAir>::get_from_pc(&self.inner, local) + } +} + +#[derive(Clone, Copy)] +pub struct BigintBranchAdapterAir { + inner: + Rv32HeapBranchAdapterAirGeneric<2, INT256_BLOCKS_PER_ACCESS, INT256_CHUNK_BYTES>, +} + +impl BigintBranchAdapterAir { + pub fn new( + execution_bridge: ExecutionBridge, + memory_bridge: MemoryBridge, + bus: BitwiseOperationLookupBus, + address_bits: usize, + ) -> Self { + Self { + inner: Rv32HeapBranchAdapterAirGeneric::new( + execution_bridge, + memory_bridge, + bus, + address_bits, + ), + } + } +} + +impl BaseAir for BigintBranchAdapterAir { + fn width(&self) -> usize { + as BaseAir>::width(&self.inner) + } +} + +impl VmAdapterAir for BigintBranchAdapterAir +where + AB: InteractionBuilder, +{ + type Interface = BigintBranchAdapterInterface; + + fn eval( + &self, + builder: &mut AB, + local: &[AB::Var], + ctx: AdapterAirContext, + ) { + let inner_ctx = AdapterAirContext { + to_pc: ctx.to_pc, + reads: ctx.reads.into_inner(), + writes: [], + instruction: ctx.instruction, + }; + self.inner.eval(builder, local, inner_ctx); + } + + fn get_from_pc(&self, local: &[AB::Var]) -> AB::Var { + as VmAdapterAir>::get_from_pc(&self.inner, local) + } +} + +#[derive(Clone, Copy)] +pub struct BigintHeapAdapterExecutor { + inner: BigintHeapAdapterInner, 
+} + +impl BigintHeapAdapterExecutor { + pub fn new(pointer_max_bits: usize) -> Self { + Self { + inner: BigintHeapAdapterInner::new(pointer_max_bits), + } + } +} + +impl AdapterTraceExecutor for BigintHeapAdapterExecutor { + const WIDTH: usize = >::WIDTH; + type ReadData = [[u8; INT256_NUM_LIMBS]; 2]; + type WriteData = [[u8; INT256_NUM_LIMBS]; 1]; + type RecordMut<'a> = >::RecordMut<'a>; + + fn start(pc: u32, memory: &TracingMemory, record: &mut Self::RecordMut<'_>) { + >::start(pc, memory, record); + } + + fn read( + &self, + memory: &mut TracingMemory, + instruction: &Instruction, + record: &mut Self::RecordMut<'_>, + ) -> Self::ReadData { + let chunked = >::read( + &self.inner, + memory, + instruction, + record, + ); + chunked.map(flatten_int256) + } + + fn write( + &self, + memory: &mut TracingMemory, + instruction: &Instruction, + data: Self::WriteData, + record: &mut Self::RecordMut<'_>, + ) { + let [word] = data; + let chunked = chunk_int256(word); + >::write( + &self.inner, + memory, + instruction, + chunked, + record, + ); + } +} + +#[derive(Clone)] +pub struct BigintHeapAdapterFiller { + inner: BigintHeapAdapterFillerInner, +} + +impl BigintHeapAdapterFiller { + pub fn new( + pointer_max_bits: usize, + bitwise_lookup_chip: SharedBitwiseOperationLookupChip, + ) -> Self { + Self { + inner: BigintHeapAdapterFillerInner::new(pointer_max_bits, bitwise_lookup_chip), + } + } +} + +impl AdapterTraceFiller for BigintHeapAdapterFiller { + const WIDTH: usize = >::WIDTH; + + fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory, adapter_row: &mut [F]) { + self.inner.fill_trace_row(mem_helper, adapter_row); + } +} + +#[derive(Clone, Copy)] +pub struct BigintBranchAdapterExecutor { + inner: BigintBranchAdapterInner, +} + +impl BigintBranchAdapterExecutor { + pub fn new(pointer_max_bits: usize) -> Self { + Self { + inner: BigintBranchAdapterInner::new(pointer_max_bits), + } + } +} + +impl AdapterTraceExecutor for BigintBranchAdapterExecutor { + const WIDTH: usize 
= >::WIDTH; + type ReadData = [[u8; INT256_NUM_LIMBS]; 2]; + type WriteData = (); + type RecordMut<'a> = >::RecordMut<'a>; + + fn start(pc: u32, memory: &TracingMemory, record: &mut Self::RecordMut<'_>) { + >::start(pc, memory, record); + } + + fn read( + &self, + memory: &mut TracingMemory, + instruction: &Instruction, + record: &mut Self::RecordMut<'_>, + ) -> Self::ReadData { + let chunked = >::read( + &self.inner, + memory, + instruction, + record, + ); + chunked.map(flatten_int256) + } + + fn write( + &self, + memory: &mut TracingMemory, + instruction: &Instruction, + data: Self::WriteData, + record: &mut Self::RecordMut<'_>, + ) { + >::write( + &self.inner, + memory, + instruction, + data, + record, + ); + } +} + +#[derive(Clone)] +pub struct BigintBranchAdapterFiller { + inner: BigintBranchAdapterFillerInner, +} + +impl BigintBranchAdapterFiller { + pub fn new( + pointer_max_bits: usize, + bitwise_lookup_chip: SharedBitwiseOperationLookupChip, + ) -> Self { + Self { + inner: BigintBranchAdapterFillerInner::new(pointer_max_bits, bitwise_lookup_chip), + } + } +} + +impl AdapterTraceFiller for BigintBranchAdapterFiller { + const WIDTH: usize = >::WIDTH; + + fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory, adapter_row: &mut [F]) { + self.inner.fill_trace_row(mem_helper, adapter_row); + } +} + /// BaseAlu256 pub type Rv32BaseAlu256Air = VmAirWrapper< - Rv32HeapAdapterAir<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>, + BigintHeapAdapterAir, BaseAluCoreAir, >; #[derive(Clone, PreflightExecutor)] pub struct Rv32BaseAlu256Executor( BaseAluExecutor< - Rv32HeapAdapterExecutor<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>, + BigintHeapAdapterExecutor, INT256_NUM_LIMBS, RV32_CELL_BITS, >, @@ -57,7 +469,7 @@ pub struct Rv32BaseAlu256Executor( pub type Rv32BaseAlu256Chip = VmChipWrapper< F, BaseAluFiller< - Rv32HeapAdapterFiller<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>, + BigintHeapAdapterFiller, INT256_NUM_LIMBS, RV32_CELL_BITS, >, @@ -65,13 +477,13 @@ pub type 
Rv32BaseAlu256Chip = VmChipWrapper< /// LessThan256 pub type Rv32LessThan256Air = VmAirWrapper< - Rv32HeapAdapterAir<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>, + BigintHeapAdapterAir, LessThanCoreAir, >; #[derive(Clone, PreflightExecutor)] pub struct Rv32LessThan256Executor( LessThanExecutor< - Rv32HeapAdapterExecutor<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>, + BigintHeapAdapterExecutor, INT256_NUM_LIMBS, RV32_CELL_BITS, >, @@ -79,7 +491,7 @@ pub struct Rv32LessThan256Executor( pub type Rv32LessThan256Chip = VmChipWrapper< F, LessThanFiller< - Rv32HeapAdapterFiller<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>, + BigintHeapAdapterFiller, INT256_NUM_LIMBS, RV32_CELL_BITS, >, @@ -87,13 +499,13 @@ pub type Rv32LessThan256Chip = VmChipWrapper< /// Multiplication256 pub type Rv32Multiplication256Air = VmAirWrapper< - Rv32HeapAdapterAir<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>, + BigintHeapAdapterAir, MultiplicationCoreAir, >; #[derive(Clone, PreflightExecutor)] pub struct Rv32Multiplication256Executor( MultiplicationExecutor< - Rv32HeapAdapterExecutor<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>, + BigintHeapAdapterExecutor, INT256_NUM_LIMBS, RV32_CELL_BITS, >, @@ -101,7 +513,7 @@ pub struct Rv32Multiplication256Executor( pub type Rv32Multiplication256Chip = VmChipWrapper< F, MultiplicationFiller< - Rv32HeapAdapterFiller<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>, + BigintHeapAdapterFiller, INT256_NUM_LIMBS, RV32_CELL_BITS, >, @@ -109,13 +521,13 @@ pub type Rv32Multiplication256Chip = VmChipWrapper< /// Shift256 pub type Rv32Shift256Air = VmAirWrapper< - Rv32HeapAdapterAir<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>, + BigintHeapAdapterAir, ShiftCoreAir, >; #[derive(Clone, PreflightExecutor)] pub struct Rv32Shift256Executor( ShiftExecutor< - Rv32HeapAdapterExecutor<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>, + BigintHeapAdapterExecutor, INT256_NUM_LIMBS, RV32_CELL_BITS, >, @@ -123,7 +535,7 @@ pub struct Rv32Shift256Executor( pub type Rv32Shift256Chip = VmChipWrapper< F, ShiftFiller< - 
Rv32HeapAdapterFiller<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>, + BigintHeapAdapterFiller, INT256_NUM_LIMBS, RV32_CELL_BITS, >, @@ -131,27 +543,27 @@ pub type Rv32Shift256Chip = VmChipWrapper< /// BranchEqual256 pub type Rv32BranchEqual256Air = VmAirWrapper< - Rv32HeapBranchAdapterAir<2, INT256_NUM_LIMBS>, + BigintBranchAdapterAir, BranchEqualCoreAir, >; #[derive(Clone, PreflightExecutor)] pub struct Rv32BranchEqual256Executor( - BranchEqualExecutor, INT256_NUM_LIMBS>, + BranchEqualExecutor, ); pub type Rv32BranchEqual256Chip = VmChipWrapper< F, - BranchEqualFiller, INT256_NUM_LIMBS>, + BranchEqualFiller, >; /// BranchLessThan256 pub type Rv32BranchLessThan256Air = VmAirWrapper< - Rv32HeapBranchAdapterAir<2, INT256_NUM_LIMBS>, + BigintBranchAdapterAir, BranchLessThanCoreAir, >; #[derive(Clone, PreflightExecutor)] pub struct Rv32BranchLessThan256Executor( BranchLessThanExecutor< - Rv32HeapBranchAdapterExecutor<2, INT256_NUM_LIMBS>, + BigintBranchAdapterExecutor, INT256_NUM_LIMBS, RV32_CELL_BITS, >, @@ -159,7 +571,7 @@ pub struct Rv32BranchLessThan256Executor( pub type Rv32BranchLessThan256Chip = VmChipWrapper< F, BranchLessThanFiller< - Rv32HeapBranchAdapterFiller<2, INT256_NUM_LIMBS>, + BigintBranchAdapterFiller, INT256_NUM_LIMBS, RV32_CELL_BITS, >, diff --git a/extensions/bigint/circuit/src/mult.rs b/extensions/bigint/circuit/src/mult.rs index 2eff4b9096..10d629e9e6 100644 --- a/extensions/bigint/circuit/src/mult.rs +++ b/extensions/bigint/circuit/src/mult.rs @@ -9,17 +9,16 @@ use openvm_instructions::{ riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS}, LocalOpcode, }; -use openvm_rv32_adapters::Rv32HeapAdapterExecutor; use openvm_rv32im_circuit::MultiplicationExecutor; use openvm_rv32im_transpiler::MulOpcode; use openvm_stark_backend::p3_field::PrimeField32; use crate::{ - common::{bytes_to_u32_array, u32_array_to_bytes}, - Rv32Multiplication256Executor, INT256_NUM_LIMBS, + common::{bytes_to_u32_array, u32_array_to_bytes, vm_read_int256, vm_write_int256}, + 
BigintHeapAdapterExecutor, Rv32Multiplication256Executor, INT256_NUM_LIMBS, }; -type AdapterExecutor = Rv32HeapAdapterExecutor<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>; +type AdapterExecutor = BigintHeapAdapterExecutor; impl Rv32Multiplication256Executor { pub fn new(adapter: AdapterExecutor, offset: usize) -> Self { @@ -125,12 +124,10 @@ unsafe fn execute_e12_impl( let rs1_ptr = exec_state.vm_read::(RV32_REGISTER_AS, pre_compute.b as u32); let rs2_ptr = exec_state.vm_read::(RV32_REGISTER_AS, pre_compute.c as u32); let rd_ptr = exec_state.vm_read::(RV32_REGISTER_AS, pre_compute.a as u32); - let rs1 = - exec_state.vm_read::(RV32_MEMORY_AS, u32::from_le_bytes(rs1_ptr)); - let rs2 = - exec_state.vm_read::(RV32_MEMORY_AS, u32::from_le_bytes(rs2_ptr)); + let rs1 = vm_read_int256(exec_state, u32::from_le_bytes(rs1_ptr)); + let rs2 = vm_read_int256(exec_state, u32::from_le_bytes(rs2_ptr)); let rd = u256_mul(rs1, rs2); - exec_state.vm_write(RV32_MEMORY_AS, u32::from_le_bytes(rd_ptr), &rd); + vm_write_int256(exec_state, u32::from_le_bytes(rd_ptr), &rd); let pc = exec_state.pc(); exec_state.set_pc(pc.wrapping_add(DEFAULT_PC_STEP)); diff --git a/extensions/bigint/circuit/src/shift.rs b/extensions/bigint/circuit/src/shift.rs index c08afc26e0..498d4731b0 100644 --- a/extensions/bigint/circuit/src/shift.rs +++ b/extensions/bigint/circuit/src/shift.rs @@ -12,17 +12,16 @@ use openvm_instructions::{ riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS}, LocalOpcode, }; -use openvm_rv32_adapters::Rv32HeapAdapterExecutor; use openvm_rv32im_circuit::ShiftExecutor; use openvm_rv32im_transpiler::ShiftOpcode; use openvm_stark_backend::p3_field::PrimeField32; use crate::{ - common::{bytes_to_u64_array, u64_array_to_bytes}, - Rv32Shift256Executor, INT256_NUM_LIMBS, + common::{bytes_to_u64_array, u64_array_to_bytes, vm_read_int256, vm_write_int256}, + BigintHeapAdapterExecutor, Rv32Shift256Executor, INT256_NUM_LIMBS, }; -type AdapterExecutor = Rv32HeapAdapterExecutor<2, INT256_NUM_LIMBS, 
INT256_NUM_LIMBS>; +type AdapterExecutor = BigintHeapAdapterExecutor; impl Rv32Shift256Executor { pub fn new(adapter: AdapterExecutor, offset: usize) -> Self { @@ -138,12 +137,10 @@ unsafe fn execute_e12_impl let rs1_ptr = exec_state.vm_read::(RV32_REGISTER_AS, pre_compute.b as u32); let rs2_ptr = exec_state.vm_read::(RV32_REGISTER_AS, pre_compute.c as u32); let rd_ptr = exec_state.vm_read::(RV32_REGISTER_AS, pre_compute.a as u32); - let rs1 = - exec_state.vm_read::(RV32_MEMORY_AS, u32::from_le_bytes(rs1_ptr)); - let rs2 = - exec_state.vm_read::(RV32_MEMORY_AS, u32::from_le_bytes(rs2_ptr)); + let rs1 = vm_read_int256(exec_state, u32::from_le_bytes(rs1_ptr)); + let rs2 = vm_read_int256(exec_state, u32::from_le_bytes(rs2_ptr)); let rd = OP::compute(rs1, rs2); - exec_state.vm_write(RV32_MEMORY_AS, u32::from_le_bytes(rd_ptr), &rd); + vm_write_int256(exec_state, u32::from_le_bytes(rd_ptr), &rd); let pc = exec_state.pc(); exec_state.set_pc(pc.wrapping_add(DEFAULT_PC_STEP)); } diff --git a/extensions/bigint/circuit/src/tests.rs b/extensions/bigint/circuit/src/tests.rs index 00892e28a0..02a4c8e73c 100644 --- a/extensions/bigint/circuit/src/tests.rs +++ b/extensions/bigint/circuit/src/tests.rs @@ -10,7 +10,7 @@ use openvm_circuit::{ TestBuilder, TestChipHarness, VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS, RANGE_TUPLE_CHECKER_BUS, }, - Arena, ExecutionBridge, PreflightExecutor, + Arena, ExecutionBridge, MemoryConfig, PreflightExecutor, }, system::memory::{offline_checker::MemoryBridge, SharedMemoryHelper}, utils::generate_long_number, @@ -22,14 +22,10 @@ use openvm_circuit_primitives::{ }; use openvm_instructions::{ program::{DEFAULT_PC_STEP, PC_BITS}, - riscv::RV32_CELL_BITS, - LocalOpcode, -}; -use openvm_rv32_adapters::{ - rv32_heap_branch_default, rv32_write_heap_default, Rv32HeapAdapterAir, Rv32HeapAdapterExecutor, - Rv32HeapAdapterFiller, Rv32HeapBranchAdapterAir, Rv32HeapBranchAdapterExecutor, - Rv32HeapBranchAdapterFiller, + riscv::{RV32_CELL_BITS, RV32_REGISTER_AS}, + 
LocalOpcode, NATIVE_AS, }; +use openvm_rv32_adapters::{rv32_heap_branch_default, rv32_write_heap_default}; use openvm_rv32im_circuit::{ adapters::{INT256_NUM_LIMBS, RV_B_TYPE_IMM_BITS}, BaseAluCoreAir, BaseAluFiller, BranchEqualCoreAir, BranchEqualFiller, BranchLessThanCoreAir, @@ -63,12 +59,13 @@ use { }; use crate::{ - Rv32BaseAlu256Air, Rv32BaseAlu256Chip, Rv32BaseAlu256Executor, Rv32BranchEqual256Air, - Rv32BranchEqual256Chip, Rv32BranchEqual256Executor, Rv32BranchLessThan256Air, - Rv32BranchLessThan256Chip, Rv32BranchLessThan256Executor, Rv32LessThan256Air, - Rv32LessThan256Chip, Rv32LessThan256Executor, Rv32Multiplication256Air, - Rv32Multiplication256Chip, Rv32Multiplication256Executor, Rv32Shift256Air, Rv32Shift256Chip, - Rv32Shift256Executor, + BigintBranchAdapterAir, BigintBranchAdapterExecutor, BigintBranchAdapterFiller, + BigintHeapAdapterAir, BigintHeapAdapterExecutor, BigintHeapAdapterFiller, Rv32BaseAlu256Air, + Rv32BaseAlu256Chip, Rv32BaseAlu256Executor, Rv32BranchEqual256Air, Rv32BranchEqual256Chip, + Rv32BranchEqual256Executor, Rv32BranchLessThan256Air, Rv32BranchLessThan256Chip, + Rv32BranchLessThan256Executor, Rv32LessThan256Air, Rv32LessThan256Chip, + Rv32LessThan256Executor, Rv32Multiplication256Air, Rv32Multiplication256Chip, + Rv32Multiplication256Executor, Rv32Shift256Air, Rv32Shift256Chip, Rv32Shift256Executor, }; type F = BabyBear; @@ -79,6 +76,14 @@ const RANGE_TUPLE_SIZES: [u32; 2] = [ (INT256_NUM_LIMBS * (1 << RV32_CELL_BITS)) as u32, ]; +fn tester_with_access_adapters_disabled() -> VmChipTestBuilder { + let mut mem_config = MemoryConfig::default(); + mem_config.addr_spaces[RV32_REGISTER_AS as usize].num_cells = 1 << 29; + mem_config.addr_spaces[NATIVE_AS as usize].num_cells = 0; + mem_config.access_adapters_enabled = false; + VmChipTestBuilder::persistent(mem_config) +} + fn create_alu_harness_fields( memory_bridge: MemoryBridge, execution_bridge: ExecutionBridge, @@ -91,7 +96,7 @@ fn create_alu_harness_fields( Rv32BaseAlu256Chip, ) { 
let air = Rv32BaseAlu256Air::new( - Rv32HeapAdapterAir::new( + BigintHeapAdapterAir::new( execution_bridge, memory_bridge, bitwise_chip.bus(), @@ -100,12 +105,12 @@ fn create_alu_harness_fields( BaseAluCoreAir::new(bitwise_chip.bus(), Rv32BaseAlu256Opcode::CLASS_OFFSET), ); let executor = Rv32BaseAlu256Executor::new( - Rv32HeapAdapterExecutor::new(address_bits), + BigintHeapAdapterExecutor::new(address_bits), Rv32BaseAlu256Opcode::CLASS_OFFSET, ); let chip = Rv32BaseAlu256Chip::new( BaseAluFiller::new( - Rv32HeapAdapterFiller::new(address_bits, bitwise_chip.clone()), + BigintHeapAdapterFiller::new(address_bits, bitwise_chip.clone()), bitwise_chip, Rv32BaseAlu256Opcode::CLASS_OFFSET, ), @@ -126,7 +131,7 @@ fn create_lt_harness_fields( Rv32LessThan256Chip, ) { let air = Rv32LessThan256Air::new( - Rv32HeapAdapterAir::new( + BigintHeapAdapterAir::new( execution_bridge, memory_bridge, bitwise_chip.bus(), @@ -135,12 +140,12 @@ fn create_lt_harness_fields( LessThanCoreAir::new(bitwise_chip.bus(), Rv32LessThan256Opcode::CLASS_OFFSET), ); let executor = Rv32LessThan256Executor::new( - Rv32HeapAdapterExecutor::new(address_bits), + BigintHeapAdapterExecutor::new(address_bits), Rv32LessThan256Opcode::CLASS_OFFSET, ); let chip = Rv32LessThan256Chip::new( LessThanFiller::new( - Rv32HeapAdapterFiller::new(address_bits, bitwise_chip.clone()), + BigintHeapAdapterFiller::new(address_bits, bitwise_chip.clone()), bitwise_chip.clone(), Rv32LessThan256Opcode::CLASS_OFFSET, ), @@ -162,7 +167,7 @@ fn create_mul_harness_fields( Rv32Multiplication256Chip, ) { let air = Rv32Multiplication256Air::new( - Rv32HeapAdapterAir::new( + BigintHeapAdapterAir::new( execution_bridge, memory_bridge, bitwise_chip.bus(), @@ -171,12 +176,12 @@ fn create_mul_harness_fields( MultiplicationCoreAir::new(*range_tuple_chip.bus(), Rv32Mul256Opcode::CLASS_OFFSET), ); let executor = Rv32Multiplication256Executor::new( - Rv32HeapAdapterExecutor::new(address_bits), + BigintHeapAdapterExecutor::new(address_bits), 
Rv32Mul256Opcode::CLASS_OFFSET, ); let chip = Rv32Multiplication256Chip::::new( MultiplicationFiller::new( - Rv32HeapAdapterFiller::new(address_bits, bitwise_chip), + BigintHeapAdapterFiller::new(address_bits, bitwise_chip), range_tuple_chip, Rv32Mul256Opcode::CLASS_OFFSET, ), @@ -194,7 +199,7 @@ fn create_shift_harness_fields( address_bits: usize, ) -> (Rv32Shift256Air, Rv32Shift256Executor, Rv32Shift256Chip) { let air = Rv32Shift256Air::new( - Rv32HeapAdapterAir::new( + BigintHeapAdapterAir::new( execution_bridge, memory_bridge, bitwise_chip.bus(), @@ -207,12 +212,12 @@ fn create_shift_harness_fields( ), ); let executor = Rv32Shift256Executor::new( - Rv32HeapAdapterExecutor::new(address_bits), + BigintHeapAdapterExecutor::new(address_bits), Rv32Shift256Opcode::CLASS_OFFSET, ); let chip = Rv32Shift256Chip::new( ShiftFiller::new( - Rv32HeapAdapterFiller::new(address_bits, bitwise_chip.clone()), + BigintHeapAdapterFiller::new(address_bits, bitwise_chip.clone()), bitwise_chip.clone(), range_checker_chip.clone(), Rv32Shift256Opcode::CLASS_OFFSET, @@ -234,7 +239,7 @@ fn create_beq_harness_fields( Rv32BranchEqual256Chip, ) { let air = Rv32BranchEqual256Air::new( - Rv32HeapBranchAdapterAir::new( + BigintBranchAdapterAir::new( execution_bridge, memory_bridge, bitwise_chip.bus(), @@ -243,13 +248,13 @@ fn create_beq_harness_fields( BranchEqualCoreAir::new(Rv32BranchEqual256Opcode::CLASS_OFFSET, DEFAULT_PC_STEP), ); let executor = Rv32BranchEqual256Executor::new( - Rv32HeapBranchAdapterExecutor::new(address_bits), + BigintBranchAdapterExecutor::new(address_bits), Rv32BranchEqual256Opcode::CLASS_OFFSET, DEFAULT_PC_STEP, ); let chip = Rv32BranchEqual256Chip::new( BranchEqualFiller::new( - Rv32HeapBranchAdapterFiller::new(address_bits, bitwise_chip), + BigintBranchAdapterFiller::new(address_bits, bitwise_chip), Rv32BranchEqual256Opcode::CLASS_OFFSET, DEFAULT_PC_STEP, ), @@ -270,7 +275,7 @@ fn create_blt_harness_fields( Rv32BranchLessThan256Chip, ) { let air = 
Rv32BranchLessThan256Air::new( - Rv32HeapBranchAdapterAir::new( + BigintBranchAdapterAir::new( execution_bridge, memory_bridge, bitwise_chip.bus(), @@ -282,12 +287,12 @@ fn create_blt_harness_fields( ), ); let executor = Rv32BranchLessThan256Executor::new( - Rv32HeapBranchAdapterExecutor::new(address_bits), + BigintBranchAdapterExecutor::new(address_bits), Rv32BranchLessThan256Opcode::CLASS_OFFSET, ); let chip = Rv32BranchLessThan256Chip::new( BranchLessThanFiller::new( - Rv32HeapBranchAdapterFiller::new(address_bits, bitwise_chip.clone()), + BigintBranchAdapterFiller::new(address_bits, bitwise_chip.clone()), bitwise_chip, Rv32BranchLessThan256Opcode::CLASS_OFFSET, ), @@ -374,7 +379,7 @@ fn set_and_execute_rand>( #[test_case(BaseAluOpcode::AND, 24)] fn run_alu_256_rand_test(opcode: BaseAluOpcode, num_ops: usize) { let mut rng = create_seeded_rng(); - let mut tester = VmChipTestBuilder::default(); + let mut tester = tester_with_access_adapters_disabled(); let offset = Rv32BaseAlu256Opcode::CLASS_OFFSET; let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS); @@ -413,7 +418,7 @@ fn run_alu_256_rand_test(opcode: BaseAluOpcode, num_ops: usize) { #[test_case(LessThanOpcode::SLTU, 24)] fn run_lt_256_rand_test(opcode: LessThanOpcode, num_ops: usize) { let mut rng = create_seeded_rng(); - let mut tester = VmChipTestBuilder::default(); + let mut tester = tester_with_access_adapters_disabled(); let offset = Rv32LessThan256Opcode::CLASS_OFFSET; let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS); @@ -451,7 +456,7 @@ fn run_lt_256_rand_test(opcode: LessThanOpcode, num_ops: usize) { #[test_case(MulOpcode::MUL, 24)] fn run_mul_256_rand_test(opcode: MulOpcode, num_ops: usize) { let mut rng = create_seeded_rng(); - let mut tester = VmChipTestBuilder::default(); + let mut tester = tester_with_access_adapters_disabled(); let offset = Rv32Mul256Opcode::CLASS_OFFSET; let range_tuple_bus = RangeTupleCheckerBus::new(RANGE_TUPLE_CHECKER_BUS, 
RANGE_TUPLE_SIZES); @@ -496,7 +501,7 @@ fn run_mul_256_rand_test(opcode: MulOpcode, num_ops: usize) { #[test_case(ShiftOpcode::SRA, 24)] fn run_shift_256_rand_test(opcode: ShiftOpcode, num_ops: usize) { let mut rng = create_seeded_rng(); - let mut tester = VmChipTestBuilder::default(); + let mut tester = tester_with_access_adapters_disabled(); let offset = Rv32Shift256Opcode::CLASS_OFFSET; let range_checker_chip = tester.range_checker(); @@ -538,7 +543,7 @@ fn run_shift_256_rand_test(opcode: ShiftOpcode, num_ops: usize) { #[test_case(BranchEqualOpcode::BNE, 24)] fn run_beq_256_rand_test(opcode: BranchEqualOpcode, num_ops: usize) { let mut rng = create_seeded_rng(); - let mut tester = VmChipTestBuilder::default(); + let mut tester = tester_with_access_adapters_disabled(); let offset = Rv32BranchEqual256Opcode::CLASS_OFFSET; let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS); @@ -579,7 +584,7 @@ fn run_beq_256_rand_test(opcode: BranchEqualOpcode, num_ops: usize) { #[test_case(BranchLessThanOpcode::BGEU, 24)] fn run_blt_256_rand_test(opcode: BranchLessThanOpcode, num_ops: usize) { let mut rng = create_seeded_rng(); - let mut tester = VmChipTestBuilder::default(); + let mut tester = tester_with_access_adapters_disabled(); let offset = Rv32BranchLessThan256Opcode::CLASS_OFFSET; let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS); @@ -620,7 +625,7 @@ fn run_blt_256_rand_test(opcode: BranchLessThanOpcode, num_ops: usize) { #[test_case(BaseAluOpcode::XOR, 24)] #[test_case(BaseAluOpcode::OR, 24)] #[test_case(BaseAluOpcode::AND, 24)] -fn run_alu_256_rand_test_cuda(opcode: BaseAluOpcode, num_ops: usize) { +fn run_alu_256_ran_test_cuda(opcode: BaseAluOpcode, num_ops: usize) { let mut rng = create_seeded_rng(); let mut tester = GpuChipTestBuilder::default().with_bitwise_op_lookup(default_bitwise_lookup_bus()); @@ -667,7 +672,7 @@ fn run_alu_256_rand_test_cuda(opcode: BaseAluOpcode, num_ops: usize) { .get_record_seeker::() 
.transfer_to_matrix_arena( &mut harness.matrix_arena, - EmptyAdapterCoreLayout::>::new(), + EmptyAdapterCoreLayout::::new(), ); tester @@ -728,7 +733,7 @@ fn run_lt_256_rand_test_cuda(opcode: LessThanOpcode, num_ops: usize) { .get_record_seeker::() .transfer_to_matrix_arena( &mut harness.matrix_arena, - EmptyAdapterCoreLayout::>::new(), + EmptyAdapterCoreLayout::::new(), ); tester @@ -794,7 +799,7 @@ fn run_mul_256_rand_test_cuda(opcode: MulOpcode, num_ops: usize) { .get_record_seeker::() .transfer_to_matrix_arena( &mut harness.matrix_arena, - EmptyAdapterCoreLayout::>::new(), + EmptyAdapterCoreLayout::::new(), ); tester @@ -856,7 +861,7 @@ fn run_shift_256_rand_test_cuda(opcode: ShiftOpcode, num_ops: usize) { .get_record_seeker::() .transfer_to_matrix_arena( &mut harness.matrix_arena, - EmptyAdapterCoreLayout::>::new(), + EmptyAdapterCoreLayout::::new(), ); tester @@ -917,7 +922,7 @@ fn run_beq_256_rand_test_cuda(opcode: BranchEqualOpcode, num_ops: usize) { .get_record_seeker::() .transfer_to_matrix_arena( &mut harness.matrix_arena, - EmptyAdapterCoreLayout::>::new(), + EmptyAdapterCoreLayout::::new(), ); tester @@ -980,7 +985,7 @@ fn run_blt_256_rand_test_cuda(opcode: BranchLessThanOpcode, num_ops: usize) { .get_record_seeker::() .transfer_to_matrix_arena( &mut harness.matrix_arena, - EmptyAdapterCoreLayout::>::new(), + EmptyAdapterCoreLayout::::new(), ); tester diff --git a/extensions/rv32-adapters/src/heap_branch.rs b/extensions/rv32-adapters/src/heap_branch.rs index e87b4fd973..e616a8e07b 100644 --- a/extensions/rv32-adapters/src/heap_branch.rs +++ b/extensions/rv32-adapters/src/heap_branch.rs @@ -1,13 +1,14 @@ use std::{ array::from_fn, borrow::{Borrow, BorrowMut}, + marker::PhantomData, }; use itertools::izip; use openvm_circuit::{ arch::{ get_record_from_slice, AdapterAirContext, AdapterTraceExecutor, AdapterTraceFiller, - BasicAdapterInterface, ExecutionBridge, ExecutionState, ImmInstruction, VmAdapterAir, + ExecutionBridge, ExecutionState, ImmInstruction, 
VmAdapterAir, VmAdapterInterface, }, system::memory::{ offline_checker::{MemoryBridge, MemoryReadAuxCols, MemoryReadAuxRecord}, @@ -32,43 +33,77 @@ use openvm_stark_backend::{ p3_field::{Field, FieldAlgebra, PrimeField32}, }; -/// This adapter reads from NUM_READS <= 2 pointers. -/// * The data is read from the heap (address space 2), and the pointers are read from registers -/// (address space 1). -/// * Reads are from the addresses in `rs[0]` (and `rs[1]` if `R = 2`). +pub type Rv32HeapBranchAdapterCols = + Rv32HeapBranchAdapterColsGeneric; +pub type Rv32HeapBranchAdapterAir = + Rv32HeapBranchAdapterAirGeneric; +pub type Rv32HeapBranchAdapterRecord = + Rv32HeapBranchAdapterRecordGeneric; +pub type Rv32HeapBranchAdapterExecutor = + Rv32HeapBranchAdapterExecutorGeneric; +pub type Rv32HeapBranchAdapterFiller = + Rv32HeapBranchAdapterFillerGeneric; + +pub struct VecHeapBranchAdapterInterface< + T, + const NUM_READS: usize, + const BLOCKS_PER_READ: usize, + const READ_SIZE: usize, +>(PhantomData); + +impl + VmAdapterInterface for VecHeapBranchAdapterInterface +{ + type Reads = [[[T; READ_SIZE]; BLOCKS_PER_READ]; NUM_READS]; + type Writes = [[T; 0]; 0]; + type ProcessedInstruction = ImmInstruction; +} + +/// Adapter cols for branching on heap values read in NUM_READS pointers. 
#[repr(C)] #[derive(AlignedBorrow)] -pub struct Rv32HeapBranchAdapterCols { +pub struct Rv32HeapBranchAdapterColsGeneric< + T, + const NUM_READS: usize, + const BLOCKS_PER_READ: usize, + const READ_SIZE: usize, +> { pub from_state: ExecutionState, pub rs_ptr: [T; NUM_READS], pub rs_val: [[T; RV32_REGISTER_NUM_LIMBS]; NUM_READS], pub rs_read_aux: [MemoryReadAuxCols; NUM_READS], - pub heap_read_aux: [MemoryReadAuxCols; NUM_READS], + pub heap_read_aux: [[MemoryReadAuxCols; BLOCKS_PER_READ]; NUM_READS], } #[derive(Clone, Copy, Debug, derive_new::new)] -pub struct Rv32HeapBranchAdapterAir { +pub struct Rv32HeapBranchAdapterAirGeneric< + const NUM_READS: usize, + const BLOCKS_PER_READ: usize, + const READ_SIZE: usize, +> { pub(super) execution_bridge: ExecutionBridge, pub(super) memory_bridge: MemoryBridge, pub bus: BitwiseOperationLookupBus, address_bits: usize, } -impl BaseAir - for Rv32HeapBranchAdapterAir +impl + BaseAir for Rv32HeapBranchAdapterAirGeneric { fn width(&self) -> usize { - Rv32HeapBranchAdapterCols::::width() + Rv32HeapBranchAdapterColsGeneric::::width() } } -impl VmAdapterAir - for Rv32HeapBranchAdapterAir +impl VmAdapterAir + for Rv32HeapBranchAdapterAirGeneric +where + AB: InteractionBuilder, { type Interface = - BasicAdapterInterface, NUM_READS, 0, READ_SIZE, 0>; + VecHeapBranchAdapterInterface; fn eval( &self, @@ -76,7 +111,8 @@ impl VmA local: &[AB::Var], ctx: AdapterAirContext, ) { - let cols: &Rv32HeapBranchAdapterCols<_, NUM_READS, READ_SIZE> = local.borrow(); + let cols: &Rv32HeapBranchAdapterColsGeneric<_, NUM_READS, BLOCKS_PER_READ, READ_SIZE> = + local.borrow(); let timestamp = cols.from_state.timestamp; let mut timestamp_delta: usize = 0; let mut timestamp_pp = || { @@ -127,10 +163,18 @@ impl VmA acc * AB::F::from_canonical_u32(1 << RV32_CELL_BITS) + (*limb) }) }); - for (ptr, data, aux) in izip!(heap_ptr, ctx.reads, &cols.heap_read_aux) { - self.memory_bridge - .read(MemoryAddress::new(e, ptr), data, timestamp_pp(), aux) - .eval(builder, 
ctx.instruction.is_valid.clone()); + for (ptr, data_blocks, aux_blocks) in izip!(heap_ptr.iter(), ctx.reads.iter(), cols.heap_read_aux.iter()) { + for (block_idx, (data, aux)) in data_blocks.iter().zip(aux_blocks).enumerate() { + let offset = AB::Expr::from_canonical_usize(block_idx * READ_SIZE); + self.memory_bridge + .read( + MemoryAddress::new(e, ptr.clone() + offset), + data.clone(), + timestamp_pp(), + aux, + ) + .eval(builder, ctx.instruction.is_valid.clone()); + } } self.execution_bridge @@ -157,14 +201,15 @@ impl VmA } fn get_from_pc(&self, local: &[AB::Var]) -> AB::Var { - let cols: &Rv32HeapBranchAdapterCols<_, NUM_READS, READ_SIZE> = local.borrow(); + let cols: &Rv32HeapBranchAdapterColsGeneric<_, NUM_READS, BLOCKS_PER_READ, READ_SIZE> = + local.borrow(); cols.from_state.pc } } #[repr(C)] #[derive(AlignedBytesBorrow, Debug)] -pub struct Rv32HeapBranchAdapterRecord { +pub struct Rv32HeapBranchAdapterRecordGeneric { pub from_pc: u32, pub from_timestamp: u32, @@ -172,22 +217,30 @@ pub struct Rv32HeapBranchAdapterRecord { pub rs_vals: [u32; NUM_READS], pub rs_read_aux: [MemoryReadAuxRecord; NUM_READS], - pub heap_read_aux: [MemoryReadAuxRecord; NUM_READS], + pub heap_read_aux: [[MemoryReadAuxRecord; BLOCKS_PER_READ]; NUM_READS], } #[derive(Clone, Copy)] -pub struct Rv32HeapBranchAdapterExecutor { +pub struct Rv32HeapBranchAdapterExecutorGeneric< + const NUM_READS: usize, + const BLOCKS_PER_READ: usize, + const READ_SIZE: usize, +> { pub pointer_max_bits: usize, } -#[derive(derive_new::new)] -pub struct Rv32HeapBranchAdapterFiller { +#[derive(Clone, derive_new::new)] +pub struct Rv32HeapBranchAdapterFillerGeneric< + const NUM_READS: usize, + const BLOCKS_PER_READ: usize, + const READ_SIZE: usize, +> { pub pointer_max_bits: usize, pub bitwise_lookup_chip: SharedBitwiseOperationLookupChip, } -impl - Rv32HeapBranchAdapterExecutor +impl + Rv32HeapBranchAdapterExecutorGeneric { pub fn new(pointer_max_bits: usize) -> Self { assert!(NUM_READS <= 2); @@ -199,13 
+252,19 @@ impl } } -impl AdapterTraceExecutor - for Rv32HeapBranchAdapterExecutor +impl< + F: PrimeField32, + const NUM_READS: usize, + const BLOCKS_PER_READ: usize, + const READ_SIZE: usize, + > AdapterTraceExecutor + for Rv32HeapBranchAdapterExecutorGeneric { - const WIDTH: usize = Rv32HeapBranchAdapterCols::::width(); - type ReadData = [[u8; READ_SIZE]; NUM_READS]; + const WIDTH: usize = + Rv32HeapBranchAdapterColsGeneric::::width(); + type ReadData = [[[u8; READ_SIZE]; BLOCKS_PER_READ]; NUM_READS]; type WriteData = (); - type RecordMut<'a> = &'a mut Rv32HeapBranchAdapterRecord; + type RecordMut<'a> = &'a mut Rv32HeapBranchAdapterRecordGeneric; fn start(pc: u32, memory: &TracingMemory, adapter_record: &mut Self::RecordMut<'_>) { adapter_record.from_pc = pc; @@ -234,17 +293,20 @@ impl AdapterTra )) }); - // Read memory values + // Read memory values in 4-byte chunks from_fn(|i| { debug_assert!( - record.rs_vals[i] as usize + READ_SIZE - 1 < (1 << self.pointer_max_bits) + record.rs_vals[i] as usize + READ_SIZE * BLOCKS_PER_READ - 1 + < (1 << self.pointer_max_bits) ); - tracing_read( - memory, - RV32_MEMORY_AS, - record.rs_vals[i], - &mut record.heap_read_aux[i].prev_timestamp, - ) + from_fn(|j| { + tracing_read( + memory, + RV32_MEMORY_AS, + record.rs_vals[i] + (j * READ_SIZE) as u32, + &mut record.heap_read_aux[i][j].prev_timestamp, + ) + }) }) } @@ -259,18 +321,24 @@ impl AdapterTra } } -impl AdapterTraceFiller - for Rv32HeapBranchAdapterFiller +impl< + F: PrimeField32, + const NUM_READS: usize, + const BLOCKS_PER_READ: usize, + const READ_SIZE: usize, + > AdapterTraceFiller + for Rv32HeapBranchAdapterFillerGeneric { - const WIDTH: usize = Rv32HeapBranchAdapterCols::::width(); + const WIDTH: usize = + Rv32HeapBranchAdapterColsGeneric::::width(); fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory, mut adapter_row: &mut [F]) { // SAFETY: // - caller ensures `adapter_row` contains a valid record representation that was previously // written by the executor 
- let record: &Rv32HeapBranchAdapterRecord = + let record: &Rv32HeapBranchAdapterRecordGeneric = unsafe { get_record_from_slice(&mut adapter_row, ()) }; - let cols: &mut Rv32HeapBranchAdapterCols = + let cols: &mut Rv32HeapBranchAdapterColsGeneric = adapter_row.borrow_mut(); // Range checks: @@ -288,12 +356,16 @@ impl AdapterTra ); // **NOTE**: Must iterate everything in reverse order to avoid overwriting the records - for i in (0..NUM_READS).rev() { - mem_helper.fill( - record.heap_read_aux[i].prev_timestamp, - record.from_timestamp + (i + NUM_READS) as u32, - cols.heap_read_aux[i].as_mut(), - ); + let heap_ts_start = record.from_timestamp + NUM_READS as u32; + for read_idx in (0..NUM_READS).rev() { + for block_idx in (0..BLOCKS_PER_READ).rev() { + let ts_offset = read_idx * BLOCKS_PER_READ + block_idx; + mem_helper.fill( + record.heap_read_aux[read_idx][block_idx].prev_timestamp, + heap_ts_start + ts_offset as u32, + cols.heap_read_aux[read_idx][block_idx].as_mut(), + ); + } } for i in (0..NUM_READS).rev() { diff --git a/extensions/rv32-adapters/src/vec_heap.rs b/extensions/rv32-adapters/src/vec_heap.rs index a9b612f39b..2dc45c9db1 100644 --- a/extensions/rv32-adapters/src/vec_heap.rs +++ b/extensions/rv32-adapters/src/vec_heap.rs @@ -301,7 +301,7 @@ pub struct Rv32VecHeapAdapterExecutor< pointer_max_bits: usize, } -#[derive(derive_new::new)] +#[derive(Clone, derive_new::new)] pub struct Rv32VecHeapAdapterFiller< const NUM_READS: usize, const BLOCKS_PER_READ: usize,