diff --git a/.gitignore b/.gitignore index c6e6aa2049..87e918c19e 100644 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,11 @@ profile.json.gz # test fixtures benchmarks/fixtures + +#TODO: Remove this +crates/toolchain/tests/rv32im-test-vectors/tests/* +*.o +*.a +*.s +*.txt +riscv/* \ No newline at end of file diff --git a/crates/circuits/primitives/cuda/include/primitives/constants.h b/crates/circuits/primitives/cuda/include/primitives/constants.h index dec26b5f41..16396d41ec 100644 --- a/crates/circuits/primitives/cuda/include/primitives/constants.h +++ b/crates/circuits/primitives/cuda/include/primitives/constants.h @@ -3,91 +3,97 @@ #include namespace riscv { -static const size_t RV32_REGISTER_NUM_LIMBS = 4; -static const size_t RV32_CELL_BITS = 8; -static const size_t RV_J_TYPE_IMM_BITS = 21; +inline constexpr size_t RV32_REGISTER_NUM_LIMBS = 4; +inline constexpr size_t RV32_CELL_BITS = 8; +inline constexpr size_t RV_J_TYPE_IMM_BITS = 21; -static const size_t RV32_IMM_AS = 0; +inline constexpr size_t RV32_IMM_AS = 0; } // namespace riscv namespace program { -static const size_t PC_BITS = 30; -static const size_t DEFAULT_PC_STEP = 4; +inline constexpr size_t PC_BITS = 30; +inline constexpr size_t DEFAULT_PC_STEP = 4; } // namespace program namespace native { -static const size_t AS_IMMEDIATE = 0; -static const size_t AS_NATIVE = 4; -static const size_t EXT_DEG = 4; -static const size_t BETA = 11; +inline constexpr size_t AS_IMMEDIATE = 0; +inline constexpr size_t AS_NATIVE = 4; +inline constexpr size_t EXT_DEG = 4; +inline constexpr size_t BETA = 11; } // namespace native namespace poseidon2 { -static const size_t CHUNK = 8; +inline constexpr size_t CHUNK = 8; } // namespace poseidon2 namespace p3_keccak_air { -static const size_t NUM_ROUNDS = 24; -static const size_t BITS_PER_LIMB = 16; -static const size_t U64_LIMBS = 64 / BITS_PER_LIMB; -static const size_t RATE_BITS = 1088; -static const size_t RATE_LIMBS = RATE_BITS / BITS_PER_LIMB; +inline constexpr size_t NUM_ROUNDS = 24; +inline constexpr size_t BITS_PER_LIMB = 16; +inline constexpr size_t U64_LIMBS = 64 / BITS_PER_LIMB; +inline constexpr size_t RATE_BITS = 1088; +inline constexpr size_t RATE_LIMBS = RATE_BITS / BITS_PER_LIMB; } // namespace p3_keccak_air namespace keccak256 { /// Total number of sponge bytes: number of rate bytes + number of capacity bytes. -static const size_t KECCAK_WIDTH_BYTES = 200; +inline constexpr size_t KECCAK_WIDTH_BYTES = 200; /// Total number of 16-bit limbs in the sponge. -static const size_t KECCAK_WIDTH_U16S = KECCAK_WIDTH_BYTES / 2; +inline constexpr size_t KECCAK_WIDTH_U16S = KECCAK_WIDTH_BYTES / 2; /// Number of rate bytes. -static const size_t KECCAK_RATE_BYTES = 136; +inline constexpr size_t KECCAK_RATE_BYTES = 136; /// Number of 16-bit rate limbs. -static const size_t KECCAK_RATE_U16S = KECCAK_RATE_BYTES / 2; +inline constexpr size_t KECCAK_RATE_U16S = KECCAK_RATE_BYTES / 2; /// Number of absorb rounds, equal to rate in u64s. -static const size_t NUM_ABSORB_ROUNDS = KECCAK_RATE_BYTES / 8; +inline constexpr size_t NUM_ABSORB_ROUNDS = KECCAK_RATE_BYTES / 8; /// Number of capacity bytes. -static const size_t KECCAK_CAPACITY_BYTES = 64; +inline constexpr size_t KECCAK_CAPACITY_BYTES = 64; /// Number of 16-bit capacity limbs. -static const size_t KECCAK_CAPACITY_U16S = KECCAK_CAPACITY_BYTES / 2; +inline constexpr size_t KECCAK_CAPACITY_U16S = KECCAK_CAPACITY_BYTES / 2; /// Number of output digest bytes used during the squeezing phase. -static const size_t KECCAK_DIGEST_BYTES = 32; +inline constexpr size_t KECCAK_DIGEST_BYTES = 32; /// Number of 64-bit digest limbs. -static const size_t KECCAK_DIGEST_U64S = KECCAK_DIGEST_BYTES / 8; +inline constexpr size_t KECCAK_DIGEST_U64S = KECCAK_DIGEST_BYTES / 8; // ==== Constants for register/memory adapter ==== /// Register reads to get dst, src, len -static const size_t KECCAK_REGISTER_READS = 3; +inline constexpr size_t KECCAK_REGISTER_READS = 3; /// Number of cells to read/write in a single memory access -static const size_t KECCAK_WORD_SIZE = 4; +inline constexpr size_t KECCAK_WORD_SIZE = 4; /// Memory reads for absorb per row -static const size_t KECCAK_ABSORB_READS = KECCAK_RATE_BYTES / KECCAK_WORD_SIZE; +inline constexpr size_t KECCAK_ABSORB_READS = KECCAK_RATE_BYTES / KECCAK_WORD_SIZE; /// Memory writes for digest per row -static const size_t KECCAK_DIGEST_WRITES = KECCAK_DIGEST_BYTES / KECCAK_WORD_SIZE; +inline constexpr size_t KECCAK_DIGEST_WRITES = KECCAK_DIGEST_BYTES / KECCAK_WORD_SIZE; /// keccakf parameters -static const size_t KECCAK_ROUND = 24; -static const size_t KECCAK_STATE_SIZE = 25; -static const size_t KECCAK_Q_SIZE = 192; +inline constexpr size_t KECCAK_ROUND = 24; +inline constexpr size_t KECCAK_STATE_SIZE = 25; +inline constexpr size_t KECCAK_Q_SIZE = 192; /// From memory config -static const size_t KECCAK_POINTER_MAX_BITS = 29; +inline constexpr size_t KECCAK_POINTER_MAX_BITS = 29; } // namespace keccak256 namespace mod_builder { -static const size_t MAX_LIMBS = 97; +inline constexpr size_t MAX_LIMBS = 97; } // namespace mod_builder namespace sha256 { -static const size_t SHA256_BLOCK_BITS = 512; -static const size_t SHA256_BLOCK_U8S = 64; -static const size_t SHA256_BLOCK_WORDS = 16; -static const size_t SHA256_WORD_U8S = 4; -static const size_t SHA256_WORD_BITS = 32; -static const size_t SHA256_WORD_U16S = 2; -static const size_t SHA256_HASH_WORDS = 8; -static const size_t SHA256_NUM_READ_ROWS = 4; -static const size_t SHA256_ROWS_PER_BLOCK = 17; -static const size_t SHA256_ROUNDS_PER_ROW = 4; -static const size_t SHA256_ROW_VAR_CNT = 5; -static const size_t SHA256_REGISTER_READS = 3; -static const size_t SHA256_READ_SIZE = 16; -static const size_t SHA256_WRITE_SIZE = 32; -} // namespace sha256 \ No newline at end of file +inline constexpr size_t SHA256_BLOCK_BITS = 512; +inline constexpr size_t SHA256_BLOCK_U8S = 64; +inline constexpr size_t SHA256_BLOCK_WORDS = 16; +inline constexpr size_t SHA256_WORD_U8S = 4; +inline constexpr size_t SHA256_WORD_BITS = 32; +inline constexpr size_t SHA256_WORD_U16S = 2; +inline constexpr size_t SHA256_HASH_WORDS = 8; +inline constexpr size_t SHA256_NUM_READ_ROWS = 4; +inline constexpr size_t SHA256_ROWS_PER_BLOCK = 17; +inline constexpr size_t SHA256_ROUNDS_PER_ROW = 4; +inline constexpr size_t SHA256_ROW_VAR_CNT = 5; +inline constexpr size_t SHA256_REGISTER_READS = 3; +inline constexpr size_t SHA256_READ_SIZE = 16; +inline constexpr size_t SHA256_WRITE_SIZE = 32; +} // namespace sha256 + +namespace hintstore { +// Must match MAX_HINT_BUFFER_WORDS_BITS in openvm_rv32im_guest::lib.rs +inline constexpr size_t MAX_HINT_BUFFER_WORDS_BITS = 18; +inline constexpr size_t MAX_HINT_BUFFER_WORDS = (1 << MAX_HINT_BUFFER_WORDS_BITS) - 1; +} // namespace hintstore diff --git a/crates/toolchain/openvm/src/io/mod.rs b/crates/toolchain/openvm/src/io/mod.rs index eb00a9d3cd..05f073073e 100644 --- a/crates/toolchain/openvm/src/io/mod.rs +++ b/crates/toolchain/openvm/src/io/mod.rs @@ -6,7 +6,7 @@ use core::alloc::Layout; use core::fmt::Write; #[cfg(target_os = "zkvm")] -use openvm_rv32im_guest::{hint_buffer_u32, hint_input, hint_store_u32}; +use openvm_rv32im_guest::{hint_buffer_chunked, hint_input, hint_store_u32}; use serde::de::DeserializeOwned; #[cfg(not(target_os = "zkvm"))] @@ -83,7 +83,7 @@ pub(crate) fn read_vec_by_len(len: usize) -> Vec { // The heap-embedded-alloc uses linked list allocator, which has a minimum alignment of // `sizeof(usize) * 2 = 8` on 32-bit architectures: https://github.com/rust-osdev/linked-list-allocator/blob/b5caf3271259ddda60927752fa26527e0ccd2d56/src/hole.rs#L429 let mut bytes = Vec::with_capacity(capacity); - hint_buffer_u32!(bytes.as_mut_ptr(), num_words); + hint_buffer_chunked(bytes.as_mut_ptr(), num_words as usize); // SAFETY: We populate a `Vec` by hintstore-ing `num_words` 4 byte words. We set the // length to `len` and don't care about the extra `capacity - len` bytes stored. unsafe { diff --git a/crates/toolchain/openvm/src/io/read.rs b/crates/toolchain/openvm/src/io/read.rs index 39b2166e39..f2eff6cfa5 100644 --- a/crates/toolchain/openvm/src/io/read.rs +++ b/crates/toolchain/openvm/src/io/read.rs @@ -2,7 +2,7 @@ use core::mem::MaybeUninit; use openvm_platform::WORD_SIZE; #[cfg(target_os = "zkvm")] -use openvm_rv32im_guest::hint_buffer_u32; +use openvm_rv32im_guest::hint_buffer_chunked; use super::hint_store_word; use crate::serde::WordRead; @@ -31,7 +31,7 @@ impl WordRead for Reader { let num_words = words.len(); if let Some(new_remaining) = self.bytes_remaining.checked_sub(num_words * WORD_SIZE) { #[cfg(target_os = "zkvm")] - hint_buffer_u32!(words.as_mut_ptr(), words.len()); + hint_buffer_chunked(words.as_mut_ptr() as *mut u8, words.len()); #[cfg(not(target_os = "zkvm"))] { for w in words.iter_mut() { @@ -51,7 +51,7 @@ impl WordRead for Reader { } let mut num_padded_bytes = bytes.len(); #[cfg(target_os = "zkvm")] - hint_buffer_u32!(bytes as *mut [u8] as *mut u32, num_padded_bytes / WORD_SIZE); + hint_buffer_chunked(bytes.as_mut_ptr(), num_padded_bytes / WORD_SIZE); #[cfg(not(target_os = "zkvm"))] { let mut words = bytes.chunks_exact_mut(WORD_SIZE); diff --git a/crates/toolchain/openvm/src/pal_abi.rs b/crates/toolchain/openvm/src/pal_abi.rs index 0ab3d3f386..3797998bb8 100644 --- a/crates/toolchain/openvm/src/pal_abi.rs +++ b/crates/toolchain/openvm/src/pal_abi.rs @@ -5,7 +5,7 @@ /// system operations in the same way: there is no operating system and even the standard /// library should be directly handled with intrinsics. use openvm_platform::{fileno::*, memory::sys_alloc_aligned, rust_rt::terminate, WORD_SIZE}; -use openvm_rv32im_guest::{hint_buffer_u32, hint_random, raw_print_str_from_bytes}; +use openvm_rv32im_guest::{hint_buffer_chunked, hint_random, raw_print_str_from_bytes}; const DIGEST_WORDS: usize = 8; @@ -73,7 +73,7 @@ pub unsafe extern "C" fn sys_sha_buffer( #[no_mangle] pub unsafe extern "C" fn sys_rand(recv_buf: *mut u32, words: usize) { hint_random(words); - hint_buffer_u32!(recv_buf, words); + hint_buffer_chunked(recv_buf as *mut u8, words); } /// # Safety diff --git a/crates/vm/src/arch/config.rs b/crates/vm/src/arch/config.rs index 3ffbfb74e0..f84a73a79b 100644 --- a/crates/vm/src/arch/config.rs +++ b/crates/vm/src/arch/config.rs @@ -26,9 +26,7 @@ use crate::{ Arena, ChipInventoryError, ExecutorInventory, ExecutorInventoryError, }, system::{ - memory::{ - merkle::public_values::PUBLIC_VALUES_AS, num_memory_airs, CHUNK, POINTER_MAX_BITS, - }, + memory::{merkle::public_values::PUBLIC_VALUES_AS, num_memory_airs, POINTER_MAX_BITS}, SystemChipComplex, }, }; @@ -123,6 +121,11 @@ pub const OPENVM_DEFAULT_INIT_FILE_NAME: &str = "openvm_init.rs"; const DEFAULT_U8_BLOCK_SIZE: usize = 4; const DEFAULT_NATIVE_BLOCK_SIZE: usize = 1; +/// The constant block size used for memory accesses when access adapters are disabled. +/// All memory accesses for address spaces 1-3 must use this block size. +/// This is also the block size used by the Boundary AIR for memory bus interactions. +pub const CONST_BLOCK_SIZE: usize = 4; + /// Trait for generating a init.rs file that contains a call to moduli_init!, /// complex_init!, sw_init! with the supported moduli and curves. /// Should be implemented by all VM config structs. @@ -183,6 +186,10 @@ pub struct MemoryConfig { pub decomp: usize, /// Maximum N AccessAdapter AIR to support. pub max_access_adapter_n: usize, + /// Whether access adapters are enabled. When disabled, all memory accesses must be of the + /// standard block size (ie, 4 for address spaces 1-3). + #[new(value = "true")] + pub access_adapters_enabled: bool, } impl Default for MemoryConfig { @@ -194,7 +201,15 @@ impl Default for MemoryConfig { addr_spaces[RV32_MEMORY_AS as usize].num_cells = MAX_CELLS; addr_spaces[PUBLIC_VALUES_AS as usize].num_cells = DEFAULT_MAX_NUM_PUBLIC_VALUES; addr_spaces[NATIVE_AS as usize].num_cells = MAX_CELLS; - Self::new(3, addr_spaces, POINTER_MAX_BITS, 29, 17, 32) + Self { + addr_space_height: 3, + addr_spaces, + pointer_max_bits: POINTER_MAX_BITS, + timestamp_max_bits: 29, + decomp: 17, + max_access_adapter_n: 32, + access_adapters_enabled: true, + } } } @@ -245,6 +260,36 @@ impl MemoryConfig { .map(|addr_sp| log2_strict_usize(addr_sp.min_block_size) as u8) .collect() } + + /// Returns true if the Native address space (AS 4) is used + /// Native AS is considered "used" if it has any allocated cells + pub fn is_native_as_used(&self) -> bool { + self.addr_spaces + .get(NATIVE_AS as usize) + .is_some_and(|config| config.num_cells > 0) + } + + /// Disables access adapters. When disabled, all memory accesses for address spaces 1-3 + /// must use the constant block size (4). Access adapters will only be used for + /// address space 4 (Native) if it is enabled. + pub fn without_access_adapters(mut self) -> Self { + self.access_adapters_enabled = false; + self + } + + /// Enables access adapters. This is the default behavior + pub fn with_access_adapters(mut self) -> Self { + self.access_adapters_enabled = true; + self + } + + /// Automatically sets `access_adapters_enabled` based on whether Native AS is used. + /// If Native AS is not used, access adapters are disabled since all other address spaces + /// use a fixed block size of 4 + pub fn with_auto_access_adapters(mut self) -> Self { + self.access_adapters_enabled = self.is_native_as_used(); + self + } } /// System-level configuration for the virtual machine. Contains all configuration parameters that @@ -375,15 +420,42 @@ impl SystemConfig { + num_memory_airs( self.continuation_enabled, self.memory_config.max_access_adapter_n, + self.memory_config.access_adapters_enabled, ) } pub fn initial_block_size(&self) -> usize { match self.continuation_enabled { - true => CHUNK, + true => CONST_BLOCK_SIZE, false => 1, } } + + /// Disables access adapters. When disabled, all memory accesses for address spaces 1-3 + /// must use the constant block size (4) + pub fn without_access_adapters(mut self) -> Self { + self.memory_config.access_adapters_enabled = false; + self + } + + /// Enables access adapters. This is the default behavior. + pub fn with_access_adapters(mut self) -> Self { + self.memory_config.access_adapters_enabled = true; + self + } + + /// Automatically sets `access_adapters_enabled` based on whether Native AS is used. + /// If Native AS is not used, access adapters are disabled since all other address spaces + /// use a fixed block size of 4. + pub fn with_auto_access_adapters(mut self) -> Self { + self.memory_config = self.memory_config.with_auto_access_adapters(); + self + } + + /// Returns true if access adapters are enabled. + pub fn access_adapters_enabled(&self) -> bool { + self.memory_config.access_adapters_enabled + } } impl Default for SystemConfig { diff --git a/crates/vm/src/arch/execution.rs b/crates/vm/src/arch/execution.rs index 234dfbd5b9..0b7a13bfe9 100644 --- a/crates/vm/src/arch/execution.rs +++ b/crates/vm/src/arch/execution.rs @@ -38,6 +38,12 @@ pub enum ExecutionError { DisabledOperation { pc: u32, opcode: VmOpcode }, #[error("at pc = {pc}")] HintOutOfBounds { pc: u32 }, + #[error("at pc {pc}, hint buffer num_words {num_words} exceeds MAX_HINT_BUFFER_WORDS {max_hint_buffer_words}")] + HintBufferTooLarge { + pc: u32, + num_words: u32, + max_hint_buffer_words: u32, + }, #[error("at pc {pc}, tried to publish into index {public_value_index} when num_public_values = {num_public_values}")] PublicValueIndexOutOfBounds { pc: u32, diff --git a/crates/vm/src/arch/execution_mode/metered/ctx.rs b/crates/vm/src/arch/execution_mode/metered/ctx.rs index 8428438ca7..0b67a3b92d 100644 --- a/crates/vm/src/arch/execution_mode/metered/ctx.rs +++ b/crates/vm/src/arch/execution_mode/metered/ctx.rs @@ -64,11 +64,13 @@ impl MeteredCtx { air_names[merkle_tree_index] ); } - debug_assert!( - air_names[memory_ctx.adapter_offset].contains("AccessAdapterAir<2>"), - "air_name={}", - air_names[memory_ctx.adapter_offset] - ); + if memory_ctx.access_adapters_enabled { + debug_assert!( + air_names[memory_ctx.adapter_offset].contains("AccessAdapterAir<2>"), + "air_name={}", + air_names[memory_ctx.adapter_offset] + ); + } let segmentation_ctx = SegmentationCtx::new(air_names, widths, interactions, config.segmentation_limits); diff --git a/crates/vm/src/arch/execution_mode/metered/memory_ctx.rs b/crates/vm/src/arch/execution_mode/metered/memory_ctx.rs index 3429177d11..2397b1b12e 100644 --- a/crates/vm/src/arch/execution_mode/metered/memory_ctx.rs +++ b/crates/vm/src/arch/execution_mode/metered/memory_ctx.rs @@ -105,6 +105,7 @@ pub struct MemoryCtx { pub boundary_idx: usize, pub merkle_tree_index: Option, pub adapter_offset: usize, + pub access_adapters_enabled: bool, continuations_enabled: bool, chunk: u32, chunk_bits: u32, @@ -128,6 +129,7 @@ impl MemoryCtx { boundary_idx: config.memory_boundary_air_id(), merkle_tree_index: config.memory_merkle_air_id(), adapter_offset: config.access_adapter_air_id_offset(), + access_adapters_enabled: config.memory_config.access_adapters_enabled, chunk, chunk_bits, memory_dimensions, @@ -210,6 +212,10 @@ impl MemoryCtx { size_bits: u32, num: u32, ) { + if !self.access_adapters_enabled { + return; + } + debug_assert!((address_space as usize) < self.min_block_size_bits.len()); // SAFETY: address_space passed is usually a hardcoded constant or derived from an diff --git a/crates/vm/src/arch/execution_mode/metered_cost.rs b/crates/vm/src/arch/execution_mode/metered_cost.rs index 925bd25af2..69bfd6fe69 100644 --- a/crates/vm/src/arch/execution_mode/metered_cost.rs +++ b/crates/vm/src/arch/execution_mode/metered_cost.rs @@ -18,6 +18,7 @@ pub const DEFAULT_MAX_COST: u64 = DEFAULT_MAX_SEGMENTS * DEFAULT_SEGMENT_MAX_CEL pub struct AccessAdapterCtx { min_block_size_bits: Vec, idx_offset: usize, + enabled: bool, } impl AccessAdapterCtx { @@ -25,6 +26,7 @@ impl AccessAdapterCtx { Self { min_block_size_bits: config.memory_config.min_block_size_bits(), idx_offset: config.access_adapter_air_id_offset(), + enabled: config.memory_config.access_adapters_enabled, } } @@ -36,6 +38,10 @@ impl AccessAdapterCtx { size_bits: u32, widths: &[usize], ) { + if !self.enabled { + return; + } + debug_assert!((address_space as usize) < self.min_block_size_bits.len()); // SAFETY: address_space passed is usually a hardcoded constant or derived from an diff --git a/crates/vm/src/arch/testing/cpu.rs b/crates/vm/src/arch/testing/cpu.rs index 105962bc11..e5579fd22c 100644 --- a/crates/vm/src/arch/testing/cpu.rs +++ b/crates/vm/src/arch/testing/cpu.rs @@ -1,6 +1,7 @@ use std::sync::Arc; use itertools::zip_eq; +use openvm_circuit::arch::CONST_BLOCK_SIZE; use openvm_circuit_primitives::var_range::{ SharedVariableRangeCheckerChip, VariableRangeCheckerBus, VariableRangeCheckerChip, }; @@ -48,7 +49,7 @@ use crate::{ adapter::records::arena_size_bound, offline_checker::{MemoryBridge, MemoryBus}, online::TracingMemory, - MemoryAirInventory, MemoryController, SharedMemoryHelper, CHUNK, + MemoryAirInventory, MemoryController, SharedMemoryHelper, }, poseidon2::Poseidon2PeripheryChip, program::ProgramBus, @@ -347,7 +348,7 @@ impl VmChipTestBuilder { pub fn persistent(mem_config: MemoryConfig) -> Self { setup_tracing_with_log_level(Level::INFO); - let (range_checker, memory) = Self::range_checker_and_memory(&mem_config, CHUNK); + let (range_checker, memory) = Self::range_checker_and_memory(&mem_config, CONST_BLOCK_SIZE); let hasher_chip = Arc::new(Poseidon2PeripheryChip::new( vm_poseidon2_config(), POSEIDON2_DIRECT_BUS, @@ -403,7 +404,7 @@ impl Default for VmChipTestBuilder { // removed when tests are updated. mem_config.addr_spaces[RV32_REGISTER_AS as usize].num_cells = 1 << 29; mem_config.addr_spaces[NATIVE_AS as usize].num_cells = 0; - Self::volatile(mem_config) + Self::persistent(mem_config) } } diff --git a/crates/vm/src/arch/testing/memory/mod.rs b/crates/vm/src/arch/testing/memory/mod.rs index c5bea140ae..6ffd50cccf 100644 --- a/crates/vm/src/arch/testing/memory/mod.rs +++ b/crates/vm/src/arch/testing/memory/mod.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use std::{collections::HashMap, convert::TryInto}; use air::{MemoryDummyAir, MemoryDummyChip}; use openvm_stark_backend::p3_field::{Field, PrimeField32}; @@ -42,6 +42,32 @@ impl MemoryTester { pub fn read(&mut self, addr_space: usize, ptr: usize) -> [F; N] { let memory = &mut self.memory; + // For AS 1-3, force CONST_BLOCK_SIZE (4-byte) accesses to avoid adapters. + if addr_space <= 3 && N > 4 { + let mut out = [F::ZERO; N]; + for (chunk_idx, chunk) in out.chunks_exact_mut(4).enumerate() { + let chunk_ptr = ptr + chunk_idx * 4; + let t = memory.timestamp(); + let (t_prev, data) = + unsafe { memory.read::(addr_space as u32, chunk_ptr as u32) }; + let data_f = data.map(F::from_canonical_u8); + chunk.copy_from_slice(&data_f); + self.chip_for_block.get_mut(&4).unwrap().receive( + addr_space as u32, + chunk_ptr as u32, + &data_f, + t_prev, + ); + self.chip_for_block.get_mut(&4).unwrap().send( + addr_space as u32, + chunk_ptr as u32, + &data_f, + t, + ); + } + return out; + } + let t = memory.timestamp(); // TODO: this could be improved if we added a TracingMemory::get_f function let (t_prev, data) = if addr_space <= 3 { @@ -66,6 +92,38 @@ impl MemoryTester { pub fn write(&mut self, addr_space: usize, ptr: usize, data: [F; N]) { let memory = &mut self.memory; + // For AS 1-3, force CONST_BLOCK_SIZE (4-byte) accesses to avoid adapters. + if addr_space <= 3 && N > 4 { + for (chunk_idx, chunk) in data.chunks_exact(4).enumerate() { + let chunk_ptr = ptr + chunk_idx * 4; + let t = memory.timestamp(); + let chunk_u8: [u8; 4] = chunk + .iter() + .map(|x| x.as_canonical_u32() as u8) + .collect::>() + .try_into() + .unwrap(); + let (t_prev, data_prev) = unsafe { + memory.write::(addr_space as u32, chunk_ptr as u32, chunk_u8) + }; + let data_prev_f = data_prev.map(F::from_canonical_u8); + let chunk_f: [F; 4] = chunk.try_into().unwrap(); + self.chip_for_block.get_mut(&4).unwrap().receive( + addr_space as u32, + chunk_ptr as u32, + &data_prev_f, + t_prev, + ); + self.chip_for_block.get_mut(&4).unwrap().send( + addr_space as u32, + chunk_ptr as u32, + &chunk_f, + t, + ); + } + return; + } + let t = memory.timestamp(); // TODO: this could be improved if we added a TracingMemory::write_f function let (t_prev, data_prev) = if addr_space <= 3 { diff --git a/crates/vm/src/arch/vm.rs b/crates/vm/src/arch/vm.rs index 68555050fe..23db960d6d 100644 --- a/crates/vm/src/arch/vm.rs +++ b/crates/vm/src/arch/vm.rs @@ -622,7 +622,13 @@ where let system_config: &SystemConfig = self.config().as_ref(); let adapter_offset = system_config.access_adapter_air_id_offset(); // ATTENTION: this must agree with `num_memory_airs` - let num_adapters = log2_strict_usize(system_config.memory_config.max_access_adapter_n); + + let num_adapters = if system_config.memory_config.access_adapters_enabled { + log2_strict_usize(system_config.memory_config.max_access_adapter_n) + } else { + 0 + }; + assert_eq!(adapter_offset + num_adapters, system_config.num_airs()); let access_adapter_arena_size_bound = records::arena_size_bound( &trace_heights[adapter_offset..adapter_offset + num_adapters], diff --git a/crates/vm/src/system/memory/adapter/mod.rs b/crates/vm/src/system/memory/adapter/mod.rs index 8b0797dcf6..a9c89fc2ea 100644 --- a/crates/vm/src/system/memory/adapter/mod.rs +++ b/crates/vm/src/system/memory/adapter/mod.rs @@ -58,21 +58,26 @@ impl AccessAdapterInventory { memory_bus: MemoryBus, memory_config: MemoryConfig, ) -> Self { - let rc = range_checker; - let mb = memory_bus; - let tmb = memory_config.timestamp_max_bits; - let maan = memory_config.max_access_adapter_n; - assert!(matches!(maan, 2 | 4 | 8 | 16 | 32)); - let chips: Vec<_> = [ - Self::create_access_adapter_chip::<2>(rc.clone(), mb, tmb, maan), - Self::create_access_adapter_chip::<4>(rc.clone(), mb, tmb, maan), - Self::create_access_adapter_chip::<8>(rc.clone(), mb, tmb, maan), - Self::create_access_adapter_chip::<16>(rc.clone(), mb, tmb, maan), - Self::create_access_adapter_chip::<32>(rc.clone(), mb, tmb, maan), - ] - .into_iter() - .flatten() - .collect(); + // Only create adapter chips if access adapters are enabled + let chips: Vec<_> = if memory_config.access_adapters_enabled { + let rc = range_checker; + let mb = memory_bus; + let tmb = memory_config.timestamp_max_bits; + let maan = memory_config.max_access_adapter_n; + assert!(matches!(maan, 2 | 4 | 8 | 16 | 32)); + [ + Self::create_access_adapter_chip::<2>(rc.clone(), mb, tmb, maan), + Self::create_access_adapter_chip::<4>(rc.clone(), mb, tmb, maan), + Self::create_access_adapter_chip::<8>(rc.clone(), mb, tmb, maan), + Self::create_access_adapter_chip::<16>(rc.clone(), mb, tmb, maan), + Self::create_access_adapter_chip::<32>(rc.clone(), mb, tmb, maan), + ] + .into_iter() + .flatten() + .collect() + } else { + Vec::new() + }; Self { memory_config, chips, diff --git a/crates/vm/src/system/memory/controller/mod.rs b/crates/vm/src/system/memory/controller/mod.rs index aabe4df08d..e4733ccdf4 100644 --- a/crates/vm/src/system/memory/controller/mod.rs +++ b/crates/vm/src/system/memory/controller/mod.rs @@ -14,7 +14,6 @@ use openvm_stark_backend::{ interaction::PermutationCheckBus, p3_commit::PolynomialSpace, p3_field::{Field, PrimeField32}, - p3_maybe_rayon::prelude::{IntoParallelIterator, ParallelIterator}, p3_util::{log2_ceil_usize, log2_strict_usize}, prover::{cpu::CpuBackend, types::AirProvingContext}, Chip, @@ -24,7 +23,7 @@ use serde::{Deserialize, Serialize}; use self::interface::MemoryInterface; use super::{volatile::VolatileBoundaryChip, AddressMap}; use crate::{ - arch::{DenseRecordArena, MemoryConfig, ADDR_SPACE_OFFSET}, + arch::{DenseRecordArena, MemoryConfig, ADDR_SPACE_OFFSET, CONST_BLOCK_SIZE}, system::{ memory::{ adapter::AccessAdapterInventory, @@ -291,10 +290,30 @@ impl MemoryController { ) => { let hasher = self.hasher_chip.as_ref().unwrap(); boundary_chip.finalize(initial_memory, &final_memory, hasher.as_ref()); - let final_memory_values = final_memory - .into_par_iter() - .map(|(key, value)| (key, value.values)) - .collect(); + + // Rechunk CONST_BLOCK_SIZE blocks into CHUNK-sized blocks for merkle_chip + // Note: Equipartition key is (addr_space, ptr) where ptr is the starting pointer + let final_memory_values: Equipartition = { + use std::collections::BTreeMap; + let mut chunk_map: BTreeMap<(u32, u32), [F; CHUNK]> = BTreeMap::new(); + for ((addr_space, ptr), ts_values) in final_memory.into_iter() { + // Align to CHUNK boundary to get the chunk's starting pointer + let chunk_ptr = (ptr / CHUNK as u32) * CHUNK as u32; + let block_idx_in_chunk = + ((ptr % CHUNK as u32) / CONST_BLOCK_SIZE as u32) as usize; + let entry = chunk_map.entry((addr_space, chunk_ptr)).or_insert_with(|| { + // Initialize with values from initial memory + std::array::from_fn(|i| unsafe { + initial_memory.get_f::(addr_space, chunk_ptr + i as u32) + }) + }); + // Copy values for this block + for (i, val) in ts_values.values.into_iter().enumerate() { + entry[block_idx_in_chunk * CONST_BLOCK_SIZE + i] = val; + } + } + chunk_map + }; merkle_chip.finalize(initial_memory, &final_memory_values, hasher.as_ref()); } _ => panic!("TouchedMemory incorrect type"), diff --git a/crates/vm/src/system/memory/mod.rs b/crates/vm/src/system/memory/mod.rs index 411e7a5473..8c3f48c7f0 100644 --- a/crates/vm/src/system/memory/mod.rs +++ b/crates/vm/src/system/memory/mod.rs @@ -118,20 +118,24 @@ impl MemoryAirInventory { ); MemoryInterfaceAirs::Volatile { boundary } }; - // Memory access adapters - let lt_air = IsLtSubAir::new(range_bus, mem_config.timestamp_max_bits); - let maan = mem_config.max_access_adapter_n; - assert!(matches!(maan, 2 | 4 | 8 | 16 | 32)); - let access_adapters: Vec> = [ - Arc::new(AccessAdapterAir::<2> { memory_bus, lt_air }) as AirRef, - Arc::new(AccessAdapterAir::<4> { memory_bus, lt_air }) as AirRef, - Arc::new(AccessAdapterAir::<8> { memory_bus, lt_air }) as AirRef, - Arc::new(AccessAdapterAir::<16> { memory_bus, lt_air }) as AirRef, - Arc::new(AccessAdapterAir::<32> { memory_bus, lt_air }) as AirRef, - ] - .into_iter() - .take(log2_strict_usize(maan)) - .collect(); + // Memory access adapters - only create if enabled + let access_adapters: Vec> = if mem_config.access_adapters_enabled { + let lt_air = IsLtSubAir::new(range_bus, mem_config.timestamp_max_bits); + let maan = mem_config.max_access_adapter_n; + assert!(matches!(maan, 2 | 4 | 8 | 16 | 32)); + [ + Arc::new(AccessAdapterAir::<2> { memory_bus, lt_air }) as AirRef, + Arc::new(AccessAdapterAir::<4> { memory_bus, lt_air }) as AirRef, + Arc::new(AccessAdapterAir::<8> { memory_bus, lt_air }) as AirRef, + Arc::new(AccessAdapterAir::<16> { memory_bus, lt_air }) as AirRef, + Arc::new(AccessAdapterAir::<32> { memory_bus, lt_air }) as AirRef, + ] + .into_iter() + .take(log2_strict_usize(maan)) + .collect() + } else { + Vec::new() + }; Self { bridge, @@ -159,7 +163,16 @@ impl MemoryAirInventory { /// This is O(1) and returns the length of /// [`MemoryAirInventory::into_airs`]. -pub fn num_memory_airs(is_persistent: bool, max_access_adapter_n: usize) -> usize { - // boundary + { merkle if is_persistent } + access_adapters - 1 + usize::from(is_persistent) + log2_strict_usize(max_access_adapter_n) +pub fn num_memory_airs( + is_persistent: bool, + max_access_adapter_n: usize, + access_adapters_enabled: bool, +) -> usize { + // boundary + { merkle if is_persistent } + access_adapters (if enabled) + let num_adapters = if access_adapters_enabled { + log2_strict_usize(max_access_adapter_n) + } else { + 0 + }; + 1 + usize::from(is_persistent) + num_adapters } diff --git a/crates/vm/src/system/memory/online.rs b/crates/vm/src/system/memory/online.rs index 6a16e0d12b..74e0e33d5c 100644 --- a/crates/vm/src/system/memory/online.rs +++ b/crates/vm/src/system/memory/online.rs @@ -13,12 +13,12 @@ use tracing::instrument; use crate::{ arch::{ AddressSpaceHostConfig, AddressSpaceHostLayout, DenseRecordArena, MemoryConfig, - RecordArena, MAX_CELL_BYTE_SIZE, + RecordArena, CONST_BLOCK_SIZE, MAX_CELL_BYTE_SIZE, }, system::{ memory::{ adapter::records::{AccessLayout, AccessRecordHeader, MERGE_AND_NOT_SPLIT_FLAG}, - MemoryAddress, TimestampedEquipartition, TimestampedValues, CHUNK, + MemoryAddress, TimestampedEquipartition, TimestampedValues, }, TouchedMemory, }, @@ -580,6 +580,7 @@ impl TracingMemory { if header.block_size == header.lowest_block_size { return; } + assert_eq!(1, 0); // SAFETY: // - header.address_space is validated during instruction decoding and within bounds // - header.pointer and header.type_size define valid memory bounds within the address space @@ -612,7 +613,7 @@ impl TracingMemory { if header.block_size == header.lowest_block_size { return; } - + assert_eq!(1, 0); let record_mut = self .access_adapter_records .alloc(AccessLayout::from_record_header(&header)); @@ -944,7 +945,7 @@ impl TracingMemory { self.touched_blocks_to_equipartition::(touched_blocks), ), true => TouchedMemory::Persistent( - self.touched_blocks_to_equipartition::(touched_blocks), + self.touched_blocks_to_equipartition::(touched_blocks), ), } } diff --git a/crates/vm/src/system/memory/persistent.rs b/crates/vm/src/system/memory/persistent.rs index eeb22cbfd6..c30c80895e 100644 --- a/crates/vm/src/system/memory/persistent.rs +++ b/crates/vm/src/system/memory/persistent.rs @@ -20,15 +20,20 @@ use openvm_stark_backend::{ use rustc_hash::FxHashSet; use tracing::instrument; -use super::{merkle::SerialReceiver, online::INITIAL_TIMESTAMP, TimestampedValues}; +use super::{merkle::SerialReceiver, online::INITIAL_TIMESTAMP}; use crate::{ - arch::{hasher::Hasher, ADDR_SPACE_OFFSET}, + arch::{hasher::Hasher, ADDR_SPACE_OFFSET, CONST_BLOCK_SIZE}, system::memory::{ dimensions::MemoryDimensions, offline_checker::MemoryBus, MemoryAddress, MemoryImage, TimestampedEquipartition, }, }; +/// Number of CONST_BLOCK_SIZE blocks per CHUNK (e.g., 2 for 8/4). +/// Blocks are on the same row only for Merkle tree hashing (8 bytes at a time). +/// Memory bus interactions use per-block timestamps. +pub const BLOCKS_PER_CHUNK: usize = 2; + /// The values describe aligned chunk of memory of size `CHUNK`---the data together with the last /// accessed timestamp---in either the initial or final memory state. #[repr(C)] @@ -42,7 +47,10 @@ pub struct PersistentBoundaryCols { pub leaf_label: T, pub values: [T; CHUNK], pub hash: [T; CHUNK], - pub timestamp: T, + /// Per-block timestamps. Each CONST_BLOCK_SIZE block within the chunk has its own timestamp. + /// For untouched blocks, timestamp stays at 0 (balances: boundary sends at t=0 init, receives + /// at t=0 final). + pub timestamps: [T; BLOCKS_PER_CHUNK], } /// Imposes the following constraints: @@ -81,12 +89,14 @@ impl Air for PersistentBoundaryA local.expand_direction * local.expand_direction * local.expand_direction, ); - // Constrain that an "initial" row has timestamp zero. + // Constrain that an "initial" row has all timestamp zero. // Since `direction` is constrained to be in {-1, 0, 1}, we can select `direction == 1` // with the constraint below. - builder - .when(local.expand_direction * (local.expand_direction + AB::F::ONE)) - .assert_zero(local.timestamp); + let mut when_initial = + builder.when(local.expand_direction * (local.expand_direction + AB::F::ONE)); + for i in 0..BLOCKS_PER_CHUNK { + when_initial.assert_zero(local.timestamps[i]); + } let mut expand_fields = vec![ // direction = 1 => is_final = 0 @@ -109,16 +119,25 @@ impl Air for PersistentBoundaryA local.expand_direction * local.expand_direction, ); - self.memory_bus - .send( - MemoryAddress::new( - local.address_space, - local.leaf_label * AB::F::from_canonical_usize(CHUNK), - ), - local.values.to_vec(), - local.timestamp, - ) - .eval(builder, local.expand_direction); + debug_assert_eq!(CHUNK % CONST_BLOCK_SIZE, 0); + debug_assert_eq!(CHUNK / CONST_BLOCK_SIZE, BLOCKS_PER_CHUNK); + let chunk_size_f = AB::F::from_canonical_usize(CHUNK); + for block_idx in 0..BLOCKS_PER_CHUNK { + let offset = AB::F::from_canonical_usize(block_idx * CONST_BLOCK_SIZE); + // Split the 1xCHUNK leaf into CONST_BLOCK_SIZE-sized bus messages. + // Each block uses its own timestamp - untouched blocks stay at t=0. + self.memory_bus + .send( + MemoryAddress::new( + local.address_space, + local.leaf_label * chunk_size_f + offset, + ), + local.values[block_idx * CONST_BLOCK_SIZE..(block_idx + 1) * CONST_BLOCK_SIZE] + .to_vec(), + local.timestamps[block_idx], + ) + .eval(builder, local.expand_direction); + } } } @@ -142,7 +161,8 @@ pub struct FinalTouchedLabel { final_values: [F; CHUNK], init_hash: [F; CHUNK], final_hash: [F; CHUNK], - final_timestamp: u32, + /// Per-block timestamps. Each CONST_BLOCK_SIZE block has its own timestamp. + final_timestamps: [u32; BLOCKS_PER_CHUNK], } impl Default for TouchedLabels { @@ -207,34 +227,69 @@ impl PersistentBoundaryChip { } } + /// Finalize the boundary chip with per-block timestamped memory. + /// + /// `final_memory` is at CONST_BLOCK_SIZE granularity (4 bytes per entry, single timestamp + /// each). This function rechunks into CHUNK-sized (8 bytes) groups with per-block + /// timestamps. Untouched blocks within a touched chunk get values from initial_memory and + /// timestamp 0. #[instrument(name = "boundary_finalize", level = "debug", skip_all)] pub(crate) fn finalize( &mut self, initial_memory: &MemoryImage, - // Only touched stuff - final_memory: &TimestampedEquipartition, + // Touched stuff at CONST_BLOCK_SIZE granularity + final_memory: &TimestampedEquipartition, hasher: &H, ) where H: Hasher + Sync + for<'a> SerialReceiver<&'a [F]>, { - let final_touched_labels: Vec<_> = final_memory - .par_iter() - .map(|&((addr_space, ptr), ts_values)| { - let TimestampedValues { timestamp, values } = ts_values; + // Group CONST_BLOCK_SIZE blocks into CHUNK-sized groups + // Key: (addr_space, chunk_label), Value: per-block timestamps and values + use std::collections::BTreeMap; + let mut chunk_map: BTreeMap<(u32, u32), ([u32; BLOCKS_PER_CHUNK], [F; CHUNK])> = + BTreeMap::new(); + + for &((addr_space, ptr), ts_values) in final_memory.iter() { + let chunk_label = ptr / CHUNK as u32; + let block_idx_in_chunk = ((ptr % CHUNK as u32) / CONST_BLOCK_SIZE as u32) as usize; + + let entry = chunk_map + .entry((addr_space, chunk_label)) + .or_insert_with(|| { + // Initialize with values from initial memory and timestamps at 0 + let chunk_ptr = chunk_label * CHUNK as u32; + let init_values: [F; CHUNK] = array::from_fn(|i| unsafe { + initial_memory.get_f::(addr_space, chunk_ptr + i as u32) + }); + ([0u32; BLOCKS_PER_CHUNK], init_values) + }); + + // Set per-block timestamp + entry.0[block_idx_in_chunk] = ts_values.timestamp; + // Copy values for this block + for (i, &val) in ts_values.values.iter().enumerate() { + entry.1[block_idx_in_chunk * CONST_BLOCK_SIZE + i] = val; + } + } + + let final_touched_labels: Vec<_> = chunk_map + .into_par_iter() + .map(|((addr_space, chunk_label), (timestamps, final_values))| { + let chunk_ptr = chunk_label * CHUNK as u32; // SAFETY: addr_space from `final_memory` are all in bounds - let init_values = array::from_fn(|i| unsafe { - initial_memory.get_f::(addr_space, ptr + i as u32) + let init_values: [F; CHUNK] = array::from_fn(|i| unsafe { + initial_memory.get_f::(addr_space, chunk_ptr + i as u32) }); let initial_hash = hasher.hash(&init_values); - let final_hash = hasher.hash(&values); + let final_hash = hasher.hash(&final_values); FinalTouchedLabel { address_space: addr_space, - label: ptr / CHUNK as u32, + label: chunk_label, init_values, - final_values: values, + final_values, init_hash: initial_hash, final_hash, - final_timestamp: timestamp, + final_timestamps: timestamps, } }) .collect(); @@ -281,7 +336,9 @@ where leaf_label: Val::::from_canonical_u32(touched_label.label), values: touched_label.init_values, hash: touched_label.init_hash, - timestamp: Val::::from_canonical_u32(INITIAL_TIMESTAMP), + // Initial timestamps are all 0 (INITIAL_TIMESTAMP) + timestamps: [Val::::from_canonical_u32(INITIAL_TIMESTAMP); + BLOCKS_PER_CHUNK], }; *final_row.borrow_mut() = PersistentBoundaryCols { @@ -290,7 +347,10 @@ where leaf_label: Val::::from_canonical_u32(touched_label.label), values: touched_label.final_values, hash: touched_label.final_hash, - timestamp: Val::::from_canonical_u32(touched_label.final_timestamp), + // Per-block timestamps - untouched blocks stay at 0 + timestamps: touched_label + .final_timestamps + .map(Val::::from_canonical_u32), }; }); Arc::new(RowMajorMatrix::new(rows, width)) diff --git a/crates/vm/src/system/mod.rs b/crates/vm/src/system/mod.rs index d1ecb2daf1..195fa6a701 100644 --- a/crates/vm/src/system/mod.rs +++ b/crates/vm/src/system/mod.rs @@ -32,7 +32,8 @@ use crate::{ ChipInventoryError, DenseRecordArena, ExecutionBridge, ExecutionBus, ExecutionState, ExecutorInventory, ExecutorInventoryError, MatrixRecordArena, PhantomSubExecutor, RowMajorMatrixArena, SystemConfig, VmAirWrapper, VmBuilder, VmChipComplex, VmChipWrapper, - VmCircuitConfig, VmExecutionConfig, CONNECTOR_AIR_ID, PROGRAM_AIR_ID, PUBLIC_VALUES_AIR_ID, + VmCircuitConfig, VmExecutionConfig, CONNECTOR_AIR_ID, CONST_BLOCK_SIZE, PROGRAM_AIR_ID, + PUBLIC_VALUES_AIR_ID, }, system::{ connector::VmConnectorChip, @@ -145,7 +146,7 @@ pub struct SystemRecords { } pub enum TouchedMemory { - Persistent(TimestampedEquipartition), + Persistent(TimestampedEquipartition), Volatile(TimestampedEquipartition), } diff --git a/docs/vocs/docs/pages/specs/openvm/isa.mdx b/docs/vocs/docs/pages/specs/openvm/isa.mdx index 14b71fa05c..a1e8223540 100644 --- a/docs/vocs/docs/pages/specs/openvm/isa.mdx +++ b/docs/vocs/docs/pages/specs/openvm/isa.mdx @@ -35,6 +35,7 @@ OpenVM depends on the following parameters, some of which are fixed and some of | `addr_space_height` | The base 2 log of the number of writable address spaces supported. | Configurable, must satisfy `addr_space_height <= F::bits() - 2` | | `pointer_max_bits` | The maximum number of bits in a pointer. | Configurable, must satisfy `pointer_max_bits <= F::bits() - 2` | | `num_public_values` | The number of user public values. | Configurable. If continuation is enabled, it must equal `8` times a power of two(which is nonzero). | +| `MAX_HINT_BUFFER_WORDS_BITS` | The maximum number of bits for hint buffer word count. This determines `MAX_HINT_BUFFER_WORDS = 2^MAX_HINT_BUFFER_WORDS_BITS - 1` = 262,143 words (≈1MB), the maximum words per `HINT_BUFFER_RV32` instruction. | Fixed to 18. | We explain these parameters in subsequent sections. @@ -428,9 +429,11 @@ with user input-output. | Name | Operands | Description | | ---------------- | --------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | HINT_STOREW_RV32 | `_,b,_,1,2` | `[r32{0}(b):4]_2 = next 4 bytes from hint stream`. Only valid if next 4 values in hint stream are bytes. | -| HINT_BUFFER_RV32 | `a,b,_,1,2` | `[r32{0}(b):4 * l]_2 = next 4 * l bytes from hint stream` where `l = r32{0}(a)`. Only valid if next `4 * l` values in hint stream are bytes. Very important: `l` should not be 0. The pointer address `r32{0}(b)` does not need to be a multiple of `4`. | +| HINT_BUFFER_RV32 | `a,b,_,1,2` | `[r32{0}(b):4 * l]_2 = next 4 * l bytes from hint stream` where `l = r32{0}(a)`. Only valid if next `4 * l` values in hint stream are bytes. `l` must be non-zero and <= `MAX_HINT_BUFFER_WORDS` (262,143 words ≈ 1MB). The pointer address `r32{0}(b)` does not need to be a multiple of `4`. | | REVEAL_RV32 | `a,b,c,1,3,_,g` | Pseudo-instruction for `STOREW_RV32 a,b,c,1,3,_,g` writing to the user IO address space `3`. Only valid when continuations are enabled. | +> **Note:** The `MAX_HINT_BUFFER_WORDS` bound on `HINT_BUFFER_RV32` is enforced by both the executor and AIR constraints. The SDK's `hint_buffer_chunked` function automatically splits larger reads into multiple `HINT_BUFFER_RV32` instructions. + #### Phantom Sub-Instructions The RV32IM extension defines the following phantom sub-instructions. diff --git a/extensions/algebra/moduli-macros/src/lib.rs b/extensions/algebra/moduli-macros/src/lib.rs index 4ea8af0211..0266b7468e 100644 --- a/extensions/algebra/moduli-macros/src/lib.rs +++ b/extensions/algebra/moduli-macros/src/lib.rs @@ -875,15 +875,15 @@ pub fn moduli_declare(input: TokenStream) -> TokenStream { } #[cfg(target_os = "zkvm")] { - use ::openvm_algebra_guest::{openvm_custom_insn, openvm_rv32im_guest}; // needed for hint_store_u32! and hint_buffer_u32! + use ::openvm_algebra_guest::{openvm_custom_insn, openvm_rv32im_guest}; // needed for hint_store_u32! and hint_buffer_chunked let is_square = core::mem::MaybeUninit::::uninit(); - let sqrt = core::mem::MaybeUninit::<#struct_name>::uninit(); + let mut sqrt = core::mem::MaybeUninit::<#struct_name>::uninit(); unsafe { #hint_sqrt_extern_func(self as *const #struct_name as usize); let is_square_ptr = is_square.as_ptr() as *const u32; openvm_rv32im_guest::hint_store_u32!(is_square_ptr); - openvm_rv32im_guest::hint_buffer_u32!(sqrt.as_ptr() as *const u8, <#struct_name as ::openvm_algebra_guest::IntMod>::NUM_LIMBS / 4); + openvm_rv32im_guest::hint_buffer_chunked(sqrt.as_mut_ptr() as *mut u8, <#struct_name as ::openvm_algebra_guest::IntMod>::NUM_LIMBS / 4 as usize); let is_square = is_square.assume_init(); if is_square == 0 || is_square == 1 { Some((is_square == 1, sqrt.assume_init())) @@ -902,14 +902,14 @@ pub fn moduli_declare(input: TokenStream) -> TokenStream { } #[cfg(target_os = "zkvm")] { - use ::openvm_algebra_guest::{openvm_custom_insn, openvm_rv32im_guest}; // needed for hint_buffer_u32! + use ::openvm_algebra_guest::{openvm_custom_insn, openvm_rv32im_guest}; // needed for hint_buffer_chunked let mut non_qr_uninit = core::mem::MaybeUninit::::uninit(); let mut non_qr; unsafe { #hint_non_qr_extern_func(); - let ptr = non_qr_uninit.as_ptr() as *const u8; - openvm_rv32im_guest::hint_buffer_u32!(ptr, ::NUM_LIMBS / 4); + let ptr = non_qr_uninit.as_mut_ptr() as *mut u8; + openvm_rv32im_guest::hint_buffer_chunked(ptr, ::NUM_LIMBS / 4 as usize); non_qr = non_qr_uninit.assume_init(); } // ensure non_qr < modulus diff --git a/extensions/bigint/circuit/src/base_alu.rs b/extensions/bigint/circuit/src/base_alu.rs index 6a8e49a239..2c9da56d0b 100644 --- a/extensions/bigint/circuit/src/base_alu.rs +++ b/extensions/bigint/circuit/src/base_alu.rs @@ -12,17 +12,16 @@ use openvm_instructions::{ riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS}, LocalOpcode, }; -use openvm_rv32_adapters::Rv32HeapAdapterExecutor; use openvm_rv32im_circuit::BaseAluExecutor; use openvm_rv32im_transpiler::BaseAluOpcode; use openvm_stark_backend::p3_field::PrimeField32; use crate::{ - common::{bytes_to_u64_array, u64_array_to_bytes}, - Rv32BaseAlu256Executor, INT256_NUM_LIMBS, + common::{bytes_to_u64_array, u64_array_to_bytes, vm_read_int256, vm_write_int256}, + BigintHeapAdapterExecutor, Rv32BaseAlu256Executor, INT256_NUM_LIMBS, }; -type AdapterExecutor = Rv32HeapAdapterExecutor<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>; +type AdapterExecutor = BigintHeapAdapterExecutor; impl Rv32BaseAlu256Executor { pub fn new(adapter: AdapterExecutor, offset: usize) -> Self { @@ -142,12 +141,10 @@ unsafe fn execute_e12_impl( let rs1_ptr = exec_state.vm_read::(RV32_REGISTER_AS, pre_compute.b as u32); let rs2_ptr = exec_state.vm_read::(RV32_REGISTER_AS, pre_compute.c as u32); let rd_ptr = exec_state.vm_read::(RV32_REGISTER_AS, pre_compute.a as u32); - let rs1 = - exec_state.vm_read::(RV32_MEMORY_AS, u32::from_le_bytes(rs1_ptr)); - let rs2 = - exec_state.vm_read::(RV32_MEMORY_AS, u32::from_le_bytes(rs2_ptr)); + let rs1 = vm_read_int256(exec_state, u32::from_le_bytes(rs1_ptr)); + let rs2 = vm_read_int256(exec_state, u32::from_le_bytes(rs2_ptr)); let rd = ::compute(rs1, rs2); - exec_state.vm_write(RV32_MEMORY_AS, u32::from_le_bytes(rd_ptr), &rd); + vm_write_int256(exec_state, u32::from_le_bytes(rd_ptr), &rd); let pc = exec_state.pc(); exec_state.set_pc(pc.wrapping_add(DEFAULT_PC_STEP)); } diff --git a/extensions/bigint/circuit/src/branch_eq.rs b/extensions/bigint/circuit/src/branch_eq.rs index 4732f6f9a7..ba8562cab8 100644 --- a/extensions/bigint/circuit/src/branch_eq.rs +++ b/extensions/bigint/circuit/src/branch_eq.rs @@ -9,14 +9,16 @@ use openvm_instructions::{ riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS}, LocalOpcode, }; -use openvm_rv32_adapters::Rv32HeapBranchAdapterExecutor; use openvm_rv32im_circuit::BranchEqualExecutor; use openvm_rv32im_transpiler::BranchEqualOpcode; use openvm_stark_backend::p3_field::PrimeField32; -use crate::{common::bytes_to_u64_array, Rv32BranchEqual256Executor, INT256_NUM_LIMBS}; +use crate::{ + common::{bytes_to_u64_array, vm_read_int256}, + BigintBranchAdapterExecutor, Rv32BranchEqual256Executor, INT256_NUM_LIMBS, +}; -type AdapterExecutor = Rv32HeapBranchAdapterExecutor<2, INT256_NUM_LIMBS>; +type AdapterExecutor = BigintBranchAdapterExecutor; impl Rv32BranchEqual256Executor { pub fn new(adapter_step: AdapterExecutor, offset: usize, pc_step: u32) -> Self { @@ -131,10 +133,8 @@ unsafe fn execute_e12_impl(RV32_REGISTER_AS, pre_compute.a as u32); let rs2_ptr = exec_state.vm_read::(RV32_REGISTER_AS, pre_compute.b as u32); - let rs1 = - exec_state.vm_read::(RV32_MEMORY_AS, u32::from_le_bytes(rs1_ptr)); - let rs2 = - exec_state.vm_read::(RV32_MEMORY_AS, u32::from_le_bytes(rs2_ptr)); + let rs1 = vm_read_int256(exec_state, u32::from_le_bytes(rs1_ptr)); + let rs2 = vm_read_int256(exec_state, u32::from_le_bytes(rs2_ptr)); let cmp_result = u256_eq(rs1, rs2); if cmp_result ^ IS_NE { pc = (pc as isize + pre_compute.imm) as u32; diff --git a/extensions/bigint/circuit/src/branch_lt.rs b/extensions/bigint/circuit/src/branch_lt.rs index 8dc294d70d..0e777dc24f 100644 --- a/extensions/bigint/circuit/src/branch_lt.rs +++ b/extensions/bigint/circuit/src/branch_lt.rs @@ -12,17 +12,16 @@ use openvm_instructions::{ riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS}, LocalOpcode, }; -use openvm_rv32_adapters::Rv32HeapBranchAdapterExecutor; use openvm_rv32im_circuit::BranchLessThanExecutor; use openvm_rv32im_transpiler::BranchLessThanOpcode; use openvm_stark_backend::p3_field::PrimeField32; use crate::{ - common::{i256_lt, u256_lt}, - Rv32BranchLessThan256Executor, INT256_NUM_LIMBS, + common::{i256_lt, u256_lt, vm_read_int256}, + BigintBranchAdapterExecutor, Rv32BranchLessThan256Executor, INT256_NUM_LIMBS, }; -type AdapterExecutor = Rv32HeapBranchAdapterExecutor<2, INT256_NUM_LIMBS>; +type AdapterExecutor = BigintBranchAdapterExecutor; impl Rv32BranchLessThan256Executor { pub fn new(adapter: AdapterExecutor, offset: usize) -> Self { @@ -139,10 +138,8 @@ unsafe fn execute_e12_impl(RV32_REGISTER_AS, pre_compute.a as u32); let rs2_ptr = exec_state.vm_read::(RV32_REGISTER_AS, pre_compute.b as u32); - let rs1 = - exec_state.vm_read::(RV32_MEMORY_AS, u32::from_le_bytes(rs1_ptr)); - let rs2 = - exec_state.vm_read::(RV32_MEMORY_AS, u32::from_le_bytes(rs2_ptr)); + let rs1 = vm_read_int256(exec_state, u32::from_le_bytes(rs1_ptr)); + let rs2 = vm_read_int256(exec_state, u32::from_le_bytes(rs2_ptr)); let cmp_result = OP::compute(rs1, rs2); if cmp_result { pc = (pc as isize + pre_compute.imm) as u32; diff --git a/extensions/bigint/circuit/src/common.rs b/extensions/bigint/circuit/src/common.rs index 329cf1d479..062c2a77a6 100644 --- a/extensions/bigint/circuit/src/common.rs +++ b/extensions/bigint/circuit/src/common.rs @@ -1,5 +1,55 @@ +use std::convert::TryInto; + +use openvm_circuit::{ + arch::{ExecutionCtxTrait, VmExecState, CONST_BLOCK_SIZE}, + system::memory::online::GuestMemory, +}; +use openvm_instructions::riscv::RV32_MEMORY_AS; +use openvm_stark_backend::p3_field::PrimeField32; + use crate::{INT256_NUM_LIMBS, RV32_CELL_BITS}; +pub const INT256_CHUNK_BYTES: usize = CONST_BLOCK_SIZE; +pub const INT256_BLOCKS_PER_ACCESS: usize = INT256_NUM_LIMBS / INT256_CHUNK_BYTES; + +#[inline(always)] +pub fn vm_read_int256( + exec_state: &mut VmExecState, + ptr: u32, +) -> [u8; INT256_NUM_LIMBS] { + let mut out = [0u8; INT256_NUM_LIMBS]; + for (i, chunk) in out + .chunks_exact_mut(INT256_CHUNK_BYTES) + .enumerate() + { + let data = exec_state.vm_read::( + RV32_MEMORY_AS, + ptr + (i * INT256_CHUNK_BYTES) as u32, + ); + chunk.copy_from_slice(&data); + } + out +} + +#[inline(always)] +pub fn vm_write_int256( + exec_state: &mut VmExecState, + ptr: u32, + data: &[u8; INT256_NUM_LIMBS], +) { + for (i, chunk) in data + .chunks_exact(INT256_CHUNK_BYTES) + .enumerate() + { + let chunk: &[u8; INT256_CHUNK_BYTES] = chunk.try_into().expect("chunk size"); + exec_state.vm_write::( + RV32_MEMORY_AS, + ptr + (i * INT256_CHUNK_BYTES) as u32, + chunk, + ); + } +} + #[inline(always)] pub fn bytes_to_u64_array(bytes: [u8; INT256_NUM_LIMBS]) -> [u64; 4] { // SAFETY: [u8; 32] to [u64; 4] transmute is safe - same size and compatible alignment diff --git a/extensions/bigint/circuit/src/extension/mod.rs b/extensions/bigint/circuit/src/extension/mod.rs index 0adb7fc595..fc4ebf81bd 100644 --- a/extensions/bigint/circuit/src/extension/mod.rs +++ b/extensions/bigint/circuit/src/extension/mod.rs @@ -98,19 +98,19 @@ impl VmExecutionExtension for Int256 { let pointer_max_bits = inventory.pointer_max_bits(); let alu = Rv32BaseAlu256Executor::new( - Rv32HeapAdapterExecutor::new(pointer_max_bits), + BigintHeapAdapterExecutor::new(pointer_max_bits), Rv32BaseAlu256Opcode::CLASS_OFFSET, ); inventory.add_executor(alu, Rv32BaseAlu256Opcode::iter().map(|x| x.global_opcode()))?; let lt = Rv32LessThan256Executor::new( - Rv32HeapAdapterExecutor::new(pointer_max_bits), + BigintHeapAdapterExecutor::new(pointer_max_bits), Rv32LessThan256Opcode::CLASS_OFFSET, ); inventory.add_executor(lt, Rv32LessThan256Opcode::iter().map(|x| x.global_opcode()))?; let beq = Rv32BranchEqual256Executor::new( - Rv32HeapBranchAdapterExecutor::new(pointer_max_bits), + BigintBranchAdapterExecutor::new(pointer_max_bits), Rv32BranchEqual256Opcode::CLASS_OFFSET, DEFAULT_PC_STEP, ); @@ -120,7 +120,7 @@ impl VmExecutionExtension for Int256 { )?; let blt = Rv32BranchLessThan256Executor::new( - Rv32HeapBranchAdapterExecutor::new(pointer_max_bits), + BigintBranchAdapterExecutor::new(pointer_max_bits), Rv32BranchLessThan256Opcode::CLASS_OFFSET, ); inventory.add_executor( @@ -129,13 +129,13 @@ impl VmExecutionExtension for Int256 { )?; let mult = Rv32Multiplication256Executor::new( - Rv32HeapAdapterExecutor::new(pointer_max_bits), + BigintHeapAdapterExecutor::new(pointer_max_bits), Rv32Mul256Opcode::CLASS_OFFSET, ); inventory.add_executor(mult, Rv32Mul256Opcode::iter().map(|x| x.global_opcode()))?; let shift = Rv32Shift256Executor::new( - Rv32HeapAdapterExecutor::new(pointer_max_bits), + BigintHeapAdapterExecutor::new(pointer_max_bits), Rv32Shift256Opcode::CLASS_OFFSET, ); inventory.add_executor(shift, Rv32Shift256Opcode::iter().map(|x| x.global_opcode()))?; @@ -188,37 +188,37 @@ impl VmCircuitExtension for Int256 { }; let alu = Rv32BaseAlu256Air::new( - Rv32HeapAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu, pointer_max_bits), + BigintHeapAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu, pointer_max_bits), BaseAluCoreAir::new(bitwise_lu, Rv32BaseAlu256Opcode::CLASS_OFFSET), ); inventory.add_air(alu); let lt = Rv32LessThan256Air::new( - Rv32HeapAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu, pointer_max_bits), + BigintHeapAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu, pointer_max_bits), LessThanCoreAir::new(bitwise_lu, Rv32LessThan256Opcode::CLASS_OFFSET), ); inventory.add_air(lt); let beq = Rv32BranchEqual256Air::new( - Rv32HeapBranchAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu, pointer_max_bits), + BigintBranchAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu, pointer_max_bits), BranchEqualCoreAir::new(Rv32BranchEqual256Opcode::CLASS_OFFSET, DEFAULT_PC_STEP), ); inventory.add_air(beq); let blt = Rv32BranchLessThan256Air::new( - Rv32HeapBranchAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu, pointer_max_bits), + BigintBranchAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu, pointer_max_bits), BranchLessThanCoreAir::new(bitwise_lu, Rv32BranchLessThan256Opcode::CLASS_OFFSET), ); inventory.add_air(blt); let mult = Rv32Multiplication256Air::new( - Rv32HeapAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu, pointer_max_bits), + BigintHeapAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu, pointer_max_bits), MultiplicationCoreAir::new(range_tuple_checker, Rv32Mul256Opcode::CLASS_OFFSET), ); inventory.add_air(mult); let shift = Rv32Shift256Air::new( - Rv32HeapAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu, pointer_max_bits), + BigintHeapAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu, pointer_max_bits), ShiftCoreAir::new(bitwise_lu, range_checker, Rv32Shift256Opcode::CLASS_OFFSET), ); inventory.add_air(shift); @@ -281,7 +281,7 @@ where inventory.next_air::()?; let alu = Rv32BaseAlu256Chip::new( BaseAluFiller::new( - Rv32HeapAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()), + BigintHeapAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()), bitwise_lu.clone(), Rv32BaseAlu256Opcode::CLASS_OFFSET, ), @@ -292,7 +292,7 @@ where inventory.next_air::()?; let lt = Rv32LessThan256Chip::new( LessThanFiller::new( - Rv32HeapAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()), + BigintHeapAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()), bitwise_lu.clone(), Rv32LessThan256Opcode::CLASS_OFFSET, ), @@ -303,7 +303,7 @@ where inventory.next_air::()?; let beq = Rv32BranchEqual256Chip::new( BranchEqualFiller::new( - Rv32HeapBranchAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()), + BigintBranchAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()), Rv32BranchEqual256Opcode::CLASS_OFFSET, DEFAULT_PC_STEP, ), @@ -314,7 +314,7 @@ where inventory.next_air::()?; let blt = Rv32BranchLessThan256Chip::new( BranchLessThanFiller::new( - Rv32HeapBranchAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()), + BigintBranchAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()), bitwise_lu.clone(), Rv32BranchLessThan256Opcode::CLASS_OFFSET, ), @@ -325,7 +325,7 @@ where inventory.next_air::()?; let mult = Rv32Multiplication256Chip::new( MultiplicationFiller::new( - Rv32HeapAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()), + BigintHeapAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()), range_tuple_checker.clone(), Rv32Mul256Opcode::CLASS_OFFSET, ), @@ -336,7 +336,7 @@ where inventory.next_air::()?; let shift = Rv32Shift256Chip::new( ShiftFiller::new( - Rv32HeapAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()), + BigintHeapAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()), bitwise_lu.clone(), range_checker.clone(), Rv32Shift256Opcode::CLASS_OFFSET, diff --git a/extensions/bigint/circuit/src/less_than.rs b/extensions/bigint/circuit/src/less_than.rs index 68861d8ba0..9819e88a4b 100644 --- a/extensions/bigint/circuit/src/less_than.rs +++ b/extensions/bigint/circuit/src/less_than.rs @@ -12,14 +12,16 @@ use openvm_instructions::{ riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS}, LocalOpcode, }; -use openvm_rv32_adapters::Rv32HeapAdapterExecutor; use openvm_rv32im_circuit::LessThanExecutor; use openvm_rv32im_transpiler::LessThanOpcode; use openvm_stark_backend::p3_field::PrimeField32; -use crate::{common, Rv32LessThan256Executor, INT256_NUM_LIMBS}; +use crate::{ + common::{self, vm_read_int256, vm_write_int256}, + BigintHeapAdapterExecutor, Rv32LessThan256Executor, INT256_NUM_LIMBS, +}; -type AdapterExecutor = Rv32HeapAdapterExecutor<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>; +type AdapterExecutor = BigintHeapAdapterExecutor; impl Rv32LessThan256Executor { pub fn new(adapter: AdapterExecutor, offset: usize) -> Self { @@ -134,10 +136,8 @@ unsafe fn execute_e12_impl(RV32_REGISTER_AS, pre_compute.b as u32); let rs2_ptr = exec_state.vm_read::(RV32_REGISTER_AS, pre_compute.c as u32); let rd_ptr = exec_state.vm_read::(RV32_REGISTER_AS, pre_compute.a as u32); - let rs1 = - exec_state.vm_read::(RV32_MEMORY_AS, u32::from_le_bytes(rs1_ptr)); - let rs2 = - exec_state.vm_read::(RV32_MEMORY_AS, u32::from_le_bytes(rs2_ptr)); + let rs1 = vm_read_int256(exec_state, u32::from_le_bytes(rs1_ptr)); + let rs2 = vm_read_int256(exec_state, u32::from_le_bytes(rs2_ptr)); let cmp_result = if IS_U256 { common::u256_lt(rs1, rs2) } else { @@ -145,7 +145,7 @@ unsafe fn execute_e12_impl; +type BigintHeapAdapterFillerInner = + Rv32VecHeapAdapterFiller<2, INT256_BLOCKS_PER_ACCESS, INT256_BLOCKS_PER_ACCESS, INT256_CHUNK_BYTES, INT256_CHUNK_BYTES>; + +type BigintBranchAdapterInner = + Rv32HeapBranchAdapterExecutorGeneric<2, INT256_BLOCKS_PER_ACCESS, INT256_CHUNK_BYTES>; +type BigintBranchAdapterFillerInner = + Rv32HeapBranchAdapterFillerGeneric<2, INT256_BLOCKS_PER_ACCESS, INT256_CHUNK_BYTES>; + +fn chunk_expr( + word: [T; INT256_NUM_LIMBS], +) -> [[T; INT256_CHUNK_BYTES]; INT256_BLOCKS_PER_ACCESS] { + let mut iter = word.into_iter(); + std::array::from_fn(|_| std::array::from_fn(|_| iter.next().expect("chunk size"))) +} + +#[inline(always)] +fn flatten_int256( + chunks: [[u8; INT256_CHUNK_BYTES]; INT256_BLOCKS_PER_ACCESS], +) -> [u8; INT256_NUM_LIMBS] { + let mut word = [0u8; INT256_NUM_LIMBS]; + for (block_idx, block) in chunks.into_iter().enumerate() { + let start = block_idx * INT256_CHUNK_BYTES; + word[start..start + INT256_CHUNK_BYTES].copy_from_slice(&block); + } + word +} + +#[inline(always)] +fn chunk_int256( + word: [u8; INT256_NUM_LIMBS], +) -> [[u8; INT256_CHUNK_BYTES]; INT256_BLOCKS_PER_ACCESS] { + let mut chunks = [[0u8; INT256_CHUNK_BYTES]; INT256_BLOCKS_PER_ACCESS]; + for (block_idx, chunk) in chunks.iter_mut().enumerate() { + let start = block_idx * INT256_CHUNK_BYTES; + chunk.copy_from_slice(&word[start..start + INT256_CHUNK_BYTES]); + } + chunks +} + +#[derive(Clone)] +pub struct BigintAccessReads( + pub [[[T; INT256_CHUNK_BYTES]; INT256_BLOCKS_PER_ACCESS]; 2], +); + +#[derive(Clone)] +pub struct BigintAccessWrites(pub [[T; INT256_CHUNK_BYTES]; INT256_BLOCKS_PER_ACCESS]); + +impl BigintAccessReads { + fn into_inner(self) -> [[[T; INT256_CHUNK_BYTES]; INT256_BLOCKS_PER_ACCESS]; 2] { + self.0 + } +} + +impl BigintAccessWrites { + fn into_inner(self) -> [[T; INT256_CHUNK_BYTES]; INT256_BLOCKS_PER_ACCESS] { + self.0 + } +} + +impl From<[[T; INT256_NUM_LIMBS]; 2]> for BigintAccessReads { + fn from(value: [[T; INT256_NUM_LIMBS]; 2]) -> Self { + let [first, second] = value; + Self([chunk_expr(first), chunk_expr(second)]) + } +} + +impl From<[[T; INT256_NUM_LIMBS]; 1]> for BigintAccessWrites { + fn from(value: [[T; INT256_NUM_LIMBS]; 1]) -> Self { + let [word] = value; + Self(chunk_expr(word)) + } +} + +pub struct BigintHeapAdapterInterface(PhantomData); + +impl VmAdapterInterface for BigintHeapAdapterInterface { + type Reads = BigintAccessReads; + type Writes = BigintAccessWrites; + type ProcessedInstruction = MinimalInstruction; +} + +pub struct BigintBranchAdapterInterface(PhantomData); + +impl VmAdapterInterface for BigintBranchAdapterInterface { + type Reads = BigintAccessReads; + type Writes = (); + type ProcessedInstruction = ImmInstruction; +} + +#[derive(Clone, Copy)] +pub struct BigintHeapAdapterAir { + inner: Rv32VecHeapAdapterAir< + 2, + INT256_BLOCKS_PER_ACCESS, + INT256_BLOCKS_PER_ACCESS, + INT256_CHUNK_BYTES, + INT256_CHUNK_BYTES, + >, +} + +impl BigintHeapAdapterAir { + pub fn new( + execution_bridge: ExecutionBridge, + memory_bridge: MemoryBridge, + bus: BitwiseOperationLookupBus, + address_bits: usize, + ) -> Self { + Self { + inner: Rv32VecHeapAdapterAir::new( + execution_bridge, + memory_bridge, + bus, + address_bits, + ), + } + } +} + +impl BaseAir for BigintHeapAdapterAir { + fn width(&self) -> usize { + as BaseAir>::width(&self.inner) + } +} + +impl VmAdapterAir for BigintHeapAdapterAir +where + AB: InteractionBuilder, +{ + type Interface = BigintHeapAdapterInterface; + + fn eval( + &self, + builder: &mut AB, + local: &[AB::Var], + ctx: AdapterAirContext, + ) { + let inner_ctx = AdapterAirContext { + to_pc: ctx.to_pc, + reads: ctx.reads.into_inner(), + writes: ctx.writes.into_inner(), + instruction: ctx.instruction, + }; + self.inner.eval(builder, local, inner_ctx); + } + + fn get_from_pc(&self, local: &[AB::Var]) -> AB::Var { + as VmAdapterAir>::get_from_pc(&self.inner, local) + } +} + +#[derive(Clone, Copy)] +pub struct BigintBranchAdapterAir { + inner: + Rv32HeapBranchAdapterAirGeneric<2, INT256_BLOCKS_PER_ACCESS, INT256_CHUNK_BYTES>, +} + +impl BigintBranchAdapterAir { + pub fn new( + execution_bridge: ExecutionBridge, + memory_bridge: MemoryBridge, + bus: BitwiseOperationLookupBus, + address_bits: usize, + ) -> Self { + Self { + inner: Rv32HeapBranchAdapterAirGeneric::new( + execution_bridge, + memory_bridge, + bus, + address_bits, + ), + } + } +} + +impl BaseAir for BigintBranchAdapterAir { + fn width(&self) -> usize { + as BaseAir>::width(&self.inner) + } +} + +impl VmAdapterAir for BigintBranchAdapterAir +where + AB: InteractionBuilder, +{ + type Interface = BigintBranchAdapterInterface; + + fn eval( + &self, + builder: &mut AB, + local: &[AB::Var], + ctx: AdapterAirContext, + ) { + let inner_ctx = AdapterAirContext { + to_pc: ctx.to_pc, + reads: ctx.reads.into_inner(), + writes: [], + instruction: ctx.instruction, + }; + self.inner.eval(builder, local, inner_ctx); + } + + fn get_from_pc(&self, local: &[AB::Var]) -> AB::Var { + as VmAdapterAir>::get_from_pc(&self.inner, local) + } +} + +#[derive(Clone, Copy)] +pub struct BigintHeapAdapterExecutor { + inner: BigintHeapAdapterInner, +} + +impl BigintHeapAdapterExecutor { + pub fn new(pointer_max_bits: usize) -> Self { + Self { + inner: BigintHeapAdapterInner::new(pointer_max_bits), + } + } +} + +impl AdapterTraceExecutor for BigintHeapAdapterExecutor { + const WIDTH: usize = >::WIDTH; + type ReadData = [[u8; INT256_NUM_LIMBS]; 2]; + type WriteData = [[u8; INT256_NUM_LIMBS]; 1]; + type RecordMut<'a> = >::RecordMut<'a>; + + fn start(pc: u32, memory: &TracingMemory, record: &mut Self::RecordMut<'_>) { + >::start(pc, memory, record); + } + + fn read( + &self, + memory: &mut TracingMemory, + instruction: &Instruction, + record: &mut Self::RecordMut<'_>, + ) -> Self::ReadData { + let chunked = >::read( + &self.inner, + memory, + instruction, + record, + ); + chunked.map(flatten_int256) + } + + fn write( + &self, + memory: &mut TracingMemory, + instruction: &Instruction, + data: Self::WriteData, + record: &mut Self::RecordMut<'_>, + ) { + let [word] = data; + let chunked = chunk_int256(word); + >::write( + &self.inner, + memory, + instruction, + chunked, + record, + ); + } +} + +#[derive(Clone)] +pub struct BigintHeapAdapterFiller { + inner: BigintHeapAdapterFillerInner, +} + +impl BigintHeapAdapterFiller { + pub fn new( + pointer_max_bits: usize, + bitwise_lookup_chip: SharedBitwiseOperationLookupChip, + ) -> Self { + Self { + inner: BigintHeapAdapterFillerInner::new(pointer_max_bits, bitwise_lookup_chip), + } + } +} + +impl AdapterTraceFiller for BigintHeapAdapterFiller { + const WIDTH: usize = >::WIDTH; + + fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory, adapter_row: &mut [F]) { + self.inner.fill_trace_row(mem_helper, adapter_row); + } +} + +#[derive(Clone, Copy)] +pub struct BigintBranchAdapterExecutor { + inner: BigintBranchAdapterInner, +} + +impl BigintBranchAdapterExecutor { + pub fn new(pointer_max_bits: usize) -> Self { + Self { + inner: BigintBranchAdapterInner::new(pointer_max_bits), + } + } +} + +impl AdapterTraceExecutor for BigintBranchAdapterExecutor { + const WIDTH: usize = >::WIDTH; + type ReadData = [[u8; INT256_NUM_LIMBS]; 2]; + type WriteData = (); + type RecordMut<'a> = >::RecordMut<'a>; + + fn start(pc: u32, memory: &TracingMemory, record: &mut Self::RecordMut<'_>) { + >::start(pc, memory, record); + } + + fn read( + &self, + memory: &mut TracingMemory, + instruction: &Instruction, + record: &mut Self::RecordMut<'_>, + ) -> Self::ReadData { + let chunked = >::read( + &self.inner, + memory, + instruction, + record, + ); + chunked.map(flatten_int256) + } + + fn write( + &self, + memory: &mut TracingMemory, + instruction: &Instruction, + data: Self::WriteData, + record: &mut Self::RecordMut<'_>, + ) { + >::write( + &self.inner, + memory, + instruction, + data, + record, + ); + } +} + +#[derive(Clone)] +pub struct BigintBranchAdapterFiller { + inner: BigintBranchAdapterFillerInner, +} + +impl BigintBranchAdapterFiller { + pub fn new( + pointer_max_bits: usize, + bitwise_lookup_chip: SharedBitwiseOperationLookupChip, + ) -> Self { + Self { + inner: BigintBranchAdapterFillerInner::new(pointer_max_bits, bitwise_lookup_chip), + } + } +} + +impl AdapterTraceFiller for BigintBranchAdapterFiller { + const WIDTH: usize = >::WIDTH; + + fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory, adapter_row: &mut [F]) { + self.inner.fill_trace_row(mem_helper, adapter_row); + } +} + /// BaseAlu256 pub type Rv32BaseAlu256Air = VmAirWrapper< - Rv32HeapAdapterAir<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>, + BigintHeapAdapterAir, BaseAluCoreAir, >; #[derive(Clone, PreflightExecutor)] pub struct Rv32BaseAlu256Executor( BaseAluExecutor< - Rv32HeapAdapterExecutor<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>, + BigintHeapAdapterExecutor, INT256_NUM_LIMBS, RV32_CELL_BITS, >, @@ -57,7 +469,7 @@ pub struct Rv32BaseAlu256Executor( pub type Rv32BaseAlu256Chip = VmChipWrapper< F, BaseAluFiller< - Rv32HeapAdapterFiller<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>, + BigintHeapAdapterFiller, INT256_NUM_LIMBS, RV32_CELL_BITS, >, @@ -65,13 +477,13 @@ pub type Rv32BaseAlu256Chip = VmChipWrapper< /// LessThan256 pub type Rv32LessThan256Air = VmAirWrapper< - Rv32HeapAdapterAir<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>, + BigintHeapAdapterAir, LessThanCoreAir, >; #[derive(Clone, PreflightExecutor)] pub struct Rv32LessThan256Executor( LessThanExecutor< - Rv32HeapAdapterExecutor<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>, + BigintHeapAdapterExecutor, INT256_NUM_LIMBS, RV32_CELL_BITS, >, @@ -79,7 +491,7 @@ pub struct Rv32LessThan256Executor( pub type Rv32LessThan256Chip = VmChipWrapper< F, LessThanFiller< - Rv32HeapAdapterFiller<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>, + BigintHeapAdapterFiller, INT256_NUM_LIMBS, RV32_CELL_BITS, >, @@ -87,13 +499,13 @@ pub type Rv32LessThan256Chip = VmChipWrapper< /// Multiplication256 pub type Rv32Multiplication256Air = VmAirWrapper< - Rv32HeapAdapterAir<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>, + BigintHeapAdapterAir, MultiplicationCoreAir, >; #[derive(Clone, PreflightExecutor)] pub struct Rv32Multiplication256Executor( MultiplicationExecutor< - Rv32HeapAdapterExecutor<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>, + BigintHeapAdapterExecutor, INT256_NUM_LIMBS, RV32_CELL_BITS, >, @@ -101,7 +513,7 @@ pub struct Rv32Multiplication256Executor( pub type Rv32Multiplication256Chip = VmChipWrapper< F, MultiplicationFiller< - Rv32HeapAdapterFiller<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>, + BigintHeapAdapterFiller, INT256_NUM_LIMBS, RV32_CELL_BITS, >, @@ -109,13 +521,13 @@ pub type Rv32Multiplication256Chip = VmChipWrapper< /// Shift256 pub type Rv32Shift256Air = VmAirWrapper< - Rv32HeapAdapterAir<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>, + BigintHeapAdapterAir, ShiftCoreAir, >; #[derive(Clone, PreflightExecutor)] pub struct Rv32Shift256Executor( ShiftExecutor< - Rv32HeapAdapterExecutor<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>, + BigintHeapAdapterExecutor, INT256_NUM_LIMBS, RV32_CELL_BITS, >, @@ -123,7 +535,7 @@ pub struct Rv32Shift256Executor( pub type Rv32Shift256Chip = VmChipWrapper< F, ShiftFiller< - Rv32HeapAdapterFiller<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>, + BigintHeapAdapterFiller, INT256_NUM_LIMBS, RV32_CELL_BITS, >, @@ -131,27 +543,27 @@ pub type Rv32Shift256Chip = VmChipWrapper< /// BranchEqual256 pub type Rv32BranchEqual256Air = VmAirWrapper< - Rv32HeapBranchAdapterAir<2, INT256_NUM_LIMBS>, + BigintBranchAdapterAir, BranchEqualCoreAir, >; #[derive(Clone, PreflightExecutor)] pub struct Rv32BranchEqual256Executor( - BranchEqualExecutor, INT256_NUM_LIMBS>, + BranchEqualExecutor, ); pub type Rv32BranchEqual256Chip = VmChipWrapper< F, - BranchEqualFiller, INT256_NUM_LIMBS>, + BranchEqualFiller, >; /// BranchLessThan256 pub type Rv32BranchLessThan256Air = VmAirWrapper< - Rv32HeapBranchAdapterAir<2, INT256_NUM_LIMBS>, + BigintBranchAdapterAir, BranchLessThanCoreAir, >; #[derive(Clone, PreflightExecutor)] pub struct Rv32BranchLessThan256Executor( BranchLessThanExecutor< - Rv32HeapBranchAdapterExecutor<2, INT256_NUM_LIMBS>, + BigintBranchAdapterExecutor, INT256_NUM_LIMBS, RV32_CELL_BITS, >, @@ -159,7 +571,7 @@ pub struct Rv32BranchLessThan256Executor( pub type Rv32BranchLessThan256Chip = VmChipWrapper< F, BranchLessThanFiller< - Rv32HeapBranchAdapterFiller<2, INT256_NUM_LIMBS>, + BigintBranchAdapterFiller, INT256_NUM_LIMBS, RV32_CELL_BITS, >, diff --git a/extensions/bigint/circuit/src/mult.rs b/extensions/bigint/circuit/src/mult.rs index 2eff4b9096..10d629e9e6 100644 --- a/extensions/bigint/circuit/src/mult.rs +++ b/extensions/bigint/circuit/src/mult.rs @@ -9,17 +9,16 @@ use openvm_instructions::{ riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS}, LocalOpcode, }; -use openvm_rv32_adapters::Rv32HeapAdapterExecutor; use openvm_rv32im_circuit::MultiplicationExecutor; use openvm_rv32im_transpiler::MulOpcode; use openvm_stark_backend::p3_field::PrimeField32; use crate::{ - common::{bytes_to_u32_array, u32_array_to_bytes}, - Rv32Multiplication256Executor, INT256_NUM_LIMBS, + common::{bytes_to_u32_array, u32_array_to_bytes, vm_read_int256, vm_write_int256}, + BigintHeapAdapterExecutor, Rv32Multiplication256Executor, INT256_NUM_LIMBS, }; -type AdapterExecutor = Rv32HeapAdapterExecutor<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>; +type AdapterExecutor = BigintHeapAdapterExecutor; impl Rv32Multiplication256Executor { pub fn new(adapter: AdapterExecutor, offset: usize) -> Self { @@ -125,12 +124,10 @@ unsafe fn execute_e12_impl( let rs1_ptr = exec_state.vm_read::(RV32_REGISTER_AS, pre_compute.b as u32); let rs2_ptr = exec_state.vm_read::(RV32_REGISTER_AS, pre_compute.c as u32); let rd_ptr = exec_state.vm_read::(RV32_REGISTER_AS, pre_compute.a as u32); - let rs1 = - exec_state.vm_read::(RV32_MEMORY_AS, u32::from_le_bytes(rs1_ptr)); - let rs2 = - exec_state.vm_read::(RV32_MEMORY_AS, u32::from_le_bytes(rs2_ptr)); + let rs1 = vm_read_int256(exec_state, u32::from_le_bytes(rs1_ptr)); + let rs2 = vm_read_int256(exec_state, u32::from_le_bytes(rs2_ptr)); let rd = u256_mul(rs1, rs2); - exec_state.vm_write(RV32_MEMORY_AS, u32::from_le_bytes(rd_ptr), &rd); + vm_write_int256(exec_state, u32::from_le_bytes(rd_ptr), &rd); let pc = exec_state.pc(); exec_state.set_pc(pc.wrapping_add(DEFAULT_PC_STEP)); diff --git a/extensions/bigint/circuit/src/shift.rs b/extensions/bigint/circuit/src/shift.rs index c08afc26e0..498d4731b0 100644 --- a/extensions/bigint/circuit/src/shift.rs +++ b/extensions/bigint/circuit/src/shift.rs @@ -12,17 +12,16 @@ use openvm_instructions::{ riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS}, LocalOpcode, }; -use openvm_rv32_adapters::Rv32HeapAdapterExecutor; use openvm_rv32im_circuit::ShiftExecutor; use openvm_rv32im_transpiler::ShiftOpcode; use openvm_stark_backend::p3_field::PrimeField32; use crate::{ - common::{bytes_to_u64_array, u64_array_to_bytes}, - Rv32Shift256Executor, INT256_NUM_LIMBS, + common::{bytes_to_u64_array, u64_array_to_bytes, vm_read_int256, vm_write_int256}, + BigintHeapAdapterExecutor, Rv32Shift256Executor, INT256_NUM_LIMBS, }; -type AdapterExecutor = Rv32HeapAdapterExecutor<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>; +type AdapterExecutor = BigintHeapAdapterExecutor; impl Rv32Shift256Executor { pub fn new(adapter: AdapterExecutor, offset: usize) -> Self { @@ -138,12 +137,10 @@ unsafe fn execute_e12_impl let rs1_ptr = exec_state.vm_read::(RV32_REGISTER_AS, pre_compute.b as u32); let rs2_ptr = exec_state.vm_read::(RV32_REGISTER_AS, pre_compute.c as u32); let rd_ptr = exec_state.vm_read::(RV32_REGISTER_AS, pre_compute.a as u32); - let rs1 = - exec_state.vm_read::(RV32_MEMORY_AS, u32::from_le_bytes(rs1_ptr)); - let rs2 = - exec_state.vm_read::(RV32_MEMORY_AS, u32::from_le_bytes(rs2_ptr)); + let rs1 = vm_read_int256(exec_state, u32::from_le_bytes(rs1_ptr)); + let rs2 = vm_read_int256(exec_state, u32::from_le_bytes(rs2_ptr)); let rd = OP::compute(rs1, rs2); - exec_state.vm_write(RV32_MEMORY_AS, u32::from_le_bytes(rd_ptr), &rd); + vm_write_int256(exec_state, u32::from_le_bytes(rd_ptr), &rd); let pc = exec_state.pc(); exec_state.set_pc(pc.wrapping_add(DEFAULT_PC_STEP)); } diff --git a/extensions/bigint/circuit/src/tests.rs b/extensions/bigint/circuit/src/tests.rs index 00892e28a0..02a4c8e73c 100644 --- a/extensions/bigint/circuit/src/tests.rs +++ b/extensions/bigint/circuit/src/tests.rs @@ -10,7 +10,7 @@ use openvm_circuit::{ TestBuilder, TestChipHarness, VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS, RANGE_TUPLE_CHECKER_BUS, }, - Arena, ExecutionBridge, PreflightExecutor, + Arena, ExecutionBridge, MemoryConfig, PreflightExecutor, }, system::memory::{offline_checker::MemoryBridge, SharedMemoryHelper}, utils::generate_long_number, @@ -22,14 +22,10 @@ use openvm_circuit_primitives::{ }; use openvm_instructions::{ program::{DEFAULT_PC_STEP, PC_BITS}, - riscv::RV32_CELL_BITS, - LocalOpcode, -}; -use openvm_rv32_adapters::{ - rv32_heap_branch_default, rv32_write_heap_default, Rv32HeapAdapterAir, Rv32HeapAdapterExecutor, - Rv32HeapAdapterFiller, Rv32HeapBranchAdapterAir, Rv32HeapBranchAdapterExecutor, - Rv32HeapBranchAdapterFiller, + riscv::{RV32_CELL_BITS, RV32_REGISTER_AS}, + LocalOpcode, NATIVE_AS, }; +use openvm_rv32_adapters::{rv32_heap_branch_default, rv32_write_heap_default}; use openvm_rv32im_circuit::{ adapters::{INT256_NUM_LIMBS, RV_B_TYPE_IMM_BITS}, BaseAluCoreAir, BaseAluFiller, BranchEqualCoreAir, BranchEqualFiller, BranchLessThanCoreAir, @@ -63,12 +59,13 @@ use { }; use crate::{ - Rv32BaseAlu256Air, Rv32BaseAlu256Chip, Rv32BaseAlu256Executor, Rv32BranchEqual256Air, - Rv32BranchEqual256Chip, Rv32BranchEqual256Executor, Rv32BranchLessThan256Air, - Rv32BranchLessThan256Chip, Rv32BranchLessThan256Executor, Rv32LessThan256Air, - Rv32LessThan256Chip, Rv32LessThan256Executor, Rv32Multiplication256Air, - Rv32Multiplication256Chip, Rv32Multiplication256Executor, Rv32Shift256Air, Rv32Shift256Chip, - Rv32Shift256Executor, + BigintBranchAdapterAir, BigintBranchAdapterExecutor, BigintBranchAdapterFiller, + BigintHeapAdapterAir, BigintHeapAdapterExecutor, BigintHeapAdapterFiller, Rv32BaseAlu256Air, + Rv32BaseAlu256Chip, Rv32BaseAlu256Executor, Rv32BranchEqual256Air, Rv32BranchEqual256Chip, + Rv32BranchEqual256Executor, Rv32BranchLessThan256Air, Rv32BranchLessThan256Chip, + Rv32BranchLessThan256Executor, Rv32LessThan256Air, Rv32LessThan256Chip, + Rv32LessThan256Executor, Rv32Multiplication256Air, Rv32Multiplication256Chip, + Rv32Multiplication256Executor, Rv32Shift256Air, Rv32Shift256Chip, Rv32Shift256Executor, }; type F = BabyBear; @@ -79,6 +76,14 @@ const RANGE_TUPLE_SIZES: [u32; 2] = [ (INT256_NUM_LIMBS * (1 << RV32_CELL_BITS)) as u32, ]; +fn tester_with_access_adapters_disabled() -> VmChipTestBuilder { + let mut mem_config = MemoryConfig::default(); + mem_config.addr_spaces[RV32_REGISTER_AS as usize].num_cells = 1 << 29; + mem_config.addr_spaces[NATIVE_AS as usize].num_cells = 0; + mem_config.access_adapters_enabled = false; + VmChipTestBuilder::persistent(mem_config) +} + fn create_alu_harness_fields( memory_bridge: MemoryBridge, execution_bridge: ExecutionBridge, @@ -91,7 +96,7 @@ fn create_alu_harness_fields( Rv32BaseAlu256Chip, ) { let air = Rv32BaseAlu256Air::new( - Rv32HeapAdapterAir::new( + BigintHeapAdapterAir::new( execution_bridge, memory_bridge, bitwise_chip.bus(), @@ -100,12 +105,12 @@ fn create_alu_harness_fields( BaseAluCoreAir::new(bitwise_chip.bus(), Rv32BaseAlu256Opcode::CLASS_OFFSET), ); let executor = Rv32BaseAlu256Executor::new( - Rv32HeapAdapterExecutor::new(address_bits), + BigintHeapAdapterExecutor::new(address_bits), Rv32BaseAlu256Opcode::CLASS_OFFSET, ); let chip = Rv32BaseAlu256Chip::new( BaseAluFiller::new( - Rv32HeapAdapterFiller::new(address_bits, bitwise_chip.clone()), + BigintHeapAdapterFiller::new(address_bits, bitwise_chip.clone()), bitwise_chip, Rv32BaseAlu256Opcode::CLASS_OFFSET, ), @@ -126,7 +131,7 @@ fn create_lt_harness_fields( Rv32LessThan256Chip, ) { let air = Rv32LessThan256Air::new( - Rv32HeapAdapterAir::new( + BigintHeapAdapterAir::new( execution_bridge, memory_bridge, bitwise_chip.bus(), @@ -135,12 +140,12 @@ fn create_lt_harness_fields( LessThanCoreAir::new(bitwise_chip.bus(), Rv32LessThan256Opcode::CLASS_OFFSET), ); let executor = Rv32LessThan256Executor::new( - Rv32HeapAdapterExecutor::new(address_bits), + BigintHeapAdapterExecutor::new(address_bits), Rv32LessThan256Opcode::CLASS_OFFSET, ); let chip = Rv32LessThan256Chip::new( LessThanFiller::new( - Rv32HeapAdapterFiller::new(address_bits, bitwise_chip.clone()), + BigintHeapAdapterFiller::new(address_bits, bitwise_chip.clone()), bitwise_chip.clone(), Rv32LessThan256Opcode::CLASS_OFFSET, ), @@ -162,7 +167,7 @@ fn create_mul_harness_fields( Rv32Multiplication256Chip, ) { let air = Rv32Multiplication256Air::new( - Rv32HeapAdapterAir::new( + BigintHeapAdapterAir::new( execution_bridge, memory_bridge, bitwise_chip.bus(), @@ -171,12 +176,12 @@ fn create_mul_harness_fields( MultiplicationCoreAir::new(*range_tuple_chip.bus(), Rv32Mul256Opcode::CLASS_OFFSET), ); let executor = Rv32Multiplication256Executor::new( - Rv32HeapAdapterExecutor::new(address_bits), + BigintHeapAdapterExecutor::new(address_bits), Rv32Mul256Opcode::CLASS_OFFSET, ); let chip = Rv32Multiplication256Chip::::new( MultiplicationFiller::new( - Rv32HeapAdapterFiller::new(address_bits, bitwise_chip), + BigintHeapAdapterFiller::new(address_bits, bitwise_chip), range_tuple_chip, Rv32Mul256Opcode::CLASS_OFFSET, ), @@ -194,7 +199,7 @@ fn create_shift_harness_fields( address_bits: usize, ) -> (Rv32Shift256Air, Rv32Shift256Executor, Rv32Shift256Chip) { let air = Rv32Shift256Air::new( - Rv32HeapAdapterAir::new( + BigintHeapAdapterAir::new( execution_bridge, memory_bridge, bitwise_chip.bus(), @@ -207,12 +212,12 @@ fn create_shift_harness_fields( ), ); let executor = Rv32Shift256Executor::new( - Rv32HeapAdapterExecutor::new(address_bits), + BigintHeapAdapterExecutor::new(address_bits), Rv32Shift256Opcode::CLASS_OFFSET, ); let chip = Rv32Shift256Chip::new( ShiftFiller::new( - Rv32HeapAdapterFiller::new(address_bits, bitwise_chip.clone()), + BigintHeapAdapterFiller::new(address_bits, bitwise_chip.clone()), bitwise_chip.clone(), range_checker_chip.clone(), Rv32Shift256Opcode::CLASS_OFFSET, @@ -234,7 +239,7 @@ fn create_beq_harness_fields( Rv32BranchEqual256Chip, ) { let air = Rv32BranchEqual256Air::new( - Rv32HeapBranchAdapterAir::new( + BigintBranchAdapterAir::new( execution_bridge, memory_bridge, bitwise_chip.bus(), @@ -243,13 +248,13 @@ fn create_beq_harness_fields( BranchEqualCoreAir::new(Rv32BranchEqual256Opcode::CLASS_OFFSET, DEFAULT_PC_STEP), ); let executor = Rv32BranchEqual256Executor::new( - Rv32HeapBranchAdapterExecutor::new(address_bits), + BigintBranchAdapterExecutor::new(address_bits), Rv32BranchEqual256Opcode::CLASS_OFFSET, DEFAULT_PC_STEP, ); let chip = Rv32BranchEqual256Chip::new( BranchEqualFiller::new( - Rv32HeapBranchAdapterFiller::new(address_bits, bitwise_chip), + BigintBranchAdapterFiller::new(address_bits, bitwise_chip), Rv32BranchEqual256Opcode::CLASS_OFFSET, DEFAULT_PC_STEP, ), @@ -270,7 +275,7 @@ fn create_blt_harness_fields( Rv32BranchLessThan256Chip, ) { let air = Rv32BranchLessThan256Air::new( - Rv32HeapBranchAdapterAir::new( + BigintBranchAdapterAir::new( execution_bridge, memory_bridge, bitwise_chip.bus(), @@ -282,12 +287,12 @@ fn create_blt_harness_fields( ), ); let executor = Rv32BranchLessThan256Executor::new( - Rv32HeapBranchAdapterExecutor::new(address_bits), + BigintBranchAdapterExecutor::new(address_bits), Rv32BranchLessThan256Opcode::CLASS_OFFSET, ); let chip = Rv32BranchLessThan256Chip::new( BranchLessThanFiller::new( - Rv32HeapBranchAdapterFiller::new(address_bits, bitwise_chip.clone()), + BigintBranchAdapterFiller::new(address_bits, bitwise_chip.clone()), bitwise_chip, Rv32BranchLessThan256Opcode::CLASS_OFFSET, ), @@ -374,7 +379,7 @@ fn set_and_execute_rand>( #[test_case(BaseAluOpcode::AND, 24)] fn run_alu_256_rand_test(opcode: BaseAluOpcode, num_ops: usize) { let mut rng = create_seeded_rng(); - let mut tester = VmChipTestBuilder::default(); + let mut tester = tester_with_access_adapters_disabled(); let offset = Rv32BaseAlu256Opcode::CLASS_OFFSET; let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS); @@ -413,7 +418,7 @@ fn run_alu_256_rand_test(opcode: BaseAluOpcode, num_ops: usize) { #[test_case(LessThanOpcode::SLTU, 24)] fn run_lt_256_rand_test(opcode: LessThanOpcode, num_ops: usize) { let mut rng = create_seeded_rng(); - let mut tester = VmChipTestBuilder::default(); + let mut tester = tester_with_access_adapters_disabled(); let offset = Rv32LessThan256Opcode::CLASS_OFFSET; let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS); @@ -451,7 +456,7 @@ fn run_lt_256_rand_test(opcode: LessThanOpcode, num_ops: usize) { #[test_case(MulOpcode::MUL, 24)] fn run_mul_256_rand_test(opcode: MulOpcode, num_ops: usize) { let mut rng = create_seeded_rng(); - let mut tester = VmChipTestBuilder::default(); + let mut tester = tester_with_access_adapters_disabled(); let offset = Rv32Mul256Opcode::CLASS_OFFSET; let range_tuple_bus = RangeTupleCheckerBus::new(RANGE_TUPLE_CHECKER_BUS, RANGE_TUPLE_SIZES); @@ -496,7 +501,7 @@ fn run_mul_256_rand_test(opcode: MulOpcode, num_ops: usize) { #[test_case(ShiftOpcode::SRA, 24)] fn run_shift_256_rand_test(opcode: ShiftOpcode, num_ops: usize) { let mut rng = create_seeded_rng(); - let mut tester = VmChipTestBuilder::default(); + let mut tester = tester_with_access_adapters_disabled(); let offset = Rv32Shift256Opcode::CLASS_OFFSET; let range_checker_chip = tester.range_checker(); @@ -538,7 +543,7 @@ fn run_shift_256_rand_test(opcode: ShiftOpcode, num_ops: usize) { #[test_case(BranchEqualOpcode::BNE, 24)] fn run_beq_256_rand_test(opcode: BranchEqualOpcode, num_ops: usize) { let mut rng = create_seeded_rng(); - let mut tester = VmChipTestBuilder::default(); + let mut tester = tester_with_access_adapters_disabled(); let offset = Rv32BranchEqual256Opcode::CLASS_OFFSET; let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS); @@ -579,7 +584,7 @@ fn run_beq_256_rand_test(opcode: BranchEqualOpcode, num_ops: usize) { #[test_case(BranchLessThanOpcode::BGEU, 24)] fn run_blt_256_rand_test(opcode: BranchLessThanOpcode, num_ops: usize) { let mut rng = create_seeded_rng(); - let mut tester = VmChipTestBuilder::default(); + let mut tester = tester_with_access_adapters_disabled(); let offset = Rv32BranchLessThan256Opcode::CLASS_OFFSET; let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS); @@ -620,7 +625,7 @@ fn run_blt_256_rand_test(opcode: BranchLessThanOpcode, num_ops: usize) { #[test_case(BaseAluOpcode::XOR, 24)] #[test_case(BaseAluOpcode::OR, 24)] #[test_case(BaseAluOpcode::AND, 24)] -fn run_alu_256_rand_test_cuda(opcode: BaseAluOpcode, num_ops: usize) { +fn run_alu_256_ran_test_cuda(opcode: BaseAluOpcode, num_ops: usize) { let mut rng = create_seeded_rng(); let mut tester = GpuChipTestBuilder::default().with_bitwise_op_lookup(default_bitwise_lookup_bus()); @@ -667,7 +672,7 @@ fn run_alu_256_rand_test_cuda(opcode: BaseAluOpcode, num_ops: usize) { .get_record_seeker::() .transfer_to_matrix_arena( &mut harness.matrix_arena, - EmptyAdapterCoreLayout::>::new(), + EmptyAdapterCoreLayout::::new(), ); tester @@ -728,7 +733,7 @@ fn run_lt_256_rand_test_cuda(opcode: LessThanOpcode, num_ops: usize) { .get_record_seeker::() .transfer_to_matrix_arena( &mut harness.matrix_arena, - EmptyAdapterCoreLayout::>::new(), + EmptyAdapterCoreLayout::::new(), ); tester @@ -794,7 +799,7 @@ fn run_mul_256_rand_test_cuda(opcode: MulOpcode, num_ops: usize) { .get_record_seeker::() .transfer_to_matrix_arena( &mut harness.matrix_arena, - EmptyAdapterCoreLayout::>::new(), + EmptyAdapterCoreLayout::::new(), ); tester @@ -856,7 +861,7 @@ fn run_shift_256_rand_test_cuda(opcode: ShiftOpcode, num_ops: usize) { .get_record_seeker::() .transfer_to_matrix_arena( &mut harness.matrix_arena, - EmptyAdapterCoreLayout::>::new(), + EmptyAdapterCoreLayout::::new(), ); tester @@ -917,7 +922,7 @@ fn run_beq_256_rand_test_cuda(opcode: BranchEqualOpcode, num_ops: usize) { .get_record_seeker::() .transfer_to_matrix_arena( &mut harness.matrix_arena, - EmptyAdapterCoreLayout::>::new(), + EmptyAdapterCoreLayout::::new(), ); tester @@ -980,7 +985,7 @@ fn run_blt_256_rand_test_cuda(opcode: BranchLessThanOpcode, num_ops: usize) { .get_record_seeker::() .transfer_to_matrix_arena( &mut harness.matrix_arena, - EmptyAdapterCoreLayout::>::new(), + EmptyAdapterCoreLayout::::new(), ); tester diff --git a/extensions/rv32-adapters/src/heap_branch.rs b/extensions/rv32-adapters/src/heap_branch.rs index e87b4fd973..e616a8e07b 100644 --- a/extensions/rv32-adapters/src/heap_branch.rs +++ b/extensions/rv32-adapters/src/heap_branch.rs @@ -1,13 +1,14 @@ use std::{ array::from_fn, borrow::{Borrow, BorrowMut}, + marker::PhantomData, }; use itertools::izip; use openvm_circuit::{ arch::{ get_record_from_slice, AdapterAirContext, AdapterTraceExecutor, AdapterTraceFiller, - BasicAdapterInterface, ExecutionBridge, ExecutionState, ImmInstruction, VmAdapterAir, + ExecutionBridge, ExecutionState, ImmInstruction, VmAdapterAir, VmAdapterInterface, }, system::memory::{ offline_checker::{MemoryBridge, MemoryReadAuxCols, MemoryReadAuxRecord}, @@ -32,43 +33,77 @@ use openvm_stark_backend::{ p3_field::{Field, FieldAlgebra, PrimeField32}, }; -/// This adapter reads from NUM_READS <= 2 pointers. -/// * The data is read from the heap (address space 2), and the pointers are read from registers -/// (address space 1). -/// * Reads are from the addresses in `rs[0]` (and `rs[1]` if `R = 2`). +pub type Rv32HeapBranchAdapterCols = + Rv32HeapBranchAdapterColsGeneric; +pub type Rv32HeapBranchAdapterAir = + Rv32HeapBranchAdapterAirGeneric; +pub type Rv32HeapBranchAdapterRecord = + Rv32HeapBranchAdapterRecordGeneric; +pub type Rv32HeapBranchAdapterExecutor = + Rv32HeapBranchAdapterExecutorGeneric; +pub type Rv32HeapBranchAdapterFiller = + Rv32HeapBranchAdapterFillerGeneric; + +pub struct VecHeapBranchAdapterInterface< + T, + const NUM_READS: usize, + const BLOCKS_PER_READ: usize, + const READ_SIZE: usize, +>(PhantomData); + +impl + VmAdapterInterface for VecHeapBranchAdapterInterface +{ + type Reads = [[[T; READ_SIZE]; BLOCKS_PER_READ]; NUM_READS]; + type Writes = [[T; 0]; 0]; + type ProcessedInstruction = ImmInstruction; +} + +/// Adapter cols for branching on heap values read in NUM_READS pointers. #[repr(C)] #[derive(AlignedBorrow)] -pub struct Rv32HeapBranchAdapterCols { +pub struct Rv32HeapBranchAdapterColsGeneric< + T, + const NUM_READS: usize, + const BLOCKS_PER_READ: usize, + const READ_SIZE: usize, +> { pub from_state: ExecutionState, pub rs_ptr: [T; NUM_READS], pub rs_val: [[T; RV32_REGISTER_NUM_LIMBS]; NUM_READS], pub rs_read_aux: [MemoryReadAuxCols; NUM_READS], - pub heap_read_aux: [MemoryReadAuxCols; NUM_READS], + pub heap_read_aux: [[MemoryReadAuxCols; BLOCKS_PER_READ]; NUM_READS], } #[derive(Clone, Copy, Debug, derive_new::new)] -pub struct Rv32HeapBranchAdapterAir { +pub struct Rv32HeapBranchAdapterAirGeneric< + const NUM_READS: usize, + const BLOCKS_PER_READ: usize, + const READ_SIZE: usize, +> { pub(super) execution_bridge: ExecutionBridge, pub(super) memory_bridge: MemoryBridge, pub bus: BitwiseOperationLookupBus, address_bits: usize, } -impl BaseAir - for Rv32HeapBranchAdapterAir +impl + BaseAir for Rv32HeapBranchAdapterAirGeneric { fn width(&self) -> usize { - Rv32HeapBranchAdapterCols::::width() + Rv32HeapBranchAdapterColsGeneric::::width() } } -impl VmAdapterAir - for Rv32HeapBranchAdapterAir +impl VmAdapterAir + for Rv32HeapBranchAdapterAirGeneric +where + AB: InteractionBuilder, { type Interface = - BasicAdapterInterface, NUM_READS, 0, READ_SIZE, 0>; + VecHeapBranchAdapterInterface; fn eval( &self, @@ -76,7 +111,8 @@ impl VmA local: &[AB::Var], ctx: AdapterAirContext, ) { - let cols: &Rv32HeapBranchAdapterCols<_, NUM_READS, READ_SIZE> = local.borrow(); + let cols: &Rv32HeapBranchAdapterColsGeneric<_, NUM_READS, BLOCKS_PER_READ, READ_SIZE> = + local.borrow(); let timestamp = cols.from_state.timestamp; let mut timestamp_delta: usize = 0; let mut timestamp_pp = || { @@ -127,10 +163,18 @@ impl VmA acc * AB::F::from_canonical_u32(1 << RV32_CELL_BITS) + (*limb) }) }); - for (ptr, data, aux) in izip!(heap_ptr, ctx.reads, &cols.heap_read_aux) { - self.memory_bridge - .read(MemoryAddress::new(e, ptr), data, timestamp_pp(), aux) - .eval(builder, ctx.instruction.is_valid.clone()); + for (ptr, data_blocks, aux_blocks) in izip!(heap_ptr.iter(), ctx.reads.iter(), cols.heap_read_aux.iter()) { + for (block_idx, (data, aux)) in data_blocks.iter().zip(aux_blocks).enumerate() { + let offset = AB::Expr::from_canonical_usize(block_idx * READ_SIZE); + self.memory_bridge + .read( + MemoryAddress::new(e, ptr.clone() + offset), + data.clone(), + timestamp_pp(), + aux, + ) + .eval(builder, ctx.instruction.is_valid.clone()); + } } self.execution_bridge @@ -157,14 +201,15 @@ impl VmA } fn get_from_pc(&self, local: &[AB::Var]) -> AB::Var { - let cols: &Rv32HeapBranchAdapterCols<_, NUM_READS, READ_SIZE> = local.borrow(); + let cols: &Rv32HeapBranchAdapterColsGeneric<_, NUM_READS, BLOCKS_PER_READ, READ_SIZE> = + local.borrow(); cols.from_state.pc } } #[repr(C)] #[derive(AlignedBytesBorrow, Debug)] -pub struct Rv32HeapBranchAdapterRecord { +pub struct Rv32HeapBranchAdapterRecordGeneric { pub from_pc: u32, pub from_timestamp: u32, @@ -172,22 +217,30 @@ pub struct Rv32HeapBranchAdapterRecord { pub rs_vals: [u32; NUM_READS], pub rs_read_aux: [MemoryReadAuxRecord; NUM_READS], - pub heap_read_aux: [MemoryReadAuxRecord; NUM_READS], + pub heap_read_aux: [[MemoryReadAuxRecord; BLOCKS_PER_READ]; NUM_READS], } #[derive(Clone, Copy)] -pub struct Rv32HeapBranchAdapterExecutor { +pub struct Rv32HeapBranchAdapterExecutorGeneric< + const NUM_READS: usize, + const BLOCKS_PER_READ: usize, + const READ_SIZE: usize, +> { pub pointer_max_bits: usize, } -#[derive(derive_new::new)] -pub struct Rv32HeapBranchAdapterFiller { +#[derive(Clone, derive_new::new)] +pub struct Rv32HeapBranchAdapterFillerGeneric< + const NUM_READS: usize, + const BLOCKS_PER_READ: usize, + const READ_SIZE: usize, +> { pub pointer_max_bits: usize, pub bitwise_lookup_chip: SharedBitwiseOperationLookupChip, } -impl - Rv32HeapBranchAdapterExecutor +impl + Rv32HeapBranchAdapterExecutorGeneric { pub fn new(pointer_max_bits: usize) -> Self { assert!(NUM_READS <= 2); @@ -199,13 +252,19 @@ impl } } -impl AdapterTraceExecutor - for Rv32HeapBranchAdapterExecutor +impl< + F: PrimeField32, + const NUM_READS: usize, + const BLOCKS_PER_READ: usize, + const READ_SIZE: usize, + > AdapterTraceExecutor + for Rv32HeapBranchAdapterExecutorGeneric { - const WIDTH: usize = Rv32HeapBranchAdapterCols::::width(); - type ReadData = [[u8; READ_SIZE]; NUM_READS]; + const WIDTH: usize = + Rv32HeapBranchAdapterColsGeneric::::width(); + type ReadData = [[[u8; READ_SIZE]; BLOCKS_PER_READ]; NUM_READS]; type WriteData = (); - type RecordMut<'a> = &'a mut Rv32HeapBranchAdapterRecord; + type RecordMut<'a> = &'a mut Rv32HeapBranchAdapterRecordGeneric; fn start(pc: u32, memory: &TracingMemory, adapter_record: &mut Self::RecordMut<'_>) { adapter_record.from_pc = pc; @@ -234,17 +293,20 @@ impl AdapterTra )) }); - // Read memory values + // Read memory values in 4-byte chunks from_fn(|i| { debug_assert!( - record.rs_vals[i] as usize + READ_SIZE - 1 < (1 << self.pointer_max_bits) + record.rs_vals[i] as usize + READ_SIZE * BLOCKS_PER_READ - 1 + < (1 << self.pointer_max_bits) ); - tracing_read( - memory, - RV32_MEMORY_AS, - record.rs_vals[i], - &mut record.heap_read_aux[i].prev_timestamp, - ) + from_fn(|j| { + tracing_read( + memory, + RV32_MEMORY_AS, + record.rs_vals[i] + (j * READ_SIZE) as u32, + &mut record.heap_read_aux[i][j].prev_timestamp, + ) + }) }) } @@ -259,18 +321,24 @@ impl AdapterTra } } -impl AdapterTraceFiller - for Rv32HeapBranchAdapterFiller +impl< + F: PrimeField32, + const NUM_READS: usize, + const BLOCKS_PER_READ: usize, + const READ_SIZE: usize, + > AdapterTraceFiller + for Rv32HeapBranchAdapterFillerGeneric { - const WIDTH: usize = Rv32HeapBranchAdapterCols::::width(); + const WIDTH: usize = + Rv32HeapBranchAdapterColsGeneric::::width(); fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory, mut adapter_row: &mut [F]) { // SAFETY: // - caller ensures `adapter_row` contains a valid record representation that was previously // written by the executor - let record: &Rv32HeapBranchAdapterRecord = + let record: &Rv32HeapBranchAdapterRecordGeneric = unsafe { get_record_from_slice(&mut adapter_row, ()) }; - let cols: &mut Rv32HeapBranchAdapterCols = + let cols: &mut Rv32HeapBranchAdapterColsGeneric = adapter_row.borrow_mut(); // Range checks: @@ -288,12 +356,16 @@ impl AdapterTra ); // **NOTE**: Must iterate everything in reverse order to avoid overwriting the records - for i in (0..NUM_READS).rev() { - mem_helper.fill( - record.heap_read_aux[i].prev_timestamp, - record.from_timestamp + (i + NUM_READS) as u32, - cols.heap_read_aux[i].as_mut(), - ); + let heap_ts_start = record.from_timestamp + NUM_READS as u32; + for read_idx in (0..NUM_READS).rev() { + for block_idx in (0..BLOCKS_PER_READ).rev() { + let ts_offset = read_idx * BLOCKS_PER_READ + block_idx; + mem_helper.fill( + record.heap_read_aux[read_idx][block_idx].prev_timestamp, + heap_ts_start + ts_offset as u32, + cols.heap_read_aux[read_idx][block_idx].as_mut(), + ); + } } for i in (0..NUM_READS).rev() { diff --git a/extensions/rv32-adapters/src/vec_heap.rs b/extensions/rv32-adapters/src/vec_heap.rs index a9b612f39b..2dc45c9db1 100644 --- a/extensions/rv32-adapters/src/vec_heap.rs +++ b/extensions/rv32-adapters/src/vec_heap.rs @@ -301,7 +301,7 @@ pub struct Rv32VecHeapAdapterExecutor< pointer_max_bits: usize, } -#[derive(derive_new::new)] +#[derive(Clone, derive_new::new)] pub struct Rv32VecHeapAdapterFiller< const NUM_READS: usize, const BLOCKS_PER_READ: usize, diff --git a/extensions/rv32im/circuit/cuda/src/hintstore.cu b/extensions/rv32im/circuit/cuda/src/hintstore.cu index ce09a22477..b4e3a1b607 100644 --- a/extensions/rv32im/circuit/cuda/src/hintstore.cu +++ b/extensions/rv32im/circuit/cuda/src/hintstore.cu @@ -6,6 +6,8 @@ using namespace riscv; using namespace program; +using hintstore::MAX_HINT_BUFFER_WORDS; +using hintstore::MAX_HINT_BUFFER_WORDS_BITS; template struct Rv32HintStoreCols { // common @@ -87,11 +89,25 @@ struct Rv32HintStore { COL_WRITE_ARRAY(row, Rv32HintStoreCols, mem_ptr_limbs, mem_ptr_limbs); if (local_idx == 0) { + // The overflow check for mem_ptr + num_words * 4 is not needed because + // 4 * MAX_HINT_BUFFER_WORDS < 2^pointer_max_bits guarantees no overflow + assert(MAX_HINT_BUFFER_WORDS_BITS + 2 < pointer_max_bits); + + // Range check for mem_ptr (using pointer_max_bits) uint32_t msl_rshift = (RV32_REGISTER_NUM_LIMBS - 1) * RV32_CELL_BITS; uint32_t msl_lshift = RV32_REGISTER_NUM_LIMBS * RV32_CELL_BITS - pointer_max_bits; + + // Range check for num_words (using MAX_HINT_BUFFER_WORDS_BITS) + // These constraints only work for MAX_HINT_BUFFER_WORDS_BITS in [16, 23] + assert(MAX_HINT_BUFFER_WORDS_BITS >= 16 && MAX_HINT_BUFFER_WORDS_BITS <= 23); + + assert(record.num_words <= MAX_HINT_BUFFER_WORDS); + uint32_t rem_words_limb2_lshift = (RV32_REGISTER_NUM_LIMBS - 1) * RV32_CELL_BITS - MAX_HINT_BUFFER_WORDS_BITS; + + // Combined range check for mem_ptr and num_words bitwise_lookup.add_range( (record.mem_ptr >> msl_rshift) << msl_lshift, - (record.num_words >> msl_rshift) << msl_lshift + ((record.num_words >> 16) & 0xFF) << rem_words_limb2_lshift ); mem_helper.fill( row.slice_from(COL_INDEX(Rv32HintStoreCols, mem_ptr_aux_cols)), diff --git a/extensions/rv32im/circuit/src/base_alu/tests.rs b/extensions/rv32im/circuit/src/base_alu/tests.rs index 8f38dea1f5..7e42b38989 100644 --- a/extensions/rv32im/circuit/src/base_alu/tests.rs +++ b/extensions/rv32im/circuit/src/base_alu/tests.rs @@ -163,8 +163,11 @@ fn rand_rv32_alu_test(opcode: BaseAluOpcode, num_ops: usize) { // TODO(AG): make a more meaningful test for memory accesses tester.write(2, 1024, [F::ONE; 4]); tester.write(2, 1028, [F::ONE; 4]); - let sm = tester.read(2, 1024); - assert_eq!(sm, [F::ONE; 8]); + // Avoid wider-than-min-block accesses when access adapters are disabled + let sm1 = tester.read(2, 1024); + let sm2 = tester.read(2, 1028); + assert_eq!(sm1, [F::ONE; 4]); + assert_eq!(sm2, [F::ONE; 4]); for _ in 0..num_ops { set_and_execute( @@ -201,8 +204,11 @@ fn rand_rv32_alu_test_persistent(opcode: BaseAluOpcode, num_ops: usize) { // TODO(AG): make a more meaningful test for memory accesses tester.write(2, 1024, [F::ONE; 4]); tester.write(2, 1028, [F::ONE; 4]); - let sm = tester.read(2, 1024); - assert_eq!(sm, [F::ONE; 8]); + // Avoid wider-than-min-block accesses when access adapters are disabled + let sm1 = tester.read(2, 1024); + let sm2 = tester.read(2, 1028); + assert_eq!(sm1, [F::ONE; 4]); + assert_eq!(sm2, [F::ONE; 4]); for _ in 0..num_ops { set_and_execute( diff --git a/extensions/rv32im/circuit/src/common/mod.rs b/extensions/rv32im/circuit/src/common/mod.rs index 0a58b7310b..20855af15d 100644 --- a/extensions/rv32im/circuit/src/common/mod.rs +++ b/extensions/rv32im/circuit/src/common/mod.rs @@ -9,7 +9,7 @@ mod aot { use openvm_circuit::{ arch::{ execution_mode::{metered::memory_ctx::MemoryCtx, MeteredCtx}, - AotError, SystemConfig, VmExecState, ADDR_SPACE_OFFSET, + AotError, SystemConfig, VmExecState, ADDR_SPACE_OFFSET, CONST_BLOCK_SIZE, }, system::memory::{merkle::public_values::PUBLIC_VALUES_AS, online::GuestMemory, CHUNK}, }; @@ -244,12 +244,12 @@ mod aot { // Therefore the loop only iterates once for `page_id = start_page_id`. let initial_block_size: usize = config.initial_block_size(); - if initial_block_size != CHUNK { + if initial_block_size != CONST_BLOCK_SIZE { return Err(AotError::Other(format!( - "initial_block_size must be {CHUNK}, got {initial_block_size}" + "initial_block_size must be {CONST_BLOCK_SIZE}, got {initial_block_size}" ))); } - let chunk_bits = CHUNK.ilog2(); + let chunk_bits = CONST_BLOCK_SIZE.ilog2(); let as_offset = ((address_space - ADDR_SPACE_OFFSET) as u64) << (config.memory_config.memory_dimensions().address_height); diff --git a/extensions/rv32im/circuit/src/hintstore/execution.rs b/extensions/rv32im/circuit/src/hintstore/execution.rs index 47e68e3084..631cdb79a1 100644 --- a/extensions/rv32im/circuit/src/hintstore/execution.rs +++ b/extensions/rv32im/circuit/src/hintstore/execution.rs @@ -14,6 +14,7 @@ use openvm_instructions::{ use openvm_rv32im_transpiler::{ Rv32HintStoreOpcode, Rv32HintStoreOpcode::{HINT_BUFFER, HINT_STOREW}, + MAX_HINT_BUFFER_WORDS, }; use openvm_stark_backend::p3_field::PrimeField32; @@ -172,6 +173,15 @@ unsafe fn execute_e12_impl MAX_HINT_BUFFER_WORDS as u32 { + return Err(ExecutionError::HintBufferTooLarge { + pc, + num_words, + max_hint_buffer_words: MAX_HINT_BUFFER_WORDS as u32, + }); + } + if exec_state.streams.hint_stream.len() < RV32_REGISTER_NUM_LIMBS * num_words as usize { let err = ExecutionError::HintOutOfBounds { pc }; return Err(err); diff --git a/extensions/rv32im/circuit/src/hintstore/mod.rs b/extensions/rv32im/circuit/src/hintstore/mod.rs index 35955bb979..b9cac88249 100644 --- a/extensions/rv32im/circuit/src/hintstore/mod.rs +++ b/extensions/rv32im/circuit/src/hintstore/mod.rs @@ -25,6 +25,7 @@ use openvm_instructions::{ use openvm_rv32im_transpiler::{ Rv32HintStoreOpcode, Rv32HintStoreOpcode::{HINT_BUFFER, HINT_STOREW}, + MAX_HINT_BUFFER_WORDS, MAX_HINT_BUFFER_WORDS_BITS, }; use openvm_stark_backend::{ interaction::InteractionBuilder, @@ -202,19 +203,29 @@ impl Air for Rv32HintStoreAir { ) .eval(builder, is_start.clone()); - // Preventing mem_ptr and rem_words overflow - // Constraining mem_ptr_limbs[RV32_REGISTER_NUM_LIMBS - 1] < 2^(pointer_max_bits - - // (RV32_REGISTER_NUM_LIMBS - 1)*RV32_CELL_BITS) which implies mem_ptr <= - // 2^pointer_max_bits Similarly for rem_words <= 2^pointer_max_bits + // Preventing rem_words overflow: rem_words < 2^MAX_HINT_BUFFER_WORDS_BITS + // These constraints only work for MAX_HINT_BUFFER_WORDS_BITS in [16, 23] + debug_assert!( + (16..=23).contains(&MAX_HINT_BUFFER_WORDS_BITS), + "MAX_HINT_BUFFER_WORDS_BITS must be in [16, 23] for these constraints to work" + ); + // For MAX_HINT_BUFFER_WORDS_BITS = 18, this requires: + // - limbs[3] = 0 (since 2^18 < 2^24) + // - limbs[2] < 4 (since 2^18 = 4 * 2^16) + builder.assert_zero(local_cols.rem_words_limbs[RV32_REGISTER_NUM_LIMBS - 1]); + + // Preventing mem_ptr overflow: mem_ptr < 2^pointer_max_bits + // (rem_words overflow is handled below with the stricter MAX_HINT_BUFFER_WORDS_BITS bound) self.bitwise_operation_lookup_bus .send_range( local_cols.mem_ptr_limbs[RV32_REGISTER_NUM_LIMBS - 1] * AB::F::from_canonical_usize( 1 << (RV32_REGISTER_NUM_LIMBS * RV32_CELL_BITS - self.pointer_max_bits), ), - local_cols.rem_words_limbs[RV32_REGISTER_NUM_LIMBS - 1] + local_cols.rem_words_limbs[RV32_REGISTER_NUM_LIMBS - 2] * AB::F::from_canonical_usize( - 1 << (RV32_REGISTER_NUM_LIMBS * RV32_CELL_BITS - self.pointer_max_bits), + 1 << ((RV32_REGISTER_NUM_LIMBS - 1) * RV32_CELL_BITS + - MAX_HINT_BUFFER_WORDS_BITS), ), ) .eval(builder, is_start.clone()); @@ -409,6 +420,15 @@ where read_rv32_register(state.memory.data(), a) }; + // Bounds check: num_words must not exceed MAX_HINT_BUFFER_WORDS + if num_words > MAX_HINT_BUFFER_WORDS as u32 { + return Err(ExecutionError::HintBufferTooLarge { + pc: *state.pc, + num_words, + max_hint_buffer_words: MAX_HINT_BUFFER_WORDS as u32, + }); + } + let record = state.ctx.alloc(MultiRowLayout::new(Rv32HintStoreMetadata { num_words: num_words as usize, })); @@ -508,6 +528,10 @@ impl TraceFiller for Rv32HintStoreFiller { let msl_lshift: u32 = (RV32_REGISTER_NUM_LIMBS * RV32_CELL_BITS - self.pointer_max_bits) as u32; + // Scale factors for rem_words range check (using MAX_HINT_BUFFER_WORDS_BITS) + let rem_words_limb2_lshift: u32 = + ((RV32_REGISTER_NUM_LIMBS - 1) * RV32_CELL_BITS - MAX_HINT_BUFFER_WORDS_BITS) as u32; + chunks .par_iter_mut() .zip(sizes.par_iter()) @@ -526,9 +550,17 @@ impl TraceFiller for Rv32HintStoreFiller { }), ) }; + // Range check for mem_ptr (using pointer_max_bits) + // (num_words overflow check is handled below with the stricter + // MAX_HINT_BUFFER_WORDS_BITS bound) + // Range check for num_words (using MAX_HINT_BUFFER_WORDS_BITS) + debug_assert!( + num_words <= MAX_HINT_BUFFER_WORDS as u32, + "num_words must be <= MAX_HINT_BUFFER_WORDS" + ); self.bitwise_lookup_chip.request_range( (record.inner.mem_ptr >> msl_rshift) << msl_lshift, - (num_words >> msl_rshift) << msl_lshift, + ((num_words >> 16) & 0xFF) << rem_words_limb2_lshift, ); let mut timestamp = record.inner.timestamp + num_words * 3; diff --git a/extensions/rv32im/circuit/src/hintstore/tests.rs b/extensions/rv32im/circuit/src/hintstore/tests.rs index e79066aae6..61019a1104 100644 --- a/extensions/rv32im/circuit/src/hintstore/tests.rs +++ b/extensions/rv32im/circuit/src/hintstore/tests.rs @@ -19,7 +19,10 @@ use openvm_instructions::{ riscv::{RV32_CELL_BITS, RV32_MEMORY_AS, RV32_REGISTER_AS, RV32_REGISTER_NUM_LIMBS}, LocalOpcode, }; -use openvm_rv32im_transpiler::Rv32HintStoreOpcode::{self, *}; +use openvm_rv32im_transpiler::{ + Rv32HintStoreOpcode::{self, *}, + MAX_HINT_BUFFER_WORDS, +}; use openvm_stark_backend::{ p3_field::FieldAlgebra, p3_matrix::{ @@ -194,6 +197,94 @@ fn rand_hintstore_test() { // part of the trace and check that the chip throws the expected error. ////////////////////////////////////////////////////////////////////////////////////// +#[test] +#[should_panic(expected = "HintBufferTooLarge")] +fn test_hint_buffer_exceeds_max_words() { + let mut rng = create_seeded_rng(); + let mut tester = VmChipTestBuilder::default(); + + let (mut harness, _bitwise) = create_harness::>(&mut tester); + + let num_words = (MAX_HINT_BUFFER_WORDS + 1) as u32; + + let a = gen_pointer(&mut rng, RV32_REGISTER_NUM_LIMBS); + tester.write( + RV32_REGISTER_AS as usize, + a, + num_words.to_le_bytes().map(F::from_canonical_u8), + ); + + let mem_ptr = gen_pointer(&mut rng, 4) as u32; + let b = gen_pointer(&mut rng, RV32_REGISTER_NUM_LIMBS); + tester.write(1, b, mem_ptr.to_le_bytes().map(F::from_canonical_u8)); + + for _ in 0..num_words { + let data = rng.next_u32().to_le_bytes().map(F::from_canonical_u8); + tester.streams_mut().hint_stream.extend(data); + } + + tester.execute( + &mut harness.executor, + &mut harness.arena, + &Instruction::from_usize( + HINT_BUFFER.global_opcode(), + [a, b, 0, RV32_REGISTER_AS as usize, RV32_MEMORY_AS as usize], + ), + ); +} + +#[test] +fn test_hint_buffer_rem_words_range_check() { + let mut rng = create_seeded_rng(); + let mut tester = VmChipTestBuilder::default(); + + let (mut harness, bitwise) = create_harness(&mut tester); + + // Build a small, valid buffer instruction with 1 word so trace has 1 row. + let num_words: u32 = 1; + let a = gen_pointer(&mut rng, RV32_REGISTER_NUM_LIMBS); + tester.write( + RV32_REGISTER_AS as usize, + a, + num_words.to_le_bytes().map(F::from_canonical_u8), + ); + + let mem_ptr = gen_pointer(&mut rng, 4) as u32; + let b = gen_pointer(&mut rng, RV32_REGISTER_NUM_LIMBS); + tester.write(1, b, mem_ptr.to_le_bytes().map(F::from_canonical_u8)); + + for _ in 0..num_words { + let data = rng.next_u32().to_le_bytes().map(F::from_canonical_u8); + tester.streams_mut().hint_stream.extend(data); + } + + tester.execute( + &mut harness.executor, + &mut harness.arena, + &Instruction::from_usize( + HINT_BUFFER.global_opcode(), + [a, b, 0, RV32_REGISTER_AS as usize, RV32_MEMORY_AS as usize], + ), + ); + + let modify_trace = |trace: &mut DenseMatrix| { + let mut trace_row = trace.row_slice(0).to_vec(); + let cols: &mut Rv32HintStoreCols = trace_row.as_mut_slice().borrow_mut(); + // Force `rem_words` to overflow MAX_HINT_BUFFER_WORDS_BITS on the start row. + cols.rem_words_limbs = [F::ZERO, F::ZERO, F::ZERO, F::from_canonical_u8(1)]; + *trace = RowMajorMatrix::new(trace_row, trace.width()); + }; + + disable_debug_builder(); + let tester = tester + .build() + .load_and_prank_trace(harness, modify_trace) + .load_periphery(bitwise) + .finalize(); + + tester.simple_test_with_expected_error(get_verification_error(false)); +} + #[allow(clippy::too_many_arguments)] fn run_negative_hintstore_test( opcode: Rv32HintStoreOpcode, diff --git a/extensions/rv32im/circuit/src/loadstore/tests.rs b/extensions/rv32im/circuit/src/loadstore/tests.rs index 240da983d0..978c0cd856 100644 --- a/extensions/rv32im/circuit/src/loadstore/tests.rs +++ b/extensions/rv32im/circuit/src/loadstore/tests.rs @@ -10,7 +10,9 @@ use openvm_circuit::{ }, }; use openvm_circuit_primitives::var_range::VariableRangeCheckerChip; -use openvm_instructions::{instruction::Instruction, riscv::RV32_REGISTER_AS, LocalOpcode}; +use openvm_instructions::{ + instruction::Instruction, riscv::RV32_REGISTER_AS, LocalOpcode, NATIVE_AS, +}; use openvm_rv32im_transpiler::Rv32LoadStoreOpcode::{self, *}; use openvm_stark_backend::{ p3_air::BaseAir, @@ -131,7 +133,8 @@ fn set_and_execute>( let mem_as = mem_as.unwrap_or(if is_load { 2 } else { - *[2, 3, 4].choose(rng).unwrap() + // Avoid Native AS while access adapters are disabled. + *[2, 3].choose(rng).unwrap() }); let shift_amount = ptr_val % 4; @@ -215,10 +218,13 @@ fn rand_loadstore_test(opcode: Rv32LoadStoreOpcode, num_ops: usize) { let mut rng = create_seeded_rng(); let mut mem_config = MemoryConfig::default(); mem_config.addr_spaces[RV32_REGISTER_AS as usize].num_cells = 1 << 29; + mem_config.addr_spaces[NATIVE_AS as usize].num_cells = 0; if [STOREW, STOREB, STOREH].contains(&opcode) { mem_config.addr_spaces[PUBLIC_VALUES_AS as usize].num_cells = 1 << 29; } - let mut tester = VmChipTestBuilder::volatile(mem_config); + // Use persistent memory so initial block size matches the 4-byte alignment and + // avoids access-adapter split/merge paths when adapters are disabled. + let mut tester = VmChipTestBuilder::persistent(mem_config); let mut harness = create_harness(&mut tester); for _ in 0..num_ops { @@ -268,10 +274,12 @@ fn run_negative_loadstore_test( let mut rng = create_seeded_rng(); let mut mem_config = MemoryConfig::default(); mem_config.addr_spaces[RV32_REGISTER_AS as usize].num_cells = 1 << 29; + mem_config.addr_spaces[NATIVE_AS as usize].num_cells = 0; if [STOREW, STOREB, STOREH].contains(&opcode) { mem_config.addr_spaces[PUBLIC_VALUES_AS as usize].num_cells = 1 << 29; } - let mut tester = VmChipTestBuilder::volatile(mem_config); + // Use persistent memory so the min block size matches alignment without needing adapters. + let mut tester = VmChipTestBuilder::persistent(mem_config); let mut harness = create_harness(&mut tester); set_and_execute( diff --git a/extensions/rv32im/guest/src/io.rs b/extensions/rv32im/guest/src/io.rs index 664b9b1117..535959d4cc 100644 --- a/extensions/rv32im/guest/src/io.rs +++ b/extensions/rv32im/guest/src/io.rs @@ -1,5 +1,5 @@ #![allow(unused_imports)] -use crate::{PhantomImm, PHANTOM_FUNCT3, SYSTEM_OPCODE}; +use crate::{PhantomImm, MAX_HINT_BUFFER_WORDS, PHANTOM_FUNCT3, SYSTEM_OPCODE}; /// Store the next 4 bytes from the hint stream to [[rd]_1]_2. #[macro_export] @@ -21,8 +21,8 @@ macro_rules! hint_buffer_u32 { ($x:expr, $len:expr) => { if $len != 0 { openvm_custom_insn::custom_insn_i!( - opcode = openvm_rv32im_guest::SYSTEM_OPCODE, - funct3 = openvm_rv32im_guest::HINT_FUNCT3, + opcode = $crate::SYSTEM_OPCODE, + funct3 = $crate::HINT_FUNCT3, rd = In $x, rs1 = In $len, imm = Const 1, @@ -31,6 +31,18 @@ macro_rules! hint_buffer_u32 { }; } +/// Read hint buffer with automatic chunking for large reads. +/// Splits reads larger than MAX_HINT_BUFFER_WORDS into multiple instructions. +#[inline(always)] +pub fn hint_buffer_chunked(mut ptr: *mut u8, mut num_words: usize) { + while num_words > 0 { + let chunk = core::cmp::min(num_words, MAX_HINT_BUFFER_WORDS); + hint_buffer_u32!(ptr, chunk); + ptr = ptr.wrapping_add(chunk * 4); + num_words -= chunk; + } +} + /// Reset the hint stream with the next hint. #[inline(always)] pub fn hint_input() { diff --git a/extensions/rv32im/guest/src/lib.rs b/extensions/rv32im/guest/src/lib.rs index 99f1a6f97f..cea29068e2 100644 --- a/extensions/rv32im/guest/src/lib.rs +++ b/extensions/rv32im/guest/src/lib.rs @@ -25,6 +25,16 @@ pub const REVEAL_FUNCT3: u8 = 0b010; pub const PHANTOM_FUNCT3: u8 = 0b011; pub const CSRRW_FUNCT3: u8 = 0b001; +/// Maximum number of bits for hint buffer size. +/// IMPORTANT: Must be synced with MAX_HINT_BUFFER_WORDS_BITS constant for cuda +/// `crates/circuits/primitives/cuda/include/primitives/constants.h` +// For the constraints, they are configured for a range of MAX_HINT_BUFFER_WORDS_BITS between +// [16,23] +pub const MAX_HINT_BUFFER_WORDS_BITS: usize = 18; +/// Maximum number of words that can be read in a single HINT_BUFFER instruction. +/// AIR constraint requires rem_words < 2^MAX_HINT_BUFFER_WORDS_BITS, so max is one less +pub const MAX_HINT_BUFFER_WORDS: usize = (1 << MAX_HINT_BUFFER_WORDS_BITS) - 1; // 262,143 words ≈ 1MB + /// imm options for system phantom instructions #[derive(Debug, Copy, Clone, PartialEq, Eq, FromRepr)] #[repr(u16)] diff --git a/extensions/rv32im/tests/programs/examples/hint_large_buffer.rs b/extensions/rv32im/tests/programs/examples/hint_large_buffer.rs new file mode 100644 index 0000000000..64472b0f25 --- /dev/null +++ b/extensions/rv32im/tests/programs/examples/hint_large_buffer.rs @@ -0,0 +1,25 @@ +#![cfg_attr(not(feature = "std"), no_main)] +#![cfg_attr(not(feature = "std"), no_std)] + +use openvm::io::read_vec; +use openvm_rv32im_guest::MAX_HINT_BUFFER_WORDS; + +openvm::entry!(main); + +pub fn main() { + let vec = read_vec(); + + // Create a hint buffer larger than MAX_HINT_BUFFER_WORDS, to test chunking + let expected_words = MAX_HINT_BUFFER_WORDS + 100; + let expected_len = expected_words * 4; + + if vec.len() != expected_len { + openvm::process::panic(); + } + + for (i, item) in vec.iter().enumerate() { + if *item != (i as u8) { + openvm::process::panic(); + } + } +} diff --git a/extensions/rv32im/tests/src/lib.rs b/extensions/rv32im/tests/src/lib.rs index ff141398f5..c4302ae808 100644 --- a/extensions/rv32im/tests/src/lib.rs +++ b/extensions/rv32im/tests/src/lib.rs @@ -13,7 +13,7 @@ mod tests { }; use openvm_instructions::{exe::VmExe, instruction::Instruction, LocalOpcode, SystemOpcode}; use openvm_rv32im_circuit::{Rv32IBuilder, Rv32IConfig, Rv32ImBuilder, Rv32ImConfig}; - use openvm_rv32im_guest::hint_load_by_key_encode; + use openvm_rv32im_guest::{hint_load_by_key_encode, MAX_HINT_BUFFER_WORDS}; use openvm_rv32im_transpiler::{ DivRemOpcode, MulHOpcode, MulOpcode, Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension, @@ -169,6 +169,37 @@ mod tests { Ok(()) } + /// NOTE: This test is slow because it processes > 1MB of data. It is marked #[ignore] + /// and can be run with: cargo test -p openvm-rv32im-integration-tests test_hint_buffer_chunking + /// -- --ignored + #[test] + #[ignore = "slow test: processes >1MB of data"] + fn test_hint_buffer_chunking() -> Result<()> { + let config = test_rv32im_config(); + let elf = build_example_program_at_path(get_programs_dir!(), "hint_large_buffer", &config)?; + let exe = VmExe::from_elf( + elf, + Transpiler::::default() + .with_extension(Rv32ITranspilerExtension) + .with_extension(Rv32MTranspilerExtension) + .with_extension(Rv32IoTranspilerExtension), + )?; + + // Create input buffer larger than MAX_HINT_BUFFER_WORDS + // This will require chunking to succeed + let expected_words = MAX_HINT_BUFFER_WORDS + 100; + let expected_len = expected_words * 4; + + // Create data with a pattern that can be verified + let data: Vec = (0..expected_len) + .map(|i| F::from_canonical_u8((i % 256) as u8)) + .collect(); + + let input = vec![data]; + air_test_with_min_segments(Rv32ImBuilder, config, exe, input, 1); + Ok(()) + } + #[test] fn test_read() -> Result<()> { let config = test_rv32im_config(); diff --git a/extensions/rv32im/transpiler/src/lib.rs b/extensions/rv32im/transpiler/src/lib.rs index 03a354517e..218202c369 100644 --- a/extensions/rv32im/transpiler/src/lib.rs +++ b/extensions/rv32im/transpiler/src/lib.rs @@ -9,6 +9,7 @@ use openvm_rv32im_guest::{ NATIVE_STOREW_FUNCT3, NATIVE_STOREW_FUNCT7, PHANTOM_FUNCT3, REVEAL_FUNCT3, RV32M_FUNCT7, RV32_ALU_OPCODE, SYSTEM_OPCODE, TERMINATE_FUNCT3, }; +pub use openvm_rv32im_guest::{MAX_HINT_BUFFER_WORDS, MAX_HINT_BUFFER_WORDS_BITS}; use openvm_stark_backend::p3_field::PrimeField32; use openvm_transpiler::{ util::{nop, unimp}, diff --git a/guest-libs/pairing/src/bls12_381/pairing.rs b/guest-libs/pairing/src/bls12_381/pairing.rs index db13c785e1..8ed9df2f68 100644 --- a/guest-libs/pairing/src/bls12_381/pairing.rs +++ b/guest-libs/pairing/src/bls12_381/pairing.rs @@ -25,7 +25,7 @@ use { openvm_pairing_guest::{PairingBaseFunct7, OPCODE, PAIRING_FUNCT3}, openvm_platform::custom_insn_r, openvm_rv32im_guest, - openvm_rv32im_guest::hint_buffer_u32, + openvm_rv32im_guest::hint_buffer_chunked, }; use super::{Bls12_381, Fp, Fp12, Fp2}; @@ -280,7 +280,7 @@ impl PairingCheck for Bls12_381 { } #[cfg(target_os = "zkvm")] { - let hint = MaybeUninit::<(Fp12, Fp12)>::uninit(); + let mut hint = MaybeUninit::<(Fp12, Fp12)>::uninit(); // We do not rely on the slice P's memory layout since rust does not guarantee it across // compiler versions. let p_fat_ptr = (P.as_ptr() as u32, P.len() as u32); @@ -294,8 +294,8 @@ impl PairingCheck for Bls12_381 { rs1 = In &p_fat_ptr, rs2 = In &q_fat_ptr ); - let ptr = hint.as_ptr() as *const u8; - hint_buffer_u32!(ptr, (48 * 12 * 2) / 4); + let ptr = hint.as_mut_ptr() as *mut u8; + hint_buffer_chunked(ptr, (48 * 12 * 2) / 4 as usize); hint.assume_init() } } diff --git a/guest-libs/pairing/src/bn254/pairing.rs b/guest-libs/pairing/src/bn254/pairing.rs index c0f1cc35f2..9fb160511b 100644 --- a/guest-libs/pairing/src/bn254/pairing.rs +++ b/guest-libs/pairing/src/bn254/pairing.rs @@ -21,7 +21,7 @@ use { core::mem::MaybeUninit, openvm_pairing_guest::{PairingBaseFunct7, OPCODE, PAIRING_FUNCT3}, openvm_platform::custom_insn_r, - openvm_rv32im_guest::hint_buffer_u32, + openvm_rv32im_guest::hint_buffer_chunked, }; use super::{Bn254, Fp, Fp12, Fp2}; @@ -314,7 +314,7 @@ impl PairingCheck for Bn254 { } #[cfg(target_os = "zkvm")] { - let hint = MaybeUninit::<(Fp12, Fp12)>::uninit(); + let mut hint = MaybeUninit::<(Fp12, Fp12)>::uninit(); // We do not rely on the slice P's memory layout since rust does not guarantee it across // compiler versions. let p_fat_ptr = (P.as_ptr() as u32, P.len() as u32); @@ -328,8 +328,8 @@ impl PairingCheck for Bn254 { rs1 = In &p_fat_ptr, rs2 = In &q_fat_ptr ); - let ptr = hint.as_ptr() as *const u8; - hint_buffer_u32!(ptr, (32 * 12 * 2) / 4); + let ptr = hint.as_mut_ptr() as *mut u8; + hint_buffer_chunked(ptr, (32 * 12 * 2) / 4 as usize); hint.assume_init() } }