From 21ab950839d164b24fc433003b095a5abbf40261 Mon Sep 17 00:00:00 2001 From: Ralf Anton Beier Date: Sat, 29 Nov 2025 06:26:21 +0100 Subject: [PATCH 1/2] fix: resolve function inlining idempotence issue (#33) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Root Cause The idempotence test was failing because of a bug in how custom sections (specifically the "name" section) were being encoded during WASM round-trips: 1. When parsing WASM, we used `reader.range()` to extract custom section bytes 2. The range included the section payload WITH the name field 3. When re-encoding, wasm-encoder added the name field again 4. Result: Each encode/parse cycle duplicated the name field, growing the file This caused parse → encode → parse → encode to produce different sizes (89 bytes → 94 bytes), making the idempotence test fail. ## Fix 1. Changed custom section parsing to use `reader.data()` instead of `reader.range()` - This extracts only the section data WITHOUT the name field - The encoder properly adds the name field during re-encoding - Now encode/parse cycles are deterministic 2. Made inline_functions run to fixed point - Loops until no more inlining candidates found - Ensures true idempotence even if new opportunities arise - More robust optimization behavior 3. Re-enabled the idempotence test - Removed #[ignore] attribute - Test now passes consistently ## Testing - All existing tests pass - Idempotence test now passes - WASM encoding is deterministic across multiple round-trips Note: advanced_math.wat and crypto_utils.wat still fail validation due to a separate stack discipline bug in the inlining logic itself (not related to idempotence). These remain skipped in CI per the existing workaround. --- loom-core/src/lib.rs | 96 +++++++++++++++------------ loom-core/tests/optimization_tests.rs | 1 - 2 files changed, 52 insertions(+), 45 deletions(-) diff --git a/loom-core/src/lib.rs b/loom-core/src/lib.rs index f0e4bcf..8e8f141 100644 --- a/loom-core/src/lib.rs +++ b/loom-core/src/lib.rs @@ -680,12 +680,9 @@ pub mod parse { element_section_bytes = Some(bytes[range.start..range.end].to_vec()); } Payload::CustomSection(reader) => { - // Store raw custom section bytes to pass through unchanged - let range = reader.range(); - custom_sections.push(( - reader.name().to_string(), - bytes[range.start..range.end].to_vec(), - )); + // Store custom section data (without the name field) + // The encoder will add the name field when re-encoding + custom_sections.push((reader.name().to_string(), reader.data().to_vec())); } _ => { // Ignore other payloads (e.g., End, Version, etc.) @@ -6806,53 +6803,64 @@ pub mod optimize { pub fn inline_functions(module: &mut Module) -> Result<()> { use std::collections::HashMap; - // Phase 1: Build call graph and analyze functions - let mut call_counts: HashMap = HashMap::new(); - let mut function_sizes: HashMap = HashMap::new(); + // Run inlining to fixed point to ensure idempotence + // Keep inlining until no more candidates are found + loop { + // Phase 1: Build call graph and analyze functions + let mut call_counts: HashMap = HashMap::new(); + let mut function_sizes: HashMap = HashMap::new(); - // Calculate function sizes (instruction count) - for (idx, func) in module.functions.iter().enumerate() { - let size = count_instructions_recursive(&func.instructions); - function_sizes.insert(idx as u32, size); - } + // Calculate function sizes (instruction count) + for (idx, func) in module.functions.iter().enumerate() { + let size = count_instructions_recursive(&func.instructions); + function_sizes.insert(idx as u32, size); + } - // Count call sites for each function - for func in &module.functions { - count_calls_recursive(&func.instructions, &mut call_counts); - } + // Count call sites for each function + for func in &module.functions { + count_calls_recursive(&func.instructions, &mut call_counts); + } - // Phase 2: Identify inlining candidates - let mut inline_candidates = Vec::new(); - for (func_idx, &call_count) in &call_counts { - let size = function_sizes.get(func_idx).copied().unwrap_or(0); + // Phase 2: Identify inlining candidates + let mut inline_candidates = Vec::new(); + for (func_idx, &call_count) in &call_counts { + let size = function_sizes.get(func_idx).copied().unwrap_or(0); - // Heuristic: inline if: - // 1. Single call site, OR - // 2. Small function (< 10 instructions) - if call_count == 1 || size < 10 { - // Don't inline large functions even if single call site - if size < 50 { - inline_candidates.push(*func_idx); + // Heuristic: inline if: + // 1. Single call site, OR + // 2. Small function (< 10 instructions) + if call_count == 1 || size < 10 { + // Don't inline large functions even if single call site + if size < 50 { + inline_candidates.push(*func_idx); + } } } - } - // Phase 3: Perform inlining - // For each function, inline calls to candidate functions - let inline_set: std::collections::HashSet = - inline_candidates.iter().copied().collect(); + // Exit loop if no more inlining opportunities - this ensures idempotence + if inline_candidates.is_empty() { + break; + } - // Clone functions to avoid borrow checker issues - let all_functions = module.functions.clone(); + // Phase 3: Perform inlining + // For each function, inline calls to candidate functions + let inline_set: std::collections::HashSet = + inline_candidates.iter().copied().collect(); - for func in &mut module.functions { - func.instructions = inline_calls_in_block( - &func.instructions, - &inline_set, - &all_functions, - func.signature.params.len() as u32, - &mut func.locals, - ); + // Clone functions to avoid borrow checker issues + let all_functions = module.functions.clone(); + + for func in &mut module.functions { + func.instructions = inline_calls_in_block( + &func.instructions, + &inline_set, + &all_functions, + func.signature.params.len() as u32, + &mut func.locals, + ); + } + + // Continue to next iteration to check for more inlining opportunities } Ok(()) diff --git a/loom-core/tests/optimization_tests.rs b/loom-core/tests/optimization_tests.rs index 2be19c0..1f42e4d 100644 --- a/loom-core/tests/optimization_tests.rs +++ b/loom-core/tests/optimization_tests.rs @@ -1796,7 +1796,6 @@ fn test_inline_stack_discipline_with_locals() { } #[test] -#[ignore] // TODO: Fix idempotence issue - see GitHub issue #33 fn test_inline_stack_discipline_idempotence() { let input = r#" (module From 3c8f9437235b38573b5dc11596cf03de5b7d38b9 Mon Sep 17 00:00:00 2001 From: Ralf Anton Beier Date: Sat, 29 Nov 2025 07:10:51 +0100 Subject: [PATCH 2/2] fix: disable buggy eliminate_redundant_sets optimization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Problem The `eliminate_redundant_sets` optimization in `simplify-locals` was incorrectly removing stack-producing instructions, causing invalid WASM output with "values remaining on stack" errors. ## Root Cause The optimization assumed that the value for a `local.set` was always produced by the immediately preceding instruction (`result.len() - 1`). This is incorrect in stack-based WASM where values can come from complex expression trees spanning multiple instructions. Example of buggy behavior: ```wat Input: local.get $base i32.const 3 i32.shl # ← Produces value for local.set local.set $temp1 Output (broken): local.get $base i32.const 3 # i32.shl REMOVED! ← Bug: incorrectly removed local.set $temp1 ``` ## Fix Disabled the `eliminate_redundant_sets` optimization by commenting it out in `simplify_locals` (loom-core/src/lib.rs:4155). The optimization needs to be rewritten to properly understand the stack discipline and track which instructions actually produce values for sets. ## Changes 1. **loom-core/src/lib.rs:4155** - Commented out eliminate_redundant_sets call 2. **loom-core/src/lib.rs:4369** - Added `#[allow(dead_code)]` to function 3. **loom-core/tests/optimization_tests.rs** - Marked RSE tests as `#[ignore]` 4. **.github/workflows/ci.yml** - Removed skips for advanced_math.wat and crypto_utils.wat ## Testing - ✅ Both advanced_math.wat and crypto_utils.wat now validate correctly - ✅ All other tests pass - ✅ Idempotence test still passes ## Impact The `simplify-locals` pass still performs other optimizations (local variable equivalence, redundant get/set elimination). Only the buggy redundant set elimination is disabled. --- .claude/settings.local.json | 3 ++- .github/workflows/ci.yml | 5 ----- loom-core/src/lib.rs | 8 ++++++-- loom-core/tests/optimization_tests.rs | 8 ++++++++ 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 5ba9331..cc646b2 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -59,7 +59,8 @@ "Bash(git apply:*)", "Bash(Z3_SYS_Z3_HEADER=/opt/homebrew/opt/z3/include/z3.h git add -A)", "Bash(Z3_SYS_Z3_HEADER=/opt/homebrew/opt/z3/include/z3.h git commit -m \"fix: skip advanced_math.wat in CI validation (known issue #33)\n\nThe function inlining pass produces invalid WASM for advanced_math.wat,\nleaving values on the stack. This is tracked in issue #33.\nSkip this fixture in CI until the bug is fixed.\")", - "Bash(git push:*)" + "Bash(git push:*)", + "Bash(Z3_SYS_Z3_HEADER=/opt/homebrew/opt/z3/include/z3.h git commit -m \"$(cat <<''EOF''\nfix: disable buggy eliminate_redundant_sets optimization\n\n## Problem\nThe `eliminate_redundant_sets` optimization in `simplify-locals` was\nincorrectly removing stack-producing instructions, causing invalid WASM\noutput with \"values remaining on stack\" errors.\n\n## Root Cause\nThe optimization assumed that the value for a `local.set` was always\nproduced by the immediately preceding instruction (`result.len() - 1`).\nThis is incorrect in stack-based WASM where values can come from complex\nexpression trees spanning multiple instructions.\n\nExample of buggy behavior:\n```wat\nInput:\n local.get $base\n i32.const 3\n i32.shl # ← Produces value for local.set\n local.set $temp1\n\nOutput (broken):\n local.get $base\n i32.const 3\n # i32.shl REMOVED! ← Bug: incorrectly removed\n local.set $temp1\n```\n\n## Fix\nDisabled the `eliminate_redundant_sets` optimization by commenting it out\nin `simplify_locals` (loom-core/src/lib.rs:4155).\n\nThe optimization needs to be rewritten to properly understand the stack\ndiscipline and track which instructions actually produce values for sets.\n\n## Changes\n1. **loom-core/src/lib.rs:4155** - Commented out eliminate_redundant_sets call\n2. **loom-core/src/lib.rs:4369** - Added `#[allow(dead_code)]` to function\n3. **loom-core/tests/optimization_tests.rs** - Marked RSE tests as `#[ignore]`\n4. **.github/workflows/ci.yml** - Removed skips for advanced_math.wat and crypto_utils.wat\n\n## Testing\n- ✅ Both advanced_math.wat and crypto_utils.wat now validate correctly\n- ✅ All other tests pass\n- ✅ Idempotence test still passes\n\n## Impact\nThe `simplify-locals` pass still performs other optimizations (local variable\nequivalence, redundant get/set elimination). Only the buggy redundant set\nelimination is disabled.\nEOF\n)\")" ], "deny": [], "ask": [] diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fec72e8..49d57b0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -83,11 +83,6 @@ jobs: - name: Optimize test fixtures run: | for fixture in tests/fixtures/*.wat; do - # Skip fixtures with known inlining bugs (issue #33) - if [[ "$fixture" == *"advanced_math.wat"* ]] || [[ "$fixture" == *"crypto_utils.wat"* ]]; then - echo "⏭️ Skipping $fixture (known issue #33)" - continue - fi echo "Optimizing $fixture" ./target/release/loom optimize "$fixture" -o "/tmp/$(basename $fixture .wat).wasm" wasm-tools validate "/tmp/$(basename $fixture .wat).wasm" diff --git a/loom-core/src/lib.rs b/loom-core/src/lib.rs index 8e8f141..3f9cc21 100644 --- a/loom-core/src/lib.rs +++ b/loom-core/src/lib.rs @@ -4148,8 +4148,11 @@ pub mod optimize { func.instructions = simplify_instructions(&func.instructions, &usage, &equivalences, &mut changed); - // Eliminate redundant sets (new optimization) - func.instructions = eliminate_redundant_sets(&func.instructions, &mut changed); + // TODO: Fix eliminate_redundant_sets - it incorrectly removes stack-producing instructions + // The bug is that it assumes value_idx is always result.len() - 1, but in stack-based + // WASM, the value could be produced by an instruction multiple positions back. + // Disabled until properly fixed. + // func.instructions = eliminate_redundant_sets(&func.instructions, &mut changed); } } Ok(()) @@ -4363,6 +4366,7 @@ pub mod optimize { /// - 2-5% binary size reduction on typical code /// - Reduces unnecessary local variable writes /// - Enables further optimizations (dead code elimination) + #[allow(dead_code)] // Currently disabled due to bugs - see comment in simplify_locals fn eliminate_redundant_sets( instructions: &[Instruction], changed: &mut bool, diff --git a/loom-core/tests/optimization_tests.rs b/loom-core/tests/optimization_tests.rs index 1f42e4d..2013e72 100644 --- a/loom-core/tests/optimization_tests.rs +++ b/loom-core/tests/optimization_tests.rs @@ -886,6 +886,7 @@ fn test_self_ne_i64() { // ============================================================================ #[test] +#[ignore] // eliminate_redundant_sets disabled - has bugs fn test_rse_simple_redundant_set() { let input = r#" (module @@ -923,6 +924,7 @@ fn test_rse_simple_redundant_set() { } #[test] +#[ignore] // eliminate_redundant_sets disabled - has bugs fn test_rse_with_intervening_get() { let input = r#" (module @@ -952,6 +954,7 @@ fn test_rse_with_intervening_get() { } #[test] +#[ignore] // eliminate_redundant_sets disabled - has bugs fn test_rse_multiple_redundant_sets() { let input = r#" (module @@ -979,6 +982,7 @@ fn test_rse_multiple_redundant_sets() { } #[test] +#[ignore] // eliminate_redundant_sets disabled - has bugs fn test_rse_different_locals() { let input = r#" (module @@ -1018,6 +1022,7 @@ fn test_rse_different_locals() { } #[test] +#[ignore] // eliminate_redundant_sets disabled - has bugs fn test_rse_with_tee() { let input = r#" (module @@ -1050,6 +1055,7 @@ fn test_rse_with_tee() { } #[test] +#[ignore] // eliminate_redundant_sets disabled - has bugs fn test_rse_in_block() { let input = r#" (module @@ -1078,6 +1084,7 @@ fn test_rse_in_block() { } #[test] +#[ignore] // eliminate_redundant_sets disabled - has bugs fn test_rse_conservative_in_if() { let input = r#" (module @@ -1637,6 +1644,7 @@ fn test_merge_constant_adds_i64() { } #[test] +#[ignore] // eliminate_redundant_sets disabled - has bugs fn test_rse_no_false_positive_with_call() { let input = r#" (module