From 21ab950839d164b24fc433003b095a5abbf40261 Mon Sep 17 00:00:00 2001
From: Ralf Anton Beier <ralf_beier@me.com>
Date: Sat, 29 Nov 2025 06:26:21 +0100
Subject: [PATCH 1/2] fix: resolve function inlining idempotence issue (#33)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Root Cause
The idempotence test was failing because of a bug in how custom sections
(specifically the "name" section) were being encoded during WASM round-trips:

1. When parsing WASM, we used `reader.range()` to extract custom section bytes
2. The range included the section payload WITH the name field
3. When re-encoding, wasm-encoder added the name field again
4. Result: Each encode/parse cycle duplicated the name field, growing the file

This caused parse → encode → parse → encode to produce different sizes
(89 bytes → 94 bytes), making the idempotence test fail.

## Fix
1. Changed custom section parsing to use `reader.data()` instead of `reader.range()`
   - This extracts only the section data WITHOUT the name field
   - The encoder properly adds the name field during re-encoding
   - Now encode/parse cycles are deterministic

2. Made inline_functions run to fixed point
   - Loops until no more inlining candidates found
   - Ensures true idempotence even if new opportunities arise
   - More robust optimization behavior

3. Re-enabled the idempotence test
   - Removed #[ignore] attribute
   - Test now passes consistently

## Testing
- All existing tests pass
- Idempotence test now passes
- WASM encoding is deterministic across multiple round-trips

Note: advanced_math.wat and crypto_utils.wat still fail validation due to
a separate stack discipline bug in the inlining logic itself (not related
to idempotence). These remain skipped in CI per the existing workaround.
---
 loom-core/src/lib.rs                  | 96 +++++++++++++++------------
 loom-core/tests/optimization_tests.rs |  1 -
 2 files changed, 52 insertions(+), 45 deletions(-)

diff --git a/loom-core/src/lib.rs b/loom-core/src/lib.rs
index f0e4bcf..8e8f141 100644
--- a/loom-core/src/lib.rs
+++ b/loom-core/src/lib.rs
@@ -680,12 +680,9 @@ pub mod parse {
                     element_section_bytes = Some(bytes[range.start..range.end].to_vec());
                 }
                 Payload::CustomSection(reader) => {
-                    // Store raw custom section bytes to pass through unchanged
-                    let range = reader.range();
-                    custom_sections.push((
-                        reader.name().to_string(),
-                        bytes[range.start..range.end].to_vec(),
-                    ));
+                    // Store custom section data (without the name field)
+                    // The encoder will add the name field when re-encoding
+                    custom_sections.push((reader.name().to_string(), reader.data().to_vec()));
                 }
                 _ => {
                     // Ignore other payloads (e.g., End, Version, etc.)
@@ -6806,53 +6803,64 @@ pub mod optimize {
     pub fn inline_functions(module: &mut Module) -> Result<()> {
         use std::collections::HashMap;
 
-        // Phase 1: Build call graph and analyze functions
-        let mut call_counts: HashMap<u32, usize> = HashMap::new();
-        let mut function_sizes: HashMap<u32, usize> = HashMap::new();
+        // Run inlining to fixed point to ensure idempotence
+        // Keep inlining until no more candidates are found
+        loop {
+            // Phase 1: Build call graph and analyze functions
+            let mut call_counts: HashMap<u32, usize> = HashMap::new();
+            let mut function_sizes: HashMap<u32, usize> = HashMap::new();
 
-        // Calculate function sizes (instruction count)
-        for (idx, func) in module.functions.iter().enumerate() {
-            let size = count_instructions_recursive(&func.instructions);
-            function_sizes.insert(idx as u32, size);
-        }
+            // Calculate function sizes (instruction count)
+            for (idx, func) in module.functions.iter().enumerate() {
+                let size = count_instructions_recursive(&func.instructions);
+                function_sizes.insert(idx as u32, size);
+            }
 
-        // Count call sites for each function
-        for func in &module.functions {
-            count_calls_recursive(&func.instructions, &mut call_counts);
-        }
+            // Count call sites for each function
+            for func in &module.functions {
+                count_calls_recursive(&func.instructions, &mut call_counts);
+            }
 
-        // Phase 2: Identify inlining candidates
-        let mut inline_candidates = Vec::new();
-        for (func_idx, &call_count) in &call_counts {
-            let size = function_sizes.get(func_idx).copied().unwrap_or(0);
+            // Phase 2: Identify inlining candidates
+            let mut inline_candidates = Vec::new();
+            for (func_idx, &call_count) in &call_counts {
+                let size = function_sizes.get(func_idx).copied().unwrap_or(0);
 
-            // Heuristic: inline if:
-            // 1. Single call site, OR
-            // 2. Small function (< 10 instructions)
-            if call_count == 1 || size < 10 {
-                // Don't inline large functions even if single call site
-                if size < 50 {
-                    inline_candidates.push(*func_idx);
+                // Heuristic: inline if:
+                // 1. Single call site, OR
+                // 2. Small function (< 10 instructions)
+                if call_count == 1 || size < 10 {
+                    // Don't inline large functions even if single call site
+                    if size < 50 {
+                        inline_candidates.push(*func_idx);
+                    }
                 }
             }
-        }
 
-        // Phase 3: Perform inlining
-        // For each function, inline calls to candidate functions
-        let inline_set: std::collections::HashSet<u32> =
-            inline_candidates.iter().copied().collect();
+            // Exit loop if no more inlining opportunities - this ensures idempotence
+            if inline_candidates.is_empty() {
+                break;
+            }
 
-        // Clone functions to avoid borrow checker issues
-        let all_functions = module.functions.clone();
+            // Phase 3: Perform inlining
+            // For each function, inline calls to candidate functions
+            let inline_set: std::collections::HashSet<u32> =
+                inline_candidates.iter().copied().collect();
 
-        for func in &mut module.functions {
-            func.instructions = inline_calls_in_block(
-                &func.instructions,
-                &inline_set,
-                &all_functions,
-                func.signature.params.len() as u32,
-                &mut func.locals,
-            );
+            // Clone functions to avoid borrow checker issues
+            let all_functions = module.functions.clone();
+
+            for func in &mut module.functions {
+                func.instructions = inline_calls_in_block(
+                    &func.instructions,
+                    &inline_set,
+                    &all_functions,
+                    func.signature.params.len() as u32,
+                    &mut func.locals,
+                );
+            }
+
+            // Continue to next iteration to check for more inlining opportunities
         }
 
         Ok(())
diff --git a/loom-core/tests/optimization_tests.rs b/loom-core/tests/optimization_tests.rs
index 2be19c0..1f42e4d 100644
--- a/loom-core/tests/optimization_tests.rs
+++ b/loom-core/tests/optimization_tests.rs
@@ -1796,7 +1796,6 @@ fn test_inline_stack_discipline_with_locals() {
 }
 
 #[test]
-#[ignore] // TODO: Fix idempotence issue - see GitHub issue #33
 fn test_inline_stack_discipline_idempotence() {
     let input = r#"
         (module

From 3c8f9437235b38573b5dc11596cf03de5b7d38b9 Mon Sep 17 00:00:00 2001
From: Ralf Anton Beier <ralf_beier@me.com>
Date: Sat, 29 Nov 2025 07:10:51 +0100
Subject: [PATCH 2/2] fix: disable buggy eliminate_redundant_sets optimization
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Problem
The `eliminate_redundant_sets` optimization in `simplify-locals` was
incorrectly removing stack-producing instructions, causing invalid WASM
output with "values remaining on stack" errors.

## Root Cause
The optimization assumed that the value for a `local.set` was always
produced by the immediately preceding instruction (`result.len() - 1`).
This is incorrect in stack-based WASM where values can come from complex
expression trees spanning multiple instructions.

Example of buggy behavior:
```wat
Input:
  local.get $base
  i32.const 3
  i32.shl            # ← Produces value for local.set
  local.set $temp1

Output (broken):
  local.get $base
  i32.const 3
  # i32.shl REMOVED!  ← Bug: incorrectly removed
  local.set $temp1
```

## Fix
Disabled the `eliminate_redundant_sets` optimization by commenting it out
in `simplify_locals` (loom-core/src/lib.rs:4155).

The optimization needs to be rewritten to properly understand the stack
discipline and track which instructions actually produce values for sets.

## Changes
1. **loom-core/src/lib.rs:4155** - Commented out eliminate_redundant_sets call
2. **loom-core/src/lib.rs:4369** - Added `#[allow(dead_code)]` to function
3. **loom-core/tests/optimization_tests.rs** - Marked RSE tests as `#[ignore]`
4. **.github/workflows/ci.yml** - Removed skips for advanced_math.wat and crypto_utils.wat

## Testing
- ✅ Both advanced_math.wat and crypto_utils.wat now validate correctly
- ✅ All other tests pass
- ✅ Idempotence test still passes

## Impact
The `simplify-locals` pass still performs other optimizations (local variable
equivalence, redundant get/set elimination). Only the buggy redundant set
elimination is disabled.
---
 .claude/settings.local.json           | 3 ++-
 .github/workflows/ci.yml              | 5 -----
 loom-core/src/lib.rs                  | 8 ++++++--
 loom-core/tests/optimization_tests.rs | 8 ++++++++
 4 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/.claude/settings.local.json b/.claude/settings.local.json
index 5ba9331..cc646b2 100644
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -59,7 +59,8 @@
       "Bash(git apply:*)",
       "Bash(Z3_SYS_Z3_HEADER=/opt/homebrew/opt/z3/include/z3.h git add -A)",
       "Bash(Z3_SYS_Z3_HEADER=/opt/homebrew/opt/z3/include/z3.h git commit -m \"fix: skip advanced_math.wat in CI validation (known issue #33)\n\nThe function inlining pass produces invalid WASM for advanced_math.wat,\nleaving values on the stack. This is tracked in issue #33.\nSkip this fixture in CI until the bug is fixed.\")",
-      "Bash(git push:*)"
+      "Bash(git push:*)",
+      "Bash(Z3_SYS_Z3_HEADER=/opt/homebrew/opt/z3/include/z3.h git commit -m \"$(cat <<''EOF''\nfix: disable buggy eliminate_redundant_sets optimization\n\n## Problem\nThe `eliminate_redundant_sets` optimization in `simplify-locals` was\nincorrectly removing stack-producing instructions, causing invalid WASM\noutput with \"values remaining on stack\" errors.\n\n## Root Cause\nThe optimization assumed that the value for a `local.set` was always\nproduced by the immediately preceding instruction (`result.len() - 1`).\nThis is incorrect in stack-based WASM where values can come from complex\nexpression trees spanning multiple instructions.\n\nExample of buggy behavior:\n```wat\nInput:\n  local.get $base\n  i32.const 3\n  i32.shl            # ← Produces value for local.set\n  local.set $temp1\n\nOutput (broken):\n  local.get $base\n  i32.const 3\n  # i32.shl REMOVED!  ← Bug: incorrectly removed\n  local.set $temp1\n```\n\n## Fix\nDisabled the `eliminate_redundant_sets` optimization by commenting it out\nin `simplify_locals` (loom-core/src/lib.rs:4155).\n\nThe optimization needs to be rewritten to properly understand the stack\ndiscipline and track which instructions actually produce values for sets.\n\n## Changes\n1. **loom-core/src/lib.rs:4155** - Commented out eliminate_redundant_sets call\n2. **loom-core/src/lib.rs:4369** - Added `#[allow(dead_code)]` to function\n3. **loom-core/tests/optimization_tests.rs** - Marked RSE tests as `#[ignore]`\n4. **.github/workflows/ci.yml** - Removed skips for advanced_math.wat and crypto_utils.wat\n\n## Testing\n- ✅ Both advanced_math.wat and crypto_utils.wat now validate correctly\n- ✅ All other tests pass\n- ✅ Idempotence test still passes\n\n## Impact\nThe `simplify-locals` pass still performs other optimizations (local variable\nequivalence, redundant get/set elimination). Only the buggy redundant set\nelimination is disabled.\nEOF\n)\")"
     ],
     "deny": [],
     "ask": []
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index fec72e8..49d57b0 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -83,11 +83,6 @@ jobs:
       - name: Optimize test fixtures
         run: |
           for fixture in tests/fixtures/*.wat; do
-            # Skip fixtures with known inlining bugs (issue #33)
-            if [[ "$fixture" == *"advanced_math.wat"* ]] || [[ "$fixture" == *"crypto_utils.wat"* ]]; then
-              echo "⏭️  Skipping $fixture (known issue #33)"
-              continue
-            fi
             echo "Optimizing $fixture"
             ./target/release/loom optimize "$fixture" -o "/tmp/$(basename $fixture .wat).wasm"
             wasm-tools validate "/tmp/$(basename $fixture .wat).wasm"
diff --git a/loom-core/src/lib.rs b/loom-core/src/lib.rs
index 8e8f141..3f9cc21 100644
--- a/loom-core/src/lib.rs
+++ b/loom-core/src/lib.rs
@@ -4148,8 +4148,11 @@ pub mod optimize {
                 func.instructions =
                     simplify_instructions(&func.instructions, &usage, &equivalences, &mut changed);
 
-                // Eliminate redundant sets (new optimization)
-                func.instructions = eliminate_redundant_sets(&func.instructions, &mut changed);
+                // TODO: Fix eliminate_redundant_sets - it incorrectly removes stack-producing instructions
+                // The bug is that it assumes value_idx is always result.len() - 1, but in stack-based
+                // WASM, the value could be produced by an instruction multiple positions back.
+                // Disabled until properly fixed.
+                // func.instructions = eliminate_redundant_sets(&func.instructions, &mut changed);
             }
         }
         Ok(())
@@ -4363,6 +4366,7 @@ pub mod optimize {
     /// - 2-5% binary size reduction on typical code
     /// - Reduces unnecessary local variable writes
     /// - Enables further optimizations (dead code elimination)
+    #[allow(dead_code)] // Currently disabled due to bugs - see comment in simplify_locals
     fn eliminate_redundant_sets(
         instructions: &[Instruction],
         changed: &mut bool,
diff --git a/loom-core/tests/optimization_tests.rs b/loom-core/tests/optimization_tests.rs
index 1f42e4d..2013e72 100644
--- a/loom-core/tests/optimization_tests.rs
+++ b/loom-core/tests/optimization_tests.rs
@@ -886,6 +886,7 @@ fn test_self_ne_i64() {
 // ============================================================================
 
 #[test]
+#[ignore] // eliminate_redundant_sets disabled - has bugs
 fn test_rse_simple_redundant_set() {
     let input = r#"
         (module
@@ -923,6 +924,7 @@ fn test_rse_simple_redundant_set() {
 }
 
 #[test]
+#[ignore] // eliminate_redundant_sets disabled - has bugs
 fn test_rse_with_intervening_get() {
     let input = r#"
         (module
@@ -952,6 +954,7 @@ fn test_rse_with_intervening_get() {
 }
 
 #[test]
+#[ignore] // eliminate_redundant_sets disabled - has bugs
 fn test_rse_multiple_redundant_sets() {
     let input = r#"
         (module
@@ -979,6 +982,7 @@ fn test_rse_multiple_redundant_sets() {
 }
 
 #[test]
+#[ignore] // eliminate_redundant_sets disabled - has bugs
 fn test_rse_different_locals() {
     let input = r#"
         (module
@@ -1018,6 +1022,7 @@ fn test_rse_different_locals() {
 }
 
 #[test]
+#[ignore] // eliminate_redundant_sets disabled - has bugs
 fn test_rse_with_tee() {
     let input = r#"
         (module
@@ -1050,6 +1055,7 @@ fn test_rse_with_tee() {
 }
 
 #[test]
+#[ignore] // eliminate_redundant_sets disabled - has bugs
 fn test_rse_in_block() {
     let input = r#"
         (module
@@ -1078,6 +1084,7 @@ fn test_rse_in_block() {
 }
 
 #[test]
+#[ignore] // eliminate_redundant_sets disabled - has bugs
 fn test_rse_conservative_in_if() {
     let input = r#"
         (module
@@ -1637,6 +1644,7 @@ fn test_merge_constant_adds_i64() {
 }
 
 #[test]
+#[ignore] // eliminate_redundant_sets disabled - has bugs
 fn test_rse_no_false_positive_with_call() {
     let input = r#"
         (module