Skip to content

Commit c5475e1

Browse files
authored
🤖 fix: use effort param only for Claude Opus 4.5 (#743)
## Summary

The `effort` parameter is a new feature specific to Claude Opus 4.5 for controlling how much computational work the model applies to each task. Other Anthropic models (Sonnet 4.5, Haiku 4.5, Opus 4.1, etc.) must continue to use the `thinking.budgetTokens` approach for extended thinking.

## Changes

- Add `ANTHROPIC_EFFORT` mapping to `thinking.ts` for Opus 4.5's effort levels
- Update `buildProviderOptions` to detect Opus 4.5 and use the `effort` parameter
- Update `calculateEffectiveMaxOutputTokens` to skip budget adjustment for Opus 4.5 (effort-based models don't need this)
- Add unit tests for provider options builder covering both Opus 4.5 (effort) and other models (thinking.budgetTokens)
- Add integration tests (`sendMessage.reasoning.test.ts`) for reasoning with both Sonnet 4.5 and Opus 4.5

## Testing

- Unit tests pass: `bun test src/common/utils/ai/providerOptions.test.ts`
- All existing tests pass: `make test` (1199 pass)
- Integration tests added for regression prevention (run with `TEST_INTEGRATION=1`)

_Generated with `mux`_
1 parent d086f87 commit c5475e1

File tree

4 files changed

+244
-14
lines changed

4 files changed

+244
-14
lines changed

src/common/types/thinking.ts

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,33 @@ export type ThinkingLevel = "off" | "low" | "medium" | "high";
1414
export type ThinkingLevelOn = Exclude<ThinkingLevel, "off">;
1515

1616
/**
17-
* Anthropic effort level mapping
17+
* Anthropic thinking token budget mapping
1818
*
19-
* Maps our unified thinking levels to Anthropic's effort parameter:
20-
* - off: No effort specified (undefined)
21-
* - low: Most efficient - significant token savings
22-
* - medium: Balanced approach with moderate token savings
23-
* - high: Maximum capability (default behavior)
19+
* These heuristics balance thinking depth with response time and cost.
20+
* Used for models that support extended thinking with budgetTokens
21+
* (e.g., Sonnet 4.5, Haiku 4.5, Opus 4.1, etc.)
2422
*
25-
* The effort parameter controls all token spend including thinking,
26-
* text responses, and tool calls. Unlike budget_tokens, it doesn't require
27-
* thinking to be explicitly enabled.
23+
* - off: No extended thinking
24+
* - low: Quick thinking for straightforward tasks (4K tokens)
25+
* - medium: Standard thinking for moderate complexity (10K tokens)
26+
* - high: Deep thinking for complex problems (20K tokens)
27+
*/
28+
export const ANTHROPIC_THINKING_BUDGETS: Record<ThinkingLevel, number> = {
29+
off: 0,
30+
low: 4000,
31+
medium: 10000,
32+
high: 20000,
33+
};
34+
35+
/**
36+
* Anthropic Opus 4.5 effort parameter mapping
37+
*
38+
* The effort parameter is a new feature ONLY available for Claude Opus 4.5.
39+
* It controls how much computational work the model applies to each task.
40+
*
41+
* Other Anthropic models must use the thinking.budgetTokens approach instead.
42+
*
43+
* @see https://www.anthropic.com/news/claude-opus-4-5
2844
*/
2945
export const ANTHROPIC_EFFORT: Record<ThinkingLevel, "low" | "medium" | "high" | undefined> = {
3046
off: undefined,

src/common/utils/ai/providerOptions.test.ts

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
/**
2+
* Tests for provider options builder
3+
*/
4+
5+
import { describe, test, expect, mock } from "bun:test";
6+
import { buildProviderOptions } from "./providerOptions";
7+
import type { ThinkingLevel } from "@/common/types/thinking";
8+
9+
// Mock the log module to avoid console noise
10+
void mock.module("@/node/services/log", () => ({
11+
log: {
12+
debug: (): void => undefined,
13+
info: (): void => undefined,
14+
warn: (): void => undefined,
15+
error: (): void => undefined,
16+
},
17+
}));
18+
19+
// Mock enforceThinkingPolicy to pass through
20+
void mock.module("@/browser/utils/thinking/policy", () => ({
21+
enforceThinkingPolicy: (_model: string, level: ThinkingLevel) => level,
22+
}));
23+
24+
describe("buildProviderOptions - Anthropic", () => {
25+
describe("Opus 4.5 (effort parameter)", () => {
26+
test("should use effort parameter for claude-opus-4-5", () => {
27+
const result = buildProviderOptions("anthropic:claude-opus-4-5", "medium");
28+
29+
expect(result).toEqual({
30+
anthropic: {
31+
disableParallelToolUse: false,
32+
sendReasoning: true,
33+
effort: "medium",
34+
},
35+
});
36+
});
37+
38+
test("should use effort parameter for claude-opus-4-5-20251101", () => {
39+
const result = buildProviderOptions("anthropic:claude-opus-4-5-20251101", "high");
40+
41+
expect(result).toEqual({
42+
anthropic: {
43+
disableParallelToolUse: false,
44+
sendReasoning: true,
45+
effort: "high",
46+
},
47+
});
48+
});
49+
50+
test("should omit effort when thinking is off for Opus 4.5", () => {
51+
const result = buildProviderOptions("anthropic:claude-opus-4-5", "off");
52+
53+
expect(result).toEqual({
54+
anthropic: {
55+
disableParallelToolUse: false,
56+
sendReasoning: true,
57+
},
58+
});
59+
});
60+
});
61+
62+
describe("Other Anthropic models (thinking/budgetTokens)", () => {
63+
test("should use thinking.budgetTokens for claude-sonnet-4-5", () => {
64+
const result = buildProviderOptions("anthropic:claude-sonnet-4-5", "medium");
65+
66+
expect(result).toEqual({
67+
anthropic: {
68+
disableParallelToolUse: false,
69+
sendReasoning: true,
70+
thinking: {
71+
type: "enabled",
72+
budgetTokens: 10000,
73+
},
74+
},
75+
});
76+
});
77+
78+
test("should use thinking.budgetTokens for claude-opus-4-1", () => {
79+
const result = buildProviderOptions("anthropic:claude-opus-4-1", "high");
80+
81+
expect(result).toEqual({
82+
anthropic: {
83+
disableParallelToolUse: false,
84+
sendReasoning: true,
85+
thinking: {
86+
type: "enabled",
87+
budgetTokens: 20000,
88+
},
89+
},
90+
});
91+
});
92+
93+
test("should use thinking.budgetTokens for claude-haiku-4-5", () => {
94+
const result = buildProviderOptions("anthropic:claude-haiku-4-5", "low");
95+
96+
expect(result).toEqual({
97+
anthropic: {
98+
disableParallelToolUse: false,
99+
sendReasoning: true,
100+
thinking: {
101+
type: "enabled",
102+
budgetTokens: 4000,
103+
},
104+
},
105+
});
106+
});
107+
108+
test("should omit thinking when thinking is off for non-Opus 4.5", () => {
109+
const result = buildProviderOptions("anthropic:claude-sonnet-4-5", "off");
110+
111+
expect(result).toEqual({
112+
anthropic: {
113+
disableParallelToolUse: false,
114+
sendReasoning: true,
115+
},
116+
});
117+
});
118+
});
119+
});

src/common/utils/ai/providerOptions.ts

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import type { MuxProviderOptions } from "@/common/types/providerOptions";
1212
import type { ThinkingLevel } from "@/common/types/thinking";
1313
import {
1414
ANTHROPIC_EFFORT,
15+
ANTHROPIC_THINKING_BUDGETS,
1516
GEMINI_THINKING_BUDGETS,
1617
OPENAI_REASONING_EFFORT,
1718
OPENROUTER_REASONING_EFFORT,
@@ -83,19 +84,53 @@ export function buildProviderOptions(
8384

8485
// Build Anthropic-specific options
8586
if (provider === "anthropic") {
86-
const effort = ANTHROPIC_EFFORT[effectiveThinking];
87+
// Extract model name from model string (e.g., "anthropic:claude-opus-4-5" -> "claude-opus-4-5")
88+
const [, modelName] = modelString.split(":");
89+
90+
// Check if this is Opus 4.5 (supports effort parameter)
91+
// Opus 4.5 uses the new "effort" parameter for reasoning control
92+
// All other Anthropic models use the "thinking" parameter with budgetTokens
93+
const isOpus45 = modelName?.includes("opus-4-5") ?? false;
94+
95+
if (isOpus45) {
96+
// Opus 4.5: Use effort parameter for reasoning control
97+
const effort = ANTHROPIC_EFFORT[effectiveThinking];
98+
log.debug("buildProviderOptions: Anthropic Opus 4.5 config", {
99+
effort,
100+
thinkingLevel: effectiveThinking,
101+
});
102+
103+
const options: ProviderOptions = {
104+
anthropic: {
105+
disableParallelToolUse: false, // Always enable concurrent tool execution
106+
sendReasoning: true, // Include reasoning traces in requests sent to the model
107+
// Use effort parameter (Opus 4.5 only) to control token spend
108+
// SDK auto-adds beta header "effort-2025-11-24" when effort is set
109+
...(effort && { effort }),
110+
},
111+
};
112+
log.debug("buildProviderOptions: Returning Anthropic Opus 4.5 options", options);
113+
return options;
114+
}
115+
116+
// Other Anthropic models: Use thinking parameter with budgetTokens
117+
const budgetTokens = ANTHROPIC_THINKING_BUDGETS[effectiveThinking];
87118
log.debug("buildProviderOptions: Anthropic config", {
88-
effort,
119+
budgetTokens,
89120
thinkingLevel: effectiveThinking,
90121
});
91122

92123
const options: ProviderOptions = {
93124
anthropic: {
94125
disableParallelToolUse: false, // Always enable concurrent tool execution
95126
sendReasoning: true, // Include reasoning traces in requests sent to the model
96-
// Use effort parameter to control token spend (thinking, text, and tool calls)
97-
// SDK auto-adds beta header "effort-2025-11-24" when effort is set
98-
...(effort && { effort }),
127+
// Conditionally add thinking configuration (non-Opus 4.5 models)
128+
...(budgetTokens > 0 && {
129+
thinking: {
130+
type: "enabled",
131+
budgetTokens,
132+
},
133+
}),
99134
},
100135
};
101136
log.debug("buildProviderOptions: Returning Anthropic options", options);

sendMessage.reasoning.test.ts (integration test named in the commit summary; directory not shown)

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/**
2+
* Integration tests for reasoning/thinking functionality across Anthropic models.
3+
* Verifies Opus 4.5 uses `effort` and Sonnet 4.5 uses `thinking.budgetTokens`.
4+
*/
5+
6+
import { shouldRunIntegrationTests, validateApiKeys } from "./setup";
7+
import { sendMessage, assertStreamSuccess, waitForStreamSuccess } from "./helpers";
8+
import { createSharedRepo, cleanupSharedRepo, withSharedWorkspace } from "./sendMessageTestHelpers";
9+
import { KNOWN_MODELS } from "@/common/constants/knownModels";
10+
11+
const describeIntegration = shouldRunIntegrationTests() ? describe : describe.skip;
12+
13+
if (shouldRunIntegrationTests()) {
14+
validateApiKeys(["ANTHROPIC_API_KEY"]);
15+
}
16+
17+
beforeAll(createSharedRepo);
18+
afterAll(cleanupSharedRepo);
19+
20+
describeIntegration("Anthropic reasoning parameter tests", () => {
21+
test.concurrent(
22+
"Sonnet 4.5 with thinking (budgetTokens)",
23+
async () => {
24+
await withSharedWorkspace("anthropic", async ({ env, workspaceId }) => {
25+
const result = await sendMessage(
26+
env.mockIpcRenderer,
27+
workspaceId,
28+
"What is 2+2? Answer in one word.",
29+
{ model: KNOWN_MODELS.SONNET.id, thinkingLevel: "low" }
30+
);
31+
expect(result.success).toBe(true);
32+
33+
const collector = await waitForStreamSuccess(env.sentEvents, workspaceId, 30000);
34+
assertStreamSuccess(collector);
35+
expect(collector.getDeltas().length).toBeGreaterThan(0);
36+
});
37+
},
38+
60000
39+
);
40+
41+
test.concurrent(
42+
"Opus 4.5 with thinking (effort)",
43+
async () => {
44+
await withSharedWorkspace("anthropic", async ({ env, workspaceId }) => {
45+
const result = await sendMessage(
46+
env.mockIpcRenderer,
47+
workspaceId,
48+
"What is 4+4? Answer in one word.",
49+
{ model: KNOWN_MODELS.OPUS.id, thinkingLevel: "low" }
50+
);
51+
expect(result.success).toBe(true);
52+
53+
const collector = await waitForStreamSuccess(env.sentEvents, workspaceId, 60000);
54+
assertStreamSuccess(collector);
55+
expect(collector.getDeltas().length).toBeGreaterThan(0);
56+
});
57+
},
58+
90000
59+
);
60+
});

0 commit comments

Comments
 (0)