Skip to content

Commit d8710d7

Browse files
authored
🤖 fix: improve web_fetch HTTP error reporting with body parsing (#747)
## Summary

Improves error messages when `web_fetch` encounters HTTP 4xx/5xx responses:

1. **HTTP status code in error**: Shows actual status code (e.g., `HTTP 404`) instead of generic `HTTP error (4xx/5xx)`
2. **Parsed error body**: When the server returns an error page (like a 404), we now parse and include its content so the AI can understand what went wrong
3. **Cloudflare detection**: Detects Cloudflare JS challenge pages and shows a clear error: `Cloudflare security challenge (page requires JavaScript)`

## Example Outputs

Before:

```
Failed to fetch URL: HTTP error (4xx/5xx)
```

After:

```
HTTP 404 [parsed 404 page content shown]
```

Or for Cloudflare-protected sites:

```
HTTP 403: Cloudflare security challenge (page requires JavaScript)
```

## Testing

- Added test for HTTP 404 status code parsing
- Added test for Cloudflare challenge detection

_Generated with `mux`_
1 parent 2c5365a commit d8710d7

File tree

4 files changed

+126
-16
lines changed

4 files changed

+126
-16
lines changed

src/browser/components/tools/WebFetchToolCall.tsx

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,9 +102,10 @@ export const WebFetchToolCall: React.FC<WebFetchToolCallProps> = ({
102102
</DetailSection>
103103
)}
104104

105-
{result.success && result.content && (
105+
{/* Show content for both success and error responses (error pages may have parsed content) */}
106+
{result.content && (
106107
<DetailSection>
107-
<DetailLabel>Content</DetailLabel>
108+
<DetailLabel>{result.success ? "Content" : "Error Page Content"}</DetailLabel>
108109
<div className="bg-code-bg max-h-[300px] overflow-y-auto rounded px-3 py-2 text-[12px]">
109110
<MarkdownRenderer content={result.content} />
110111
</div>

src/common/types/tools.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,4 +219,6 @@ export type WebFetchToolResult =
219219
| {
220220
success: false;
221221
error: string;
222+
/** Parsed error response body (e.g., from HTTP 4xx/5xx pages) */
223+
content?: string;
222224
};

src/node/services/tools/web_fetch.test.ts

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,4 +215,36 @@ describe("web_fetch tool", () => {
215215
expect(result.error).toContain("Failed to fetch URL");
216216
}
217217
});
218+
219+
// Test HTTP error handling with body parsing
220+
it("should include HTTP status code in error for 404 responses", async () => {
221+
using testEnv = createTestWebFetchTool();
222+
const args: WebFetchToolArgs = {
223+
// GitHub returns a proper 404 page for nonexistent users
224+
url: "https://github.com/this-user-definitely-does-not-exist-12345",
225+
};
226+
227+
const result = (await testEnv.tool.execute!(args, toolCallOptions)) as WebFetchToolResult;
228+
229+
expect(result.success).toBe(false);
230+
if (!result.success) {
231+
expect(result.error).toContain("HTTP 404");
232+
}
233+
});
234+
235+
it("should detect Cloudflare challenge pages", async () => {
236+
using testEnv = createTestWebFetchTool();
237+
const args: WebFetchToolArgs = {
238+
// platform.openai.com is known to serve Cloudflare challenges
239+
url: "https://platform.openai.com",
240+
};
241+
242+
const result = (await testEnv.tool.execute!(args, toolCallOptions)) as WebFetchToolResult;
243+
244+
expect(result.success).toBe(false);
245+
if (!result.success) {
246+
expect(result.error).toContain("Cloudflare");
247+
expect(result.error).toContain("JavaScript");
248+
}
249+
});
218250
});

src/node/services/tools/web_fetch.ts

Lines changed: 89 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,64 @@ import { execBuffered } from "@/node/utils/runtime/helpers";
1414

1515
const USER_AGENT = "Mux/1.0 (https://github.com/coder/mux; web-fetch tool)";
1616

17+
/**
 * Parse `curl -i` output into the final response's headers, its body, and its
 * status code.
 *
 * With `-L`, curl prints one header block per hop (and servers may also emit
 * 1xx interim responses), so the output can contain several header blocks
 * before the body. This walks those blocks from the start of the output and
 * keeps only the last one, instead of splitting on the first blank line and
 * scanning the whole text (body included) for status lines.
 *
 * @param output - Raw stdout of `curl -i`.
 * @returns `headers`: the final header block, lower-cased, including its
 *   status line and terminating blank line ("" when none was found);
 *   `body`: everything after that block; `statusCode`: the three-digit code
 *   of the final status line ("" when the output has no leading status line).
 */
function parseResponse(output: string): { headers: string; body: string; statusCode: string } {
  // Anchored at the start so status-line-looking text inside the body is ignored.
  const statusLinePattern = /^HTTP\/[\d.]+ (\d{3})[^\r\n]*/;
  // Earliest blank line, whether the server used CRLF or bare LF line endings.
  const blankLinePattern = /\r?\n\r?\n/;

  let remaining = output;
  let headers = "";
  let statusCode = "";
  let sawHeaderBlock = false;

  for (;;) {
    const statusMatch = statusLinePattern.exec(remaining);
    if (!statusMatch) break;

    sawHeaderBlock = true;
    statusCode = statusMatch[1] ?? "";

    const blankLine = blankLinePattern.exec(remaining);
    if (!blankLine) {
      // Header block was never terminated: everything left is headers, no body.
      headers = remaining.toLowerCase();
      remaining = "";
      break;
    }

    const blockEnd = blankLine.index + blankLine[0].length;
    headers = remaining.slice(0, blockEnd).toLowerCase();
    remaining = remaining.slice(blockEnd);

    // Only 1xx (interim) and 3xx (redirect) responses can be followed by
    // another header block; for anything else the rest is the body, even if
    // it happens to start with text that looks like a status line.
    const mayBeFollowedByAnotherBlock = statusCode.startsWith("1") || statusCode.startsWith("3");
    if (!mayBeFollowedByAnotherBlock) break;
  }

  if (sawHeaderBlock) {
    return { headers, body: remaining, statusCode };
  }

  // No leading status line: keep the previous behavior of splitting on the
  // first blank line, or treating the whole output as the body.
  const blankLine = blankLinePattern.exec(output);
  if (!blankLine) {
    return { headers: "", body: output, statusCode: "" };
  }
  const splitIndex = blankLine.index + blankLine[0].length;
  return {
    headers: output.slice(0, splitIndex).toLowerCase(),
    body: output.slice(splitIndex),
    statusCode: "",
  };
}
40+
41+
/**
 * Detect whether an error response is a Cloudflare challenge page.
 *
 * A response counts as a challenge when it carries a `cf-mitigated` header,
 * or when the body contains both "Just a moment" and "Enable JavaScript".
 *
 * The header is matched as a header *name* at the start of a line, so a mere
 * mention of the token inside another header's value (for example
 * `access-control-expose-headers: cf-mitigated`) is not treated as a challenge.
 *
 * @param headers - Raw response header block, one header per line (any case).
 * @param body - Response body text.
 * @returns `true` when the response looks like a Cloudflare challenge.
 */
function isCloudflareChallenge(headers: string, body: string): boolean {
  const hasMitigationHeader = /^cf-mitigated[ \t]*:/im.test(headers);
  if (hasMitigationHeader) {
    return true;
  }
  return body.includes("Just a moment") && body.includes("Enable JavaScript");
}
48+
49+
/**
 * Best-effort conversion of an HTML document into markdown.
 *
 * Builds a JSDOM document from `body` (with `url` as its document URL), runs
 * Readability over it, and converts the extracted article HTML to markdown
 * with Turndown (ATX headings, fenced code blocks).
 *
 * @param body - HTML source to extract from.
 * @param url - URL passed to JSDOM as the document's URL.
 * @param maxBytes - Upper bound on the markdown's `length`; longer output is
 *   cut to this many characters and a "[Content truncated]" marker is appended.
 * @returns The article title (or "Untitled" when Readability gives none) and
 *   the markdown content, or `null` when no article content was found or any
 *   step threw.
 */
function tryExtractContent(
  body: string,
  url: string,
  maxBytes: number
): { title: string; content: string } | null {
  try {
    const parsed = new Readability(new JSDOM(body, { url }).window.document).parse();
    if (!parsed?.content) {
      return null;
    }

    const converter = new TurndownService({
      headingStyle: "atx",
      codeBlockStyle: "fenced",
    });
    const markdown = converter.turndown(parsed.content);
    const content =
      markdown.length > maxBytes
        ? markdown.slice(0, maxBytes) + "\n\n[Content truncated]"
        : markdown;

    return { title: parsed.title ?? "Untitled", content };
  } catch {
    // Extraction is optional: any parser/converter failure means "no content".
    return null;
  }
}
74+
1775
/**
1876
* Web fetch tool factory for AI assistant
1977
* Creates a tool that fetches web pages and extracts readable content as markdown
@@ -62,12 +120,41 @@ export const createWebFetchTool: ToolFactory = (config: ToolConfiguration) => {
62120
const exitCodeMessages: Record<number, string> = {
63121
6: "Could not resolve host",
64122
7: "Failed to connect",
65-
22: "HTTP error (4xx/5xx)",
66123
28: "Operation timed out",
67124
35: "SSL/TLS handshake failed",
68125
56: "Network data receive error",
69126
63: "Maximum file size exceeded",
70127
};
128+
129+
// For HTTP errors (exit 22), try to parse and include the error body
130+
if (result.exitCode === 22 && result.stdout) {
131+
const { headers, body, statusCode } = parseResponse(result.stdout);
132+
const statusText = statusCode ? `HTTP ${statusCode}` : "HTTP error";
133+
134+
// Detect Cloudflare challenge pages
135+
if (isCloudflareChallenge(headers, body)) {
136+
return {
137+
success: false,
138+
error: `${statusText}: Cloudflare security challenge (page requires JavaScript)`,
139+
};
140+
}
141+
142+
// Try to extract readable content from error page
143+
const extracted = tryExtractContent(body, url, WEB_FETCH_MAX_OUTPUT_BYTES);
144+
if (extracted) {
145+
return {
146+
success: false,
147+
error: statusText,
148+
content: extracted.content,
149+
};
150+
}
151+
152+
return {
153+
success: false,
154+
error: statusText,
155+
};
156+
}
157+
71158
const reason = exitCodeMessages[result.exitCode] || result.stderr || "Unknown error";
72159
return {
73160
success: false,
@@ -76,19 +163,7 @@ export const createWebFetchTool: ToolFactory = (config: ToolConfiguration) => {
76163
}
77164

78165
// Parse headers and body from curl -i output
79-
// Headers end with \r\n\r\n (or \n\n for some servers)
80-
const output = result.stdout;
81-
const headerEndIndex = output.indexOf("\r\n\r\n");
82-
const altHeaderEndIndex = output.indexOf("\n\n");
83-
const splitIndex =
84-
headerEndIndex !== -1
85-
? headerEndIndex + 4
86-
: altHeaderEndIndex !== -1
87-
? altHeaderEndIndex + 2
88-
: 0;
89-
90-
const headers = splitIndex > 0 ? output.slice(0, splitIndex).toLowerCase() : "";
91-
const body = splitIndex > 0 ? output.slice(splitIndex) : output;
166+
const { headers, body } = parseResponse(result.stdout);
92167

93168
if (!body || body.trim().length === 0) {
94169
return {

0 commit comments

Comments
 (0)