Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/beige-taxes-punch.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@browserbasehq/stagehand-evals": patch
---

Update screenshot collector in agent evals cli
6 changes: 5 additions & 1 deletion .prettierignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,8 @@ packages/core/lib/dom/build/
packages/core/lib/v3/dom/build/
packages/evals/dist/
packages/docs/
*.min.js
*.min.js
.browserbase/
.browserbase/**
**/.browserbase/
**/.browserbase/**
3 changes: 3 additions & 0 deletions eslint.config.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ export default [
"packages/core/lib/v3/dom/build/**",
"**/*.config.js",
"**/*.config.mjs",
".browserbase/**",
"**/.browserbase/**",
"**/*.json",
],
},
pluginJs.configs.recommended,
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"build": "turbo run build",
"build:cli": "turbo run build:cli",
"lint": "turbo run lint",
"format": "turbo run format",
"format": "prettier --write .",
"prettier": "prettier --write .",
"eslint": "eslint .",
"test": "turbo run test",
Expand Down
19 changes: 19 additions & 0 deletions packages/core/lib/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { ModelProvider } from "./v3/types/public/model";
import { ZodPathSegments } from "./v3/types/private/internal";
import type { StagehandZodSchema } from "./v3/zodCompat";
import { isZod4Schema } from "./v3/zodCompat";
import sharp from "sharp";

const ID_PATTERN = /^\d+-\d+$/;

Expand Down Expand Up @@ -837,3 +838,21 @@ export function jsonSchemaToZod(schema: JsonSchema): ZodTypeAny {
return z.any();
}
}

/**
 * Resize an image by a uniform scale factor and re-encode it as a PNG.
 *
 * The target width and height are the source dimensions multiplied by
 * `scaleFactor`, rounded to the nearest pixel and clamped to at least 1px so
 * that very small images or factors never request a zero-sized output.
 * The output is written as a palette-based PNG (`palette: true`) at the
 * maximum compression level.
 *
 * @param img - Encoded source image bytes, in any format sharp can read.
 * @param scaleFactor - Positive, finite multiplier applied to both dimensions
 *   (e.g. `0.5` halves the width and the height).
 * @returns The resized image encoded as a PNG buffer.
 * @throws RangeError if `scaleFactor` is not a positive finite number.
 * @throws Error if the source dimensions cannot be read from the image
 *   metadata; errors raised by sharp while decoding/encoding also propagate.
 */
export async function imageResize(
  img: Buffer,
  scaleFactor: number,
): Promise<Buffer> {
  // Reject bad factors up front: 0, negatives, NaN and Infinity would otherwise
  // only surface later as an opaque dimension error from sharp.
  if (!Number.isFinite(scaleFactor) || scaleFactor <= 0) {
    throw new RangeError(
      `imageResize: scaleFactor must be a positive finite number, received ${scaleFactor}`,
    );
  }

  const { width: sourceWidth, height: sourceHeight } =
    await sharp(img).metadata();
  // Guard against missing dimensions so NaN is never passed to resize().
  if (!sourceWidth || !sourceHeight) {
    throw new Error(
      "imageResize: unable to determine the source image dimensions",
    );
  }

  // Rounding can produce 0 for tiny inputs; sharp requires positive integers.
  const width = Math.max(1, Math.round(sourceWidth * scaleFactor));
  const height = Math.max(1, Math.round(sourceHeight * scaleFactor));

  return await sharp(img)
    .resize(width, height, { fit: "inside", kernel: sharp.kernel.lanczos3 })
    .png({
      compressionLevel: 9,
      adaptiveFiltering: true,
      palette: true,
    })
    .toBuffer();
}
28 changes: 28 additions & 0 deletions packages/core/lib/v3/handlers/v3AgentHandler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,17 @@ export class V3AgentHandler {
}
}
state.currentPageUrl = (await this.v3.context.awaitActivePage()).url();

// Capture screenshot after tool execution and emit event
try {
await this.captureAndEmitScreenshot();
} catch (e) {
this.logger({
category: "agent",
message: `Warning: Failed to capture screenshot: ${getErrorMessage(e)}`,
level: 1,
});
}
}

if (userCallback) {
Expand Down Expand Up @@ -448,4 +459,21 @@ export class V3AgentHandler {
}
return stepCountIs(maxSteps)(result);
}

/**
 * Take a viewport-only screenshot (`fullPage: false`) of the currently active
 * page and publish it on the V3 event bus.
 *
 * Any failure while resolving the page, taking the screenshot, or emitting the
 * event is caught here and reported through the logger at level 0 instead of
 * being propagated to the caller. The event name string, including its
 * spelling, is what subscribers must register for.
 */
private async captureAndEmitScreenshot(): Promise<void> {
  try {
    const activePage = await this.v3.context.awaitActivePage();
    const imageBuffer = await activePage.screenshot({ fullPage: false });
    this.v3.bus.emit("agent_screensot_taken_event", imageBuffer);
  } catch (err) {
    const message = `Error capturing screenshot: ${getErrorMessage(err)}`;
    this.logger({ category: "agent", message, level: 0 });
  }
}
}
23 changes: 2 additions & 21 deletions packages/core/lib/v3/handlers/v3CuaAgentHandler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@ export class V3CuaAgentHandler {
private agentClient: AgentClient;
private options: AgentHandlerOptions;
private highlightCursor: boolean;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
private screenshotCollector?: any;

constructor(
v3: V3,
Expand Down Expand Up @@ -545,9 +543,8 @@ export class V3CuaAgentHandler {
try {
const page = await this.v3.context.awaitActivePage();
const base64Image = await page.screenshot({ fullPage: false });
if (this.screenshotCollector) {
this.screenshotCollector.addScreenshot(base64Image);
}
// Emit screenshot event via the bus
this.v3.bus.emit("agent_screensot_taken_event", base64Image);
const currentUrl = page.url();
return await this.agentClient.captureScreenshot({
base64Image,
Expand All @@ -571,20 +568,4 @@ export class V3CuaAgentHandler {
// Best-effort only
}
}

/**
* Set the screenshot collector for this agent handler
*/
// eslint-disable-next-line @typescript-eslint/no-explicit-any
setScreenshotCollector(collector: any): void {
this.screenshotCollector = collector;
}

/**
* Get the screenshot collector
*/
// eslint-disable-next-line @typescript-eslint/no-explicit-any
getScreenshotCollector(): any {
return this.screenshotCollector;
}
}
7 changes: 7 additions & 0 deletions packages/core/lib/v3/v3.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import dotenv from "dotenv";
import { EventEmitter } from "events";
import fs from "fs";
import os from "os";
import path from "path";
Expand Down Expand Up @@ -136,6 +137,12 @@ export class V3 {
private observeHandler: ObserveHandler | null = null;
private ctx: V3Context | null = null;
public llmClient!: LLMClient;

/**
* Event bus for internal communication.
* Emits 'agent_screensot_taken_event' with the captured screenshot as its payload whenever a screenshot is taken during agent execution.
*/
public readonly bus: EventEmitter = new EventEmitter();
private modelName: AvailableModel;
private modelClientOptions: ClientOptions;
private llmProvider: LLMProvider;
Expand Down
1 change: 1 addition & 0 deletions packages/core/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
"pino": "^9.6.0",
"pino-pretty": "^13.0.0",
"playwright": "^1.52.0",
"sharp": "^0.34.5",
"ws": "^8.18.0",
"zod-to-json-schema": "^3.25.0"
},
Expand Down
8 changes: 1 addition & 7 deletions packages/evals/index.eval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -386,13 +386,7 @@ const generateFilteredTestcases = (): Testcase[] => {
// Pass full EvalInput to the task (data-driven params available via input.params)
let result;
try {
result = await taskFunction({
// ...taskInput,
v3: v3Input?.v3,
v3Agent: v3Input?.agent,
logger: v3Input?.logger,
v3Input,
});
result = await taskFunction({ ...v3Input, input });
// Log result to console
if (result && result._success) {
console.log(`✅ ${input.name}: Passed`);
Expand Down
5 changes: 3 additions & 2 deletions packages/evals/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,12 @@
"format": "prettier --write ."
},
"dependencies": {
"@ai-sdk/provider": "^2.0.0",
"@browserbasehq/stagehand": "workspace:*",
"ai": "^5.0.0",
"@ai-sdk/provider": "^2.0.0",
"openai": "^4.87.1",
"dotenv": "16.4.5",
"openai": "^4.87.1",
"sharp": "^0.34.5",
"zod": "^4.1.8"
},
"devDependencies": {
Expand Down
10 changes: 9 additions & 1 deletion packages/evals/tasks/agent/onlineMind2Web.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ export const onlineMind2Web: EvalFunction = async ({
});

const agent = v3.agent({
cua: true,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will address conditional loading in a follow-up PR.

model: modelName,
systemPrompt: `You are a helpful assistant that must solve the task by browsing. At the end, produce a single line: "Final Answer: <answer>" summarizing the requested result (e.g., score, list, or text). Current page: ${await page.title()}. ALWAYS OPERATE WITHIN THE PAGE OPENED BY THE USER, WHICHEVER TASK YOU ARE ATTEMPTING TO COMPLETE CAN BE ACCOMPLISHED WITHIN THE PAGE.`,
});
Expand All @@ -50,14 +51,21 @@ export const onlineMind2Web: EvalFunction = async ({
captureOnNavigation: true, // Also capture on page navigation
});

// Subscribe to screenshot events from the agent via the bus
const screenshotHandler = (buffer: Buffer) => {
screenshotCollector.addScreenshot(buffer);
};
v3.bus.on("agent_screensot_taken_event", screenshotHandler);

screenshotCollector.start();

const agentResult = await agent.execute({
instruction: params.confirmed_task,
maxSteps: Number(process.env.AGENT_EVAL_MAX_STEPS) || 50,
});

// Stop collecting and get all screenshots
// Stop collecting, clean up event listener, and get all screenshots
v3.bus.off("agent_screensot_taken_event", screenshotHandler);
const screenshots = screenshotCollector.stop();

logger.log({
Expand Down
1 change: 1 addition & 0 deletions packages/evals/types/screenshotCollector.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/**
 * Options accepted by the evals screenshot collector.
 * NOTE(review): field semantics below are inferred from names and call sites;
 * confirm against the collector implementation.
 */
export interface ScreenshotCollectorOptions {
// Presumably the delay between periodic captures; units are not visible here — TODO confirm.
interval?: number;
// Presumably an upper bound on how many screenshots are retained — TODO confirm.
maxScreenshots?: number;
/** @deprecated for V3, there's a new method to intercept screenshots by injecting into the agent loop */
captureOnNavigation?: boolean;
}

Expand Down
Loading