Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
ede184e
Fix: Use App (with plugins) for eval when available
ishanrajsingh Dec 5, 2025
d0db7fd
Merge branch 'main' into fix-eval-use-app-with-plugins
ishanrajsingh Dec 5, 2025
cf77b85
Fix critical issues from code review
ishanrajsingh Dec 5, 2025
2311528
feat: Support App plugins in evaluation framework
ishanrajsingh Dec 5, 2025
de93f9f
feat: Support App plugins in evaluation framework
ishanrajsingh Dec 5, 2025
884b8df
Update src/google/adk/evaluation/evaluation_generator.py
ishanrajsingh Dec 5, 2025
1380758
feat: Support App plugins in evaluation framework
ishanrajsingh Dec 5, 2025
9bd1433
feat: Support App plugins in evaluation framework
ishanrajsingh Dec 5, 2025
379c9bd
refactor: Address Gemini Code Assist feedback
ishanrajsingh Dec 5, 2025
3c1c7a4
Update src/google/adk/evaluation/evaluation_generator.py
ishanrajsingh Dec 5, 2025
dae85d3
Update src/google/adk/evaluation/evaluation_generator.py
ishanrajsingh Dec 5, 2025
d03e414
refactor: Address Gemini Code Assist feedback
ishanrajsingh Dec 5, 2025
c1babc6
Update src/google/adk/evaluation/evaluation_generator.py
ishanrajsingh Dec 5, 2025
7909bee
Update src/google/adk/cli/cli_tools_click.py
ishanrajsingh Dec 6, 2025
1f41633
Merge branch 'main' into fix-eval-use-app-with-plugins
ishanrajsingh Dec 6, 2025
03919bd
fix: address CI/CD check failures
ishanrajsingh Dec 11, 2025
5f6b1ca
Merge branch 'main' into fix-eval-use-app-with-plugins
ishanrajsingh Dec 11, 2025
40b879e
Merge branch 'main' into fix-eval-use-app-with-plugins
ishanrajsingh Dec 11, 2025
72f6639
Merge branch 'main' into fix-eval-use-app-with-plugins
ishanrajsingh Dec 12, 2025
e71bdc2
fix: Add AppInferenceAdapter to remove cli dependency and enable App-…
ishanrajsingh Dec 12, 2025
c04a97e
Merge branch 'fix-eval-use-app-with-plugins' of https://github.com/is…
ishanrajsingh Dec 12, 2025
81fc2b7
fix: Move app loader to utils to fix CI check
ishanrajsingh Dec 12, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion src/google/adk/cli/cli_tools_click.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,13 @@
import os
from pathlib import Path
import tempfile
import textwrap
from typing import Optional
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from ..apps.app import App

import textwrap

import click
from click.core import ParameterSource
Expand All @@ -36,6 +41,7 @@
from . import cli_deploy
from .. import version
from ..evaluation.constants import MISSING_EVAL_DEPENDENCIES_MESSAGE
from ..utils.app_loader import load_app_from_module
from .cli import run_cli
from .fast_api import get_fast_api_app
from .utils import envs
Expand Down Expand Up @@ -733,10 +739,19 @@ def cli_eval(
)

try:
# Try to load App if available (for plugin support like ReflectAndRetryToolPlugin)
app = load_app_from_module(agent_module_file_path)

if app:
logger.info("Using App instance for evaluation (plugins will be applied)")
else:
logger.info("No App found, using root_agent directly")

eval_service = LocalEvalService(
root_agent=root_agent,
eval_sets_manager=eval_sets_manager,
eval_set_results_manager=eval_set_results_manager,
app=app, # NEW: Pass app if available
user_simulator_provider=user_simulator_provider,
)

Expand Down
92 changes: 92 additions & 0 deletions src/google/adk/evaluation/app_inference_adapter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

from typing import Optional
from typing import TYPE_CHECKING

from ..artifacts.base_artifact_service import BaseArtifactService
from ..memory.base_memory_service import BaseMemoryService
from ..runners import Runner
from ..sessions.base_session_service import BaseSessionService
from ._retry_options_utils import EnsureRetryOptionsPlugin
from .request_intercepter_plugin import _RequestIntercepterPlugin
from .simulation.user_simulator import UserSimulator

if TYPE_CHECKING:
from .eval_case import SessionInput


class AppInferenceAdapter:
  """Runs eval inference through an App without importing from cli.*."""

  @staticmethod
  async def generate_inferences_from_app(
      app,
      user_simulator: UserSimulator,
      initial_session: Optional["SessionInput"],
      session_id: str,
      session_service: BaseSessionService,
      artifact_service: BaseArtifactService,
      memory_service: BaseMemoryService,
  ):
    """Creates the eval session and runs the user simulation against `app`.

    Args:
      app: The App to evaluate. A deep copy is what gets handed to the Runner,
        and the eval plugins are appended to that copy.
      user_simulator: Source of the user messages for the conversation.
      initial_session: Optional session input. When falsy, the user id
        defaults to "test_user_id", the app name to `app.name`, and the
        session state to an empty dict.
      session_id: Id of the session to create and run in.
      session_service: Service used to create the session; also passed to the
        Runner.
      artifact_service: Artifact service passed to the Runner.
      memory_service: Memory service passed to the Runner.

    Returns:
      Whatever `EvaluationGenerator._run_user_simulation_loop` returns for
      this session.
    """
    if initial_session:
      user_id = initial_session.user_id
      app_name = initial_session.app_name
      session_state = initial_session.state
    else:
      user_id = "test_user_id"
      app_name = app.name
      session_state = {}

    await session_service.create_session(
        app_name=app_name,
        user_id=user_id,
        session_id=session_id,
        state=session_state,
    )

    # Plugins the evaluation itself relies on.
    interceptor = _RequestIntercepterPlugin(name="request_intercepter_plugin")
    retry_options = EnsureRetryOptionsPlugin(name="ensure_retry_options")

    # Work on a deep copy so the appends below land on the copy only.
    eval_app = app.model_copy(deep=True)

    # Names are snapshotted once, before anything is appended.
    # NOTE(review): if the app already has a plugin with the interceptor's
    # name, `interceptor` is not registered on the copy but is still the
    # instance handed to the simulation loop below -- confirm this is intended.
    names_already_present = {plugin.name for plugin in eval_app.plugins}
    for eval_plugin in (interceptor, retry_options):
      if eval_plugin.name not in names_already_present:
        eval_app.plugins.append(eval_plugin)

    async with Runner(
        app=eval_app,
        session_service=session_service,
        artifact_service=artifact_service,
        memory_service=memory_service,
    ) as runner:
      # Imported at call time rather than at module top; presumably this
      # avoids a circular import with evaluation_generator -- TODO confirm.
      from .evaluation_generator import EvaluationGenerator

      return await EvaluationGenerator._run_user_simulation_loop(
          runner=runner,
          user_id=user_id,
          session_id=session_id,
          user_simulator=user_simulator,
          request_intercepter_plugin=interceptor,
      )
139 changes: 117 additions & 22 deletions src/google/adk/evaluation/evaluation_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@
from typing import Any
from typing import AsyncGenerator
from typing import Optional
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from ..apps.app import App

import uuid

from google.genai.types import Content
Expand All @@ -39,6 +44,7 @@
from .app_details import AgentDetails
from .app_details import AppDetails
from .eval_case import EvalCase
from .eval_case import IntermediateData
from .eval_case import Invocation
from .eval_case import InvocationEvent
from .eval_case import InvocationEvents
Expand Down Expand Up @@ -156,6 +162,55 @@ async def _process_query(
initial_session=initial_session,
)

@staticmethod
async def _run_user_simulation_loop(
    runner: Runner,
    user_id: str,
    session_id: str,
    user_simulator: UserSimulator,
    request_intercepter_plugin: _RequestIntercepterPlugin,
) -> list[Invocation]:
  """Drives the simulated conversation and converts it into invocations.

  The simulator is asked for the next user message until it reports a status
  other than SUCCESS. Each message is run through
  `_generate_inferences_for_single_user_invocation`, and the events it yields
  are accumulated in order.

  Args:
    runner: Runner the user messages are executed with.
    user_id: User id forwarded to each single-invocation run.
    session_id: Session id forwarded to each single-invocation run.
    user_simulator: Produces the next user message from the events so far.
    request_intercepter_plugin: Plugin passed to
      `_get_app_details_by_invocation_id` to obtain per-invocation app
      details.

  Returns:
    The result of `convert_events_to_eval_invocations` over all collected
    events and their app details.
  """
  conversation_events = []

  while True:
    # The simulator gets a deep copy, not the live list being accumulated.
    simulator_turn = await user_simulator.get_next_user_message(
        copy.deepcopy(conversation_events)
    )
    has_user_message = simulator_turn.status == UserSimulatorStatus.SUCCESS
    if not has_user_message:
      # Any non-SUCCESS status ends the conversation.
      break

    turn_events = (
        EvaluationGenerator._generate_inferences_for_single_user_invocation(
            runner, user_id, session_id, simulator_turn.user_message
        )
    )
    conversation_events.extend([event async for event in turn_events])

  details_by_invocation = EvaluationGenerator._get_app_details_by_invocation_id(
      conversation_events, request_intercepter_plugin
  )
  return EvaluationGenerator.convert_events_to_eval_invocations(
      conversation_events, details_by_invocation
  )

@staticmethod
async def _generate_inferences_for_single_user_invocation(
runner: Runner,
Expand Down Expand Up @@ -240,28 +295,12 @@ async def _generate_inferences_from_root_agent(
memory_service=memory_service,
plugins=[request_intercepter_plugin, ensure_retry_options_plugin],
) as runner:
events = []
while True:
next_user_message = await user_simulator.get_next_user_message(
copy.deepcopy(events)
)
if next_user_message.status == UserSimulatorStatus.SUCCESS:
async for (
event
) in EvaluationGenerator._generate_inferences_for_single_user_invocation(
runner, user_id, session_id, next_user_message.user_message
):
events.append(event)
else: # no message generated
break

app_details_by_invocation_id = (
EvaluationGenerator._get_app_details_by_invocation_id(
events, request_intercepter_plugin
)
)
return EvaluationGenerator.convert_events_to_eval_invocations(
events, app_details_by_invocation_id
return await EvaluationGenerator._run_user_simulation_loop(
runner=runner,
user_id=user_id,
session_id=session_id,
user_simulator=user_simulator,
request_intercepter_plugin=request_intercepter_plugin,
)

@staticmethod
Expand Down Expand Up @@ -326,6 +365,62 @@ def convert_events_to_eval_invocations(

return invocations

@staticmethod
async def _generate_inferences_from_app(
    app: "App",
    user_simulator: "UserSimulator",
    initial_session: Optional["SessionInput"],
    session_id: str,
    session_service: "BaseSessionService",
    artifact_service: "BaseArtifactService",
    memory_service: "BaseMemoryService",
) -> list["Invocation"]:
  """Runs the user simulation through an App so its plugins take part.

  Args:
    app: The App to evaluate. A deep copy of it, extended with the eval
      plugins, is what the Runner receives.
    user_simulator: Source of the user messages for the conversation.
    initial_session: Optional session input. When falsy, the user id defaults
      to "test_user_id", the app name to `app.name`, and the session state to
      an empty dict.
    session_id: Id of the session to create and run in.
    session_service: Service used to create the session; also passed to the
      Runner.
    artifact_service: Artifact service passed to the Runner.
    memory_service: Memory service passed to the Runner.

  Returns:
    The invocations produced by `_run_user_simulation_loop`.
  """
  if initial_session:
    user_id = initial_session.user_id
    app_name = initial_session.app_name
    starting_state = initial_session.state
  else:
    user_id = "test_user_id"
    app_name = app.name
    starting_state = {}

  await session_service.create_session(
      app_name=app_name,
      user_id=user_id,
      session_id=session_id,
      state=starting_state,
  )

  # The interceptor is later used to look up app details per invocation.
  interceptor = _RequestIntercepterPlugin(name="request_intercepter_plugin")
  retry_options = EnsureRetryOptionsPlugin(name="ensure_retry_options")

  # The appends happen on a deep copy, not on the App that was passed in.
  runner_app = app.model_copy(deep=True)

  # Skip any eval plugin whose name is already registered on the copy.
  registered_names = {plugin.name for plugin in runner_app.plugins}
  missing_eval_plugins = [
      candidate
      for candidate in (interceptor, retry_options)
      if candidate.name not in registered_names
  ]
  runner_app.plugins.extend(missing_eval_plugins)

  async with Runner(
      app=runner_app,
      session_service=session_service,
      artifact_service=artifact_service,
      memory_service=memory_service,
  ) as runner:
    return await EvaluationGenerator._run_user_simulation_loop(
        runner=runner,
        user_id=user_id,
        session_id=session_id,
        user_simulator=user_simulator,
        request_intercepter_plugin=interceptor,
    )

@staticmethod
def _get_app_details_by_invocation_id(
events: list[Event], request_intercepter: _RequestIntercepterPlugin
Expand Down
Loading