Skip to content

Commit 8942c98

Browse files
aianch and JackYPCOnline
authored and committed
feat(bedrock): add guardrail_last_turn_only option
1 parent 894ba80 commit 8942c98

File tree

3 files changed

+149
-2
lines changed

3 files changed

+149
-2
lines changed

src/strands/models/bedrock.py

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ class BedrockConfig(TypedDict, total=False):
8282
guardrail_redact_input_message: If a Bedrock Input guardrail triggers, replace the input with this message.
8383
guardrail_redact_output: Flag to redact output if guardrail is triggered. Defaults to False.
8484
guardrail_redact_output_message: If a Bedrock Output guardrail triggers, replace output with this message.
85+
guardrail_last_turn_only: Flag to send only the last turn to guardrails instead of full conversation.
86+
Defaults to False.
8587
max_tokens: Maximum number of tokens to generate in the response
8688
model_id: The Bedrock model ID (e.g., "us.anthropic.claude-sonnet-4-20250514-v1:0")
8789
include_tool_result_status: Flag to include status field in tool results.
@@ -105,6 +107,7 @@ class BedrockConfig(TypedDict, total=False):
105107
guardrail_redact_input_message: Optional[str]
106108
guardrail_redact_output: Optional[bool]
107109
guardrail_redact_output_message: Optional[str]
110+
guardrail_last_turn_only: Optional[bool]
108111
max_tokens: Optional[int]
109112
model_id: str
110113
include_tool_result_status: Optional[Literal["auto"] | bool]
@@ -206,9 +209,19 @@ def _format_request(
206209
Returns:
207210
A Bedrock converse stream request.
208211
"""
212+
# Filter messages for guardrails if guardrail_last_turn_only is enabled
213+
messages_for_request = messages
214+
if (
215+
self.config.get("guardrail_last_turn_only", False)
216+
and self.config.get("guardrail_id")
217+
and self.config.get("guardrail_version")
218+
):
219+
messages_for_request = self._get_last_turn_messages(messages)
220+
209221
if not tool_specs:
210222
has_tool_content = any(
211-
any("toolUse" in block or "toolResult" in block for block in msg.get("content", [])) for msg in messages
223+
any("toolUse" in block or "toolResult" in block for block in msg.get("content", []))
224+
for msg in messages_for_request
212225
)
213226
if has_tool_content:
214227
tool_specs = [noop_tool.tool_spec]
@@ -224,7 +237,7 @@ def _format_request(
224237

225238
return {
226239
"modelId": self.config["model_id"],
227-
"messages": self._format_bedrock_messages(messages),
240+
"messages": self._format_bedrock_messages(messages_for_request),
228241
"system": system_blocks,
229242
**(
230243
{
@@ -295,6 +308,42 @@ def _format_request(
295308
),
296309
}
297310

311+
def _get_last_turn_messages(self, messages: Messages) -> Messages:
    """Extract the final conversation turn for guardrail evaluation.

    When guardrail_last_turn_only is enabled, only the most recent user
    message (plus the assistant message immediately preceding it, when one
    exists) is sent to guardrails instead of the full conversation history.

    Args:
        messages: Full conversation messages.

    Returns:
        Messages holding just the last turn, or an empty list when the
        conversation contains no user message.
    """
    # Locate the most recent user message, scanning from the end.
    last_user_index = next(
        (i for i in reversed(range(len(messages))) if messages[i]["role"] == "user"),
        None,
    )
    if last_user_index is None:
        # No user message anywhere (includes the empty-conversation case).
        return []

    last_turn: Messages = []
    preceding = last_user_index - 1
    if preceding >= 0 and messages[preceding]["role"] == "assistant":
        # Keep the assistant reply that immediately precedes the user message.
        last_turn.append(messages[preceding])
    last_turn.append(messages[last_user_index])
    return last_turn
346+
298347
def _format_bedrock_messages(self, messages: Messages) -> list[dict[str, Any]]:
299348
"""Format messages for Bedrock API compatibility.
300349

tests/strands/models/test_bedrock.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2052,6 +2052,50 @@ def test_format_request_filters_output_schema(model, messages, model_id):
20522052
assert tool_spec["inputSchema"] == {"type": "object", "properties": {}}
20532053

20542054

2055+
def test_get_last_turn_messages(model):
    """Test _get_last_turn_messages helper method."""
    # An empty conversation yields an empty result.
    assert model._get_last_turn_messages([]) == []

    # A lone user message is returned unchanged.
    solo = [{"role": "user", "content": [{"text": "Hello"}]}]
    extracted = model._get_last_turn_messages(solo)
    assert len(extracted) == 1
    assert extracted[0]["role"] == "user"

    # With prior history, only the preceding assistant reply and the
    # latest user message survive the filtering.
    conversation = [
        {"role": "user", "content": [{"text": "Hello"}]},
        {"role": "assistant", "content": [{"text": "Hi"}]},
        {"role": "user", "content": [{"text": "How are you?"}]},
    ]
    extracted = model._get_last_turn_messages(conversation)
    assert len(extracted) == 2
    assert extracted[0]["role"] == "assistant"
    assert extracted[1]["role"] == "user"
    assert extracted[1]["content"][0]["text"] == "How are you?"
2077+
2078+
2079+
def test_format_request_with_guardrail_last_turn_only(model, model_id):
    """Test _format_request uses filtered messages when guardrail_last_turn_only=True."""
    model.update_config(guardrail_id="test-guardrail", guardrail_version="DRAFT", guardrail_last_turn_only=True)

    conversation = [
        {"role": "user", "content": [{"text": "First message"}]},
        {"role": "assistant", "content": [{"text": "First response"}]},
        {"role": "user", "content": [{"text": "Latest message"}]},
    ]

    request = model._format_request(conversation)

    # Only the final turn (assistant + user) should reach the request payload;
    # the opening exchange must have been filtered out.
    sent = request["messages"]
    assert [msg["role"] for msg in sent] == ["assistant", "user"]
    assert sent[-1]["content"][0]["text"] == "Latest message"
2097+
2098+
20552099
@pytest.mark.asyncio
20562100
async def test_stream_backward_compatibility_system_prompt(bedrock_client, model, messages, alist):
20572101
"""Test that system_prompt is converted to system_prompt_content when system_prompt_content is None."""

tests_integ/test_bedrock_guardrails.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,60 @@ def list_users() -> str:
289289
assert tool_result["content"][0]["text"] == INPUT_REDACT_MESSAGE
290290

291291

292+
def test_guardrail_last_turn_only(boto_session, bedrock_guardrail):
    """Test that guardrail_last_turn_only only sends the last turn to guardrails."""
    model = BedrockModel(
        guardrail_id=bedrock_guardrail,
        guardrail_version="DRAFT",
        guardrail_last_turn_only=True,
        boto_session=boto_session,
    )

    agent = Agent(model=model, system_prompt="You are a helpful assistant.", callback_handler=None)

    # A benign opening turn must pass through the guardrail untouched.
    benign_response = agent("Hello, how are you?")
    assert benign_response.stop_reason != "guardrail_intervened"

    # A follow-up containing the blocked word must still be caught: with
    # guardrail_last_turn_only=True only this message (plus the preceding
    # assistant reply) is evaluated, not the entire conversation history.
    blocked_response = agent("CACTUS")
    assert blocked_response.stop_reason == "guardrail_intervened"
    assert str(blocked_response).strip() == BLOCKED_INPUT
313+
314+
315+
def test_guardrail_last_turn_only_recovery_scenario(boto_session, bedrock_guardrail):
    """Verify recovery after a blocked turn when only the last turn is analyzed.

    This exercises the key benefit of guardrail_last_turn_only:
    a conversation that begins with blocked content should still allow a
    later benign question, because the guardrail never re-sees the earlier
    violation once the history is filtered down to the latest turn.
    """
    model = BedrockModel(
        guardrail_id=bedrock_guardrail,
        guardrail_version="DRAFT",
        guardrail_last_turn_only=True,
        boto_session=boto_session,
    )

    agent = Agent(model=model, system_prompt="You are a helpful assistant.", callback_handler=None)

    # Turn 1: the blocked word triggers the guardrail.
    first_response = agent("CACTUS")
    assert first_response.stop_reason == "guardrail_intervened"
    assert str(first_response).strip() == BLOCKED_INPUT

    # Turn 2: a benign question succeeds, since only the last turn is
    # evaluated — this is the recovery behavior under test.
    second_response = agent("What is the weather like today?")
    assert second_response.stop_reason != "guardrail_intervened"
    assert str(second_response).strip() != BLOCKED_INPUT

    # The full history is retained: 2 user + 2 assistant messages.
    assert len(agent.messages) == 4
344+
345+
292346
def test_guardrail_input_intervention_properly_redacts_in_session(boto_session, bedrock_guardrail, temp_dir):
293347
bedrock_model = BedrockModel(
294348
guardrail_id=bedrock_guardrail,

0 commit comments

Comments
 (0)