diff --git a/libs/partners/anthropic/langchain_anthropic/chat_models.py b/libs/partners/anthropic/langchain_anthropic/chat_models.py index 25af7afde2e22..09a4710610b94 100644 --- a/libs/partners/anthropic/langchain_anthropic/chat_models.py +++ b/libs/partners/anthropic/langchain_anthropic/chat_models.py @@ -26,6 +26,7 @@ AsyncCallbackManagerForLLMRun, CallbackManagerForLLMRun, ) +from langchain_core.exceptions import OutputParserException from langchain_core.language_models import LanguageModelInput from langchain_core.language_models.chat_models import ( BaseChatModel, @@ -83,6 +84,15 @@ } +class AnthropicTool(TypedDict): + """Anthropic tool definition.""" + + name: str + description: str + input_schema: Dict[str, Any] + cache_control: NotRequired[Dict[str, str]] + + def _format_image(image_url: str) -> Dict: """ Formats an image of format data:image/jpeg;base64,{b64_string} @@ -954,6 +964,31 @@ async def _agenerate( data = await self._async_client.messages.create(**payload) return self._format_output(data, **kwargs) + def _get_llm_for_structured_output_when_thinking_is_enabled( + self, + schema: Union[Dict, type], + formatted_tool: AnthropicTool, + ) -> Runnable[LanguageModelInput, BaseMessage]: + thinking_admonition = ( + "Anthropic structured output relies on forced tool calling, " + "which is not supported when `thinking` is enabled. This method will raise " + "langchain_core.exceptions.OutputParserException if tool calls are not " + "generated. Consider disabling `thinking` or adjust your prompt to ensure " + "the tool is called." + ) + warnings.warn(thinking_admonition) + llm = self.bind_tools( + [schema], + structured_output_format={"kwargs": {}, "schema": formatted_tool}, + ) + + def _raise_if_no_tool_calls(message: AIMessage) -> AIMessage: + if not message.tool_calls: + raise OutputParserException(thinking_admonition) + return message + + return llm | _raise_if_no_tool_calls + def bind_tools( self, tools: Sequence[Union[Dict[str, Any], Type, Callable, BaseTool]], @@ -1251,11 +1286,17 @@ class AnswerWithJustification(BaseModel): """ # noqa: E501 formatted_tool = convert_to_anthropic_tool(schema) tool_name = formatted_tool["name"] - llm = self.bind_tools( - [schema], - tool_choice=tool_name, - structured_output_format={"kwargs": {}, "schema": formatted_tool}, - ) + if self.thinking is not None and self.thinking.get("type") == "enabled": + llm = self._get_llm_for_structured_output_when_thinking_is_enabled( + schema, formatted_tool + ) + else: + llm = self.bind_tools( + [schema], + tool_choice=tool_name, + structured_output_format={"kwargs": {}, "schema": formatted_tool}, + ) + if isinstance(schema, type) and is_basemodel_subclass(schema): output_parser: OutputParserLike = PydanticToolsParser( tools=[schema], first_tool_only=True @@ -1358,15 +1399,6 @@ def get_weather(location: str) -> str: return response.input_tokens -class AnthropicTool(TypedDict): - """Anthropic tool definition.""" - - name: str - description: str - input_schema: Dict[str, Any] - cache_control: NotRequired[Dict[str, str]] - - def convert_to_anthropic_tool( tool: Union[Dict[str, Any], Type, Callable, BaseTool], ) -> AnthropicTool: diff --git a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py index 906de58ea08ae..cde69d25bccee 100644 --- a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py +++ b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py @@ -6,7 +6,9 @@ import pytest import requests +from anthropic import BadRequestError from langchain_core.callbacks import CallbackManager +from langchain_core.exceptions import OutputParserException from langchain_core.messages import ( AIMessage, AIMessageChunk, @@ -730,3 +732,39 @@ def test_redacted_thinking() -> None: assert set(block.keys()) == {"type", "data", "index"} assert block["data"] and isinstance(block["data"], str) assert stream_has_reasoning + + +def test_structured_output_thinking_enabled() -> None: + llm = ChatAnthropic( + model="claude-3-7-sonnet-latest", + max_tokens=5_000, + thinking={"type": "enabled", "budget_tokens": 2_000}, + ) + with pytest.warns(match="structured output"): + structured_llm = llm.with_structured_output(GenerateUsername) + query = "Generate a username for Sally with green hair" + response = structured_llm.invoke(query) + assert isinstance(response, GenerateUsername) + + with pytest.raises(OutputParserException): + structured_llm.invoke("Hello") + + # Test streaming + for chunk in structured_llm.stream(query): + assert isinstance(chunk, GenerateUsername) + + +def test_structured_output_thinking_force_tool_use() -> None: + # Structured output currently relies on forced tool use, which is not supported + # when `thinking` is enabled. When this test fails, it means that the feature + # is supported and the workarounds in `with_structured_output` should be removed. + llm = ChatAnthropic( + model="claude-3-7-sonnet-latest", + max_tokens=5_000, + thinking={"type": "enabled", "budget_tokens": 2_000}, + ).bind_tools( + [GenerateUsername], + tool_choice="GenerateUsername", + ) + with pytest.raises(BadRequestError): + llm.invoke("Generate a username for Sally with green hair")