mirror of https://github.com/hwchase17/langchain.git (synced 2025-04-28 03:51:50 +00:00)
Using `pyupgrade` to get all `partners` code up to 3.9 standards (mostly, fixing old `typing` imports).
1219 lines
49 KiB
Python
"""Ollama chat models."""
|
|
|
|
import json
|
|
from collections.abc import AsyncIterator, Iterator, Mapping, Sequence
|
|
from operator import itemgetter
|
|
from typing import (
|
|
Any,
|
|
Callable,
|
|
Final,
|
|
Literal,
|
|
Optional,
|
|
Union,
|
|
cast,
|
|
)
|
|
from uuid import uuid4
|
|
|
|
from langchain_core.callbacks import (
|
|
CallbackManagerForLLMRun,
|
|
)
|
|
from langchain_core.callbacks.manager import AsyncCallbackManagerForLLMRun
|
|
from langchain_core.exceptions import OutputParserException
|
|
from langchain_core.language_models import LanguageModelInput
|
|
from langchain_core.language_models.chat_models import BaseChatModel, LangSmithParams
|
|
from langchain_core.messages import (
|
|
AIMessage,
|
|
AIMessageChunk,
|
|
BaseMessage,
|
|
BaseMessageChunk,
|
|
HumanMessage,
|
|
SystemMessage,
|
|
ToolCall,
|
|
ToolMessage,
|
|
)
|
|
from langchain_core.messages.ai import UsageMetadata
|
|
from langchain_core.messages.tool import tool_call
|
|
from langchain_core.output_parsers import (
|
|
JsonOutputKeyToolsParser,
|
|
JsonOutputParser,
|
|
PydanticOutputParser,
|
|
PydanticToolsParser,
|
|
)
|
|
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
|
|
from langchain_core.runnables import Runnable, RunnableMap, RunnablePassthrough
|
|
from langchain_core.tools import BaseTool
|
|
from langchain_core.utils.function_calling import (
|
|
convert_to_json_schema,
|
|
convert_to_openai_tool,
|
|
)
|
|
from langchain_core.utils.pydantic import TypeBaseModel, is_basemodel_subclass
|
|
from ollama import AsyncClient, Client, Message, Options
|
|
from pydantic import BaseModel, PrivateAttr, model_validator
|
|
from pydantic.json_schema import JsonSchemaValue
|
|
from pydantic.v1 import BaseModel as BaseModelV1
|
|
from typing_extensions import Self, is_typeddict
|
|
|
|
DEFAULT_THINK_TOKEN_START: Final[str] = "<think>"
|
|
DEFAULT_THINK_TOKEN_END: Final[str] = "</think>"
|
|
|
|
|
|
def _get_usage_metadata_from_generation_info(
|
|
generation_info: Optional[Mapping[str, Any]],
|
|
) -> Optional[UsageMetadata]:
|
|
"""Get usage metadata from ollama generation info mapping."""
|
|
if generation_info is None:
|
|
return None
|
|
input_tokens: Optional[int] = generation_info.get("prompt_eval_count")
|
|
output_tokens: Optional[int] = generation_info.get("eval_count")
|
|
if input_tokens is not None and output_tokens is not None:
|
|
return UsageMetadata(
|
|
input_tokens=input_tokens,
|
|
output_tokens=output_tokens,
|
|
total_tokens=input_tokens + output_tokens,
|
|
)
|
|
return None
|
|
|
|
|
|
def _parse_json_string(
|
|
json_string: str, raw_tool_call: dict[str, Any], skip: bool
|
|
) -> Any:
|
|
"""Attempt to parse a JSON string for tool calling.
|
|
|
|
Args:
|
|
        json_string: JSON string to parse.
        raw_tool_call: Raw tool call to include in error message.
        skip: Whether to ignore parsing errors and return the value anyway.
|
|
|
|
Returns:
|
|
The parsed JSON string.
|
|
|
|
Raises:
|
|
        OutputParserException: If the JSON string is invalid and skip=False.
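
    Example:
        A minimal sketch of the ``skip`` behavior (values are illustrative only)::

            _parse_json_string('{"a": 1}', raw_tool_call={}, skip=False)
            # -> {'a': 1}
            _parse_json_string("not json", raw_tool_call={}, skip=True)
            # -> 'not json'  (returned unparsed instead of raising)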
|
|
"""
|
|
try:
|
|
return json.loads(json_string)
|
|
except json.JSONDecodeError as e:
|
|
if skip:
|
|
return json_string
|
|
msg = (
|
|
f"Function {raw_tool_call['function']['name']} arguments:\n\n"
|
|
f"{raw_tool_call['function']['arguments']}\n\nare not valid JSON. "
|
|
f"Received JSONDecodeError {e}"
|
|
)
|
|
raise OutputParserException(msg) from e
|
|
except TypeError as e:
|
|
if skip:
|
|
return json_string
|
|
msg = (
|
|
f"Function {raw_tool_call['function']['name']} arguments:\n\n"
|
|
f"{raw_tool_call['function']['arguments']}\n\nare not a string or a "
|
|
f"dictionary. Received TypeError {e}"
|
|
)
|
|
raise OutputParserException(msg) from e
|
|
|
|
|
|
def _parse_arguments_from_tool_call(
|
|
raw_tool_call: dict[str, Any],
|
|
) -> Optional[dict[str, Any]]:
|
|
"""Parse arguments by trying to parse any shallowly nested string-encoded JSON.
|
|
|
|
Band-aid fix for issue in Ollama with inconsistent tool call argument structure.
|
|
Should be removed/changed if fixed upstream.
|
|
See https://github.com/ollama/ollama/issues/6155
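
    Example:
        A rough sketch using a hypothetical payload::

            _parse_arguments_from_tool_call(
                {"function": {"name": "f", "arguments": {"x": '{"a": 1}', "y": 2}}}
            )
            # -> {"x": {"a": 1}, "y": 2}  (string-encoded JSON values are decoded)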
|
|
"""
|
|
if "function" not in raw_tool_call:
|
|
return None
|
|
arguments = raw_tool_call["function"]["arguments"]
|
|
parsed_arguments: dict = {}
|
|
if isinstance(arguments, dict):
|
|
for key, value in arguments.items():
|
|
if isinstance(value, str):
|
|
parsed_value = _parse_json_string(
|
|
value, skip=True, raw_tool_call=raw_tool_call
|
|
)
|
|
if isinstance(parsed_value, (dict, list)):
|
|
parsed_arguments[key] = parsed_value
|
|
else:
|
|
parsed_arguments[key] = value
|
|
else:
|
|
parsed_arguments[key] = value
|
|
else:
|
|
parsed_arguments = _parse_json_string(
|
|
arguments, skip=False, raw_tool_call=raw_tool_call
|
|
)
|
|
return parsed_arguments
|
|
|
|
|
|
def _get_tool_calls_from_response(
|
|
response: Mapping[str, Any],
|
|
) -> list[ToolCall]:
|
|
"""Get tool calls from ollama response."""
|
|
tool_calls = []
|
|
if "message" in response:
|
|
if raw_tool_calls := response["message"].get("tool_calls"):
|
|
for tc in raw_tool_calls:
|
|
tool_calls.append(
|
|
tool_call(
|
|
id=str(uuid4()),
|
|
name=tc["function"]["name"],
|
|
args=_parse_arguments_from_tool_call(tc) or {},
|
|
)
|
|
)
|
|
return tool_calls
|
|
|
|
|
|
def _lc_tool_call_to_openai_tool_call(tool_call: ToolCall) -> dict:
|
|
return {
|
|
"type": "function",
|
|
"id": tool_call["id"],
|
|
"function": {
|
|
"name": tool_call["name"],
|
|
"arguments": tool_call["args"],
|
|
},
|
|
}
|
|
|
|
|
|
def _is_pydantic_class(obj: Any) -> bool:
|
|
return isinstance(obj, type) and is_basemodel_subclass(obj)
|
|
|
|
|
|
class ChatOllama(BaseChatModel):
|
|
r"""Ollama chat model integration.
|
|
|
|
.. dropdown:: Setup
|
|
:open:
|
|
|
|
Install ``langchain-ollama`` and download any models you want to use from ollama.
|
|
|
|
.. code-block:: bash
|
|
|
|
ollama pull mistral:v0.3
|
|
pip install -U langchain-ollama
|
|
|
|
Key init args — completion params:
|
|
model: str
|
|
Name of Ollama model to use.
|
|
temperature: float
|
|
Sampling temperature. Ranges from 0.0 to 1.0.
|
|
num_predict: Optional[int]
|
|
Max number of tokens to generate.
|
|
|
|
See full list of supported init args and their descriptions in the params section.
|
|
|
|
Instantiate:
|
|
.. code-block:: python
|
|
|
|
from langchain_ollama import ChatOllama
|
|
|
|
llm = ChatOllama(
|
|
model = "llama3",
|
|
temperature = 0.8,
|
|
num_predict = 256,
|
|
# other params ...
|
|
)
|
|
|
|
Invoke:
|
|
.. code-block:: python
|
|
|
|
messages = [
|
|
("system", "You are a helpful translator. Translate the user sentence to French."),
|
|
("human", "I love programming."),
|
|
]
|
|
llm.invoke(messages)
|
|
|
|
.. code-block:: python
|
|
|
|
AIMessage(content='J'adore le programmation. (Note: "programming" can also refer to the act of writing code, so if you meant that, I could translate it as "J'adore programmer". But since you didn\'t specify, I assumed you were talking about the activity itself, which is what "le programmation" usually refers to.)', response_metadata={'model': 'llama3', 'created_at': '2024-07-04T03:37:50.182604Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 3576619666, 'load_duration': 788524916, 'prompt_eval_count': 32, 'prompt_eval_duration': 128125000, 'eval_count': 71, 'eval_duration': 2656556000}, id='run-ba48f958-6402-41a5-b461-5e250a4ebd36-0')
|
|
|
|
Stream:
|
|
.. code-block:: python
|
|
|
|
messages = [
|
|
("human", "Return the words Hello World!"),
|
|
]
|
|
for chunk in llm.stream(messages):
|
|
print(chunk.text(), end="")
|
|
|
|
|
|
.. code-block:: python
|
|
|
|
content='Hello' id='run-327ff5ad-45c8-49fe-965c-0a93982e9be1'
|
|
content=' World' id='run-327ff5ad-45c8-49fe-965c-0a93982e9be1'
|
|
content='!' id='run-327ff5ad-45c8-49fe-965c-0a93982e9be1'
|
|
content='' response_metadata={'model': 'llama3', 'created_at': '2024-07-04T03:39:42.274449Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 411875125, 'load_duration': 1898166, 'prompt_eval_count': 14, 'prompt_eval_duration': 297320000, 'eval_count': 4, 'eval_duration': 111099000} id='run-327ff5ad-45c8-49fe-965c-0a93982e9be1'
|
|
|
|
|
|
.. code-block:: python
|
|
|
|
stream = llm.stream(messages)
|
|
full = next(stream)
|
|
for chunk in stream:
|
|
full += chunk
|
|
full
|
|
|
|
.. code-block:: python
|
|
|
|
AIMessageChunk(content='Je adore le programmation.(Note: "programmation" is the formal way to say "programming" in French, but informally, people might use the phrase "le développement logiciel" or simply "le code")', response_metadata={'model': 'llama3', 'created_at': '2024-07-04T03:38:54.933154Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 1977300042, 'load_duration': 1345709, 'prompt_eval_duration': 159343000, 'eval_count': 47, 'eval_duration': 1815123000}, id='run-3c81a3ed-3e79-4dd3-a796-04064d804890')
|
|
|
|
Async:
|
|
.. code-block:: python
|
|
|
|
messages = [
|
|
("human", "Hello how are you!"),
|
|
]
|
|
await llm.ainvoke(messages)
|
|
|
|
.. code-block:: python
|
|
|
|
AIMessage(content="Hi there! I'm just an AI, so I don't have feelings or emotions like humans do. But I'm functioning properly and ready to help with any questions or tasks you may have! How can I assist you today?", response_metadata={'model': 'llama3', 'created_at': '2024-07-04T03:52:08.165478Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 2138492875, 'load_duration': 1364000, 'prompt_eval_count': 10, 'prompt_eval_duration': 297081000, 'eval_count': 47, 'eval_duration': 1838524000}, id='run-29c510ae-49a4-4cdd-8f23-b972bfab1c49-0')
|
|
|
|
.. code-block:: python
|
|
|
|
messages = [
|
|
("human", "Say hello world!"),
|
|
]
|
|
async for chunk in llm.astream(messages):
|
|
print(chunk.content)
|
|
|
|
.. code-block:: python
|
|
|
|
HEL
|
|
LO
|
|
WORLD
|
|
!
|
|
|
|
.. code-block:: python
|
|
|
|
messages = [
|
|
("human", "Say hello world!"),
|
|
("human","Say goodbye world!")
|
|
]
|
|
await llm.abatch(messages)
|
|
|
|
.. code-block:: python
|
|
|
|
[AIMessage(content='HELLO, WORLD!', response_metadata={'model': 'llama3', 'created_at': '2024-07-04T03:55:07.315396Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 1696745458, 'load_duration': 1505000, 'prompt_eval_count': 8, 'prompt_eval_duration': 111627000, 'eval_count': 6, 'eval_duration': 185181000}, id='run-da6c7562-e25a-4a44-987a-2c83cd8c2686-0'),
|
|
AIMessage(content="It's been a blast chatting with you! Say goodbye to the world for me, and don't forget to come back and visit us again soon!", response_metadata={'model': 'llama3', 'created_at': '2024-07-04T03:55:07.018076Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 1399391083, 'load_duration': 1187417, 'prompt_eval_count': 20, 'prompt_eval_duration': 230349000, 'eval_count': 31, 'eval_duration': 1166047000}, id='run-96cad530-6f3e-4cf9-86b4-e0f8abba4cdb-0')]
|
|
|
|
JSON mode:
|
|
.. code-block:: python
|
|
|
|
|
|
            json_llm = ChatOllama(model="llama3", format="json")
|
|
messages = [
|
|
("human", "Return a query for the weather in a random location and time of day with two keys: location and time_of_day. Respond using JSON only."),
|
|
]
|
|
            json_llm.invoke(messages).content
|
|
|
|
.. code-block:: python
|
|
|
|
'{"location": "Pune, India", "time_of_day": "morning"}'
|
|
|
|
Tool Calling:
|
|
|
|
.. code-block:: python
|
|
|
|
from langchain_ollama import ChatOllama
|
|
from pydantic import BaseModel, Field
|
|
|
|
class Multiply(BaseModel):
|
|
a: int = Field(..., description="First integer")
|
|
b: int = Field(..., description="Second integer")
|
|
|
|
            chat = ChatOllama(model="llama3.1").bind_tools([Multiply])
            ans = chat.invoke("What is 45*67")
|
|
ans.tool_calls
|
|
|
|
.. code-block:: python
|
|
|
|
[{'name': 'Multiply',
|
|
'args': {'a': 45, 'b': 67},
|
|
'id': '420c3f3b-df10-4188-945f-eb3abdb40622',
|
|
'type': 'tool_call'}]
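
    Reasoning extraction:
        A rough sketch of the ``extract_reasoning`` option. It assumes a model
        that emits ``<think>...</think>`` blocks; the model name below is
        illustrative only.

        .. code-block:: python

            from langchain_ollama import ChatOllama

            llm = ChatOllama(model="deepseek-r1", extract_reasoning=True)
            msg = llm.invoke("What is 45 * 67?")

            msg.content                                 # answer without the think block
            msg.additional_kwargs["reasoning_content"]  # extracted reasoning text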
|
|
""" # noqa: E501
|
|
|
|
model: str
|
|
"""Model name to use."""
|
|
|
|
extract_reasoning: Optional[Union[bool, tuple[str, str]]] = False
|
|
"""Whether to extract the reasoning tokens in think blocks.
|
|
Extracts `chunk.content` to `chunk.additional_kwargs.reasoning_content`.
|
|
    If a tuple is supplied, it is taken as the (start, end) token pair.
|
|
If `extract_reasoning=True`, the tokens will default to (<think>, </think>).
|
|
"""
|
|
|
|
mirostat: Optional[int] = None
|
|
"""Enable Mirostat sampling for controlling perplexity.
|
|
(default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"""
|
|
|
|
mirostat_eta: Optional[float] = None
|
|
"""Influences how quickly the algorithm responds to feedback
|
|
from the generated text. A lower learning rate will result in
|
|
slower adjustments, while a higher learning rate will make
|
|
the algorithm more responsive. (Default: 0.1)"""
|
|
|
|
mirostat_tau: Optional[float] = None
|
|
"""Controls the balance between coherence and diversity
|
|
of the output. A lower value will result in more focused and
|
|
coherent text. (Default: 5.0)"""
|
|
|
|
num_ctx: Optional[int] = None
|
|
"""Sets the size of the context window used to generate the
|
|
next token. (Default: 2048) """
|
|
|
|
num_gpu: Optional[int] = None
|
|
"""The number of GPUs to use. On macOS it defaults to 1 to
|
|
enable metal support, 0 to disable."""
|
|
|
|
num_thread: Optional[int] = None
|
|
"""Sets the number of threads to use during computation.
|
|
By default, Ollama will detect this for optimal performance.
|
|
It is recommended to set this value to the number of physical
|
|
CPU cores your system has (as opposed to the logical number of cores)."""
|
|
|
|
num_predict: Optional[int] = None
|
|
"""Maximum number of tokens to predict when generating text.
|
|
(Default: 128, -1 = infinite generation, -2 = fill context)"""
|
|
|
|
repeat_last_n: Optional[int] = None
|
|
"""Sets how far back for the model to look back to prevent
|
|
repetition. (Default: 64, 0 = disabled, -1 = num_ctx)"""
|
|
|
|
repeat_penalty: Optional[float] = None
|
|
"""Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
|
|
will penalize repetitions more strongly, while a lower value (e.g., 0.9)
|
|
will be more lenient. (Default: 1.1)"""
|
|
|
|
temperature: Optional[float] = None
|
|
"""The temperature of the model. Increasing the temperature will
|
|
make the model answer more creatively. (Default: 0.8)"""
|
|
|
|
seed: Optional[int] = None
|
|
"""Sets the random number seed to use for generation. Setting this
|
|
to a specific number will make the model generate the same text for
|
|
the same prompt."""
|
|
|
|
stop: Optional[list[str]] = None
|
|
"""Sets the stop tokens to use."""
|
|
|
|
tfs_z: Optional[float] = None
|
|
"""Tail free sampling is used to reduce the impact of less probable
|
|
tokens from the output. A higher value (e.g., 2.0) will reduce the
|
|
impact more, while a value of 1.0 disables this setting. (default: 1)"""
|
|
|
|
top_k: Optional[int] = None
|
|
"""Reduces the probability of generating nonsense. A higher value (e.g. 100)
|
|
will give more diverse answers, while a lower value (e.g. 10)
|
|
will be more conservative. (Default: 40)"""
|
|
|
|
top_p: Optional[float] = None
|
|
"""Works together with top-k. A higher value (e.g., 0.95) will lead
|
|
to more diverse text, while a lower value (e.g., 0.5) will
|
|
generate more focused and conservative text. (Default: 0.9)"""
|
|
|
|
format: Optional[Union[Literal["", "json"], JsonSchemaValue]] = None
|
|
"""Specify the format of the output (options: "json", JSON schema)."""
|
|
|
|
keep_alive: Optional[Union[int, str]] = None
|
|
"""How long the model will stay loaded into memory."""
|
|
|
|
base_url: Optional[str] = None
|
|
"""Base url the model is hosted under."""
|
|
|
|
client_kwargs: Optional[dict] = {}
|
|
"""Additional kwargs to pass to the httpx Client.
|
|
For a full list of the params, see [this link](https://pydoc.dev/httpx/latest/httpx.Client.html)
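
    Example (values are illustrative only)::

        ChatOllama(model="llama3", client_kwargs={"timeout": 30})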
|
|
"""
|
|
|
|
_client: Client = PrivateAttr(default=None) # type: ignore
|
|
"""
|
|
The client to use for making requests.
|
|
"""
|
|
|
|
_async_client: AsyncClient = PrivateAttr(default=None) # type: ignore
|
|
"""
|
|
The async client to use for making requests.
|
|
"""
|
|
|
|
def _chat_params(
|
|
self,
|
|
messages: list[BaseMessage],
|
|
stop: Optional[list[str]] = None,
|
|
**kwargs: Any,
|
|
) -> dict[str, Any]:
|
|
ollama_messages = self._convert_messages_to_ollama_messages(messages)
|
|
|
|
if self.stop is not None and stop is not None:
|
|
raise ValueError("`stop` found in both the input and default params.")
|
|
elif self.stop is not None:
|
|
stop = self.stop
|
|
|
|
options_dict = kwargs.pop(
|
|
"options",
|
|
{
|
|
"mirostat": self.mirostat,
|
|
"mirostat_eta": self.mirostat_eta,
|
|
"mirostat_tau": self.mirostat_tau,
|
|
"num_ctx": self.num_ctx,
|
|
"num_gpu": self.num_gpu,
|
|
"num_thread": self.num_thread,
|
|
"num_predict": self.num_predict,
|
|
"repeat_last_n": self.repeat_last_n,
|
|
"repeat_penalty": self.repeat_penalty,
|
|
"temperature": self.temperature,
|
|
"seed": self.seed,
|
|
"stop": self.stop if stop is None else stop,
|
|
"tfs_z": self.tfs_z,
|
|
"top_k": self.top_k,
|
|
"top_p": self.top_p,
|
|
},
|
|
)
|
|
|
|
params = {
|
|
"messages": ollama_messages,
|
|
"stream": kwargs.pop("stream", True),
|
|
"model": kwargs.pop("model", self.model),
|
|
"format": kwargs.pop("format", self.format),
|
|
"options": Options(**options_dict),
|
|
"keep_alive": kwargs.pop("keep_alive", self.keep_alive),
|
|
**kwargs,
|
|
}
|
|
|
|
if tools := kwargs.get("tools"):
|
|
params["tools"] = tools
|
|
|
|
return params
|
|
|
|
@model_validator(mode="after")
|
|
def _set_clients(self) -> Self:
|
|
"""Set clients to use for ollama."""
|
|
client_kwargs = self.client_kwargs or {}
|
|
self._client = Client(host=self.base_url, **client_kwargs)
|
|
self._async_client = AsyncClient(host=self.base_url, **client_kwargs)
|
|
return self
|
|
|
|
def _convert_messages_to_ollama_messages(
|
|
self, messages: list[BaseMessage]
|
|
) -> Sequence[Message]:
|
|
ollama_messages: list = []
|
|
for message in messages:
|
|
role: Literal["user", "assistant", "system", "tool"]
|
|
tool_call_id: Optional[str] = None
|
|
tool_calls: Optional[list[dict[str, Any]]] = None
|
|
if isinstance(message, HumanMessage):
|
|
role = "user"
|
|
elif isinstance(message, AIMessage):
|
|
role = "assistant"
|
|
tool_calls = (
|
|
[
|
|
_lc_tool_call_to_openai_tool_call(tool_call)
|
|
for tool_call in message.tool_calls
|
|
]
|
|
if message.tool_calls
|
|
else None
|
|
)
|
|
elif isinstance(message, SystemMessage):
|
|
role = "system"
|
|
elif isinstance(message, ToolMessage):
|
|
role = "tool"
|
|
tool_call_id = message.tool_call_id
|
|
else:
|
|
raise ValueError("Received unsupported message type for Ollama.")
|
|
|
|
content = ""
|
|
images = []
|
|
if isinstance(message.content, str):
|
|
content = message.content
|
|
else:
|
|
for content_part in cast(list[dict], message.content):
|
|
if content_part.get("type") == "text":
|
|
content += f"\n{content_part['text']}"
|
|
elif content_part.get("type") == "tool_use":
|
|
continue
|
|
elif content_part.get("type") == "image_url":
|
|
image_url = None
|
|
temp_image_url = content_part.get("image_url")
|
|
if isinstance(temp_image_url, str):
|
|
image_url = temp_image_url
|
|
elif (
|
|
isinstance(temp_image_url, dict)
|
|
and "url" in temp_image_url
|
|
and isinstance(temp_image_url["url"], str)
|
|
):
|
|
image_url = temp_image_url["url"]
|
|
else:
|
|
raise ValueError(
|
|
"Only string image_url or dict with string 'url' "
|
|
"inside content parts are supported."
|
|
)
|
|
|
|
image_url_components = image_url.split(",")
|
|
# Support data:image/jpeg;base64,<image> format
|
|
# and base64 strings
|
|
if len(image_url_components) > 1:
|
|
images.append(image_url_components[1])
|
|
else:
|
|
images.append(image_url_components[0])
|
|
|
|
else:
|
|
raise ValueError(
|
|
"Unsupported message content type. "
|
|
"Must either have type 'text' or type 'image_url' "
|
|
"with a string 'image_url' field."
|
|
)
|
|
# Should convert to ollama.Message once role includes tool, and tool_call_id is in Message # noqa: E501
|
|
msg: dict = {
|
|
"role": role,
|
|
"content": content,
|
|
"images": images,
|
|
}
|
|
if tool_calls:
|
|
msg["tool_calls"] = tool_calls # type: ignore
|
|
if tool_call_id:
|
|
msg["tool_call_id"] = tool_call_id
|
|
ollama_messages.append(msg)
|
|
|
|
return ollama_messages
|
|
|
|
def _extract_reasoning(
|
|
self, message_chunk: BaseMessageChunk, is_thinking: bool
|
|
) -> tuple[BaseMessageChunk, bool]:
|
|
"""Mutate a message chunk to extract reasoning content."""
|
|
if not self.extract_reasoning:
|
|
return message_chunk, is_thinking
|
|
elif self.extract_reasoning is True:
|
|
start_token = DEFAULT_THINK_TOKEN_START
|
|
end_token = DEFAULT_THINK_TOKEN_END
|
|
else:
|
|
start_token, end_token = cast(tuple, self.extract_reasoning)
|
|
if start_token in message_chunk.content:
|
|
is_thinking = True
|
|
content = message_chunk.content
|
|
if is_thinking:
|
|
message_chunk.additional_kwargs["reasoning_content"] = content
|
|
message_chunk.content = ""
|
|
if end_token in content:
|
|
is_thinking = False
|
|
|
|
return message_chunk, is_thinking
|
|
|
|
async def _acreate_chat_stream(
|
|
self,
|
|
messages: list[BaseMessage],
|
|
stop: Optional[list[str]] = None,
|
|
**kwargs: Any,
|
|
) -> AsyncIterator[Union[Mapping[str, Any], str]]:
|
|
chat_params = self._chat_params(messages, stop, **kwargs)
|
|
|
|
if chat_params["stream"]:
|
|
async for part in await self._async_client.chat(**chat_params):
|
|
yield part
|
|
else:
|
|
yield await self._async_client.chat(**chat_params)
|
|
|
|
def _create_chat_stream(
|
|
self,
|
|
messages: list[BaseMessage],
|
|
stop: Optional[list[str]] = None,
|
|
**kwargs: Any,
|
|
) -> Iterator[Union[Mapping[str, Any], str]]:
|
|
chat_params = self._chat_params(messages, stop, **kwargs)
|
|
|
|
if chat_params["stream"]:
|
|
yield from self._client.chat(**chat_params)
|
|
else:
|
|
yield self._client.chat(**chat_params)
|
|
|
|
def _chat_stream_with_aggregation(
|
|
self,
|
|
messages: list[BaseMessage],
|
|
stop: Optional[list[str]] = None,
|
|
run_manager: Optional[CallbackManagerForLLMRun] = None,
|
|
verbose: bool = False,
|
|
**kwargs: Any,
|
|
) -> ChatGenerationChunk:
|
|
final_chunk = None
|
|
for chunk in self._iterate_over_stream(messages, stop, **kwargs):
|
|
if final_chunk is None:
|
|
final_chunk = chunk
|
|
else:
|
|
final_chunk += chunk
|
|
if run_manager:
|
|
run_manager.on_llm_new_token(
|
|
chunk.text,
|
|
chunk=chunk,
|
|
verbose=verbose,
|
|
)
|
|
if final_chunk is None:
|
|
raise ValueError("No data received from Ollama stream.")
|
|
|
|
return final_chunk
|
|
|
|
async def _achat_stream_with_aggregation(
|
|
self,
|
|
messages: list[BaseMessage],
|
|
stop: Optional[list[str]] = None,
|
|
run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
|
|
verbose: bool = False,
|
|
**kwargs: Any,
|
|
) -> ChatGenerationChunk:
|
|
final_chunk = None
|
|
async for chunk in self._aiterate_over_stream(messages, stop, **kwargs):
|
|
if final_chunk is None:
|
|
final_chunk = chunk
|
|
else:
|
|
final_chunk += chunk
|
|
if run_manager:
|
|
await run_manager.on_llm_new_token(
|
|
chunk.text,
|
|
chunk=chunk,
|
|
verbose=verbose,
|
|
)
|
|
if final_chunk is None:
|
|
raise ValueError("No data received from Ollama stream.")
|
|
|
|
return final_chunk
|
|
|
|
def _get_ls_params(
|
|
self, stop: Optional[list[str]] = None, **kwargs: Any
|
|
) -> LangSmithParams:
|
|
"""Get standard params for tracing."""
|
|
params = self._get_invocation_params(stop=stop, **kwargs)
|
|
ls_params = LangSmithParams(
|
|
ls_provider="ollama",
|
|
ls_model_name=self.model,
|
|
ls_model_type="chat",
|
|
ls_temperature=params.get("temperature", self.temperature),
|
|
)
|
|
if ls_stop := stop or params.get("stop", None) or self.stop:
|
|
ls_params["ls_stop"] = ls_stop
|
|
return ls_params
|
|
|
|
def _generate(
|
|
self,
|
|
messages: list[BaseMessage],
|
|
stop: Optional[list[str]] = None,
|
|
run_manager: Optional[CallbackManagerForLLMRun] = None,
|
|
**kwargs: Any,
|
|
) -> ChatResult:
|
|
final_chunk = self._chat_stream_with_aggregation(
|
|
messages, stop, run_manager, verbose=self.verbose, **kwargs
|
|
)
|
|
generation_info = final_chunk.generation_info
|
|
chat_generation = ChatGeneration(
|
|
message=AIMessage(
|
|
content=final_chunk.text,
|
|
usage_metadata=cast(AIMessageChunk, final_chunk.message).usage_metadata,
|
|
tool_calls=cast(AIMessageChunk, final_chunk.message).tool_calls,
|
|
additional_kwargs=final_chunk.message.additional_kwargs,
|
|
),
|
|
generation_info=generation_info,
|
|
)
|
|
return ChatResult(generations=[chat_generation])
|
|
|
|
def _iterate_over_stream(
|
|
self,
|
|
messages: list[BaseMessage],
|
|
stop: Optional[list[str]] = None,
|
|
**kwargs: Any,
|
|
) -> Iterator[ChatGenerationChunk]:
|
|
is_thinking = False
|
|
for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
|
|
if not isinstance(stream_resp, str):
|
|
chunk = ChatGenerationChunk(
|
|
message=AIMessageChunk(
|
|
content=(
|
|
stream_resp["message"]["content"]
|
|
if "message" in stream_resp
|
|
and "content" in stream_resp["message"]
|
|
else ""
|
|
),
|
|
usage_metadata=_get_usage_metadata_from_generation_info(
|
|
stream_resp
|
|
),
|
|
tool_calls=_get_tool_calls_from_response(stream_resp),
|
|
),
|
|
generation_info=(
|
|
                        dict(stream_resp)
|
|
if stream_resp.get("done") is True
|
|
else None
|
|
),
|
|
)
|
|
if chunk.generation_info and (
|
|
model := chunk.generation_info.get("model")
|
|
):
|
|
chunk.generation_info["model_name"] = model # backwards compat
|
|
if self.extract_reasoning:
|
|
message, is_thinking = self._extract_reasoning(
|
|
chunk.message, is_thinking
|
|
)
|
|
chunk.message = message
|
|
yield chunk
|
|
|
|
def _stream(
|
|
self,
|
|
messages: list[BaseMessage],
|
|
stop: Optional[list[str]] = None,
|
|
run_manager: Optional[CallbackManagerForLLMRun] = None,
|
|
**kwargs: Any,
|
|
) -> Iterator[ChatGenerationChunk]:
|
|
for chunk in self._iterate_over_stream(messages, stop, **kwargs):
|
|
if run_manager:
|
|
run_manager.on_llm_new_token(
|
|
chunk.text,
|
|
verbose=self.verbose,
|
|
)
|
|
yield chunk
|
|
|
|
async def _aiterate_over_stream(
|
|
self,
|
|
messages: list[BaseMessage],
|
|
stop: Optional[list[str]] = None,
|
|
**kwargs: Any,
|
|
) -> AsyncIterator[ChatGenerationChunk]:
|
|
is_thinking = False
|
|
async for stream_resp in self._acreate_chat_stream(messages, stop, **kwargs):
|
|
if not isinstance(stream_resp, str):
|
|
chunk = ChatGenerationChunk(
|
|
message=AIMessageChunk(
|
|
content=(
|
|
stream_resp["message"]["content"]
|
|
if "message" in stream_resp
|
|
and "content" in stream_resp["message"]
|
|
else ""
|
|
),
|
|
usage_metadata=_get_usage_metadata_from_generation_info(
|
|
stream_resp
|
|
),
|
|
tool_calls=_get_tool_calls_from_response(stream_resp),
|
|
),
|
|
generation_info=(
|
|
                        dict(stream_resp)
|
|
if stream_resp.get("done") is True
|
|
else None
|
|
),
|
|
)
|
|
if chunk.generation_info and (
|
|
model := chunk.generation_info.get("model")
|
|
):
|
|
chunk.generation_info["model_name"] = model # backwards compat
|
|
if self.extract_reasoning:
|
|
message, is_thinking = self._extract_reasoning(
|
|
chunk.message, is_thinking
|
|
)
|
|
chunk.message = message
|
|
yield chunk
|
|
|
|
async def _astream(
|
|
self,
|
|
messages: list[BaseMessage],
|
|
stop: Optional[list[str]] = None,
|
|
run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
|
|
**kwargs: Any,
|
|
) -> AsyncIterator[ChatGenerationChunk]:
|
|
async for chunk in self._aiterate_over_stream(messages, stop, **kwargs):
|
|
if run_manager:
|
|
await run_manager.on_llm_new_token(
|
|
chunk.text,
|
|
verbose=self.verbose,
|
|
)
|
|
yield chunk
|
|
|
|
async def _agenerate(
|
|
self,
|
|
messages: list[BaseMessage],
|
|
stop: Optional[list[str]] = None,
|
|
run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
|
|
**kwargs: Any,
|
|
) -> ChatResult:
|
|
final_chunk = await self._achat_stream_with_aggregation(
|
|
messages, stop, run_manager, verbose=self.verbose, **kwargs
|
|
)
|
|
generation_info = final_chunk.generation_info
|
|
chat_generation = ChatGeneration(
|
|
message=AIMessage(
|
|
content=final_chunk.text,
|
|
usage_metadata=cast(AIMessageChunk, final_chunk.message).usage_metadata,
|
|
tool_calls=cast(AIMessageChunk, final_chunk.message).tool_calls,
|
|
additional_kwargs=final_chunk.message.additional_kwargs,
|
|
),
|
|
generation_info=generation_info,
|
|
)
|
|
return ChatResult(generations=[chat_generation])
|
|
|
|
@property
|
|
def _llm_type(self) -> str:
|
|
"""Return type of chat model."""
|
|
return "chat-ollama"
|
|
|
|
def bind_tools(
|
|
self,
|
|
tools: Sequence[Union[dict[str, Any], type, Callable, BaseTool]],
|
|
*,
|
|
tool_choice: Optional[Union[dict, str, Literal["auto", "any"], bool]] = None,
|
|
**kwargs: Any,
|
|
) -> Runnable[LanguageModelInput, BaseMessage]:
|
|
"""Bind tool-like objects to this chat model.
|
|
|
|
Assumes model is compatible with OpenAI tool-calling API.
|
|
|
|
Args:
|
|
tools: A list of tool definitions to bind to this chat model.
|
|
Supports any tool definition handled by
|
|
:meth:`langchain_core.utils.function_calling.convert_to_openai_tool`.
|
|
            tool_choice: If provided, which tool the model should call. **This parameter
|
|
is currently ignored as it is not supported by Ollama.**
|
|
kwargs: Any additional parameters are passed directly to
|
|
``self.bind(**kwargs)``.
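
        Example:
            A minimal sketch; the model name and prompt are illustrative and
            assume a tool-calling-capable model is available locally.

            .. code-block:: python

                from langchain_ollama import ChatOllama
                from pydantic import BaseModel, Field

                class Multiply(BaseModel):
                    a: int = Field(..., description="First integer")
                    b: int = Field(..., description="Second integer")

                llm = ChatOllama(model="llama3.1")
                llm_with_tools = llm.bind_tools([Multiply])
                llm_with_tools.invoke("What is 45*67").tool_calls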
|
|
""" # noqa: E501
|
|
formatted_tools = [convert_to_openai_tool(tool) for tool in tools]
|
|
return super().bind(tools=formatted_tools, **kwargs)
|
|
|
|
def with_structured_output(
|
|
self,
|
|
schema: Union[dict, type],
|
|
*,
|
|
method: Literal["function_calling", "json_mode", "json_schema"] = "json_schema",
|
|
include_raw: bool = False,
|
|
**kwargs: Any,
|
|
) -> Runnable[LanguageModelInput, Union[dict, BaseModel]]:
|
|
"""Model wrapper that returns outputs formatted to match the given schema.
|
|
|
|
Args:
|
|
schema:
|
|
The output schema. Can be passed in as:
|
|
|
|
- a Pydantic class,
|
|
- a JSON schema
|
|
- a TypedDict class
|
|
- an OpenAI function/tool schema.
|
|
|
|
If ``schema`` is a Pydantic class then the model output will be a
|
|
Pydantic instance of that class, and the model-generated fields will be
|
|
validated by the Pydantic class. Otherwise the model output will be a
|
|
dict and will not be validated. See :meth:`langchain_core.utils.function_calling.convert_to_openai_tool`
|
|
for more on how to properly specify types and descriptions of
|
|
schema fields when specifying a Pydantic or TypedDict class.
|
|
|
|
method: The method for steering model generation, one of:
|
|
|
|
- "json_schema":
|
|
Uses Ollama's structured output API: https://ollama.com/blog/structured-outputs
|
|
- "function_calling":
|
|
Uses Ollama's tool-calling API
|
|
- "json_mode":
|
|
Specifies ``format="json"``. Note that if using JSON mode then you
|
|
must include instructions for formatting the output into the
|
|
                    desired schema in the model call.
|
|
|
|
include_raw:
|
|
If False then only the parsed structured output is returned. If
|
|
an error occurs during model output parsing it will be raised. If True
|
|
then both the raw model response (a BaseMessage) and the parsed model
|
|
response will be returned. If an error occurs during output parsing it
|
|
will be caught and returned as well. The final output is always a dict
|
|
with keys "raw", "parsed", and "parsing_error".
|
|
|
|
kwargs: Additional keyword args aren't supported.
|
|
|
|
Returns:
|
|
            A Runnable that takes the same inputs as a :class:`langchain_core.language_models.chat.BaseChatModel`.
|
|
|
|
| If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs an instance of ``schema`` (i.e., a Pydantic object). Otherwise, if ``include_raw`` is False then Runnable outputs a dict.
|
|
|
|
| If ``include_raw`` is True, then Runnable outputs a dict with keys:
|
|
|
|
- "raw": BaseMessage
|
|
- "parsed": None if there was a parsing error, otherwise the type depends on the ``schema`` as described above.
|
|
- "parsing_error": Optional[BaseException]
|
|
|
|
.. versionchanged:: 0.2.2
|
|
|
|
Added support for structured output API via ``format`` parameter.
|
|
|
|
.. versionchanged:: 0.3.0
|
|
|
|
Updated default ``method`` to ``"json_schema"``.
|
|
|
|
.. dropdown:: Example: schema=Pydantic class, method="json_schema", include_raw=False
|
|
|
|
.. code-block:: python
|
|
|
|
from typing import Optional
|
|
|
|
from langchain_ollama import ChatOllama
|
|
from pydantic import BaseModel, Field
|
|
|
|
|
|
class AnswerWithJustification(BaseModel):
|
|
'''An answer to the user question along with justification for the answer.'''
|
|
|
|
answer: str
|
|
justification: Optional[str] = Field(
|
|
default=..., description="A justification for the answer."
|
|
)
|
|
|
|
|
|
llm = ChatOllama(model="llama3.1", temperature=0)
|
|
structured_llm = llm.with_structured_output(
|
|
AnswerWithJustification
|
|
)
|
|
|
|
structured_llm.invoke(
|
|
"What weighs more a pound of bricks or a pound of feathers"
|
|
)
|
|
|
|
# -> AnswerWithJustification(
|
|
# answer='They weigh the same',
|
|
# justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'
|
|
# )
|
|
|
|
.. dropdown:: Example: schema=Pydantic class, method="json_schema", include_raw=True
|
|
|
|
.. code-block:: python
|
|
|
|
from langchain_ollama import ChatOllama
|
|
from pydantic import BaseModel
|
|
|
|
|
|
class AnswerWithJustification(BaseModel):
|
|
'''An answer to the user question along with justification for the answer.'''
|
|
|
|
answer: str
|
|
justification: str
|
|
|
|
|
|
llm = ChatOllama(model="llama3.1", temperature=0)
|
|
structured_llm = llm.with_structured_output(
|
|
AnswerWithJustification, include_raw=True
|
|
)
|
|
|
|
structured_llm.invoke(
|
|
"What weighs more a pound of bricks or a pound of feathers"
|
|
)
|
|
# -> {
|
|
# 'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_Ao02pnFYXD6GN1yzc0uXPsvF', 'function': {'arguments': '{"answer":"They weigh the same.","justification":"Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ."}', 'name': 'AnswerWithJustification'}, 'type': 'function'}]}),
|
|
# 'parsed': AnswerWithJustification(answer='They weigh the same.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'),
|
|
# 'parsing_error': None
|
|
# }
|
|
|
|
.. dropdown:: Example: schema=Pydantic class, method="function_calling", include_raw=False
|
|
|
|
.. code-block:: python
|
|
|
|
from typing import Optional
|
|
|
|
from langchain_ollama import ChatOllama
|
|
from pydantic import BaseModel, Field
|
|
|
|
|
|
class AnswerWithJustification(BaseModel):
|
|
'''An answer to the user question along with justification for the answer.'''
|
|
|
|
answer: str
|
|
justification: Optional[str] = Field(
|
|
default=..., description="A justification for the answer."
|
|
)
|
|
|
|
|
|
llm = ChatOllama(model="llama3.1", temperature=0)
|
|
structured_llm = llm.with_structured_output(
|
|
AnswerWithJustification, method="function_calling"
|
|
)
|
|
|
|
structured_llm.invoke(
|
|
"What weighs more a pound of bricks or a pound of feathers"
|
|
)
|
|
|
|
# -> AnswerWithJustification(
|
|
# answer='They weigh the same',
|
|
# justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'
|
|
# )
|
|
|
|
.. dropdown:: Example: schema=TypedDict class, method="function_calling", include_raw=False
|
|
|
|
.. code-block:: python
|
|
|
|
# IMPORTANT: If you are using Python <=3.8, you need to import Annotated
|
|
# from typing_extensions, not from typing.
|
|
from typing_extensions import Annotated, TypedDict
|
|
|
|
from langchain_ollama import ChatOllama
|
|
|
|
|
|
class AnswerWithJustification(TypedDict):
|
|
'''An answer to the user question along with justification for the answer.'''
|
|
|
|
answer: str
|
|
justification: Annotated[
|
|
Optional[str], None, "A justification for the answer."
|
|
]
|
|
|
|
|
|
llm = ChatOllama(model="llama3.1", temperature=0)
|
|
structured_llm = llm.with_structured_output(AnswerWithJustification)
|
|
|
|
structured_llm.invoke(
|
|
"What weighs more a pound of bricks or a pound of feathers"
|
|
)
|
|
# -> {
|
|
# 'answer': 'They weigh the same',
|
|
# 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
|
|
# }
|
|
|
|
.. dropdown:: Example: schema=OpenAI function schema, method="function_calling", include_raw=False
|
|
|
|
.. code-block:: python
|
|
|
|
from langchain_ollama import ChatOllama
|
|
|
|
oai_schema = {
|
|
'name': 'AnswerWithJustification',
|
|
'description': 'An answer to the user question along with justification for the answer.',
|
|
'parameters': {
|
|
'type': 'object',
|
|
'properties': {
|
|
'answer': {'type': 'string'},
|
|
'justification': {'description': 'A justification for the answer.', 'type': 'string'}
|
|
},
|
|
'required': ['answer']
|
|
}
|
|
}
|
|
|
|
llm = ChatOllama(model="llama3.1", temperature=0)
|
|
structured_llm = llm.with_structured_output(oai_schema)
|
|
|
|
structured_llm.invoke(
|
|
"What weighs more a pound of bricks or a pound of feathers"
|
|
)
|
|
# -> {
|
|
# 'answer': 'They weigh the same',
|
|
# 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
|
|
# }
|
|
|
|
.. dropdown:: Example: schema=Pydantic class, method="json_mode", include_raw=True
|
|
|
|
.. code-block::
|
|
|
|
from langchain_ollama import ChatOllama
|
|
from pydantic import BaseModel
|
|
|
|
class AnswerWithJustification(BaseModel):
|
|
answer: str
|
|
justification: str
|
|
|
|
llm = ChatOllama(model="llama3.1", temperature=0)
|
|
structured_llm = llm.with_structured_output(
|
|
AnswerWithJustification,
|
|
method="json_mode",
|
|
include_raw=True
|
|
)
|
|
|
|
structured_llm.invoke(
|
|
"Answer the following question. "
|
|
"Make sure to return a JSON blob with keys 'answer' and 'justification'.\\n\\n"
|
|
"What's heavier a pound of bricks or a pound of feathers?"
|
|
)
|
|
# -> {
|
|
# 'raw': AIMessage(content='{\\n "answer": "They are both the same weight.",\\n "justification": "Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight." \\n}'),
|
|
# 'parsed': AnswerWithJustification(answer='They are both the same weight.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight.'),
|
|
# 'parsing_error': None
|
|
# }
|
|
""" # noqa: E501, D301
|
|
_ = kwargs.pop("strict", None)
|
|
if kwargs:
|
|
raise ValueError(f"Received unsupported arguments {kwargs}")
|
|
is_pydantic_schema = _is_pydantic_class(schema)
|
|
if method == "function_calling":
|
|
if schema is None:
|
|
raise ValueError(
|
|
"schema must be specified when method is not 'json_mode'. "
|
|
"Received None."
|
|
)
|
|
formatted_tool = convert_to_openai_tool(schema)
|
|
tool_name = formatted_tool["function"]["name"]
|
|
llm = self.bind_tools(
|
|
[schema],
|
|
tool_choice=tool_name,
|
|
ls_structured_output_format={
|
|
"kwargs": {"method": method},
|
|
"schema": formatted_tool,
|
|
},
|
|
)
|
|
if is_pydantic_schema:
|
|
output_parser: Runnable = PydanticToolsParser(
|
|
tools=[schema], # type: ignore[list-item]
|
|
first_tool_only=True,
|
|
)
|
|
else:
|
|
output_parser = JsonOutputKeyToolsParser(
|
|
key_name=tool_name, first_tool_only=True
|
|
)
|
|
elif method == "json_mode":
|
|
llm = self.bind(
|
|
format="json",
|
|
ls_structured_output_format={
|
|
"kwargs": {"method": method},
|
|
"schema": schema,
|
|
},
|
|
)
|
|
output_parser = (
|
|
PydanticOutputParser(pydantic_object=schema) # type: ignore[arg-type]
|
|
if is_pydantic_schema
|
|
else JsonOutputParser()
|
|
)
|
|
elif method == "json_schema":
|
|
if schema is None:
|
|
raise ValueError(
|
|
"schema must be specified when method is not 'json_mode'. "
|
|
"Received None."
|
|
)
|
|
if is_pydantic_schema:
|
|
schema = cast(TypeBaseModel, schema)
|
|
if issubclass(schema, BaseModelV1):
|
|
response_format = schema.schema()
|
|
else:
|
|
response_format = schema.model_json_schema()
|
|
llm = self.bind(
|
|
format=response_format,
|
|
ls_structured_output_format={
|
|
"kwargs": {"method": method},
|
|
"schema": schema,
|
|
},
|
|
)
|
|
output_parser = PydanticOutputParser(pydantic_object=schema)
|
|
else:
|
|
if is_typeddict(schema):
|
|
response_format = convert_to_json_schema(schema)
|
|
if "required" not in response_format:
|
|
response_format["required"] = list(
|
|
response_format["properties"].keys()
|
|
)
|
|
else:
|
|
# is JSON schema
|
|
response_format = cast(dict, schema)
|
|
llm = self.bind(
|
|
format=response_format,
|
|
ls_structured_output_format={
|
|
"kwargs": {"method": method},
|
|
"schema": response_format,
|
|
},
|
|
)
|
|
output_parser = JsonOutputParser()
|
|
else:
|
|
raise ValueError(
|
|
f"Unrecognized method argument. Expected one of 'function_calling', "
|
|
f"'json_schema', or 'json_mode'. Received: '{method}'"
|
|
)
|
|
|
|
if include_raw:
|
|
parser_assign = RunnablePassthrough.assign(
|
|
parsed=itemgetter("raw") | output_parser, parsing_error=lambda _: None
|
|
)
|
|
parser_none = RunnablePassthrough.assign(parsed=lambda _: None)
|
|
parser_with_fallback = parser_assign.with_fallbacks(
|
|
[parser_none], exception_key="parsing_error"
|
|
)
|
|
return RunnableMap(raw=llm) | parser_with_fallback
|
|
else:
|
|
return llm | output_parser
|