core: allow artifact in create_retriever_tool (#28903)

Add an option to return both content and artifact, so that callers can also
access the full information of the retrieved documents.

The documents are returned as a list of `Document` objects in the tool message's
`artifact` when the `response_format` parameter is set to `"content_and_artifact"`.

`response_format` defaults to `"content"` to preserve the current behavior.

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
Adrián Panella
2025-01-03 17:10:31 -05:00
committed by GitHub
parent 3e618b16cd
commit acddfc772e
2 changed files with 83 additions and 6 deletions

View File

@@ -1,11 +1,12 @@
from __future__ import annotations
from functools import partial
from typing import Optional
from typing import Literal, Optional, Union
from pydantic import BaseModel, Field
from langchain_core.callbacks import Callbacks
from langchain_core.documents import Document
from langchain_core.prompts import (
BasePromptTemplate,
PromptTemplate,
@@ -28,11 +29,16 @@ def _get_relevant_documents(
document_prompt: BasePromptTemplate,
document_separator: str,
callbacks: Callbacks = None,
) -> str:
response_format: Literal["content", "content_and_artifact"] = "content",
) -> Union[str, tuple[str, list[Document]]]:
docs = retriever.invoke(query, config={"callbacks": callbacks})
return document_separator.join(
content = document_separator.join(
format_document(doc, document_prompt) for doc in docs
)
if response_format == "content_and_artifact":
return (content, docs)
return content
async def _aget_relevant_documents(
@@ -41,12 +47,18 @@ async def _aget_relevant_documents(
document_prompt: BasePromptTemplate,
document_separator: str,
callbacks: Callbacks = None,
) -> str:
response_format: Literal["content", "content_and_artifact"] = "content",
) -> Union[str, tuple[str, list[Document]]]:
docs = await retriever.ainvoke(query, config={"callbacks": callbacks})
return document_separator.join(
content = document_separator.join(
[await aformat_document(doc, document_prompt) for doc in docs]
)
if response_format == "content_and_artifact":
return (content, docs)
return content
def create_retriever_tool(
retriever: BaseRetriever,
@@ -55,6 +67,7 @@ def create_retriever_tool(
*,
document_prompt: Optional[BasePromptTemplate] = None,
document_separator: str = "\n\n",
response_format: Literal["content", "content_and_artifact"] = "content",
) -> Tool:
"""Create a tool to do retrieval of documents.
@@ -66,6 +79,11 @@ def create_retriever_tool(
model, so should be descriptive.
document_prompt: The prompt to use for the document. Defaults to None.
document_separator: The separator to use between documents. Defaults to "\n\n".
response_format: The tool response format. If "content" then the output of
the tool is interpreted as the contents of a ToolMessage. If
"content_and_artifact" then the output is expected to be a two-tuple
corresponding to the (content, artifact) of a ToolMessage (artifact
being a list of documents in this case). Defaults to "content".
Returns:
Tool class to pass to an agent.
@@ -76,12 +94,14 @@ def create_retriever_tool(
retriever=retriever,
document_prompt=document_prompt,
document_separator=document_separator,
response_format=response_format,
)
afunc = partial(
_aget_relevant_documents,
retriever=retriever,
document_prompt=document_prompt,
document_separator=document_separator,
response_format=response_format,
)
return Tool(
name=name,
@@ -89,4 +109,5 @@ def create_retriever_tool(
func=func,
coroutine=afunc,
args_schema=RetrieverInput,
response_format=response_format,
)