feat(perplexity): expose search_results in chat model (#31468)

Description
The Perplexity API already returns a `search_results` field, but the
LangChain integration dropped it when mapping Perplexity responses to
`additional_kwargs`. This patch adds `"search_results"` to the allowed
attribute lists in both `_stream` and `_generate`, so downstream code can
access it just like `images`, `citations`, or `related_questions`.
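
For example, once the field is forwarded, a streaming consumer can read it
from the first chunk, mirroring how `citations` is exposed. A minimal sketch
(the model name and query are illustrative):

```python
from langchain_perplexity import ChatPerplexity

llm = ChatPerplexity(model="sonar", temperature=0)

query = "What is the tallest mountain in South America?"

# Like citations, search_results arrives on the first streamed chunk only.
for i, chunk in enumerate(llm.stream(query)):
    if i == 0:
        for result in chunk.additional_kwargs.get("search_results", []):
            print(result["title"], "->", result["url"])
    print(chunk.content, end="")
```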

Dependencies
None. The change is purely internal; no new imports or optional
dependencies required.


Perplexity's announcement of the `search_results` field:
https://community.perplexity.ai/t/new-feature-search-results-field-with-richer-metadata/398

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Mason Daugherty <github@mdrxy.com>

3 changed files with 131 additions and 16 deletions

File 1 of 3: Perplexity chat integration docs notebook

@@ -240,6 +240,65 @@
"response.content"
]
},
{
"cell_type": "markdown",
"id": "382335a6",
"metadata": {},
"source": [
"### Accessing the search results metadata\n",
"\n",
"Perplexity often provides a list of the web pages it consulted (“search_results”).\n",
"You don't need to pass any special parameter — the list is placed in\n",
"`response.additional_kwargs[\"search_results\"]`.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2b09214a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The tallest mountain in South America is Aconcagua. It has a summit elevation of approximately 6,961 meters (22,838 feet), making it not only the highest peak in South America but also the highest mountain in the Americas, the Western Hemisphere, and the Southern Hemisphere[1][2][4].\n",
"\n",
"Aconcagua is located in the Principal Cordillera of the Andes mountain range, in Mendoza Province, Argentina, near the border with Chile[1][2][4]. It is of volcanic origin but is not an active volcano[4]. The mountain is part of Aconcagua Provincial Park and features several glaciers, including the large Ventisquero Horcones Inferior glacier[1].\n",
"\n",
"In summary, Aconcagua stands as the tallest mountain in South America at about 6,961 meters (22,838 feet) in height.\n"
]
},
{
"data": {
"text/plain": [
"[{'title': 'Aconcagua - Wikipedia',\n",
" 'url': 'https://en.wikipedia.org/wiki/Aconcagua',\n",
" 'date': None},\n",
" {'title': 'The 10 Highest Mountains in South America - Much Better Adventures',\n",
" 'url': 'https://www.muchbetteradventures.com/magazine/highest-mountains-south-america/',\n",
" 'date': '2023-07-05'}]"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chat = ChatPerplexity(temperature=0, model=\"sonar\")\n",
"\n",
"response = chat.invoke(\n",
" \"What is the tallest mountain in South America?\",\n",
")\n",
"\n",
"# Main answer\n",
"print(response.content)\n",
"\n",
"# First two supporting search results\n",
"response.additional_kwargs[\"search_results\"][:2]"
]
},
{
"cell_type": "markdown",
"id": "13d93dc4",

File 2 of 3: ChatPerplexity implementation (langchain_perplexity)

@@ -74,7 +74,7 @@ class ChatPerplexity(BaseChatModel):
    Key init args - completion params:
        model: str
-            Name of the model to use. e.g. "llama-3.1-sonar-small-128k-online"
+            Name of the model to use. e.g. "sonar"
temperature: float
Sampling temperature to use. Default is 0.7
max_tokens: Optional[int]
@@ -95,11 +95,9 @@ class ChatPerplexity(BaseChatModel):
    Instantiate:
        .. code-block:: python
-            from langchain_community.chat_models import ChatPerplexity
+            from langchain_perplexity import ChatPerplexity
-            llm = ChatPerplexity(
-                model="llama-3.1-sonar-small-128k-online", temperature=0.7
-            )
+            llm = ChatPerplexity(model="sonar", temperature=0.7)
Invoke:
.. code-block:: python
@@ -147,7 +145,7 @@ class ChatPerplexity(BaseChatModel):
    """  # noqa: E501
    client: Any = None  #: :meta private:
-    model: str = "llama-3.1-sonar-small-128k-online"
+    model: str = "sonar"
    """Model name."""
temperature: float = 0.7
"""What sampling temperature to use."""
@@ -325,7 +323,7 @@ class ChatPerplexity(BaseChatModel):
            additional_kwargs = {}
            if first_chunk:
                additional_kwargs["citations"] = chunk.get("citations", [])
-                for attr in ["images", "related_questions"]:
+                for attr in ["images", "related_questions", "search_results"]:
                    if attr in chunk:
                        additional_kwargs[attr] = chunk[attr]
@@ -376,7 +374,7 @@ class ChatPerplexity(BaseChatModel):
        usage_metadata = None
        additional_kwargs = {}
-        for attr in ["citations", "images", "related_questions"]:
+        for attr in ["citations", "images", "related_questions", "search_results"]:
            if hasattr(response, attr):
                additional_kwargs[attr] = getattr(response, attr)
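
Note that `_stream` attaches this metadata only to the first chunk. Chunk
addition merges `additional_kwargs` (list values are concatenated), so
repeating the field on every chunk would duplicate entries in the merged
message. A minimal sketch of that merge behavior, with illustrative values:

```python
from langchain_core.messages import AIMessageChunk

first = AIMessageChunk(
    content="Hello ",
    additional_kwargs={"search_results": [{"title": "Mock result"}]},
)
rest = AIMessageChunk(content="Perplexity")

# Summing chunks concatenates content and merges additional_kwargs.
merged = first + rest
print(merged.content)            # Hello Perplexity
print(merged.additional_kwargs)  # {'search_results': [{'title': 'Mock result'}]}
```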

File 3 of 3: Perplexity unit tests

@@ -1,7 +1,7 @@
-from typing import Any, Optional
+from typing import Any, Optional, cast
from unittest.mock import MagicMock
-from langchain_core.messages import AIMessageChunk, BaseMessageChunk
+from langchain_core.messages import AIMessageChunk, BaseMessage
from pytest_mock import MockerFixture
from langchain_perplexity import ChatPerplexity
@@ -58,9 +58,9 @@ def test_perplexity_stream_includes_citations(mocker: MockerFixture) -> None:
        llm.client.chat.completions, "create", return_value=mock_stream
    )
    stream = llm.stream("Hello langchain")
-    full: Optional[BaseMessageChunk] = None
+    full: Optional[BaseMessage] = None
    for i, chunk in enumerate(stream):
-        full = chunk if full is None else full + chunk
+        full = chunk if full is None else cast(BaseMessage, full + chunk)
        assert chunk.content == mock_chunks[i]["choices"][0]["delta"]["content"]
        if i == 0:
            assert chunk.additional_kwargs["citations"] == [
@@ -110,9 +110,9 @@ def test_perplexity_stream_includes_citations_and_images(mocker: MockerFixture)
        llm.client.chat.completions, "create", return_value=mock_stream
    )
    stream = llm.stream("Hello langchain")
-    full: Optional[BaseMessageChunk] = None
+    full: Optional[BaseMessage] = None
    for i, chunk in enumerate(stream):
-        full = chunk if full is None else full + chunk
+        full = chunk if full is None else cast(BaseMessage, full + chunk)
        assert chunk.content == mock_chunks[i]["choices"][0]["delta"]["content"]
        if i == 0:
            assert chunk.additional_kwargs["citations"] == [
@@ -169,9 +169,9 @@ def test_perplexity_stream_includes_citations_and_related_questions(
        llm.client.chat.completions, "create", return_value=mock_stream
    )
    stream = llm.stream("Hello langchain")
-    full: Optional[BaseMessageChunk] = None
+    full: Optional[BaseMessage] = None
    for i, chunk in enumerate(stream):
-        full = chunk if full is None else full + chunk
+        full = chunk if full is None else cast(BaseMessage, full + chunk)
        assert chunk.content == mock_chunks[i]["choices"][0]["delta"]["content"]
        if i == 0:
            assert chunk.additional_kwargs["citations"] == [
@@ -193,3 +193,61 @@
    }
    patcher.assert_called_once()


def test_perplexity_stream_includes_citations_and_search_results(
    mocker: MockerFixture,
) -> None:
    """Test that the stream method exposes `search_results` via additional_kwargs."""
    llm = ChatPerplexity(model="test", timeout=30, verbose=True)
    mock_chunk_0 = {
        "choices": [{"delta": {"content": "Hello "}, "finish_reason": None}],
        "citations": ["example.com/a", "example.com/b"],
        "search_results": [
            {"title": "Mock result", "url": "https://example.com/result", "date": None}
        ],
    }
    mock_chunk_1 = {
        "choices": [{"delta": {"content": "Perplexity"}, "finish_reason": None}],
        "citations": ["example.com/a", "example.com/b"],
        "search_results": [
            {"title": "Mock result", "url": "https://example.com/result", "date": None}
        ],
    }
    mock_chunks: list[dict[str, Any]] = [mock_chunk_0, mock_chunk_1]
    mock_stream = MagicMock()
    mock_stream.__iter__.return_value = mock_chunks
    patcher = mocker.patch.object(
        llm.client.chat.completions, "create", return_value=mock_stream
    )
    stream = llm.stream("Hello langchain")
    full: Optional[BaseMessage] = None
    for i, chunk in enumerate(stream):
        full = chunk if full is None else cast(BaseMessage, full + chunk)
        assert chunk.content == mock_chunks[i]["choices"][0]["delta"]["content"]
        if i == 0:
            assert chunk.additional_kwargs["citations"] == [
                "example.com/a",
                "example.com/b",
            ]
            assert chunk.additional_kwargs["search_results"] == [
                {
                    "title": "Mock result",
                    "url": "https://example.com/result",
                    "date": None,
                }
            ]
        else:
            assert "citations" not in chunk.additional_kwargs
            assert "search_results" not in chunk.additional_kwargs
    assert isinstance(full, AIMessageChunk)
    assert full.content == "Hello Perplexity"
    assert full.additional_kwargs == {
        "citations": ["example.com/a", "example.com/b"],
        "search_results": [
            {"title": "Mock result", "url": "https://example.com/result", "date": None}
        ],
    }
    patcher.assert_called_once()