Mirror of https://github.com/hwchase17/langchain.git, synced 2025-06-20 13:54:48 +00:00
core[patch]: Add LLM output to message response_metadata (#19158)
This will more easily expose token usage information.

CC @baskaryan

Co-authored-by: Bagatur <baskaryan@gmail.com>
parent 6fa1438334
commit bd329e9aad
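The practical effect, as a minimal sketch: provider-level llm_output fields such as token_usage and model_name now also appear on the returned message's response_metadata, not only on the LLMResult. This assumes langchain_openai is installed and an OpenAI API key is configured; the exact keys depend on the provider.

    # Hedged sketch of the behavior this patch enables; token_usage and
    # model_name are the keys the integration tests below check for.
    from langchain_core.messages import HumanMessage
    from langchain_openai import ChatOpenAI

    llm = ChatOpenAI()
    msg = llm.invoke([HumanMessage(content="hello")])
    print(msg.response_metadata.get("token_usage"))
    print(msg.response_metadata.get("model_name"))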
@@ -615,6 +615,11 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
             generation.message.response_metadata = _gen_info_and_msg_metadata(
                 generation
             )
+        if len(result.generations) == 1 and result.llm_output is not None:
+            result.generations[0].message.response_metadata = {
+                **result.llm_output,
+                **result.generations[0].message.response_metadata,
+            }
         if check_cache and llm_cache:
             llm_cache.update(prompt, llm_string, result.generations)
         return result
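A note on the merge order in the block above: because the generation's existing response_metadata is unpacked last, its values win over llm_output on key collisions. A small illustrative sketch with invented values:

    # Illustrative only; the dict contents are made up to show unpacking order.
    llm_output = {"token_usage": {"total_tokens": 42}, "model_name": "provider-default"}
    response_metadata = {"finish_reason": "stop", "model_name": "generation-specific"}

    merged = {**llm_output, **response_metadata}
    assert merged["model_name"] == "generation-specific"  # later unpacking wins
    assert merged["token_usage"] == {"total_tokens": 42}  # llm_output keys are added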
@@ -651,6 +656,11 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
             generation.message.response_metadata = _gen_info_and_msg_metadata(
                 generation
             )
+        if len(result.generations) == 1 and result.llm_output is not None:
+            result.generations[0].message.response_metadata = {
+                **result.llm_output,
+                **result.generations[0].message.response_metadata,
+            }
         if check_cache and llm_cache:
             await llm_cache.aupdate(prompt, llm_string, result.generations)
         return result
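The async path applies the same merge; a rough sketch, again assuming langchain_openai and an API key are available:

    import asyncio

    from langchain_core.messages import HumanMessage
    from langchain_openai import ChatOpenAI

    async def main() -> None:
        llm = ChatOpenAI()
        msg = await llm.ainvoke([HumanMessage(content="hello")])
        # With this patch, llm_output fields (token_usage, model_name, ...) are
        # merged into the single generation's message metadata as well.
        print(msg.response_metadata.get("token_usage"))

    asyncio.run(main())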
@@ -403,35 +403,67 @@ def test_invoke() -> None:
     assert isinstance(result.content, str)


-def test_logprobs() -> None:
+def test_response_metadata() -> None:
     llm = ChatOpenAI()
     result = llm.invoke([HumanMessage(content="I'm PickleRick")], logprobs=True)
     assert result.response_metadata
+    assert all(
+        k in result.response_metadata
+        for k in (
+            "token_usage",
+            "model_name",
+            "logprobs",
+            "system_fingerprint",
+            "finish_reason",
+        )
+    )
     assert "content" in result.response_metadata["logprobs"]


-async def test_async_logprobs() -> None:
+async def test_async_response_metadata() -> None:
     llm = ChatOpenAI()
     result = await llm.ainvoke([HumanMessage(content="I'm PickleRick")], logprobs=True)
     assert result.response_metadata
+    assert all(
+        k in result.response_metadata
+        for k in (
+            "token_usage",
+            "model_name",
+            "logprobs",
+            "system_fingerprint",
+            "finish_reason",
+        )
+    )
     assert "content" in result.response_metadata["logprobs"]


-def test_logprobs_streaming() -> None:
+def test_response_metadata_streaming() -> None:
     llm = ChatOpenAI()
     full: Optional[BaseMessageChunk] = None
     for chunk in llm.stream("I'm Pickle Rick", logprobs=True):
         assert isinstance(chunk.content, str)
         full = chunk if full is None else full + chunk
-    assert cast(BaseMessageChunk, full).response_metadata
+    assert all(
+        k in cast(BaseMessageChunk, full).response_metadata
+        for k in (
+            "logprobs",
+            "finish_reason",
+        )
+    )
     assert "content" in cast(BaseMessageChunk, full).response_metadata["logprobs"]


-async def test_async_logprobs_streaming() -> None:
+async def test_async_response_metadata_streaming() -> None:
     llm = ChatOpenAI()
     full: Optional[BaseMessageChunk] = None
     async for chunk in llm.astream("I'm Pickle Rick", logprobs=True):
         assert isinstance(chunk.content, str)
         full = chunk if full is None else full + chunk
-    assert cast(BaseMessageChunk, full).response_metadata
+    assert all(
+        k in cast(BaseMessageChunk, full).response_metadata
+        for k in (
+            "logprobs",
+            "finish_reason",
+        )
+    )
     assert "content" in cast(BaseMessageChunk, full).response_metadata["logprobs"]
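The streaming tests aggregate chunks before checking metadata, and only expect the smaller key set (logprobs, finish_reason). A usage sketch along the same lines, assuming langchain_openai and an API key:

    from typing import Optional

    from langchain_core.messages import BaseMessageChunk
    from langchain_openai import ChatOpenAI

    llm = ChatOpenAI()
    full: Optional[BaseMessageChunk] = None
    for chunk in llm.stream("I'm Pickle Rick", logprobs=True):
        # Summing chunks also merges their response_metadata.
        full = chunk if full is None else full + chunk
    assert full is not None
    print(full.response_metadata.get("finish_reason"))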