Mirror of https://github.com/hwchase17/langchain.git (synced 2025-06-21 22:29:51 +00:00)
community[patch]: Fixed bug in merging generation_info during chunk concatenation in Tongyi and ChatTongyi (#19014)
- **Description:** In #16218, the concatenation of `GenerationChunk` and `ChatGenerationChunk` changed the merging of `generation_info` from simple key-and-value replacement to the util method [`merge_dicts`](https://github.com/langchain-ai/langchain/blob/master/libs/core/langchain_core/utils/_merge.py). `merge_dicts` cannot merge values of `int` (and some other types) and raises a [`TypeError`](https://github.com/langchain-ai/langchain/blob/master/libs/core/langchain_core/utils/_merge.py#L55). This PR fixes the issue in the **Tongyi and ChatTongyi** models by adopting the `generation_info` of the last chunk and discarding the `generation_info` of the intermediate chunks, ensuring that `stream` and `astream` work correctly.
- **Issue:**
  - Related issues or PRs about Tongyi & ChatTongyi: #16605, #17105
  - Other models or cases: #18441, #17376
- **Dependencies:** No new dependencies
This commit is contained in:
parent: f79d0cb9fb
commit: c244e1a50b
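To make the failure mode described above concrete, here is a minimal, illustrative sketch (keys and values are made up, not taken from a real DashScope response): concatenating two chunks whose `generation_info` carries differing `int` values routes through `merge_dicts`, which has no merge rule for ints and raises `TypeError`.

```python
# Minimal, illustrative repro of the TypeError described in the PR body.
from langchain_core.outputs import GenerationChunk

left = GenerationChunk(
    text="Hello",
    generation_info={"token_usage": {"total_tokens": 3}},  # hypothetical values
)
right = GenerationChunk(
    text=" world",
    generation_info={"token_usage": {"total_tokens": 7}},
)

try:
    # GenerationChunk.__add__ merges generation_info via merge_dicts
    merged = left + right
except TypeError as exc:
    print(f"TypeError during chunk concatenation: {exc}")
```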
Changes to the ChatTongyi chat model (`langchain_community/chat_models/tongyi.py`):

@@ -49,7 +49,11 @@ from tenacity import (
     wait_exponential,
 )
 
-from langchain_community.llms.tongyi import check_response
+from langchain_community.llms.tongyi import (
+    agenerate_with_last_element_mark,
+    check_response,
+    generate_with_last_element_mark,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -338,9 +342,13 @@ class ChatTongyi(BaseChatModel):
         params: Dict[str, Any] = self._invocation_params(
             messages=messages, stop=stop, stream=True, **kwargs
         )
-        for stream_resp in self.stream_completion_with_retry(**params):
+        for stream_resp, is_last_chunk in generate_with_last_element_mark(
+            self.stream_completion_with_retry(**params)
+        ):
             chunk = ChatGenerationChunk(
-                **self._chat_generation_from_qwen_resp(stream_resp, is_chunk=True)
+                **self._chat_generation_from_qwen_resp(
+                    stream_resp, is_chunk=True, is_last_chunk=is_last_chunk
+                )
             )
             if run_manager:
                 run_manager.on_llm_new_token(chunk.text, chunk=chunk)
@@ -356,9 +364,13 @@ class ChatTongyi(BaseChatModel):
         params: Dict[str, Any] = self._invocation_params(
             messages=messages, stop=stop, stream=True, **kwargs
         )
-        async for stream_resp in self.astream_completion_with_retry(**params):
+        async for stream_resp, is_last_chunk in agenerate_with_last_element_mark(
+            self.astream_completion_with_retry(**params)
+        ):
             chunk = ChatGenerationChunk(
-                **self._chat_generation_from_qwen_resp(stream_resp, is_chunk=True)
+                **self._chat_generation_from_qwen_resp(
+                    stream_resp, is_chunk=True, is_last_chunk=is_last_chunk
+                )
             )
             if run_manager:
                 await run_manager.on_llm_new_token(chunk.text, chunk=chunk)
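For context, this is the user-facing call path the two hunks above repair. A hedged sketch, assuming the `dashscope` package is installed and `DASHSCOPE_API_KEY` is set; the prompt is illustrative and not part of the PR:

```python
from langchain_community.chat_models.tongyi import ChatTongyi

chat = ChatTongyi()  # defaults to a DashScope-hosted Qwen model
for chunk in chat.stream("Write a one-line greeting."):
    # Streaming previously failed with TypeError while the framework concatenated
    # the intermediate ChatGenerationChunks; with this patch only the final chunk
    # carries generation_info, so concatenation succeeds.
    print(chunk.content, end="", flush=True)
```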
@@ -398,18 +410,28 @@ class ChatTongyi(BaseChatModel):
 
     @staticmethod
     def _chat_generation_from_qwen_resp(
-        resp: Any, is_chunk: bool = False
+        resp: Any, is_chunk: bool = False, is_last_chunk: bool = True
     ) -> Dict[str, Any]:
+        # According to the response from dashscope,
+        # each chunk's `generation_info` overwrites the previous one.
+        # Besides, The `merge_dicts` method,
+        # which is used to concatenate `generation_info` in `GenerationChunk`,
+        # does not support merging of int type values.
+        # Therefore, we adopt the `generation_info` of the last chunk
+        # and discard the `generation_info` of the intermediate chunks.
         choice = resp["output"]["choices"][0]
         message = convert_dict_to_message(choice["message"], is_chunk=is_chunk)
-        return dict(
-            message=message,
-            generation_info=dict(
-                finish_reason=choice["finish_reason"],
-                request_id=resp["request_id"],
-                token_usage=dict(resp["usage"]),
-            ),
-        )
+        if is_last_chunk:
+            return dict(
+                message=message,
+                generation_info=dict(
+                    finish_reason=choice["finish_reason"],
+                    request_id=resp["request_id"],
+                    token_usage=dict(resp["usage"]),
+                ),
+            )
+        else:
+            return dict(message=message)
 
     @staticmethod
     def _chunk_to_generation(chunk: ChatGenerationChunk) -> ChatGeneration:
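A sketch of how the new return shape composes downstream (message contents and info values are illustrative): intermediate chunks omit `generation_info` entirely, so chunk concatenation only ever merges the last chunk's info and no longer trips over unmergeable `int` values.

```python
from langchain_core.messages import AIMessageChunk
from langchain_core.outputs import ChatGenerationChunk

chunks = [
    ChatGenerationChunk(message=AIMessageChunk(content="Hello")),
    ChatGenerationChunk(message=AIMessageChunk(content=" world")),
    ChatGenerationChunk(
        message=AIMessageChunk(content="!"),
        generation_info={
            "finish_reason": "stop",
            "request_id": "req-abc",  # hypothetical value
            "token_usage": {"input_tokens": 4, "output_tokens": 3, "total_tokens": 7},
        },
    ),
]

final = chunks[0]
for chunk in chunks[1:]:
    final += chunk  # merge_dicts only ever sees one non-empty generation_info

print(final.text)             # Hello world!
print(final.generation_info)  # the last chunk's info, kept intact
```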
Changes to the Tongyi LLM (`langchain_community/llms/tongyi.py`):

@@ -5,13 +5,17 @@ import functools
 import logging
 from typing import (
     Any,
+    AsyncIterable,
     AsyncIterator,
     Callable,
     Dict,
+    Iterable,
     Iterator,
     List,
     Mapping,
     Optional,
+    Tuple,
+    TypeVar,
 )
 
 from langchain_core.callbacks import (
@@ -32,6 +36,7 @@ from tenacity import (
 )
 
 logger = logging.getLogger(__name__)
+T = TypeVar("T")
 
 
 def _create_retry_decorator(llm: Tongyi) -> Callable[[Any], Any]:
@@ -122,6 +127,36 @@ async def astream_generate_with_retry(llm: Tongyi, **kwargs: Any) -> Any:
         yield chunk
 
 
+def generate_with_last_element_mark(iterable: Iterable[T]) -> Iterator[Tuple[T, bool]]:
+    """Generate elements from an iterable,
+    and a boolean indicating if it is the last element."""
+    iterator = iter(iterable)
+    try:
+        item = next(iterator)
+    except StopIteration:
+        return
+    for next_item in iterator:
+        yield item, False
+        item = next_item
+    yield item, True
+
+
+async def agenerate_with_last_element_mark(
+    iterable: AsyncIterable[T],
+) -> AsyncIterator[Tuple[T, bool]]:
+    """Generate elements from an async iterable,
+    and a boolean indicating if it is the last element."""
+    iterator = iterable.__aiter__()
+    try:
+        item = await iterator.__anext__()
+    except StopAsyncIteration:
+        return
+    async for next_item in iterator:
+        yield item, False
+        item = next_item
+    yield item, True
+
+
 class Tongyi(BaseLLM):
     """Tongyi Qwen large language models.
 
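An illustrative use of the two new helpers (the input values are made up): each element is paired with a flag that is `True` only for the final element, which is how the streaming loops know which chunk should keep its `generation_info`.

```python
import asyncio

from langchain_community.llms.tongyi import (
    agenerate_with_last_element_mark,
    generate_with_last_element_mark,
)

# Sync variant: pair every element with an "is last" flag.
for value, is_last in generate_with_last_element_mark(["a", "b", "c"]):
    print(value, is_last)  # a False / b False / c True


async def demo() -> None:
    async def source():
        for value in ("x", "y"):
            yield value

    # Async variant: same pairing over an async iterable.
    async for value, is_last in agenerate_with_last_element_mark(source()):
        print(value, is_last)  # x False / y True


asyncio.run(demo())
```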
@@ -283,8 +318,12 @@ class Tongyi(BaseLLM):
         params: Dict[str, Any] = self._invocation_params(
             stop=stop, stream=True, **kwargs
         )
-        for stream_resp in stream_generate_with_retry(self, prompt=prompt, **params):
-            chunk = GenerationChunk(**self._generation_from_qwen_resp(stream_resp))
+        for stream_resp, is_last_chunk in generate_with_last_element_mark(
+            stream_generate_with_retry(self, prompt=prompt, **params)
+        ):
+            chunk = GenerationChunk(
+                **self._generation_from_qwen_resp(stream_resp, is_last_chunk)
+            )
             if run_manager:
                 run_manager.on_llm_new_token(
                     chunk.text,
@@ -303,10 +342,12 @@ class Tongyi(BaseLLM):
         params: Dict[str, Any] = self._invocation_params(
             stop=stop, stream=True, **kwargs
         )
-        async for stream_resp in astream_generate_with_retry(
-            self, prompt=prompt, **params
+        async for stream_resp, is_last_chunk in agenerate_with_last_element_mark(
+            astream_generate_with_retry(self, prompt=prompt, **params)
         ):
-            chunk = GenerationChunk(**self._generation_from_qwen_resp(stream_resp))
+            chunk = GenerationChunk(
+                **self._generation_from_qwen_resp(stream_resp, is_last_chunk)
+            )
             if run_manager:
                 await run_manager.on_llm_new_token(
                     chunk.text,
@@ -327,15 +368,27 @@ class Tongyi(BaseLLM):
         return params
 
     @staticmethod
-    def _generation_from_qwen_resp(resp: Any) -> Dict[str, Any]:
-        return dict(
-            text=resp["output"]["text"],
-            generation_info=dict(
-                finish_reason=resp["output"]["finish_reason"],
-                request_id=resp["request_id"],
-                token_usage=dict(resp["usage"]),
-            ),
-        )
+    def _generation_from_qwen_resp(
+        resp: Any, is_last_chunk: bool = True
+    ) -> Dict[str, Any]:
+        # According to the response from dashscope,
+        # each chunk's `generation_info` overwrites the previous one.
+        # Besides, The `merge_dicts` method,
+        # which is used to concatenate `generation_info` in `GenerationChunk`,
+        # does not support merging of int type values.
+        # Therefore, we adopt the `generation_info` of the last chunk
+        # and discard the `generation_info` of the intermediate chunks.
+        if is_last_chunk:
+            return dict(
+                text=resp["output"]["text"],
+                generation_info=dict(
+                    finish_reason=resp["output"]["finish_reason"],
+                    request_id=resp["request_id"],
+                    token_usage=dict(resp["usage"]),
+                ),
+            )
+        else:
+            return dict(text=resp["output"]["text"])
 
     @staticmethod
     def _chunk_to_generation(chunk: GenerationChunk) -> Generation: