mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-21 06:14:37 +00:00
langchain[patch]: Fix to avoid infinite loop during collapse chain in map reduce (#16253)
- **Description:** Depending on the `token_max` used in `load_summarize_chain`, an infinite loop could occur when documents cannot be collapsed under `token_max`. This change does not affect existing behavior, but it gives users an option to avoid that situation. - **Issue:** https://github.com/langchain-ai/langchain/issues/16251 - **Dependencies:** None - **Twitter handle:** None --------- Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
5d06797905
commit
2df7387c91
@ -200,6 +200,10 @@ class ReduceDocumentsChain(BaseCombineDocumentsChain):
|
|||||||
"""The maximum number of tokens to group documents into. For example, if
|
"""The maximum number of tokens to group documents into. For example, if
|
||||||
set to 3000 then documents will be grouped into chunks of no greater than
|
set to 3000 then documents will be grouped into chunks of no greater than
|
||||||
3000 tokens before trying to combine them into a smaller chunk."""
|
3000 tokens before trying to combine them into a smaller chunk."""
|
||||||
|
collapse_max_retries: Optional[int] = None
|
||||||
|
"""The maximum number of retries to collapse documents to fit token_max.
|
||||||
|
If None, it will keep trying to collapse documents to fit token_max.
|
||||||
|
Otherwise, after it reaches the max number, it will throw an error"""
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
"""Configuration for this pydantic object."""
|
"""Configuration for this pydantic object."""
|
||||||
@ -289,6 +293,7 @@ class ReduceDocumentsChain(BaseCombineDocumentsChain):
|
|||||||
)
|
)
|
||||||
|
|
||||||
_token_max = token_max or self.token_max
|
_token_max = token_max or self.token_max
|
||||||
|
retries: int = 0
|
||||||
while num_tokens is not None and num_tokens > _token_max:
|
while num_tokens is not None and num_tokens > _token_max:
|
||||||
new_result_doc_list = split_list_of_docs(
|
new_result_doc_list = split_list_of_docs(
|
||||||
result_docs, length_func, _token_max, **kwargs
|
result_docs, length_func, _token_max, **kwargs
|
||||||
@ -298,6 +303,12 @@ class ReduceDocumentsChain(BaseCombineDocumentsChain):
|
|||||||
new_doc = collapse_docs(docs, _collapse_docs_func, **kwargs)
|
new_doc = collapse_docs(docs, _collapse_docs_func, **kwargs)
|
||||||
result_docs.append(new_doc)
|
result_docs.append(new_doc)
|
||||||
num_tokens = length_func(result_docs, **kwargs)
|
num_tokens = length_func(result_docs, **kwargs)
|
||||||
|
retries += 1
|
||||||
|
if self.collapse_max_retries and retries == self.collapse_max_retries:
|
||||||
|
raise ValueError(
|
||||||
|
f"Exceed {self.collapse_max_retries} tries to \
|
||||||
|
collapse document to {_token_max} tokens."
|
||||||
|
)
|
||||||
return result_docs, {}
|
return result_docs, {}
|
||||||
|
|
||||||
async def _acollapse(
|
async def _acollapse(
|
||||||
@ -317,6 +328,7 @@ class ReduceDocumentsChain(BaseCombineDocumentsChain):
|
|||||||
)
|
)
|
||||||
|
|
||||||
_token_max = token_max or self.token_max
|
_token_max = token_max or self.token_max
|
||||||
|
retries: int = 0
|
||||||
while num_tokens is not None and num_tokens > _token_max:
|
while num_tokens is not None and num_tokens > _token_max:
|
||||||
new_result_doc_list = split_list_of_docs(
|
new_result_doc_list = split_list_of_docs(
|
||||||
result_docs, length_func, _token_max, **kwargs
|
result_docs, length_func, _token_max, **kwargs
|
||||||
@ -326,6 +338,12 @@ class ReduceDocumentsChain(BaseCombineDocumentsChain):
|
|||||||
new_doc = await acollapse_docs(docs, _collapse_docs_func, **kwargs)
|
new_doc = await acollapse_docs(docs, _collapse_docs_func, **kwargs)
|
||||||
result_docs.append(new_doc)
|
result_docs.append(new_doc)
|
||||||
num_tokens = length_func(result_docs, **kwargs)
|
num_tokens = length_func(result_docs, **kwargs)
|
||||||
|
retries += 1
|
||||||
|
if self.collapse_max_retries and retries == self.collapse_max_retries:
|
||||||
|
raise ValueError(
|
||||||
|
f"Exceed {self.collapse_max_retries} tries to \
|
||||||
|
collapse document to {_token_max} tokens."
|
||||||
|
)
|
||||||
return result_docs, {}
|
return result_docs, {}
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
@ -52,6 +52,8 @@ def _load_map_reduce_chain(
|
|||||||
verbose: Optional[bool] = None,
|
verbose: Optional[bool] = None,
|
||||||
token_max: int = 3000,
|
token_max: int = 3000,
|
||||||
callbacks: Callbacks = None,
|
callbacks: Callbacks = None,
|
||||||
|
*,
|
||||||
|
collapse_max_retries: Optional[int] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> MapReduceDocumentsChain:
|
) -> MapReduceDocumentsChain:
|
||||||
map_chain = LLMChain(
|
map_chain = LLMChain(
|
||||||
@ -92,6 +94,7 @@ def _load_map_reduce_chain(
|
|||||||
token_max=token_max,
|
token_max=token_max,
|
||||||
verbose=verbose,
|
verbose=verbose,
|
||||||
callbacks=callbacks,
|
callbacks=callbacks,
|
||||||
|
collapse_max_retries=collapse_max_retries,
|
||||||
)
|
)
|
||||||
return MapReduceDocumentsChain(
|
return MapReduceDocumentsChain(
|
||||||
llm_chain=map_chain,
|
llm_chain=map_chain,
|
||||||
|
Loading…
Reference in New Issue
Block a user