From dd1d818a82bb7ad89e97982c1477e6aaba69664f Mon Sep 17 00:00:00 2001 From: xuxiang <418720028@qq.com> Date: Tue, 2 Jan 2024 08:50:13 +0800 Subject: [PATCH] Fixing the Issue with DashScopeEmbeddings Handling More than 25 Rows of Data (#14662) This change addresses the issue where DashScopeEmbeddingAPI limits requests to 25 lines of data, and DashScopeEmbeddings did not handle cases with more than 25 lines, leading to errors. I have implemented a fix to manage data exceeding this limit efficiently. --------- Co-authored-by: xuxiang --- .../embeddings/dashscope.py | 35 +++++++++++-------- .../embeddings/test_dashscope.py | 33 +++++++++++++++-- 2 files changed, 52 insertions(+), 16 deletions(-) diff --git a/libs/community/langchain_community/embeddings/dashscope.py b/libs/community/langchain_community/embeddings/dashscope.py index 9b1b7db8874..0042c05debc 100644 --- a/libs/community/langchain_community/embeddings/dashscope.py +++ b/libs/community/langchain_community/embeddings/dashscope.py @@ -45,20 +45,27 @@ def embed_with_retry(embeddings: DashScopeEmbeddings, **kwargs: Any) -> Any: @retry_decorator def _embed_with_retry(**kwargs: Any) -> Any: - resp = embeddings.client.call(**kwargs) - if resp.status_code == 200: - return resp.output["embeddings"] - elif resp.status_code in [400, 401]: - raise ValueError( - f"status_code: {resp.status_code} \n " - f"code: {resp.code} \n message: {resp.message}" - ) - else: - raise HTTPError( - f"HTTP error occurred: status_code: {resp.status_code} \n " - f"code: {resp.code} \n message: {resp.message}", - response=resp, - ) + result = [] + i = 0 + input_data = kwargs["input"] + while i < len(input_data): + kwargs["input"] = input_data[i : i + 25] + resp = embeddings.client.call(**kwargs) + if resp.status_code == 200: + result += resp.output["embeddings"] + elif resp.status_code in [400, 401]: + raise ValueError( + f"status_code: {resp.status_code} \n " + f"code: {resp.code} \n message: {resp.message}" + ) + else: + raise HTTPError( + f"HTTP error occurred: status_code: {resp.status_code} \n " + f"code: {resp.code} \n message: {resp.message}", + response=resp, + ) + i += 25 + return result return _embed_with_retry(**kwargs) diff --git a/libs/community/tests/integration_tests/embeddings/test_dashscope.py b/libs/community/tests/integration_tests/embeddings/test_dashscope.py index 4c189c53550..82e507c863c 100644 --- a/libs/community/tests/integration_tests/embeddings/test_dashscope.py +++ b/libs/community/tests/integration_tests/embeddings/test_dashscope.py @@ -15,10 +15,39 @@ def test_dashscope_embedding_documents() -> None: def test_dashscope_embedding_documents_multiple() -> None: """Test dashscope embeddings.""" - documents = ["foo bar", "bar foo", "foo"] + documents = [ + "foo bar", + "bar foo", + "foo", + "foo0", + "foo1", + "foo2", + "foo3", + "foo4", + "foo5", + "foo6", + "foo7", + "foo8", + "foo9", + "foo10", + "foo11", + "foo12", + "foo13", + "foo14", + "foo15", + "foo16", + "foo17", + "foo18", + "foo19", + "foo20", + "foo21", + "foo22", + "foo23", + "foo24", + ] embedding = DashScopeEmbeddings(model="text-embedding-v1") output = embedding.embed_documents(documents) - assert len(output) == 3 + assert len(output) == 28 assert len(output[0]) == 1536 assert len(output[1]) == 1536 assert len(output[2]) == 1536