Fix DashScopeEmbeddings failing on more than 25 input texts (#14662)

The DashScope embedding API limits each request to 25 input texts, but DashScopeEmbeddings passed the whole input list in a single call, so embedding more than 25 texts led to errors. This change splits the input into consecutive batches of at most 25 items, calls the API once per batch, and concatenates the returned embeddings in input order. The integration test now embeds 28 documents to exercise the multi-batch path.

Co-authored-by: xuxiang <xuxiang@aliyun.com>
2025-09-22 19:09:57 +00:00 · 2024-01-02 08:50:13 +08:00
parent 9d8468a576
commit dd1d818a82
2 changed files with 52 additions and 16 deletions
--- a/libs/community/langchain_community/embeddings/dashscope.py
+++ b/libs/community/langchain_community/embeddings/dashscope.py
@@ -45,20 +45,27 @@ def embed_with_retry(embeddings: DashScopeEmbeddings, **kwargs: Any) -> Any:

    @retry_decorator
    def _embed_with_retry(**kwargs: Any) -> Any:
-        resp = embeddings.client.call(**kwargs)
-        if resp.status_code == 200:
-            return resp.output["embeddings"]
-        elif resp.status_code in [400, 401]:
-            raise ValueError(
-                f"status_code: {resp.status_code} \n "
-                f"code: {resp.code} \n message: {resp.message}"
-            )
-        else:
-            raise HTTPError(
-                f"HTTP error occurred: status_code: {resp.status_code} \n "
-                f"code: {resp.code} \n message: {resp.message}",
-                response=resp,
-            )
+        result = []
+        i = 0
+        input_data = kwargs["input"]
+        while i < len(input_data):
+            kwargs["input"] = input_data[i : i + 25]
+            resp = embeddings.client.call(**kwargs)
+            if resp.status_code == 200:
+                result += resp.output["embeddings"]
+            elif resp.status_code in [400, 401]:
+                raise ValueError(
+                    f"status_code: {resp.status_code} \n "
+                    f"code: {resp.code} \n message: {resp.message}"
+                )
+            else:
+                raise HTTPError(
+                    f"HTTP error occurred: status_code: {resp.status_code} \n "
+                    f"code: {resp.code} \n message: {resp.message}",
+                    response=resp,
+                )
+            i += 25
+        return result

    return _embed_with_retry(**kwargs)

--- a/libs/community/tests/integration_tests/embeddings/test_dashscope.py
+++ b/libs/community/tests/integration_tests/embeddings/test_dashscope.py
@@ -15,10 +15,39 @@ def test_dashscope_embedding_documents() -> None:

 def test_dashscope_embedding_documents_multiple() -> None:
    """Test dashscope embeddings."""
-    documents = ["foo bar", "bar foo", "foo"]
+    documents = [
+        "foo bar",
+        "bar foo",
+        "foo",
+        "foo0",
+        "foo1",
+        "foo2",
+        "foo3",
+        "foo4",
+        "foo5",
+        "foo6",
+        "foo7",
+        "foo8",
+        "foo9",
+        "foo10",
+        "foo11",
+        "foo12",
+        "foo13",
+        "foo14",
+        "foo15",
+        "foo16",
+        "foo17",
+        "foo18",
+        "foo19",
+        "foo20",
+        "foo21",
+        "foo22",
+        "foo23",
+        "foo24",
+    ]
    embedding = DashScopeEmbeddings(model="text-embedding-v1")
    output = embedding.embed_documents(documents)
-    assert len(output) == 3
+    assert len(output) == 28
    assert len(output[0]) == 1536
    assert len(output[1]) == 1536
    assert len(output[2]) == 1536