partner-upstage[patch]: embeddings empty list bug (#22057)

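Fixed an error in `embed_documents` (and its async counterpart, `aembed_documents`) that occurred when the input was an empty list; both now return an empty list immediately. Also updated the documentation and examples to pass the embedding model name (`solar-embedding-1-large`) explicitly.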
2025-07-06 05:08:20 +00:00 · 2024-05-24 00:44:30 +09:00 · 2024-05-24 00:44:30 +09:00 · d9eff44400
commit d9eff44400
parent 2df8ac402a
6 changed files with 30 additions and 10 deletions
--- a/cookbook/rag_upstage_layout_analysis_groundedness_check.ipynb
+++ b/cookbook/rag_upstage_layout_analysis_groundedness_check.ipynb
@ -36,7 +36,9 @@
    "\n",
    "docs = loader.load()\n",
    "\n",
-    "vectorstore = DocArrayInMemorySearch.from_documents(docs, embedding=UpstageEmbeddings())\n",
+    "vectorstore = DocArrayInMemorySearch.from_documents(\n",
+    "    docs, embedding=UpstageEmbeddings(model=\"solar-embedding-1-large\")\n",
+    ")\n",
    "retriever = vectorstore.as_retriever()\n",
    "\n",
    "template = \"\"\"Answer the question based only on the following context:\n",
--- a/docs/docs/integrations/providers/upstage.ipynb
+++ b/docs/docs/integrations/providers/upstage.ipynb
@ -115,13 +115,13 @@
   "source": [
    "from langchain_upstage import UpstageEmbeddings\n",
    "\n",
-    "embeddings = UpstageEmbeddings()\n",
+    "embeddings = UpstageEmbeddings(model=\"solar-embedding-1-large\")\n",
    "doc_result = embeddings.embed_documents(\n",
-    "    [\"Sam is a teacher.\", \"This is another document\"]\n",
+    "    [\"Sung is a professor.\", \"This is another document\"]\n",
    ")\n",
    "print(doc_result)\n",
    "\n",
-    "query_result = embeddings.embed_query(\"What does Sam do?\")\n",
+    "query_result = embeddings.embed_query(\"What does Sung do?\")\n",
    "print(query_result)"
   ]
  },
--- a/docs/docs/integrations/text_embedding/upstage.ipynb
+++ b/docs/docs/integrations/text_embedding/upstage.ipynb
@ -80,7 +80,7 @@
   "source": [
    "from langchain_upstage import UpstageEmbeddings\n",
    "\n",
-    "embeddings = UpstageEmbeddings()"
+    "embeddings = UpstageEmbeddings(model=\"solar-embedding-1-large\")"
   ]
  },
  {
@ -101,7 +101,7 @@
   "outputs": [],
   "source": [
    "doc_result = embeddings.embed_documents(\n",
-    "    [\"Sam is a teacher.\", \"This is another document\"]\n",
+    "    [\"Sung is a professor.\", \"This is another document\"]\n",
    ")\n",
    "print(doc_result)"
   ]
@ -123,7 +123,7 @@
   },
   "outputs": [],
   "source": [
-    "query_result = embeddings.embed_query(\"What does Sam do?\")\n",
+    "query_result = embeddings.embed_query(\"What does Sung do?\")\n",
    "print(query_result)"
   ]
  },
@ -184,7 +184,7 @@
    "\n",
    "vectorstore = DocArrayInMemorySearch.from_texts(\n",
    "    [\"harrison worked at kensho\", \"bears like to eat honey\"],\n",
-    "    embedding=UpstageEmbeddings(),\n",
+    "    embedding=UpstageEmbeddings(model=\"solar-embedding-1-large\"),\n",
    ")\n",
    "retriever = vectorstore.as_retriever()\n",
    "docs = retriever.invoke(\"Where did Harrison work?\")\n",
--- a/libs/partners/upstage/README.md
+++ b/libs/partners/upstage/README.md
@ -21,5 +21,5 @@ See a [usage example](https://python.langchain.com/docs/integrations/chat/upstag

 See a [usage example](https://python.langchain.com/docs/integrations/text_embedding/upstage)

-Use `solar-1-mini-embedding` as the default model for embeddings. Do not add suffixes such as `-query` or `-passage` to the model name.
+Use `solar-embedding-1-large` model for embeddings. Do not add suffixes such as `-query` or `-passage` to the model name.
 `UpstageEmbeddings` will automatically add the suffixes based on the method called.
--- a/libs/partners/upstage/langchain_upstage/embeddings.py
+++ b/libs/partners/upstage/langchain_upstage/embeddings.py
@ -46,7 +46,7 @@ class UpstageEmbeddings(BaseModel, Embeddings):

            from langchain_upstage import UpstageEmbeddings

-            model = UpstageEmbeddings()
+            model = UpstageEmbeddings(model='solar-embedding-1-large')
    """

    client: Any = Field(default=None, exclude=True)  #: :meta private:
@ -200,6 +200,8 @@ class UpstageEmbeddings(BaseModel, Embeddings):
        assert (
            self.embed_batch_size <= MAX_EMBED_BATCH_SIZE
        ), f"The embed_batch_size should not be larger than {MAX_EMBED_BATCH_SIZE}."
+        if not texts:
+            return []
        params = self._invocation_params
        params["model"] = params["model"] + "-passage"
        embeddings = []
@ -242,6 +244,8 @@ class UpstageEmbeddings(BaseModel, Embeddings):
        assert (
            self.embed_batch_size <= MAX_EMBED_BATCH_SIZE
        ), f"The embed_batch_size should not be larger than {MAX_EMBED_BATCH_SIZE}."
+        if not texts:
+            return []
        params = self._invocation_params
        params["model"] = params["model"] + "-passage"
        embeddings = []
--- a/libs/partners/upstage/tests/integration_tests/test_embeddings.py
+++ b/libs/partners/upstage/tests/integration_tests/test_embeddings.py
@ -35,3 +35,17 @@ async def test_langchain_upstage_aembed_query() -> None:
    embedding = UpstageEmbeddings(model="solar-embedding-1-large")
    output = await embedding.aembed_query(query)
    assert len(output) > 0
+
+
+def test_langchain_upstage_embed_documents_with_empty_list() -> None:
+    """Test Upstage embeddings with empty list."""
+    embedding = UpstageEmbeddings(model="solar-embedding-1-large")
+    output = embedding.embed_documents([])
+    assert len(output) == 0
+
+
+async def test_langchain_upstage_aembed_documents_with_empty_list() -> None:
+    """Test Upstage embeddings asynchronous with empty list."""
+    embedding = UpstageEmbeddings(model="solar-embedding-1-large")
+    output = await embedding.aembed_documents([])
+    assert len(output) == 0