From d9eff444008392ab5e3a347f5671915837ffc78f Mon Sep 17 00:00:00 2001 From: JuHyung Son Date: Fri, 24 May 2024 00:44:30 +0900 Subject: [PATCH] partner-upstage[patch]: embeddings empty list bug (#22057) Fixed an error in `embed_documents` when the input was given as an empty list. And I have revised the document. --- ...pstage_layout_analysis_groundedness_check.ipynb | 4 +++- docs/docs/integrations/providers/upstage.ipynb | 6 +++--- .../docs/integrations/text_embedding/upstage.ipynb | 8 ++++---- libs/partners/upstage/README.md | 2 +- .../upstage/langchain_upstage/embeddings.py | 6 +++++- .../tests/integration_tests/test_embeddings.py | 14 ++++++++++++++ 6 files changed, 30 insertions(+), 10 deletions(-) diff --git a/cookbook/rag_upstage_layout_analysis_groundedness_check.ipynb b/cookbook/rag_upstage_layout_analysis_groundedness_check.ipynb index 6adc4411427..fafb1dfbbad 100644 --- a/cookbook/rag_upstage_layout_analysis_groundedness_check.ipynb +++ b/cookbook/rag_upstage_layout_analysis_groundedness_check.ipynb @@ -36,7 +36,9 @@ "\n", "docs = loader.load()\n", "\n", - "vectorstore = DocArrayInMemorySearch.from_documents(docs, embedding=UpstageEmbeddings())\n", + "vectorstore = DocArrayInMemorySearch.from_documents(\n", + " docs, embedding=UpstageEmbeddings(model=\"solar-embedding-1-large\")\n", + ")\n", "retriever = vectorstore.as_retriever()\n", "\n", "template = \"\"\"Answer the question based only on the following context:\n", diff --git a/docs/docs/integrations/providers/upstage.ipynb b/docs/docs/integrations/providers/upstage.ipynb index 1355f523171..b43bfe163d2 100644 --- a/docs/docs/integrations/providers/upstage.ipynb +++ b/docs/docs/integrations/providers/upstage.ipynb @@ -115,13 +115,13 @@ "source": [ "from langchain_upstage import UpstageEmbeddings\n", "\n", - "embeddings = UpstageEmbeddings()\n", + "embeddings = UpstageEmbeddings(model=\"solar-embedding-1-large\")\n", "doc_result = embeddings.embed_documents(\n", - " [\"Sam is a teacher.\", \"This is another document\"]\n", + " [\"Sung is a professor.\", \"This is another document\"]\n", ")\n", "print(doc_result)\n", "\n", - "query_result = embeddings.embed_query(\"What does Sam do?\")\n", + "query_result = embeddings.embed_query(\"What does Sung do?\")\n", "print(query_result)" ] }, diff --git a/docs/docs/integrations/text_embedding/upstage.ipynb b/docs/docs/integrations/text_embedding/upstage.ipynb index 6f2452b9785..5736dd5f532 100644 --- a/docs/docs/integrations/text_embedding/upstage.ipynb +++ b/docs/docs/integrations/text_embedding/upstage.ipynb @@ -80,7 +80,7 @@ "source": [ "from langchain_upstage import UpstageEmbeddings\n", "\n", - "embeddings = UpstageEmbeddings()" + "embeddings = UpstageEmbeddings(model=\"solar-embedding-1-large\")" ] }, { @@ -101,7 +101,7 @@ "outputs": [], "source": [ "doc_result = embeddings.embed_documents(\n", - " [\"Sam is a teacher.\", \"This is another document\"]\n", + " [\"Sung is a professor.\", \"This is another document\"]\n", ")\n", "print(doc_result)" ] @@ -123,7 +123,7 @@ }, "outputs": [], "source": [ - "query_result = embeddings.embed_query(\"What does Sam do?\")\n", + "query_result = embeddings.embed_query(\"What does Sung do?\")\n", "print(query_result)" ] }, @@ -184,7 +184,7 @@ "\n", "vectorstore = DocArrayInMemorySearch.from_texts(\n", " [\"harrison worked at kensho\", \"bears like to eat honey\"],\n", - " embedding=UpstageEmbeddings(),\n", + " embedding=UpstageEmbeddings(model=\"solar-embedding-1-large\"),\n", ")\n", "retriever = vectorstore.as_retriever()\n", "docs = retriever.invoke(\"Where did Harrison work?\")\n", diff --git a/libs/partners/upstage/README.md b/libs/partners/upstage/README.md index fb91c0a8898..e26cb409a7c 100644 --- a/libs/partners/upstage/README.md +++ b/libs/partners/upstage/README.md @@ -21,5 +21,5 @@ See a [usage example](https://python.langchain.com/docs/integrations/chat/upstag See a [usage example](https://python.langchain.com/docs/integrations/text_embedding/upstage) -Use `solar-1-mini-embedding` as the default model for embeddings. Do not add suffixes such as `-query` or `-passage` to the model name. +Use `solar-embedding-1-large` model for embeddings. Do not add suffixes such as `-query` or `-passage` to the model name. `UpstageEmbeddings` will automatically add the suffixes based on the method called. diff --git a/libs/partners/upstage/langchain_upstage/embeddings.py b/libs/partners/upstage/langchain_upstage/embeddings.py index 08976c608f7..5a74b32832c 100644 --- a/libs/partners/upstage/langchain_upstage/embeddings.py +++ b/libs/partners/upstage/langchain_upstage/embeddings.py @@ -46,7 +46,7 @@ class UpstageEmbeddings(BaseModel, Embeddings): from langchain_upstage import UpstageEmbeddings - model = UpstageEmbeddings() + model = UpstageEmbeddings(model='solar-embedding-1-large') """ client: Any = Field(default=None, exclude=True) #: :meta private: @@ -200,6 +200,8 @@ class UpstageEmbeddings(BaseModel, Embeddings): assert ( self.embed_batch_size <= MAX_EMBED_BATCH_SIZE ), f"The embed_batch_size should not be larger than {MAX_EMBED_BATCH_SIZE}." + if not texts: + return [] params = self._invocation_params params["model"] = params["model"] + "-passage" embeddings = [] @@ -242,6 +244,8 @@ class UpstageEmbeddings(BaseModel, Embeddings): assert ( self.embed_batch_size <= MAX_EMBED_BATCH_SIZE ), f"The embed_batch_size should not be larger than {MAX_EMBED_BATCH_SIZE}." + if not texts: + return [] params = self._invocation_params params["model"] = params["model"] + "-passage" embeddings = [] diff --git a/libs/partners/upstage/tests/integration_tests/test_embeddings.py b/libs/partners/upstage/tests/integration_tests/test_embeddings.py index bd056d2d40b..c8e4765d5ed 100644 --- a/libs/partners/upstage/tests/integration_tests/test_embeddings.py +++ b/libs/partners/upstage/tests/integration_tests/test_embeddings.py @@ -35,3 +35,17 @@ async def test_langchain_upstage_aembed_query() -> None: embedding = UpstageEmbeddings(model="solar-embedding-1-large") output = await embedding.aembed_query(query) assert len(output) > 0 + + +def test_langchain_upstage_embed_documents_with_empty_list() -> None: + """Test Upstage embeddings with empty list.""" + embedding = UpstageEmbeddings(model="solar-embedding-1-large") + output = embedding.embed_documents([]) + assert len(output) == 0 + + +async def test_langchain_upstage_aembed_documents_with_empty_list() -> None: + """Test Upstage embeddings asynchronous with empty list.""" + embedding = UpstageEmbeddings(model="solar-embedding-1-large") + output = await embedding.aembed_documents([]) + assert len(output) == 0