mirror of
https://github.com/hwchase17/langchain.git
synced 2025-04-28 11:55:21 +00:00
partner-upstage[patch]: embeddings empty list bug (#22057)
Fixed an error in `embed_documents` that occurred when the input was an empty list, and revised the documentation.
This commit is contained in:
parent
2df8ac402a
commit
d9eff44400
@ -36,7 +36,9 @@
|
||||
"\n",
|
||||
"docs = loader.load()\n",
|
||||
"\n",
|
||||
"vectorstore = DocArrayInMemorySearch.from_documents(docs, embedding=UpstageEmbeddings())\n",
|
||||
"vectorstore = DocArrayInMemorySearch.from_documents(\n",
|
||||
" docs, embedding=UpstageEmbeddings(model=\"solar-embedding-1-large\")\n",
|
||||
")\n",
|
||||
"retriever = vectorstore.as_retriever()\n",
|
||||
"\n",
|
||||
"template = \"\"\"Answer the question based only on the following context:\n",
|
||||
|
@ -115,13 +115,13 @@
|
||||
"source": [
|
||||
"from langchain_upstage import UpstageEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = UpstageEmbeddings()\n",
|
||||
"embeddings = UpstageEmbeddings(model=\"solar-embedding-1-large\")\n",
|
||||
"doc_result = embeddings.embed_documents(\n",
|
||||
" [\"Sam is a teacher.\", \"This is another document\"]\n",
|
||||
" [\"Sung is a professor.\", \"This is another document\"]\n",
|
||||
")\n",
|
||||
"print(doc_result)\n",
|
||||
"\n",
|
||||
"query_result = embeddings.embed_query(\"What does Sam do?\")\n",
|
||||
"query_result = embeddings.embed_query(\"What does Sung do?\")\n",
|
||||
"print(query_result)"
|
||||
]
|
||||
},
|
||||
|
@ -80,7 +80,7 @@
|
||||
"source": [
|
||||
"from langchain_upstage import UpstageEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = UpstageEmbeddings()"
|
||||
"embeddings = UpstageEmbeddings(model=\"solar-embedding-1-large\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -101,7 +101,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"doc_result = embeddings.embed_documents(\n",
|
||||
" [\"Sam is a teacher.\", \"This is another document\"]\n",
|
||||
" [\"Sung is a professor.\", \"This is another document\"]\n",
|
||||
")\n",
|
||||
"print(doc_result)"
|
||||
]
|
||||
@ -123,7 +123,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query_result = embeddings.embed_query(\"What does Sam do?\")\n",
|
||||
"query_result = embeddings.embed_query(\"What does Sung do?\")\n",
|
||||
"print(query_result)"
|
||||
]
|
||||
},
|
||||
@ -184,7 +184,7 @@
|
||||
"\n",
|
||||
"vectorstore = DocArrayInMemorySearch.from_texts(\n",
|
||||
" [\"harrison worked at kensho\", \"bears like to eat honey\"],\n",
|
||||
" embedding=UpstageEmbeddings(),\n",
|
||||
" embedding=UpstageEmbeddings(model=\"solar-embedding-1-large\"),\n",
|
||||
")\n",
|
||||
"retriever = vectorstore.as_retriever()\n",
|
||||
"docs = retriever.invoke(\"Where did Harrison work?\")\n",
|
||||
|
@ -21,5 +21,5 @@ See a [usage example](https://python.langchain.com/docs/integrations/chat/upstag
|
||||
|
||||
See a [usage example](https://python.langchain.com/docs/integrations/text_embedding/upstage)
|
||||
|
||||
Use `solar-1-mini-embedding` as the default model for embeddings. Do not add suffixes such as `-query` or `-passage` to the model name.
|
||||
Use the `solar-embedding-1-large` model for embeddings. Do not add suffixes such as `-query` or `-passage` to the model name.
|
||||
`UpstageEmbeddings` will automatically add the suffixes based on the method called.
|
||||
|
@ -46,7 +46,7 @@ class UpstageEmbeddings(BaseModel, Embeddings):
|
||||
|
||||
from langchain_upstage import UpstageEmbeddings
|
||||
|
||||
model = UpstageEmbeddings()
|
||||
model = UpstageEmbeddings(model='solar-embedding-1-large')
|
||||
"""
|
||||
|
||||
client: Any = Field(default=None, exclude=True) #: :meta private:
|
||||
@ -200,6 +200,8 @@ class UpstageEmbeddings(BaseModel, Embeddings):
|
||||
assert (
|
||||
self.embed_batch_size <= MAX_EMBED_BATCH_SIZE
|
||||
), f"The embed_batch_size should not be larger than {MAX_EMBED_BATCH_SIZE}."
|
||||
if not texts:
|
||||
return []
|
||||
params = self._invocation_params
|
||||
params["model"] = params["model"] + "-passage"
|
||||
embeddings = []
|
||||
@ -242,6 +244,8 @@ class UpstageEmbeddings(BaseModel, Embeddings):
|
||||
assert (
|
||||
self.embed_batch_size <= MAX_EMBED_BATCH_SIZE
|
||||
), f"The embed_batch_size should not be larger than {MAX_EMBED_BATCH_SIZE}."
|
||||
if not texts:
|
||||
return []
|
||||
params = self._invocation_params
|
||||
params["model"] = params["model"] + "-passage"
|
||||
embeddings = []
|
||||
|
@ -35,3 +35,17 @@ async def test_langchain_upstage_aembed_query() -> None:
|
||||
embedding = UpstageEmbeddings(model="solar-embedding-1-large")
|
||||
output = await embedding.aembed_query(query)
|
||||
assert len(output) > 0
|
||||
|
||||
|
||||
def test_langchain_upstage_embed_documents_with_empty_list() -> None:
    """Check that embedding an empty list of documents returns no vectors."""
    embedder = UpstageEmbeddings(model="solar-embedding-1-large")
    vectors = embedder.embed_documents([])
    assert len(vectors) == 0
|
||||
|
||||
|
||||
async def test_langchain_upstage_aembed_documents_with_empty_list() -> None:
    """Check that asynchronously embedding an empty list returns no vectors."""
    embedder = UpstageEmbeddings(model="solar-embedding-1-large")
    vectors = await embedder.aembed_documents([])
    assert len(vectors) == 0
|
||||
|
Loading…
Reference in New Issue
Block a user