mirror of
https://github.com/hwchase17/langchain.git
synced 2025-05-30 11:39:03 +00:00
Improve the performance of add_texts interface and upgrade the AwaDB from 0.3.2 to 0.3.3 (#6316)
1. Changed the implementation of add_texts interface for the AwaDB vector store in order to improve the performance 2. Upgrade the AwaDB from 0.3.2 to 0.3.3 --------- Co-authored-by: vincent <awadb.vincent@gmail.com>
This commit is contained in:
parent
24b2af5218
commit
ad324a39ae
@ -39,7 +39,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
|
||||
"loader = TextLoader('../../../state_of_the_union.txt')\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)"
|
||||
@ -59,22 +59,22 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "62b7a4c5",
|
||||
"execution_count": 4,
|
||||
"id": "4b172de8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a9b4be48",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "87fec6b5",
|
||||
@ -103,22 +103,22 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f0045583",
|
||||
"execution_count": 4,
|
||||
"id": "93cd0b7a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(Document(page_content='And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt'}), 0.561813814013747)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8c2da99d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"(Document(page_content='And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt'}), 0.561813814013747)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0b49fb59",
|
||||
@ -153,19 +153,16 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"awadb_client = awadb.Client()\n",
|
||||
"ret = awadb_client.Load(\"langchain_awadb\")\n",
|
||||
"if ret:\n",
|
||||
" print(\"awadb load table success\")\n",
|
||||
"ret = awadb_client.Load('langchain_awadb')\n",
|
||||
"if ret : print('awadb load table success')\n",
|
||||
"else:\n",
|
||||
" print(\"awadb load table failed\")"
|
||||
" print('awadb load table failed')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5ae9a9dd",
|
||||
"cell_type": "raw",
|
||||
"id": "aba255c2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"awadb load table success"
|
||||
]
|
||||
|
@ -57,13 +57,15 @@ class AwaDB(VectorStore):
|
||||
self,
|
||||
texts: Iterable[str],
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
is_duplicate_texts: Optional[bool] = None,
|
||||
**kwargs: Any,
|
||||
) -> List[str]:
|
||||
"""Run more texts through the embeddings and add to the vectorstore.
|
||||
Args:
|
||||
texts: Iterable of strings to add to the vectorstore.
|
||||
metadatas: Optional list of metadatas associated with the texts.
|
||||
kwargs: vectorstore specific parameters
|
||||
is_duplicate_texts: Optional whether to duplicate texts.
|
||||
kwargs: vectorstore specific parameters.
|
||||
|
||||
Returns:
|
||||
List of ids from adding the texts into the vectorstore.
|
||||
@ -74,28 +76,10 @@ class AwaDB(VectorStore):
|
||||
embeddings = None
|
||||
if self.embedding_model is not None:
|
||||
embeddings = self.embedding_model.embed_documents(list(texts))
|
||||
added_results: List[str] = []
|
||||
doc_no = 0
|
||||
for text in texts:
|
||||
doc: List[Any] = []
|
||||
if embeddings is not None:
|
||||
doc.append(text)
|
||||
doc.append(embeddings[doc_no])
|
||||
else:
|
||||
dict_tmp = {}
|
||||
dict_tmp["embedding_text"] = text
|
||||
doc.append(dict_tmp)
|
||||
|
||||
if metadatas is not None:
|
||||
if doc_no < metadatas.__len__():
|
||||
doc.append(metadatas[doc_no])
|
||||
self.awadb_client.Add(doc)
|
||||
added_results.append(str(self.added_doc_count))
|
||||
|
||||
doc_no = doc_no + 1
|
||||
self.added_doc_count = self.added_doc_count + 1
|
||||
|
||||
return added_results
|
||||
return self.awadb_client.AddTexts(
|
||||
"text", "text_embedding", texts, embeddings, metadatas, is_duplicate_texts
|
||||
)
|
||||
|
||||
def load_local(
|
||||
self,
|
||||
|
20
poetry.lock
generated
20
poetry.lock
generated
@ -572,19 +572,19 @@ test = ["coverage (>=5,<6)", "pytest (>=6,<7)"]
|
||||
|
||||
[[package]]
|
||||
name = "awadb"
|
||||
version = "0.3.2"
|
||||
version = "0.3.3"
|
||||
description = "The AI Native database for embedding vectors"
|
||||
category = "main"
|
||||
optional = true
|
||||
python-versions = ">=3.6"
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "awadb-0.3.2-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:f3ce3b066198782fa413f452c56001c58ebec71a1e1dca0eee68f73321ba15a9"},
|
||||
{file = "awadb-0.3.2-cp311-cp311-macosx_10_13_universal2.whl", hash = "sha256:c96b5e263c32b2563b1fa027035bdcf50540808ad303071cc1aed3471c3c39b7"},
|
||||
{file = "awadb-0.3.2-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:3e43b5a74753261857d0b146543a4620e00938833181259f138f07457fa84812"},
|
||||
{file = "awadb-0.3.2-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:6330b4d18a814c1562113b3b7897db629c2ac9b5818236ead0fc5f3445b6b7fb"},
|
||||
{file = "awadb-0.3.2-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:82b4e61cc905339868a9f833d0988098f56411b42e0f8dd571aec7c8d6a3f1fa"},
|
||||
{file = "awadb-0.3.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:5efaa93d69c467f16ec4f65ed250ec26015781826c0d059c8a54613a5d3e2c3e"},
|
||||
{file = "awadb-0.3.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:7be0811550d72f49018e4790d290cf521f92ffa84d65ef1073e621f225d142ec"},
|
||||
{file = "awadb-0.3.3-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:daebc108103c8cace41dfb3235fcfdda28ea48e6cd6548b6072f7ad49b64274b"},
|
||||
{file = "awadb-0.3.3-cp311-cp311-macosx_10_13_universal2.whl", hash = "sha256:2bb3ca2f943448060b1bba4395dd99e2218d7f2149507a8fdfa7a3fd4cfe97ec"},
|
||||
{file = "awadb-0.3.3-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:7b99662af9f7b58e217661a70c295e40605900552bec6d8e9553d90dbf19c5c1"},
|
||||
{file = "awadb-0.3.3-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:94be44e587f28fa26b2cade0b6f4c04689f50cb0c07183db5ee50e48fe2e9ae3"},
|
||||
{file = "awadb-0.3.3-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:314929dc3a8d25c0f234a2b86c920543050f4eb298a6f68bd2c97c9fe3fb6224"},
|
||||
{file = "awadb-0.3.3-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:8bfccff1c7373899153427d93d96a97ae5371e8a6f09ff4dcbd28fb9f3f63ff4"},
|
||||
{file = "awadb-0.3.3-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:810021a90b873f668d8ab63e2c2747b2b2835bf0ae25f4223b6c94f06faffea4"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
@ -11486,4 +11486,4 @@ text-helpers = ["chardet"]
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = ">=3.8.1,<4.0"
|
||||
content-hash = "b4a782d8223ccc19b2dfb777978c3ad636b11a79cc58a5c45e4dcdb0fe5e29c1"
|
||||
content-hash = "dd54bb9201b260b734ceebad2c6629b4b1b3bf224731de092782678219f32120"
|
||||
|
@ -106,7 +106,7 @@ pyspark = {version = "^3.4.0", optional = true}
|
||||
tigrisdb = {version = "^1.0.0b6", optional = true}
|
||||
nebula3-python = {version = "^3.4.0", optional = true}
|
||||
langchainplus-sdk = ">=0.0.9"
|
||||
awadb = {version = "^0.3.2", optional = true}
|
||||
awadb = {version = "^0.3.3", optional = true}
|
||||
azure-search-documents = {version = "11.4.0a20230509004", source = "azure-sdk-dev", optional = true}
|
||||
|
||||
[tool.poetry.group.docs.dependencies]
|
||||
|
Loading…
Reference in New Issue
Block a user