mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-05 06:33:20 +00:00
Upgrade the version of AwaDB and add some new interfaces (#6565)
1. Upgrade the version of AwaDB. 2. Add some new interfaces. 3. Fix a bug in packing the page content. @dev2049 please review, thanks! --------- Co-authored-by: vincent <awadb.vincent@gmail.com>
This commit is contained in:
parent
937a7e93f2
commit
ca24dc2d5f
@ -2,6 +2,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
import uuid
|
||||||
from typing import TYPE_CHECKING, Any, Iterable, List, Optional, Tuple, Type
|
from typing import TYPE_CHECKING, Any, Iterable, List, Optional, Tuple, Type
|
||||||
|
|
||||||
from langchain.docstore.document import Document
|
from langchain.docstore.document import Document
|
||||||
@ -48,10 +49,15 @@ class AwaDB(VectorStore):
|
|||||||
else:
|
else:
|
||||||
self.awadb_client = awadb.Client()
|
self.awadb_client = awadb.Client()
|
||||||
|
|
||||||
self.awadb_client.Create(table_name)
|
if table_name == self._DEFAULT_TABLE_NAME:
|
||||||
self.embedding_model = embedding_model
|
table_name += "_"
|
||||||
|
table_name += str(uuid.uuid4()).split("-")[-1]
|
||||||
|
|
||||||
self.added_doc_count = 0
|
self.awadb_client.Create(table_name)
|
||||||
|
self.table2embeddings: dict[str, Embeddings] = {}
|
||||||
|
if embedding_model is not None:
|
||||||
|
self.table2embeddings[table_name] = embedding_model
|
||||||
|
self.using_table_name = table_name
|
||||||
|
|
||||||
def add_texts(
|
def add_texts(
|
||||||
self,
|
self,
|
||||||
@ -74,16 +80,23 @@ class AwaDB(VectorStore):
|
|||||||
raise ValueError("AwaDB client is None!!!")
|
raise ValueError("AwaDB client is None!!!")
|
||||||
|
|
||||||
embeddings = None
|
embeddings = None
|
||||||
if self.embedding_model is not None:
|
if self.using_table_name in self.table2embeddings:
|
||||||
embeddings = self.embedding_model.embed_documents(list(texts))
|
embeddings = self.table2embeddings[self.using_table_name].embed_documents(
|
||||||
|
list(texts)
|
||||||
|
)
|
||||||
|
|
||||||
return self.awadb_client.AddTexts(
|
return self.awadb_client.AddTexts(
|
||||||
"text", "text_embedding", texts, embeddings, metadatas, is_duplicate_texts
|
"embedding_text",
|
||||||
|
"text_embedding",
|
||||||
|
texts,
|
||||||
|
embeddings,
|
||||||
|
metadatas,
|
||||||
|
is_duplicate_texts,
|
||||||
)
|
)
|
||||||
|
|
||||||
def load_local(
|
def load_local(
|
||||||
self,
|
self,
|
||||||
table_name: str = _DEFAULT_TABLE_NAME,
|
table_name: str,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> bool:
|
) -> bool:
|
||||||
if self.awadb_client is None:
|
if self.awadb_client is None:
|
||||||
@ -102,8 +115,8 @@ class AwaDB(VectorStore):
|
|||||||
raise ValueError("AwaDB client is None!!!")
|
raise ValueError("AwaDB client is None!!!")
|
||||||
|
|
||||||
embedding = None
|
embedding = None
|
||||||
if self.embedding_model is not None:
|
if self.using_table_name in self.table2embeddings:
|
||||||
embedding = self.embedding_model.embed_query(query)
|
embedding = self.table2embeddings[self.using_table_name].embed_query(query)
|
||||||
else:
|
else:
|
||||||
from awadb import llm_embedding
|
from awadb import llm_embedding
|
||||||
|
|
||||||
@ -127,21 +140,16 @@ class AwaDB(VectorStore):
|
|||||||
raise ValueError("AwaDB client is None!!!")
|
raise ValueError("AwaDB client is None!!!")
|
||||||
|
|
||||||
embedding = None
|
embedding = None
|
||||||
if self.embedding_model is not None:
|
if self.using_table_name in self.table2embeddings:
|
||||||
embedding = self.embedding_model.embed_query(query)
|
embedding = self.table2embeddings[self.using_table_name].embed_query(query)
|
||||||
else:
|
else:
|
||||||
from awadb import llm_embedding
|
from awadb import llm_embedding
|
||||||
|
|
||||||
llm = llm_embedding.LLMEmbedding()
|
llm = llm_embedding.LLMEmbedding()
|
||||||
embedding = llm.Embedding(query)
|
embedding = llm.Embedding(query)
|
||||||
|
|
||||||
# show_results = self.awadb_client.Search(embedding, k)
|
|
||||||
|
|
||||||
results: List[Tuple[Document, float]] = []
|
results: List[Tuple[Document, float]] = []
|
||||||
|
|
||||||
# if show_results.__len__() == 0:
|
|
||||||
# return results
|
|
||||||
|
|
||||||
scores: List[float] = []
|
scores: List[float] = []
|
||||||
retrieval_docs = self.similarity_search_by_vector(embedding, k, scores)
|
retrieval_docs = self.similarity_search_by_vector(embedding, k, scores)
|
||||||
|
|
||||||
@ -173,8 +181,8 @@ class AwaDB(VectorStore):
|
|||||||
raise ValueError("AwaDB client is None!!!")
|
raise ValueError("AwaDB client is None!!!")
|
||||||
|
|
||||||
embedding = None
|
embedding = None
|
||||||
if self.embedding_model is not None:
|
if self.using_table_name in self.table2embeddings:
|
||||||
embedding = self.embedding_model.embed_query(query)
|
embedding = self.table2embeddings[self.using_table_name].embed_query(query)
|
||||||
|
|
||||||
show_results = self.awadb_client.Search(embedding, k)
|
show_results = self.awadb_client.Search(embedding, k)
|
||||||
|
|
||||||
@ -234,12 +242,15 @@ class AwaDB(VectorStore):
|
|||||||
meta_data = {}
|
meta_data = {}
|
||||||
for item_key in item_detail:
|
for item_key in item_detail:
|
||||||
if (
|
if (
|
||||||
item_key == "Field@0" and self.embedding_model is not None
|
item_key == "Field@0"
|
||||||
|
and self.using_table_name in self.table2embeddings
|
||||||
): # text for the document
|
): # text for the document
|
||||||
content = item_detail[item_key]
|
content = item_detail[item_key]
|
||||||
elif self.embedding_model is None and item_key == "embedding_text":
|
elif item_key == "embedding_text":
|
||||||
content = item_detail[item_key]
|
content = item_detail[item_key]
|
||||||
elif item_key == "Field@1": # embedding field for the document
|
elif (
|
||||||
|
item_key == "Field@1" or item_key == "text_embedding"
|
||||||
|
): # embedding field for the document
|
||||||
continue
|
continue
|
||||||
elif item_key == "score": # L2 distance
|
elif item_key == "score": # L2 distance
|
||||||
if scores is not None:
|
if scores is not None:
|
||||||
@ -250,6 +261,57 @@ class AwaDB(VectorStore):
|
|||||||
results.append(Document(page_content=content, metadata=meta_data))
|
results.append(Document(page_content=content, metadata=meta_data))
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
def create_table(
|
||||||
|
self,
|
||||||
|
table_name: str,
|
||||||
|
**kwargs: Any,
|
||||||
|
) -> bool:
|
||||||
|
"""Create a new table."""
|
||||||
|
|
||||||
|
if self.awadb_client is None:
|
||||||
|
return False
|
||||||
|
|
||||||
|
ret = self.awadb_client.Create(table_name)
|
||||||
|
|
||||||
|
if ret:
|
||||||
|
self.using_table_name = table_name
|
||||||
|
return ret
|
||||||
|
|
||||||
|
def use(
|
||||||
|
self,
|
||||||
|
table_name: str,
|
||||||
|
**kwargs: Any,
|
||||||
|
) -> bool:
|
||||||
|
"""Use the specified table. Don't know the tables, please invoke list_tables."""
|
||||||
|
|
||||||
|
if self.awadb_client is None:
|
||||||
|
return False
|
||||||
|
|
||||||
|
ret = self.awadb_client.Use(table_name)
|
||||||
|
if ret:
|
||||||
|
self.using_table_name = table_name
|
||||||
|
|
||||||
|
return ret
|
||||||
|
|
||||||
|
def list_tables(
|
||||||
|
self,
|
||||||
|
**kwargs: Any,
|
||||||
|
) -> List[str]:
|
||||||
|
"""List all the tables created by the client."""
|
||||||
|
|
||||||
|
if self.awadb_client is None:
|
||||||
|
return []
|
||||||
|
|
||||||
|
return self.awadb_client.ListAllTables()
|
||||||
|
|
||||||
|
def get_current_table(
|
||||||
|
self,
|
||||||
|
**kwargs: Any,
|
||||||
|
) -> str:
|
||||||
|
"""Get the current table."""
|
||||||
|
|
||||||
|
return self.using_table_name
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_texts(
|
def from_texts(
|
||||||
cls: Type[AwaDB],
|
cls: Type[AwaDB],
|
||||||
@ -300,7 +362,7 @@ class AwaDB(VectorStore):
|
|||||||
Args:
|
Args:
|
||||||
documents (List[Document]): List of documents to add to the vectorstore.
|
documents (List[Document]): List of documents to add to the vectorstore.
|
||||||
embedding (Optional[Embeddings]): Embedding function. Defaults to None.
|
embedding (Optional[Embeddings]): Embedding function. Defaults to None.
|
||||||
table_name (str): Name of the collection to create.
|
table_name (str): Name of the table to create.
|
||||||
logging_and_data_dir (Optional[str]): Directory to persist the table.
|
logging_and_data_dir (Optional[str]): Directory to persist the table.
|
||||||
client (Optional[awadb.Client]): AwaDB client
|
client (Optional[awadb.Client]): AwaDB client
|
||||||
|
|
||||||
|
16
poetry.lock
generated
16
poetry.lock
generated
@ -572,19 +572,19 @@ test = ["coverage (>=5,<6)", "pytest (>=6,<7)"]
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "awadb"
|
name = "awadb"
|
||||||
version = "0.3.3"
|
version = "0.3.5"
|
||||||
description = "The AI Native database for embedding vectors"
|
description = "The AI Native database for embedding vectors"
|
||||||
category = "main"
|
category = "main"
|
||||||
optional = true
|
optional = true
|
||||||
python-versions = ">=3.7"
|
python-versions = ">=3.7"
|
||||||
files = [
|
files = [
|
||||||
{file = "awadb-0.3.3-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:daebc108103c8cace41dfb3235fcfdda28ea48e6cd6548b6072f7ad49b64274b"},
|
{file = "awadb-0.3.5-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:9addae6f0cfd57cdb3e8915778772c51fc1c41f5cacebad1322c5bfe30c95516"},
|
||||||
{file = "awadb-0.3.3-cp311-cp311-macosx_10_13_universal2.whl", hash = "sha256:2bb3ca2f943448060b1bba4395dd99e2218d7f2149507a8fdfa7a3fd4cfe97ec"},
|
{file = "awadb-0.3.5-cp311-cp311-macosx_10_13_universal2.whl", hash = "sha256:1b06099c4baf906829e4550f3cf0da602aba44465c89ede5889943619c0b49ce"},
|
||||||
{file = "awadb-0.3.3-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:7b99662af9f7b58e217661a70c295e40605900552bec6d8e9553d90dbf19c5c1"},
|
{file = "awadb-0.3.5-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:dedbb62496aadb70894fa965922625ff3003397445f38d5da7f4092e17f93725"},
|
||||||
{file = "awadb-0.3.3-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:94be44e587f28fa26b2cade0b6f4c04689f50cb0c07183db5ee50e48fe2e9ae3"},
|
{file = "awadb-0.3.5-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:0f7eafcbe5311cc7b976faca368bc666a70f22ebd7f0039b5c5f791f2909377e"},
|
||||||
{file = "awadb-0.3.3-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:314929dc3a8d25c0f234a2b86c920543050f4eb298a6f68bd2c97c9fe3fb6224"},
|
{file = "awadb-0.3.5-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:950ddb5c483ea1ce24550d32c79e4c40c10dfc73cafc6f6faa8b14f22271dce5"},
|
||||||
{file = "awadb-0.3.3-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:8bfccff1c7373899153427d93d96a97ae5371e8a6f09ff4dcbd28fb9f3f63ff4"},
|
{file = "awadb-0.3.5-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:98b5d5a7a0d11253e23bec48295dc45356eead2338eac0d4f73a3755060992db"},
|
||||||
{file = "awadb-0.3.3-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:810021a90b873f668d8ab63e2c2747b2b2835bf0ae25f4223b6c94f06faffea4"},
|
{file = "awadb-0.3.5-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:1c14d9014e7e0ccf8eaecc540e0c88893fcdd3a1438f71c110b9ec80b565dae6"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.extras]
|
[package.extras]
|
||||||
|
Loading…
Reference in New Issue
Block a user