mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-03 05:34:01 +00:00
Upgrade the version of AwaDB and add some new interfaces (#6565)
1. Upgrade the version of AwaDB. 2. Add some new interfaces. 3. Fix a bug in packing the page content. @dev2049 please review, thanks! --------- Co-authored-by: vincent <awadb.vincent@gmail.com>
This commit is contained in:
parent
937a7e93f2
commit
ca24dc2d5f
@ -2,6 +2,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import uuid
|
||||
from typing import TYPE_CHECKING, Any, Iterable, List, Optional, Tuple, Type
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
@ -48,10 +49,15 @@ class AwaDB(VectorStore):
|
||||
else:
|
||||
self.awadb_client = awadb.Client()
|
||||
|
||||
self.awadb_client.Create(table_name)
|
||||
self.embedding_model = embedding_model
|
||||
if table_name == self._DEFAULT_TABLE_NAME:
|
||||
table_name += "_"
|
||||
table_name += str(uuid.uuid4()).split("-")[-1]
|
||||
|
||||
self.added_doc_count = 0
|
||||
self.awadb_client.Create(table_name)
|
||||
self.table2embeddings: dict[str, Embeddings] = {}
|
||||
if embedding_model is not None:
|
||||
self.table2embeddings[table_name] = embedding_model
|
||||
self.using_table_name = table_name
|
||||
|
||||
def add_texts(
|
||||
self,
|
||||
@ -74,16 +80,23 @@ class AwaDB(VectorStore):
|
||||
raise ValueError("AwaDB client is None!!!")
|
||||
|
||||
embeddings = None
|
||||
if self.embedding_model is not None:
|
||||
embeddings = self.embedding_model.embed_documents(list(texts))
|
||||
if self.using_table_name in self.table2embeddings:
|
||||
embeddings = self.table2embeddings[self.using_table_name].embed_documents(
|
||||
list(texts)
|
||||
)
|
||||
|
||||
return self.awadb_client.AddTexts(
|
||||
"text", "text_embedding", texts, embeddings, metadatas, is_duplicate_texts
|
||||
"embedding_text",
|
||||
"text_embedding",
|
||||
texts,
|
||||
embeddings,
|
||||
metadatas,
|
||||
is_duplicate_texts,
|
||||
)
|
||||
|
||||
def load_local(
|
||||
self,
|
||||
table_name: str = _DEFAULT_TABLE_NAME,
|
||||
table_name: str,
|
||||
**kwargs: Any,
|
||||
) -> bool:
|
||||
if self.awadb_client is None:
|
||||
@ -102,8 +115,8 @@ class AwaDB(VectorStore):
|
||||
raise ValueError("AwaDB client is None!!!")
|
||||
|
||||
embedding = None
|
||||
if self.embedding_model is not None:
|
||||
embedding = self.embedding_model.embed_query(query)
|
||||
if self.using_table_name in self.table2embeddings:
|
||||
embedding = self.table2embeddings[self.using_table_name].embed_query(query)
|
||||
else:
|
||||
from awadb import llm_embedding
|
||||
|
||||
@ -127,21 +140,16 @@ class AwaDB(VectorStore):
|
||||
raise ValueError("AwaDB client is None!!!")
|
||||
|
||||
embedding = None
|
||||
if self.embedding_model is not None:
|
||||
embedding = self.embedding_model.embed_query(query)
|
||||
if self.using_table_name in self.table2embeddings:
|
||||
embedding = self.table2embeddings[self.using_table_name].embed_query(query)
|
||||
else:
|
||||
from awadb import llm_embedding
|
||||
|
||||
llm = llm_embedding.LLMEmbedding()
|
||||
embedding = llm.Embedding(query)
|
||||
|
||||
# show_results = self.awadb_client.Search(embedding, k)
|
||||
|
||||
results: List[Tuple[Document, float]] = []
|
||||
|
||||
# if show_results.__len__() == 0:
|
||||
# return results
|
||||
|
||||
scores: List[float] = []
|
||||
retrieval_docs = self.similarity_search_by_vector(embedding, k, scores)
|
||||
|
||||
@ -173,8 +181,8 @@ class AwaDB(VectorStore):
|
||||
raise ValueError("AwaDB client is None!!!")
|
||||
|
||||
embedding = None
|
||||
if self.embedding_model is not None:
|
||||
embedding = self.embedding_model.embed_query(query)
|
||||
if self.using_table_name in self.table2embeddings:
|
||||
embedding = self.table2embeddings[self.using_table_name].embed_query(query)
|
||||
|
||||
show_results = self.awadb_client.Search(embedding, k)
|
||||
|
||||
@ -234,12 +242,15 @@ class AwaDB(VectorStore):
|
||||
meta_data = {}
|
||||
for item_key in item_detail:
|
||||
if (
|
||||
item_key == "Field@0" and self.embedding_model is not None
|
||||
item_key == "Field@0"
|
||||
and self.using_table_name in self.table2embeddings
|
||||
): # text for the document
|
||||
content = item_detail[item_key]
|
||||
elif self.embedding_model is None and item_key == "embedding_text":
|
||||
elif item_key == "embedding_text":
|
||||
content = item_detail[item_key]
|
||||
elif item_key == "Field@1": # embedding field for the document
|
||||
elif (
|
||||
item_key == "Field@1" or item_key == "text_embedding"
|
||||
): # embedding field for the document
|
||||
continue
|
||||
elif item_key == "score": # L2 distance
|
||||
if scores is not None:
|
||||
@ -250,6 +261,57 @@ class AwaDB(VectorStore):
|
||||
results.append(Document(page_content=content, metadata=meta_data))
|
||||
return results
|
||||
|
||||
def create_table(self, table_name: str, **kwargs: Any) -> bool:
    """Create a table and make it the current table on success.

    Args:
        table_name: Name of the table to create.
        **kwargs: Accepted for interface compatibility; not read here.

    Returns:
        ``False`` when no client is attached; otherwise the value
        returned by the client's ``Create`` call.
    """
    client = self.awadb_client
    if client is None:
        return False

    created = client.Create(table_name)
    if not created:
        return created

    # Track the new table only once the client reports success.
    self.using_table_name = table_name
    return created
|
||||
|
||||
def use(self, table_name: str, **kwargs: Any) -> bool:
    """Switch to the specified table.

    If the table names are not known, call ``list_tables`` first.

    Args:
        table_name: Name of the table to switch to.
        **kwargs: Accepted for interface compatibility; not read here.

    Returns:
        ``False`` when no client is attached; otherwise the value
        returned by the client's ``Use`` call.
    """
    client = self.awadb_client
    if client is None:
        return False

    switched = client.Use(table_name)
    if not switched:
        return switched

    # Record the table only once the client accepted the switch.
    self.using_table_name = table_name
    return switched
|
||||
|
||||
def list_tables(self, **kwargs: Any) -> List[str]:
    """List all the tables created by the client.

    Args:
        **kwargs: Accepted for interface compatibility; not read here.

    Returns:
        An empty list when no client is attached; otherwise the value
        returned by the client's ``ListAllTables`` call.
    """
    client = self.awadb_client
    return [] if client is None else client.ListAllTables()
|
||||
|
||||
def get_current_table(self, **kwargs: Any) -> str:
    """Return the name of the table currently in use.

    Args:
        **kwargs: Accepted for interface compatibility; not read here.

    Returns:
        The value stored in ``using_table_name``.
    """
    current_table = self.using_table_name
    return current_table
|
||||
|
||||
@classmethod
|
||||
def from_texts(
|
||||
cls: Type[AwaDB],
|
||||
@ -300,7 +362,7 @@ class AwaDB(VectorStore):
|
||||
Args:
|
||||
documents (List[Document]): List of documents to add to the vectorstore.
|
||||
embedding (Optional[Embeddings]): Embedding function. Defaults to None.
|
||||
table_name (str): Name of the collection to create.
|
||||
table_name (str): Name of the table to create.
|
||||
logging_and_data_dir (Optional[str]): Directory to persist the table.
|
||||
client (Optional[awadb.Client]): AwaDB client
|
||||
|
||||
|
16
poetry.lock
generated
16
poetry.lock
generated
@ -572,19 +572,19 @@ test = ["coverage (>=5,<6)", "pytest (>=6,<7)"]
|
||||
|
||||
[[package]]
|
||||
name = "awadb"
|
||||
version = "0.3.3"
|
||||
version = "0.3.5"
|
||||
description = "The AI Native database for embedding vectors"
|
||||
category = "main"
|
||||
optional = true
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "awadb-0.3.3-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:daebc108103c8cace41dfb3235fcfdda28ea48e6cd6548b6072f7ad49b64274b"},
|
||||
{file = "awadb-0.3.3-cp311-cp311-macosx_10_13_universal2.whl", hash = "sha256:2bb3ca2f943448060b1bba4395dd99e2218d7f2149507a8fdfa7a3fd4cfe97ec"},
|
||||
{file = "awadb-0.3.3-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:7b99662af9f7b58e217661a70c295e40605900552bec6d8e9553d90dbf19c5c1"},
|
||||
{file = "awadb-0.3.3-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:94be44e587f28fa26b2cade0b6f4c04689f50cb0c07183db5ee50e48fe2e9ae3"},
|
||||
{file = "awadb-0.3.3-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:314929dc3a8d25c0f234a2b86c920543050f4eb298a6f68bd2c97c9fe3fb6224"},
|
||||
{file = "awadb-0.3.3-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:8bfccff1c7373899153427d93d96a97ae5371e8a6f09ff4dcbd28fb9f3f63ff4"},
|
||||
{file = "awadb-0.3.3-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:810021a90b873f668d8ab63e2c2747b2b2835bf0ae25f4223b6c94f06faffea4"},
|
||||
{file = "awadb-0.3.5-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:9addae6f0cfd57cdb3e8915778772c51fc1c41f5cacebad1322c5bfe30c95516"},
|
||||
{file = "awadb-0.3.5-cp311-cp311-macosx_10_13_universal2.whl", hash = "sha256:1b06099c4baf906829e4550f3cf0da602aba44465c89ede5889943619c0b49ce"},
|
||||
{file = "awadb-0.3.5-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:dedbb62496aadb70894fa965922625ff3003397445f38d5da7f4092e17f93725"},
|
||||
{file = "awadb-0.3.5-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:0f7eafcbe5311cc7b976faca368bc666a70f22ebd7f0039b5c5f791f2909377e"},
|
||||
{file = "awadb-0.3.5-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:950ddb5c483ea1ce24550d32c79e4c40c10dfc73cafc6f6faa8b14f22271dce5"},
|
||||
{file = "awadb-0.3.5-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:98b5d5a7a0d11253e23bec48295dc45356eead2338eac0d4f73a3755060992db"},
|
||||
{file = "awadb-0.3.5-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:1c14d9014e7e0ccf8eaecc540e0c88893fcdd3a1438f71c110b9ec80b565dae6"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
|
Loading…
Reference in New Issue
Block a user