mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-05 12:48:12 +00:00
add from_documents interface in awadb vector store (#6023)
Added a new `from_documents` interface to the AwaDB vector store. @dev2049 --------- Co-authored-by: vincent <awadb.vincent@gmail.com>
This commit is contained in:
parent
2c9619bc1d
commit
c5bce4a465
@ -49,8 +49,7 @@ class AwaDB(VectorStore):
|
|||||||
self.awadb_client = awadb.Client()
|
self.awadb_client = awadb.Client()
|
||||||
|
|
||||||
self.awadb_client.Create(table_name)
|
self.awadb_client.Create(table_name)
|
||||||
if embedding_model is not None:
|
self.embedding_model = embedding_model
|
||||||
self.embedding_model = embedding_model
|
|
||||||
|
|
||||||
self.added_doc_count = 0
|
self.added_doc_count = 0
|
||||||
|
|
||||||
@ -121,6 +120,11 @@ class AwaDB(VectorStore):
|
|||||||
embedding = None
|
embedding = None
|
||||||
if self.embedding_model is not None:
|
if self.embedding_model is not None:
|
||||||
embedding = self.embedding_model.embed_query(query)
|
embedding = self.embedding_model.embed_query(query)
|
||||||
|
else:
|
||||||
|
from awadb import llm_embedding
|
||||||
|
|
||||||
|
llm = llm_embedding.LLMEmbedding()
|
||||||
|
embedding = llm.Embedding(query)
|
||||||
|
|
||||||
return self.similarity_search_by_vector(embedding, k)
|
return self.similarity_search_by_vector(embedding, k)
|
||||||
|
|
||||||
@ -141,13 +145,18 @@ class AwaDB(VectorStore):
|
|||||||
embedding = None
|
embedding = None
|
||||||
if self.embedding_model is not None:
|
if self.embedding_model is not None:
|
||||||
embedding = self.embedding_model.embed_query(query)
|
embedding = self.embedding_model.embed_query(query)
|
||||||
|
else:
|
||||||
|
from awadb import llm_embedding
|
||||||
|
|
||||||
show_results = self.awadb_client.Search(embedding, k)
|
llm = llm_embedding.LLMEmbedding()
|
||||||
|
embedding = llm.Embedding(query)
|
||||||
|
|
||||||
|
# show_results = self.awadb_client.Search(embedding, k)
|
||||||
|
|
||||||
results: List[Tuple[Document, float]] = []
|
results: List[Tuple[Document, float]] = []
|
||||||
|
|
||||||
if show_results.__len__() == 0:
|
# if show_results.__len__() == 0:
|
||||||
return results
|
# return results
|
||||||
|
|
||||||
scores: List[float] = []
|
scores: List[float] = []
|
||||||
retrieval_docs = self.similarity_search_by_vector(embedding, k, scores)
|
retrieval_docs = self.similarity_search_by_vector(embedding, k, scores)
|
||||||
@ -159,7 +168,7 @@ class AwaDB(VectorStore):
|
|||||||
L2_Norm = pow(L2_Norm, 0.5)
|
L2_Norm = pow(L2_Norm, 0.5)
|
||||||
doc_no = 0
|
doc_no = 0
|
||||||
for doc in retrieval_docs:
|
for doc in retrieval_docs:
|
||||||
doc_tuple = (doc, 1 - scores[doc_no] / L2_Norm)
|
doc_tuple = (doc, 1 - (scores[doc_no] / L2_Norm))
|
||||||
results.append(doc_tuple)
|
results.append(doc_tuple)
|
||||||
doc_no = doc_no + 1
|
doc_no = doc_no + 1
|
||||||
|
|
||||||
@ -208,7 +217,7 @@ class AwaDB(VectorStore):
|
|||||||
|
|
||||||
def similarity_search_by_vector(
|
def similarity_search_by_vector(
|
||||||
self,
|
self,
|
||||||
embedding: List[float],
|
embedding: Optional[List[float]] = None,
|
||||||
k: int = DEFAULT_TOPN,
|
k: int = DEFAULT_TOPN,
|
||||||
scores: Optional[list] = None,
|
scores: Optional[list] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
@ -226,10 +235,13 @@ class AwaDB(VectorStore):
|
|||||||
if self.awadb_client is None:
|
if self.awadb_client is None:
|
||||||
raise ValueError("AwaDB client is None!!!")
|
raise ValueError("AwaDB client is None!!!")
|
||||||
|
|
||||||
show_results = self.awadb_client.Search(embedding, k)
|
|
||||||
|
|
||||||
results: List[Document] = []
|
results: List[Document] = []
|
||||||
|
|
||||||
|
if embedding is None:
|
||||||
|
return results
|
||||||
|
|
||||||
|
show_results = self.awadb_client.Search(embedding, k)
|
||||||
|
|
||||||
if show_results.__len__() == 0:
|
if show_results.__len__() == 0:
|
||||||
return results
|
return results
|
||||||
|
|
||||||
@ -237,7 +249,11 @@ class AwaDB(VectorStore):
|
|||||||
content = ""
|
content = ""
|
||||||
meta_data = {}
|
meta_data = {}
|
||||||
for item_key in item_detail:
|
for item_key in item_detail:
|
||||||
if item_key == "Field@0": # text for the document
|
if (
|
||||||
|
item_key == "Field@0" and self.embedding_model is not None
|
||||||
|
): # text for the document
|
||||||
|
content = item_detail[item_key]
|
||||||
|
elif self.embedding_model is None and item_key == "embedding_text":
|
||||||
content = item_detail[item_key]
|
content = item_detail[item_key]
|
||||||
elif item_key == "Field@1": # embedding field for the document
|
elif item_key == "Field@1": # embedding field for the document
|
||||||
continue
|
continue
|
||||||
@ -282,3 +298,38 @@ class AwaDB(VectorStore):
|
|||||||
)
|
)
|
||||||
awadb_client.add_texts(texts=texts, metadatas=metadatas)
|
awadb_client.add_texts(texts=texts, metadatas=metadatas)
|
||||||
return awadb_client
|
return awadb_client
|
||||||
|
|
||||||
|
@classmethod
def from_documents(
    cls: Type[AwaDB],
    documents: List[Document],
    embedding: Optional[Embeddings] = None,
    table_name: str = _DEFAULT_TABLE_NAME,
    logging_and_data_dir: Optional[str] = None,
    client: Optional[awadb.Client] = None,
    **kwargs: Any,
) -> AwaDB:
    """Build an AwaDB vector store from ``Document`` objects.

    The page content and the metadata of every document are collected into
    two parallel lists, which are then handed to ``from_texts`` together
    with the remaining named arguments.

    Args:
        documents: Documents whose ``page_content`` and ``metadata`` are
            extracted and added to the store.
        embedding: Embedding model passed through to ``from_texts``.
            Defaults to None.
        table_name: Table (collection) name passed through to
            ``from_texts``.
        logging_and_data_dir: Directory passed through to ``from_texts``;
            presumably where the table is persisted (confirm against
            ``from_texts``).
        client: Optional AwaDB client passed through to ``from_texts``.
        **kwargs: Accepted for interface compatibility; not forwarded to
            ``from_texts``.

    Returns:
        The vector store produced by ``from_texts``.
    """
    # Gather the keyword arguments first so the delegation is a single call.
    # Texts are extracted before metadatas, one full pass each.
    from_texts_args = {
        "texts": [item.page_content for item in documents],
        "embedding": embedding,
        "metadatas": [item.metadata for item in documents],
        "table_name": table_name,
        "logging_and_data_dir": logging_and_data_dir,
        "client": client,
    }
    return cls.from_texts(**from_texts_args)
|
||||||
|
Loading…
Reference in New Issue
Block a user