mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-23 15:19:33 +00:00
Ndb enterprise (#21233)
Description: Adds NeuralDBClientVectorStore to langchain; it is our enterprise client. --------- Co-authored-by: kartikTAI <129414343+kartikTAI@users.noreply.github.com> Co-authored-by: Kartik Sarangmath <kartik@thirdai.com>
This commit is contained in:
parent
74044e44a5
commit
cb31c3611f
@ -236,6 +236,7 @@ if TYPE_CHECKING:
|
||||
TencentVectorDB,
|
||||
)
|
||||
from langchain_community.vectorstores.thirdai_neuraldb import (
|
||||
NeuralDBClientVectorStore,
|
||||
NeuralDBVectorStore,
|
||||
)
|
||||
from langchain_community.vectorstores.tidb_vector import (
|
||||
@ -345,6 +346,7 @@ __all__ = [
|
||||
"MyScale",
|
||||
"MyScaleSettings",
|
||||
"Neo4jVector",
|
||||
"NeuralDBClientVectorStore",
|
||||
"NeuralDBVectorStore",
|
||||
"OracleVS",
|
||||
"OpenSearchVectorSearch",
|
||||
@ -441,6 +443,7 @@ _module_lookup = {
|
||||
"MyScale": "langchain_community.vectorstores.myscale",
|
||||
"MyScaleSettings": "langchain_community.vectorstores.myscale",
|
||||
"Neo4jVector": "langchain_community.vectorstores.neo4j_vector",
|
||||
"NeuralDBClientVectorStore": "langchain_community.vectorstores.thirdai_neuraldb", # noqa: E501
|
||||
"NeuralDBVectorStore": "langchain_community.vectorstores.thirdai_neuraldb",
|
||||
"OpenSearchVectorSearch": "langchain_community.vectorstores.opensearch_vector_search", # noqa: E501
|
||||
"OracleVS": "langchain_community.vectorstores.oraclevs",
|
||||
|
@ -166,7 +166,7 @@ class NeuralDBVectorStore(VectorStore):
|
||||
offset = self.db._savable_state.documents.get_source_by_id(source_id)[1]
|
||||
return [str(offset + i) for i in range(len(texts))] # type: ignore[arg-type]
|
||||
|
||||
@root_validator()
|
||||
@root_validator(allow_reuse=True)
|
||||
def validate_environments(cls, values: Dict) -> Dict:
|
||||
"""Validate ThirdAI environment variables."""
|
||||
values["thirdai_key"] = convert_to_secret_str(
|
||||
@ -314,3 +314,161 @@ class NeuralDBVectorStore(VectorStore):
|
||||
path: path on disk to save the NeuralDB instance to.
|
||||
"""
|
||||
self.db.save(path)
|
||||
|
||||
|
||||
class NeuralDBClientVectorStore(VectorStore):
    """Vectorstore that uses ThirdAI's NeuralDB Enterprise Python Client for NeuralDBs.

    To use, you should have the ``thirdai[neural_db]`` python package installed.

    Example:
        .. code-block:: python

            from langchain_community.vectorstores import NeuralDBClientVectorStore
            from thirdai.neural_db import ModelBazaar, NeuralDBClient

            bazaar = ModelBazaar(base_url="http://{NEURAL_DB_ENTERPRISE_IP}/api/")
            bazaar.log_in(email="user@thirdai.com", password="1234")

            ndb_client = NeuralDBClient(
                deployment_identifier="user/model-0:user/deployment-0",
                base_url="http://{NEURAL_DB_ENTERPRISE_IP}/api/",
                bazaar=bazaar
            )
            vectorstore = NeuralDBClientVectorStore(db=ndb_client)
            retriever = vectorstore.as_retriever(search_kwargs={'k':5})

    """

    def __init__(self, db: Any) -> None:
        """Wrap an existing NeuralDBClient instance.

        Args:
            db: A connected ``thirdai.neural_db.NeuralDBClient`` instance that
                all vectorstore operations are delegated to.
        """
        self.db = db

    # NeuralDB Client instance; set in __init__ and treated as private.
    db: Any = None  #: :meta private:

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid
        underscore_attrs_are_private = True

    def similarity_search(
        self, query: str, k: int = 10, **kwargs: Any
    ) -> List[Document]:
        """Retrieve {k} contexts for a given query.

        Args:
            query: Query to submit to the model.
            k: The max number of context results to retrieve. Defaults to 10.

        Returns:
            A list of Documents, one per reference returned by the NeuralDB
            client, with id/source/metadata/score/context carried in
            ``Document.metadata``.

        Raises:
            ValueError: If the underlying client call fails for any reason.
        """
        try:
            references = self.db.search(query=query, top_k=k, **kwargs)["references"]
            return [
                Document(
                    page_content=ref["text"],
                    metadata={
                        "id": ref["id"],
                        "source": ref["source"],
                        "metadata": ref["metadata"],
                        # BUGFIX: previously this copied ref["source"] into the
                        # "score" field; the relevance score is ref["score"].
                        "score": ref["score"],
                        "context": ref["context"],
                    },
                )
                for ref in references
            ]
        except Exception as e:
            raise ValueError(f"Error while retrieving documents: {e}") from e

    def insert(self, documents: List[Dict[str, Any]]):  # type: ignore[no-untyped-def, no-untyped-def]
        """
        Inserts documents into the VectorStore and return the corresponding Sources.

        Args:
            documents (List[Dict[str, Any]]): A list of dictionaries that
                represent documents to be inserted to the VectorStores.
                The document dictionaries must be in the following format:
                {"document_type": "DOCUMENT_TYPE", **kwargs} where "DOCUMENT_TYPE"
                is one of the following:
                "PDF", "CSV", "DOCX", "URL", "SentenceLevelPDF", "SentenceLevelDOCX",
                "Unstructured", "InMemoryText".
                The kwargs for each document type are shown below:

                class PDF(Document):
                    document_type: Literal["PDF"]
                    path: str
                    metadata: Optional[dict[str, Any]] = None
                    on_disk: bool = False
                    version: str = "v1"
                    chunk_size: int = 100
                    stride: int = 40
                    emphasize_first_words: int = 0
                    ignore_header_footer: bool = True
                    ignore_nonstandard_orientation: bool = True

                class CSV(Document):
                    document_type: Literal["CSV"]
                    path: str
                    id_column: Optional[str] = None
                    strong_columns: Optional[List[str]] = None
                    weak_columns: Optional[List[str]] = None
                    reference_columns: Optional[List[str]] = None
                    save_extra_info: bool = True
                    metadata: Optional[dict[str, Any]] = None
                    has_offset: bool = False
                    on_disk: bool = False

                class DOCX(Document):
                    document_type: Literal["DOCX"]
                    path: str
                    metadata: Optional[dict[str, Any]] = None
                    on_disk: bool = False

                class URL(Document):
                    document_type: Literal["URL"]
                    url: str
                    save_extra_info: bool = True
                    title_is_strong: bool = False
                    metadata: Optional[dict[str, Any]] = None
                    on_disk: bool = False

                class SentenceLevelPDF(Document):
                    document_type: Literal["SentenceLevelPDF"]
                    path: str
                    metadata: Optional[dict[str, Any]] = None
                    on_disk: bool = False

                class SentenceLevelDOCX(Document):
                    document_type: Literal["SentenceLevelDOCX"]
                    path: str
                    metadata: Optional[dict[str, Any]] = None
                    on_disk: bool = False

                class Unstructured(Document):
                    document_type: Literal["Unstructured"]
                    path: str
                    save_extra_info: bool = True
                    metadata: Optional[dict[str, Any]] = None
                    on_disk: bool = False

                class InMemoryText(Document):
                    document_type: Literal["InMemoryText"]
                    name: str
                    texts: list[str]
                    metadatas: Optional[list[dict[str, Any]]] = None
                    global_metadata: Optional[dict[str, Any]] = None
                    on_disk: bool = False

                For Document types with the arg "path", ensure that
                the path exists on your local machine.
        """
        return self.db.insert(documents)

    def remove_documents(self, source_ids: List[str]):  # type: ignore[no-untyped-def]
        """
        Deletes documents from the VectorStore using source ids.

        Args:
            source_ids (List[str]): A list of source ids to delete
                from the VectorStore.
        """
        self.db.delete(source_ids)
|
||||
|
@ -58,6 +58,7 @@ EXPECTED_ALL = [
|
||||
"MyScale",
|
||||
"MyScaleSettings",
|
||||
"Neo4jVector",
|
||||
"NeuralDBClientVectorStore",
|
||||
"NeuralDBVectorStore",
|
||||
"OpenSearchVectorSearch",
|
||||
"OracleVS",
|
||||
|
@ -93,6 +93,7 @@ _EXPECTED = [
|
||||
"AzureCosmosDBVectorSearch",
|
||||
"VectorStore",
|
||||
"Yellowbrick",
|
||||
"NeuralDBClientVectorStore",
|
||||
"NeuralDBVectorStore",
|
||||
"CouchbaseVectorStore",
|
||||
]
|
||||
|
@ -66,6 +66,7 @@ if TYPE_CHECKING:
|
||||
MyScale,
|
||||
MyScaleSettings,
|
||||
Neo4jVector,
|
||||
NeuralDBClientVectorStore,
|
||||
NeuralDBVectorStore,
|
||||
OpenSearchVectorSearch,
|
||||
PGEmbedding,
|
||||
@ -142,6 +143,7 @@ DEPRECATED_LOOKUP = {
|
||||
"MyScale": "langchain_community.vectorstores",
|
||||
"MyScaleSettings": "langchain_community.vectorstores",
|
||||
"Neo4jVector": "langchain_community.vectorstores",
|
||||
"NeuralDBClientVectorStore": "langchain_community.vectorstores",
|
||||
"NeuralDBVectorStore": "langchain_community.vectorstores",
|
||||
"NEuralDBVectorStore": "langchain_community.vectorstores",
|
||||
"OpenSearchVectorSearch": "langchain_community.vectorstores",
|
||||
@ -224,6 +226,7 @@ __all__ = [
|
||||
"MyScale",
|
||||
"MyScaleSettings",
|
||||
"Neo4jVector",
|
||||
"NeuralDBClientVectorStore",
|
||||
"NeuralDBVectorStore",
|
||||
"OpenSearchVectorSearch",
|
||||
"PGEmbedding",
|
||||
|
@ -42,6 +42,7 @@ _EXPECTED = [
|
||||
"MyScale",
|
||||
"MyScaleSettings",
|
||||
"Neo4jVector",
|
||||
"NeuralDBClientVectorStore",
|
||||
"NeuralDBVectorStore",
|
||||
"OpenSearchVectorSearch",
|
||||
"PGEmbedding",
|
||||
|
Loading…
Reference in New Issue
Block a user