mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-24 15:43:54 +00:00
Ndb enterprise (#21233)
Description: Adds NeuralDBClientVectorStore, our enterprise client, to langchain. --------- Co-authored-by: kartikTAI <129414343+kartikTAI@users.noreply.github.com> Co-authored-by: Kartik Sarangmath <kartik@thirdai.com>
This commit is contained in:
parent
74044e44a5
commit
cb31c3611f
@ -236,6 +236,7 @@ if TYPE_CHECKING:
|
|||||||
TencentVectorDB,
|
TencentVectorDB,
|
||||||
)
|
)
|
||||||
from langchain_community.vectorstores.thirdai_neuraldb import (
|
from langchain_community.vectorstores.thirdai_neuraldb import (
|
||||||
|
NeuralDBClientVectorStore,
|
||||||
NeuralDBVectorStore,
|
NeuralDBVectorStore,
|
||||||
)
|
)
|
||||||
from langchain_community.vectorstores.tidb_vector import (
|
from langchain_community.vectorstores.tidb_vector import (
|
||||||
@ -345,6 +346,7 @@ __all__ = [
|
|||||||
"MyScale",
|
"MyScale",
|
||||||
"MyScaleSettings",
|
"MyScaleSettings",
|
||||||
"Neo4jVector",
|
"Neo4jVector",
|
||||||
|
"NeuralDBClientVectorStore",
|
||||||
"NeuralDBVectorStore",
|
"NeuralDBVectorStore",
|
||||||
"OracleVS",
|
"OracleVS",
|
||||||
"OpenSearchVectorSearch",
|
"OpenSearchVectorSearch",
|
||||||
@ -441,6 +443,7 @@ _module_lookup = {
|
|||||||
"MyScale": "langchain_community.vectorstores.myscale",
|
"MyScale": "langchain_community.vectorstores.myscale",
|
||||||
"MyScaleSettings": "langchain_community.vectorstores.myscale",
|
"MyScaleSettings": "langchain_community.vectorstores.myscale",
|
||||||
"Neo4jVector": "langchain_community.vectorstores.neo4j_vector",
|
"Neo4jVector": "langchain_community.vectorstores.neo4j_vector",
|
||||||
|
"NeuralDBClientVectorStore": "langchain_community.vectorstores.thirdai_neuraldb", # noqa: E501
|
||||||
"NeuralDBVectorStore": "langchain_community.vectorstores.thirdai_neuraldb",
|
"NeuralDBVectorStore": "langchain_community.vectorstores.thirdai_neuraldb",
|
||||||
"OpenSearchVectorSearch": "langchain_community.vectorstores.opensearch_vector_search", # noqa: E501
|
"OpenSearchVectorSearch": "langchain_community.vectorstores.opensearch_vector_search", # noqa: E501
|
||||||
"OracleVS": "langchain_community.vectorstores.oraclevs",
|
"OracleVS": "langchain_community.vectorstores.oraclevs",
|
||||||
|
@ -166,7 +166,7 @@ class NeuralDBVectorStore(VectorStore):
|
|||||||
offset = self.db._savable_state.documents.get_source_by_id(source_id)[1]
|
offset = self.db._savable_state.documents.get_source_by_id(source_id)[1]
|
||||||
return [str(offset + i) for i in range(len(texts))] # type: ignore[arg-type]
|
return [str(offset + i) for i in range(len(texts))] # type: ignore[arg-type]
|
||||||
|
|
||||||
@root_validator()
|
@root_validator(allow_reuse=True)
|
||||||
def validate_environments(cls, values: Dict) -> Dict:
|
def validate_environments(cls, values: Dict) -> Dict:
|
||||||
"""Validate ThirdAI environment variables."""
|
"""Validate ThirdAI environment variables."""
|
||||||
values["thirdai_key"] = convert_to_secret_str(
|
values["thirdai_key"] = convert_to_secret_str(
|
||||||
@ -314,3 +314,161 @@ class NeuralDBVectorStore(VectorStore):
|
|||||||
path: path on disk to save the NeuralDB instance to.
|
path: path on disk to save the NeuralDB instance to.
|
||||||
"""
|
"""
|
||||||
self.db.save(path)
|
self.db.save(path)
|
||||||
|
|
||||||
|
|
||||||
|
class NeuralDBClientVectorStore(VectorStore):
    """Vectorstore that uses ThirdAI's NeuralDB Enterprise Python Client for NeuralDBs.

    To use, you should have the ``thirdai[neural_db]`` python package installed.

    Example:
        .. code-block:: python

            from langchain_community.vectorstores import NeuralDBClientVectorStore
            from thirdai.neural_db import ModelBazaar, NeuralDBClient

            bazaar = ModelBazaar(base_url="http://{NEURAL_DB_ENTERPRISE_IP}/api/")
            bazaar.log_in(email="user@thirdai.com", password="1234")

            ndb_client = NeuralDBClient(
                deployment_identifier="user/model-0:user/deployment-0",
                base_url="http://{NEURAL_DB_ENTERPRISE_IP}/api/",
                bazaar=bazaar
            )
            vectorstore = NeuralDBClientVectorStore(db=ndb_client)
            retriever = vectorstore.as_retriever(search_kwargs={'k':5})

    """

    def __init__(self, db: Any) -> None:
        # The wrapped NeuralDBClient instance; every operation delegates to it.
        self.db = db

    db: Any = None  #: :meta private: NeuralDB Client instance

    class Config:
        """Configuration for this pydantic object."""

        # NOTE(review): these pydantic settings only take effect if the base
        # VectorStore is a pydantic model in the installed langchain version —
        # confirm; with a plain ABC base they are inert.
        extra = Extra.forbid
        underscore_attrs_are_private = True

    def similarity_search(
        self, query: str, k: int = 10, **kwargs: Any
    ) -> List[Document]:
        """Retrieve up to ``k`` contexts for a given query.

        Args:
            query: Query to submit to the model.
            k: The max number of context results to retrieve. Defaults to 10.

        Returns:
            One ``Document`` per reference returned by the NeuralDB search,
            with id/source/metadata/score/context carried in ``metadata``.

        Raises:
            ValueError: If the underlying client call fails for any reason.
        """
        try:
            references = self.db.search(query=query, top_k=k, **kwargs)["references"]
            return [
                Document(
                    page_content=ref["text"],
                    metadata={
                        "id": ref["id"],
                        "source": ref["source"],
                        "metadata": ref["metadata"],
                        # Bug fix: the score must come from the reference's
                        # "score" field, not a second copy of "source".
                        # (Assumes each reference dict carries a "score" key,
                        # as the other keys read here suggest — confirm against
                        # the NeuralDBClient.search response schema.)
                        "score": ref["score"],
                        "context": ref["context"],
                    },
                )
                for ref in references
            ]
        except Exception as e:
            raise ValueError(f"Error while retrieving documents: {e}") from e

    def insert(self, documents: List[Dict[str, Any]]):  # type: ignore[no-untyped-def]
        """
        Inserts documents into the VectorStore and return the corresponding Sources.

        Args:
            documents (List[Dict[str, Any]]): A list of dictionaries that
                represent documents to be inserted to the VectorStores.
                Each dictionary must be of the form
                ``{"document_type": "DOCUMENT_TYPE", **kwargs}`` where
                "DOCUMENT_TYPE" is one of: "PDF", "CSV", "DOCX", "URL",
                "SentenceLevelPDF", "SentenceLevelDOCX", "Unstructured",
                "InMemoryText". The accepted kwargs depend on the document
                type — e.g. ``path``/``metadata``/``on_disk`` for file-backed
                types, plus type-specific options such as ``chunk_size``,
                ``stride``, ``ignore_header_footer`` for "PDF";
                ``id_column``, ``strong_columns``, ``weak_columns``,
                ``reference_columns`` for "CSV"; ``url``, ``title_is_strong``
                for "URL"; ``name``, ``texts``, ``metadatas``,
                ``global_metadata`` for "InMemoryText".
                For document types with a "path" argument, ensure that the
                path exists on your local machine.
        """
        return self.db.insert(documents)

    def remove_documents(self, source_ids: List[str]):  # type: ignore[no-untyped-def]
        """
        Deletes documents from the VectorStore using source ids.

        Args:
            source_ids (List[str]): A list of source ids to delete from the
                VectorStore.
        """
        self.db.delete(source_ids)
||||||
|
@ -58,6 +58,7 @@ EXPECTED_ALL = [
|
|||||||
"MyScale",
|
"MyScale",
|
||||||
"MyScaleSettings",
|
"MyScaleSettings",
|
||||||
"Neo4jVector",
|
"Neo4jVector",
|
||||||
|
"NeuralDBClientVectorStore",
|
||||||
"NeuralDBVectorStore",
|
"NeuralDBVectorStore",
|
||||||
"OpenSearchVectorSearch",
|
"OpenSearchVectorSearch",
|
||||||
"OracleVS",
|
"OracleVS",
|
||||||
|
@ -93,6 +93,7 @@ _EXPECTED = [
|
|||||||
"AzureCosmosDBVectorSearch",
|
"AzureCosmosDBVectorSearch",
|
||||||
"VectorStore",
|
"VectorStore",
|
||||||
"Yellowbrick",
|
"Yellowbrick",
|
||||||
|
"NeuralDBClientVectorStore",
|
||||||
"NeuralDBVectorStore",
|
"NeuralDBVectorStore",
|
||||||
"CouchbaseVectorStore",
|
"CouchbaseVectorStore",
|
||||||
]
|
]
|
||||||
|
@ -66,6 +66,7 @@ if TYPE_CHECKING:
|
|||||||
MyScale,
|
MyScale,
|
||||||
MyScaleSettings,
|
MyScaleSettings,
|
||||||
Neo4jVector,
|
Neo4jVector,
|
||||||
|
NeuralDBClientVectorStore,
|
||||||
NeuralDBVectorStore,
|
NeuralDBVectorStore,
|
||||||
OpenSearchVectorSearch,
|
OpenSearchVectorSearch,
|
||||||
PGEmbedding,
|
PGEmbedding,
|
||||||
@ -142,6 +143,7 @@ DEPRECATED_LOOKUP = {
|
|||||||
"MyScale": "langchain_community.vectorstores",
|
"MyScale": "langchain_community.vectorstores",
|
||||||
"MyScaleSettings": "langchain_community.vectorstores",
|
"MyScaleSettings": "langchain_community.vectorstores",
|
||||||
"Neo4jVector": "langchain_community.vectorstores",
|
"Neo4jVector": "langchain_community.vectorstores",
|
||||||
|
"NeuralDBClientVectorStore": "langchain_community.vectorstores",
|
||||||
"NeuralDBVectorStore": "langchain_community.vectorstores",
|
"NeuralDBVectorStore": "langchain_community.vectorstores",
|
||||||
"NEuralDBVectorStore": "langchain_community.vectorstores",
|
"NEuralDBVectorStore": "langchain_community.vectorstores",
|
||||||
"OpenSearchVectorSearch": "langchain_community.vectorstores",
|
"OpenSearchVectorSearch": "langchain_community.vectorstores",
|
||||||
@ -224,6 +226,7 @@ __all__ = [
|
|||||||
"MyScale",
|
"MyScale",
|
||||||
"MyScaleSettings",
|
"MyScaleSettings",
|
||||||
"Neo4jVector",
|
"Neo4jVector",
|
||||||
|
"NeuralDBClientVectorStore",
|
||||||
"NeuralDBVectorStore",
|
"NeuralDBVectorStore",
|
||||||
"OpenSearchVectorSearch",
|
"OpenSearchVectorSearch",
|
||||||
"PGEmbedding",
|
"PGEmbedding",
|
||||||
|
@ -42,6 +42,7 @@ _EXPECTED = [
|
|||||||
"MyScale",
|
"MyScale",
|
||||||
"MyScaleSettings",
|
"MyScaleSettings",
|
||||||
"Neo4jVector",
|
"Neo4jVector",
|
||||||
|
"NeuralDBClientVectorStore",
|
||||||
"NeuralDBVectorStore",
|
"NeuralDBVectorStore",
|
||||||
"OpenSearchVectorSearch",
|
"OpenSearchVectorSearch",
|
||||||
"PGEmbedding",
|
"PGEmbedding",
|
||||||
|
Loading…
Reference in New Issue
Block a user