mirror of
https://github.com/hwchase17/langchain.git
synced 2026-01-23 13:19:22 +00:00
Compare commits
3 Commits
v1.0
...
dev2049/em
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0ab9179536 | ||
|
|
b7f3ef8ae5 | ||
|
|
2d3137ce20 |
@@ -14,7 +14,7 @@ try:
|
||||
except ImportError:
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.schema import Generation
|
||||
from langchain.vectorstores.redis import Redis as RedisVectorstore
|
||||
|
||||
@@ -178,7 +178,10 @@ class RedisSemanticCache(BaseCache):
|
||||
# TODO - implement a TTL policy in Redis
|
||||
|
||||
def __init__(
|
||||
self, redis_url: str, embedding: Embeddings, score_threshold: float = 0.2
|
||||
self,
|
||||
redis_url: str,
|
||||
embedding: TextEmbeddingModel,
|
||||
score_threshold: float = 0.2,
|
||||
):
|
||||
"""Initialize by passing in the `init` GPTCache func
|
||||
|
||||
|
||||
@@ -14,16 +14,16 @@ from langchain.callbacks.manager import CallbackManagerForChainRun
|
||||
from langchain.chains.base import Chain
|
||||
from langchain.chains.hyde.prompts import PROMPT_MAP
|
||||
from langchain.chains.llm import LLMChain
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
|
||||
|
||||
class HypotheticalDocumentEmbedder(Chain, Embeddings):
|
||||
class HypotheticalDocumentEmbedder(Chain, TextEmbeddingModel):
|
||||
"""Generate hypothetical document for query, and then embed that.
|
||||
|
||||
Based on https://arxiv.org/abs/2212.10496
|
||||
"""
|
||||
|
||||
base_embeddings: Embeddings
|
||||
base_embeddings: TextEmbeddingModel
|
||||
llm_chain: LLMChain
|
||||
|
||||
class Config:
|
||||
@@ -71,7 +71,7 @@ class HypotheticalDocumentEmbedder(Chain, Embeddings):
|
||||
def from_llm(
|
||||
cls,
|
||||
llm: BaseLanguageModel,
|
||||
base_embeddings: Embeddings,
|
||||
base_embeddings: TextEmbeddingModel,
|
||||
prompt_key: str,
|
||||
**kwargs: Any,
|
||||
) -> HypotheticalDocumentEmbedder:
|
||||
|
||||
@@ -7,7 +7,7 @@ from pydantic import Extra
|
||||
from langchain.callbacks.manager import CallbackManagerForChainRun
|
||||
from langchain.chains.router.base import RouterChain
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
|
||||
|
||||
@@ -45,7 +45,7 @@ class EmbeddingRouterChain(RouterChain):
|
||||
cls,
|
||||
names_and_descriptions: Sequence[Tuple[str, Sequence[str]]],
|
||||
vectorstore_cls: Type[VectorStore],
|
||||
embeddings: Embeddings,
|
||||
embeddings: TextEmbeddingModel,
|
||||
**kwargs: Any,
|
||||
) -> EmbeddingRouterChain:
|
||||
"""Convenience constructor."""
|
||||
|
||||
@@ -4,7 +4,7 @@ from typing import Any, Callable, List, Sequence
|
||||
import numpy as np
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.math_utils import cosine_similarity
|
||||
from langchain.schema import BaseDocumentTransformer, Document
|
||||
|
||||
@@ -50,7 +50,7 @@ def _filter_similar_embeddings(
|
||||
|
||||
|
||||
def _get_embeddings_from_stateful_docs(
|
||||
embeddings: Embeddings, documents: Sequence[_DocumentWithState]
|
||||
embeddings: TextEmbeddingModel, documents: Sequence[_DocumentWithState]
|
||||
) -> List[List[float]]:
|
||||
if len(documents) and "embedded_doc" in documents[0].state:
|
||||
embedded_documents = [doc.state["embedded_doc"] for doc in documents]
|
||||
@@ -66,7 +66,7 @@ def _get_embeddings_from_stateful_docs(
|
||||
class EmbeddingsRedundantFilter(BaseDocumentTransformer, BaseModel):
|
||||
"""Filter that drops redundant documents by comparing their embeddings."""
|
||||
|
||||
embeddings: Embeddings
|
||||
embeddings: TextEmbeddingModel
|
||||
"""Embeddings to use for embedding document contents."""
|
||||
similarity_fn: Callable = cosine_similarity
|
||||
"""Similarity function for comparing documents. Function expected to take as input
|
||||
|
||||
@@ -2,11 +2,11 @@ from typing import Any, Dict, List, Optional
|
||||
|
||||
from pydantic import BaseModel, root_validator
|
||||
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.utils import get_from_dict_or_env
|
||||
|
||||
|
||||
class AlephAlphaAsymmetricSemanticEmbedding(BaseModel, Embeddings):
|
||||
class AlephAlphaAsymmetricSemanticEmbedding(BaseModel, TextEmbeddingModel):
|
||||
"""
|
||||
Wrapper for Aleph Alpha's Asymmetric Embeddings
|
||||
AA provides you with an endpoint to embed a document and a query.
|
||||
|
||||
@@ -3,8 +3,8 @@ from abc import ABC, abstractmethod
|
||||
from typing import List
|
||||
|
||||
|
||||
class Embeddings(ABC):
|
||||
"""Interface for embedding models."""
|
||||
class TextEmbeddingModel(ABC):
|
||||
"""Interface for text embedding models."""
|
||||
|
||||
@abstractmethod
|
||||
def embed_documents(self, texts: List[str]) -> List[List[float]]:
|
||||
@@ -13,3 +13,7 @@ class Embeddings(ABC):
|
||||
@abstractmethod
|
||||
def embed_query(self, text: str) -> List[float]:
|
||||
"""Embed query text."""
|
||||
|
||||
|
||||
# For backwards compatibility: this interface was previously published under
# the name `Embeddings`, so that name must stay importable for existing callers.
Embeddings = TextEmbeddingModel
# The singular spelling is kept too, so code written against it keeps working.
Embedding = TextEmbeddingModel
|
||||
|
||||
@@ -3,11 +3,11 @@ from typing import Any, Dict, List, Optional
|
||||
|
||||
from pydantic import BaseModel, Extra, root_validator
|
||||
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.utils import get_from_dict_or_env
|
||||
|
||||
|
||||
class CohereEmbeddings(BaseModel, Embeddings):
|
||||
class CohereEmbeddings(BaseModel, TextEmbeddingModel):
|
||||
"""Wrapper around Cohere embedding models.
|
||||
|
||||
To use, you should have the ``cohere`` python package installed, and the
|
||||
|
||||
@@ -3,10 +3,10 @@ from typing import List
|
||||
import numpy as np
|
||||
from pydantic import BaseModel
|
||||
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
|
||||
|
||||
class FakeEmbeddings(Embeddings, BaseModel):
|
||||
class FakeEmbeddings(TextEmbeddingModel, BaseModel):
|
||||
size: int
|
||||
|
||||
def _get_embedding(self) -> List[float]:
|
||||
|
||||
@@ -13,7 +13,7 @@ from tenacity import (
|
||||
wait_exponential,
|
||||
)
|
||||
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.utils import get_from_dict_or_env
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -54,7 +54,7 @@ def embed_with_retry(
|
||||
return _embed_with_retry(*args, **kwargs)
|
||||
|
||||
|
||||
class GooglePalmEmbeddings(BaseModel, Embeddings):
|
||||
class GooglePalmEmbeddings(BaseModel, TextEmbeddingModel):
|
||||
client: Any
|
||||
google_api_key: Optional[str]
|
||||
model_name: str = "models/embedding-gecko-001"
|
||||
|
||||
@@ -3,7 +3,7 @@ from typing import Any, Dict, List, Optional
|
||||
|
||||
from pydantic import BaseModel, Extra, Field
|
||||
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
|
||||
DEFAULT_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"
|
||||
DEFAULT_INSTRUCT_MODEL = "hkunlp/instructor-large"
|
||||
@@ -13,7 +13,7 @@ DEFAULT_QUERY_INSTRUCTION = (
|
||||
)
|
||||
|
||||
|
||||
class HuggingFaceEmbeddings(BaseModel, Embeddings):
|
||||
class HuggingFaceEmbeddings(BaseModel, TextEmbeddingModel):
|
||||
"""Wrapper around sentence_transformers embedding models.
|
||||
|
||||
To use, you should have the ``sentence_transformers`` python package installed.
|
||||
@@ -87,7 +87,7 @@ class HuggingFaceEmbeddings(BaseModel, Embeddings):
|
||||
return embedding.tolist()
|
||||
|
||||
|
||||
class HuggingFaceInstructEmbeddings(BaseModel, Embeddings):
|
||||
class HuggingFaceInstructEmbeddings(BaseModel, TextEmbeddingModel):
|
||||
"""Wrapper around sentence_transformers embedding models.
|
||||
|
||||
To use, you should have the ``sentence_transformers``
|
||||
|
||||
@@ -3,14 +3,14 @@ from typing import Any, Dict, List, Optional
|
||||
|
||||
from pydantic import BaseModel, Extra, root_validator
|
||||
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.utils import get_from_dict_or_env
|
||||
|
||||
DEFAULT_REPO_ID = "sentence-transformers/all-mpnet-base-v2"
|
||||
VALID_TASKS = ("feature-extraction",)
|
||||
|
||||
|
||||
class HuggingFaceHubEmbeddings(BaseModel, Embeddings):
|
||||
class HuggingFaceHubEmbeddings(BaseModel, TextEmbeddingModel):
|
||||
"""Wrapper around HuggingFaceHub embedding models.
|
||||
|
||||
To use, you should have the ``huggingface_hub`` python package installed, and the
|
||||
|
||||
@@ -6,11 +6,11 @@ from typing import Any, Dict, List, Optional
|
||||
import requests
|
||||
from pydantic import BaseModel, root_validator
|
||||
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.utils import get_from_dict_or_env
|
||||
|
||||
|
||||
class JinaEmbeddings(BaseModel, Embeddings):
|
||||
class JinaEmbeddings(BaseModel, TextEmbeddingModel):
|
||||
client: Any #: :meta private:
|
||||
|
||||
model_name: str = "ViT-B-32::openai"
|
||||
|
||||
@@ -3,10 +3,10 @@ from typing import Any, Dict, List, Optional
|
||||
|
||||
from pydantic import BaseModel, Extra, Field, root_validator
|
||||
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
|
||||
|
||||
class LlamaCppEmbeddings(BaseModel, Embeddings):
|
||||
class LlamaCppEmbeddings(BaseModel, TextEmbeddingModel):
|
||||
"""Wrapper around llama.cpp embedding models.
|
||||
|
||||
To use, you should have the llama-cpp-python library installed, and provide the
|
||||
|
||||
@@ -25,7 +25,7 @@ from tenacity import (
|
||||
wait_exponential,
|
||||
)
|
||||
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.utils import get_from_dict_or_env
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -64,7 +64,7 @@ def embed_with_retry(embeddings: OpenAIEmbeddings, **kwargs: Any) -> Any:
|
||||
return _embed_with_retry(**kwargs)
|
||||
|
||||
|
||||
class OpenAIEmbeddings(BaseModel, Embeddings):
|
||||
class OpenAIEmbeddings(BaseModel, TextEmbeddingModel):
|
||||
"""Wrapper around OpenAI embedding models.
|
||||
|
||||
To use, you should have the ``openai`` python package installed, and the
|
||||
|
||||
@@ -3,7 +3,7 @@ from typing import Any, Dict, List, Optional
|
||||
|
||||
from pydantic import BaseModel, Extra, root_validator
|
||||
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.llms.sagemaker_endpoint import ContentHandlerBase
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ class EmbeddingsContentHandler(ContentHandlerBase[List[str], List[List[float]]])
|
||||
"""Content handler for LLM class."""
|
||||
|
||||
|
||||
class SagemakerEndpointEmbeddings(BaseModel, Embeddings):
|
||||
class SagemakerEndpointEmbeddings(BaseModel, TextEmbeddingModel):
|
||||
"""Wrapper around custom Sagemaker Inference Endpoints.
|
||||
|
||||
To use, you must supply the endpoint name from your deployed
|
||||
|
||||
@@ -3,7 +3,7 @@ from typing import Any, Callable, List
|
||||
|
||||
from pydantic import Extra
|
||||
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.llms import SelfHostedPipeline
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ def _embed_documents(pipeline: Any, *args: Any, **kwargs: Any) -> List[List[floa
|
||||
return pipeline(*args, **kwargs)
|
||||
|
||||
|
||||
class SelfHostedEmbeddings(SelfHostedPipeline, Embeddings):
|
||||
class SelfHostedEmbeddings(SelfHostedPipeline, TextEmbeddingModel):
|
||||
"""Runs custom embedding models on self-hosted remote hardware.
|
||||
|
||||
Supported hardware includes auto-launched instances on AWS, GCP, Azure,
|
||||
|
||||
@@ -3,12 +3,12 @@ from typing import Any, List
|
||||
|
||||
from pydantic import BaseModel, Extra
|
||||
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
|
||||
DEFAULT_MODEL_URL = "https://tfhub.dev/google/universal-sentence-encoder-multilingual/3"
|
||||
|
||||
|
||||
class TensorflowHubEmbeddings(BaseModel, Embeddings):
|
||||
class TensorflowHubEmbeddings(BaseModel, TextEmbeddingModel):
|
||||
"""Wrapper around tensorflow_hub embedding models.
|
||||
|
||||
To use, you should have the ``tensorflow_text`` python package installed.
|
||||
|
||||
@@ -6,7 +6,7 @@ from langchain.base_language import BaseLanguageModel
|
||||
from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
|
||||
from langchain.chains.retrieval_qa.base import RetrievalQA
|
||||
from langchain.document_loaders.base import BaseLoader
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.embeddings.openai import OpenAIEmbeddings
|
||||
from langchain.llms.openai import OpenAI
|
||||
from langchain.schema import Document
|
||||
@@ -55,7 +55,7 @@ class VectorstoreIndexCreator(BaseModel):
|
||||
"""Logic for creating indexes."""
|
||||
|
||||
vectorstore_cls: Type[VectorStore] = Chroma
|
||||
embedding: Embeddings = Field(default_factory=OpenAIEmbeddings)
|
||||
embedding: TextEmbeddingModel = Field(default_factory=OpenAIEmbeddings)
|
||||
text_splitter: TextSplitter = Field(default_factory=_get_default_text_splitter)
|
||||
vectorstore_kwargs: dict = Field(default_factory=dict)
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ from typing import Any, Dict, List, Optional, Type
|
||||
|
||||
from pydantic import BaseModel, Extra
|
||||
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.prompts.example_selector.base import BaseExampleSelector
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
|
||||
@@ -64,7 +64,7 @@ class SemanticSimilarityExampleSelector(BaseExampleSelector, BaseModel):
|
||||
def from_examples(
|
||||
cls,
|
||||
examples: List[dict],
|
||||
embeddings: Embeddings,
|
||||
embeddings: TextEmbeddingModel,
|
||||
vectorstore_cls: Type[VectorStore],
|
||||
k: int = 4,
|
||||
input_keys: Optional[List[str]] = None,
|
||||
@@ -130,7 +130,7 @@ class MaxMarginalRelevanceExampleSelector(SemanticSimilarityExampleSelector):
|
||||
def from_examples(
|
||||
cls,
|
||||
examples: List[dict],
|
||||
embeddings: Embeddings,
|
||||
embeddings: TextEmbeddingModel,
|
||||
vectorstore_cls: Type[VectorStore],
|
||||
k: int = 4,
|
||||
input_keys: Optional[List[str]] = None,
|
||||
|
||||
@@ -8,7 +8,7 @@ from langchain.document_transformers import (
|
||||
_get_embeddings_from_stateful_docs,
|
||||
get_stateful_documents,
|
||||
)
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.math_utils import cosine_similarity
|
||||
from langchain.retrievers.document_compressors.base import (
|
||||
BaseDocumentCompressor,
|
||||
@@ -17,7 +17,7 @@ from langchain.schema import Document
|
||||
|
||||
|
||||
class EmbeddingsFilter(BaseDocumentCompressor):
|
||||
embeddings: Embeddings
|
||||
embeddings: TextEmbeddingModel
|
||||
"""Embeddings to use for embedding document contents and queries."""
|
||||
similarity_fn: Callable = cosine_similarity
|
||||
"""Similarity function for comparing documents. Function expected to take as input
|
||||
|
||||
@@ -10,17 +10,17 @@ from typing import Any, List, Optional
|
||||
import numpy as np
|
||||
from pydantic import BaseModel
|
||||
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.schema import BaseRetriever, Document
|
||||
|
||||
|
||||
def create_index(contexts: List[str], embeddings: Embeddings) -> np.ndarray:
|
||||
def create_index(contexts: List[str], embeddings: TextEmbeddingModel) -> np.ndarray:
|
||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||
return np.array(list(executor.map(embeddings.embed_query, contexts)))
|
||||
|
||||
|
||||
class KNNRetriever(BaseRetriever, BaseModel):
|
||||
embeddings: Embeddings
|
||||
embeddings: TextEmbeddingModel
|
||||
index: Any
|
||||
texts: List[str]
|
||||
k: int = 4
|
||||
@@ -34,7 +34,7 @@ class KNNRetriever(BaseRetriever, BaseModel):
|
||||
|
||||
@classmethod
|
||||
def from_texts(
|
||||
cls, texts: List[str], embeddings: Embeddings, **kwargs: Any
|
||||
cls, texts: List[str], embeddings: TextEmbeddingModel, **kwargs: Any
|
||||
) -> KNNRetriever:
|
||||
index = create_index(texts, embeddings)
|
||||
return cls(embeddings=embeddings, index=index, texts=texts, **kwargs)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""Milvus Retriever"""
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.schema import BaseRetriever, Document
|
||||
from langchain.vectorstores.milvus import Milvus
|
||||
|
||||
@@ -11,7 +11,7 @@ from langchain.vectorstores.milvus import Milvus
|
||||
class MilvusRetreiver(BaseRetriever):
|
||||
def __init__(
|
||||
self,
|
||||
embedding_function: Embeddings,
|
||||
embedding_function: TextEmbeddingModel,
|
||||
collection_name: str = "LangChainCollection",
|
||||
connection_args: Optional[Dict[str, Any]] = None,
|
||||
consistency_level: str = "Session",
|
||||
|
||||
@@ -4,7 +4,7 @@ from typing import Any, Dict, List, Optional
|
||||
|
||||
from pydantic import BaseModel, Extra, root_validator
|
||||
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.schema import BaseRetriever, Document
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ def hash_text(text: str) -> str:
|
||||
def create_index(
|
||||
contexts: List[str],
|
||||
index: Any,
|
||||
embeddings: Embeddings,
|
||||
embeddings: TextEmbeddingModel,
|
||||
sparse_encoder: Any,
|
||||
ids: Optional[List[str]] = None,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
@@ -74,7 +74,7 @@ def create_index(
|
||||
|
||||
|
||||
class PineconeHybridSearchRetriever(BaseRetriever, BaseModel):
|
||||
embeddings: Embeddings
|
||||
embeddings: TextEmbeddingModel
|
||||
sparse_encoder: Any
|
||||
index: Any
|
||||
top_k: int = 4
|
||||
|
||||
@@ -10,17 +10,17 @@ from typing import Any, List, Optional
|
||||
import numpy as np
|
||||
from pydantic import BaseModel
|
||||
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.schema import BaseRetriever, Document
|
||||
|
||||
|
||||
def create_index(contexts: List[str], embeddings: Embeddings) -> np.ndarray:
|
||||
def create_index(contexts: List[str], embeddings: TextEmbeddingModel) -> np.ndarray:
|
||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||
return np.array(list(executor.map(embeddings.embed_query, contexts)))
|
||||
|
||||
|
||||
class SVMRetriever(BaseRetriever, BaseModel):
|
||||
embeddings: Embeddings
|
||||
embeddings: TextEmbeddingModel
|
||||
index: Any
|
||||
texts: List[str]
|
||||
k: int = 4
|
||||
@@ -34,7 +34,7 @@ class SVMRetriever(BaseRetriever, BaseModel):
|
||||
|
||||
@classmethod
|
||||
def from_texts(
|
||||
cls, texts: List[str], embeddings: Embeddings, **kwargs: Any
|
||||
cls, texts: List[str], embeddings: TextEmbeddingModel, **kwargs: Any
|
||||
) -> SVMRetriever:
|
||||
index = create_index(texts, embeddings)
|
||||
return cls(embeddings=embeddings, index=index, texts=texts, **kwargs)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""Zilliz Retriever"""
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.schema import BaseRetriever, Document
|
||||
from langchain.vectorstores.zilliz import Zilliz
|
||||
|
||||
@@ -11,7 +11,7 @@ from langchain.vectorstores.zilliz import Zilliz
|
||||
class ZillizRetreiver(BaseRetriever):
|
||||
def __init__(
|
||||
self,
|
||||
embedding_function: Embeddings,
|
||||
embedding_function: TextEmbeddingModel,
|
||||
collection_name: str = "LangChainCollection",
|
||||
connection_args: Optional[Dict[str, Any]] = None,
|
||||
consistency_level: str = "Session",
|
||||
|
||||
@@ -13,7 +13,7 @@ from sqlalchemy.orm import Session, relationship
|
||||
from sqlalchemy.sql.expression import func
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.utils import get_from_dict_or_env
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
|
||||
@@ -126,7 +126,7 @@ class AnalyticDB(VectorStore):
|
||||
def __init__(
|
||||
self,
|
||||
connection_string: str,
|
||||
embedding_function: Embeddings,
|
||||
embedding_function: TextEmbeddingModel,
|
||||
collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME,
|
||||
collection_metadata: Optional[dict] = None,
|
||||
pre_delete_collection: bool = False,
|
||||
@@ -343,7 +343,7 @@ class AnalyticDB(VectorStore):
|
||||
def from_texts(
|
||||
cls,
|
||||
texts: List[str],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME,
|
||||
ids: Optional[List[str]] = None,
|
||||
@@ -390,7 +390,7 @@ class AnalyticDB(VectorStore):
|
||||
def from_documents(
|
||||
cls,
|
||||
documents: List[Document],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME,
|
||||
ids: Optional[List[str]] = None,
|
||||
pre_delete_collection: bool = False,
|
||||
|
||||
@@ -13,7 +13,7 @@ import numpy as np
|
||||
from langchain.docstore.base import Docstore
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.docstore.in_memory import InMemoryDocstore
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.vectorstores.utils import maximal_marginal_relevance
|
||||
|
||||
@@ -282,7 +282,7 @@ class Annoy(VectorStore):
|
||||
cls,
|
||||
texts: List[str],
|
||||
embeddings: List[List[float]],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
metric: str = DEFAULT_METRIC,
|
||||
trees: int = 100,
|
||||
@@ -319,7 +319,7 @@ class Annoy(VectorStore):
|
||||
def from_texts(
|
||||
cls,
|
||||
texts: List[str],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
metric: str = DEFAULT_METRIC,
|
||||
trees: int = 100,
|
||||
@@ -360,7 +360,7 @@ class Annoy(VectorStore):
|
||||
def from_embeddings(
|
||||
cls,
|
||||
text_embeddings: List[Tuple[str, List[float]]],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
metric: str = DEFAULT_METRIC,
|
||||
trees: int = 100,
|
||||
@@ -424,7 +424,7 @@ class Annoy(VectorStore):
|
||||
def load_local(
|
||||
cls,
|
||||
folder_path: str,
|
||||
embeddings: Embeddings,
|
||||
embeddings: TextEmbeddingModel,
|
||||
) -> Annoy:
|
||||
"""Load Annoy index, docstore, and index_to_docstore_id to disk.
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ from typing import Any, Iterable, List, Optional, Type
|
||||
import numpy as np
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -34,7 +34,7 @@ class AtlasDB(VectorStore):
|
||||
def __init__(
|
||||
self,
|
||||
name: str,
|
||||
embedding_function: Optional[Embeddings] = None,
|
||||
embedding_function: Optional[TextEmbeddingModel] = None,
|
||||
api_key: Optional[str] = None,
|
||||
description: str = "A description for your project",
|
||||
is_public: bool = True,
|
||||
@@ -212,7 +212,7 @@ class AtlasDB(VectorStore):
|
||||
def from_texts(
|
||||
cls: Type[AtlasDB],
|
||||
texts: List[str],
|
||||
embedding: Optional[Embeddings] = None,
|
||||
embedding: Optional[TextEmbeddingModel] = None,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
ids: Optional[List[str]] = None,
|
||||
name: Optional[str] = None,
|
||||
@@ -229,7 +229,7 @@ class AtlasDB(VectorStore):
|
||||
texts (List[str]): The list of texts to ingest.
|
||||
name (str): Name of the project to create.
|
||||
api_key (str): Your nomic API key,
|
||||
embedding (Optional[Embeddings]): Embedding function. Defaults to None.
|
||||
embedding (Optional[TextEmbeddingModel]): Embedding function. Defaults to None.
|
||||
metadatas (Optional[List[dict]]): List of metadatas. Defaults to None.
|
||||
ids (Optional[List[str]]): Optional list of document IDs. If None,
|
||||
ids will be auto created
|
||||
@@ -272,7 +272,7 @@ class AtlasDB(VectorStore):
|
||||
def from_documents(
|
||||
cls: Type[AtlasDB],
|
||||
documents: List[Document],
|
||||
embedding: Optional[Embeddings] = None,
|
||||
embedding: Optional[TextEmbeddingModel] = None,
|
||||
ids: Optional[List[str]] = None,
|
||||
name: Optional[str] = None,
|
||||
api_key: Optional[str] = None,
|
||||
@@ -289,7 +289,7 @@ class AtlasDB(VectorStore):
|
||||
name (str): Name of the collection to create.
|
||||
api_key (str): Your nomic API key,
|
||||
documents (List[Document]): List of documents to add to the vectorstore.
|
||||
embedding (Optional[Embeddings]): Embedding function. Defaults to None.
|
||||
embedding (Optional[TextEmbeddingModel]): Embedding function. Defaults to None.
|
||||
ids (Optional[List[str]]): Optional list of document IDs. If None,
|
||||
ids will be auto created
|
||||
description (str): A description for your project.
|
||||
|
||||
@@ -10,7 +10,7 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple, Type, TypeVar
|
||||
from pydantic import BaseModel, Field, root_validator
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.schema import BaseRetriever
|
||||
|
||||
VST = TypeVar("VST", bound="VectorStore")
|
||||
@@ -298,7 +298,7 @@ class VectorStore(ABC):
|
||||
def from_documents(
|
||||
cls: Type[VST],
|
||||
documents: List[Document],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
**kwargs: Any,
|
||||
) -> VST:
|
||||
"""Return VectorStore initialized from documents and embeddings."""
|
||||
@@ -310,7 +310,7 @@ class VectorStore(ABC):
|
||||
async def afrom_documents(
|
||||
cls: Type[VST],
|
||||
documents: List[Document],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
**kwargs: Any,
|
||||
) -> VST:
|
||||
"""Return VectorStore initialized from documents and embeddings."""
|
||||
@@ -323,7 +323,7 @@ class VectorStore(ABC):
|
||||
def from_texts(
|
||||
cls: Type[VST],
|
||||
texts: List[str],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
**kwargs: Any,
|
||||
) -> VST:
|
||||
@@ -333,7 +333,7 @@ class VectorStore(ABC):
|
||||
async def afrom_texts(
|
||||
cls: Type[VST],
|
||||
texts: List[str],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
**kwargs: Any,
|
||||
) -> VST:
|
||||
|
||||
@@ -8,7 +8,7 @@ from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Ty
|
||||
import numpy as np
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.utils import xor_args
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.vectorstores.utils import maximal_marginal_relevance
|
||||
@@ -58,7 +58,7 @@ class Chroma(VectorStore):
|
||||
def __init__(
|
||||
self,
|
||||
collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME,
|
||||
embedding_function: Optional[Embeddings] = None,
|
||||
embedding_function: Optional[TextEmbeddingModel] = None,
|
||||
persist_directory: Optional[str] = None,
|
||||
client_settings: Optional[chromadb.config.Settings] = None,
|
||||
collection_metadata: Optional[Dict] = None,
|
||||
@@ -354,7 +354,7 @@ class Chroma(VectorStore):
|
||||
def from_texts(
|
||||
cls: Type[Chroma],
|
||||
texts: List[str],
|
||||
embedding: Optional[Embeddings] = None,
|
||||
embedding: Optional[TextEmbeddingModel] = None,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
ids: Optional[List[str]] = None,
|
||||
collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME,
|
||||
@@ -372,7 +372,7 @@ class Chroma(VectorStore):
|
||||
texts (List[str]): List of texts to add to the collection.
|
||||
collection_name (str): Name of the collection to create.
|
||||
persist_directory (Optional[str]): Directory to persist the collection.
|
||||
embedding (Optional[Embeddings]): Embedding function. Defaults to None.
|
||||
embedding (Optional[TextEmbeddingModel]): Embedding function. Defaults to None.
|
||||
metadatas (Optional[List[dict]]): List of metadatas. Defaults to None.
|
||||
ids (Optional[List[str]]): List of document IDs. Defaults to None.
|
||||
client_settings (Optional[chromadb.config.Settings]): Chroma client settings
|
||||
@@ -394,7 +394,7 @@ class Chroma(VectorStore):
|
||||
def from_documents(
|
||||
cls: Type[Chroma],
|
||||
documents: List[Document],
|
||||
embedding: Optional[Embeddings] = None,
|
||||
embedding: Optional[TextEmbeddingModel] = None,
|
||||
ids: Optional[List[str]] = None,
|
||||
collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME,
|
||||
persist_directory: Optional[str] = None,
|
||||
@@ -412,7 +412,7 @@ class Chroma(VectorStore):
|
||||
persist_directory (Optional[str]): Directory to persist the collection.
|
||||
ids (Optional[List[str]]): List of document IDs. Defaults to None.
|
||||
documents (List[Document]): List of documents to add to the vectorstore.
|
||||
embedding (Optional[Embeddings]): Embedding function. Defaults to None.
|
||||
embedding (Optional[TextEmbeddingModel]): Embedding function. Defaults to None.
|
||||
client_settings (Optional[chromadb.config.Settings]): Chroma client settings
|
||||
Returns:
|
||||
Chroma: Chroma vectorstore.
|
||||
|
||||
@@ -9,7 +9,7 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tupl
|
||||
import numpy as np
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.vectorstores.utils import maximal_marginal_relevance
|
||||
|
||||
@@ -96,7 +96,7 @@ class DeepLake(VectorStore):
|
||||
self,
|
||||
dataset_path: str = _LANGCHAIN_DEFAULT_DEEPLAKE_PATH,
|
||||
token: Optional[str] = None,
|
||||
embedding_function: Optional[Embeddings] = None,
|
||||
embedding_function: Optional[TextEmbeddingModel] = None,
|
||||
read_only: Optional[bool] = False,
|
||||
ingestion_batch_size: int = 1024,
|
||||
num_workers: int = 0,
|
||||
@@ -494,7 +494,7 @@ class DeepLake(VectorStore):
|
||||
def from_texts(
|
||||
cls,
|
||||
texts: List[str],
|
||||
embedding: Optional[Embeddings] = None,
|
||||
embedding: Optional[TextEmbeddingModel] = None,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
ids: Optional[List[str]] = None,
|
||||
dataset_path: str = _LANGCHAIN_DEFAULT_DEEPLAKE_PATH,
|
||||
@@ -522,7 +522,7 @@ class DeepLake(VectorStore):
|
||||
save the dataset, but keeps it in memory instead.
|
||||
Should be used only for testing as it does not persist.
|
||||
documents (List[Document]): List of documents to add.
|
||||
embedding (Optional[Embeddings]): Embedding function. Defaults to None.
|
||||
embedding (Optional[TextEmbeddingModel]): Embedding function. Defaults to None.
|
||||
metadatas (Optional[List[dict]]): List of metadatas. Defaults to None.
|
||||
ids (Optional[List[str]]): List of document IDs. Defaults to None.
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ from typing import TYPE_CHECKING, Any, Iterable, List, Optional, Tuple, Type
|
||||
import numpy as np
|
||||
from pydantic import Field
|
||||
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.schema import Document
|
||||
from langchain.vectorstores import VectorStore
|
||||
from langchain.vectorstores.utils import maximal_marginal_relevance
|
||||
@@ -36,7 +36,7 @@ class DocArrayIndex(VectorStore, ABC):
|
||||
def __init__(
|
||||
self,
|
||||
doc_index: "BaseDocIndex",
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
):
|
||||
"""Initialize a vector store from DocArray's DocIndex."""
|
||||
self.doc_index = doc_index
|
||||
|
||||
@@ -3,7 +3,7 @@ from __future__ import annotations
|
||||
|
||||
from typing import Any, List, Literal, Optional
|
||||
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.vectorstores.docarray.base import (
|
||||
DocArrayIndex,
|
||||
_check_docarray_import,
|
||||
@@ -20,7 +20,7 @@ class DocArrayHnswSearch(DocArrayIndex):
|
||||
@classmethod
|
||||
def from_params(
|
||||
cls,
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
work_dir: str,
|
||||
n_dim: int,
|
||||
dist_metric: Literal["cosine", "ip", "l2"] = "cosine",
|
||||
@@ -36,7 +36,7 @@ class DocArrayHnswSearch(DocArrayIndex):
|
||||
"""Initialize DocArrayHnswSearch store.
|
||||
|
||||
Args:
|
||||
embedding (Embeddings): Embedding function.
|
||||
embedding (TextEmbeddingModel): Embedding function.
|
||||
work_dir (str): path to the location where all the data will be stored.
|
||||
n_dim (int): dimension of an embedding.
|
||||
dist_metric (str): Distance metric for DocArrayHnswSearch can be one of:
|
||||
@@ -78,7 +78,7 @@ class DocArrayHnswSearch(DocArrayIndex):
|
||||
def from_texts(
|
||||
cls,
|
||||
texts: List[str],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
work_dir: Optional[str] = None,
|
||||
n_dim: Optional[int] = None,
|
||||
@@ -89,7 +89,7 @@ class DocArrayHnswSearch(DocArrayIndex):
|
||||
|
||||
Args:
|
||||
texts (List[str]): Text data.
|
||||
embedding (Embeddings): Embedding function.
|
||||
embedding (TextEmbeddingModel): Embedding function.
|
||||
metadatas (Optional[List[dict]]): Metadata for each text if it exists.
|
||||
Defaults to None.
|
||||
work_dir (str): path to the location where all the data will be stored.
|
||||
|
||||
@@ -3,7 +3,7 @@ from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List, Literal, Optional
|
||||
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.vectorstores.docarray.base import (
|
||||
DocArrayIndex,
|
||||
_check_docarray_import,
|
||||
@@ -20,7 +20,7 @@ class DocArrayInMemorySearch(DocArrayIndex):
|
||||
@classmethod
|
||||
def from_params(
|
||||
cls,
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
metric: Literal[
|
||||
"cosine_sim", "euclidian_dist", "sgeuclidean_dist"
|
||||
] = "cosine_sim",
|
||||
@@ -29,7 +29,7 @@ class DocArrayInMemorySearch(DocArrayIndex):
|
||||
"""Initialize DocArrayInMemorySearch store.
|
||||
|
||||
Args:
|
||||
embedding (Embeddings): Embedding function.
|
||||
embedding (TextEmbeddingModel): Embedding function.
|
||||
metric (str): metric for exact nearest-neighbor search.
|
||||
Can be one of: "cosine_sim", "euclidean_dist" and "sqeuclidean_dist".
|
||||
Defaults to "cosine_sim".
|
||||
@@ -46,7 +46,7 @@ class DocArrayInMemorySearch(DocArrayIndex):
|
||||
def from_texts(
|
||||
cls,
|
||||
texts: List[str],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
metadatas: Optional[List[Dict[Any, Any]]] = None,
|
||||
**kwargs: Any,
|
||||
) -> DocArrayInMemorySearch:
|
||||
@@ -54,7 +54,7 @@ class DocArrayInMemorySearch(DocArrayIndex):
|
||||
|
||||
Args:
|
||||
texts (List[str]): Text data.
|
||||
embedding (Embeddings): Embedding function.
|
||||
embedding (TextEmbeddingModel): Embedding function.
|
||||
metadatas (Optional[List[Dict[Any, Any]]]): Metadata for each text
|
||||
if it exists. Defaults to None.
|
||||
metric (str): metric for exact nearest-neighbor search.
|
||||
|
||||
@@ -6,7 +6,7 @@ from abc import ABC
|
||||
from typing import Any, Dict, Iterable, List, Optional, Tuple
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.utils import get_from_env
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
|
||||
@@ -106,7 +106,7 @@ class ElasticVectorSearch(VectorStore, ABC):
|
||||
Args:
|
||||
elasticsearch_url (str): The URL for the Elasticsearch instance.
|
||||
index_name (str): The name of the Elasticsearch index for the embeddings.
|
||||
embedding (Embeddings): An object that provides the ability to embed text.
|
||||
embedding (TextEmbeddingModel): An object that provides the ability to embed text.
|
||||
It should be an instance of a class that subclasses the Embeddings
|
||||
abstract base class, such as OpenAIEmbeddings()
|
||||
|
||||
@@ -118,7 +118,7 @@ class ElasticVectorSearch(VectorStore, ABC):
|
||||
self,
|
||||
elasticsearch_url: str,
|
||||
index_name: str,
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
*,
|
||||
ssl_verify: Optional[Dict[str, Any]] = None,
|
||||
):
|
||||
@@ -244,7 +244,7 @@ class ElasticVectorSearch(VectorStore, ABC):
|
||||
def from_texts(
|
||||
cls,
|
||||
texts: List[str],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
elasticsearch_url: Optional[str] = None,
|
||||
index_name: Optional[str] = None,
|
||||
|
||||
@@ -13,7 +13,7 @@ import numpy as np
|
||||
from langchain.docstore.base import AddableMixin, Docstore
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.docstore.in_memory import InMemoryDocstore
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.vectorstores.utils import maximal_marginal_relevance
|
||||
|
||||
@@ -364,7 +364,7 @@ class FAISS(VectorStore):
|
||||
cls,
|
||||
texts: List[str],
|
||||
embeddings: List[List[float]],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
normalize_L2: bool = False,
|
||||
**kwargs: Any,
|
||||
@@ -396,7 +396,7 @@ class FAISS(VectorStore):
|
||||
def from_texts(
|
||||
cls,
|
||||
texts: List[str],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
**kwargs: Any,
|
||||
) -> FAISS:
|
||||
@@ -430,7 +430,7 @@ class FAISS(VectorStore):
|
||||
def from_embeddings(
|
||||
cls,
|
||||
text_embeddings: List[Tuple[str, List[float]]],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
**kwargs: Any,
|
||||
) -> FAISS:
|
||||
@@ -486,7 +486,7 @@ class FAISS(VectorStore):
|
||||
|
||||
@classmethod
|
||||
def load_local(
|
||||
cls, folder_path: str, embeddings: Embeddings, index_name: str = "index"
|
||||
cls, folder_path: str, embeddings: TextEmbeddingModel, index_name: str = "index"
|
||||
) -> FAISS:
|
||||
"""Load FAISS index, docstore, and index_to_docstore_id from disk.
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ import uuid
|
||||
from typing import Any, Iterable, List, Optional
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
|
||||
|
||||
@@ -27,7 +27,7 @@ class LanceDB(VectorStore):
|
||||
def __init__(
|
||||
self,
|
||||
connection: Any,
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
vector_key: Optional[str] = "vector",
|
||||
id_key: Optional[str] = "id",
|
||||
text_key: Optional[str] = "text",
|
||||
@@ -113,7 +113,7 @@ class LanceDB(VectorStore):
|
||||
def from_texts(
|
||||
cls,
|
||||
texts: List[str],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
connection: Any = None,
|
||||
vector_key: Optional[str] = "vector",
|
||||
|
||||
@@ -8,7 +8,7 @@ from uuid import uuid4
|
||||
import numpy as np
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.vectorstores.utils import maximal_marginal_relevance
|
||||
|
||||
@@ -28,7 +28,7 @@ class Milvus(VectorStore):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
embedding_function: Embeddings,
|
||||
embedding_function: TextEmbeddingModel,
|
||||
collection_name: str = "LangChainCollection",
|
||||
connection_args: Optional[dict[str, Any]] = None,
|
||||
consistency_level: str = "Session",
|
||||
@@ -77,7 +77,7 @@ class Milvus(VectorStore):
|
||||
server_name (str): If use tls, need to write the common name.
|
||||
|
||||
Args:
|
||||
embedding_function (Embeddings): Function used to embed the text.
|
||||
embedding_function (TextEmbeddingModel): Function used to embed the text.
|
||||
collection_name (str): Which Milvus collection to use. Defaults to
|
||||
"LangChainCollection".
|
||||
connection_args (Optional[dict[str, any]]): The arguments for connection to
|
||||
@@ -754,7 +754,7 @@ class Milvus(VectorStore):
|
||||
def from_texts(
|
||||
cls,
|
||||
texts: List[str],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
collection_name: str = "LangChainCollection",
|
||||
connection_args: dict[str, Any] = DEFAULT_MILVUS_CONNECTION,
|
||||
@@ -768,7 +768,7 @@ class Milvus(VectorStore):
|
||||
|
||||
Args:
|
||||
texts (List[str]): Text data.
|
||||
embedding (Embeddings): Embedding function.
|
||||
embedding (TextEmbeddingModel): Embedding function.
|
||||
metadatas (Optional[List[dict]]): Metadata for each text if it exists.
|
||||
Defaults to None.
|
||||
collection_name (str, optional): Collection name to use. Defaults to
|
||||
|
||||
@@ -10,7 +10,7 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple
|
||||
from pydantic import BaseSettings
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
|
||||
logger = logging.getLogger()
|
||||
@@ -98,7 +98,7 @@ class MyScale(VectorStore):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
config: Optional[MyScaleSettings] = None,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
@@ -259,7 +259,7 @@ class MyScale(VectorStore):
|
||||
def from_texts(
|
||||
cls,
|
||||
texts: List[str],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
metadatas: Optional[List[Dict[Any, Any]]] = None,
|
||||
config: Optional[MyScaleSettings] = None,
|
||||
text_ids: Optional[Iterable[str]] = None,
|
||||
@@ -269,7 +269,7 @@ class MyScale(VectorStore):
|
||||
"""Create Myscale wrapper with existing texts
|
||||
|
||||
Args:
|
||||
embedding_function (Embeddings): Function to extract text embedding
|
||||
embedding (TextEmbeddingModel): Function to extract text embedding
|
||||
texts (Iterable[str]): List or tuple of strings to be added
|
||||
config (MyScaleSettings, Optional): Myscale configuration
|
||||
text_ids (Optional[Iterable], optional): IDs for the texts.
|
||||
|
||||
@@ -5,7 +5,7 @@ import uuid
|
||||
from typing import Any, Dict, Iterable, List, Optional, Tuple
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.utils import get_from_dict_or_env
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
|
||||
@@ -295,7 +295,7 @@ class OpenSearchVectorSearch(VectorStore):
|
||||
self,
|
||||
opensearch_url: str,
|
||||
index_name: str,
|
||||
embedding_function: Embeddings,
|
||||
embedding_function: TextEmbeddingModel,
|
||||
**kwargs: Any,
|
||||
):
|
||||
"""Initialize with necessary components."""
|
||||
@@ -494,7 +494,7 @@ class OpenSearchVectorSearch(VectorStore):
|
||||
def from_texts(
|
||||
cls,
|
||||
texts: List[str],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
bulk_size: int = 500,
|
||||
**kwargs: Any,
|
||||
|
||||
@@ -12,7 +12,7 @@ from sqlalchemy.dialects.postgresql import JSON, UUID
|
||||
from sqlalchemy.orm import Session, declarative_base, relationship
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.utils import get_from_dict_or_env
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
|
||||
@@ -122,7 +122,7 @@ class PGVector(VectorStore):
|
||||
def __init__(
|
||||
self,
|
||||
connection_string: str,
|
||||
embedding_function: Embeddings,
|
||||
embedding_function: TextEmbeddingModel,
|
||||
collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME,
|
||||
collection_metadata: Optional[dict] = None,
|
||||
distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY,
|
||||
@@ -363,7 +363,7 @@ class PGVector(VectorStore):
|
||||
def from_texts(
|
||||
cls: Type[PGVector],
|
||||
texts: List[str],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME,
|
||||
distance_strategy: DistanceStrategy = DistanceStrategy.COSINE,
|
||||
@@ -412,7 +412,7 @@ class PGVector(VectorStore):
|
||||
def from_documents(
|
||||
cls: Type[PGVector],
|
||||
documents: List[Document],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME,
|
||||
distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY,
|
||||
ids: Optional[List[str]] = None,
|
||||
|
||||
@@ -6,7 +6,7 @@ import uuid
|
||||
from typing import Any, Callable, Iterable, List, Optional, Tuple
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -161,7 +161,7 @@ class Pinecone(VectorStore):
|
||||
def from_texts(
|
||||
cls,
|
||||
texts: List[str],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
ids: Optional[List[str]] = None,
|
||||
batch_size: int = 32,
|
||||
@@ -247,7 +247,7 @@ class Pinecone(VectorStore):
|
||||
def from_existing_index(
|
||||
cls,
|
||||
index_name: str,
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
text_key: str = "text",
|
||||
namespace: Optional[str] = None,
|
||||
) -> Pinecone:
|
||||
|
||||
@@ -21,7 +21,7 @@ from typing import (
|
||||
import numpy as np
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.vectorstores import VectorStore
|
||||
from langchain.vectorstores.utils import maximal_marginal_relevance
|
||||
|
||||
@@ -55,7 +55,7 @@ class Qdrant(VectorStore):
|
||||
self,
|
||||
client: Any,
|
||||
collection_name: str,
|
||||
embeddings: Optional[Embeddings] = None,
|
||||
embeddings: Optional[TextEmbeddingModel] = None,
|
||||
content_payload_key: str = CONTENT_KEY,
|
||||
metadata_payload_key: str = METADATA_KEY,
|
||||
embedding_function: Optional[Callable] = None, # deprecated
|
||||
@@ -99,7 +99,7 @@ class Qdrant(VectorStore):
|
||||
"Pass `Embeddings` instance to `embeddings` instead."
|
||||
)
|
||||
|
||||
if not isinstance(embeddings, Embeddings):
|
||||
if not isinstance(embeddings, TextEmbeddingModel):
|
||||
warnings.warn(
|
||||
"`embeddings` should be an instance of `Embeddings`."
|
||||
"Using `embeddings` as `embedding_function` which is deprecated"
|
||||
@@ -292,7 +292,7 @@ class Qdrant(VectorStore):
|
||||
def from_texts(
|
||||
cls: Type[Qdrant],
|
||||
texts: List[str],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
location: Optional[str] = None,
|
||||
url: Optional[str] = None,
|
||||
|
||||
@@ -22,7 +22,7 @@ import numpy as np
|
||||
from pydantic import BaseModel, root_validator
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.utils import get_from_dict_or_env
|
||||
from langchain.vectorstores.base import VectorStore, VectorStoreRetriever
|
||||
|
||||
@@ -361,7 +361,7 @@ class Redis(VectorStore):
|
||||
def from_texts_return_keys(
|
||||
cls,
|
||||
texts: List[str],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
index_name: Optional[str] = None,
|
||||
content_key: str = "content",
|
||||
@@ -421,7 +421,7 @@ class Redis(VectorStore):
|
||||
def from_texts(
|
||||
cls: Type[Redis],
|
||||
texts: List[str],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
index_name: Optional[str] = None,
|
||||
content_key: str = "content",
|
||||
@@ -502,7 +502,7 @@ class Redis(VectorStore):
|
||||
@classmethod
|
||||
def from_existing_index(
|
||||
cls,
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
index_name: str,
|
||||
content_key: str = "content",
|
||||
metadata_key: str = "metadata",
|
||||
|
||||
@@ -15,7 +15,7 @@ from typing import (
|
||||
import numpy as np
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.vectorstores.utils import maximal_marginal_relevance
|
||||
|
||||
@@ -41,14 +41,14 @@ class SupabaseVectorStore(VectorStore):
|
||||
# This is the embedding function. Don't confuse with the embedding vectors.
|
||||
# We should perhaps rename the underlying Embedding base class to EmbeddingFunction
|
||||
# or something
|
||||
_embedding: Embeddings
|
||||
_embedding: TextEmbeddingModel
|
||||
table_name: str
|
||||
query_name: str
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
client: supabase.client.Client,
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
table_name: str,
|
||||
query_name: Union[str, None] = None,
|
||||
) -> None:
|
||||
@@ -62,7 +62,7 @@ class SupabaseVectorStore(VectorStore):
|
||||
)
|
||||
|
||||
self._client = client
|
||||
self._embedding: Embeddings = embedding
|
||||
self._embedding: TextEmbeddingModel = embedding
|
||||
self.table_name = table_name or "documents"
|
||||
self.query_name = query_name or "match_documents"
|
||||
|
||||
@@ -81,7 +81,7 @@ class SupabaseVectorStore(VectorStore):
|
||||
def from_texts(
|
||||
cls: Type["SupabaseVectorStore"],
|
||||
texts: List[str],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
client: Optional[supabase.client.Client] = None,
|
||||
table_name: Optional[str] = "documents",
|
||||
|
||||
@@ -7,7 +7,7 @@ import uuid
|
||||
from typing import Any, Iterable, List, Optional, Type
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.utils import get_from_dict_or_env
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
|
||||
@@ -21,7 +21,7 @@ def _uuid_key() -> str:
|
||||
class Tair(VectorStore):
|
||||
def __init__(
|
||||
self,
|
||||
embedding_function: Embeddings,
|
||||
embedding_function: TextEmbeddingModel,
|
||||
url: str,
|
||||
index_name: str,
|
||||
content_key: str = "content",
|
||||
@@ -140,7 +140,7 @@ class Tair(VectorStore):
|
||||
def from_texts(
|
||||
cls: Type[Tair],
|
||||
texts: List[str],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
index_name: str = "langchain",
|
||||
content_key: str = "content",
|
||||
@@ -208,7 +208,7 @@ class Tair(VectorStore):
|
||||
def from_documents(
|
||||
cls,
|
||||
documents: List[Document],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
index_name: str = "langchain",
|
||||
content_key: str = "content",
|
||||
@@ -262,7 +262,7 @@ class Tair(VectorStore):
|
||||
@classmethod
|
||||
def from_existing_index(
|
||||
cls,
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
index_name: str = "langchain",
|
||||
content_key: str = "content",
|
||||
metadata_key: str = "metadata",
|
||||
|
||||
@@ -8,7 +8,7 @@ from uuid import uuid4
|
||||
import numpy as np
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.utils import get_from_dict_or_env
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.vectorstores.utils import maximal_marginal_relevance
|
||||
@@ -89,7 +89,7 @@ class Weaviate(VectorStore):
|
||||
client: Any,
|
||||
index_name: str,
|
||||
text_key: str,
|
||||
embedding: Optional[Embeddings] = None,
|
||||
embedding: Optional[TextEmbeddingModel] = None,
|
||||
attributes: Optional[List[str]] = None,
|
||||
relevance_score_fn: Optional[
|
||||
Callable[[float], float]
|
||||
@@ -364,7 +364,7 @@ class Weaviate(VectorStore):
|
||||
def from_texts(
|
||||
cls: Type[Weaviate],
|
||||
texts: List[str],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
**kwargs: Any,
|
||||
) -> Weaviate:
|
||||
|
||||
@@ -3,7 +3,7 @@ from __future__ import annotations
|
||||
import logging
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.vectorstores.milvus import Milvus
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -59,7 +59,7 @@ class Zilliz(Milvus):
|
||||
def from_texts(
|
||||
cls,
|
||||
texts: List[str],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
collection_name: str = "LangChainCollection",
|
||||
connection_args: dict[str, Any] = {},
|
||||
@@ -73,7 +73,7 @@ class Zilliz(Milvus):
|
||||
|
||||
Args:
|
||||
texts (List[str]): Text data.
|
||||
embedding (Embeddings): Embedding function.
|
||||
embedding (TextEmbeddingModel): Embedding function.
|
||||
metadatas (Optional[List[dict]]): Metadata for each text if it exists.
|
||||
Defaults to None.
|
||||
collection_name (str, optional): Collection name to use. Defaults to
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
"""Fake Embedding class for testing purposes."""
|
||||
from typing import List
|
||||
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
|
||||
fake_texts = ["foo", "bar", "baz"]
|
||||
|
||||
|
||||
class FakeEmbeddings(Embeddings):
|
||||
class FakeEmbeddings(TextEmbeddingModel):
|
||||
"""Fake embeddings functionality for testing."""
|
||||
|
||||
def embed_documents(self, texts: List[str]) -> List[List[float]]:
|
||||
|
||||
@@ -4,7 +4,7 @@ from typing import Callable, Optional
|
||||
import pytest
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.vectorstores import Qdrant
|
||||
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
|
||||
|
||||
@@ -139,7 +139,7 @@ def test_qdrant_max_marginal_relevance_search(
|
||||
],
|
||||
)
|
||||
def test_qdrant_embedding_interface(
|
||||
embeddings: Optional[Embeddings], embedding_function: Optional[Callable]
|
||||
embeddings: Optional[TextEmbeddingModel], embedding_function: Optional[Callable]
|
||||
) -> None:
|
||||
from qdrant_client import QdrantClient
|
||||
|
||||
@@ -162,7 +162,7 @@ def test_qdrant_embedding_interface(
|
||||
],
|
||||
)
|
||||
def test_qdrant_embedding_interface_raises(
|
||||
embeddings: Optional[Embeddings], embedding_function: Optional[Callable]
|
||||
embeddings: Optional[TextEmbeddingModel], embedding_function: Optional[Callable]
|
||||
) -> None:
|
||||
from qdrant_client import QdrantClient
|
||||
|
||||
|
||||
@@ -9,12 +9,12 @@ from langchain.callbacks.manager import (
|
||||
)
|
||||
from langchain.chains.hyde.base import HypotheticalDocumentEmbedder
|
||||
from langchain.chains.hyde.prompts import PROMPT_MAP
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.llms.base import BaseLLM
|
||||
from langchain.schema import Generation, LLMResult
|
||||
|
||||
|
||||
class FakeEmbeddings(Embeddings):
|
||||
class FakeEmbeddings(TextEmbeddingModel):
|
||||
"""Fake embedding class for tests."""
|
||||
|
||||
def embed_documents(self, texts: List[str]) -> List[List[float]]:
|
||||
|
||||
@@ -5,7 +5,7 @@ from typing import Any, Iterable, List, Optional, Tuple, Type
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.embeddings.base import TextEmbeddingModel
|
||||
from langchain.retrievers.time_weighted_retriever import (
|
||||
TimeWeightedVectorStoreRetriever,
|
||||
_get_hours_passed,
|
||||
@@ -67,7 +67,7 @@ class MockVectorStore(VectorStore):
|
||||
def from_documents(
|
||||
cls: Type["MockVectorStore"],
|
||||
documents: List[Document],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
**kwargs: Any,
|
||||
) -> "MockVectorStore":
|
||||
"""Return VectorStore initialized from documents and embeddings."""
|
||||
@@ -79,7 +79,7 @@ class MockVectorStore(VectorStore):
|
||||
def from_texts(
|
||||
cls: Type["MockVectorStore"],
|
||||
texts: List[str],
|
||||
embedding: Embeddings,
|
||||
embedding: TextEmbeddingModel,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
**kwargs: Any,
|
||||
) -> "MockVectorStore":
|
||||
|
||||
Reference in New Issue
Block a user