From a3dd4dcadfc0e7bb56385c803f6718b9ab71e41b Mon Sep 17 00:00:00 2001
From: Leonid Ganeline
Date: Fri, 18 Aug 2023 06:20:39 -0700
Subject: [PATCH] =?UTF-8?q?=F0=9F=93=96=20docstrings=20`retrievers`=20cons?=
 =?UTF-8?q?istency=20(#9422)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

:scroll:
- updated the top-level descriptions to a consistent format;
- changed the format of several 100% internal functions from "name" to
  "_name". So, these functions are not shown in the Top-level API Reference
  page (with lists of classes/functions)
---
 libs/langchain/langchain/retrievers/arxiv.py | 3 +-
 .../retrievers/azure_cognitive_search.py | 4 +-
 libs/langchain/langchain/retrievers/bm25.py | 7 +---
 .../langchain/retrievers/chaindesk.py | 2 +-
 .../retrievers/chatgpt_plugin_retriever.py | 2 +-
 .../langchain/retrievers/databerry.py | 2 +-
 .../langchain/retrievers/docarray.py | 5 +--
 .../retrievers/document_compressors/base.py | 5 +--
 .../document_compressors/chain_extract.py | 2 +-
 .../document_compressors/cohere_rerank.py | 2 +-
 .../retrievers/elastic_search_bm25.py | 2 +-
 .../langchain/retrievers/ensemble.py | 5 ++-
 .../google_cloud_enterprise_search.py | 7 ++--
 libs/langchain/langchain/retrievers/kendra.py | 42 ++++++++++---------
 libs/langchain/langchain/retrievers/knn.py | 2 +-
 .../langchain/retrievers/llama_index.py | 8 +++-
 libs/langchain/langchain/retrievers/metal.py | 2 +-
 libs/langchain/langchain/retrievers/milvus.py | 2 +-
 .../langchain/retrievers/multi_query.py | 5 ++-
 .../retrievers/parent_document_retriever.py | 2 +-
 .../retrievers/pinecone_hybrid_search.py | 10 ++---
 libs/langchain/langchain/retrievers/pubmed.py | 2 +-
 .../langchain/retrievers/re_phraser.py | 5 +--
 .../langchain/retrievers/remote_retriever.py | 2 +-
 .../langchain/retrievers/self_query/chroma.py | 2 +-
 .../retrievers/self_query/deeplake.py | 2 +-
 .../retrievers/self_query/elasticsearch.py | 2 +-
 .../retrievers/self_query/myscale.py | 27 ++++++------
 .../retrievers/self_query/pinecone.py | 2 +-
 .../langchain/retrievers/self_query/qdrant.py | 2 +-
 .../retrievers/self_query/weaviate.py | 2 +-
 libs/langchain/langchain/retrievers/svm.py | 2 +-
 libs/langchain/langchain/retrievers/tfidf.py | 2 +-
 .../retrievers/time_weighted_retriever.py | 2 +-
 .../langchain/retrievers/vespa_retriever.py | 2 +-
 .../retrievers/weaviate_hybrid_search.py | 6 ++-
 .../langchain/retrievers/web_research.py | 4 +-
 .../langchain/retrievers/wikipedia.py | 2 +-
 libs/langchain/langchain/retrievers/zep.py | 2 +-
 libs/langchain/langchain/retrievers/zilliz.py | 2 +-
 40 files changed, 97 insertions(+), 96 deletions(-)

diff --git a/libs/langchain/langchain/retrievers/arxiv.py b/libs/langchain/langchain/retrievers/arxiv.py
index 56019273b9d..f9a7910d8da 100644
--- a/libs/langchain/langchain/retrievers/arxiv.py
+++ b/libs/langchain/langchain/retrievers/arxiv.py
@@ -6,8 +6,7 @@ from langchain.utilities.arxiv import ArxivAPIWrapper
 
 
 class ArxivRetriever(BaseRetriever, ArxivAPIWrapper):
-    """
-    Retriever for Arxiv.
+    """`Arxiv` retriever.
 
     It wraps load() to get_relevant_documents().
     It uses all ArxivAPIWrapper arguments without any change.
diff --git a/libs/langchain/langchain/retrievers/azure_cognitive_search.py b/libs/langchain/langchain/retrievers/azure_cognitive_search.py
index 45deebfe31e..e95e8bd4929 100644
--- a/libs/langchain/langchain/retrievers/azure_cognitive_search.py
+++ b/libs/langchain/langchain/retrievers/azure_cognitive_search.py
@@ -1,5 +1,3 @@
-"""Retriever for the Azure Cognitive Search service."""
-
 from __future__ import annotations
 
 import json
@@ -18,7 +16,7 @@ from langchain.utils import get_from_dict_or_env
 
 
 class AzureCognitiveSearchRetriever(BaseRetriever):
-    """Retriever for the Azure Cognitive Search service."""
+    """`Azure Cognitive Search` service retriever."""
 
     service_name: str = ""
     """Name of Azure Cognitive Search service"""
diff --git a/libs/langchain/langchain/retrievers/bm25.py b/libs/langchain/langchain/retrievers/bm25.py
index a5ef4f28496..2a03b8b2df6 100644
--- a/libs/langchain/langchain/retrievers/bm25.py
+++ b/libs/langchain/langchain/retrievers/bm25.py
@@ -1,8 +1,3 @@
-"""
-BM25 Retriever without elastic search
-"""
-
-
 from __future__ import annotations
 
 from typing import Any, Callable, Dict, Iterable, List, Optional
@@ -16,7 +11,7 @@ def default_preprocessing_func(text: str) -> List[str]:
 
 
 class BM25Retriever(BaseRetriever):
-    """BM25 Retriever without elastic search."""
+    """`BM25` retriever without Elasticsearch."""
 
     vectorizer: Any
     """ BM25 vectorizer."""
diff --git a/libs/langchain/langchain/retrievers/chaindesk.py b/libs/langchain/langchain/retrievers/chaindesk.py
index b68f31658e4..f4f85802f12 100644
--- a/libs/langchain/langchain/retrievers/chaindesk.py
+++ b/libs/langchain/langchain/retrievers/chaindesk.py
@@ -11,7 +11,7 @@ from langchain.schema import BaseRetriever, Document
 
 
 class ChaindeskRetriever(BaseRetriever):
-    """Retriever for the Chaindesk API."""
+    """`Chaindesk API` retriever."""
 
     datastore_url: str
     top_k: Optional[int]
diff --git a/libs/langchain/langchain/retrievers/chatgpt_plugin_retriever.py b/libs/langchain/langchain/retrievers/chatgpt_plugin_retriever.py
index 5f2404f88ca..e279467609b 100644
--- a/libs/langchain/langchain/retrievers/chatgpt_plugin_retriever.py
+++ b/libs/langchain/langchain/retrievers/chatgpt_plugin_retriever.py
@@ -13,7 +13,7 @@ from langchain.schema import BaseRetriever, Document
 
 
 class ChatGPTPluginRetriever(BaseRetriever):
-    """Retrieves documents from a ChatGPT plugin."""
+    """`ChatGPT plugin` retriever."""
 
     url: str
     """URL of the ChatGPT plugin."""
diff --git a/libs/langchain/langchain/retrievers/databerry.py b/libs/langchain/langchain/retrievers/databerry.py
index d46144ac6f1..4113f99c2fd 100644
--- a/libs/langchain/langchain/retrievers/databerry.py
+++ b/libs/langchain/langchain/retrievers/databerry.py
@@ -11,7 +11,7 @@ from langchain.schema import BaseRetriever, Document
 
 
 class DataberryRetriever(BaseRetriever):
-    """Retriever for the Databerry API."""
+    """`Databerry API` retriever."""
 
     datastore_url: str
     top_k: Optional[int]
diff --git a/libs/langchain/langchain/retrievers/docarray.py b/libs/langchain/langchain/retrievers/docarray.py
index edd4e81f088..7e1796cc992 100644
--- a/libs/langchain/langchain/retrievers/docarray.py
+++ b/libs/langchain/langchain/retrievers/docarray.py
@@ -17,10 +17,9 @@ class SearchType(str, Enum):
 
 
 class DocArrayRetriever(BaseRetriever):
-    """
-    Retriever for DocArray Document Indices.
+    """`DocArray Document Indices` retriever.
 
-    Currently, supports 5 backends:
+    Currently, it supports 5 backends:
     InMemoryExactNNIndex, HnswDocumentIndex, QdrantDocumentIndex,
     ElasticDocIndex, and WeaviateDocumentIndex.
 
diff --git a/libs/langchain/langchain/retrievers/document_compressors/base.py b/libs/langchain/langchain/retrievers/document_compressors/base.py
index b44c1e5c73d..abb5b02c6c3 100644
--- a/libs/langchain/langchain/retrievers/document_compressors/base.py
+++ b/libs/langchain/langchain/retrievers/document_compressors/base.py
@@ -1,4 +1,3 @@
-"""Interface for retrieved document compressors."""
 from abc import ABC, abstractmethod
 from inspect import signature
 from typing import List, Optional, Sequence, Union
@@ -9,7 +8,7 @@ from langchain.schema import BaseDocumentTransformer, Document
 
 
 class BaseDocumentCompressor(BaseModel, ABC):
-    """Base abstraction interface for document compression."""
+    """Base class for document compressors."""
 
     @abstractmethod
     def compress_documents(
@@ -31,7 +30,7 @@ class BaseDocumentCompressor(BaseModel, ABC):
 
 
 class DocumentCompressorPipeline(BaseDocumentCompressor):
-    """Document compressor that uses a pipeline of transformers."""
+    """Document compressor that uses a pipeline of Transformers."""
 
     transformers: List[Union[BaseDocumentTransformer, BaseDocumentCompressor]]
     """List of document filters that are chained together and run in sequence."""
diff --git a/libs/langchain/langchain/retrievers/document_compressors/chain_extract.py b/libs/langchain/langchain/retrievers/document_compressors/chain_extract.py
index 872124dd60e..43a50be79da 100644
--- a/libs/langchain/langchain/retrievers/document_compressors/chain_extract.py
+++ b/libs/langchain/langchain/retrievers/document_compressors/chain_extract.py
@@ -42,7 +42,7 @@ def _get_default_chain_prompt() -> PromptTemplate:
 
 
 class LLMChainExtractor(BaseDocumentCompressor):
-    """DocumentCompressor that uses an LLM chain to extract
+    """Document compressor that uses an LLM chain to extract
     the relevant parts of documents."""
 
     llm_chain: LLMChain
diff --git a/libs/langchain/langchain/retrievers/document_compressors/cohere_rerank.py b/libs/langchain/langchain/retrievers/document_compressors/cohere_rerank.py
index f6e79a0b3a3..35de84432ee 100644
--- a/libs/langchain/langchain/retrievers/document_compressors/cohere_rerank.py
+++ b/libs/langchain/langchain/retrievers/document_compressors/cohere_rerank.py
@@ -20,7 +20,7 @@ else:
 
 
 class CohereRerank(BaseDocumentCompressor):
-    """DocumentCompressor that uses Cohere's rerank API to compress documents."""
+    """Document compressor that uses `Cohere Rerank API`."""
 
     client: Client
     """Cohere client to use for compressing documents."""
diff --git a/libs/langchain/langchain/retrievers/elastic_search_bm25.py b/libs/langchain/langchain/retrievers/elastic_search_bm25.py
index 52c4c97bc13..68fb4323f25 100644
--- a/libs/langchain/langchain/retrievers/elastic_search_bm25.py
+++ b/libs/langchain/langchain/retrievers/elastic_search_bm25.py
@@ -11,7 +11,7 @@ from langchain.schema import BaseRetriever
 
 
 class ElasticSearchBM25Retriever(BaseRetriever):
-    """Retriever for the Elasticsearch using BM25 as a retrieval method.
+    """`Elasticsearch` retriever that uses `BM25`.
 
     To connect to an Elasticsearch instance that requires login credentials,
     including Elastic Cloud, use the Elasticsearch URL format
diff --git a/libs/langchain/langchain/retrievers/ensemble.py b/libs/langchain/langchain/retrievers/ensemble.py
index 03f555a686c..93879e4d3cd 100644
--- a/libs/langchain/langchain/retrievers/ensemble.py
+++ b/libs/langchain/langchain/retrievers/ensemble.py
@@ -13,8 +13,9 @@ from langchain.schema import BaseRetriever, Document
 
 
 class EnsembleRetriever(BaseRetriever):
-    """
-    This class ensemble the results of multiple retrievers by using rank fusion.
+    """Retriever that ensembles the multiple retrievers.
+
+    It uses a rank fusion.
 
     Args:
         retrievers: A list of retrievers to ensemble.
diff --git a/libs/langchain/langchain/retrievers/google_cloud_enterprise_search.py b/libs/langchain/langchain/retrievers/google_cloud_enterprise_search.py
index 826428c26bb..51637bf322a 100644
--- a/libs/langchain/langchain/retrievers/google_cloud_enterprise_search.py
+++ b/libs/langchain/langchain/retrievers/google_cloud_enterprise_search.py
@@ -17,11 +17,10 @@ if TYPE_CHECKING:
 
 
 class GoogleCloudEnterpriseSearchRetriever(BaseRetriever):
-    """Retriever for the Google Cloud Enterprise Search Service API.
-
-    For the detailed explanation of the Enterprise Search concepts
-    and configuration parameters refer to the product documentation.
+    """`Google Cloud Enterprise Search API` retriever.
 
+    For a detailed explanation of the Enterprise Search concepts
+    and configuration parameters, refer to the product documentation.
     https://cloud.google.com/generative-ai-app-builder/docs/enterprise-search-introduction
     """
 
diff --git a/libs/langchain/langchain/retrievers/kendra.py b/libs/langchain/langchain/retrievers/kendra.py
index 1f43050c49c..fd11358f850 100644
--- a/libs/langchain/langchain/retrievers/kendra.py
+++ b/libs/langchain/langchain/retrievers/kendra.py
@@ -9,7 +9,7 @@ from langchain.schema import BaseRetriever
 
 
 def clean_excerpt(excerpt: str) -> str:
-    """Cleans an excerpt from Kendra.
+    """Clean an excerpt from Kendra.
 
     Args:
         excerpt: The excerpt to clean.
@@ -25,7 +25,7 @@ def clean_excerpt(excerpt: str) -> str:
 
 
 def combined_text(item: "ResultItem") -> str:
-    """Combines a ResultItem title and excerpt into a single string.
+    """Combine a ResultItem title and excerpt into a single string.
 
     Args:
         item: the ResultItem of a Kendra search.
@@ -45,15 +45,15 @@ def combined_text(item: "ResultItem") -> str:
 
 
 DocumentAttributeValueType = Union[str, int, List[str], None]
-"""Possible types of a DocumentAttributeValue. Dates are also represented as str."""
+"""Possible types of a DocumentAttributeValue.
+
+Dates are also represented as str.
+"""
 
 
 # Unexpected keyword argument "extra" for "__init_subclass__" of "object"
 class Highlight(BaseModel, extra=Extra.allow):  # type: ignore[call-arg]
-    """
-    Represents the information that can be
-    used to highlight key words in the excerpt.
- """ + """Information that highlights the key words in the excerpt.""" BeginOffset: int """The zero-based location in the excerpt where the highlight starts.""" @@ -79,7 +79,7 @@ class TextWithHighLights(BaseModel, extra=Extra.allow): # type: ignore[call-arg class AdditionalResultAttributeValue( # type: ignore[call-arg] BaseModel, extra=Extra.allow ): - """The value of an additional result attribute.""" + """Value of an additional result attribute.""" TextWithHighlightsValue: TextWithHighLights """The text with highlights value.""" @@ -87,7 +87,7 @@ class AdditionalResultAttributeValue( # type: ignore[call-arg] # Unexpected keyword argument "extra" for "__init_subclass__" of "object" class AdditionalResultAttribute(BaseModel, extra=Extra.allow): # type: ignore[call-arg] - """An additional result attribute.""" + """Additional result attribute.""" Key: str """The key of the attribute.""" @@ -102,7 +102,7 @@ class AdditionalResultAttribute(BaseModel, extra=Extra.allow): # type: ignore[c # Unexpected keyword argument "extra" for "__init_subclass__" of "object" class DocumentAttributeValue(BaseModel, extra=Extra.allow): # type: ignore[call-arg] - """The value of a document attribute.""" + """Value of a document attribute.""" DateValue: Optional[str] """The date expressed as an ISO 8601 string.""" @@ -133,7 +133,7 @@ class DocumentAttributeValue(BaseModel, extra=Extra.allow): # type: ignore[call # Unexpected keyword argument "extra" for "__init_subclass__" of "object" class DocumentAttribute(BaseModel, extra=Extra.allow): # type: ignore[call-arg] - """A document attribute.""" + """Document attribute.""" Key: str """The key of the attribute.""" @@ -143,7 +143,7 @@ class DocumentAttribute(BaseModel, extra=Extra.allow): # type: ignore[call-arg] # Unexpected keyword argument "extra" for "__init_subclass__" of "object" class ResultItem(BaseModel, ABC, extra=Extra.allow): # type: ignore[call-arg] - """Abstract class that represents a result item.""" + """Base class of a result item.""" Id: Optional[str] """The ID of the relevant result item.""" @@ -199,7 +199,7 @@ class ResultItem(BaseModel, ABC, extra=Extra.allow): # type: ignore[call-arg] class QueryResultItem(ResultItem): - """A Query API result item.""" + """Query API result item.""" DocumentTitle: TextWithHighLights """The document title.""" @@ -248,7 +248,7 @@ class QueryResultItem(ResultItem): class RetrieveResultItem(ResultItem): - """A Retrieve API result item.""" + """Retrieve API result item.""" DocumentTitle: Optional[str] """The document title.""" @@ -264,11 +264,12 @@ class RetrieveResultItem(ResultItem): # Unexpected keyword argument "extra" for "__init_subclass__" of "object" class QueryResult(BaseModel, extra=Extra.allow): # type: ignore[call-arg] - """ - Represents an Amazon Kendra Query API search result, which is composed of: + """`Amazon Kendra Query API` search result. + + It is composed of: * Relevant suggested answers: either a text excerpt or table excerpt. * Matching FAQs or questions-answer from your FAQ file. - * Documents including an excerpt of each document with the its title. + * Documents including an excerpt of each document with its title. 
""" ResultItems: List[QueryResultItem] @@ -277,8 +278,9 @@ class QueryResult(BaseModel, extra=Extra.allow): # type: ignore[call-arg] # Unexpected keyword argument "extra" for "__init_subclass__" of "object" class RetrieveResult(BaseModel, extra=Extra.allow): # type: ignore[call-arg] - """ - Represents an Amazon Kendra Retrieve API search result, which is composed of: + """`Amazon Kendra Retrieve API` search result. + + It is composed of: * relevant passages or text excerpts given an input query. """ @@ -289,7 +291,7 @@ class RetrieveResult(BaseModel, extra=Extra.allow): # type: ignore[call-arg] class AmazonKendraRetriever(BaseRetriever): - """Retriever for the Amazon Kendra Index. + """`Amazon Kendra Index` retriever. Args: index_id: Kendra index id diff --git a/libs/langchain/langchain/retrievers/knn.py b/libs/langchain/langchain/retrievers/knn.py index d28408347f6..7bdaf556536 100644 --- a/libs/langchain/langchain/retrievers/knn.py +++ b/libs/langchain/langchain/retrievers/knn.py @@ -30,7 +30,7 @@ def create_index(contexts: List[str], embeddings: Embeddings) -> np.ndarray: class KNNRetriever(BaseRetriever): - """KNN Retriever.""" + """`KNN` retriever.""" embeddings: Embeddings """Embeddings model to use.""" diff --git a/libs/langchain/langchain/retrievers/llama_index.py b/libs/langchain/langchain/retrievers/llama_index.py index 42ad10d7bf3..81f30d2104c 100644 --- a/libs/langchain/langchain/retrievers/llama_index.py +++ b/libs/langchain/langchain/retrievers/llama_index.py @@ -6,7 +6,9 @@ from langchain.schema import BaseRetriever, Document class LlamaIndexRetriever(BaseRetriever): - """Retriever for the question-answering with sources over + """`LlamaIndex` retriever. + + It is used for the question-answering with sources over an LlamaIndex data structure.""" index: Any @@ -40,7 +42,9 @@ class LlamaIndexRetriever(BaseRetriever): class LlamaIndexGraphRetriever(BaseRetriever): - """Retriever for question-answering with sources over an LlamaIndex + """`LlamaIndex` graph data structure retriever. + + It is used for question-answering with sources over an LlamaIndex graph data structure.""" graph: Any diff --git a/libs/langchain/langchain/retrievers/metal.py b/libs/langchain/langchain/retrievers/metal.py index 286de10300a..aed77b45137 100644 --- a/libs/langchain/langchain/retrievers/metal.py +++ b/libs/langchain/langchain/retrievers/metal.py @@ -6,7 +6,7 @@ from langchain.schema import BaseRetriever, Document class MetalRetriever(BaseRetriever): - """Retriever that uses the Metal API.""" + """`Metal API` retriever.""" client: Any """The Metal client to use.""" diff --git a/libs/langchain/langchain/retrievers/milvus.py b/libs/langchain/langchain/retrievers/milvus.py index 2d03388e0cc..c749152d53e 100644 --- a/libs/langchain/langchain/retrievers/milvus.py +++ b/libs/langchain/langchain/retrievers/milvus.py @@ -12,7 +12,7 @@ from langchain.vectorstores.milvus import Milvus class MilvusRetriever(BaseRetriever): - """Retriever that uses the Milvus API.""" + """`Milvus API` retriever.""" embedding_function: Embeddings collection_name: str = "LangChainCollection" diff --git a/libs/langchain/langchain/retrievers/multi_query.py b/libs/langchain/langchain/retrievers/multi_query.py index 2085e9f2d92..8398ce40d9d 100644 --- a/libs/langchain/langchain/retrievers/multi_query.py +++ b/libs/langchain/langchain/retrievers/multi_query.py @@ -44,9 +44,10 @@ DEFAULT_QUERY_PROMPT = PromptTemplate( class MultiQueryRetriever(BaseRetriever): + """Given a query, use an LLM to write a set of queries. 
- """Given a user query, use an LLM to write a set of queries. - Retrieve docs for each query. Rake the unique union of all retrieved docs.""" + Retrieve docs for each query. Rake the unique union of all retrieved docs. + """ retriever: BaseRetriever llm_chain: LLMChain diff --git a/libs/langchain/langchain/retrievers/parent_document_retriever.py b/libs/langchain/langchain/retrievers/parent_document_retriever.py index d7ad9acfb12..f56b8929032 100644 --- a/libs/langchain/langchain/retrievers/parent_document_retriever.py +++ b/libs/langchain/langchain/retrievers/parent_document_retriever.py @@ -11,7 +11,7 @@ from langchain.vectorstores.base import VectorStore class ParentDocumentRetriever(BaseRetriever): - """Fetches small chunks, then fetches their parent documents. + """Retrieve small chunks then retrieve their parent documents. When splitting documents for retrieval, there are often conflicting desires: diff --git a/libs/langchain/langchain/retrievers/pinecone_hybrid_search.py b/libs/langchain/langchain/retrievers/pinecone_hybrid_search.py index a3653164524..5791aedf7cc 100644 --- a/libs/langchain/langchain/retrievers/pinecone_hybrid_search.py +++ b/libs/langchain/langchain/retrievers/pinecone_hybrid_search.py @@ -29,13 +29,13 @@ def create_index( ids: Optional[List[str]] = None, metadatas: Optional[List[dict]] = None, ) -> None: - """ - Create a Pinecone index from a list of contexts. - Modifies the index argument in-place. + """Create an index from a list of contexts. + + It modifies the index argument in-place! Args: contexts: List of contexts to embed. - index: Pinecone index to use. + index: Index to use. embeddings: Embeddings model to use. sparse_encoder: Sparse encoder to use. ids: List of ids to use for the documents. @@ -95,7 +95,7 @@ def create_index( class PineconeHybridSearchRetriever(BaseRetriever): - """Pinecone Hybrid Search Retriever.""" + """`Pinecone Hybrid Search` retriever.""" embeddings: Embeddings """Embeddings model to use.""" diff --git a/libs/langchain/langchain/retrievers/pubmed.py b/libs/langchain/langchain/retrievers/pubmed.py index 751aae510e1..b87441e170c 100644 --- a/libs/langchain/langchain/retrievers/pubmed.py +++ b/libs/langchain/langchain/retrievers/pubmed.py @@ -6,7 +6,7 @@ from langchain.utilities.pubmed import PubMedAPIWrapper class PubMedRetriever(BaseRetriever, PubMedAPIWrapper): - """Retriever for PubMed API. + """`PubMed API` retriever. It wraps load() to get_relevant_documents(). It uses all PubMedAPIWrapper arguments without any change. diff --git a/libs/langchain/langchain/retrievers/re_phraser.py b/libs/langchain/langchain/retrievers/re_phraser.py index 50baea43f44..eba5910d430 100644 --- a/libs/langchain/langchain/retrievers/re_phraser.py +++ b/libs/langchain/langchain/retrievers/re_phraser.py @@ -23,9 +23,8 @@ DEFAULT_QUERY_PROMPT = PromptTemplate.from_template(DEFAULT_TEMPLATE) class RePhraseQueryRetriever(BaseRetriever): - - """Given a user query, use an LLM to re-phrase it. - Then, retrieve docs for re-phrased query.""" + """Given a query, use an LLM to re-phrase it. 
+    Then, retrieve docs for the re-phrased query."""
 
     retriever: BaseRetriever
     llm_chain: LLMChain
diff --git a/libs/langchain/langchain/retrievers/remote_retriever.py b/libs/langchain/langchain/retrievers/remote_retriever.py
index ae17401974c..3df22253e7d 100644
--- a/libs/langchain/langchain/retrievers/remote_retriever.py
+++ b/libs/langchain/langchain/retrievers/remote_retriever.py
@@ -11,7 +11,7 @@ from langchain.schema import BaseRetriever, Document
 
 
 class RemoteLangChainRetriever(BaseRetriever):
-    """Retriever for remote LangChain API."""
+    """`LangChain API` retriever."""
 
     url: str
     """URL of the remote LangChain API."""
diff --git a/libs/langchain/langchain/retrievers/self_query/chroma.py b/libs/langchain/langchain/retrievers/self_query/chroma.py
index 64c3fcbc45d..9f9900358a1 100644
--- a/libs/langchain/langchain/retrievers/self_query/chroma.py
+++ b/libs/langchain/langchain/retrievers/self_query/chroma.py
@@ -11,7 +11,7 @@ from langchain.chains.query_constructor.ir import (
 
 
 class ChromaTranslator(Visitor):
-    """Translate internal query language elements to valid filters."""
+    """Translate `Chroma` internal query language elements to valid filters."""
 
     allowed_operators = [Operator.AND, Operator.OR]
     """Subset of allowed logical operators."""
diff --git a/libs/langchain/langchain/retrievers/self_query/deeplake.py b/libs/langchain/langchain/retrievers/self_query/deeplake.py
index f50272b308a..1e65eb39a16 100644
--- a/libs/langchain/langchain/retrievers/self_query/deeplake.py
+++ b/libs/langchain/langchain/retrievers/self_query/deeplake.py
@@ -35,7 +35,7 @@ def can_cast_to_float(string: str) -> bool:
 
 
 class DeepLakeTranslator(Visitor):
-    """Logic for converting internal query language elements to valid filters."""
+    """Translate `DeepLake` internal query language elements to valid filters."""
 
     allowed_operators = [Operator.AND, Operator.OR]
     """Subset of allowed logical operators."""
diff --git a/libs/langchain/langchain/retrievers/self_query/elasticsearch.py b/libs/langchain/langchain/retrievers/self_query/elasticsearch.py
index 32a00f071ec..8f93ea034a6 100644
--- a/libs/langchain/langchain/retrievers/self_query/elasticsearch.py
+++ b/libs/langchain/langchain/retrievers/self_query/elasticsearch.py
@@ -11,7 +11,7 @@ from langchain.chains.query_constructor.ir import (
 
 
 class ElasticsearchTranslator(Visitor):
-    """Translate the internal query language elements to valid filters."""
+    """Translate `Elasticsearch` internal query language elements to valid filters."""
 
     allowed_comparators = [
         Comparator.EQ,
diff --git a/libs/langchain/langchain/retrievers/self_query/myscale.py b/libs/langchain/langchain/retrievers/self_query/myscale.py
index e50af7a1293..5c11b53d469 100644
--- a/libs/langchain/langchain/retrievers/self_query/myscale.py
+++ b/libs/langchain/langchain/retrievers/self_query/myscale.py
@@ -12,7 +12,7 @@ from langchain.chains.query_constructor.ir import (
 )
 
 
-def DEFAULT_COMPOSER(op_name: str) -> Callable:
+def _DEFAULT_COMPOSER(op_name: str) -> Callable:
     """
     Default composer for logical operators.
     Args:
@@ -30,9 +30,10 @@ def DEFAULT_COMPOSER(op_name: str) -> Callable:
     return f
 
 
-def FUNCTION_COMPOSER(op_name: str) -> Callable:
+def _FUNCTION_COMPOSER(op_name: str) -> Callable:
     """
     Composer for functions.
+
     Args:
         op_name: Name of the function.
 
@@ -48,7 +49,7 @@
 
 
 class MyScaleTranslator(Visitor):
-    """Translate internal query language elements to valid filters."""
+    """Translate `MyScale` internal query language elements to valid filters."""
 
     allowed_operators = [Operator.AND, Operator.OR, Operator.NOT]
     """Subset of allowed logical operators."""
@@ -64,16 +65,16 @@ class MyScaleTranslator(Visitor):
     ]
 
     map_dict = {
-        Operator.AND: DEFAULT_COMPOSER("AND"),
-        Operator.OR: DEFAULT_COMPOSER("OR"),
-        Operator.NOT: DEFAULT_COMPOSER("NOT"),
-        Comparator.EQ: DEFAULT_COMPOSER("="),
-        Comparator.GT: DEFAULT_COMPOSER(">"),
-        Comparator.GTE: DEFAULT_COMPOSER(">="),
-        Comparator.LT: DEFAULT_COMPOSER("<"),
-        Comparator.LTE: DEFAULT_COMPOSER("<="),
-        Comparator.CONTAIN: FUNCTION_COMPOSER("has"),
-        Comparator.LIKE: DEFAULT_COMPOSER("ILIKE"),
+        Operator.AND: _DEFAULT_COMPOSER("AND"),
+        Operator.OR: _DEFAULT_COMPOSER("OR"),
+        Operator.NOT: _DEFAULT_COMPOSER("NOT"),
+        Comparator.EQ: _DEFAULT_COMPOSER("="),
+        Comparator.GT: _DEFAULT_COMPOSER(">"),
+        Comparator.GTE: _DEFAULT_COMPOSER(">="),
+        Comparator.LT: _DEFAULT_COMPOSER("<"),
+        Comparator.LTE: _DEFAULT_COMPOSER("<="),
+        Comparator.CONTAIN: _FUNCTION_COMPOSER("has"),
+        Comparator.LIKE: _DEFAULT_COMPOSER("ILIKE"),
     }
 
     def __init__(self, metadata_key: str = "metadata") -> None:
diff --git a/libs/langchain/langchain/retrievers/self_query/pinecone.py b/libs/langchain/langchain/retrievers/self_query/pinecone.py
index 733065937f7..003514953cf 100644
--- a/libs/langchain/langchain/retrievers/self_query/pinecone.py
+++ b/libs/langchain/langchain/retrievers/self_query/pinecone.py
@@ -11,7 +11,7 @@ from langchain.chains.query_constructor.ir import (
 
 
 class PineconeTranslator(Visitor):
-    """Translate the internal query language elements to valid filters."""
+    """Translate `Pinecone` internal query language elements to valid filters."""
 
     allowed_comparators = (
         Comparator.EQ,
diff --git a/libs/langchain/langchain/retrievers/self_query/qdrant.py b/libs/langchain/langchain/retrievers/self_query/qdrant.py
index 5d5d2a0469a..55dc8d34578 100644
--- a/libs/langchain/langchain/retrievers/self_query/qdrant.py
+++ b/libs/langchain/langchain/retrievers/self_query/qdrant.py
@@ -16,7 +16,7 @@ if TYPE_CHECKING:
 
 
 class QdrantTranslator(Visitor):
-    """Translate the internal query language elements to valid filters."""
+    """Translate `Qdrant` internal query language elements to valid filters."""
 
     allowed_comparators = (
         Comparator.EQ,
diff --git a/libs/langchain/langchain/retrievers/self_query/weaviate.py b/libs/langchain/langchain/retrievers/self_query/weaviate.py
index cc4727c0951..13ab09891c1 100644
--- a/libs/langchain/langchain/retrievers/self_query/weaviate.py
+++ b/libs/langchain/langchain/retrievers/self_query/weaviate.py
@@ -11,7 +11,7 @@ from langchain.chains.query_constructor.ir import (
 
 
 class WeaviateTranslator(Visitor):
-    """Translate the internal query language elements to valid filters."""
+    """Translate `Weaviate` internal query language elements to valid filters."""
 
     allowed_operators = [Operator.AND, Operator.OR]
     """Subset of allowed logical operators."""
diff --git a/libs/langchain/langchain/retrievers/svm.py b/libs/langchain/langchain/retrievers/svm.py
index 9594ce0dce3..7c30fd5f0cc 100644
--- a/libs/langchain/langchain/retrievers/svm.py
+++ b/libs/langchain/langchain/retrievers/svm.py
@@ -26,7 +26,7 @@ def create_index(contexts: List[str], embeddings: Embeddings) -> np.ndarray:
 
 
 class SVMRetriever(BaseRetriever):
-    """SVM Retriever.
+    """`SVM` retriever.
 
     Largely based on
     https://github.com/karpathy/randomfun/blob/master/knn_vs_svm.ipynb
diff --git a/libs/langchain/langchain/retrievers/tfidf.py b/libs/langchain/langchain/retrievers/tfidf.py
index d5758f39424..fbc4e387926 100644
--- a/libs/langchain/langchain/retrievers/tfidf.py
+++ b/libs/langchain/langchain/retrievers/tfidf.py
@@ -9,7 +9,7 @@ from langchain.schema import BaseRetriever, Document
 
 
 class TFIDFRetriever(BaseRetriever):
-    """TF-IDF Retriever.
+    """`TF-IDF` retriever.
 
     Largely based on
     https://github.com/asvskartheek/Text-Retrieval/blob/master/TF-IDF%20Search%20Engine%20(SKLEARN).ipynb
diff --git a/libs/langchain/langchain/retrievers/time_weighted_retriever.py b/libs/langchain/langchain/retrievers/time_weighted_retriever.py
index de4ff2adf29..44d7c6ac6af 100644
--- a/libs/langchain/langchain/retrievers/time_weighted_retriever.py
+++ b/libs/langchain/langchain/retrievers/time_weighted_retriever.py
@@ -9,7 +9,7 @@ from langchain.vectorstores.base import VectorStore
 
 
 def _get_hours_passed(time: datetime.datetime, ref_time: datetime.datetime) -> float:
-    """Get the hours passed between two datetime objects."""
+    """Get the hours passed between two datetimes."""
     return (time - ref_time).total_seconds() / 3600
 
 
diff --git a/libs/langchain/langchain/retrievers/vespa_retriever.py b/libs/langchain/langchain/retrievers/vespa_retriever.py
index 5580172efbc..97c71d998bd 100644
--- a/libs/langchain/langchain/retrievers/vespa_retriever.py
+++ b/libs/langchain/langchain/retrievers/vespa_retriever.py
@@ -11,7 +11,7 @@ if TYPE_CHECKING:
 
 
 class VespaRetriever(BaseRetriever):
-    """Retriever that uses Vespa."""
+    """`Vespa` retriever."""
 
     app: Vespa
     """Vespa application to query."""
diff --git a/libs/langchain/langchain/retrievers/weaviate_hybrid_search.py b/libs/langchain/langchain/retrievers/weaviate_hybrid_search.py
index 3251a41cd18..8c2191b1661 100644
--- a/libs/langchain/langchain/retrievers/weaviate_hybrid_search.py
+++ b/libs/langchain/langchain/retrievers/weaviate_hybrid_search.py
@@ -10,7 +10,11 @@ from langchain.schema import BaseRetriever
 
 
 class WeaviateHybridSearchRetriever(BaseRetriever):
-    """Retriever for the Weaviate's hybrid search."""
+    """`Weaviate hybrid search` retriever.
+
+    See the documentation:
+    https://weaviate.io/blog/hybrid-search-explained
+    """
 
     client: Any
     """keyword arguments to pass to the Weaviate client."""
diff --git a/libs/langchain/langchain/retrievers/web_research.py b/libs/langchain/langchain/retrievers/web_research.py
index 5c25a411278..f51dbe8b807 100644
--- a/libs/langchain/langchain/retrievers/web_research.py
+++ b/libs/langchain/langchain/retrievers/web_research.py
@@ -24,7 +24,7 @@ logger = logging.getLogger(__name__)
 
 
 class SearchQueries(BaseModel):
-    """Search queries to run to research for the user's goal."""
+    """Search queries to research for the user's goal."""
 
     queries: List[str] = Field(
         ..., description="List of search queries to look up on Google"
@@ -66,7 +66,7 @@ class QuestionListOutputParser(PydanticOutputParser):
 
 
 class WebResearchRetriever(BaseRetriever):
-    """Retriever for web research based on the Google Search API."""
+    """`Google Search API` retriever."""
 
     # Inputs
     vectorstore: VectorStore = Field(
diff --git a/libs/langchain/langchain/retrievers/wikipedia.py b/libs/langchain/langchain/retrievers/wikipedia.py
index 6b13f0d2ddc..7b6b8c3f052 100644
--- a/libs/langchain/langchain/retrievers/wikipedia.py
+++ b/libs/langchain/langchain/retrievers/wikipedia.py
@@ -6,7 +6,7 @@ from langchain.utilities.wikipedia import WikipediaAPIWrapper
 
 
 class WikipediaRetriever(BaseRetriever, WikipediaAPIWrapper):
-    """Retriever for Wikipedia API.
+    """`Wikipedia API` retriever.
 
     It wraps load() to get_relevant_documents().
     It uses all WikipediaAPIWrapper arguments without any change.
diff --git a/libs/langchain/langchain/retrievers/zep.py b/libs/langchain/langchain/retrievers/zep.py
index 5cd0f89799a..dafb28c8245 100644
--- a/libs/langchain/langchain/retrievers/zep.py
+++ b/libs/langchain/langchain/retrievers/zep.py
@@ -14,7 +14,7 @@ if TYPE_CHECKING:
 
 
 class ZepRetriever(BaseRetriever):
-    """Retriever for the Zep long-term memory store.
+    """`Zep` long-term memory store retriever.
 
     Search your user's long-term chat history with Zep.
 
diff --git a/libs/langchain/langchain/retrievers/zilliz.py b/libs/langchain/langchain/retrievers/zilliz.py
index 583dbd83731..1da5a6b36b2 100644
--- a/libs/langchain/langchain/retrievers/zilliz.py
+++ b/libs/langchain/langchain/retrievers/zilliz.py
@@ -11,7 +11,7 @@ from langchain.vectorstores.zilliz import Zilliz
 
 
 class ZillizRetriever(BaseRetriever):
-    """Retriever for the Zilliz API."""
+    """`Zilliz API` retriever."""
 
     embedding_function: Embeddings
     """The underlying embedding function from which documents will be retrieved."""