From 5454591b0af8a409ad07e73681d071be08ef4eab Mon Sep 17 00:00:00 2001
From: Leonid Ganeline
Date: Wed, 9 Aug 2023 15:49:06 -0700
Subject: [PATCH] docstrings cleanup (#8993)

Added/Updated docstrings @baskaryan
---
 .../agents/agent_toolkits/openapi/spec.py     | 10 ++
 libs/langchain/langchain/agents/xml/base.py   |  2 +
 .../langchain/chains/natbot/crawler.py        |  2 +
 .../chains/query_constructor/parser.py        |  1 +
 libs/langchain/langchain/chat_models/base.py  |  2 +
 .../langchain/document_loaders/airbyte.py     | 97 +++++++++++++++++++
 .../document_loaders/parsers/audio.py         | 15 ++-
 .../nuclia_text_transform.py                  |  2 +-
 libs/langchain/langchain/embeddings/awa.py    |  8 ++
 libs/langchain/langchain/embeddings/embaas.py |  2 +-
 .../langchain/langchain/evaluation/loading.py |  2 +-
 libs/langchain/langchain/llms/aviary.py       | 15 +++
 libs/langchain/langchain/llms/fake.py         |  2 +
 libs/langchain/langchain/llms/vllm.py         |  2 +
 libs/langchain/langchain/schema/messages.py   | 12 +++
 libs/langchain/langchain/schema/output.py     |  9 ++
 .../langchain/schema/output_parser.py         |  2 +
 libs/langchain/langchain/schema/runnable.py   | 12 +++
 .../langchain/tools/multion/create_session.py |  8 ++
 .../langchain/tools/multion/update_session.py |  8 ++
 libs/langchain/langchain/tools/nuclia/tool.py |  9 ++
 libs/langchain/langchain/utilities/portkey.py |  7 ++
 .../langchain/utilities/spark_sql.py          | 20 +++-
 libs/langchain/langchain/utils/utils.py       |  8 +-
 .../vectorstores/alibabacloud_opensearch.py   | 11 ++-
 .../langchain/vectorstores/hologres.py        | 10 ++
 .../langchain/vectorstores/pgembedding.py     |  2 +
 .../langchain/langchain/vectorstores/scann.py |  1 +
 28 files changed, 269 insertions(+), 12 deletions(-)

diff --git a/libs/langchain/langchain/agents/agent_toolkits/openapi/spec.py b/libs/langchain/langchain/agents/agent_toolkits/openapi/spec.py
index 1c717db7b94..fa26b3c5d0e 100644
--- a/libs/langchain/langchain/agents/agent_toolkits/openapi/spec.py
+++ b/libs/langchain/langchain/agents/agent_toolkits/openapi/spec.py
@@ -55,6 +55,16 @@ def dereference_refs(spec_obj: dict, full_spec: dict) -> Union[dict, list]:
 
 @dataclass(frozen=True)
 class ReducedOpenAPISpec:
+    """A reduced OpenAPI spec.
+
+    This is a quick and dirty representation for OpenAPI specs.
+
+    Attributes:
+        servers: The servers in the spec.
+        description: The description of the spec.
+        endpoints: The endpoints in the spec.
+    """
+
     servers: List[dict]
     description: str
     endpoints: List[Tuple[str, str, dict]]
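For orientation, a minimal sketch of constructing and reading this dataclass; the
endpoint data below is illustrative, not taken from the patch:

    from langchain.agents.agent_toolkits.openapi.spec import ReducedOpenAPISpec

    # Each endpoint is a (name, description, docs) triple, per the field types above.
    spec = ReducedOpenAPISpec(
        servers=[{"url": "https://api.example.com"}],
        description="A pared-down pet store API.",
        endpoints=[
            ("GET /pets", "List all pets.", {"responses": {"200": {"description": "OK"}}}),
        ],
    )
    for name, description, _docs in spec.endpoints:
        print(name, "-", description)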
+ """ + servers: List[dict] description: str endpoints: List[Tuple[str, str, dict]] diff --git a/libs/langchain/langchain/agents/xml/base.py b/libs/langchain/langchain/agents/xml/base.py index 8e93b54fe32..3462ebe66d5 100644 --- a/libs/langchain/langchain/agents/xml/base.py +++ b/libs/langchain/langchain/agents/xml/base.py @@ -10,6 +10,8 @@ from langchain.tools.base import BaseTool class XMLAgentOutputParser(AgentOutputParser): + """Output parser for XMLAgent.""" + def parse(self, text: str) -> Union[AgentAction, AgentFinish]: if "" in text: tool, tool_input = text.split("") diff --git a/libs/langchain/langchain/chains/natbot/crawler.py b/libs/langchain/langchain/chains/natbot/crawler.py index 69fd51122b4..442781e551c 100644 --- a/libs/langchain/langchain/chains/natbot/crawler.py +++ b/libs/langchain/langchain/chains/natbot/crawler.py @@ -49,6 +49,8 @@ class ElementInViewPort(TypedDict): class Crawler: + """A crawler for web pages.""" + def __init__(self) -> None: try: from playwright.sync_api import sync_playwright diff --git a/libs/langchain/langchain/chains/query_constructor/parser.py b/libs/langchain/langchain/chains/query_constructor/parser.py index 8e685786d45..6cbc42b5e73 100644 --- a/libs/langchain/langchain/chains/query_constructor/parser.py +++ b/libs/langchain/langchain/chains/query_constructor/parser.py @@ -9,6 +9,7 @@ try: except ImportError: def v_args(*args: Any, **kwargs: Any) -> Any: # type: ignore + """Dummy decorator for when lark is not installed.""" return lambda _: None Transformer = object # type: ignore diff --git a/libs/langchain/langchain/chat_models/base.py b/libs/langchain/langchain/chat_models/base.py index 0a39dff54ac..8ef1479e1f8 100644 --- a/libs/langchain/langchain/chat_models/base.py +++ b/libs/langchain/langchain/chat_models/base.py @@ -51,6 +51,8 @@ def _get_verbosity() -> bool: class BaseChatModel(BaseLanguageModel[BaseMessageChunk], ABC): + """Base class for chat models.""" + cache: Optional[bool] = None """Whether to cache the response.""" verbose: bool = Field(default_factory=_get_verbosity) diff --git a/libs/langchain/langchain/document_loaders/airbyte.py b/libs/langchain/langchain/document_loaders/airbyte.py index aa670704655..51411f01200 100644 --- a/libs/langchain/langchain/document_loaders/airbyte.py +++ b/libs/langchain/langchain/document_loaders/airbyte.py @@ -19,6 +19,17 @@ class AirbyteCDKLoader(BaseLoader): record_handler: Optional[RecordHandler] = None, state: Optional[Any] = None, ) -> None: + """Initializes the loader. + + Args: + config: The config to pass to the source connector. + source_class: The source connector class. + stream_name: The name of the stream to load. + record_handler: A function that takes in a record and an optional id and + returns a Document. If None, the record will be used as the document. + Defaults to None. + state: The state to pass to the source connector. Defaults to None. 
+ """ from airbyte_cdk.models.airbyte_protocol import AirbyteRecordMessage from airbyte_cdk.sources.embedded.base_integration import ( BaseEmbeddedIntegration, @@ -26,6 +37,8 @@ class AirbyteCDKLoader(BaseLoader): from airbyte_cdk.sources.embedded.runner import CDKRunner class CDKIntegration(BaseEmbeddedIntegration): + """A wrapper around the CDK integration.""" + def _handle_record( self, record: AirbyteRecordMessage, id: Optional[str] ) -> Document: @@ -50,6 +63,8 @@ class AirbyteCDKLoader(BaseLoader): class AirbyteHubspotLoader(AirbyteCDKLoader): + """Loads records from Hubspot using an Airbyte source connector.""" + def __init__( self, config: Mapping[str, Any], @@ -57,6 +72,16 @@ class AirbyteHubspotLoader(AirbyteCDKLoader): record_handler: Optional[RecordHandler] = None, state: Optional[Any] = None, ) -> None: + """Initializes the loader. + + Args: + config: The config to pass to the source connector. + stream_name: The name of the stream to load. + record_handler: A function that takes in a record and an optional id and + returns a Document. If None, the record will be used as the document. + Defaults to None. + state: The state to pass to the source connector. Defaults to None. + """ source_class = guard_import( "source_hubspot", pip_name="airbyte-source-hubspot" ).SourceHubspot @@ -70,6 +95,8 @@ class AirbyteHubspotLoader(AirbyteCDKLoader): class AirbyteStripeLoader(AirbyteCDKLoader): + """Loads records from Stripe using an Airbyte source connector.""" + def __init__( self, config: Mapping[str, Any], @@ -77,6 +104,16 @@ class AirbyteStripeLoader(AirbyteCDKLoader): record_handler: Optional[RecordHandler] = None, state: Optional[Any] = None, ) -> None: + """Initializes the loader. + + Args: + config: The config to pass to the source connector. + stream_name: The name of the stream to load. + record_handler: A function that takes in a record and an optional id and + returns a Document. If None, the record will be used as the document. + Defaults to None. + state: The state to pass to the source connector. Defaults to None. + """ source_class = guard_import( "source_stripe", pip_name="airbyte-source-stripe" ).SourceStripe @@ -90,6 +127,8 @@ class AirbyteStripeLoader(AirbyteCDKLoader): class AirbyteTypeformLoader(AirbyteCDKLoader): + """Loads records from Typeform using an Airbyte source connector.""" + def __init__( self, config: Mapping[str, Any], @@ -97,6 +136,16 @@ class AirbyteTypeformLoader(AirbyteCDKLoader): record_handler: Optional[RecordHandler] = None, state: Optional[Any] = None, ) -> None: + """Initializes the loader. + + Args: + config: The config to pass to the source connector. + stream_name: The name of the stream to load. + record_handler: A function that takes in a record and an optional id and + returns a Document. If None, the record will be used as the document. + Defaults to None. + state: The state to pass to the source connector. Defaults to None. + """ source_class = guard_import( "source_typeform", pip_name="airbyte-source-typeform" ).SourceTypeform @@ -110,6 +159,8 @@ class AirbyteTypeformLoader(AirbyteCDKLoader): class AirbyteZendeskSupportLoader(AirbyteCDKLoader): + """Loads records from Zendesk Support using an Airbyte source connector.""" + def __init__( self, config: Mapping[str, Any], @@ -117,6 +168,16 @@ class AirbyteZendeskSupportLoader(AirbyteCDKLoader): record_handler: Optional[RecordHandler] = None, state: Optional[Any] = None, ) -> None: + """Initializes the loader. + + Args: + config: The config to pass to the source connector. 
diff --git a/libs/langchain/langchain/document_loaders/airbyte.py b/libs/langchain/langchain/document_loaders/airbyte.py
index aa670704655..51411f01200 100644
--- a/libs/langchain/langchain/document_loaders/airbyte.py
+++ b/libs/langchain/langchain/document_loaders/airbyte.py
@@ -19,6 +19,17 @@ class AirbyteCDKLoader(BaseLoader):
         record_handler: Optional[RecordHandler] = None,
         state: Optional[Any] = None,
     ) -> None:
+        """Initializes the loader.
+
+        Args:
+            config: The config to pass to the source connector.
+            source_class: The source connector class.
+            stream_name: The name of the stream to load.
+            record_handler: A function that takes in a record and an optional id and
+                returns a Document. If None, the record will be used as the document.
+                Defaults to None.
+            state: The state to pass to the source connector. Defaults to None.
+        """
         from airbyte_cdk.models.airbyte_protocol import AirbyteRecordMessage
         from airbyte_cdk.sources.embedded.base_integration import (
             BaseEmbeddedIntegration,
@@ -26,6 +37,8 @@ class AirbyteCDKLoader(BaseLoader):
         from airbyte_cdk.sources.embedded.runner import CDKRunner
 
         class CDKIntegration(BaseEmbeddedIntegration):
+            """A wrapper around the CDK integration."""
+
             def _handle_record(
                 self, record: AirbyteRecordMessage, id: Optional[str]
             ) -> Document:
@@ -50,6 +63,8 @@ class AirbyteCDKLoader(BaseLoader):
 
 
 class AirbyteHubspotLoader(AirbyteCDKLoader):
+    """Loads records from Hubspot using an Airbyte source connector."""
+
     def __init__(
         self,
         config: Mapping[str, Any],
@@ -57,6 +72,16 @@ class AirbyteHubspotLoader(AirbyteCDKLoader):
         record_handler: Optional[RecordHandler] = None,
         state: Optional[Any] = None,
     ) -> None:
+        """Initializes the loader.
+
+        Args:
+            config: The config to pass to the source connector.
+            stream_name: The name of the stream to load.
+            record_handler: A function that takes in a record and an optional id and
+                returns a Document. If None, the record will be used as the document.
+                Defaults to None.
+            state: The state to pass to the source connector. Defaults to None.
+        """
         source_class = guard_import(
             "source_hubspot", pip_name="airbyte-source-hubspot"
         ).SourceHubspot
@@ -70,6 +95,8 @@ class AirbyteHubspotLoader(AirbyteCDKLoader):
 
 
 class AirbyteStripeLoader(AirbyteCDKLoader):
+    """Loads records from Stripe using an Airbyte source connector."""
+
     def __init__(
         self,
         config: Mapping[str, Any],
@@ -77,6 +104,16 @@ class AirbyteStripeLoader(AirbyteCDKLoader):
         record_handler: Optional[RecordHandler] = None,
         state: Optional[Any] = None,
     ) -> None:
+        """Initializes the loader.
+
+        Args:
+            config: The config to pass to the source connector.
+            stream_name: The name of the stream to load.
+            record_handler: A function that takes in a record and an optional id and
+                returns a Document. If None, the record will be used as the document.
+                Defaults to None.
+            state: The state to pass to the source connector. Defaults to None.
+        """
         source_class = guard_import(
             "source_stripe", pip_name="airbyte-source-stripe"
         ).SourceStripe
@@ -90,6 +127,8 @@ class AirbyteStripeLoader(AirbyteCDKLoader):
 
 
 class AirbyteTypeformLoader(AirbyteCDKLoader):
+    """Loads records from Typeform using an Airbyte source connector."""
+
     def __init__(
         self,
         config: Mapping[str, Any],
@@ -97,6 +136,16 @@ class AirbyteTypeformLoader(AirbyteCDKLoader):
         record_handler: Optional[RecordHandler] = None,
         state: Optional[Any] = None,
     ) -> None:
+        """Initializes the loader.
+
+        Args:
+            config: The config to pass to the source connector.
+            stream_name: The name of the stream to load.
+            record_handler: A function that takes in a record and an optional id and
+                returns a Document. If None, the record will be used as the document.
+                Defaults to None.
+            state: The state to pass to the source connector. Defaults to None.
+        """
         source_class = guard_import(
             "source_typeform", pip_name="airbyte-source-typeform"
         ).SourceTypeform
@@ -110,6 +159,8 @@ class AirbyteTypeformLoader(AirbyteCDKLoader):
 
 
 class AirbyteZendeskSupportLoader(AirbyteCDKLoader):
+    """Loads records from Zendesk Support using an Airbyte source connector."""
+
     def __init__(
         self,
         config: Mapping[str, Any],
@@ -117,6 +168,16 @@ class AirbyteZendeskSupportLoader(AirbyteCDKLoader):
         record_handler: Optional[RecordHandler] = None,
         state: Optional[Any] = None,
     ) -> None:
+        """Initializes the loader.
+
+        Args:
+            config: The config to pass to the source connector.
+            stream_name: The name of the stream to load.
+            record_handler: A function that takes in a record and an optional id and
+                returns a Document. If None, the record will be used as the document.
+                Defaults to None.
+            state: The state to pass to the source connector. Defaults to None.
+        """
         source_class = guard_import(
             "source_zendesk_support", pip_name="airbyte-source-zendesk-support"
         ).SourceZendeskSupport
@@ -130,6 +191,8 @@ class AirbyteZendeskSupportLoader(AirbyteCDKLoader):
 
 
 class AirbyteShopifyLoader(AirbyteCDKLoader):
+    """Loads records from Shopify using an Airbyte source connector."""
+
     def __init__(
         self,
         config: Mapping[str, Any],
@@ -137,6 +200,16 @@ class AirbyteShopifyLoader(AirbyteCDKLoader):
         record_handler: Optional[RecordHandler] = None,
         state: Optional[Any] = None,
     ) -> None:
+        """Initializes the loader.
+
+        Args:
+            config: The config to pass to the source connector.
+            stream_name: The name of the stream to load.
+            record_handler: A function that takes in a record and an optional id and
+                returns a Document. If None, the record will be used as the document.
+                Defaults to None.
+            state: The state to pass to the source connector. Defaults to None.
+        """
         source_class = guard_import(
             "source_shopify", pip_name="airbyte-source-shopify"
         ).SourceShopify
@@ -150,6 +223,8 @@ class AirbyteShopifyLoader(AirbyteCDKLoader):
 
 
 class AirbyteSalesforceLoader(AirbyteCDKLoader):
+    """Loads records from Salesforce using an Airbyte source connector."""
+
     def __init__(
         self,
         config: Mapping[str, Any],
@@ -157,6 +232,16 @@ class AirbyteSalesforceLoader(AirbyteCDKLoader):
         record_handler: Optional[RecordHandler] = None,
         state: Optional[Any] = None,
     ) -> None:
+        """Initializes the loader.
+
+        Args:
+            config: The config to pass to the source connector.
+            stream_name: The name of the stream to load.
+            record_handler: A function that takes in a record and an optional id and
+                returns a Document. If None, the record will be used as the document.
+                Defaults to None.
+            state: The state to pass to the source connector. Defaults to None.
+        """
         source_class = guard_import(
             "source_salesforce", pip_name="airbyte-source-salesforce"
         ).SourceSalesforce
@@ -170,6 +255,8 @@ class AirbyteSalesforceLoader(AirbyteCDKLoader):
 
 
 class AirbyteGongLoader(AirbyteCDKLoader):
+    """Loads records from Gong using an Airbyte source connector."""
+
     def __init__(
         self,
         config: Mapping[str, Any],
@@ -177,6 +264,16 @@ class AirbyteGongLoader(AirbyteCDKLoader):
         record_handler: Optional[RecordHandler] = None,
         state: Optional[Any] = None,
     ) -> None:
+        """Initializes the loader.
+
+        Args:
+            config: The config to pass to the source connector.
+            stream_name: The name of the stream to load.
+            record_handler: A function that takes in a record and an optional id and
+                returns a Document. If None, the record will be used as the document.
+                Defaults to None.
+            state: The state to pass to the source connector. Defaults to None.
+        """
         source_class = guard_import(
             "source_gong", pip_name="airbyte-source-gong"
         ).SourceGong
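A hedged usage sketch for these loaders; the config keys and stream name are
illustrative and come from the corresponding Airbyte connector's spec, not from
this patch:

    from langchain.document_loaders.airbyte import AirbyteStripeLoader
    from langchain.schema import Document

    def handle_record(record, id):
        # Wrap the raw Airbyte record payload in a Document, per the
        # record_handler contract documented above.
        return Document(page_content=str(record.data), metadata={"id": id})

    loader = AirbyteStripeLoader(
        config={"api_key": "sk_test_xxx", "start_date": "2023-01-01T00:00:00Z"},
        stream_name="invoices",
        record_handler=handle_record,
    )
    docs = loader.load()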
diff --git a/libs/langchain/langchain/document_loaders/parsers/audio.py b/libs/langchain/langchain/document_loaders/parsers/audio.py
index fe394570a0b..91c6870f7e2 100644
--- a/libs/langchain/langchain/document_loaders/parsers/audio.py
+++ b/libs/langchain/langchain/document_loaders/parsers/audio.py
@@ -79,8 +79,10 @@ class OpenAIWhisperParser(BaseBlobParser):
 
 class OpenAIWhisperParserLocal(BaseBlobParser):
-    """Transcribe and parse audio files.
-    Audio transcription with OpenAI Whisper model locally from transformers
+    """Transcribe and parse audio files with OpenAI Whisper model.
+
+    Audio transcription with OpenAI Whisper model locally from transformers.
+
     Parameters:
     device - device to use
         NOTE: By default uses the gpu if available,
@@ -105,6 +107,15 @@ class OpenAIWhisperParserLocal(BaseBlobParser):
         lang_model: Optional[str] = None,
         forced_decoder_ids: Optional[Tuple[Dict]] = None,
     ):
+        """Initialize the parser.
+
+        Args:
+            device: device to use.
+            lang_model: whisper model to use, for example "openai/whisper-medium".
+                Defaults to None.
+            forced_decoder_ids: id states for decoder in a multilanguage model.
+                Defaults to None.
+        """
         try:
             from transformers import pipeline
         except ImportError:
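A hedged sketch of wiring this parser into a blob-based loader; the directory,
glob, and model name are illustrative:

    from langchain.document_loaders.blob_loaders import FileSystemBlobLoader
    from langchain.document_loaders.generic import GenericLoader
    from langchain.document_loaders.parsers.audio import OpenAIWhisperParserLocal

    loader = GenericLoader(
        FileSystemBlobLoader("./recordings", glob="*.mp3"),
        OpenAIWhisperParserLocal(lang_model="openai/whisper-medium"),
    )
    docs = loader.load()  # one transcribed Document per audio file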
+ """ + backend_url: str bearer: str @@ -89,6 +96,14 @@ class Aviary(LLM): AVIARY_URL and AVIARY_TOKEN environment variables must be set. + Attributes: + model: The name of the model to use. Defaults to "amazon/LightGPT". + aviary_url: The URL for the Aviary backend. Defaults to None. + aviary_token: The bearer token for the Aviary backend. Defaults to None. + use_prompt_format: If True, the prompt template for the model will be ignored. + Defaults to True. + version: API version to use for Aviary. Defaults to None. + Example: .. code-block:: python diff --git a/libs/langchain/langchain/llms/fake.py b/libs/langchain/langchain/llms/fake.py index 8aa0fbea751..d8a5a7fd037 100644 --- a/libs/langchain/langchain/llms/fake.py +++ b/libs/langchain/langchain/llms/fake.py @@ -56,6 +56,8 @@ class FakeListLLM(LLM): class FakeStreamingListLLM(FakeListLLM): + """Fake streaming list LLM for testing purposes.""" + def stream( self, input: LanguageModelInput, diff --git a/libs/langchain/langchain/llms/vllm.py b/libs/langchain/langchain/llms/vllm.py index 46da858c775..0a6c4dbec04 100644 --- a/libs/langchain/langchain/llms/vllm.py +++ b/libs/langchain/langchain/llms/vllm.py @@ -8,6 +8,8 @@ from langchain.schema.output import Generation, LLMResult class VLLM(BaseLLM): + """VLLM language model.""" + model: str = "" """The name or path of a HuggingFace Transformers model.""" diff --git a/libs/langchain/langchain/schema/messages.py b/libs/langchain/langchain/schema/messages.py index 1722602be36..3d6219f7d07 100644 --- a/libs/langchain/langchain/schema/messages.py +++ b/libs/langchain/langchain/schema/messages.py @@ -88,6 +88,8 @@ class BaseMessage(Serializable): class BaseMessageChunk(BaseMessage): + """A Message chunk, which can be concatenated with other Message chunks.""" + def _merge_kwargs_dict( self, left: Dict[str, Any], right: Dict[str, Any] ) -> Dict[str, Any]: @@ -145,6 +147,8 @@ class HumanMessage(BaseMessage): class HumanMessageChunk(HumanMessage, BaseMessageChunk): + """A Human Message chunk.""" + pass @@ -163,6 +167,8 @@ class AIMessage(BaseMessage): class AIMessageChunk(AIMessage, BaseMessageChunk): + """A Message chunk from an AI.""" + pass @@ -178,6 +184,8 @@ class SystemMessage(BaseMessage): class SystemMessageChunk(SystemMessage, BaseMessageChunk): + """A System Message chunk.""" + pass @@ -194,6 +202,8 @@ class FunctionMessage(BaseMessage): class FunctionMessageChunk(FunctionMessage, BaseMessageChunk): + """A Function Message chunk.""" + pass @@ -210,6 +220,8 @@ class ChatMessage(BaseMessage): class ChatMessageChunk(ChatMessage, BaseMessageChunk): + """A Chat Message chunk.""" + pass diff --git a/libs/langchain/langchain/schema/output.py b/libs/langchain/langchain/schema/output.py index 06d222ce889..10b9923bad6 100644 --- a/libs/langchain/langchain/schema/output.py +++ b/libs/langchain/langchain/schema/output.py @@ -29,6 +29,8 @@ class Generation(Serializable): class GenerationChunk(Generation): + """A Generation chunk, which can be concatenated with other Generation chunks.""" + def __add__(self, other: GenerationChunk) -> GenerationChunk: if isinstance(other, GenerationChunk): generation_info = ( @@ -62,6 +64,13 @@ class ChatGeneration(Generation): class ChatGenerationChunk(ChatGeneration): + """A ChatGeneration chunk, which can be concatenated with other + ChatGeneration chunks. + + Attributes: + message: The message chunk output by the chat model. 
+ """ + message: BaseMessageChunk def __add__(self, other: ChatGenerationChunk) -> ChatGenerationChunk: diff --git a/libs/langchain/langchain/schema/output_parser.py b/libs/langchain/langchain/schema/output_parser.py index aeeda0880df..9290c4cedec 100644 --- a/libs/langchain/langchain/schema/output_parser.py +++ b/libs/langchain/langchain/schema/output_parser.py @@ -56,6 +56,8 @@ class BaseLLMOutputParser(Serializable, Generic[T], ABC): class BaseGenerationOutputParser( BaseLLMOutputParser, Runnable[Union[str, BaseMessage], T] ): + """Base class to parse the output of an LLM call.""" + def invoke( self, input: Union[str, BaseMessage], config: Optional[RunnableConfig] = None ) -> T: diff --git a/libs/langchain/langchain/schema/runnable.py b/libs/langchain/langchain/schema/runnable.py index 84399a2c0b9..7ed4a739e1a 100644 --- a/libs/langchain/langchain/schema/runnable.py +++ b/libs/langchain/langchain/schema/runnable.py @@ -48,6 +48,8 @@ async def _gather_with_concurrency(n: Union[int, None], *coros: Coroutine) -> li class RunnableConfig(TypedDict, total=False): + """Configuration for a Runnable.""" + tags: List[str] """ Tags for this call and any sub-calls (eg. a Chain calling an LLM). @@ -74,6 +76,9 @@ Other = TypeVar("Other") class Runnable(Generic[Input, Output], ABC): + """A Runnable is a unit of work that can be invoked, batched, streamed, or + transformed.""" + def __or__( self, other: Union[ @@ -1325,6 +1330,13 @@ class RunnableBinding(Serializable, Runnable[Input, Output]): class RouterInput(TypedDict): + """A Router input. + + Attributes: + key: The key to route on. + input: The input to pass to the selected runnable. + """ + key: str input: Any diff --git a/libs/langchain/langchain/tools/multion/create_session.py b/libs/langchain/langchain/tools/multion/create_session.py index 9ae2332cf33..b0551e49cab 100644 --- a/libs/langchain/langchain/tools/multion/create_session.py +++ b/libs/langchain/langchain/tools/multion/create_session.py @@ -31,6 +31,14 @@ class CreateSessionSchema(BaseModel): class MultionCreateSession(BaseTool): + """Tool that creates a new Multion Browser Window with provided fields. + + Attributes: + name: The name of the tool. Default: "create_multion_session" + description: The description of the tool. + args_schema: The schema for the tool's arguments. + """ + name: str = "create_multion_session" description: str = """Use this tool to create a new Multion Browser Window \ with provided fields.Always the first step to run \ diff --git a/libs/langchain/langchain/tools/multion/update_session.py b/libs/langchain/langchain/tools/multion/update_session.py index 0e724726e93..1f20d70d65d 100644 --- a/libs/langchain/langchain/tools/multion/update_session.py +++ b/libs/langchain/langchain/tools/multion/update_session.py @@ -34,6 +34,14 @@ class UpdateSessionSchema(BaseModel): class MultionUpdateSession(BaseTool): + """Tool that updates an existing Multion Browser Window with provided fields. + + Attributes: + name: The name of the tool. Default: "update_multion_session" + description: The description of the tool. + args_schema: The schema for the tool's arguments. 
diff --git a/libs/langchain/langchain/schema/output_parser.py b/libs/langchain/langchain/schema/output_parser.py
index aeeda0880df..9290c4cedec 100644
--- a/libs/langchain/langchain/schema/output_parser.py
+++ b/libs/langchain/langchain/schema/output_parser.py
@@ -56,6 +56,8 @@ class BaseLLMOutputParser(Serializable, Generic[T], ABC):
 class BaseGenerationOutputParser(
     BaseLLMOutputParser, Runnable[Union[str, BaseMessage], T]
 ):
+    """Base class to parse the output of an LLM call."""
+
     def invoke(
         self, input: Union[str, BaseMessage], config: Optional[RunnableConfig] = None
     ) -> T:
diff --git a/libs/langchain/langchain/schema/runnable.py b/libs/langchain/langchain/schema/runnable.py
index 84399a2c0b9..7ed4a739e1a 100644
--- a/libs/langchain/langchain/schema/runnable.py
+++ b/libs/langchain/langchain/schema/runnable.py
@@ -48,6 +48,8 @@ async def _gather_with_concurrency(n: Union[int, None], *coros: Coroutine) -> li
 
 class RunnableConfig(TypedDict, total=False):
+    """Configuration for a Runnable."""
+
     tags: List[str]
     """
     Tags for this call and any sub-calls (eg. a Chain calling an LLM).
     """
@@ -74,6 +76,9 @@ Other = TypeVar("Other")
 
 class Runnable(Generic[Input, Output], ABC):
+    """A Runnable is a unit of work that can be invoked, batched, streamed, or
+    transformed."""
+
     def __or__(
         self,
         other: Union[
@@ -1325,6 +1330,13 @@ class RunnableBinding(Serializable, Runnable[Input, Output]):
 
 class RouterInput(TypedDict):
+    """A Router input.
+
+    Attributes:
+        key: The key to route on.
+        input: The input to pass to the selected runnable.
+    """
+
     key: str
     input: Any
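The __or__ hook shown above is what lets runnables compose with the pipe
operator. A hedged sketch, assuming RunnableLambda is available in this
version of langchain.schema.runnable:

    from langchain.schema.runnable import RunnableLambda

    # __or__ turns `a | b` into a sequence that feeds a's output into b.
    chain = RunnableLambda(lambda x: x + 1) | RunnableLambda(lambda x: x * 2)
    print(chain.invoke(3))  # (3 + 1) * 2 == 8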
+ """ for field_name in list(values): if field_name in extra_kwargs: raise ValueError(f"Found {field_name} supplied twice.") diff --git a/libs/langchain/langchain/vectorstores/alibabacloud_opensearch.py b/libs/langchain/langchain/vectorstores/alibabacloud_opensearch.py index 10385e56805..f8cf664cc4d 100644 --- a/libs/langchain/langchain/vectorstores/alibabacloud_opensearch.py +++ b/libs/langchain/langchain/vectorstores/alibabacloud_opensearch.py @@ -12,19 +12,20 @@ logger = logging.getLogger() class AlibabaCloudOpenSearchSettings: - """Opensearch Client Configuration + """Alibaba Cloud Opensearch Client Configuration. + Attribute: endpoint (str) : The endpoint of opensearch instance, You can find it - from the console of Alibaba Cloud OpenSearch. + from the console of Alibaba Cloud OpenSearch. instance_id (str) : The identify of opensearch instance, You can find - it from the console of Alibaba Cloud OpenSearch. + it from the console of Alibaba Cloud OpenSearch. datasource_name (str): The name of the data source specified when creating it. username (str) : The username specified when purchasing the instance. password (str) : The password specified when purchasing the instance. embedding_index_name (str) : The name of the vector attribute specified - when configuring the instance attributes. + when configuring the instance attributes. field_name_mapping (Dict) : Using field name mapping between opensearch - vector store and opensearch instance configuration table field names: + vector store and opensearch instance configuration table field names: { 'id': 'The id field name map of index document.', 'document': 'The text field name map of index document.', diff --git a/libs/langchain/langchain/vectorstores/hologres.py b/libs/langchain/langchain/vectorstores/hologres.py index 23073708551..092dc24c364 100644 --- a/libs/langchain/langchain/vectorstores/hologres.py +++ b/libs/langchain/langchain/vectorstores/hologres.py @@ -16,7 +16,17 @@ _LANGCHAIN_DEFAULT_TABLE_NAME = "langchain_pg_embedding" class HologresWrapper: + """Wrapper around Hologres service.""" + def __init__(self, connection_string: str, ndims: int, table_name: str) -> None: + """Initialize the wrapper. + + Args: + connection_string: Hologres connection string. + ndims: Number of dimensions of the embedding output. + table_name: Name of the table to store embeddings and data. + """ + import psycopg2 self.table_name = table_name diff --git a/libs/langchain/langchain/vectorstores/pgembedding.py b/libs/langchain/langchain/vectorstores/pgembedding.py index 02e1936a724..4c820636c62 100644 --- a/libs/langchain/langchain/vectorstores/pgembedding.py +++ b/libs/langchain/langchain/vectorstores/pgembedding.py @@ -87,6 +87,8 @@ class EmbeddingStore(BaseModel): class QueryResult: + """QueryResult is a result from a query.""" + EmbeddingStore: EmbeddingStore distance: float diff --git a/libs/langchain/langchain/vectorstores/scann.py b/libs/langchain/langchain/vectorstores/scann.py index b1eb9a9db15..a1ce4af4808 100644 --- a/libs/langchain/langchain/vectorstores/scann.py +++ b/libs/langchain/langchain/vectorstores/scann.py @@ -18,6 +18,7 @@ from langchain.vectorstores.utils import DistanceStrategy def normalize(x: np.ndarray) -> np.ndarray: + """Normalize vectors to unit length.""" x /= np.clip(np.linalg.norm(x, axis=-1, keepdims=True), 1e-12, None) return x