community[major], core[patch], langchain[patch], experimental[patch]: Create langchain-community (#14463)

Moved the following modules to new package langchain-community in a backwards compatible fashion: ``` mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories 
community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community ``` Moved the following to core ``` mv langchain/langchain/utils/json_schema.py core/langchain_core/utils mv langchain/langchain/utils/html.py core/langchain_core/utils mv langchain/langchain/utils/strings.py core/langchain_core/utils cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py rm langchain/langchain/utils/env.py ``` See .scripts/community_split/script_integrations.sh for all changes
2025-09-09 06:53:59 +00:00 · 2023-12-11 13:53:30 -08:00
parent c0f4b95aa9
commit ed58eeb9c5
2446 changed files with 171805 additions and 137118 deletions
--- a/libs/community/langchain_community/embeddings/aleph_alpha.py
+++ b/libs/community/langchain_community/embeddings/aleph_alpha.py
@@ -0,0 +1,255 @@
+from typing import Any, Dict, List, Optional
+
+from langchain_core.embeddings import Embeddings
+from langchain_core.pydantic_v1 import BaseModel, root_validator
+from langchain_core.utils import get_from_dict_or_env
+
+
+class AlephAlphaAsymmetricSemanticEmbedding(BaseModel, Embeddings):
+    """Aleph Alpha's asymmetric semantic embedding.
+
+    AA provides you with an endpoint to embed a document and a query.
+    The models were optimized to make the embeddings of documents and
+    the query for a document as similar as possible.
+    To learn more, check out: https://docs.aleph-alpha.com/docs/tasks/semantic_embed/
+
+    Documents are embedded with ``SemanticRepresentation.Document`` and
+    queries with ``SemanticRepresentation.Query``.
+
+    Example:
+        .. code-block:: python
+
+            from langchain_community.embeddings.aleph_alpha import (
+                AlephAlphaAsymmetricSemanticEmbedding,
+            )
+
+            embeddings = AlephAlphaAsymmetricSemanticEmbedding(
+                normalize=True, compress_to_size=128
+            )
+
+            document = "This is a content of the document"
+            query = "What is the content of the document?"
+
+            doc_result = embeddings.embed_documents([document])
+            query_result = embeddings.embed_query(query)
+
+    """
+
+    client: Any  #: :meta private:
+
+    # Embedding params
+    model: str = "luminous-base"
+    """Model name to use."""
+    compress_to_size: Optional[int] = None
+    """Should the returned embeddings come back as an original 5120-dim vector,
+    or should it be compressed to 128-dim."""
+    normalize: Optional[bool] = None
+    """Should returned embeddings be normalized"""
+    contextual_control_threshold: Optional[int] = None
+    """Attention control parameters only apply to those tokens that have
+    explicitly been set in the request."""
+    control_log_additive: bool = True
+    """Apply controls on prompt items by adding the log(control_factor)
+    to attention scores."""
+
+    # Client params
+    aleph_alpha_api_key: Optional[str] = None
+    """API key for Aleph Alpha API."""
+    host: str = "https://api.aleph-alpha.com"
+    """The hostname of the API host.
+    The default one is "https://api.aleph-alpha.com")"""
+    hosting: Optional[str] = None
+    """Determines in which datacenters the request may be processed.
+    You can either set the parameter to "aleph-alpha" or omit it (defaulting to None).
+    Not setting this value, or setting it to None, gives us maximal flexibility
+    in processing your request in our
+    own datacenters and on servers hosted with other providers.
+    Choose this option for maximal availability.
+    Setting it to "aleph-alpha" allows us to only process the request
+    in our own datacenters.
+    Choose this option for maximal data privacy."""
+    request_timeout_seconds: int = 305
+    """Client timeout that will be set for HTTP requests in the
+    `requests` library's API calls.
+    Server will close all requests after 300 seconds with an internal server error."""
+    total_retries: int = 8
+    """The number of retries made in case requests fail with certain retryable
+    status codes. If the last
+    retry fails a corresponding exception is raised. Note, that between retries
+    an exponential backoff
+    is applied, starting with 0.5 s after the first retry and doubling for each
+    retry made. So with the
+    default setting of 8 retries a total wait time of 63.5 s is added between
+    the retries."""
+    nice: bool = False
+    """Setting this to True, will signal to the API that you intend to be
+    nice to other users
+    by de-prioritizing your request below concurrent ones."""
+
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate that api key and python package exists in environment.
+
+        Looks up the API key via ``get_from_dict_or_env`` (field
+        ``aleph_alpha_api_key`` / variable ``ALEPH_ALPHA_API_KEY``), then
+        builds the ``aleph_alpha_client.Client`` and stores it under
+        ``values["client"]``.
+
+        Args:
+            values: The field values collected for this model.
+
+        Returns:
+            The same ``values`` dict with ``"client"`` populated.
+
+        Raises:
+            ValueError: If the ``aleph_alpha_client`` package cannot be imported.
+        """
+        aleph_alpha_api_key = get_from_dict_or_env(
+            values, "aleph_alpha_api_key", "ALEPH_ALPHA_API_KEY"
+        )
+        # Only the import is guarded: an ImportError raised while constructing
+        # the client must not be misreported as "package not installed".
+        try:
+            from aleph_alpha_client import Client
+        except ImportError as exc:
+            raise ValueError(
+                "Could not import aleph_alpha_client python package. "
+                "Please install it with `pip install aleph_alpha_client`."
+            ) from exc
+
+        values["client"] = Client(
+            token=aleph_alpha_api_key,
+            host=values["host"],
+            hosting=values["hosting"],
+            request_timeout_seconds=values["request_timeout_seconds"],
+            total_retries=values["total_retries"],
+            nice=values["nice"],
+        )
+        return values
+
+    def _semantic_embed(self, text: str, representation_name: str) -> List[float]:
+        """Embed one text with the named ``SemanticRepresentation`` member.
+
+        Args:
+            text: The text to embed.
+            representation_name: Attribute name on ``SemanticRepresentation``
+                (``"Document"`` or ``"Query"`` for this class).
+
+        Returns:
+            The ``embedding`` attribute of the client's response.
+
+        Raises:
+            ValueError: If the ``aleph_alpha_client`` package cannot be imported.
+        """
+        try:
+            from aleph_alpha_client import (
+                Prompt,
+                SemanticEmbeddingRequest,
+                SemanticRepresentation,
+            )
+        except ImportError as exc:
+            raise ValueError(
+                "Could not import aleph_alpha_client python package. "
+                "Please install it with `pip install aleph_alpha_client`."
+            ) from exc
+
+        # The enum is imported lazily above, so callers pass the member name
+        # and it is resolved here.
+        request = SemanticEmbeddingRequest(
+            prompt=Prompt.from_text(text),
+            representation=getattr(SemanticRepresentation, representation_name),
+            compress_to_size=self.compress_to_size,
+            normalize=self.normalize,
+            contextual_control_threshold=self.contextual_control_threshold,
+            control_log_additive=self.control_log_additive,
+        )
+        response = self.client.semantic_embed(request=request, model=self.model)
+        return response.embedding
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        """Call out to Aleph Alpha's asymmetric Document endpoint.
+
+        Args:
+            texts: The list of texts to embed.
+
+        Returns:
+            List of embeddings, one for each text.
+        """
+        # One request per text, in input order.
+        return [self._semantic_embed(text, "Document") for text in texts]
+
+    def embed_query(self, text: str) -> List[float]:
+        """Call out to Aleph Alpha's asymmetric, query embedding endpoint.
+
+        Args:
+            text: The text to embed.
+
+        Returns:
+            Embeddings for the text.
+        """
+        return self._semantic_embed(text, "Query")
+
+
+class AlephAlphaSymmetricSemanticEmbedding(AlephAlphaAsymmetricSemanticEmbedding):
+    """The symmetric version of the Aleph Alpha's semantic embeddings.
+
+    The main difference is that here, both the documents and
+    queries are embedded with a SemanticRepresentation.Symmetric
+
+    Example:
+        .. code-block:: python
+
+            from langchain_community.embeddings.aleph_alpha import (
+                AlephAlphaSymmetricSemanticEmbedding,
+            )
+
+            embeddings = AlephAlphaSymmetricSemanticEmbedding(
+                normalize=True, compress_to_size=128
+            )
+            text = "This is a test text"
+
+            doc_result = embeddings.embed_documents([text])
+            query_result = embeddings.embed_query(text)
+    """
+
+    def _embed(self, text: str) -> List[float]:
+        """Embed one text with ``SemanticRepresentation.Symmetric``.
+
+        Args:
+            text: The text to embed.
+
+        Returns:
+            The ``embedding`` attribute of the client's response.
+
+        Raises:
+            ValueError: If the ``aleph_alpha_client`` package cannot be imported.
+        """
+        try:
+            from aleph_alpha_client import (
+                Prompt,
+                SemanticEmbeddingRequest,
+                SemanticRepresentation,
+            )
+        except ImportError as exc:
+            raise ValueError(
+                "Could not import aleph_alpha_client python package. "
+                "Please install it with `pip install aleph_alpha_client`."
+            ) from exc
+
+        request = SemanticEmbeddingRequest(
+            prompt=Prompt.from_text(text),
+            representation=SemanticRepresentation.Symmetric,
+            compress_to_size=self.compress_to_size,
+            normalize=self.normalize,
+            contextual_control_threshold=self.contextual_control_threshold,
+            control_log_additive=self.control_log_additive,
+        )
+        response = self.client.semantic_embed(request=request, model=self.model)
+        return response.embedding
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        """Call out to Aleph Alpha's symmetric embedding endpoint for documents.
+
+        Args:
+            texts: The list of texts to embed.
+
+        Returns:
+            List of embeddings, one for each text.
+        """
+        # One request per text, in input order.
+        return [self._embed(text) for text in texts]
+
+    def embed_query(self, text: str) -> List[float]:
+        """Call out to Aleph Alpha's symmetric embedding endpoint for a query.
+
+        Uses the same symmetric representation as ``embed_documents``.
+
+        Args:
+            text: The text to embed.
+
+        Returns:
+            Embeddings for the text.
+        """
+        return self._embed(text)