Harrison/aleph alpha (#8735)

Co-authored-by: PiotrMazurek <piotr.mazurek@aleph-alpha.com> Co-authored-by: Bagatur <baskaryan@gmail.com>
2025-09-01 19:12:42 +00:00 · 2023-08-03 21:21:15 -07:00
parent 179a39954d
commit 6c3573e7f6
3 changed files with 118 additions and 31 deletions
--- a/libs/langchain/langchain/embeddings/aleph_alpha.py
+++ b/libs/langchain/langchain/embeddings/aleph_alpha.py
@@ -16,10 +16,11 @@ class AlephAlphaAsymmetricSemanticEmbedding(BaseModel, Embeddings):

    Example:
        .. code-block:: python
-
            from aleph_alpha import AlephAlphaAsymmetricSemanticEmbedding

-            embeddings = AlephAlphaSymmetricSemanticEmbedding()
+            embeddings = AlephAlphaAsymmetricSemanticEmbedding(
+                normalize=True, compress_to_size=128
+            )

            document = "This is a content of the document"
            query = "What is the content of the document?"
@@ -30,24 +31,55 @@ class AlephAlphaAsymmetricSemanticEmbedding(BaseModel, Embeddings):
    """

    client: Any  #: :meta private:
-    """Aleph Alpha client."""
-    model: Optional[str] = "luminous-base"
+
+    # Embedding params
+    model: str = "luminous-base"
    """Model name to use."""
-    hosting: Optional[str] = "https://api.aleph-alpha.com"
-    """Optional parameter that specifies which datacenters may process the request."""
-    normalize: Optional[bool] = True
-    """Should returned embeddings be normalized"""
-    compress_to_size: Optional[int] = 128
+    compress_to_size: Optional[int] = None
    """Should the returned embeddings come back as an original 5120-dim vector, 
    or should it be compressed to 128-dim."""
+    normalize: Optional[bool] = None
+    """Should returned embeddings be normalized"""
    contextual_control_threshold: Optional[int] = None
    """Attention control parameters only apply to those tokens that have 
    explicitly been set in the request."""
-    control_log_additive: Optional[bool] = True
+    control_log_additive: bool = True
    """Apply controls on prompt items by adding the log(control_factor) 
    to attention scores."""
+
+    # Client params
    aleph_alpha_api_key: Optional[str] = None
    """API key for Aleph Alpha API."""
+    host: str = "https://api.aleph-alpha.com"
+    """The hostname of the API host. 
+    The default one is "https://api.aleph-alpha.com"."""
+    hosting: Optional[str] = None
+    """Determines in which datacenters the request may be processed.
+    You can either set the parameter to "aleph-alpha" or omit it (defaulting to None).
+    Not setting this value, or setting it to None, gives us maximal flexibility 
+    in processing your request in our
+    own datacenters and on servers hosted with other providers. 
+    Choose this option for maximal availability.
+    Setting it to "aleph-alpha" allows us to only process the request 
+    in our own datacenters.
+    Choose this option for maximal data privacy."""
+    request_timeout_seconds: int = 305
+    """Client timeout that will be set for HTTP requests in the 
+    `requests` library's API calls.
+    Server will close all requests after 300 seconds with an internal server error."""
+    total_retries: int = 8
+    """The number of retries made in case requests fail with certain retryable
+    status codes. If the last
+    retry fails, a corresponding exception is raised. Note that between retries
+    an exponential backoff
+    is applied, starting with 0.5 s after the first retry and doubling for each
+    retry made. So with the
+    default setting of 8 retries, a total wait time of 63.5 s is added between
+    the retries."""
+    nice: bool = False
+    """Setting this to True will signal to the API that you intend to be
+    nice to other users
+    by de-prioritizing your request below concurrent ones."""

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
@@ -57,12 +89,21 @@ class AlephAlphaAsymmetricSemanticEmbedding(BaseModel, Embeddings):
        )
        try:
            from aleph_alpha_client import Client
+
+            values["client"] = Client(
+                token=aleph_alpha_api_key,
+                host=values["host"],
+                hosting=values["hosting"],
+                request_timeout_seconds=values["request_timeout_seconds"],
+                total_retries=values["total_retries"],
+                nice=values["nice"],
+            )
        except ImportError:
            raise ValueError(
                "Could not import aleph_alpha_client python package. "
                "Please install it with `pip install aleph_alpha_client`."
            )
-        values["client"] = Client(token=aleph_alpha_api_key)
+
        return values

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
@@ -152,7 +193,9 @@ class AlephAlphaSymmetricSemanticEmbedding(AlephAlphaAsymmetricSemanticEmbedding

            from aleph_alpha import AlephAlphaSymmetricSemanticEmbedding

-            embeddings = AlephAlphaAsymmetricSemanticEmbedding()
+            embeddings = AlephAlphaSymmetricSemanticEmbedding(
+                normalize=True, compress_to_size=128
+            )
            text = "This is a test text"

            doc_result = embeddings.embed_documents([text])
--- a/libs/langchain/langchain/llms/aleph_alpha.py
+++ b/libs/langchain/langchain/llms/aleph_alpha.py
@@ -125,12 +125,43 @@ class AlephAlpha(LLM):
    raw_completion: bool = False
    """Force the raw completion of the model to be returned."""

-    aleph_alpha_api_key: Optional[str] = None
-    """API key for Aleph Alpha API."""
-
    stop_sequences: Optional[List[str]] = None
    """Stop sequences to use."""

+    # Client params
+    aleph_alpha_api_key: Optional[str] = None
+    """API key for Aleph Alpha API."""
+    host: str = "https://api.aleph-alpha.com"
+    """The hostname of the API host. 
+    The default one is "https://api.aleph-alpha.com"."""
+    hosting: Optional[str] = None
+    """Determines in which datacenters the request may be processed.
+    You can either set the parameter to "aleph-alpha" or omit it (defaulting to None).
+    Not setting this value, or setting it to None, gives us maximal 
+    flexibility in processing your request in our
+    own datacenters and on servers hosted with other providers. 
+    Choose this option for maximal availability.
+    Setting it to "aleph-alpha" allows us to only process the 
+    request in our own datacenters.
+    Choose this option for maximal data privacy."""
+    request_timeout_seconds: int = 305
+    """Client timeout that will be set for HTTP requests in the 
+    `requests` library's API calls.
+    Server will close all requests after 300 seconds with an internal server error."""
+    total_retries: int = 8
+    """The number of retries made in case requests fail with certain retryable
+    status codes. If the last
+    retry fails, a corresponding exception is raised. Note that between retries
+    an exponential backoff
+    is applied, starting with 0.5 s after the first retry and doubling for
+    each retry made. So with the
+    default setting of 8 retries, a total wait time of 63.5 s is added
+    between the retries."""
+    nice: bool = False
+    """Setting this to True will signal to the API that you intend to be
+    nice to other users
+    by de-prioritizing your request below concurrent ones."""
+
    class Config:
        """Configuration for this pydantic object."""

@@ -143,9 +174,16 @@ class AlephAlpha(LLM):
            values, "aleph_alpha_api_key", "ALEPH_ALPHA_API_KEY"
        )
        try:
-            import aleph_alpha_client
+            from aleph_alpha_client import Client

-            values["client"] = aleph_alpha_client.Client(token=aleph_alpha_api_key)
+            values["client"] = Client(
+                token=aleph_alpha_api_key,
+                host=values["host"],
+                hosting=values["hosting"],
+                request_timeout_seconds=values["request_timeout_seconds"],
+                total_retries=values["total_retries"],
+                nice=values["nice"],
+            )
        except ImportError:
            raise ImportError(
                "Could not import aleph_alpha_client python package. "
@@ -241,3 +279,9 @@ class AlephAlpha(LLM):
        if stop is not None or self.stop_sequences is not None:
            text = enforce_stop_tokens(text, params["stop_sequences"])
        return text
+
+
+if __name__ == "__main__":
+    aa = AlephAlpha()
+
+    print(aa("How are you?"))