Harrison/aleph alpha (#8735)

Co-authored-by: PiotrMazurek <piotr.mazurek@aleph-alpha.com> Co-authored-by: Bagatur <baskaryan@gmail.com>
2025-08-08 04:25:46 +00:00 · 2023-08-03 21:21:15 -07:00 · 2023-08-03 21:21:15 -07:00 · 6c3573e7f6
commit 6c3573e7f6
parent 179a39954d
3 changed files with 118 additions and 31 deletions
--- a/docs/extras/integrations/text_embedding/aleph_alpha.ipynb
+++ b/docs/extras/integrations/text_embedding/aleph_alpha.ipynb
@ -20,7 +20,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
   "id": "8a920a89",
   "metadata": {},
   "outputs": [],
@ -30,7 +30,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
   "id": "f2d04da3",
   "metadata": {},
   "outputs": [],
@ -41,17 +41,17 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
   "id": "e6ecde96",
   "metadata": {},
   "outputs": [],
   "source": [
-    "embeddings = AlephAlphaAsymmetricSemanticEmbedding()"
+    "embeddings = AlephAlphaAsymmetricSemanticEmbedding(normalize=True, compress_to_size=128)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
   "id": "90e68411",
   "metadata": {},
   "outputs": [],
@ -61,7 +61,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
   "id": "55903233",
   "metadata": {},
   "outputs": [],
@ -79,7 +79,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
   "id": "eabb763a",
   "metadata": {},
   "outputs": [],
@ -89,7 +89,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 7,
   "id": "0ad799f7",
   "metadata": {},
   "outputs": [],
@ -99,17 +99,17 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
   "id": "af86dc10",
   "metadata": {},
   "outputs": [],
   "source": [
-    "embeddings = AlephAlphaSymmetricSemanticEmbedding()"
+    "embeddings = AlephAlphaSymmetricSemanticEmbedding(normalize=True, compress_to_size=128)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
   "id": "d292536f",
   "metadata": {},
   "outputs": [],
@ -119,7 +119,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
   "id": "c704a7cf",
   "metadata": {},
   "outputs": [],
@ -130,7 +130,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "33492471",
+   "id": "5d999f8f",
   "metadata": {},
   "outputs": [],
   "source": []
@ -152,7 +152,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.9.1"
+   "version": "3.9.13"
  },
  "vscode": {
   "interpreter": {
--- a/libs/langchain/langchain/embeddings/aleph_alpha.py
+++ b/libs/langchain/langchain/embeddings/aleph_alpha.py
@ -16,10 +16,11 @@ class AlephAlphaAsymmetricSemanticEmbedding(BaseModel, Embeddings):

    Example:
        .. code-block:: python
-
            from aleph_alpha import AlephAlphaAsymmetricSemanticEmbedding

-            embeddings = AlephAlphaSymmetricSemanticEmbedding()
+            embeddings = AlephAlphaAsymmetricSemanticEmbedding(
+                normalize=True, compress_to_size=128
+            )

            document = "This is a content of the document"
            query = "What is the content of the document?"
@ -30,24 +31,55 @@ class AlephAlphaAsymmetricSemanticEmbedding(BaseModel, Embeddings):
    """

    client: Any  #: :meta private:
-    """Aleph Alpha client."""
-    model: Optional[str] = "luminous-base"
+
+    # Embedding params
+    model: str = "luminous-base"
    """Model name to use."""
-    hosting: Optional[str] = "https://api.aleph-alpha.com"
-    """Optional parameter that specifies which datacenters may process the request."""
-    normalize: Optional[bool] = True
-    """Should returned embeddings be normalized"""
-    compress_to_size: Optional[int] = 128
+    compress_to_size: Optional[int] = None
    """Should the returned embeddings come back as an original 5120-dim vector, 
    or should it be compressed to 128-dim."""
+    normalize: Optional[bool] = None
+    """Should returned embeddings be normalized"""
    contextual_control_threshold: Optional[int] = None
    """Attention control parameters only apply to those tokens that have 
    explicitly been set in the request."""
-    control_log_additive: Optional[bool] = True
+    control_log_additive: bool = True
    """Apply controls on prompt items by adding the log(control_factor) 
    to attention scores."""
+
+    # Client params
    aleph_alpha_api_key: Optional[str] = None
    """API key for Aleph Alpha API."""
+    host: str = "https://api.aleph-alpha.com"
+    """The hostname of the API host. 
+    The default one is "https://api.aleph-alpha.com"."""
+    hosting: Optional[str] = None
+    """Determines in which datacenters the request may be processed.
+    You can either set the parameter to "aleph-alpha" or omit it (defaulting to None).
+    Not setting this value, or setting it to None, gives us maximal flexibility 
+    in processing your request in our
+    own datacenters and on servers hosted with other providers. 
+    Choose this option for maximal availability.
+    Setting it to "aleph-alpha" allows us to only process the request 
+    in our own datacenters.
+    Choose this option for maximal data privacy."""
+    request_timeout_seconds: int = 305
+    """Client timeout that will be set for HTTP requests in the 
+    `requests` library's API calls.
+    Server will close all requests after 300 seconds with an internal server error."""
+    total_retries: int = 8
+    """The number of retries made in case requests fail with certain retryable 
+    status codes. If the last
+    retry fails, a corresponding exception is raised. Note that between retries
+    an exponential backoff
+    is applied, starting with 0.5 s after the first retry and doubling for each 
+    retry made. So with the
+    default setting of 8 retries a total wait time of 63.5 s is added between 
+    the retries."""
+    nice: bool = False
+    """Setting this to True will signal to the API that you intend to be
+    nice to other users
+    by de-prioritizing your request below concurrent ones."""

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
@ -57,12 +89,21 @@ class AlephAlphaAsymmetricSemanticEmbedding(BaseModel, Embeddings):
        )
        try:
            from aleph_alpha_client import Client
+
+            values["client"] = Client(
+                token=aleph_alpha_api_key,
+                host=values["host"],
+                hosting=values["hosting"],
+                request_timeout_seconds=values["request_timeout_seconds"],
+                total_retries=values["total_retries"],
+                nice=values["nice"],
+            )
        except ImportError:
            raise ValueError(
                "Could not import aleph_alpha_client python package. "
                "Please install it with `pip install aleph_alpha_client`."
            )
-        values["client"] = Client(token=aleph_alpha_api_key)
+
        return values

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
@ -152,7 +193,9 @@ class AlephAlphaSymmetricSemanticEmbedding(AlephAlphaAsymmetricSemanticEmbedding

            from aleph_alpha import AlephAlphaSymmetricSemanticEmbedding

-            embeddings = AlephAlphaAsymmetricSemanticEmbedding()
+            embeddings = AlephAlphaSymmetricSemanticEmbedding(
+                normalize=True, compress_to_size=128
+            )
            text = "This is a test text"

            doc_result = embeddings.embed_documents([text])
--- a/libs/langchain/langchain/llms/aleph_alpha.py
+++ b/libs/langchain/langchain/llms/aleph_alpha.py
@ -125,12 +125,43 @@ class AlephAlpha(LLM):
    raw_completion: bool = False
    """Force the raw completion of the model to be returned."""

-    aleph_alpha_api_key: Optional[str] = None
-    """API key for Aleph Alpha API."""
-
    stop_sequences: Optional[List[str]] = None
    """Stop sequences to use."""

+    # Client params
+    aleph_alpha_api_key: Optional[str] = None
+    """API key for Aleph Alpha API."""
+    host: str = "https://api.aleph-alpha.com"
+    """The hostname of the API host. 
+    The default one is "https://api.aleph-alpha.com"."""
+    hosting: Optional[str] = None
+    """Determines in which datacenters the request may be processed.
+    You can either set the parameter to "aleph-alpha" or omit it (defaulting to None).
+    Not setting this value, or setting it to None, gives us maximal 
+    flexibility in processing your request in our
+    own datacenters and on servers hosted with other providers. 
+    Choose this option for maximal availability.
+    Setting it to "aleph-alpha" allows us to only process the 
+    request in our own datacenters.
+    Choose this option for maximal data privacy."""
+    request_timeout_seconds: int = 305
+    """Client timeout that will be set for HTTP requests in the 
+    `requests` library's API calls.
+    Server will close all requests after 300 seconds with an internal server error."""
+    total_retries: int = 8
+    """The number of retries made in case requests fail with certain retryable 
+    status codes. If the last
+    retry fails, a corresponding exception is raised. Note that between retries
+    an exponential backoff
+    is applied, starting with 0.5 s after the first retry and doubling for
+    each retry made. So with the
+    default setting of 8 retries a total wait time of 63.5 s is added 
+    between the retries."""
+    nice: bool = False
+    """Setting this to True will signal to the API that you intend to be
+    nice to other users
+    by de-prioritizing your request below concurrent ones."""
+
    class Config:
        """Configuration for this pydantic object."""

@ -143,9 +174,16 @@ class AlephAlpha(LLM):
            values, "aleph_alpha_api_key", "ALEPH_ALPHA_API_KEY"
        )
        try:
-            import aleph_alpha_client
+            from aleph_alpha_client import Client

-            values["client"] = aleph_alpha_client.Client(token=aleph_alpha_api_key)
+            values["client"] = Client(
+                token=aleph_alpha_api_key,
+                host=values["host"],
+                hosting=values["hosting"],
+                request_timeout_seconds=values["request_timeout_seconds"],
+                total_retries=values["total_retries"],
+                nice=values["nice"],
+            )
        except ImportError:
            raise ImportError(
                "Could not import aleph_alpha_client python package. "
@ -241,3 +279,9 @@ class AlephAlpha(LLM):
        if stop is not None or self.stop_sequences is not None:
            text = enforce_stop_tokens(text, params["stop_sequences"])
        return text
+
+
+if __name__ == "__main__":
+    aa = AlephAlpha()
+
+    print(aa("How are you?"))