Harrison/aleph alpha (#8735)

Co-authored-by: PiotrMazurek <piotr.mazurek@aleph-alpha.com> Co-authored-by: Bagatur <baskaryan@gmail.com>
2025-08-10 13:27:36 +00:00 · 2023-08-03 21:21:15 -07:00 · 2023-08-03 21:21:15 -07:00 · 6c3573e7f6
commit 6c3573e7f6
parent 179a39954d
3 changed files with 118 additions and 31 deletions
--- a/docs/extras/integrations/text_embedding/aleph_alpha.ipynb
+++ b/docs/extras/integrations/text_embedding/aleph_alpha.ipynb
@ -20,7 +20,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
   "id": "8a920a89",
   "metadata": {},
   "outputs": [],
@ -30,7 +30,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
   "id": "f2d04da3",
   "metadata": {},
   "outputs": [],
@ -41,17 +41,17 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
   "id": "e6ecde96",
   "metadata": {},
   "outputs": [],
   "source": [
-    "embeddings = AlephAlphaAsymmetricSemanticEmbedding()"
+    "embeddings = AlephAlphaAsymmetricSemanticEmbedding(normalize=True, compress_to_size=128)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
   "id": "90e68411",
   "metadata": {},
   "outputs": [],
@ -61,7 +61,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
   "id": "55903233",
   "metadata": {},
   "outputs": [],
@ -79,7 +79,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
   "id": "eabb763a",
   "metadata": {},
   "outputs": [],
@ -89,7 +89,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 7,
   "id": "0ad799f7",
   "metadata": {},
   "outputs": [],
@ -99,17 +99,17 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
   "id": "af86dc10",
   "metadata": {},
   "outputs": [],
   "source": [
-    "embeddings = AlephAlphaSymmetricSemanticEmbedding()"
+    "embeddings = AlephAlphaSymmetricSemanticEmbedding(normalize=True, compress_to_size=128)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
   "id": "d292536f",
   "metadata": {},
   "outputs": [],
@ -119,7 +119,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
   "id": "c704a7cf",
   "metadata": {},
   "outputs": [],
@ -130,7 +130,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "33492471",
+   "id": "5d999f8f",
   "metadata": {},
   "outputs": [],
   "source": []
@ -152,7 +152,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.9.1"
+   "version": "3.9.13"
  },
  "vscode": {
   "interpreter": {
--- a/libs/langchain/langchain/embeddings/aleph_alpha.py
+++ b/libs/langchain/langchain/embeddings/aleph_alpha.py
@ -16,10 +16,11 @@ class AlephAlphaAsymmetricSemanticEmbedding(BaseModel, Embeddings):
    Example:
        .. code-block:: python
            from aleph_alpha import AlephAlphaAsymmetricSemanticEmbedding
-            embeddings = AlephAlphaSymmetricSemanticEmbedding()
+            embeddings = AlephAlphaAsymmetricSemanticEmbedding(
                normalize=True, compress_to_size=128
            )
            document = "This is a content of the document"
            query = "What is the content of the document?"
@ -30,24 +31,55 @@ class AlephAlphaAsymmetricSemanticEmbedding(BaseModel, Embeddings):
    """
    client: Any  #: :meta private:
-    """Aleph Alpha client."""
+
-    model: Optional[str] = "luminous-base"
+    # Embedding params
    model: str = "luminous-base"
    """Model name to use."""
-    hosting: Optional[str] = "https://api.aleph-alpha.com"
+    compress_to_size: Optional[int] = None
    """Optional parameter that specifies which datacenters may process the request."""
    normalize: Optional[bool] = True
    """Should returned embeddings be normalized"""
    compress_to_size: Optional[int] = 128
    """Should the returned embeddings come back as an original 5120-dim vector, 
    or should it be compressed to 128-dim."""
    normalize: Optional[bool] = None
    """Should returned embeddings be normalized"""
    contextual_control_threshold: Optional[int] = None
    """Attention control parameters only apply to those tokens that have 
    explicitly been set in the request."""
-    control_log_additive: Optional[bool] = True
+    control_log_additive: bool = True
    """Apply controls on prompt items by adding the log(control_factor) 
    to attention scores."""
    # Client params
    aleph_alpha_api_key: Optional[str] = None
    """API key for Aleph Alpha API."""
    host: str = "https://api.aleph-alpha.com"
    """The hostname of the API host. 
    The default one is "https://api.aleph-alpha.com"."""
    hosting: Optional[str] = None
    """Determines in which datacenters the request may be processed.
    You can either set the parameter to "aleph-alpha" or omit it (defaulting to None).
    Not setting this value, or setting it to None, gives us maximal flexibility 
    in processing your request in our
    own datacenters and on servers hosted with other providers. 
    Choose this option for maximal availability.
    Setting it to "aleph-alpha" allows us to only process the request 
    in our own datacenters.
    Choose this option for maximal data privacy."""
    request_timeout_seconds: int = 305
    """Client timeout that will be set for HTTP requests in the 
    `requests` library's API calls.
    Server will close all requests after 300 seconds with an internal server error."""
    total_retries: int = 8
    """The number of retries made in case requests fail with certain retryable 
    status codes. If the last
    retry fails, a corresponding exception is raised. Note that between retries
    an exponential backoff
    is applied, starting with 0.5 s after the first retry and doubling for each 
    retry made. So with the
    default setting of 8 retries a total wait time of 63.5 s is added between 
    the retries."""
    nice: bool = False
    """Setting this to True, will signal to the API that you intend to be 
    nice to other users
    by de-prioritizing your request below concurrent ones."""
    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
@ -57,12 +89,21 @@ class AlephAlphaAsymmetricSemanticEmbedding(BaseModel, Embeddings):
        )
        try:
            from aleph_alpha_client import Client
            values["client"] = Client(
                token=aleph_alpha_api_key,
                host=values["host"],
                hosting=values["hosting"],
                request_timeout_seconds=values["request_timeout_seconds"],
                total_retries=values["total_retries"],
                nice=values["nice"],
            )
        except ImportError:
            raise ValueError(
                "Could not import aleph_alpha_client python package. "
                "Please install it with `pip install aleph_alpha_client`."
            )
-        values["client"] = Client(token=aleph_alpha_api_key)
+
        return values
    def embed_documents(self, texts: List[str]) -> List[List[float]]:
@ -152,7 +193,9 @@ class AlephAlphaSymmetricSemanticEmbedding(AlephAlphaAsymmetricSemanticEmbedding
            from aleph_alpha import AlephAlphaSymmetricSemanticEmbedding
-            embeddings = AlephAlphaAsymmetricSemanticEmbedding()
+            embeddings = AlephAlphaSymmetricSemanticEmbedding(
                normalize=True, compress_to_size=128
            )
            text = "This is a test text"
            doc_result = embeddings.embed_documents([text])
--- a/libs/langchain/langchain/llms/aleph_alpha.py
+++ b/libs/langchain/langchain/llms/aleph_alpha.py
@ -125,12 +125,43 @@ class AlephAlpha(LLM):
    raw_completion: bool = False
    """Force the raw completion of the model to be returned."""
    aleph_alpha_api_key: Optional[str] = None
    """API key for Aleph Alpha API."""
    stop_sequences: Optional[List[str]] = None
    """Stop sequences to use."""
    # Client params
    aleph_alpha_api_key: Optional[str] = None
    """API key for Aleph Alpha API."""
    host: str = "https://api.aleph-alpha.com"
    """The hostname of the API host. 
    The default one is "https://api.aleph-alpha.com"."""
    hosting: Optional[str] = None
    """Determines in which datacenters the request may be processed.
    You can either set the parameter to "aleph-alpha" or omit it (defaulting to None).
    Not setting this value, or setting it to None, gives us maximal 
    flexibility in processing your request in our
    own datacenters and on servers hosted with other providers. 
    Choose this option for maximal availability.
    Setting it to "aleph-alpha" allows us to only process the 
    request in our own datacenters.
    Choose this option for maximal data privacy."""
    request_timeout_seconds: int = 305
    """Client timeout that will be set for HTTP requests in the 
    `requests` library's API calls.
    Server will close all requests after 300 seconds with an internal server error."""
    total_retries: int = 8
    """The number of retries made in case requests fail with certain retryable 
    status codes. If the last
    retry fails, a corresponding exception is raised. Note that between retries
    an exponential backoff
    is applied, starting with 0.5 s after the first retry and doubling for
    each retry made. So with the
    default setting of 8 retries a total wait time of 63.5 s is added 
    between the retries."""
    nice: bool = False
    """Setting this to True, will signal to the API that you intend to be 
    nice to other users
    by de-prioritizing your request below concurrent ones."""
    class Config:
        """Configuration for this pydantic object."""
@ -143,9 +174,16 @@ class AlephAlpha(LLM):
            values, "aleph_alpha_api_key", "ALEPH_ALPHA_API_KEY"
        )
        try:
-            import aleph_alpha_client
+            from aleph_alpha_client import Client
-            values["client"] = aleph_alpha_client.Client(token=aleph_alpha_api_key)
+            values["client"] = Client(
                token=aleph_alpha_api_key,
                host=values["host"],
                hosting=values["hosting"],
                request_timeout_seconds=values["request_timeout_seconds"],
                total_retries=values["total_retries"],
                nice=values["nice"],
            )
        except ImportError:
            raise ImportError(
                "Could not import aleph_alpha_client python package. "
@ -241,3 +279,9 @@ class AlephAlpha(LLM):
        if stop is not None or self.stop_sequences is not None:
            text = enforce_stop_tokens(text, params["stop_sequences"])
        return text
 if __name__ == "__main__":
    aa = AlephAlpha()
    print(aa("How are you?"))