GradientLLM Docs update and model_id renaming. (#10963)

Related to #10800 

- Fixes errors in the docstring of GradientLLM / Gradient.ai LLM.
- Renames `model_id` to `model` and adapts this in all tests, to stay in
  sync with `GradientEmbeddings` and other LLMs.
- Improves the tests so that they verify the headers of the sent request.
- Makes `aiosession` a private attribute in the docs, since `pip install
  gradientai` will replace `aiosession` in a future release.
- Adds an example of how to fine-tune on the prompt template, as suggested
  in #10800 (a sketch of that workflow follows below).
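As a reference for reviewers, here is a minimal sketch of that fine-tuning workflow. It relies only on `GradientLLM`, the renamed `model` keyword, and the `train_unsupervised` method visible in the diff below; the template wording, the sample pair, and the env-var values are illustrative assumptions, not part of this commit.

import os

from langchain.llms import GradientLLM
from langchain.prompts import PromptTemplate

llm = GradientLLM(
    model=os.environ["GRADIENT_MODEL"],  # a fine-tunable adapter id (assumed)
    gradient_access_token=os.environ["GRADIENT_ACCESS_TOKEN"],
    gradient_workspace_id=os.environ["GRADIENT_WORKSPACE_ID"],
)

# Render (instruction, response) pairs through the prompt template and
# fine-tune the adapter on the resulting strings.
template = PromptTemplate(
    input_variables=["instruction", "response"],
    template="### Instruction: {instruction}\n\n### Response: {response}",
)
pairs = [("Say hello.", "Hello! How can I help?")]  # illustrative data
dataset = [template.format(instruction=i, response=r) for i, r in pairs]
llm.train_unsupervised(dataset)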
Michael Feil authored on 2023-10-13 22:57:58 +02:00, committed by GitHub
parent 6876b02c87
commit 233a904f2e
6 changed files with 329 additions and 64 deletions

View File

@@ -12,6 +12,8 @@ from langchain.pydantic_v1 import BaseModel, Extra, root_validator
 from langchain.schema.embeddings import Embeddings
 from langchain.utils import get_from_dict_or_env
 
+__all__ = ["GradientEmbeddings"]
+
 
 class GradientEmbeddings(BaseModel, Embeddings):
     """Gradient.ai Embedding models.
@@ -48,7 +50,7 @@ class GradientEmbeddings(BaseModel, Embeddings):
     gradient_api_url: str = "https://api.gradient.ai/api"
     """Endpoint URL to use."""
 
-    client: Any  #: :meta private:
+    client: Any = None  #: :meta private:
     """Gradient client."""
 
     # LLM call kwargs
@@ -143,8 +145,9 @@ class GradientEmbeddings(BaseModel, Embeddings):
         return embeddings[0]
 
 
-class TinyAsyncGradientEmbeddingClient:
-    """A helper tool to embed Gradient. Not part of Langchain's or Gradients stable API.
+class TinyAsyncGradientEmbeddingClient:  #: :meta private:
+    """A helper tool to embed Gradient. Not part of Langchain's or Gradients stable API,
+    direct use discouraged.
 
     To use, set the environment variable ``GRADIENT_ACCESS_TOKEN`` with your
     API token and ``GRADIENT_WORKSPACE_ID`` for your gradient workspace,
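For orientation, a minimal usage sketch of `GradientEmbeddings` under the env vars named in this docstring; the model name and the texts are assumptions for illustration only.

from langchain.embeddings import GradientEmbeddings

# GRADIENT_ACCESS_TOKEN and GRADIENT_WORKSPACE_ID are read from the env.
embeddings = GradientEmbeddings(model="bge-large")  # model name assumed

doc_vectors = embeddings.embed_documents(["Gradient.ai hosts embedding models."])
query_vector = embeddings.embed_query("Who hosts embedding models?")
assert len(doc_vectors[0]) == len(query_vector)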

View File

@@ -1,4 +1,7 @@
-from typing import Any, Dict, List, Mapping, Optional, Sequence, TypedDict, Union
+import asyncio
+import logging
+from concurrent.futures import ThreadPoolExecutor
+from typing import Any, Dict, List, Mapping, Optional, Sequence, TypedDict
 
 import aiohttp
 import requests
@@ -7,9 +10,10 @@ from langchain.callbacks.manager import (
     AsyncCallbackManagerForLLMRun,
     CallbackManagerForLLMRun,
 )
-from langchain.llms.base import LLM
+from langchain.llms.base import BaseLLM
 from langchain.llms.utils import enforce_stop_tokens
-from langchain.pydantic_v1 import Extra, root_validator
+from langchain.pydantic_v1 import Extra, Field, root_validator
+from langchain.schema import Generation, LLMResult
 from langchain.utils import get_from_dict_or_env
@@ -17,7 +21,7 @@ class TrainResult(TypedDict):
     loss: float
 
 
-class GradientLLM(LLM):
+class GradientLLM(BaseLLM):
     """Gradient.ai LLM Endpoints.
 
     GradientLLM is a class to interact with LLMs on gradient.ai
@@ -29,11 +33,11 @@ class GradientLLM(LLM):
     Example:
         .. code-block:: python
 
-            from langchain.llms.gradientai_endpoint import GradientAIEndpoint
+            from langchain.llms import GradientLLM
             GradientLLM(
-                model_id="cad6644_base_ml_model",
+                model="99148c6d-c2a0-4fbe-a4a7-e7c05bdb8a09_base_ml_model",
                 model_kwargs={
-                    "max_generated_token_count": 200,
+                    "max_generated_token_count": 128,
                     "temperature": 0.75,
                     "top_p": 0.95,
                     "top_k": 20,
@@ -45,7 +49,7 @@ class GradientLLM(LLM):
"""
model_id: str
model_id: str = Field(alias="model", min_length=2)
"Underlying gradient.ai model id (base or fine-tuned)."
gradient_workspace_id: Optional[str] = None
@@ -63,13 +67,14 @@ class GradientLLM(LLM):
     gradient_api_url: str = "https://api.gradient.ai/api"
     """Endpoint URL to use."""
 
-    aiosession: Optional[aiohttp.ClientSession] = None
-    """ClientSession, in case we want to reuse connection for better performance."""
+    aiosession: Optional[aiohttp.ClientSession] = None  #: :meta private:
+    """ClientSession, private, subject to change in upcoming releases."""
 
     # LLM call kwargs
 
     class Config:
         """Configuration for this pydantic object."""
 
+        allow_population_by_field_name = True
         extra = Extra.forbid
 
     @root_validator(allow_reuse=True)
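The pairing of `Field(alias="model", ...)` above with `allow_population_by_field_name = True` is what keeps both spellings constructible: the alias enables `model=`, while the config flag keeps the legacy `model_id=` keyword working. A standalone pydantic v1 sketch (the class name `M` is hypothetical):

from langchain.pydantic_v1 import BaseModel, Field

class M(BaseModel):
    model_id: str = Field(alias="model", min_length=2)

    class Config:
        allow_population_by_field_name = True

assert M(model="my-adapter").model_id == "my-adapter"     # new alias keyword
assert M(model_id="my-adapter").model_id == "my-adapter"  # legacy field name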
@@ -113,6 +118,16 @@ class GradientLLM(LLM):
values, "gradient_api_url", "GRADIENT_API_URL"
)
try:
import gradientai # noqa
except ImportError:
logging.warning(
"DeprecationWarning: `GradientLLM` will use "
"`pip install gradientai` in future releases of langchain."
)
except Exception:
pass
return values
@property
@@ -243,8 +258,8 @@ class GradientLLM(LLM):
     async def _acall(
         self,
         prompt: str,
-        stop: Union[List[str], None] = None,
-        run_manager: Union[AsyncCallbackManagerForLLMRun, None] = None,
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
         **kwargs: Any,
     ) -> str:
         """Async Call to Gradients API `model/{id}/complete`.
@@ -284,6 +299,49 @@ class GradientLLM(LLM):
         return text
 
+    def _generate(
+        self,
+        prompts: List[str],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> LLMResult:
+        """Run the LLM on the given prompt and input."""
+        # same thing with threading: fan the sync calls out over a small pool
+        def _inner_generate(prompt: str) -> List[Generation]:
+            return [
+                Generation(
+                    text=self._call(
+                        prompt=prompt, stop=stop, run_manager=run_manager, **kwargs
+                    )
+                )
+            ]
+
+        if len(prompts) <= 1:
+            generations = list(map(_inner_generate, prompts))
+        else:
+            with ThreadPoolExecutor(min(8, len(prompts))) as p:
+                generations = list(p.map(_inner_generate, prompts))
+
+        return LLMResult(generations=generations)
+
+    async def _agenerate(
+        self,
+        prompts: List[str],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> LLMResult:
+        """Run the LLM on the given prompt and input."""
+        generations = []
+        # `asyncio.gather` needs the coroutines unpacked as arguments and
+        # must be awaited; results come back in prompt order.
+        for generation in await asyncio.gather(
+            *(
+                self._acall(prompt, stop=stop, run_manager=run_manager, **kwargs)
+                for prompt in prompts
+            )
+        ):
+            generations.append([Generation(text=generation)])
+        return LLMResult(generations=generations)
+
     def train_unsupervised(
         self,
         inputs: Sequence[str],
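Taken together, the `_generate` override fans a batch of prompts out over a small thread pool, while `_agenerate` gathers all completions concurrently on the event loop. A usage sketch; the prompts and env-var values are assumptions, not from this diff:

import asyncio
import os

from langchain.llms import GradientLLM

llm = GradientLLM(
    model=os.environ["GRADIENT_MODEL"],
    gradient_access_token=os.environ["GRADIENT_ACCESS_TOKEN"],
    gradient_workspace_id=os.environ["GRADIENT_WORKSPACE_ID"],
)

prompts = ["Say foo:", "Say baz:", "Say foo again:"]

result = llm.generate(prompts)  # sync path: up to 8 worker threads
print([gen[0].text for gen in result.generations])

result = asyncio.run(llm.agenerate(prompts))  # async path: asyncio.gather
print([gen[0].text for gen in result.generations])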

View File

@@ -6,7 +6,7 @@ You can get it by registering for free at https://gradient.ai/.
 You'll then need to set:
 - `GRADIENT_ACCESS_TOKEN` environment variable to your api key.
 - `GRADIENT_WORKSPACE_ID` environment variable to your workspace id.
-- `GRADIENT_MODEL_ID` environment variable to your workspace id.
+- `GRADIENT_MODEL` environment variable to your model id.
 """
 
 import os
@@ -15,8 +15,14 @@ from langchain.llms import GradientLLM
 def test_gradient_acall() -> None:
     """Test simple call to gradient.ai."""
-    model_id = os.environ["GRADIENT_MODEL_ID"]
-    llm = GradientLLM(model_id=model_id)
+    model = os.environ["GRADIENT_MODEL"]
+    gradient_access_token = os.environ["GRADIENT_ACCESS_TOKEN"]
+    gradient_workspace_id = os.environ["GRADIENT_WORKSPACE_ID"]
+    llm = GradientLLM(
+        model=model,
+        gradient_access_token=gradient_access_token,
+        gradient_workspace_id=gradient_workspace_id,
+    )
 
     output = llm("Say hello:", temperature=0.2, max_tokens=250)
 
     assert llm._llm_type == "gradient"
@@ -27,8 +33,14 @@ def test_gradient_acall() -> None:
 async def test_gradientai_acall() -> None:
     """Test async call to gradient.ai."""
-    model_id = os.environ["GRADIENT_MODEL_ID"]
-    llm = GradientLLM(model_id=model_id)
+    model = os.environ["GRADIENT_MODEL"]
+    gradient_access_token = os.environ["GRADIENT_ACCESS_TOKEN"]
+    gradient_workspace_id = os.environ["GRADIENT_WORKSPACE_ID"]
+    llm = GradientLLM(
+        model=model,
+        gradient_access_token=gradient_access_token,
+        gradient_workspace_id=gradient_workspace_id,
+    )
 
     output = await llm.agenerate(["Say hello:"], temperature=0.2, max_tokens=250)
 
     assert llm._llm_type == "gradient"
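To run these integration tests locally, the three env vars from the module docstring must be exported first; the placeholder values below are assumptions, not real credentials.

import os

os.environ["GRADIENT_ACCESS_TOKEN"] = "<your api key>"
os.environ["GRADIENT_WORKSPACE_ID"] = "<your workspace id>"
os.environ["GRADIENT_MODEL"] = "<your model id>"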

View File

@@ -1,5 +1,6 @@
 from typing import Dict
 
+import pytest
 from pytest_mock import MockerFixture
 
 from langchain.llms import GradientLLM
@@ -19,32 +20,45 @@ class MockResponse:
         return self.json_data
 
 
-def mocked_requests_post(
-    url: str,
-    headers: dict,
-    json: dict,
-) -> MockResponse:
+def mocked_requests_post(url: str, headers: dict, json: dict) -> MockResponse:
     assert url.startswith(_GRADIENT_BASE_URL)
-    assert headers
     assert _MODEL_ID in url
     assert json
+
+    assert headers
+    assert headers.get("authorization") == f"Bearer {_GRADIENT_SECRET}"
+    assert headers.get("x-gradient-workspace-id") == f"{_GRADIENT_WORKSPACE_ID}"
+
+    query = json.get("query")
+    assert query and isinstance(query, str)
+    output = "bar" if "foo" in query else "baz"
+
     return MockResponse(
-        json_data={"generatedOutput": "bar"},
+        json_data={"generatedOutput": output},
         status_code=200,
     )
 
 
-def test_gradient_llm_sync(
-    mocker: MockerFixture,
-) -> None:
+@pytest.mark.parametrize(
+    "setup",
+    [
+        dict(
+            gradient_api_url=_GRADIENT_BASE_URL,
+            gradient_access_token=_GRADIENT_SECRET,
+            gradient_workspace_id=_GRADIENT_WORKSPACE_ID,
+            model=_MODEL_ID,
+        ),
+        dict(
+            gradient_api_url=_GRADIENT_BASE_URL,
+            gradient_access_token=_GRADIENT_SECRET,
+            gradient_workspace_id=_GRADIENT_WORKSPACE_ID,
+            model_id=_MODEL_ID,
+        ),
+    ],
+)
+def test_gradient_llm_sync(mocker: MockerFixture, setup: dict) -> None:
     mocker.patch("requests.post", side_effect=mocked_requests_post)
 
-    llm = GradientLLM(
-        gradient_api_url=_GRADIENT_BASE_URL,
-        gradient_access_token=_GRADIENT_SECRET,
-        gradient_workspace_id=_GRADIENT_WORKSPACE_ID,
-        model_id=_MODEL_ID,
-    )
+    llm = GradientLLM(**setup)
 
     assert llm.gradient_access_token == _GRADIENT_SECRET
     assert llm.gradient_api_url == _GRADIENT_BASE_URL
     assert llm.gradient_workspace_id == _GRADIENT_WORKSPACE_ID
@@ -54,3 +68,32 @@ def test_gradient_llm_sync(
want = "bar"
assert response == want
@pytest.mark.parametrize(
"setup",
[
dict(
gradient_api_url=_GRADIENT_BASE_URL,
gradient_access_token=_GRADIENT_SECRET,
gradient_workspace_id=_GRADIENT_WORKSPACE_ID,
model=_MODEL_ID,
)
],
)
def test_gradient_llm_sync_batch(mocker: MockerFixture, setup: dict) -> None:
mocker.patch("requests.post", side_effect=mocked_requests_post)
llm = GradientLLM(**setup)
assert llm.gradient_access_token == _GRADIENT_SECRET
assert llm.gradient_api_url == _GRADIENT_BASE_URL
assert llm.gradient_workspace_id == _GRADIENT_WORKSPACE_ID
assert llm.model_id == _MODEL_ID
inputs = ["Say foo:", "Say baz:", "Say foo again"]
response = llm._generate(inputs)
want = ["bar", "baz", "bar"]
assert len(response.generations) == len(inputs)
for i, gen in enumerate(response.generations):
assert gen[0].text == want[i]