mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-09 13:00:34 +00:00
community[patch]: Add param "task" to Databricks LLM to work around serialization of transform_output_fn (#14933)
**What is the reproduce code?** ```python from langchain.chains import LLMChain, load_chain from langchain.llms import Databricks from langchain.prompts import PromptTemplate def transform_output(response): # Extract the answer from the responses. return str(response["candidates"][0]["text"]) def transform_input(**request): full_prompt = f"""{request["prompt"]} Be Concise. """ request["prompt"] = full_prompt return request chat_model = Databricks( endpoint_name="llama2-13B-chat-Brambles", transform_input_fn=transform_input, transform_output_fn=transform_output, verbose=True, ) print(f"Test chat model: {chat_model('What is Apache Spark')}") # This works llm_chain = LLMChain(llm=chat_model, prompt=PromptTemplate.from_template("{chat_input}")) llm_chain("colorful socks") # this works llm_chain.save("databricks_llm_chain.yaml") # transform_input_fn and transform_output_fn are not serialized into the model yaml file loaded_chain = load_chain("databricks_llm_chain.yaml") # The Databricks LLM is recreated with transform_input_fn=None, transform_output_fn=None. loaded_chain("colorful socks") # Thus this errors. The transform_output_fn is needed to produce the correct output ``` Error: ``` File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-6c34afab-3473-421d-877f-1ef18930ef4d/lib/python3.10/site-packages/pydantic/v1/main.py", line 341, in __init__ raise validation_error pydantic.v1.error_wrappers.ValidationError: 1 validation error for Generation text str type expected (type=type_error.str) request payload: {'query': 'What is a databricks notebook?'} ``` **What does the error mean?** When the LLM generates an answer, it is represented by a Generation data object. The Generation data object takes a str field called text, e.g. Generation(text="blah"). However, the Databricks LLM tried to put a non-str value into text, e.g. Generation(text={"candidates":[{"text": "blah"}]}). Thus, pydantic errors. 
**Why does the output format become incorrect after saving and loading the Databricks LLM?** The Databricks LLM does not support serializing transform_input_fn and transform_output_fn, so they are not serialized into the model yaml file. When the Databricks LLM is loaded, it is recreated with transform_input_fn=None, transform_output_fn=None. Without transform_output_fn, the output text is not unwrapped, thus it errors. The missing transform_output_fn causes this error. The missing transform_input_fn causes the additional prompt "Be Concise." to be lost after saving and loading. <!-- Thank you for contributing to LangChain! Replace this entire comment with: - **Description:** a description of the change, - **Issue:** the issue # it fixes (if applicable), - **Dependencies:** any dependencies required for this change, - **Tag maintainer:** for a quicker response, tag the relevant maintainer (see below), - **Twitter handle:** we announce bigger features on Twitter. If your PR gets announced, and you'd like a mention, we'll gladly shout you out! Please make sure your PR is passing linting and testing before submitting. Run `make format`, `make lint` and `make test` to check this locally. See contribution guidelines for more information on how to write/run tests, lint, etc: https://python.langchain.com/docs/contributing/ If you're adding a new integration, please include: 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/extras` directory. If no one reviews your PR within a few days, please @-mention one of @baskaryan, @eyurtsev, @hwchase17. --> --------- Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
1ea6d83188
commit
6479aab74f
@ -55,6 +55,10 @@ def _transform_completions(response: Dict[str, Any]) -> str:
|
||||
return response["choices"][0]["text"]
|
||||
|
||||
|
||||
def _transform_llama2_chat(response: Dict[str, Any]) -> str:
|
||||
return response["candidates"][0]["text"]
|
||||
|
||||
|
||||
def _transform_chat(response: Dict[str, Any]) -> str:
|
||||
return response["choices"][0]["message"]["content"]
|
||||
|
||||
@ -87,11 +91,12 @@ class _DatabricksServingEndpointClient(_DatabricksClientBase):
|
||||
"external_model",
|
||||
"foundation_model_api",
|
||||
)
|
||||
self.task = endpoint.get("task")
|
||||
if self.task is None:
|
||||
self.task = endpoint.get("task")
|
||||
|
||||
@property
def llm(self) -> bool:
    """Whether the serving endpoint hosts an LLM-style task.

    The diff view collapsed the old and new return statements into two
    consecutive ``return`` lines, leaving the second unreachable; only the
    post-change version is kept. ``"llama2/chat"`` is the additional task
    name accepted alongside the standard ``llm/v1/*`` tasks.

    NOTE(review): assumes ``self.task`` is a plain string task identifier
    (or None) — confirm against the endpoint metadata that sets it.
    """
    return self.task in ("llm/v1/chat", "llm/v1/completions", "llama2/chat")
|
||||
|
||||
@root_validator(pre=True)
|
||||
def set_api_url(cls, values: Dict[str, Any]) -> Dict[str, Any]:
|
||||
@ -125,6 +130,8 @@ class _DatabricksServingEndpointClient(_DatabricksClientBase):
|
||||
preds = response["predictions"]
|
||||
# For a single-record query, the result is not a list.
|
||||
pred = preds[0] if isinstance(preds, list) else preds
|
||||
if self.task == "llama2/chat":
|
||||
return _transform_llama2_chat(pred)
|
||||
return transform_output_fn(pred) if transform_output_fn else pred
|
||||
|
||||
|
||||
@ -325,6 +332,10 @@ class Databricks(LLM):
|
||||
"""The maximum number of tokens to generate."""
|
||||
extra_params: Dict[str, Any] = Field(default_factory=dict)
|
||||
"""Any extra parameters to pass to the endpoint."""
|
||||
task: Optional[str] = None
|
||||
"""The task of the endpoint. Only used when using a serving endpoint.
|
||||
If not provided, the task is automatically inferred from the endpoint.
|
||||
"""
|
||||
|
||||
_client: _DatabricksClientBase = PrivateAttr()
|
||||
|
||||
@ -401,6 +412,7 @@ class Databricks(LLM):
|
||||
api_token=self.api_token,
|
||||
endpoint_name=self.endpoint_name,
|
||||
databricks_uri=self.databricks_uri,
|
||||
task=self.task,
|
||||
)
|
||||
elif self.cluster_id and self.cluster_driver_port:
|
||||
self._client = _DatabricksClusterDriverProxyClient(
|
||||
@ -430,6 +442,7 @@ class Databricks(LLM):
|
||||
"stop": self.stop,
|
||||
"max_tokens": self.max_tokens,
|
||||
"extra_params": self.extra_params,
|
||||
"task": self.task,
|
||||
# TODO: Support saving transform_input_fn and transform_output_fn
|
||||
# "transform_input_fn": self.transform_input_fn,
|
||||
# "transform_output_fn": self.transform_output_fn,
|
||||
|
Loading…
Reference in New Issue
Block a user