langchain/libs/community/tests/unit_tests/chat_models/test_sparkllm.py
Lei Zhang f203229b51
community: Fix the failure of ChatSparkLLM after upgrading to Pydantic V2 (#27418)
**Description:**

The test_sparkllm.py can reproduce this issue.


https://github.com/langchain-ai/langchain/blob/master/libs/community/tests/integration_tests/chat_models/test_sparkllm.py#L66

```
Testing started at 18:27 ...
Launching pytest with arguments test_sparkllm.py::test_chat_spark_llm --no-header --no-summary -q in /Users/zhanglei/Work/github/langchain/libs/community/tests/integration_tests/chat_models

============================= test session starts ==============================
collecting ... collected 1 item

test_sparkllm.py::test_chat_spark_llm 

============================== 1 failed in 0.45s ===============================
FAILED                             [100%]
tests/integration_tests/chat_models/test_sparkllm.py:65 (test_chat_spark_llm)
def test_chat_spark_llm() -> None:
>       chat = ChatSparkLLM(
            spark_app_id="your spark_app_id",
            spark_api_key="your spark_api_key",
            spark_api_secret="your spark_api_secret",
        )  # type: ignore[call-arg]

test_sparkllm.py:67: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
../../../../core/langchain_core/load/serializable.py:111: in __init__
    super().__init__(*args, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

cls = <class 'langchain_community.chat_models.sparkllm.ChatSparkLLM'>
values = {'spark_api_key': 'your spark_api_key', 'spark_api_secret': 'your spark_api_secret', 'spark_api_url': 'wss://spark-api.xf-yun.com/v3.5/chat', 'spark_app_id': 'your spark_app_id', ...}

    @model_validator(mode="before")
    @classmethod
    def validate_environment(cls, values: Dict) -> Any:
        values["spark_app_id"] = get_from_dict_or_env(
            values,
            ["spark_app_id", "app_id"],
            "IFLYTEK_SPARK_APP_ID",
        )
        values["spark_api_key"] = get_from_dict_or_env(
            values,
            ["spark_api_key", "api_key"],
            "IFLYTEK_SPARK_API_KEY",
        )
        values["spark_api_secret"] = get_from_dict_or_env(
            values,
            ["spark_api_secret", "api_secret"],
            "IFLYTEK_SPARK_API_SECRET",
        )
        values["spark_api_url"] = get_from_dict_or_env(
            values,
            "spark_api_url",
            "IFLYTEK_SPARK_API_URL",
            SPARK_API_URL,
        )
        values["spark_llm_domain"] = get_from_dict_or_env(
            values,
            "spark_llm_domain",
            "IFLYTEK_SPARK_LLM_DOMAIN",
            SPARK_LLM_DOMAIN,
        )
    
        # put extra params into model_kwargs
        default_values = {
            name: field.default
            for name, field in get_fields(cls).items()
            if field.default is not None
        }
>       values["model_kwargs"]["temperature"] = default_values.get("temperature")
E       KeyError: 'model_kwargs'

../../../langchain_community/chat_models/sparkllm.py:368: KeyError
``` 

I found that when upgrading to Pydantic v2, @root_validator was changed
to @model_validator. When a class declares multiple
@model_validator(model=before), the execution order in V1 and V2 is
opposite. This is the reason for ChatSparkLLM's failure.

The correct execution order is to execute build_extra first.


https://github.com/langchain-ai/langchain/blob/langchain%3D%3D0.2.16/libs/community/langchain_community/chat_models/sparkllm.py#L302

And then execute validate_environment.


https://github.com/langchain-ai/langchain/blob/langchain%3D%3D0.2.16/libs/community/langchain_community/chat_models/sparkllm.py#L329

The Pydantic community also discusses it, but there hasn't been a
conclusion yet. https://github.com/pydantic/pydantic/discussions/7434

**Issus:** #27416 

**Twitter handle:** coolbeevip

---------

Co-authored-by: vbarda <vadym@langchain.dev>
2024-10-23 21:17:10 -04:00

110 lines
3.3 KiB
Python

import pytest
from langchain_core.messages import (
AIMessage,
HumanMessage,
SystemMessage,
)
from langchain_core.output_parsers.openai_tools import (
parse_tool_call,
)
from langchain_community.chat_models.sparkllm import (
ChatSparkLLM,
convert_dict_to_message,
convert_message_to_dict,
)
def test__convert_dict_to_message_human() -> None:
message_dict = {"role": "user", "content": "foo"}
result = convert_dict_to_message(message_dict)
expected_output = HumanMessage(content="foo")
assert result == expected_output
def test__convert_dict_to_message_ai() -> None:
message_dict = {"role": "assistant", "content": "foo"}
result = convert_dict_to_message(message_dict)
expected_output = AIMessage(content="foo")
assert result == expected_output
def test__convert_dict_to_message_other_role() -> None:
message_dict = {"role": "system", "content": "foo"}
result = convert_dict_to_message(message_dict)
expected_output = SystemMessage(content="foo")
assert result == expected_output
def test__convert_dict_to_message_function_call() -> None:
raw_function_calls = [
{
"function": {
"name": "get_current_weather",
"arguments": '{"location": "Boston", "unit": "fahrenheit"}',
},
"type": "function",
}
]
message_dict = {
"role": "assistant",
"content": "foo",
"tool_calls": raw_function_calls,
}
result = convert_dict_to_message(message_dict)
tool_calls = [
parse_tool_call(raw_tool_call, return_id=True)
for raw_tool_call in raw_function_calls
]
expected_output = AIMessage(
content="foo",
additional_kwargs={"tool_calls": raw_function_calls},
tool_calls=tool_calls,
invalid_tool_calls=[],
)
assert result == expected_output
def test__convert_message_to_dict_human() -> None:
message = HumanMessage(content="foo")
result = convert_message_to_dict(message)
expected_output = {"role": "user", "content": "foo"}
assert result == expected_output
def test__convert_message_to_dict_ai() -> None:
message = AIMessage(content="foo")
result = convert_message_to_dict(message)
expected_output = {"role": "assistant", "content": "foo"}
assert result == expected_output
def test__convert_message_to_dict_system() -> None:
message = SystemMessage(content="foo")
result = convert_message_to_dict(message)
expected_output = {"role": "system", "content": "foo"}
assert result == expected_output
@pytest.mark.requires("websocket")
def test__chat_spark_llm_initialization() -> None:
chat = ChatSparkLLM(
app_id="IFLYTEK_SPARK_APP_ID",
api_key="IFLYTEK_SPARK_API_KEY",
api_secret="IFLYTEK_SPARK_API_SECRET",
api_url="IFLYTEK_SPARK_API_URL",
model="IFLYTEK_SPARK_LLM_DOMAIN",
timeout=40,
temperature=0.1,
top_k=3,
)
assert chat.spark_app_id == "IFLYTEK_SPARK_APP_ID"
assert chat.spark_api_key == "IFLYTEK_SPARK_API_KEY"
assert chat.spark_api_secret == "IFLYTEK_SPARK_API_SECRET"
assert chat.spark_api_url == "IFLYTEK_SPARK_API_URL"
assert chat.spark_llm_domain == "IFLYTEK_SPARK_LLM_DOMAIN"
assert chat.request_timeout == 40
assert chat.temperature == 0.1
assert chat.top_k == 3