community[minor]: DeepInfra support for chat models (#16380)

Add DeepInfra chat model support. This is https://github.com/langchain-ai/langchain/pull/14234, re-opened from my branch so that maintainers can edit it.
2025-09-17 15:35:14 +00:00 · 2024-01-22 21:22:17 +02:00
parent eac91b60c9
commit fc196cab12
8 changed files with 759 additions and 6 deletions
--- a/libs/community/tests/integration_tests/chat_models/test_deepinfra.py
+++ b/libs/community/tests/integration_tests/chat_models/test_deepinfra.py
@@ -0,0 +1,65 @@
+"""Test ChatDeepInfra wrapper."""
+from langchain_core.messages import BaseMessage, HumanMessage
+from langchain_core.outputs import ChatGeneration, LLMResult
+
+from langchain_community.chat_models.deepinfra import ChatDeepInfra
+from tests.unit_tests.callbacks.fake_callback_handler import FakeCallbackHandler
+
+
+def test_chat_deepinfra() -> None:
+    """Test valid call to DeepInfra."""
+    chat = ChatDeepInfra(
+        max_tokens=10,
+    )
+    response = chat.invoke([HumanMessage(content="Hello")])
+    assert isinstance(response, BaseMessage)
+    assert isinstance(response.content, str)
+
+
+def test_chat_deepinfra_streaming() -> None:
+    callback_handler = FakeCallbackHandler()
+    chat = ChatDeepInfra(
+        callbacks=[callback_handler],
+        streaming=True,
+        max_tokens=10,
+    )
+    response = chat.invoke([HumanMessage(content="Hello")])
+    assert callback_handler.llm_streams > 0
+    assert isinstance(response, BaseMessage)
+
+
+async def test_async_chat_deepinfra() -> None:
+    """Test async generation."""
+    chat = ChatDeepInfra(
+        max_tokens=10,
+    )
+    message = HumanMessage(content="Hello")
+    response = await chat.agenerate([[message]])
+    assert isinstance(response, LLMResult)
+    assert len(response.generations) == 1
+    assert len(response.generations[0]) == 1
+    generation = response.generations[0][0]
+    assert isinstance(generation, ChatGeneration)
+    assert isinstance(generation.text, str)
+    assert generation.text == generation.message.content
+
+
+async def test_async_chat_deepinfra_streaming() -> None:
+    callback_handler = FakeCallbackHandler()
+    chat = ChatDeepInfra(
+        # model="meta-llama/Llama-2-7b-chat-hf",
+        callbacks=[callback_handler],
+        max_tokens=10,
+        streaming=True,
+        timeout=5,
+    )
+    message = HumanMessage(content="Hello")
+    response = await chat.agenerate([[message]])
+    assert callback_handler.llm_streams > 0
+    assert isinstance(response, LLMResult)
+    assert len(response.generations) == 1
+    assert len(response.generations[0]) == 1
+    generation = response.generations[0][0]
+    assert isinstance(generation, ChatGeneration)
+    assert isinstance(generation.text, str)
+    assert generation.text == generation.message.content
--- a/libs/community/tests/integration_tests/embeddings/test_deepinfra.py
+++ b/libs/community/tests/integration_tests/embeddings/test_deepinfra.py
@@ -5,7 +5,7 @@ from langchain_community.embeddings import DeepInfraEmbeddings

 def test_deepinfra_call() -> None:
    """Test valid call to DeepInfra."""
-    deepinfra_emb = DeepInfraEmbeddings(model_id="sentence-transformers/clip-ViT-B-32")
+    deepinfra_emb = DeepInfraEmbeddings(model_id="BAAI/bge-base-en-v1.5")
    r1 = deepinfra_emb.embed_documents(
        [
            "Alpha is the first letter of Greek alphabet",
@@ -13,7 +13,7 @@ def test_deepinfra_call() -> None:
        ]
    )
    assert len(r1) == 2
-    assert len(r1[0]) == 512
-    assert len(r1[1]) == 512
+    assert len(r1[0]) == 768
+    assert len(r1[1]) == 768
    r2 = deepinfra_emb.embed_query("What is the third letter of Greek alphabet")
-    assert len(r2) == 512
+    assert len(r2) == 768
--- a/libs/community/tests/integration_tests/llms/test_deepinfra.py
+++ b/libs/community/tests/integration_tests/llms/test_deepinfra.py
@@ -5,13 +5,13 @@ from langchain_community.llms.deepinfra import DeepInfra
 def test_deepinfra_call() -> None:
    """Test valid call to DeepInfra."""
    llm = DeepInfra(model_id="meta-llama/Llama-2-7b-chat-hf")
-    output = llm("What is 2 + 2?")
+    output = llm.invoke("What is 2 + 2?")
    assert isinstance(output, str)


 async def test_deepinfra_acall() -> None:
    llm = DeepInfra(model_id="meta-llama/Llama-2-7b-chat-hf")
-    output = await llm.apredict("What is 2 + 2?")
+    output = await llm.ainvoke("What is 2 + 2?")
    assert llm._llm_type == "deepinfra"
    assert isinstance(output, str)