community[minor]: DeepInfra support for chat models (#16380)

Add support for DeepInfra chat models.

This is a re-opening of https://github.com/langchain-ai/langchain/pull/14234
from my own branch (so that maintainers can edit it).
This commit is contained in:
Iskren Ivov Chernev
2024-01-22 21:22:17 +02:00
committed by GitHub
parent eac91b60c9
commit fc196cab12
8 changed files with 759 additions and 6 deletions

View File

@@ -0,0 +1,65 @@
"""Test ChatDeepInfra wrapper."""
from langchain_core.messages import BaseMessage, HumanMessage
from langchain_core.outputs import ChatGeneration, LLMResult
from langchain_community.chat_models.deepinfra import ChatDeepInfra
from tests.unit_tests.callbacks.fake_callback_handler import FakeCallbackHandler
def test_chat_deepinfra() -> None:
    """Check that a synchronous DeepInfra chat call returns a text message."""
    model = ChatDeepInfra(max_tokens=10)
    prompt = [HumanMessage(content="Hello")]

    reply = model.invoke(prompt)

    assert isinstance(reply, BaseMessage)
    assert isinstance(reply.content, str)
def test_chat_deepinfra_streaming() -> None:
    """Check that a streaming call leaves a positive `llm_streams` count."""
    handler = FakeCallbackHandler()
    model = ChatDeepInfra(
        max_tokens=10,
        streaming=True,
        callbacks=[handler],
    )

    reply = model.invoke([HumanMessage(content="Hello")])

    # The handler must have seen stream events before the reply is inspected.
    assert handler.llm_streams > 0
    assert isinstance(reply, BaseMessage)
async def test_async_chat_deepinfra() -> None:
    """Check that `agenerate` yields one chat generation for one prompt."""
    model = ChatDeepInfra(max_tokens=10)
    prompt = HumanMessage(content="Hello")

    result = await model.agenerate([[prompt]])

    assert isinstance(result, LLMResult)
    # A single prompt list was submitted, so exactly one candidate list
    # holding exactly one generation is expected back.
    batches = result.generations
    assert len(batches) == 1
    candidates = batches[0]
    assert len(candidates) == 1

    first = candidates[0]
    assert isinstance(first, ChatGeneration)
    assert isinstance(first.text, str)
    # The plain-text view must mirror the content of the wrapped message.
    assert first.text == first.message.content
async def test_async_chat_deepinfra_streaming() -> None:
    """Check that streaming `agenerate` notifies callbacks and returns one result."""
    handler = FakeCallbackHandler()
    # No `model` argument is passed, so the wrapper's own default is used.
    model = ChatDeepInfra(
        streaming=True,
        max_tokens=10,
        timeout=5,
        callbacks=[handler],
    )
    prompt = HumanMessage(content="Hello")

    result = await model.agenerate([[prompt]])

    # Stream events must have reached the handler during generation.
    assert handler.llm_streams > 0
    assert isinstance(result, LLMResult)

    batches = result.generations
    assert len(batches) == 1
    candidates = batches[0]
    assert len(candidates) == 1

    first = candidates[0]
    assert isinstance(first, ChatGeneration)
    assert isinstance(first.text, str)
    # The plain-text view must mirror the content of the wrapped message.
    assert first.text == first.message.content

View File

@@ -5,7 +5,7 @@ from langchain_community.embeddings import DeepInfraEmbeddings
def test_deepinfra_call() -> None:
"""Test valid call to DeepInfra."""
deepinfra_emb = DeepInfraEmbeddings(model_id="sentence-transformers/clip-ViT-B-32")
deepinfra_emb = DeepInfraEmbeddings(model_id="BAAI/bge-base-en-v1.5")
r1 = deepinfra_emb.embed_documents(
[
"Alpha is the first letter of Greek alphabet",
@@ -13,7 +13,7 @@ def test_deepinfra_call() -> None:
]
)
assert len(r1) == 2
assert len(r1[0]) == 512
assert len(r1[1]) == 512
assert len(r1[0]) == 768
assert len(r1[1]) == 768
r2 = deepinfra_emb.embed_query("What is the third letter of Greek alphabet")
assert len(r2) == 512
assert len(r2) == 768

View File

@@ -5,13 +5,13 @@ from langchain_community.llms.deepinfra import DeepInfra
def test_deepinfra_call() -> None:
"""Test valid call to DeepInfra."""
# Builds the LLM wrapper for one specific hosted model id.
llm = DeepInfra(model_id="meta-llama/Llama-2-7b-chat-hf")
# NOTE(review): `output` is assigned twice in a row, first via `llm(...)` and
# then via `llm.invoke(...)`. In this diff rendering these are presumably the
# removed and added sides of a single change; confirm which one should remain.
output = llm("What is 2 + 2?")
output = llm.invoke("What is 2 + 2?")
# Only the type of the completion is checked, not its text.
assert isinstance(output, str)
async def test_deepinfra_acall() -> None:
# Async variant: awaits a completion, then checks the wrapper's `_llm_type`
# and the type of the returned output.
llm = DeepInfra(model_id="meta-llama/Llama-2-7b-chat-hf")
# NOTE(review): `output` is assigned twice in a row, first via `apredict` and
# then via `ainvoke`. In this diff rendering these are presumably the removed
# and added sides of a single change; confirm which one should remain.
output = await llm.apredict("What is 2 + 2?")
output = await llm.ainvoke("What is 2 + 2?")
assert llm._llm_type == "deepinfra"
assert isinstance(output, str)