community[patch]: Support Streaming in Azure Machine Learning (#18246)

- [x] **PR title**: "community: Support streaming in Azure ML and few
naming changes"

- [x] **PR message**:
- **Description:** Added support for streaming for azureml_endpoint.
Also, renamed AzureMLEndpointApiType.realtime to
AzureMLEndpointApiType.dedicated. Also, added new classes
CustomOpenAIChatContentFormatter and CustomOpenAIContentFormatter and
updated the classes LlamaChatContentFormatter and LlamaContentFormatter
to now show a deprecated warning message when instantiated.

---------

Co-authored-by: Sachin Paryani <saparan@microsoft.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
Sachin Paryani
2024-03-28 16:38:20 -07:00
committed by GitHub
parent ecb11a4a32
commit 25c9f3d1d1
6 changed files with 285 additions and 76 deletions

View File

@@ -5,13 +5,15 @@ from langchain_core.outputs import ChatGeneration, LLMResult
from langchain_community.chat_models.azureml_endpoint import (
AzureMLChatOnlineEndpoint,
LlamaChatContentFormatter,
CustomOpenAIChatContentFormatter,
)
def test_llama_call() -> None:
"""Test valid call to Open Source Foundation Model."""
chat = AzureMLChatOnlineEndpoint(content_formatter=LlamaChatContentFormatter())
chat = AzureMLChatOnlineEndpoint(
content_formatter=CustomOpenAIChatContentFormatter()
)
response = chat.invoke([HumanMessage(content="Foo")])
assert isinstance(response, BaseMessage)
assert isinstance(response.content, str)
@@ -19,7 +21,9 @@ def test_llama_call() -> None:
def test_temperature_kwargs() -> None:
"""Test that timeout kwarg works."""
chat = AzureMLChatOnlineEndpoint(content_formatter=LlamaChatContentFormatter())
chat = AzureMLChatOnlineEndpoint(
content_formatter=CustomOpenAIChatContentFormatter()
)
response = chat.invoke([HumanMessage(content="FOO")], temperature=0.8)
assert isinstance(response, BaseMessage)
assert isinstance(response.content, str)
@@ -27,7 +31,9 @@ def test_temperature_kwargs() -> None:
def test_message_history() -> None:
"""Test that multiple messages works."""
chat = AzureMLChatOnlineEndpoint(content_formatter=LlamaChatContentFormatter())
chat = AzureMLChatOnlineEndpoint(
content_formatter=CustomOpenAIChatContentFormatter()
)
response = chat.invoke(
[
HumanMessage(content="Hello."),
@@ -40,7 +46,9 @@ def test_message_history() -> None:
def test_multiple_messages() -> None:
chat = AzureMLChatOnlineEndpoint(content_formatter=LlamaChatContentFormatter())
chat = AzureMLChatOnlineEndpoint(
content_formatter=CustomOpenAIChatContentFormatter()
)
message = HumanMessage(content="Hi!")
response = chat.generate([[message], [message]])