community: Add configurable VisualFeatures to the AzureAiServicesImageAnalysisTool (#27444)

Thank you for contributing to LangChain!

- [ ] **PR title**: community: Add configurable `VisualFeatures` to the
`AzureAiServicesImageAnalysisTool`


- [ ] **PR message**:  
- **Description:** The `AzureAiServicesImageAnalysisTool` is a useful
tool that uses the Azure AI Vision package under the hood. However,
since this tool was created, new `VisualFeatures` have been added to the
service that let users request additional image-specific information.
Currently, the tool neither lets you configure which features should be
returned nor supports any of the newer feature types. This PR addresses
both gaps and exposes more of the Azure service in this integration (a
usage sketch follows this checklist).
- **Dependencies:** no new dependencies in the main class file;
`azure.ai.vision.imageanalysis` has been added to the extra test
dependencies file.


- [ ] **Add tests and docs**: If you're adding a new integration, please
include
1. Although no tests existed for the previously implemented Azure Service
tools, I've added 3 unit tests for this class: one covering
initialisation and credentials, one for local file analysis, and one for
the new configurable features option.


- [ ] **Lint and test**: All linting has passed.
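
A minimal usage sketch of the new option (not part of this diff; it assumes the
import paths used in the new unit tests below, and the key/endpoint values are
placeholders):

```python
from azure.ai.vision.imageanalysis.models import VisualFeatures

from langchain_community.tools.azure_ai_services.image_analysis import (
    AzureAiServicesImageAnalysisTool,
)

# Request only the features you need; entries may be plain strings or
# VisualFeatures enum members. Omitting visual_features keeps the previous
# defaults (TAGS, OBJECTS, CAPTION, READ).
tool = AzureAiServicesImageAnalysisTool(
    azure_ai_services_key="<your-key>",            # placeholder
    azure_ai_services_endpoint="<your-endpoint>",  # placeholder
    visual_features=[VisualFeatures.DENSE_CAPTIONS, "PEOPLE", "SMARTCROPS"],
)

# Input may be a URL or a local path to an image.
print(tool.run("https://example.com/building.jpg"))
```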

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
Co-authored-by: Chester Curme <chester.curme@gmail.com>
Author: Sheepsta300
Date: 2024-12-17 07:30:04 +13:00
Committed by: GitHub
Parent: 1c120e9615
Commit: 580a8d53f9
4 changed files with 182 additions and 13 deletions


@@ -7,6 +7,7 @@ atlassian-python-api>=3.36.0,<4
azure-ai-documentintelligence>=1.0.0b1,<2
azure-identity>=1.15.0,<2
azure-search-documents==11.4.0
azure.ai.vision.imageanalysis>=1.0.0,<2
beautifulsoup4>=4,<5
bibtexparser>=1.4.0,<2
cassio>=0.1.6,<0.2
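
(Note: pip normalises the dotted requirement above to the
`azure-ai-vision-imageanalysis` distribution. As a quick local sanity check,
reviewers can confirm the optional dependency resolves with a guard like the
sketch below; this mirrors the usual optional-import pattern and is not part
of the diff.)

```python
# Illustrative optional-import check; not part of this PR's code.
try:
    from azure.ai.vision.imageanalysis import ImageAnalysisClient  # noqa: F401
    from azure.ai.vision.imageanalysis.models import VisualFeatures  # noqa: F401
except ImportError as exc:
    raise ImportError(
        "azure-ai-vision-imageanalysis is required for "
        "AzureAiServicesImageAnalysisTool; install it with "
        "`pip install azure-ai-vision-imageanalysis`."
    ) from exc
```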


@@ -19,19 +19,31 @@ class AzureAiServicesImageAnalysisTool(BaseTool): # type: ignore[override]
"""Tool that queries the Azure AI Services Image Analysis API.
In order to set this up, follow instructions at:
https://learn.microsoft.com/en-us/azure/ai-services/computer-vision/quickstarts-sdk/image-analysis-client-library-40
https://learn.microsoft.com/azure/ai-services/computer-vision/quickstarts-sdk/image-analysis-client-library-40
Attributes:
azure_ai_services_key (Optional[str]): The API key for Azure AI Services.
azure_ai_services_endpoint (Optional[str]): The endpoint URL for Azure AI Services.
visual_features (Any): The visual features to analyze in the image; can be
set as either strings or azure.ai.vision.imageanalysis.models.VisualFeatures
(e.g. 'TAGS', VisualFeatures.CAPTION).
image_analysis_client (Any): The client for interacting
with Azure AI Services Image Analysis.
name (str): The name of the tool.
description (str): A description of the tool,
including its purpose and expected input.
"""
azure_ai_services_key: str = "" #: :meta private:
azure_ai_services_endpoint: str = "" #: :meta private:
image_analysis_client: Any #: :meta private:
visual_features: Any #: :meta private:
azure_ai_services_key: Optional[str] = None #: :meta private:
azure_ai_services_endpoint: Optional[str] = None #: :meta private:
visual_features: Any = None
image_analysis_client: Any = None #: :meta private:
name: str = "azure_ai_services_image_analysis"
description: str = (
"A wrapper around Azure AI Services Image Analysis. "
"Useful for when you need to analyze images. "
"Input should be a url to an image."
"Input must be a url string or path string to an image."
)
@model_validator(mode="before")
@@ -68,13 +80,16 @@ class AzureAiServicesImageAnalysisTool(BaseTool): # type: ignore[override]
f"Initialization of Azure AI Vision Image Analysis client failed: {e}"
)
values["visual_features"] = [
VisualFeatures.TAGS,
VisualFeatures.OBJECTS,
VisualFeatures.CAPTION,
VisualFeatures.READ,
]
visual_features = values.get(
"visual_features",
[
VisualFeatures.TAGS,
VisualFeatures.OBJECTS,
VisualFeatures.CAPTION,
VisualFeatures.READ,
],
)
values["visual_features"] = visual_features
return values
def _image_analysis(self, image_path: str) -> Dict:
@@ -115,6 +130,17 @@ class AzureAiServicesImageAnalysisTool(BaseTool): # type: ignore[override]
if result.read is not None and len(result.read.blocks) > 0:
res_dict["text"] = [line.text for line in result.read.blocks[0].lines]
if result.dense_captions is not None and len(result.dense_captions) > 0:
res_dict["dense_captions"] = [
str(dc) for dc in result.dense_captions.list
]
if result.smart_crops is not None and len(result.smart_crops) > 0:
res_dict["smart_crops"] = [str(sc) for sc in result.smart_crops.list]
if result.people is not None and len(result.people) > 0:
res_dict["people"] = [str(p) for p in result.people.list]
return res_dict
def _format_image_analysis_result(self, image_analysis_result: Dict) -> str:
@@ -136,6 +162,21 @@ class AzureAiServicesImageAnalysisTool(BaseTool): # type: ignore[override]
if "text" in image_analysis_result and len(image_analysis_result["text"]) > 0:
formatted_result.append("Text: " + ", ".join(image_analysis_result["text"]))
if "dense_captions" in image_analysis_result:
formatted_result.append(
"Dense Captions: " + ", ".join(image_analysis_result["dense_captions"])
)
if "smart_crops" in image_analysis_result:
formatted_result.append(
"Smart Crops: " + ", ".join(image_analysis_result["smart_crops"])
)
if "people" in image_analysis_result:
formatted_result.append(
"People: " + ", ".join(image_analysis_result["people"])
)
return "\n".join(formatted_result)
def _run(
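
To summarise the formatting changes for reviewers, here is a hedged
illustration of what `_format_image_analysis_result` produces once the new
features are present (the field contents below are invented for illustration;
real values come from the Azure response objects):

```python
# Illustrative only: this dict stands in for what _image_analysis() returns
# when DENSE_CAPTIONS, SMART_CROPS and PEOPLE were requested and detected.
image_analysis_result = {
    "caption": "A building corner.",
    "dense_captions": ["{'text': 'a tall building', 'confidence': 0.78}"],
    "smart_crops": ["{'aspectRatio': 1.97, 'boundingBox': {'x': 43, 'y': 24, 'w': 853, 'h': 432}}"],
    "people": ["{'boundingBox': {'x': 454, 'y': 44, 'w': 408, 'h': 531}, 'confidence': 0.96}"],
}

# _format_image_analysis_result joins these into one newline-separated string:
#
# Caption: A building corner.
# Dense Captions: {'text': 'a tall building', 'confidence': 0.78}
# Smart Crops: {'aspectRatio': 1.97, 'boundingBox': {'x': 43, 'y': 24, 'w': 853, 'h': 432}}
# People: {'boundingBox': {'x': 454, 'y': 44, 'w': 408, 'h': 531}, 'confidence': 0.96}
```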

Binary file not shown: a new example image (building.jpg, 1.1 MiB) was added.


@@ -0,0 +1,127 @@
"""Tests for the Azure AI Services Image Analysis Tool."""
from pathlib import Path
from typing import Any
import pytest
from langchain_community.tools.azure_ai_services.image_analysis import (
AzureAiServicesImageAnalysisTool,
)
this_dir = Path(__file__).parents[3]
examples_dir = this_dir / "examples"
building_path = examples_dir / "building.jpg"
@pytest.mark.requires("azure.ai.vision.imageanalysis")
def test_initialisation_and_credentials(mocker: Any) -> None:
mocker.patch("azure.ai.vision.imageanalysis.ImageAnalysisClient", autospec=True)
mocker.patch("azure.core.credentials.AzureKeyCredential", autospec=True)
key = "key"
endpoint = "endpoint"
tool = AzureAiServicesImageAnalysisTool(
azure_ai_services_key=key, azure_ai_services_endpoint=endpoint
)
assert tool.azure_ai_services_key == key
assert tool.azure_ai_services_endpoint == endpoint
@pytest.mark.requires("azure.ai.vision.imageanalysis")
def test_local_image_analysis(mocker: Any) -> None:
key = "key"
endpoint = "endpoint"
mocker.patch("azure.ai.vision.imageanalysis.ImageAnalysisClient", autospec=True)
mocker.patch("azure.core.credentials.AzureKeyCredential", autospec=True)
mocker.patch(
"langchain_community.tools.azure_ai_services.utils.detect_file_src_type",
return_value="local",
)
tool = AzureAiServicesImageAnalysisTool(
azure_ai_services_key=key,
azure_ai_services_endpoint=endpoint,
visual_features=["CAPTION"],
)
mock_content_client = mocker.Mock()
mock_content_client.analyze.return_value = mocker.Mock()
mock_content_client.analyze.return_value.caption.text = "A building corner."
mock_content_client.analyze.return_value.objects = None
mock_content_client.analyze.return_value.tags = None
mock_content_client.analyze.return_value.read = None
mock_content_client.analyze.return_value.dense_captions = None
mock_content_client.analyze.return_value.smart_crops = None
mock_content_client.analyze.return_value.people = None
tool.image_analysis_client = mock_content_client
input = str(building_path)
output = "Caption: A building corner."
result = tool._run(input)
assert result == output
@pytest.mark.requires("azure.ai.vision.imageanalysis")
def test_local_image_different_features(mocker: Any) -> None:
key = "key"
endpoint = "endpoint"
mocker.patch("azure.ai.vision.imageanalysis.ImageAnalysisClient", autospec=True)
mocker.patch("azure.core.credentials.AzureKeyCredential", autospec=True)
mocker.patch(
"langchain_community.tools.azure_ai_services.utils.detect_file_src_type",
return_value="local",
)
tool = AzureAiServicesImageAnalysisTool(
azure_ai_services_key=key,
azure_ai_services_endpoint=endpoint,
visual_features=["PEOPLE", "CAPTION", "SMARTCROPS"],
)
mock_content_client = mocker.Mock()
mock_content_client.analyze.return_value = mocker.Mock()
mock_content_client.analyze.return_value.caption.text = "A building corner."
mock_content_client.analyze.return_value.objects = None
mock_content_client.analyze.return_value.tags = None
mock_content_client.analyze.return_value.read = None
mock_content_client.analyze.return_value.dense_captions = None
mock_smart_crops = mocker.MagicMock()
mock_smart_crops.list = [
{"aspectRatio": 1.97, "boundingBox": {"x": 43, "y": 24, "w": 853, "h": 432}}
]
mock_smart_crops.__len__.return_value = 1
mock_content_client.analyze.return_value.smart_crops = mock_smart_crops
mock_people = mocker.MagicMock()
mock_people.list = [
{
"boundingBox": {"x": 454, "y": 44, "w": 408, "h": 531},
"confidence": 0.9601945281028748,
},
]
mock_people.__len__.return_value = 1
mock_content_client.analyze.return_value.people = mock_people
tool.image_analysis_client = mock_content_client
input = str(building_path)
output = (
"Caption: A building corner.\n"
"Smart Crops: {'aspectRatio': 1.97,"
" 'boundingBox': {'x': 43, 'y': 24, 'w': 853, 'h': 432}}\n"
"People: {'boundingBox': {'x': 454, 'y': 44, 'w': 408, 'h': 531},"
" 'confidence': 0.9601945281028748}"
)
result = tool._run(input)
assert result == output