community: Add configurable VisualFeatures to the AzureAiServicesImageAnalysisTool (#27444)

Thank you for contributing to LangChain!

- [ ] **PR title**: community: Add configurable `VisualFeatures` to the
`AzureAiServicesImageAnalysisTool`


- [ ] **PR message**:  
- **Description:** The `AzureAiServicesImageAnalysisTool` is a useful
tool that uses the Azure AI Vision package under the hood. However,
since this tool was created, new `VisualFeatures` have been added to the
service that let users request additional image-specific information.
Currently, the tool neither lets you configure which features should be
returned nor supports any of the newer feature types. This PR addresses
both gaps and exposes more of the Azure service in this integration (a
usage sketch follows this checklist).
- **Dependencies:** no new dependencies in the main class file;
`azure.ai.vision.imageanalysis` has been added to the extra test
dependencies file.


- [ ] **Add tests and docs**: If you're adding a new integration, please
include
1. Although no tests existed for the previously implemented Azure Service
tools, I've added 3 unit tests for this class: one covering
initialisation and credentials, one for local file analysis, and one for
the new configurable features option.


- [ ] **Lint and test**: All linting has passed.
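
A minimal usage sketch of the new option (not part of this diff; it assumes the
import paths used in the new unit tests below, and the key/endpoint values are
placeholders):

```python
from azure.ai.vision.imageanalysis.models import VisualFeatures

from langchain_community.tools.azure_ai_services.image_analysis import (
    AzureAiServicesImageAnalysisTool,
)

# Request only the features you need; entries may be plain strings or
# VisualFeatures enum members. Omitting visual_features keeps the previous
# defaults (TAGS, OBJECTS, CAPTION, READ).
tool = AzureAiServicesImageAnalysisTool(
    azure_ai_services_key="<your-key>",            # placeholder
    azure_ai_services_endpoint="<your-endpoint>",  # placeholder
    visual_features=[VisualFeatures.DENSE_CAPTIONS, "PEOPLE", "SMARTCROPS"],
)

# Input may be a URL or a local path to an image.
print(tool.run("https://example.com/building.jpg"))
```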

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
Co-authored-by: Chester Curme <chester.curme@gmail.com>
Author: Sheepsta300
Date: 2024-12-17 07:30:04 +13:00
Committed by: GitHub
Parent: 1c120e9615
Commit: 580a8d53f9
4 changed files with 182 additions and 13 deletions


@@ -7,6 +7,7 @@ atlassian-python-api>=3.36.0,<4
azure-ai-documentintelligence>=1.0.0b1,<2
azure-identity>=1.15.0,<2
azure-search-documents==11.4.0
azure.ai.vision.imageanalysis>=1.0.0,<2
beautifulsoup4>=4,<5
bibtexparser>=1.4.0,<2
cassio>=0.1.6,<0.2
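
(Note: pip normalises the dotted requirement above to the
`azure-ai-vision-imageanalysis` distribution. As a quick local sanity check,
reviewers can confirm the optional dependency resolves with a guard like the
sketch below; this mirrors the usual optional-import pattern and is not part
of the diff.)

```python
# Illustrative optional-import check; not part of this PR's code.
try:
    from azure.ai.vision.imageanalysis import ImageAnalysisClient  # noqa: F401
    from azure.ai.vision.imageanalysis.models import VisualFeatures  # noqa: F401
except ImportError as exc:
    raise ImportError(
        "azure-ai-vision-imageanalysis is required for "
        "AzureAiServicesImageAnalysisTool; install it with "
        "`pip install azure-ai-vision-imageanalysis`."
    ) from exc
```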


@@ -19,19 +19,31 @@ class AzureAiServicesImageAnalysisTool(BaseTool): # type: ignore[override]
"""Tool that queries the Azure AI Services Image Analysis API.
In order to set this up, follow instructions at:
https://learn.microsoft.com/en-us/azure/ai-services/computer-vision/quickstarts-sdk/image-analysis-client-library-40
https://learn.microsoft.com/azure/ai-services/computer-vision/quickstarts-sdk/image-analysis-client-library-40
Attributes:
azure_ai_services_key (Optional[str]): The API key for Azure AI Services.
azure_ai_services_endpoint (Optional[str]): The endpoint URL for Azure AI Services.
visual_features (Any): The visual features to analyze in the image; can be
set as either strings or azure.ai.vision.imageanalysis.models.VisualFeatures
(e.g. 'TAGS', VisualFeatures.CAPTION).
image_analysis_client (Any): The client for interacting
with Azure AI Services Image Analysis.
name (str): The name of the tool.
description (str): A description of the tool,
including its purpose and expected input.
"""
azure_ai_services_key: str = "" #: :meta private:
azure_ai_services_endpoint: str = "" #: :meta private:
image_analysis_client: Any #: :meta private:
visual_features: Any #: :meta private:
azure_ai_services_key: Optional[str] = None #: :meta private:
azure_ai_services_endpoint: Optional[str] = None #: :meta private:
visual_features: Any = None
image_analysis_client: Any = None #: :meta private:
name: str = "azure_ai_services_image_analysis"
description: str = (
"A wrapper around Azure AI Services Image Analysis. "
"Useful for when you need to analyze images. "
"Input should be a url to an image."
"Input must be a url string or path string to an image."
)
@model_validator(mode="before")
@@ -68,13 +80,16 @@ class AzureAiServicesImageAnalysisTool(BaseTool): # type: ignore[override]
f"Initialization of Azure AI Vision Image Analysis client failed: {e}"
)
values["visual_features"] = [
VisualFeatures.TAGS,
VisualFeatures.OBJECTS,
VisualFeatures.CAPTION,
VisualFeatures.READ,
]
visual_features = values.get(
"visual_features",
[
VisualFeatures.TAGS,
VisualFeatures.OBJECTS,
VisualFeatures.CAPTION,
VisualFeatures.READ,
],
)
values["visual_features"] = visual_features
return values
def _image_analysis(self, image_path: str) -> Dict:
@@ -115,6 +130,17 @@ class AzureAiServicesImageAnalysisTool(BaseTool): # type: ignore[override]
if result.read is not None and len(result.read.blocks) > 0:
res_dict["text"] = [line.text for line in result.read.blocks[0].lines]
if result.dense_captions is not None and len(result.dense_captions) > 0:
res_dict["dense_captions"] = [
str(dc) for dc in result.dense_captions.list
]
if result.smart_crops is not None and len(result.smart_crops) > 0:
res_dict["smart_crops"] = [str(sc) for sc in result.smart_crops.list]
if result.people is not None and len(result.people) > 0:
res_dict["people"] = [str(p) for p in result.people.list]
return res_dict
def _format_image_analysis_result(self, image_analysis_result: Dict) -> str:
@@ -136,6 +162,21 @@ class AzureAiServicesImageAnalysisTool(BaseTool): # type: ignore[override]
if "text" in image_analysis_result and len(image_analysis_result["text"]) > 0:
formatted_result.append("Text: " + ", ".join(image_analysis_result["text"]))
if "dense_captions" in image_analysis_result:
formatted_result.append(
"Dense Captions: " + ", ".join(image_analysis_result["dense_captions"])
)
if "smart_crops" in image_analysis_result:
formatted_result.append(
"Smart Crops: " + ", ".join(image_analysis_result["smart_crops"])
)
if "people" in image_analysis_result:
formatted_result.append(
"People: " + ", ".join(image_analysis_result["people"])
)
return "\n".join(formatted_result)
def _run(
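
To summarise the formatting changes for reviewers, here is a hedged
illustration of what `_format_image_analysis_result` produces once the new
features are present (the field contents below are invented for illustration;
real values come from the Azure response objects):

```python
# Illustrative only: this dict stands in for what _image_analysis() returns
# when DENSE_CAPTIONS, SMART_CROPS and PEOPLE were requested and detected.
image_analysis_result = {
    "caption": "A building corner.",
    "dense_captions": ["{'text': 'a tall building', 'confidence': 0.78}"],
    "smart_crops": ["{'aspectRatio': 1.97, 'boundingBox': {'x': 43, 'y': 24, 'w': 853, 'h': 432}}"],
    "people": ["{'boundingBox': {'x': 454, 'y': 44, 'w': 408, 'h': 531}, 'confidence': 0.96}"],
}

# _format_image_analysis_result joins these into one newline-separated string:
#
# Caption: A building corner.
# Dense Captions: {'text': 'a tall building', 'confidence': 0.78}
# Smart Crops: {'aspectRatio': 1.97, 'boundingBox': {'x': 43, 'y': 24, 'w': 853, 'h': 432}}
# People: {'boundingBox': {'x': 454, 'y': 44, 'w': 408, 'h': 531}, 'confidence': 0.96}
```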

Binary file not shown: a new example image (building.jpg, 1.1 MiB) was added.


@@ -0,0 +1,127 @@
"""Tests for the Azure AI Services Image Analysis Tool."""
from pathlib import Path
from typing import Any
import pytest
from langchain_community.tools.azure_ai_services.image_analysis import (
AzureAiServicesImageAnalysisTool,
)
this_dir = Path(__file__).parents[3]
examples_dir = this_dir / "examples"
building_path = examples_dir / "building.jpg"
@pytest.mark.requires("azure.ai.vision.imageanalysis")
def test_initialisation_and_credentials(mocker: Any) -> None:
mocker.patch("azure.ai.vision.imageanalysis.ImageAnalysisClient", autospec=True)
mocker.patch("azure.core.credentials.AzureKeyCredential", autospec=True)
key = "key"
endpoint = "endpoint"
tool = AzureAiServicesImageAnalysisTool(
azure_ai_services_key=key, azure_ai_services_endpoint=endpoint
)
assert tool.azure_ai_services_key == key
assert tool.azure_ai_services_endpoint == endpoint
@pytest.mark.requires("azure.ai.vision.imageanalysis")
def test_local_image_analysis(mocker: Any) -> None:
key = "key"
endpoint = "endpoint"
mocker.patch("azure.ai.vision.imageanalysis.ImageAnalysisClient", autospec=True)
mocker.patch("azure.core.credentials.AzureKeyCredential", autospec=True)
mocker.patch(
"langchain_community.tools.azure_ai_services.utils.detect_file_src_type",
return_value="local",
)
tool = AzureAiServicesImageAnalysisTool(
azure_ai_services_key=key,
azure_ai_services_endpoint=endpoint,
visual_features=["CAPTION"],
)
mock_content_client = mocker.Mock()
mock_content_client.analyze.return_value = mocker.Mock()
mock_content_client.analyze.return_value.caption.text = "A building corner."
mock_content_client.analyze.return_value.objects = None
mock_content_client.analyze.return_value.tags = None
mock_content_client.analyze.return_value.read = None
mock_content_client.analyze.return_value.dense_captions = None
mock_content_client.analyze.return_value.smart_crops = None
mock_content_client.analyze.return_value.people = None
tool.image_analysis_client = mock_content_client
input = str(building_path)
output = "Caption: A building corner."
result = tool._run(input)
assert result == output
@pytest.mark.requires("azure.ai.vision.imageanalysis")
def test_local_image_different_features(mocker: Any) -> None:
key = "key"
endpoint = "endpoint"
mocker.patch("azure.ai.vision.imageanalysis.ImageAnalysisClient", autospec=True)
mocker.patch("azure.core.credentials.AzureKeyCredential", autospec=True)
mocker.patch(
"langchain_community.tools.azure_ai_services.utils.detect_file_src_type",
return_value="local",
)
tool = AzureAiServicesImageAnalysisTool(
azure_ai_services_key=key,
azure_ai_services_endpoint=endpoint,
visual_features=["PEOPLE", "CAPTION", "SMARTCROPS"],
)
mock_content_client = mocker.Mock()
mock_content_client.analyze.return_value = mocker.Mock()
mock_content_client.analyze.return_value.caption.text = "A building corner."
mock_content_client.analyze.return_value.objects = None
mock_content_client.analyze.return_value.tags = None
mock_content_client.analyze.return_value.read = None
mock_content_client.analyze.return_value.dense_captions = None
mock_smart_crops = mocker.MagicMock()
mock_smart_crops.list = [
{"aspectRatio": 1.97, "boundingBox": {"x": 43, "y": 24, "w": 853, "h": 432}}
]
mock_smart_crops.__len__.return_value = 1
mock_content_client.analyze.return_value.smart_crops = mock_smart_crops
mock_people = mocker.MagicMock()
mock_people.list = [
{
"boundingBox": {"x": 454, "y": 44, "w": 408, "h": 531},
"confidence": 0.9601945281028748,
},
]
mock_people.__len__.return_value = 1
mock_content_client.analyze.return_value.people = mock_people
tool.image_analysis_client = mock_content_client
input = str(building_path)
output = (
"Caption: A building corner.\n"
"Smart Crops: {'aspectRatio': 1.97,"
" 'boundingBox': {'x': 43, 'y': 24, 'w': 853, 'h': 432}}\n"
"People: {'boundingBox': {'x': 454, 'y': 44, 'w': 408, 'h': 531},"
" 'confidence': 0.9601945281028748}"
)
result = tool._run(input)
assert result == output