diff --git a/langchain/tools/__init__.py b/langchain/tools/__init__.py
index f8080cd5dbb..21324ac9c14 100644
--- a/langchain/tools/__init__.py
+++ b/langchain/tools/__init__.py
@@ -26,6 +26,7 @@ from langchain.tools.playwright import (
     NavigateTool,
 )
 from langchain.tools.plugin import AIPluginTool
+from langchain.tools.scenexplain.tool import SceneXplainTool
 from langchain.tools.shell.tool import ShellTool
 
 __all__ = [
@@ -59,4 +60,6 @@ __all__ = [
     "ReadFileTool",
     "ShellTool",
     "WriteFileTool",
+    "BaseTool",
+    "SceneXplainTool",
 ]
diff --git a/langchain/tools/scenexplain/__init__.py b/langchain/tools/scenexplain/__init__.py
new file mode 100644
index 00000000000..2e6553b7356
--- /dev/null
+++ b/langchain/tools/scenexplain/__init__.py
@@ -0,0 +1 @@
+"""SceneXplain API toolkit."""
diff --git a/langchain/tools/scenexplain/tool.py b/langchain/tools/scenexplain/tool.py
new file mode 100644
index 00000000000..d8e5394cba7
--- /dev/null
+++ b/langchain/tools/scenexplain/tool.py
@@ -0,0 +1,26 @@
+"""Tool for the SceneXplain API."""
+
+from pydantic import Field
+
+from langchain.tools.base import BaseTool
+from langchain.utilities.scenexplain import SceneXplainAPIWrapper
+
+
+class SceneXplainTool(BaseTool):
+    """Tool that adds the capability to explain images."""
+
+    name = "Image Explainer"
+    description = (
+        "An Image Captioning Tool: Use this tool to generate a detailed caption "
+        "for an image. The input can be an image file of any format, and "
+        "the output will be a text description that covers every detail of the image."
+    )
+    api_wrapper: SceneXplainAPIWrapper = Field(default_factory=SceneXplainAPIWrapper)
+
+    def _run(self, query: str) -> str:
+        """Describe the image referenced by ``query`` via the API wrapper."""
+        return self.api_wrapper.run(query)
+
+    async def _arun(self, query: str) -> str:
+        """Use the tool asynchronously (unsupported; always raises)."""
+        raise NotImplementedError("SceneXplainTool does not support async")
diff --git a/langchain/utilities/scenexplain.py b/langchain/utilities/scenexplain.py
new file mode 100644
index 00000000000..7b3342c1649
--- /dev/null
+++ b/langchain/utilities/scenexplain.py
@@ -0,0 +1,87 @@
+"""Util that calls SceneXplain.
+
+In order to set this up, you need an API key for the SceneXplain API.
+You can obtain a key by following the steps below.
+- Sign up for a free account at https://scenex.jina.ai/.
+- Navigate to the API Access page (https://scenex.jina.ai/api)
+  and create a new API key.
+"""
+from typing import Dict
+
+import requests
+from pydantic import BaseModel, root_validator
+
+from langchain.utils import get_from_dict_or_env
+
+
+class SceneXplainAPIWrapper(BaseModel):
+    """Wrapper for SceneXplain API.
+
+    In order to set this up, you need an API key for the SceneXplain API.
+    You can obtain a key by following the steps below.
+    - Sign up for a free account at https://scenex.jina.ai/.
+    - Navigate to the API Access page (https://scenex.jina.ai/api)
+      and create a new API key.
+    """
+
+    scenex_api_key: str
+    scenex_api_url: str = (
+        "https://us-central1-causal-diffusion.cloudfunctions.net/describe"
+    )
+    # Seconds to wait for the API before giving up; without a timeout,
+    # ``requests`` can block forever on an unresponsive server.
+    scenex_api_timeout: float = 60.0
+
+    def _describe_image(self, image: str) -> str:
+        """Post ``image`` to the API and return the first result's text.
+
+        Returns an empty string when the response carries no result or the
+        first result has no ``text`` entry. Raises ``requests.HTTPError`` on
+        an error status and ``requests.Timeout`` if the request times out.
+        """
+        headers = {
+            "x-api-key": f"token {self.scenex_api_key}",
+            "content-type": "application/json",
+        }
+        payload = {
+            "data": [
+                {
+                    "image": image,
+                    "algorithm": "Ember",
+                    "languages": ["en"],
+                }
+            ]
+        }
+        response = requests.post(
+            self.scenex_api_url,
+            headers=headers,
+            json=payload,
+            timeout=self.scenex_api_timeout,
+        )
+        response.raise_for_status()
+        result = response.json().get("result", [])
+        img = result[0] if result else {}
+
+        return img.get("text", "")
+
+    @root_validator(pre=True)
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate that api key exists in environment."""
+        scenex_api_key = get_from_dict_or_env(
+            values, "scenex_api_key", "SCENEX_API_KEY"
+        )
+        values["scenex_api_key"] = scenex_api_key
+
+        return values
+
+    def run(self, image: str) -> str:
+        """Run SceneXplain image explainer.
+
+        Returns the description for ``image``, or the string
+        ``"No description found."`` when the API yields none.
+        """
+        description = self._describe_image(image)
+        if not description:
+            return "No description found."
+
+        return description