community[minor]: Add Dataherald tool (#19680)

**Description:** Integrate the [Dataherald](https://www.dataherald.com)
tool, a natural language-to-SQL tool.
**Dependencies:** Install the dataherald SDK to use it:
```
pip install dataherald
```

---------

Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
Co-authored-by: Christophe Bornet <cbornet@hotmail.com>
This commit is contained in:
Juan Carlos José Camacho
2024-04-13 17:27:16 -06:00
committed by GitHub
parent ece008f117
commit 450c458f8f
11 changed files with 306 additions and 0 deletions

View File

@@ -485,6 +485,7 @@ _module_lookup = {
"ConneryAction": "langchain_community.tools.connery",
"CopyFileTool": "langchain_community.tools.file_management",
"CurrentWebPageTool": "langchain_community.tools.playwright",
"DataheraldTextToSQL": "langchain_community.tools.dataherald.tool",
"DeleteFileTool": "langchain_community.tools.file_management",
"DuckDuckGoSearchResults": "langchain_community.tools.ddg_search.tool",
"DuckDuckGoSearchRun": "langchain_community.tools.ddg_search.tool",

View File

@@ -0,0 +1,8 @@
"""Dataherald API toolkit."""
from langchain_community.tools.dataherald.tool import DataheraldTextToSQL
# Public names exported by this package.
__all__ = ["DataheraldTextToSQL"]

View File

@@ -0,0 +1,36 @@
"""Tool for the Dataherald Hosted API"""
from typing import Optional, Type
from langchain_core.callbacks import CallbackManagerForToolRun
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.tools import BaseTool
from langchain_community.utilities.dataherald import DataheraldAPIWrapper
# Argument schema for the Dataherald text-to-SQL tool: a single required
# string field carrying the natural-language question.
class DataheraldTextToSQLInput(BaseModel):
    # Free-form question that should be turned into a SQL statement.
    prompt: str = Field(
        description="Natural language query to be translated to a SQL query.",
    )
class DataheraldTextToSQL(BaseTool):
    """Tool that queries using the Dataherald SDK."""

    # Identifier and human-readable summary of the tool.
    name: str = "dataherald"
    description: str = (
        "A wrapper around Dataherald. Text to SQL. "
        "Input should be a prompt and an existing db_connection_id"
    )

    # Wrapper object that performs the actual Dataherald call.
    api_wrapper: DataheraldAPIWrapper

    # Schema describing the arguments accepted by `_run`.
    args_schema: Type[BaseModel] = DataheraldTextToSQLInput

    def _run(
        self,
        prompt: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Forward the prompt to the API wrapper and return its reply.

        Args:
            prompt: Natural-language question to translate to SQL.
            run_manager: Optional callback manager; not used by this method.

        Returns:
            The string produced by ``api_wrapper.run``.
        """
        wrapper_reply = self.api_wrapper.run(prompt)
        return wrapper_reply

View File

@@ -230,6 +230,7 @@ _module_lookup = {
"BibtexparserWrapper": "langchain_community.utilities.bibtex",
"BingSearchAPIWrapper": "langchain_community.utilities.bing_search",
"BraveSearchWrapper": "langchain_community.utilities.brave_search",
"DataheraldAPIWrapper": "langchain_community.utilities.dataherald",
"DriaAPIWrapper": "langchain_community.utilities.dria_index",
"DuckDuckGoSearchAPIWrapper": "langchain_community.utilities.duckduckgo_search",
"GoldenQueryAPIWrapper": "langchain_community.utilities.golden_query",

View File

@@ -0,0 +1,67 @@
"""Util that calls Dataherald."""
from typing import Any, Dict, Optional
from langchain_core.pydantic_v1 import BaseModel, Extra, root_validator
from langchain_core.utils import get_from_dict_or_env
class DataheraldAPIWrapper(BaseModel):
    """Wrapper for Dataherald.

    Docs for using:

    1. Go to dataherald and sign up
    2. Create an API key
    3. Save your API key into DATAHERALD_API_KEY env variable
    4. pip install dataherald
    """

    # SDK client instance; populated by `validate_environment`.
    dataherald_client: Any  #: :meta private:
    # Identifier of the database connection the SQL is generated for.
    db_connection_id: str
    # API key; when not passed, it is looked up in DATAHERALD_API_KEY.
    dataherald_api_key: Optional[str] = None

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that api key and python package exists in environment.

        Args:
            values: Field values collected for the model.

        Returns:
            The same mapping with ``dataherald_api_key`` resolved and
            ``dataherald_client`` set to a ``dataherald.Dataherald`` instance.

        Raises:
            ImportError: If the ``dataherald`` package cannot be imported.
        """
        dataherald_api_key = get_from_dict_or_env(
            values, "dataherald_api_key", "DATAHERALD_API_KEY"
        )
        values["dataherald_api_key"] = dataherald_api_key

        try:
            import dataherald
        except ImportError as exc:
            # Chain the original error so the underlying import failure
            # stays visible in the traceback.
            raise ImportError(
                "dataherald is not installed. "
                "Please install it with `pip install dataherald`"
            ) from exc

        client = dataherald.Dataherald(api_key=dataherald_api_key)
        values["dataherald_client"] = client

        return values

    def run(self, prompt: str) -> str:
        """Generate a sql query through Dataherald and parse result.

        Args:
            prompt: Natural-language question to translate to SQL.

        Returns:
            ``"Answer: <sql>"`` when the response carries a non-empty ``sql``
            value, otherwise ``"No answer"``.
        """
        # Imported lazily so the module can be imported without the SDK.
        from dataherald.types.sql_generation_create_params import Prompt

        prompt_obj = Prompt(text=prompt, db_connection_id=self.db_connection_id)
        res = self.dataherald_client.sql_generations.create(prompt=prompt_obj)

        # Plain attribute read: nothing here iterates, so no StopIteration
        # handling is needed around it.
        answer = res.sql
        if not answer:
            return "No answer"
        return f"Answer: {answer}"

View File

@@ -0,0 +1,9 @@
"""Integration test for Dataherald API Wrapper."""
from langchain_community.utilities.dataherald import DataheraldAPIWrapper
def test_call() -> None:
    """Run one prompt through the wrapper and check the reported SQL."""
    wrapper = DataheraldAPIWrapper(db_connection_id="65fb766367dd22c99ce1a12d")
    result = wrapper.run("How many employees are in the company?")
    expected_fragment = "Answer: SELECT \n COUNT(*) FROM \n employees"
    assert expected_fragment in result

View File

@@ -36,6 +36,7 @@ EXPECTED_ALL = [
"ConneryAction",
"CopyFileTool",
"CurrentWebPageTool",
"DataheraldTextToSQL",
"DeleteFileTool",
"DuckDuckGoSearchResults",
"DuckDuckGoSearchRun",

View File

@@ -37,6 +37,7 @@ _EXPECTED = [
"ConneryAction",
"CopyFileTool",
"CurrentWebPageTool",
"DataheraldTextToSQL",
"DeleteFileTool",
"DuckDuckGoSearchResults",
"DuckDuckGoSearchRun",

View File

@@ -8,6 +8,7 @@ EXPECTED_ALL = [
"BibtexparserWrapper",
"BingSearchAPIWrapper",
"BraveSearchWrapper",
"DataheraldAPIWrapper",
"DuckDuckGoSearchAPIWrapper",
"DriaAPIWrapper",
"GoldenQueryAPIWrapper",