mirror of
https://github.com/hwchase17/langchain.git
synced 2025-11-30 11:17:04 +00:00
```python
"""python scripts/update_mypy_ruff.py"""
import glob
import tomllib
from pathlib import Path
import toml
import subprocess
import re
ROOT_DIR = Path(__file__).parents[1]
# Matches mypy output lines such as ``pkg/mod.py:12: error: message  [code]``
# and captures (path, line number, error code).
_MYPY_ERROR_RE = re.compile(r"^(.*):(\d+): error:.*\[(.*)\]")


def _collect_mypy_errors(output):
    """Group mypy error codes from ``output`` by ``(path, line number)``."""
    to_ignore = {}
    for log_line in output.split("\n"):
        match = _MYPY_ERROR_RE.match(log_line)
        if match:
            error_path, line_no, error_type = match.groups()
            to_ignore.setdefault((error_path, line_no), []).append(error_type)
    return to_ignore


def _with_type_ignore(source_line, error_types):
    """Return ``source_line`` with a ``# type: ignore[...]`` comment appended."""
    all_errors = ", ".join(error_types)
    # Strip only the newline: the last line of a file may not end with one, and
    # blindly dropping the final character would eat real source code.
    stripped = source_line.rstrip("\n")
    # Two spaces before the inline comment, per PEP 8.
    return f"{stripped}  # type: ignore[{all_errors}]\n"


def main():
    """Bump mypy/ruff in every package under ``libs/`` and silence new mypy errors.

    For each ``pyproject.toml`` found below ``ROOT_DIR / "libs"``:

    1. Set the ``typing`` group's ``mypy`` constraint to ``^1.10`` and the
       ``lint`` group's ``ruff`` constraint to ``^0.5``. Packages that lack
       either group are skipped and their file is left untouched.
    2. Re-lock, install the ``typing`` group and run mypy in the package
       directory.
    3. Append ``# type: ignore[<codes>]`` to every line mypy reported on stdout.
    4. Run ``ruff format`` and ruff's import-sorting fix over the package.
    """
    for pyproject_path in glob.glob(
        str(ROOT_DIR / "libs/**/pyproject.toml"), recursive=True
    ):
        print(pyproject_path)
        with open(pyproject_path, "rb") as f:
            pyproject = tomllib.load(f)
        try:
            groups = pyproject["tool"]["poetry"]["group"]
            groups["typing"]["dependencies"]["mypy"] = "^1.10"
            groups["lint"]["dependencies"]["ruff"] = "^0.5"
        except KeyError:
            # Not a poetry package with both typing and lint groups.
            continue
        # NOTE: the file is round-tripped through a plain dict, so any comments
        # in the original pyproject.toml are not carried over.
        with open(pyproject_path, "w") as f:
            toml.dump(pyproject, f)

        cwd = str(Path(pyproject_path).parent)
        completed = subprocess.run(
            "poetry lock --no-update; poetry install --with typing; poetry run mypy . --no-color",
            cwd=cwd,
            shell=True,
            capture_output=True,
            text=True,
        )

        to_ignore = _collect_mypy_errors(completed.stdout)
        print(len(to_ignore))
        for (error_path, line_no), error_types in to_ignore.items():
            full_path = f"{cwd}/{error_path}"
            try:
                with open(full_path, "r") as f:
                    file_lines = f.readlines()
            except FileNotFoundError:
                # mypy may report paths that do not resolve from this directory.
                continue
            index = int(line_no) - 1  # mypy line numbers are 1-based
            file_lines[index] = _with_type_ignore(file_lines[index], error_types)
            with open(full_path, "w") as f:
                f.write("".join(file_lines))

        # Re-format and re-sort imports after the inserted comments.
        subprocess.run(
            "poetry run ruff format .; poetry run ruff --select I --fix .",
            cwd=cwd,
            shell=True,
            capture_output=True,
            text=True,
        )
# Script entry point: run the update only when executed directly, not on import.
if __name__ == "__main__":
    main()
```
133 lines
4.3 KiB
Python
133 lines
4.3 KiB
Python
# flake8: noqa
|
|
"""Tools for interacting with Spark SQL."""
|
|
|
|
from typing import Any, Dict, Optional
|
|
|
|
from langchain_core.pydantic_v1 import BaseModel, Field, root_validator
|
|
|
|
from langchain_core.language_models import BaseLanguageModel
|
|
from langchain_core.callbacks import (
|
|
AsyncCallbackManagerForToolRun,
|
|
CallbackManagerForToolRun,
|
|
)
|
|
from langchain_core.prompts import PromptTemplate
|
|
from langchain_community.utilities.spark_sql import SparkSQL
|
|
from langchain_core.tools import BaseTool
|
|
from langchain_community.tools.spark_sql.prompt import QUERY_CHECKER
|
|
|
|
|
|
class BaseSparkSQLTool(BaseModel):
    """Shared pydantic base for the Spark SQL tools defined in this module."""

    # Spark SQL wrapper the concrete tools delegate to; declared with
    # ``exclude=True`` on the field.
    db: SparkSQL = Field(exclude=True)

    class Config(BaseTool.Config):
        # Reuse ``BaseTool``'s configuration unchanged.
        pass
|
|
|
|
|
|
class QuerySparkSQLTool(BaseSparkSQLTool, BaseTool):
    """Tool that sends a SQL query to Spark SQL and hands back the outcome."""

    name: str = "query_sql_db"
    description: str = """
    Input to this tool is a detailed and correct SQL query, output is a result from the Spark SQL.
    If the query is not correct, an error message will be returned.
    If an error is returned, rewrite the query, check the query, and try again.
    """

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Run ``query`` via ``self.db.run_no_throw`` and return what it yields.

        ``run_manager`` is accepted for interface compatibility and is not
        used by this method.
        """
        outcome = self.db.run_no_throw(query)
        return outcome
|
|
|
|
|
|
class InfoSparkSQLTool(BaseSparkSQLTool, BaseTool):
    """Tool for getting metadata about a Spark SQL."""

    name: str = "schema_sql_db"
    description: str = """
    Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables.
    Be sure that the tables actually exist by calling list_tables_sql_db first!

    Example Input: "table1, table2, table3"
    """

    def _run(
        self,
        table_names: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Get the schema for tables in a comma-separated list.

        Args:
            table_names: Table names separated by commas. Whitespace around
                each name is ignored, so ``"a, b"``, ``"a,b"`` and ``"a ,  b"``
                all refer to tables ``a`` and ``b``.
            run_manager: Accepted for interface compatibility; not used here.

        Returns:
            Whatever ``self.db.get_table_info_no_throw`` returns for the
            parsed list of names.
        """
        # Split on the bare comma and strip each piece: splitting on ", " only
        # works when the caller uses exactly one space after every comma.
        requested = [piece.strip() for piece in table_names.split(",")]
        return self.db.get_table_info_no_throw(requested)
|
|
|
|
|
|
class ListSparkSQLTool(BaseSparkSQLTool, BaseTool):
    """Tool that lists the table names usable through the Spark SQL wrapper."""

    name: str = "list_tables_sql_db"
    description: str = "Input is an empty string, output is a comma separated list of tables in the Spark SQL."

    def _run(
        self,
        tool_input: str = "",
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Return the usable table names joined with ``", "``.

        Neither ``tool_input`` nor ``run_manager`` is read by this method.
        """
        usable_tables = self.db.get_usable_table_names()
        return ", ".join(usable_tables)
|
|
|
|
|
|
class QueryCheckerTool(BaseSparkSQLTool, BaseTool):
    """Ask an LLM to double check a SQL query before it is executed.

    Adapted from https://www.patterns.app/blog/2023/01/18/crunchbot-sql-analyst-gpt/
    """

    template: str = QUERY_CHECKER
    llm: BaseLanguageModel
    llm_chain: Any = Field(init=False)
    name: str = "query_checker_sql_db"
    description: str = """
    Use this tool to double check if your query is correct before executing it.
    Always use this tool before executing a query with query_sql_db!
    """

    @root_validator(pre=True)
    def initialize_llm_chain(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """Ensure ``values`` carries an ``llm_chain`` whose prompt takes ``query``.

        When no ``llm_chain`` was supplied, one is built from the supplied
        ``llm`` and the ``QUERY_CHECKER`` template. In every case the chain's
        prompt must declare exactly ``["query"]`` as its input variables.

        Raises:
            ValueError: If the chain's prompt uses other input variables.
        """
        if "llm_chain" not in values:
            # Imported lazily, only when a default chain has to be built.
            from langchain.chains.llm import LLMChain

            supplied_llm = values.get("llm")
            checker_prompt = PromptTemplate(
                template=QUERY_CHECKER, input_variables=["query"]
            )
            values["llm_chain"] = LLMChain(
                llm=supplied_llm,  # type: ignore[arg-type]
                prompt=checker_prompt,
            )

        prompt_variables = values["llm_chain"].prompt.input_variables
        if prompt_variables != ["query"]:
            raise ValueError(
                "LLM chain for QueryCheckerTool need to use ['query'] as input_variables "
                "for the embedded prompt"
            )

        return values

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Check ``query`` synchronously with the LLM chain."""
        # Forward child callbacks only when a run manager was provided.
        child_callbacks = run_manager.get_child() if run_manager else None
        return self.llm_chain.predict(query=query, callbacks=child_callbacks)

    async def _arun(
        self,
        query: str,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> str:
        """Check ``query`` asynchronously with the LLM chain."""
        # Forward child callbacks only when a run manager was provided.
        child_callbacks = run_manager.get_child() if run_manager else None
        return await self.llm_chain.apredict(query=query, callbacks=child_callbacks)
|