mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-18 16:16:33 +00:00
community[minor]: add Cassandra Database Toolkit (#20246)
**Description**: ToolKit and Tools for accessing data in a Cassandra Database primarily for Agent integration. Initially, this includes the following tools: - `cassandra_db_schema` Gathers all schema information for the connected database or a specific schema. Critical for the agent when determining actions. - `cassandra_db_select_table_data` Selects data from a specific keyspace and table. The agent can pass parameters for a predicate and limits on the number of returned records. - `cassandra_db_query` Experimental alternative to `cassandra_db_select_table_data` which takes a query string completely formed by the agent instead of parameters. May be removed in future versions. Includes unit test and two notebooks to demonstrate usage. **Dependencies**: cassio **Twitter handle**: @PatrickMcFadin --------- Co-authored-by: Phil Miesle <phil.miesle@datastax.com> Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com> Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
@@ -0,0 +1 @@
|
||||
""" Cassandra Tool """
|
@@ -0,0 +1,36 @@
|
||||
"""Tools for interacting with an Apache Cassandra database."""
|
||||
|
||||
# System prompt that instructs a model to act as a Cassandra query-planning
# assistant and shows it the JSON shape expected for a "query path".
# NOTE: the literal must open with exactly three quotes; a fourth quote would
# become the first character of the prompt text sent to the model.
QUERY_PATH_PROMPT = """
You are an Apache Cassandra expert query analysis bot with the following features
and rules:
 - You will take a question from the end user about finding certain
   data in the database.
 - You will examine the schema of the database and create a query path.
 - You will provide the user with the correct query to find the data they are looking
   for showing the steps provided by the query path.
 - You will use best practices for querying Apache Cassandra using partition keys
   and clustering columns.
 - Avoid using ALLOW FILTERING in the query.
 - The goal is to find a query path, so it may take querying other tables to get
   to the final answer.

The following is an example of a query path in JSON format:

{
  "query_paths": [
    {
      "description": "Direct query to users table using email",
      "steps": [
        {
          "table": "user_credentials",
          "query":
            "SELECT userid FROM user_credentials WHERE email = 'example@example.com';"
        },
        {
          "table": "users",
          "query": "SELECT * FROM users WHERE userid = ?;"
        }
      ]
    }
  ]
}"""
|
@@ -0,0 +1,126 @@
|
||||
"""Tools for interacting with an Apache Cassandra database."""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Any, Dict, Optional, Sequence, Type, Union
|
||||
|
||||
from langchain_core.callbacks import CallbackManagerForToolRun
|
||||
from langchain_core.pydantic_v1 import BaseModel, Field
|
||||
from langchain_core.tools import BaseTool
|
||||
|
||||
from langchain_community.utilities.cassandra_database import CassandraDatabase
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from cassandra.cluster import ResultSet
|
||||
|
||||
|
||||
class BaseCassandraDatabaseTool(BaseModel):
    """Common pydantic base that carries the Cassandra handle for each tool."""

    # Database wrapper the concrete tools delegate to. Declared with
    # ``exclude=True`` (pydantic: omit the field when the model is exported).
    db: CassandraDatabase = Field(exclude=True)

    class Config(BaseTool.Config):
        # Reuse BaseTool's model configuration without any overrides.
        pass
|
||||
|
||||
|
||||
class _QueryCassandraDatabaseToolInput(BaseModel):
    # Argument schema for the raw-CQL tool: a single required query string.
    query: str = Field(
        ...,
        description="A detailed and correct CQL query.",
    )
|
||||
|
||||
|
||||
class QueryCassandraDatabaseTool(BaseCassandraDatabaseTool, BaseTool):
    """Run a caller-supplied CQL statement against an Apache Cassandra database."""

    name: str = "cassandra_db_query"
    description: str = """
    Execute a CQL query against the database and get back the result.
    If the query is not correct, an error message will be returned.
    If an error is returned, rewrite the query, check the query, and try again.
    """
    args_schema: Type[BaseModel] = _QueryCassandraDatabaseToolInput

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> Union[str, Sequence[Dict[str, Any]], ResultSet]:
        """Hand ``query`` to the database wrapper and return whatever it yields.

        The wrapper's ``run_no_throw`` is used, so the value is either the
        query results or an error message. ``run_manager`` is not used here.
        """
        outcome = self.db.run_no_throw(query)
        return outcome
|
||||
|
||||
|
||||
class _GetSchemaCassandraDatabaseToolInput(BaseModel):
    # Argument schema for the schema-lookup tool: one required keyspace name.
    keyspace: str = Field(
        ..., description="The name of the keyspace for which to return the schema."
    )
|
||||
|
||||
|
||||
class GetSchemaCassandraDatabaseTool(BaseCassandraDatabaseTool, BaseTool):
    """Describe the tables of one keyspace in an Apache Cassandra database."""

    name: str = "cassandra_db_schema"
    description: str = """
    Input to this tool is a keyspace name, output is a table description
    of Apache Cassandra tables.
    If the query is not correct, an error message will be returned.
    If an error is returned, report back to the user that the keyspace
    doesn't exist and stop.
    """
    args_schema: Type[BaseModel] = _GetSchemaCassandraDatabaseToolInput

    def _run(
        self,
        keyspace: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Return the schema text the database wrapper produces for ``keyspace``.

        Delegates to ``get_keyspace_tables_str_no_throw``; ``run_manager`` is
        not used here.
        """
        schema_text = self.db.get_keyspace_tables_str_no_throw(keyspace)
        return schema_text
|
||||
|
||||
|
||||
class _GetTableDataCassandraDatabaseToolInput(BaseModel):
    # Argument schema for the table-data tool. All four fields are declared
    # required (``...``): where to read from, how to filter, and how many rows.
    keyspace: str = Field(
        ..., description="The name of the keyspace containing the table."
    )
    table: str = Field(
        ..., description="The name of the table for which to return data."
    )
    predicate: str = Field(
        ..., description="The predicate for the query that uses the primary key."
    )
    limit: int = Field(
        ..., description="The maximum number of rows to return."
    )
|
||||
|
||||
|
||||
class GetTableDataCassandraDatabaseTool(BaseCassandraDatabaseTool, BaseTool):
    """
    Fetch rows from a single table in an Apache Cassandra database.

    The predicate is the WHERE-clause condition on the primary key and the
    limit caps the number of rows. Per the tool description, a blank predicate
    or a blank limit returns all rows, which should be avoided where possible.
    """

    name: str = "cassandra_db_select_table_data"
    description: str = """
    Tool for getting data from a table in an Apache Cassandra database.
    Use the WHERE clause to specify the predicate for the query that uses the
    primary key. A blank predicate will return all rows. Avoid this if possible.
    Use the limit to specify the number of rows to return. A blank limit will
    return all rows.
    """
    args_schema: Type[BaseModel] = _GetTableDataCassandraDatabaseToolInput

    def _run(
        self,
        keyspace: str,
        table: str,
        predicate: str,
        limit: int,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Return the wrapper's result for rows of ``keyspace``.``table``.

        The four query arguments are forwarded positionally, in order, to
        ``get_table_data_no_throw``; ``run_manager`` is not used here.
        """
        rows_text = self.db.get_table_data_no_throw(
            keyspace,
            table,
            predicate,
            limit,
        )
        return rows_text
|
Reference in New Issue
Block a user