Mirror of https://github.com/hwchase17/langchain.git, synced 2025-06-28 17:38:36 +00:00
PowerBI updates (#7143)
Several updates for the PowerBI tools:

- Handle 0 records returned by asking for a redo with different filtering
- Handle too-large results by optionally tokenizing the result and comparing it against a maximum (change in signature, non-breaking)
- Implement an LLMChain with a chat prompt for chat models in the tools
- Update the main prompt to include the available tables
- Update the tool prompt with the TOPN function
- Split the tool prompt to allow an LLMChain with a ChatPromptTemplate

Smaller fixes for stability.

For visibility: @hinthornw
This commit is contained in:
parent
b9d6d4cd4c
commit
ae5aa496ee
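For orientation, here is a minimal usage sketch of the toolkit and agent touched by this commit. This is not part of the change: the import paths, constructor fields (`dataset_id`, `table_names`, `credential`), and placeholder values are assumptions, while `output_token_limit` and `tiktoken_model_name` are the new options introduced in the hunks below.

```python
# Hedged sketch, not part of the commit: wiring the PowerBI toolkit to an agent.
# Dataset id, table names, and credential are placeholders.
from azure.identity import DefaultAzureCredential
from langchain.agents.agent_toolkits import PowerBIToolkit, create_pbi_agent
from langchain.chat_models import ChatOpenAI
from langchain.utilities.powerbi import PowerBIDataset

dataset = PowerBIDataset(
    dataset_id="<dataset-guid>",           # placeholder
    table_names=["Sales", "Customers"],    # placeholder tables
    credential=DefaultAzureCredential(),
)
llm = ChatOpenAI(temperature=0)
toolkit = PowerBIToolkit(
    powerbi=dataset,
    llm=llm,
    output_token_limit=4000,               # new: cap on the tokenized result size
    tiktoken_model_name="gpt-3.5-turbo",   # new: encoding used to count tokens
)
agent_executor = create_pbi_agent(llm=llm, toolkit=toolkit)
agent_executor.run("How many rows are in the Sales table?")
```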
```diff
@@ -36,13 +36,13 @@ def create_pbi_agent(
             raise ValueError("Must provide either a toolkit or powerbi dataset")
         toolkit = PowerBIToolkit(powerbi=powerbi, llm=llm, examples=examples)
     tools = toolkit.get_tools()
-
+    tables = powerbi.table_names if powerbi else toolkit.powerbi.table_names
     agent = ZeroShotAgent(
         llm_chain=LLMChain(
             llm=llm,
             prompt=ZeroShotAgent.create_prompt(
                 tools,
-                prefix=prefix.format(top_k=top_k),
+                prefix=prefix.format(top_k=top_k).format(tables=tables),
                 suffix=suffix,
                 format_instructions=format_instructions,
                 input_variables=input_variables,
```
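A note on the double `.format()` above: it works because the prompt escapes the tables placeholder as `{{tables}}`, so the first pass (filling `top_k`) leaves a literal `{tables}` behind for the second pass. A standalone illustration with made-up strings:

```python
# Plain str.format semantics; the strings here are made up for illustration.
prefix = "Limit queries to {top_k} results. The user has access to these tables: {{tables}}."
after_first_pass = prefix.format(top_k=10)
# -> "Limit queries to 10 results. The user has access to these tables: {tables}."
final_prompt = after_first_pass.format(tables="Sales, Customers")
# -> "Limit queries to 10 results. The user has access to these tables: Sales, Customers."
```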
```diff
@@ -41,10 +41,11 @@ def create_pbi_chat_agent(
             raise ValueError("Must provide either a toolkit or powerbi dataset")
         toolkit = PowerBIToolkit(powerbi=powerbi, llm=llm, examples=examples)
     tools = toolkit.get_tools()
+    tables = powerbi.table_names if powerbi else toolkit.powerbi.table_names
     agent = ConversationalChatAgent.from_llm_and_tools(
         llm=llm,
         tools=tools,
-        system_message=prefix.format(top_k=top_k),
+        system_message=prefix.format(top_k=top_k).format(tables=tables),
         human_message=suffix,
         input_variables=input_variables,
         callback_manager=callback_manager,
```
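The chat variant follows the same pattern; a hedged sketch of how it might be called (the entry point name and parameters are assumed, and `dataset` reuses the placeholder from the first sketch):

```python
# Hedged sketch, not part of the commit.
from langchain.agents.agent_toolkits import create_pbi_chat_agent
from langchain.chat_models import ChatOpenAI

chat_agent = create_pbi_chat_agent(
    llm=ChatOpenAI(temperature=0),  # chat model, so the toolkit builds a chat-prompt chain
    powerbi=dataset,                # placeholder PowerBIDataset from the earlier sketch
)
chat_agent.run("Which table has the most rows?")
```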
```diff
@@ -4,7 +4,7 @@
 
 POWERBI_PREFIX = """You are an agent designed to help users interact with a PowerBI Dataset.
 
-Agent has access to a tool that can write a query based on the question and then run those against PowerBI, Microsofts business intelligence tool. The questions from the users should be interpreted as related to the dataset that is available and not general questions about the world. If the question does not seem related to the dataset, just return "This does not appear to be part of this dataset." as the answer.
+Agent has access to a tool that can write a query based on the question and then run those against PowerBI, Microsofts business intelligence tool. The questions from the users should be interpreted as related to the dataset that is available and not general questions about the world. If the question does not seem related to the dataset, return "This does not appear to be part of this dataset." as the answer.
 
 Given an input question, ask to run the questions against the dataset, then look at the results and return the answer, the answer should be a complete sentence that answers the question, if multiple rows are asked find a way to write that in a easily readable format for a human, also make sure to represent numbers in readable ways, like 1M instead of 1000000. Unless the user specifies a specific number of examples they wish to obtain, always limit your query to at most {top_k} results.
 """
@@ -17,9 +17,9 @@ Thought: I can first ask which tables I have, then how each table is defined and
 
 POWERBI_CHAT_PREFIX = """Assistant is a large language model built to help users interact with a PowerBI Dataset.
 
-Assistant has access to a tool that can write a query based on the question and then run those against PowerBI, Microsofts business intelligence tool. The questions from the users should be interpreted as related to the dataset that is available and not general questions about the world. If the question does not seem related to the dataset, just return "This does not appear to be part of this dataset." as the answer.
+Assistant should try to create a correct and complete answer to the question from the user. If the user asks a question not related to the dataset it should return "This does not appear to be part of this dataset." as the answer. The user might make a mistake with the spelling of certain values, if you think that is the case, ask the user to confirm the spelling of the value and then run the query again. Unless the user specifies a specific number of examples they wish to obtain, and the results are too large, limit your query to at most {top_k} results, but make it clear when answering which field was used for the filtering. The user has access to these tables: {{tables}}.
 
-Given an input question, ask to run the questions against the dataset, then look at the results and return the answer, the answer should be a complete sentence that answers the question, if multiple rows are asked find a way to write that in a easily readable format for a human, also make sure to represent numbers in readable ways, like 1M instead of 1000000. Unless the user specifies a specific number of examples they wish to obtain, always limit your query to at most {top_k} results.
+The answer should be a complete sentence that answers the question, if multiple rows are asked find a way to write that in a easily readable format for a human, also make sure to represent numbers in readable ways, like 1M instead of 1000000.
 """
 
 POWERBI_CHAT_SUFFIX = """TOOLS
```
```diff
@@ -1,5 +1,5 @@
 """Toolkit for interacting with a Power BI dataset."""
-from typing import List, Optional
+from typing import List, Optional, Union
 
 from pydantic import Field
 
@@ -7,9 +7,19 @@ from langchain.agents.agent_toolkits.base import BaseToolkit
 from langchain.base_language import BaseLanguageModel
 from langchain.callbacks.base import BaseCallbackManager
 from langchain.chains.llm import LLMChain
+from langchain.chat_models.base import BaseChatModel
 from langchain.prompts import PromptTemplate
+from langchain.prompts.chat import (
+    ChatPromptTemplate,
+    HumanMessagePromptTemplate,
+    SystemMessagePromptTemplate,
+)
 from langchain.tools import BaseTool
-from langchain.tools.powerbi.prompt import QUESTION_TO_QUERY
+from langchain.tools.powerbi.prompt import (
+    QUESTION_TO_QUERY_BASE,
+    SINGLE_QUESTION_TO_QUERY,
+    USER_INPUT,
+)
 from langchain.tools.powerbi.tool import (
     InfoPowerBITool,
     ListPowerBITool,
@@ -22,10 +32,12 @@ class PowerBIToolkit(BaseToolkit):
     """Toolkit for interacting with PowerBI dataset."""
 
     powerbi: PowerBIDataset = Field(exclude=True)
-    llm: BaseLanguageModel = Field(exclude=True)
+    llm: Union[BaseLanguageModel, BaseChatModel] = Field(exclude=True)
     examples: Optional[str] = None
     max_iterations: int = 5
     callback_manager: Optional[BaseCallbackManager] = None
+    output_token_limit: Optional[int] = None
+    tiktoken_model_name: Optional[str] = None
 
     class Config:
         """Configuration for this pydantic object."""
```
```diff
@@ -34,30 +46,47 @@ class PowerBIToolkit(BaseToolkit):
 
     def get_tools(self) -> List[BaseTool]:
         """Get the tools in the toolkit."""
-        if self.callback_manager:
-            chain = LLMChain(
-                llm=self.llm,
-                callback_manager=self.callback_manager,
-                prompt=PromptTemplate(
-                    template=QUESTION_TO_QUERY,
-                    input_variables=["tool_input", "tables", "schemas", "examples"],
-                ),
-            )
-        else:
-            chain = LLMChain(
-                llm=self.llm,
-                prompt=PromptTemplate(
-                    template=QUESTION_TO_QUERY,
-                    input_variables=["tool_input", "tables", "schemas", "examples"],
-                ),
-            )
         return [
             QueryPowerBITool(
-                llm_chain=chain,
+                llm_chain=self._get_chain(),
                 powerbi=self.powerbi,
                 examples=self.examples,
                 max_iterations=self.max_iterations,
+                output_token_limit=self.output_token_limit,
+                tiktoken_model_name=self.tiktoken_model_name,
             ),
             InfoPowerBITool(powerbi=self.powerbi),
             ListPowerBITool(powerbi=self.powerbi),
         ]
+
+    def _get_chain(self) -> LLMChain:
+        """Construct the chain based on the callback manager and model type."""
+        if isinstance(self.llm, BaseLanguageModel):
+            return LLMChain(
+                llm=self.llm,
+                callback_manager=self.callback_manager
+                if self.callback_manager
+                else None,
+                prompt=PromptTemplate(
+                    template=SINGLE_QUESTION_TO_QUERY,
+                    input_variables=["tool_input", "tables", "schemas", "examples"],
+                ),
+            )
+
+        system_prompt = SystemMessagePromptTemplate(
+            prompt=PromptTemplate(
+                template=QUESTION_TO_QUERY_BASE,
+                input_variables=["tables", "schemas", "examples"],
+            )
+        )
+        human_prompt = HumanMessagePromptTemplate(
+            prompt=PromptTemplate(
+                template=USER_INPUT,
+                input_variables=["tool_input"],
+            )
+        )
+        return LLMChain(
+            llm=self.llm,
+            callback_manager=self.callback_manager if self.callback_manager else None,
+            prompt=ChatPromptTemplate.from_messages([system_prompt, human_prompt]),
+        )
```
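Both branches of `_get_chain` expose the same four input variables, so the rest of the tool does not care which prompt style is in use. For illustration, a hedged sketch of calling the constructed chain directly (all values are placeholders):

```python
# Hedged sketch, not part of the commit: direct call on the chain built above.
dax_query = chain.predict(
    tool_input="How many rows are in the Sales table?",
    tables="Sales, Customers",                  # placeholder table list
    schemas="Sales: [Id, Amount, Region]",      # placeholder schema description
    examples="",                                # few-shot examples, optional
)
```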
```diff
@@ -1,5 +1,5 @@
 # flake8: noqa
-QUESTION_TO_QUERY = """
+QUESTION_TO_QUERY_BASE = """
 Answer the question below with a DAX query that can be sent to Power BI. DAX queries have a simple syntax comprised of just one required keyword, EVALUATE, and several optional keywords: ORDER BY, START AT, DEFINE, MEASURE, VAR, TABLE, and COLUMN. Each keyword defines a statement used for the duration of the query. Any time < or > are used in the text below it means that those values need to be replaced by table, columns or other things. If the question is not something you can answer with a DAX query, reply with "I cannot answer this" and the question will be escalated to a human.
 
 Some DAX functions return a table instead of a scalar, and must be wrapped in a function that evaluates the table and returns a scalar; unless the table is a single column, single row table, then it is treated as a scalar value. Most DAX functions require one or more arguments, which can include tables, columns, expressions, and values. However, some functions, such as PI, do not require any arguments, but always require parentheses to indicate the null argument. For example, you must always type PI(), not PI. You can also nest functions within other functions.
@@ -14,6 +14,7 @@ VAR <name> = <expression> - Stores the result of an expression as a named variab
 
 FILTER(<table>,<filter>) - Returns a table that represents a subset of another table or expression, where <filter> is a Boolean expression that is to be evaluated for each row of the table. For example, [Amount] > 0 or [Region] = "France"
 ROW(<name>, <expression>) - Returns a table with a single row containing values that result from the expressions given to each column.
+TOPN(<n>, <table>, <OrderBy_Expression>, <Order>) - Returns a table with the top n rows from the specified table, sorted by the specified expression, in the order specified by 0 for descending, 1 for ascending, the default is 0. Multiple OrderBy_Expressions and Order pairs can be given, separated by a comma.
 DISTINCT(<column>) - Returns a one-column table that contains the distinct values from the specified column. In other words, duplicate values are removed and only unique values are returned. This function cannot be used to Return values into a cell or column on a worksheet; rather, you nest the DISTINCT function within a formula, to get a list of distinct values that can be passed to another function and then counted, summed, or used for other operations.
 DISTINCT(<table>) - Returns a table by removing duplicate rows from another table or expression.
 
@@ -40,11 +41,15 @@ and the schema's for some are given here:
 
 Examples:
 {examples}
+"""
+
+USER_INPUT = """
 Question: {tool_input}
 DAX:
 """
+
+SINGLE_QUESTION_TO_QUERY = f"{QUESTION_TO_QUERY_BASE}{USER_INPUT}"
 
 DEFAULT_FEWSHOT_EXAMPLES = """
 Question: How many rows are in the table <table>?
 DAX: EVALUATE ROW(\"Number of rows\", COUNTROWS(<table>))
```
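To make the new TOPN guidance concrete, this is the kind of query it steers the model toward; table and column names here are made up:

```python
# Illustration only, not part of the commit.
example_question = "What are the 5 largest orders?"
example_dax = "EVALUATE TOPN(5, Orders, Orders[Amount], 0)"  # 0 = descending, per the prompt text
```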
```diff
@@ -10,11 +10,11 @@ from langchain.callbacks.manager import (
     CallbackManagerForToolRun,
 )
 from langchain.chains.llm import LLMChain
+from langchain.chat_models.openai import _import_tiktoken
 from langchain.tools.base import BaseTool
 from langchain.tools.powerbi.prompt import (
     BAD_REQUEST_RESPONSE,
     DEFAULT_FEWSHOT_EXAMPLES,
-    QUESTION_TO_QUERY,
     RETRY_RESPONSE,
 )
 from langchain.utilities.powerbi import PowerBIDataset, json_to_md
@@ -33,10 +33,11 @@ class QueryPowerBITool(BaseTool):
     """ # noqa: E501
     llm_chain: LLMChain
     powerbi: PowerBIDataset = Field(exclude=True)
-    template: Optional[str] = QUESTION_TO_QUERY
     examples: Optional[str] = DEFAULT_FEWSHOT_EXAMPLES
     session_cache: Dict[str, Any] = Field(default_factory=dict, exclude=True)
     max_iterations: int = 5
+    output_token_limit: int = 4000
+    tiktoken_model_name: Optional[str] = None  # "cl100k_base"
 
     class Config:
         """Configuration for this pydantic object."""
```
```diff
@@ -48,12 +49,8 @@ class QueryPowerBITool(BaseTool):
         cls, llm_chain: LLMChain
     ) -> LLMChain:
         """Make sure the LLM chain has the correct input variables."""
-        if llm_chain.prompt.input_variables != [
-            "tool_input",
-            "tables",
-            "schemas",
-            "examples",
-        ]:
+        for var in llm_chain.prompt.input_variables:
+            if var not in ["tool_input", "tables", "schemas", "examples"]:
                 raise ValueError(
                     "LLM chain for QueryPowerBITool must have input variables ['tool_input', 'tables', 'schemas', 'examples'], found %s",  # noqa: C0301 E501 # pylint: disable=C0301
                     llm_chain.prompt.input_variables,
```
```diff
@@ -94,7 +91,7 @@
         if query == "I cannot answer this":
             self.session_cache[tool_input] = query
             return self.session_cache[tool_input]
-        logger.info("PBI Query: %s", query)
+        logger.info("PBI Query:\n%s", query)
         start_time = perf_counter()
         pbi_result = self.powerbi.run(command=query)
         end_time = perf_counter()
@@ -131,7 +128,7 @@
         """Execute the query, return the results or an error message."""
         if cache := self._check_cache(tool_input):
             logger.debug("Found cached result for %s: %s", tool_input, cache)
-            return cache
+            return f"{cache}, from cache, you have already asked this question."
         try:
             logger.info("Running PBI Query Tool with input: %s", tool_input)
             query = await self.llm_chain.apredict(
```
```diff
@@ -154,10 +151,10 @@
         logger.debug("PBI Result: %s", pbi_result)
         logger.debug(f"PBI Query duration: {end_time - start_time:0.6f}")
         result, error = self._parse_output(pbi_result)
-        if error is not None and "TokenExpired" in error:
+        if error is not None and ("TokenExpired" in error or "TokenError" in error):
             self.session_cache[
                 tool_input
-            ] = "Authentication token expired or invalid, please try reauthenticate."
+            ] = "Authentication token expired or invalid, please try to reauthenticate or check the scope of the credential."  # noqa: E501
             return self.session_cache[tool_input]
 
         iterations = kwargs.get("iterations", 0)
```
```diff
@@ -177,10 +174,24 @@
 
     def _parse_output(
         self, pbi_result: Dict[str, Any]
-    ) -> Tuple[Optional[str], Optional[str]]:
+    ) -> Tuple[Optional[str], Optional[Any]]:
         """Parse the output of the query to a markdown table."""
         if "results" in pbi_result:
-            return json_to_md(pbi_result["results"][0]["tables"][0]["rows"]), None
+            rows = pbi_result["results"][0]["tables"][0]["rows"]
+            if len(rows) == 0:
+                logger.info("0 records in result, query was valid.")
+                return (
+                    None,
+                    "0 rows returned, this might be correct, but please validate if all filter values were correct?",  # noqa: E501
+                )
+            result = json_to_md(rows)
+            too_long, length = self._result_too_large(result)
+            if too_long:
+                return (
+                    f"Result too large, please try to be more specific or use the `TOPN` function. The result is {length} tokens long, the limit is {self.output_token_limit} tokens.",  # noqa: E501
+                    None,
+                )
+            return result, None
 
         if "error" in pbi_result:
             if (
@@ -189,7 +200,17 @@ class QueryPowerBITool(BaseTool):
             ):
                 return None, pbi_result["error"]["pbi.error"]["details"][0]["detail"]
             return None, pbi_result["error"]
-        return None, "Unknown error"
+        return None, pbi_result
 
+    def _result_too_large(self, result: str) -> Tuple[bool, int]:
+        """Tokenize the output of the query."""
+        if self.tiktoken_model_name:
+            tiktoken_ = _import_tiktoken()
+            encoding = tiktoken_.encoding_for_model(self.tiktoken_model_name)
+            length = len(encoding.encode(result))
+            logger.info("Result length: %s", length)
+            return length > self.output_token_limit, length
+        return False, 0
+
 
 class InfoPowerBITool(BaseTool):
```
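The size check above leans on tiktoken. A minimal standalone sketch of the same counting idea follows; the function name, default limit, and model name are illustrative, not the library's API:

```python
from typing import Tuple

import tiktoken


def result_too_large(result: str, limit: int = 4000, model: str = "gpt-3.5-turbo") -> Tuple[bool, int]:
    """Count tokens in a rendered result and compare against a limit (hedged sketch)."""
    encoding = tiktoken.encoding_for_model(model)
    length = len(encoding.encode(result))
    return length > limit, length

# e.g. result_too_large(markdown_table) -> (False, <token count>)
```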
```diff
@@ -208,13 +208,17 @@ class PowerBIDataset(BaseModel):
     def run(self, command: str) -> Any:
         """Execute a DAX command and return a json representing the results."""
         _LOGGER.debug("Running command: %s", command)
-        result = requests.post(
+        response = requests.post(
             self.request_url,
             json=self._create_json_content(command),
             headers=self.headers,
             timeout=10,
         )
-        return result.json()
+        if response.status_code == 403:
+            return (
+                "TokenError: Could not login to PowerBI, please check your credentials."
+            )
+        return response.json()
 
     async def arun(self, command: str) -> Any:
         """Execute a DAX command and return the result asynchronously."""
```
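Because `run` now returns a plain string on a 403 instead of raising, callers have to check for it. A hedged sketch, reusing the placeholder `dataset` from the first sketch:

```python
# Hedged sketch, not part of the commit.
result = dataset.run('EVALUATE ROW("Number of rows", COUNTROWS(Sales))')
if isinstance(result, str) and result.startswith("TokenError"):
    # Re-authenticate (or fix the credential scope) before retrying.
    ...
else:
    rows = result["results"][0]["tables"][0]["rows"]
```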
```diff
@@ -226,6 +230,8 @@ class PowerBIDataset(BaseModel):
                 json=self._create_json_content(command),
                 timeout=10,
             ) as response:
+                if response.status == 403:
+                    return "TokenError: Could not login to PowerBI, please check your credentials."  # noqa: E501
                 response_json = await response.json(content_type=response.content_type)
                 return response_json
         async with aiohttp.ClientSession() as session:
@@ -235,6 +241,8 @@ class PowerBIDataset(BaseModel):
                 json=self._create_json_content(command),
                 timeout=10,
             ) as response:
+                if response.status == 403:
+                    return "TokenError: Could not login to PowerBI, please check your credentials."  # noqa: E501
                 response_json = await response.json(content_type=response.content_type)
                 return response_json
 
```
```diff
@@ -244,6 +252,8 @@ def json_to_md(
     table_name: Optional[str] = None,
 ) -> str:
     """Converts a JSON object to a markdown table."""
+    if len(json_contents) == 0:
+        return ""
     output_md = ""
     headers = json_contents[0].keys()
     for header in headers:
```
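The hunk is cut off by the page here. For orientation, a minimal sketch of a converter along the lines of `json_to_md` is shown below; this is an approximation for illustration, not the exact library code:

```python
from typing import Dict, List, Optional, Union


def json_to_md_sketch(
    json_contents: List[Dict[str, Union[str, int, float]]],
    table_name: Optional[str] = None,
) -> str:
    """Render a list of row dicts as a markdown table (approximation of json_to_md)."""
    if len(json_contents) == 0:
        return ""
    output_md = ""
    # Header row, with bracketed PowerBI-style column names cleaned up.
    for header in json_contents[0].keys():
        clean = header.replace("[", ".").replace("]", "")
        if table_name:
            clean = clean.replace(f"{table_name}.", "")
        output_md += f"| {clean} "
    output_md += "|\n"
    # One markdown row per record.
    for row in json_contents:
        for value in row.values():
            output_md += f"| {value} "
        output_md += "|\n"
    return output_md
```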
|
Loading…
Reference in New Issue
Block a user