power bi api wrapper integration tests & bug fix (#4983)

# Powerbi API wrapper bug fix + integration tests - Bug fix by removing `TYPE_CHECKING` in utilities/powerbi.py - Added integration test for power bi api in utilities/test_powerbi_api.py - Added integration test for power bi agent in agent/test_powerbi_agent.py - Edited .env.example to help set up power bi related environment variables - Updated demo notebook with working code in docs../examples/powerbi.ipynb - AzureOpenAI -> ChatOpenAI Notes: Chat models (gpt3.5, gpt4) are much more capable than davinci at writing DAX queries, so that is important to getting the agent to work properly. Interestingly, gpt3.5-turbo needed the examples=DEFAULT_FEWSHOT_EXAMPLES to write consistent DAX queries, so gpt4 seems necessary as the smart llm. Fixes #4325 ## Before submitting Azure-core and Azure-identity are necessary dependencies. Check integration tests with the following: `pytest tests/integration_tests/utilities/test_powerbi_api.py` `pytest tests/integration_tests/agent/test_powerbi_agent.py` You will need a power bi account with a dataset id + table name in order to test. See .env.example for details. ## Who can review? @hwchase17 @vowelparrot --------- Co-authored-by: aditya-pethe <adityapethe1@gmail.com>
2025-09-04 04:28:58 +00:00 · 2023-05-19 11:25:52 -04:00
parent e68dfa7062
commit 06e524416c
7 changed files with 182 additions and 89 deletions
--- a/docs/modules/agents/toolkits/examples/powerbi.ipynb
+++ b/docs/modules/agents/toolkits/examples/powerbi.ipynb
@@ -1,10 +1,7 @@
 {
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "0e499e90-7a6d-4fab-8aab-31a4df417601",
   "metadata": {},
   "source": [
    "# PowerBI Dataset Agent\n",
    "\n",
@@ -17,46 +14,41 @@
    "- You can also supply a username to impersonate for use with datasets that have RLS enabled. \n",
    "- The toolkit uses a LLM to create the query from the question, the agent uses the LLM for the overall execution.\n",
    "- Testing was done mostly with a `text-davinci-003` model, codex models did not seem to perform very well."
-   ]
+   ],
   "metadata": {},
   "attachments": {}
  },
  {
   "cell_type": "markdown",
   "id": "ec927ac6-9b2a-4e8a-9a6e-3e429191875c",
   "metadata": {
    "tags": []
   },
   "source": [
    "## Initialization"
-   ]
+   ],
   "metadata": {
    "tags": []
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "53422913-967b-4f2a-8022-00269c1be1b1",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from langchain.agents.agent_toolkits import create_pbi_agent\n",
    "from langchain.agents.agent_toolkits import PowerBIToolkit\n",
    "from langchain.utilities.powerbi import PowerBIDataset\n",
-    "from langchain.llms.openai import AzureOpenAI\n",
+    "from langchain.chat_models import ChatOpenAI\n",
    "from langchain.agents import AgentExecutor\n",
    "from azure.identity import DefaultAzureCredential"
-   ]
+   ],
   "outputs": [],
   "metadata": {
    "tags": []
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "090f3699-79c6-4ce1-ab96-a94f0121fd64",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
-    "fast_llm = AzureOpenAI(temperature=0.5, max_tokens=1000, deployment_name=\"gpt-35-turbo\", verbose=True)\n",
+    "fast_llm = ChatOpenAI(temperature=0.5, max_tokens=1000, model_name=\"gpt-3.5-turbo\", verbose=True)\n",
-    "smart_llm = AzureOpenAI(temperature=0, max_tokens=100, deployment_name=\"gpt-4\", verbose=True)\n",
+    "smart_llm = ChatOpenAI(temperature=0, max_tokens=100, model_name=\"gpt-4\", verbose=True)\n",
    "\n",
    "toolkit = PowerBIToolkit(\n",
    "    powerbi=PowerBIDataset(dataset_id=\"<dataset_id>\", table_names=['table1', 'table2'], credential=DefaultAzureCredential()), \n",
@@ -68,97 +60,90 @@
    "    toolkit=toolkit,\n",
    "    verbose=True,\n",
    ")"
-   ]
+   ],
   "outputs": [],
   "metadata": {
    "tags": []
   }
  },
  {
   "cell_type": "markdown",
   "id": "36ae48c7-cb08-4fef-977e-c7d4b96a464b",
   "metadata": {},
   "source": [
    "## Example: describing a table"
-   ]
+   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ff70e83d-5ad0-4fc7-bb96-27d82ac166d7",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "agent_executor.run(\"Describe table1\")"
-   ]
+   ],
   "outputs": [],
   "metadata": {
    "tags": []
   }
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "9abcfe8e-1868-42a4-8345-ad2d9b44c681",
   "metadata": {},
   "source": [
    "## Example: simple query on a table\n",
    "In this example, the agent actually figures out the correct query to get a row count of the table."
-   ]
+   ],
   "metadata": {},
   "attachments": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bea76658-a65b-47e2-b294-6d52c5556246",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "agent_executor.run(\"How many records are in table1?\")"
-   ]
+   ],
   "outputs": [],
   "metadata": {
    "tags": []
   }
  },
  {
   "cell_type": "markdown",
   "id": "6fbc26af-97e4-4a21-82aa-48bdc992da26",
   "metadata": {},
   "source": [
    "## Example: running queries"
-   ]
+   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "17bea710-4a23-4de0-b48e-21d57be48293",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "agent_executor.run(\"How many records are there by dimension1 in table2?\")"
-   ]
+   ],
-  },
+   "outputs": [],
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "474dddda-c067-4eeb-98b1-e763ee78b18c",
   "metadata": {
    "tags": []
-   },
+   }
   "outputs": [],
   "source": [
    "agent_executor.run(\"What unique values are there for dimensions2 in table2\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "6fd950e4",
   "metadata": {},
   "source": [
    "## Example: add your own few-shot prompts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "87d677f9",
+   "source": [
-   "metadata": {},
+    "agent_executor.run(\"What unique values are there for dimensions2 in table2\")"
   ],
   "outputs": [],
   "metadata": {
    "tags": []
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Example: add your own few-shot prompts"
   ],
   "metadata": {},
   "attachments": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "#fictional example\n",
    "few_shots = \"\"\"\n",
@@ -182,24 +167,24 @@
    "    toolkit=toolkit,\n",
    "    verbose=True,\n",
    ")"
-   ]
+   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "33f4bb43",
   "metadata": {},
   "outputs": [],
   "source": [
    "agent_executor.run(\"What was the maximum of value in revenue in dollars in 2022?\")"
-   ]
+   ],
   "outputs": [],
   "metadata": {}
  }
 ],
 "metadata": {
  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "name": "python3",
-   "language": "python",
+   "display_name": "Python 3.9.16 64-bit"
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
@@ -211,9 +196,12 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.10.5"
+   "version": "3.9.16"
  },
  "interpreter": {
   "hash": "397704579725e15f5c7cb49fe5f0341eb7531c82d19f2c29d197e8b64ab5776b"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
-}
+}
--- a/langchain/utilities/powerbi.py
+++ b/langchain/utilities/powerbi.py
@@ -1,11 +1,10 @@
 """Wrapper around a Power BI endpoint."""
 from __future__ import annotations
 import logging
 import os
 from copy import deepcopy
-from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Union
+from typing import Any, Dict, Iterable, List, Optional, Union
 import aiohttp
 import requests
@@ -17,8 +16,13 @@ _LOGGER = logging.getLogger(__name__)
 BASE_URL = os.getenv("POWERBI_BASE_URL", "https://api.powerbi.com/v1.0/myorg")
-if TYPE_CHECKING:
+try:
    from azure.core.credentials import TokenCredential
 except ImportError:
    _LOGGER.log(
        logging.WARNING,
        "Could not import azure.core python package.",
    )
 class PowerBIDataset(BaseModel):
@@ -67,8 +71,8 @@ class PowerBIDataset(BaseModel):
                "Content-Type": "application/json",
                "Authorization": "Bearer " + self.token,
            }
-        from azure.core.exceptions import (  # pylint: disable=import-outside-toplevel
+        from azure.core.exceptions import (
-            ClientAuthenticationError,
+            ClientAuthenticationError,  # pylint: disable=import-outside-toplevel
        )
        if self.credential:
--- a/tests/integration_tests/.env.example
+++ b/tests/integration_tests/.env.example
@@ -14,4 +14,12 @@ PINECONE_ENVIRONMENT=us-west4-gcp
 # more details here: https://confluence.atlassian.com/enterprise/using-personal-access-tokens-1026032365.html
 # JIRA_API_TOKEN=your_jira_api_token_here
 # JIRA_USERNAME=your_jira_username_here
-# JIRA_INSTANCE_URL=your_jira_instance_url_here
+# JIRA_INSTANCE_URL=your_jira_instance_url_here
 # power bi
 # sign in to azure in order to authenticate with DefaultAzureCredential
 # details here https://learn.microsoft.com/en-us/python/api/azure-identity/azure.identity.defaultazurecredential?view=azure-python
 POWERBI_DATASET_ID=_powerbi_dataset_id_here
 POWERBI_TABLE_NAME=_test_table_name_here
 POWERBI_NUMROWS=_num_rows_in_your_test_table
--- a/tests/integration_tests/agent/test_powerbi_agent.py
+++ b/tests/integration_tests/agent/test_powerbi_agent.py
@@ -0,0 +1,47 @@
 import pytest
 from langchain.agents.agent_toolkits import PowerBIToolkit, create_pbi_agent
 from langchain.chat_models import ChatOpenAI
 from langchain.utilities.powerbi import PowerBIDataset
 from langchain.utils import get_from_env
 def azure_installed() -> bool:
    """Return True when the Azure SDK modules needed by this test import cleanly.

    Any exception raised while importing is printed and treated as
    "not installed", so the caller gets False instead of an error.
    """
    available = False
    try:
        from azure.core.credentials import TokenCredential  # noqa: F401
        from azure.identity import DefaultAzureCredential  # noqa: F401
    except Exception as e:
        # Best-effort probe: report the reason and fall through with False.
        print(f"azure not installed, skipping test {e}")
    else:
        available = True
    return available
@pytest.mark.skipif(not azure_installed(), reason="requires azure package")
def test_daxquery() -> None:
    """Ask the Power BI agent for a table's row count and check the answer.

    The dataset id, table name and expected row count are looked up through
    ``get_from_env`` under POWERBI_DATASET_ID, POWERBI_TABLE_NAME and
    POWERBI_NUMROWS. The test passes when the expected count appears in the
    text returned by the agent.
    """
    from azure.identity import DefaultAzureCredential

    DATASET_ID = get_from_env("", "POWERBI_DATASET_ID")
    TABLE_NAME = get_from_env("", "POWERBI_TABLE_NAME")
    NUM_ROWS = get_from_env("", "POWERBI_NUMROWS")

    # Model handed to the agent itself.
    fast_llm = ChatOpenAI(
        model_name="gpt-3.5-turbo", temperature=0.5, max_tokens=1000, verbose=True
    )
    # Model handed to the toolkit.
    smart_llm = ChatOpenAI(
        model_name="gpt-4", temperature=0, max_tokens=100, verbose=True
    )

    dataset = PowerBIDataset(
        dataset_id=DATASET_ID,
        table_names=[TABLE_NAME],
        credential=DefaultAzureCredential(),
    )
    toolkit = PowerBIToolkit(powerbi=dataset, llm=smart_llm)
    agent_executor = create_pbi_agent(llm=fast_llm, toolkit=toolkit, verbose=True)

    answer = agent_executor.run(f"How many rows are in the table, {TABLE_NAME}")
    assert NUM_ROWS in answer
--- a/tests/integration_tests/utilities/test_powerbi_api.py
+++ b/tests/integration_tests/utilities/test_powerbi_api.py
@@ -0,0 +1,36 @@
 """Integration test for POWERBI API Wrapper."""
 import pytest
 from langchain.utilities.powerbi import PowerBIDataset
 from langchain.utils import get_from_env
 def azure_installed() -> bool:
    """Probe whether the Azure SDK imports used by this module succeed.

    Returns False (after printing the reason) if importing raises any
    exception, and True otherwise.
    """
    try:
        from azure.core.credentials import TokenCredential  # noqa: F401
        from azure.identity import DefaultAzureCredential  # noqa: F401
    except Exception as e:
        # Best-effort probe: any import-time failure counts as "not installed".
        print(f"azure not installed, skipping test {e}")
        return False
    return True
@pytest.mark.skipif(not azure_installed(), reason="requires azure package")
def test_daxquery() -> None:
    """Run a COUNTROWS DAX query through PowerBIDataset and check the count.

    The dataset id, table name and expected row count are looked up through
    ``get_from_env`` under POWERBI_DATASET_ID, POWERBI_TABLE_NAME and
    POWERBI_NUMROWS. The returned "[RowCount]" value is compared as a string
    against the expected count.
    """
    from azure.identity import DefaultAzureCredential

    DATASET_ID = get_from_env("", "POWERBI_DATASET_ID")
    TABLE_NAME = get_from_env("", "POWERBI_TABLE_NAME")
    NUM_ROWS = get_from_env("", "POWERBI_NUMROWS")

    dataset = PowerBIDataset(
        dataset_id=DATASET_ID,
        table_names=[TABLE_NAME],
        credential=DefaultAzureCredential(),
    )
    response = dataset.run(f'EVALUATE ROW("RowCount", COUNTROWS({TABLE_NAME}))')

    # Drill down to the single row produced by the ROW(...) expression.
    first_row = response["results"][0]["tables"][0]["rows"][0]
    assert NUM_ROWS == str(first_row["[RowCount]"])
--- a/tests/unit_tests/tools/powerbi/init.py
+++ b/tests/unit_tests/tools/powerbi/init.py
--- a/tests/unit_tests/tools/powerbi/test_powerbi.py
+++ b/tests/unit_tests/tools/powerbi/test_powerbi.py
@@ -0,0 +1,10 @@
 def test_power_bi_can_be_imported() -> None:
    """Test that powerbi tools can be imported.

    The goal of this test is to verify that langchain users will not get import
    errors when loading powerbi related code if they don't have optional
    dependencies installed.

    There are no assertions: the test passes as long as none of the imports
    below raises.
    """
    # Function-scoped imports so a failure is attributed to this test.
    from langchain.tools.powerbi.tool import QueryPowerBITool  # noqa
    from langchain.agents.agent_toolkits import PowerBIToolkit, create_pbi_agent  # noqa
    from langchain.utilities.powerbi import PowerBIDataset  # noqa