From c9eed530ce4c7997690ff7835d11e0bb76b5957a Mon Sep 17 00:00:00 2001 From: Bar Cohen <42403844+wpbarco@users.noreply.github.com> Date: Fri, 12 Sep 2025 23:51:42 +0300 Subject: [PATCH] docs: add Timbr tools integration (#32862) # feat(integrations): Add Timbr tools integration ## DESCRIPTION This PR adds comprehensive documentation and integration support for Timbr's semantic layer tools in LangChain. [Timbr](https://timbr.ai/) provides an ontology-driven semantic layer that enables natural language querying of databases through business-friendly concepts. It connects raw data to governed business measures for consistent access across BI, APIs, and AI applications. [`langchain-timbr`](https://pypi.org/project/langchain-timbr/) is a Python SDK that extends [LangChain](https://github.com/WPSemantix/Timbr-GenAI/tree/main/LangChain) and [LangGraph](https://github.com/WPSemantix/Timbr-GenAI/tree/main/LangGraph) with custom agents, chains, and nodes for seamless integration with the Timbr semantic layer. It enables converting natural language prompts into optimized semantic-SQL queries and executing them directly against your data. **What's Added:** - Complete integration documentation for `langchain-timbr` package - Tool documentation page with usage examples and API reference **Integration Components:** - `IdentifyTimbrConceptChain` - Identify relevant concepts from user prompts - `GenerateTimbrSqlChain` - Generate SQL queries from natural language - `ValidateTimbrSqlChain` - Validate queries against knowledge graph schemas - `ExecuteTimbrQueryChain` - Execute queries against semantic databases - `GenerateAnswerChain` - Generate human-readable answers from results ## Documentation Added - `/docs/integrations/providers/timbr.mdx` - Provider overview and configuration - `/docs/integrations/tools/timbr.ipynb` - Comprehensive tool usage examples ## Links - [PyPI Package](https://pypi.org/project/langchain-timbr/) - [GitHub Repository](https://github.com/WPSemantix/langchain-timbr) - [Official Documentation](https://docs.timbr.ai/doc/docs/integration/langchain-sdk/) --------- Co-authored-by: Mason Daugherty --- docs/docs/integrations/providers/timbr.mdx | 170 ++++++++++ docs/docs/integrations/tools/timbr.ipynb | 350 +++++++++++++++++++++ libs/packages.yml | 4 + 3 files changed, 524 insertions(+) create mode 100644 docs/docs/integrations/providers/timbr.mdx create mode 100644 docs/docs/integrations/tools/timbr.ipynb diff --git a/docs/docs/integrations/providers/timbr.mdx b/docs/docs/integrations/providers/timbr.mdx new file mode 100644 index 00000000000..3ebbda62764 --- /dev/null +++ b/docs/docs/integrations/providers/timbr.mdx @@ -0,0 +1,170 @@ +# Timbr + +[Timbr](https://docs.timbr.ai/doc/docs/integration/langchain-sdk/) integrates natural language inputs with Timbr's ontology-driven semantic layer. Leveraging Timbr's robust ontology capabilities, the SDK integrates with Timbr data models and leverages semantic relationships and annotations, enabling users to query data using business-friendly language. + +Timbr provides a pre-built SQL agent, `TimbrSqlAgent`, which can be used for end-to-end purposes from user prompt, through semantic SQL query generation and validation, to query execution and result analysis. 
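+
+A condensed sketch of this end-to-end flow (the full walkthrough is on the [Timbr tool page](/docs/integrations/tools/timbr)); the `ChatOpenAI` model and the placeholder URL, token, and ontology values below are illustrative:
+
+```python
+from langchain.agents import AgentExecutor
+from langchain_openai import ChatOpenAI
+from langchain_timbr import TimbrSqlAgent
+
+llm = ChatOpenAI(model="gpt-4o", temperature=0)
+
+# The pre-built agent handles concept detection, SQL generation, validation,
+# query execution, and answer generation in a single call
+timbr_agent = TimbrSqlAgent(
+    llm=llm,
+    url="https://your-timbr-app.com/",
+    token="tk_XXXXXXXXXXXXXXXXXXXXXXXX",
+    ontology="timbr_knowledge_graph",
+)
+
+agent_result = AgentExecutor.from_agent_and_tools(
+    agent=timbr_agent,
+    tools=[],  # no extra tools needed; the agent wraps the Timbr chains itself
+    verbose=True,
+).invoke("What are the total sales for last month?")
+
+print(agent_result["answer"])
+print(agent_result["sql"])
+```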
+ +For customizations and partial usage, you can use LangChain chains and LangGraph nodes with our 5 main tools: + +- `IdentifyTimbrConceptChain` & `IdentifyConceptNode` - Identify relevant concepts from user prompts +- `GenerateTimbrSqlChain` & `GenerateTimbrSqlNode` - Generate SQL queries from natural language prompts +- `ValidateTimbrSqlChain` & `ValidateSemanticSqlNode` - Validate SQL queries against Timbr knowledge graph schemas +- `ExecuteTimbrQueryChain` & `ExecuteSemanticQueryNode` - Execute (semantic and regular) SQL queries against Timbr knowledge graph databases +- `GenerateAnswerChain` & `GenerateResponseNode` - Generate human-readable answers based on a given prompt and data rows + +Additionally, `langchain-timbr` provides `TimbrLlmConnector` for manual integration with Timbr's semantic layer using LLM providers. This connector includes the following methods: + +- `get_ontologies` - List Timbr's semantic knowledge graphs +- `get_concepts` - List the concepts of the selected knowledge graph ontology +- `get_views` - List the views of the selected knowledge graph ontology +- `determine_concept` - Identify relevant concepts from user prompts +- `generate_sql` - Generate SQL queries from natural language prompts +- `validate_sql` - Validate SQL queries against Timbr knowledge graph schemas +- `run_timbr_query` - Execute (semantic and regular) SQL queries against Timbr knowledge graph databases +- `run_llm_query` - Execute the full agent pipeline: determine the concept, generate SQL, and run the query from a natural language prompt + +## Quickstart + +### Installation + +#### Install the package + +```bash +pip install langchain-timbr +``` + +#### Optional: Install with selected LLM provider + +Choose one of: openai, anthropic, google, azure_openai, snowflake, databricks (or 'all') + +```bash +pip install 'langchain-timbr[<provider>]' +``` + +## Configuration + +Starting from `langchain-timbr` v2.0.0, all chains, agents, and nodes support optional environment-based configuration. You can set the following environment variables to provide default values and simplify setup for the provided tools: + +### Timbr Connection Parameters + +- **TIMBR_URL**: Default Timbr server URL +- **TIMBR_TOKEN**: Default Timbr authentication token +- **TIMBR_ONTOLOGY**: Default ontology/knowledge graph name + +When these environment variables are set, the corresponding parameters (`url`, `token`, `ontology`) become optional in all chain and agent constructors and will use the environment values as defaults. + +### LLM Configuration Parameters + +- **LLM_TYPE**: The LLM provider type (one of the `langchain_timbr` `LlmTypes` enum values: 'openai-chat', 'anthropic-chat', 'chat-google-generative-ai', 'azure-openai-chat', 'snowflake-cortex', 'chat-databricks') +- **LLM_API_KEY**: The API key for authenticating with the LLM provider +- **LLM_MODEL**: The model name or deployment to use +- **LLM_TEMPERATURE**: Temperature setting for the LLM +- **LLM_ADDITIONAL_PARAMS**: Additional parameters as a dict or JSON string + +When LLM environment variables are set, the `llm` parameter becomes optional and will use the `LlmWrapper` with environment configuration.
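+
+With these variables exported (see the example setup below), chains and agents can be constructed without passing connection or LLM arguments explicitly. A minimal sketch, assuming `langchain-timbr` v2.0.0 or later with all of the variables above set:
+
+```python
+from langchain_timbr import ExecuteTimbrQueryChain
+
+# url, token, ontology, and llm fall back to the TIMBR_* / LLM_* environment variables
+execute_timbr_query_chain = ExecuteTimbrQueryChain()
+
+result = execute_timbr_query_chain.invoke({"prompt": "What are the total sales for last month?"})
+print(result["sql"], result["rows"])
+```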
+ +Example environment setup: + +```bash +# Timbr connection +export TIMBR_URL="https://your-timbr-app.com/" +export TIMBR_TOKEN="tk_XXXXXXXXXXXXXXXXXXXXXXXX" +export TIMBR_ONTOLOGY="timbr_knowledge_graph" + +# LLM configuration +export LLM_TYPE="openai-chat" +export LLM_API_KEY="your-openai-api-key" +export LLM_MODEL="gpt-4o" +export LLM_TEMPERATURE="0.1" +export LLM_ADDITIONAL_PARAMS='{"max_tokens": 1000}' +``` + +## Usage + +Import the chain or node you need, or use `TimbrLlmConnector` to integrate manually with Timbr's semantic layer. For a complete working agent example, see the [Timbr tool page](/docs/integrations/tools/timbr). + +### ExecuteTimbrQueryChain example + +```python +from langchain_openai import ChatOpenAI +from langchain_timbr import ExecuteTimbrQueryChain + +# You can use the standard LangChain ChatOpenAI/ChatAnthropic models +# or any other chat model based on langchain_core.language_models.chat_models.BaseChatModel +llm = ChatOpenAI(model="gpt-4o", temperature=0, openai_api_key='open-ai-api-key') + +# Optional alternative: Use Timbr's LlmWrapper, which provides generic connections to different LLM providers +from langchain_timbr import LlmWrapper, LlmTypes +llm = LlmWrapper(llm_type=LlmTypes.OpenAI, api_key="open-ai-api-key", model="gpt-4o") + +execute_timbr_query_chain = ExecuteTimbrQueryChain( + llm=llm, + url="https://your-timbr-app.com/", + token="tk_XXXXXXXXXXXXXXXXXXXXXXXX", + ontology="timbr_knowledge_graph", + schema="dtimbr", # optional + concept="Sales", # optional + concepts_list=["Sales", "Orders"], # optional + views_list=["sales_view"], # optional + note="We only need sums", # optional + retries=3, # optional + should_validate_sql=True # optional +) + +result = execute_timbr_query_chain.invoke({"prompt": "What are the total sales for last month?"}) +rows = result["rows"] +sql = result["sql"] +concept = result["concept"] +schema = result["schema"] +error = result.get("error", None) + +usage_metadata = result.get("execute_timbr_usage_metadata", {}) +determine_concept_usage = usage_metadata.get('determine_concept', {}) +generate_sql_usage = usage_metadata.get('generate_sql', {}) +# Each usage_metadata item contains: +# * 'approximate': Estimated token count calculated before invoking the LLM +# * 'input_tokens'/'output_tokens'/'total_tokens'/etc.: Actual token usage metrics returned by the LLM +``` + +### Multiple chains using SequentialChain example + +```python +from langchain.chains import SequentialChain +from langchain_timbr import ExecuteTimbrQueryChain, GenerateAnswerChain +from langchain_openai import ChatOpenAI + +# You can use the standard LangChain ChatOpenAI/ChatAnthropic models +# or any other chat model based on langchain_core.language_models.chat_models.BaseChatModel +llm = ChatOpenAI(model="gpt-4o", temperature=0, openai_api_key='open-ai-api-key') + +# Optional alternative: Use Timbr's LlmWrapper, which provides generic connections to different LLM providers +from langchain_timbr import LlmWrapper, LlmTypes +llm = LlmWrapper(llm_type=LlmTypes.OpenAI, api_key="open-ai-api-key", model="gpt-4o") + +execute_timbr_query_chain = ExecuteTimbrQueryChain( + llm=llm, + url='https://your-timbr-app.com/', + token='tk_XXXXXXXXXXXXXXXXXXXXXXXX', + ontology='timbr_knowledge_graph', +) + +generate_answer_chain = GenerateAnswerChain( + llm=llm, + url='https://your-timbr-app.com/', + token='tk_XXXXXXXXXXXXXXXXXXXXXXXX', +) + +pipeline = SequentialChain( + chains=[execute_timbr_query_chain, generate_answer_chain], + input_variables=["prompt"], + output_variables=["answer", "sql"] +) + +result =
pipeline.invoke({"prompt": "What are the total sales for last month?"}) +``` + +## Additional Resources + +- [PyPI](https://pypi.org/project/langchain-timbr) +- [GitHub](https://github.com/WPSemantix/langchain-timbr) +- [LangChain Timbr Docs](https://docs.timbr.ai/doc/docs/integration/langchain-sdk/) +- [LangGraph Timbr Docs](https://docs.timbr.ai/doc/docs/integration/langgraph-sdk) diff --git a/docs/docs/integrations/tools/timbr.ipynb b/docs/docs/integrations/tools/timbr.ipynb new file mode 100644 index 00000000000..e941abba03f --- /dev/null +++ b/docs/docs/integrations/tools/timbr.ipynb @@ -0,0 +1,350 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "2ce4bdbc", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "sidebar_label: timbr\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "a6f91f20", + "metadata": {}, + "source": [ + "# Timbr\n", + "\n", + "[Timbr](https://docs.timbr.ai/doc/docs/integration/langchain-sdk/) integrates natural language inputs with Timbr's ontology-driven semantic layer. Leveraging Timbr's robust ontology capabilities, the SDK integrates with Timbr data models and leverages semantic relationships and annotations, enabling users to query data using business-friendly language.\n", + "\n", + "This notebook provides a quick overview for getting started with Timbr tools and agents. For more information about Timbr visit [Timbr.ai](https://timbr.ai/) or the [Timbr Documentation](https://docs.timbr.ai/doc/docs/integration/langchain-sdk/)\n", + "\n", + "## Overview\n", + "\n", + "### Integration details\n", + "\n", + "Timbr package for LangChain is [langchain-timbr](https://pypi.org/project/langchain-timbr), which provides seamless integration with Timbr's semantic layer for natural language to SQL conversion.\n", + "\n", + "### Tool features\n", + "\n", + "| Tool Name | Description |\n", + "| :--- | :--- |\n", + "| `IdentifyTimbrConceptChain` | Identify relevant concepts from user prompts |\n", + "| `GenerateTimbrSqlChain` | Generate SQL queries from natural language prompts |\n", + "| `ValidateTimbrSqlChain` | Validate SQL queries against Timbr knowledge graph schemas |\n", + "| `ExecuteTimbrQueryChain` | Execute SQL queries against Timbr knowledge graph databases |\n", + "| `GenerateAnswerChain` | Generate human-readable answers from query results |\n", + "| `TimbrSqlAgent` | End-to-end SQL agent for natural language queries |\n", + "\n", + "### TimbrSqlAgent Parameters\n", + "\n", + "The `TimbrSqlAgent` is a pre-built agent that combines all the above tools for end-to-end natural language to SQL processing.\n", + "\n", + "For the complete list of parameters and detailed documentation, see: [TimbrSqlAgent Documentation](https://docs.timbr.ai/doc/docs/integration/langchain-sdk/#timbr-sql-agent)\n", + "\n", + "| Parameter | Type | Required | Description |\n", + "| :--- | :--- | :--- | :--- |\n", + "| `llm` | BaseChatModel | Yes | Language model instance (ChatOpenAI, ChatAnthropic, etc.) 
|\n", + "| `url` | str | Yes | Timbr application URL |\n", + "| `token` | str | Yes | Timbr API token |\n", + "| `ontology` | str | Yes | Knowledge graph ontology name |\n", + "| `schema` | str | No | Database schema name |\n", + "| `concept` | str | No | Specific concept to focus on |\n", + "| `concepts_list` | List[str] | No | List of relevant concepts |\n", + "| `views_list` | List[str] | No | List of available views |\n", + "| `note` | str | No | Additional context or instructions |\n", + "| `retries` | int | No | Number of retry attempts (default: 3) |\n", + "| `should_validate_sql` | bool | No | Whether to validate generated SQL (default: True) |\n", + "\n", + "## Setup\n", + "\n", + "The integration lives in the `langchain-timbr` package.\n", + "\n", + "In this example, we'll use OpenAI for the LLM provider." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f85b4089", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --quiet -U langchain-timbr[openai]" + ] + }, + { + "cell_type": "markdown", + "id": "b15e9266", + "metadata": {}, + "source": [ + "### Credentials\n", + "\n", + "You'll need Timbr credentials to use the tools. Get your API token from your Timbr application's API settings." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e0b178a2-8816-40ca-b57c-ccdd86dde9c9", + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "# Set up Timbr credentials\n", + "if not os.environ.get(\"TIMBR_URL\"):\n", + " os.environ[\"TIMBR_URL\"] = input(\"Timbr URL:\\n\")\n", + "\n", + "if not os.environ.get(\"TIMBR_TOKEN\"):\n", + " os.environ[\"TIMBR_TOKEN\"] = getpass.getpass(\"Timbr API Token:\\n\")\n", + "\n", + "if not os.environ.get(\"TIMBR_ONTOLOGY\"):\n", + " os.environ[\"TIMBR_ONTOLOGY\"] = input(\"Timbr Ontology:\\n\")\n", + "\n", + "if not os.environ.get(\"OPENAI_API_KEY\"):\n", + " os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "1c97218f-f366-479d-8bf7-fe9f2f6df73f", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Instantiate Timbr tools and agents. 
First, let's set up the LLM and basic Timbr chains:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b3ddfe9-ca79-494c-a7ab-1f56d9407a64", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_timbr import (\n", + " ExecuteTimbrQueryChain,\n", + " GenerateAnswerChain,\n", + " TimbrSqlAgent,\n", + " LlmWrapper,\n", + " LlmTypes,\n", + ")\n", + "\n", + "# Set up the LLM\n", + "# from langchain_openai import ChatOpenAI\n", + "# llm = ChatOpenAI(model=\"gpt-4o\", temperature=0)\n", + "\n", + "# Alternative: Use Timbr's LlmWrapper for an easy LLM setup\n", + "llm = LlmWrapper(\n", + " llm_type=LlmTypes.OpenAI, api_key=os.environ[\"OPENAI_API_KEY\"], model=\"gpt-4o\"\n", + ")\n", + "\n", + "# Instantiate Timbr chains\n", + "execute_timbr_query_chain = ExecuteTimbrQueryChain(\n", + " llm=llm,\n", + " url=os.environ[\"TIMBR_URL\"],\n", + " token=os.environ[\"TIMBR_TOKEN\"],\n", + " ontology=os.environ[\"TIMBR_ONTOLOGY\"],\n", + ")\n", + "\n", + "generate_answer_chain = GenerateAnswerChain(\n", + " llm=llm, url=os.environ[\"TIMBR_URL\"], token=os.environ[\"TIMBR_TOKEN\"]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "74147a1a", + "metadata": {}, + "source": [ + "## Invocation\n", + "\n", + "### Execute SQL queries from natural language\n", + "\n", + "You can use the individual chains to perform specific operations:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "65310a8b-eb0c-4d9e-a618-4f4abe2414fc", + "metadata": {}, + "outputs": [], + "source": [ + "# Execute a natural language query\n", + "result = execute_timbr_query_chain.invoke(\n", + " {\"prompt\": \"What are the total sales for last month?\"}\n", + ")\n", + "\n", + "print(\"SQL Query:\", result[\"sql\"])\n", + "print(\"Results:\", result[\"rows\"])\n", + "print(\"Concept:\", result[\"concept\"])\n", + "\n", + "# Generate a human-readable answer from the results\n", + "answer_result = generate_answer_chain.invoke(\n", + " {\"prompt\": \"What are the total sales for last month?\", \"rows\": result[\"rows\"]}\n", + ")\n", + "\n", + "print(\"Human-readable answer:\", answer_result[\"answer\"])" + ] + }, + { + "cell_type": "markdown", + "id": "d6e73897", + "metadata": {}, + "source": [ + "## Use within an agent\n", + "\n", + "### Using TimbrSqlAgent\n", + "\n", + "The `TimbrSqlAgent` provides an end-to-end solution that combines concept identification, SQL generation, validation, execution, and answer generation:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f90e33a7", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.agents import AgentExecutor\n", + "\n", + "# Create a TimbrSqlAgent with all parameters\n", + "timbr_agent = TimbrSqlAgent(\n", + " llm=llm,\n", + " url=os.environ[\"TIMBR_URL\"],\n", + " token=os.environ[\"TIMBR_TOKEN\"],\n", + " ontology=os.environ[\"TIMBR_ONTOLOGY\"],\n", + " concepts_list=[\"Sales\", \"Orders\"], # optional\n", + " views_list=[\"sales_view\"], # optional\n", + " note=\"Focus on monthly aggregations\", # optional\n", + " retries=3, # optional\n", + " should_validate_sql=True, # optional\n", + ")\n", + "\n", + "# Use the agent for end-to-end natural language to answer processing\n", + "agent_result = AgentExecutor.from_agent_and_tools(\n", + " agent=timbr_agent,\n", + " tools=[], # No tools needed as we're directly using the chain\n", + " verbose=True,\n", + ").invoke(\"Show me the top 5 customers by total sales amount this year\")\n", + "\n", + "print(\"Final Answer:\", 
agent_result[\"answer\"])\n", + "print(\"Generated SQL:\", agent_result[\"sql\"])\n", + "print(\"Usage Metadata:\", agent_result.get(\"usage_metadata\", {}))" + ] + }, + { + "cell_type": "markdown", + "id": "659f9fbd-6fcf-445f-aa8c-72d8e60154bd", + "metadata": {}, + "source": [ + "### Sequential Chains\n", + "\n", + "You can combine multiple Timbr chains using LangChain's SequentialChain for custom workflows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "af3123ad-7a02-40e5-b58e-7d56e23e5830", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chains import SequentialChain\n", + "\n", + "# Create a sequential pipeline\n", + "pipeline = SequentialChain(\n", + " chains=[execute_timbr_query_chain, generate_answer_chain],\n", + " input_variables=[\"prompt\"],\n", + " output_variables=[\"answer\", \"sql\", \"rows\"],\n", + ")\n", + "\n", + "# Execute the pipeline\n", + "pipeline_result = pipeline.invoke(\n", + " {\"prompt\": \"What are the average order values by customer segment?\"}\n", + ")\n", + "\n", + "print(\"Pipeline Result:\", pipeline_result)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fdbf35b5-3aaf-4947-9ec6-48c21533fb95", + "metadata": {}, + "outputs": [], + "source": [ + "# Example: Accessing usage metadata from Timbr operations\n", + "result_with_metadata = execute_timbr_query_chain.invoke(\n", + " {\"prompt\": \"How many orders were placed last quarter?\"}\n", + ")\n", + "\n", + "# Extract usage metadata\n", + "usage_metadata = result_with_metadata.get(\"execute_timbr_usage_metadata\", {})\n", + "determine_concept_usage = usage_metadata.get(\"determine_concept\", {})\n", + "generate_sql_usage = usage_metadata.get(\"generate_sql\", {})\n", + "\n", + "print(determine_concept_usage)\n", + "\n", + "print(\n", + " \"Concept determination token estimate:\",\n", + " determine_concept_usage.get(\"approximate\", \"N/A\"),\n", + ")\n", + "print(\n", + " \"Concept determination tokens:\",\n", + " determine_concept_usage.get(\"token_usage\", {}).get(\"total_tokens\", \"N/A\"),\n", + ")\n", + "\n", + "print(\"SQL generation token estimate:\", generate_sql_usage.get(\"approximate\", \"N/A\"))\n", + "print(\n", + " \"SQL generation tokens:\",\n", + " generate_sql_usage.get(\"token_usage\", {}).get(\"total_tokens\", \"N/A\"),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "4ac8146c", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "- [PyPI](https://pypi.org/project/langchain-timbr)\n", + "- [GitHub](https://github.com/WPSemantix/langchain-timbr)\n", + "- [LangChain Timbr Documentation](https://docs.timbr.ai/doc/docs/integration/langchain-sdk/)\n", + "- [LangGraph Timbr Documentation](https://docs.timbr.ai/doc/docs/integration/langgraph-sdk)\n", + "- [Timbr Official Website](https://timbr.ai/)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/libs/packages.yml b/libs/packages.yml index 5ee7e4aa077..2937bfa801c 100644 --- a/libs/packages.yml +++ b/libs/packages.yml @@ -729,6 +729,10 @@ packages: - name: langchain-scrapeless repo: scrapeless-ai/langchain-scrapeless path: . 
+- name: langchain-timbr + provider_page: timbr + path: . + repo: WPSemantix/langchain-timbr - name: langchain-zenrows path: . repo: ZenRows-Hub/langchain-zenrows