From 8977451c76bf64fa5f7cff7d2391a0f8488fc8d7 Mon Sep 17 00:00:00 2001 From: Stefan Berkner <94835841+stefan-berkner-tilotech@users.noreply.github.com> Date: Thu, 23 Jan 2025 18:17:59 +0100 Subject: [PATCH] docs: add Tilores provider and tools (#29244) Description: This PR adds documentation for the Tilores provider and tools. Issue: closes #26320 --- .../docs/integrations/providers/tilores.ipynb | 105 ++++++ docs/docs/integrations/tools/tilores.ipynb | 350 ++++++++++++++++++ libs/packages.yml | 7 +- 3 files changed, 461 insertions(+), 1 deletion(-) create mode 100644 docs/docs/integrations/providers/tilores.ipynb create mode 100644 docs/docs/integrations/tools/tilores.ipynb diff --git a/docs/docs/integrations/providers/tilores.ipynb b/docs/docs/integrations/providers/tilores.ipynb new file mode 100644 index 00000000000..b51384ca4c7 --- /dev/null +++ b/docs/docs/integrations/providers/tilores.ipynb @@ -0,0 +1,105 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Tilores\n", + "\n", + "[Tilores](https://tilores.io) is a platform that provides advanced entity resolution solutions for data integration and management. Using cutting-edge algorithms, machine learning, and user-friendly interfaces, Tilores helps organizations match, resolve, and consolidate data from disparate sources, ensuring high-quality, consistent information." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Installation and Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "y8ku6X96sebl" + }, + "outputs": [], + "source": [ + "%pip install --upgrade tilores-langchain" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To access Tilores, you need to [create and configure an instance](https://app.tilores.io). 
If you prefer to test out Tilores first, you can use the [read-only demo credentials](https://github.com/tilotech/identity-rag-customer-insights-chatbot?tab=readme-ov-file#1-configure-customer-data-access)." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "from tilores import TiloresAPI\n", + "\n", + "os.environ[\"TILORES_API_URL\"] = \"\"\n", + "os.environ[\"TILORES_TOKEN_URL\"] = \"\"\n", + "os.environ[\"TILORES_CLIENT_ID\"] = \"\"\n", + "os.environ[\"TILORES_CLIENT_SECRET\"] = \"\"\n", + "\n", + "tilores = TiloresAPI.from_environ()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Please refer to the [Tilores documentation](https://docs.tilotech.io/tilores/publicsaaswalkthrough/) on how to create your own instance." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Toolkits\n", + "\n", + "You can use the [`TiloresTools`](/docs/integrations/tools/tilores) to query data from Tilores:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from tilores_langchain import TiloresTools" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "langchain", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.8" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/docs/docs/integrations/tools/tilores.ipynb b/docs/docs/integrations/tools/tilores.ipynb new file mode 100644 index 00000000000..e2bb19fca43 --- /dev/null +++ b/docs/docs/integrations/tools/tilores.ipynb @@ -0,0 +1,350 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "10238e62-3465-4973-9279-606cbb7ccf16", + 
"metadata": {}, + "source": [ + "---\n", + "sidebar_label: Tilores\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "a6f91f20", + "metadata": {}, + "source": [ + "# Tilores\n", + "\n", + "This notebook covers how to get started with the [Tilores](/docs/integrations/providers/tilores) tools.\n", + "For a more complex example, you can check out our [customer insights chatbot example](https://github.com/tilotech/identity-rag-customer-insights-chatbot).\n", + "\n", + "## Overview\n", + "\n", + "### Integration details\n", + "\n", + "| Class | Package | Serializable | JS support | Package latest |\n", + "| :--- | :--- | :---: | :---: | :---: |\n", + "| TiloresTools | [tilores-langchain](https://pypi.org/project/tilores-langchain/) | ❌ | ❌ | ![PyPI - Version](https://img.shields.io/pypi/v/tilores-langchain?style=flat-square&label=%20) |\n", + "\n", + "## Setup\n", + "\n", + "The integration requires the following packages:" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "id": "f85b4089", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install --quiet -U tilores-langchain langchain" + ] + }, + { + "cell_type": "markdown", + "id": "b15e9266", + "metadata": {}, + "source": [ + "### Credentials\n", + "\n", + "To access Tilores, you need to [create and configure an instance](https://app.tilores.io). If you prefer to test out Tilores first, you can use the [read-only demo credentials](https://github.com/tilotech/identity-rag-customer-insights-chatbot?tab=readme-ov-file#1-configure-customer-data-access)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 73, + "id": "e0b178a2-8816-40ca-b57c-ccdd86dde9c9", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"TILORES_API_URL\"] = \"\"\n", + "os.environ[\"TILORES_TOKEN_URL\"] = \"\"\n", + "os.environ[\"TILORES_CLIENT_ID\"] = \"\"\n", + "os.environ[\"TILORES_CLIENT_SECRET\"] = \"\"" + ] + }, + { + "cell_type": "markdown", + "id": "1c97218f-f366-479d-8bf7-fe9f2f6df73f", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Here we show how to instantiate an instance of the Tilores tools:" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "8b3ddfe9-ca79-494c-a7ab-1f56d9407a64", + "metadata": {}, + "outputs": [], + "source": [ + "from tilores import TiloresAPI\n", + "from tilores_langchain import TiloresTools\n", + "\n", + "tilores = TiloresAPI.from_environ()\n", + "tilores_tools = TiloresTools(tilores)\n", + "search_tool = tilores_tools.search_tool()\n", + "edge_tool = tilores_tools.edge_tool()" + ] + }, + { + "cell_type": "markdown", + "id": "74147a1a", + "metadata": {}, + "source": [ + "## Invocation\n", + "\n", + "The parameters for the `tilores_search` tool are dependent on the [configured schema](https://docs.tilotech.io/tilores/schema/) within Tilores. The following examples will use the schema for the demo instance with generated data.\n", + "\n", + "### [Invoke directly with args](/docs/concepts/tools)" + ] + }, + { + "cell_type": "markdown", + "id": "010aea95", + "metadata": {}, + "source": [ + "The following example searches for a person called Sophie Müller in Berlin. The Tilores data contains multiple such persons and returns their known email addresses and phone numbers." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "65310a8b-eb0c-4d9e-a618-4f4abe2414fc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of entities: 3\n", + "Number of records: 3\n", + "Email Addresses: ['s.mueller@newcompany.de', 'sophie.mueller@email.de']\n", + "Phone Numbers: ['30987654', '30987654', '30987654']\n", + "Number of records: 5\n", + "Email Addresses: ['mueller.sophie@uni-berlin.de', 'sophie.m@newshipping.de', 's.mueller@newfinance.de']\n", + "Phone Numbers: ['30135792', '30135792']\n", + "Number of records: 2\n", + "Email Addresses: ['s.mueller@company.de']\n", + "Phone Numbers: ['30123456', '30123456']\n" + ] + } + ], + "source": [ + "result = search_tool.invoke(\n", + " {\n", + " \"searchParams\": {\n", + " \"name\": \"Sophie Müller\",\n", + " \"city\": \"Berlin\",\n", + " },\n", + " \"recordFieldsToQuery\": {\n", + " \"email\": True,\n", + " \"phone\": True,\n", + " },\n", + " }\n", + ")\n", + "print(\"Number of entities:\", len(result[\"data\"][\"search\"][\"entities\"]))\n", + "for entity in result[\"data\"][\"search\"][\"entities\"]:\n", + " print(\"Number of records:\", len(entity[\"records\"]))\n", + " print(\n", + " \"Email Addresses:\",\n", + " [record[\"email\"] for record in entity[\"records\"] if record.get(\"email\")],\n", + " )\n", + " print(\n", + " \"Phone Numbers:\",\n", + " [record[\"phone\"] for record in entity[\"records\"] if record.get(\"phone\")],\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "ab5c77ef", + "metadata": {}, + "source": [ + "If we're interested in how the records from the first entity are related, we can use the `edge_tool`. Note that the Tilores entity resolution engine figured out the relation between those records automatically. Please refer to the [edge documentation](https://docs.tilotech.io/tilores/rules/#edges) for more details." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "430e425c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of edges: 7\n", + "Edges: ['e1f2g3h4-i5j6-k7l8-m9n0-o1p2q3r4s5t6:f2g3h4i5-j6k7-l8m9-n0o1-p2q3r4s5t6u7:L1', 'e1f2g3h4-i5j6-k7l8-m9n0-o1p2q3r4s5t6:g3h4i5j6-k7l8-m9n0-o1p2-q3r4s5t6u7v8:L4', 'e1f2g3h4-i5j6-k7l8-m9n0-o1p2q3r4s5t6:f2g3h4i5-j6k7-l8m9-n0o1-p2q3r4s5t6u7:L2', 'f2g3h4i5-j6k7-l8m9-n0o1-p2q3r4s5t6u7:g3h4i5j6-k7l8-m9n0-o1p2-q3r4s5t6u7v8:L1', 'f2g3h4i5-j6k7-l8m9-n0o1-p2q3r4s5t6u7:g3h4i5j6-k7l8-m9n0-o1p2-q3r4s5t6u7v8:L4', 'e1f2g3h4-i5j6-k7l8-m9n0-o1p2q3r4s5t6:g3h4i5j6-k7l8-m9n0-o1p2-q3r4s5t6u7v8:L1', 'e1f2g3h4-i5j6-k7l8-m9n0-o1p2q3r4s5t6:f2g3h4i5-j6k7-l8m9-n0o1-p2q3r4s5t6u7:L4']\n" + ] + } + ], + "source": [ + "edge_result = edge_tool.invoke(\n", + " {\"entityID\": result[\"data\"][\"search\"][\"entities\"][0][\"id\"]}\n", + ")\n", + "edges = edge_result[\"data\"][\"entity\"][\"entity\"][\"edges\"]\n", + "print(\"Number of edges:\", len(edges))\n", + "print(\"Edges:\", edges)" + ] + }, + { + "cell_type": "markdown", + "id": "d6e73897", + "metadata": {}, + "source": [ + "### [Invoke with ToolCall](/docs/concepts/tools)\n", + "\n", + "We can also invoke the tool with a model-generated ToolCall, in which case a ToolMessage will be returned:" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "f90e33a7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "ToolMessage(content='{\"data\": {\"search\": {\"entities\": [{\"id\": \"9601cf3b-e85f-46ab-aaa8-ffb8b46f1c5b\", \"hits\": {\"c3d4e5f6-g7h8-i9j0-k1l2-m3n4o5p6q7r8\": [\"L1\"]}, \"records\": [{\"email\": \"\", \"phone\": \"30123456\"}, {\"email\": \"s.mueller@company.de\", \"phone\": \"30123456\"}]}, {\"id\": \"03da2e11-0aa2-4d17-8aaa-7b32c52decd9\", \"hits\": {\"e1f2g3h4-i5j6-k7l8-m9n0-o1p2q3r4s5t6\": [\"L1\"], \"g3h4i5j6-k7l8-m9n0-o1p2-q3r4s5t6u7v8\": [\"L1\"]}, \"records\": [{\"email\": 
\"s.mueller@newcompany.de\", \"phone\": \"30987654\"}, {\"email\": \"\", \"phone\": \"30987654\"}, {\"email\": \"sophie.mueller@email.de\", \"phone\": \"30987654\"}]}, {\"id\": \"4d896fb5-0d08-4212-a043-b5deb0347106\", \"hits\": {\"j6k7l8m9-n0o1-p2q3-r4s5-t6u7v8w9x0y1\": [\"L1\"], \"l8m9n0o1-p2q3-r4s5-t6u7-v8w9x0y1z2a3\": [\"L1\"], \"m9n0o1p2-q3r4-s5t6-u7v8-w9x0y1z2a3b4\": [\"L1\"], \"n0o1p2q3-r4s5-t6u7-v8w9-x0y1z2a3b4c5\": [\"L1\"]}, \"records\": [{\"email\": \"mueller.sophie@uni-berlin.de\", \"phone\": \"\"}, {\"email\": \"sophie.m@newshipping.de\", \"phone\": \"\"}, {\"email\": \"\", \"phone\": \"30135792\"}, {\"email\": \"\", \"phone\": \"\"}, {\"email\": \"s.mueller@newfinance.de\", \"phone\": \"30135792\"}]}]}}}', name='tilores_search', tool_call_id='1')" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# This is usually generated by a model, but we'll create a tool call directly for demo purposes.\n", + "model_generated_tool_call = {\n", + " \"args\": {\n", + " \"searchParams\": {\n", + " \"name\": \"Sophie Müller\",\n", + " \"city\": \"Berlin\",\n", + " },\n", + " \"recordFieldsToQuery\": {\n", + " \"email\": True,\n", + " \"phone\": True,\n", + " },\n", + " },\n", + " \"id\": \"1\",\n", + " \"name\": search_tool.name,\n", + " \"type\": \"tool_call\",\n", + "}\n", + "search_tool.invoke(model_generated_tool_call)" + ] + }, + { + "cell_type": "markdown", + "id": "659f9fbd-6fcf-445f-aa8c-72d8e60154bd", + "metadata": {}, + "source": [ + "## Chaining\n", + "\n", + "We can use our tool in a chain by first binding it to a [tool-calling model](/docs/how_to/tool_calling/) and then calling it:\n", + "\n", + "import ChatModelTabs from \"@theme/ChatModelTabs\";\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "af3123ad-7a02-40e5-b58e-7d56e23e5830", + "metadata": {}, + "outputs": [], + "source": [ + "# | output: false\n", + "# | echo: false\n", + "\n", + "# !pip 
install -qU langchain langchain-openai\n", + "from langchain.chat_models import init_chat_model\n", + "\n", + "llm = init_chat_model(model=\"gpt-4o\", model_provider=\"openai\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fdbf35b5-3aaf-4947-9ec6-48c21533fb95", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_core.prompts import ChatPromptTemplate\n", + "from langchain_core.runnables import RunnableConfig, chain\n", + "\n", + "prompt = ChatPromptTemplate(\n", + " [\n", + " (\"system\", \"You are a helpful assistant.\"),\n", + " (\"human\", \"{user_input}\"),\n", + " (\"placeholder\", \"{messages}\"),\n", + " ]\n", + ")\n", + "\n", + "# specifying tool_choice will force the model to call this tool.\n", + "llm_with_tools = llm.bind_tools([search_tool], tool_choice=search_tool.name)\n", + "\n", + "llm_chain = prompt | llm_with_tools\n", + "\n", + "\n", + "@chain\n", + "def tool_chain(user_input: str, config: RunnableConfig):\n", + " input_ = {\"user_input\": user_input}\n", + " ai_msg = llm_chain.invoke(input_, config=config)\n", + " tool_msgs = search_tool.batch(ai_msg.tool_calls, config=config)\n", + " return llm_chain.invoke({**input_, \"messages\": [ai_msg, *tool_msgs]}, config=config)\n", + "\n", + "\n", + "tool_chain.invoke(\"Tell me the email addresses from Sophie Müller from Berlin.\")" + ] + }, + { + "cell_type": "markdown", + "id": "4ac8146c", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all Tilores features and configurations head to the official documentation: https://docs.tilotech.io/tilores/" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "langchain", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 
"3.12.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/libs/packages.yml b/libs/packages.yml index 1cb609c3084..4bdf49680fc 100644 --- a/libs/packages.yml +++ b/libs/packages.yml @@ -346,6 +346,11 @@ packages: path: . repo: MehdiZare/langchain-fmp-data downloads: 0 +- name: tilores-langchain + path: . + repo: tilotech/tilores-langchain + provider_page: tilores + downloads: 0 - name: langchain-pipeshift path: . repo: pipeshift-org/langchain-pipeshift @@ -354,4 +359,4 @@ packages: path: . repo: paymanai/langchain-payman-tool downloads: 0 - downloads_updated_at: "2025-01-22T00:00:00+00:00" \ No newline at end of file + downloads_updated_at: "2025-01-22T00:00:00+00:00"