diff --git a/.github/actions/poetry_setup/action.yml b/.github/actions/poetry_setup/action.yml
index 22dcb05e16e..d1342465c34 100644
--- a/.github/actions/poetry_setup/action.yml
+++ b/.github/actions/poetry_setup/action.yml
@@ -39,10 +39,35 @@ runs:
with:
path: |
/opt/pipx/venvs/poetry
- /opt/pipx_bin/poetry
# This step caches the poetry installation, so make sure it's keyed on the poetry version as well.
key: bin-poetry-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-${{ inputs.poetry-version }}
+ - name: Refresh shell hash table and fix up symlinks
+ if: steps.cache-bin-poetry.outputs.cache-hit == 'true'
+ shell: bash
+ env:
+ POETRY_VERSION: ${{ inputs.poetry-version }}
+ PYTHON_VERSION: ${{ inputs.python-version }}
+ run: |
+ set -eux
+
+ # Refresh the shell's command hash table, so the lookups below aren't served from stale cached paths (see the sketch after this diff).
+ hash -r
+
+ # `actions/cache@v3` doesn't always seem able to correctly unpack symlinks.
+ # Delete and recreate the symlinks pipx expects to have.
+ rm /opt/pipx/venvs/poetry/bin/python
+ cd /opt/pipx/venvs/poetry/bin
+ ln -s "$(which "python$PYTHON_VERSION")" python
+ chmod +x python
+ cd /opt/pipx_bin/
+ ln -s /opt/pipx/venvs/poetry/bin/poetry poetry
+ chmod +x poetry
+
+ # Ensure everything got set up correctly.
+ /opt/pipx/venvs/poetry/bin/python --version
+ /opt/pipx_bin/poetry --version
+
- name: Install poetry
if: steps.cache-bin-poetry.outputs.cache-hit != 'true'
shell: bash
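For context on the `hash -r` call in the cache-hit step above: bash caches the resolved path of each command it runs, so a binary that has moved or been recreated can keep resolving to a stale location until that cache is cleared. A minimal sketch of the behavior (hypothetical paths, not part of this change):

```bash
#!/usr/bin/env bash
mkdir -p /tmp/bin_a /tmp/bin_b
printf '#!/bin/sh\necho old\n' > /tmp/bin_a/tool
chmod +x /tmp/bin_a/tool
export PATH="/tmp/bin_b:/tmp/bin_a:$PATH"

tool     # prints "old"; bash hashes /tmp/bin_a/tool

# A newer binary appears earlier in PATH.
printf '#!/bin/sh\necho new\n' > /tmp/bin_b/tool
chmod +x /tmp/bin_b/tool

tool     # may still print "old": bash reuses the hashed path
hash -r  # drop the cached lookups
tool     # prints "new"
```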
diff --git a/.github/workflows/_lint.yml b/.github/workflows/_lint.yml
index 1a01b225a00..64169ce0be0 100644
--- a/.github/workflows/_lint.yml
+++ b/.github/workflows/_lint.yml
@@ -87,7 +87,7 @@ jobs:
python-version: ${{ matrix.python-version }}
poetry-version: ${{ env.POETRY_VERSION }}
working-directory: ${{ inputs.working-directory }}
- cache-key: lint
+ cache-key: lint-with-extras
- name: Check Poetry File
shell: bash
@@ -102,9 +102,17 @@ jobs:
poetry lock --check
- name: Install dependencies
+ # Also installs dev/lint/test/typing dependencies, to ensure we have
+ # type hints for as many of our libraries as possible.
+      # This helps catch errors that can only be spotted when those dependencies are installed, for example:
+ # https://github.com/langchain-ai/langchain/pull/10249/files#diff-935185cd488d015f026dcd9e19616ff62863e8cde8c0bee70318d3ccbca98341
+ #
+ # If you change this configuration, make sure to change the `cache-key`
+ # in the `poetry_setup` action above to stop using the old cache.
+      # It doesn't matter how you change it; any change will bust the cache.
working-directory: ${{ inputs.working-directory }}
run: |
- poetry install
+ poetry install --with dev,lint,test,typing
- name: Install langchain editable
working-directory: ${{ inputs.working-directory }}
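For local reference, `--with` layers optional Poetry dependency groups on top of the main dependencies. A quick way to preview what the extra groups in the install step above pull in, without touching the environment, is a dry run (a sketch, assuming Poetry 1.2+):

```bash
# Show what `poetry install --with dev,lint,test,typing` would
# install, without actually changing the environment.
poetry install --with dev,lint,test,typing --dry-run
```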
diff --git a/.github/workflows/_pydantic_compatibility.yml b/.github/workflows/_pydantic_compatibility.yml
index 7d8fe26d92f..94d362f3274 100644
--- a/.github/workflows/_pydantic_compatibility.yml
+++ b/.github/workflows/_pydantic_compatibility.yml
@@ -79,3 +79,15 @@ jobs:
- name: Run pydantic compatibility tests
shell: bash
run: make test
+
+ - name: Ensure the tests did not create any additional files
+ shell: bash
+ run: |
+ set -eu
+
+ STATUS="$(git status)"
+ echo "$STATUS"
+
+ # grep will exit non-zero if the target message isn't found,
+ # and `set -e` above will cause the step to fail.
+ echo "$STATUS" | grep 'nothing to commit, working tree clean'
diff --git a/.github/workflows/_test.yml b/.github/workflows/_test.yml
index 76d86a2862e..04be6a2c39a 100644
--- a/.github/workflows/_test.yml
+++ b/.github/workflows/_test.yml
@@ -43,3 +43,15 @@ jobs:
- name: Run core tests
shell: bash
run: make test
+
+ - name: Ensure the tests did not create any additional files
+ shell: bash
+ run: |
+ set -eu
+
+ STATUS="$(git status)"
+ echo "$STATUS"
+
+ # grep will exit non-zero if the target message isn't found,
+ # and `set -e` above will cause the step to fail.
+ echo "$STATUS" | grep 'nothing to commit, working tree clean'
diff --git a/.github/workflows/langchain_ci.yml b/.github/workflows/langchain_ci.yml
index 8f1fc5d8744..f184af9772d 100644
--- a/.github/workflows/langchain_ci.yml
+++ b/.github/workflows/langchain_ci.yml
@@ -6,6 +6,8 @@ on:
branches: [ master ]
pull_request:
paths:
+ - '.github/actions/poetry_setup/action.yml'
+ - '.github/tools/**'
- '.github/workflows/_lint.yml'
- '.github/workflows/_test.yml'
- '.github/workflows/_pydantic_compatibility.yml'
@@ -81,3 +83,15 @@ jobs:
- name: Run extended tests
run: make extended_tests
+
+ - name: Ensure the tests did not create any additional files
+ shell: bash
+ run: |
+ set -eu
+
+ STATUS="$(git status)"
+ echo "$STATUS"
+
+ # grep will exit non-zero if the target message isn't found,
+ # and `set -e` above will cause the step to fail.
+ echo "$STATUS" | grep 'nothing to commit, working tree clean'
diff --git a/.github/workflows/langchain_experimental_ci.yml b/.github/workflows/langchain_experimental_ci.yml
index 5b00365f82f..c4c4a039c18 100644
--- a/.github/workflows/langchain_experimental_ci.yml
+++ b/.github/workflows/langchain_experimental_ci.yml
@@ -6,6 +6,8 @@ on:
branches: [ master ]
pull_request:
paths:
+ - '.github/actions/poetry_setup/action.yml'
+ - '.github/tools/**'
- '.github/workflows/_lint.yml'
- '.github/workflows/_test.yml'
- '.github/workflows/langchain_experimental_ci.yml'
@@ -113,3 +115,15 @@ jobs:
- name: Run extended tests
run: make extended_tests
+
+ - name: Ensure the tests did not create any additional files
+ shell: bash
+ run: |
+ set -eu
+
+ STATUS="$(git status)"
+ echo "$STATUS"
+
+ # grep will exit non-zero if the target message isn't found,
+ # and `set -e` above will cause the step to fail.
+ echo "$STATUS" | grep 'nothing to commit, working tree clean'
diff --git a/.github/workflows/scheduled_test.yml b/.github/workflows/scheduled_test.yml
index b71eee05929..7ce59d5b695 100644
--- a/.github/workflows/scheduled_test.yml
+++ b/.github/workflows/scheduled_test.yml
@@ -47,3 +47,15 @@ jobs:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
make scheduled_tests
+
+ - name: Ensure the tests did not create any additional files
+ shell: bash
+ run: |
+ set -eu
+
+ STATUS="$(git status)"
+ echo "$STATUS"
+
+ # grep will exit non-zero if the target message isn't found,
+ # and `set -e` above will cause the step to fail.
+ echo "$STATUS" | grep 'nothing to commit, working tree clean'
diff --git a/docs/api_reference/guide_imports.json b/docs/api_reference/guide_imports.json
index f35c7805a03..8e4d0fed32f 100644
--- a/docs/api_reference/guide_imports.json
+++ b/docs/api_reference/guide_imports.json
@@ -317,7 +317,7 @@
"Chatbots": "https://python.langchain.com/docs/use_cases/chatbots",
"Summarization": "https://python.langchain.com/docs/use_cases/summarization",
"Extraction": "https://python.langchain.com/docs/use_cases/extraction",
- "SQL": "https://python.langchain.com/docs/use_cases/sql",
+ "SQL": "https://python.langchain.com/docs/use_cases/qa_structured/sql",
"Tagging": "https://python.langchain.com/docs/use_cases/tagging",
"Code Understanding": "https://python.langchain.com/docs/use_cases/code_understanding",
"AutoGPT": "https://python.langchain.com/docs/use_cases/autonomous_agents/autogpt",
@@ -400,7 +400,7 @@
"Summarization": "https://python.langchain.com/docs/use_cases/summarization",
"Extraction": "https://python.langchain.com/docs/use_cases/extraction",
"Interacting with APIs": "https://python.langchain.com/docs/use_cases/apis",
- "SQL": "https://python.langchain.com/docs/use_cases/sql",
+ "SQL": "https://python.langchain.com/docs/use_cases/qa_structured/sql",
"QA over Documents": "https://python.langchain.com/docs/use_cases/question_answering/index",
"Retrieve from vector stores directly": "https://python.langchain.com/docs/use_cases/question_answering/how_to/vector_db_text_generation",
"Improve document indexing with HyDE": "https://python.langchain.com/docs/use_cases/question_answering/how_to/hyde",
@@ -641,7 +641,7 @@
"Chatbots": "https://python.langchain.com/docs/use_cases/chatbots",
"Extraction": "https://python.langchain.com/docs/use_cases/extraction",
"Interacting with APIs": "https://python.langchain.com/docs/use_cases/apis",
- "SQL": "https://python.langchain.com/docs/use_cases/sql",
+ "SQL": "https://python.langchain.com/docs/use_cases/qa_structured/sql",
"HuggingGPT": "https://python.langchain.com/docs/use_cases/autonomous_agents/hugginggpt",
"Perform context-aware text splitting": "https://python.langchain.com/docs/use_cases/question_answering/how_to/document-context-aware-QA",
"Retrieve from vector stores directly": "https://python.langchain.com/docs/use_cases/question_answering/how_to/vector_db_text_generation",
@@ -1009,7 +1009,7 @@
"LangSmith Walkthrough": "https://python.langchain.com/docs/guides/langsmith/walkthrough",
"Comparing Chain Outputs": "https://python.langchain.com/docs/guides/evaluation/examples/comparisons",
"Agent Trajectory": "https://python.langchain.com/docs/guides/evaluation/trajectory/trajectory_eval",
- "SQL": "https://python.langchain.com/docs/use_cases/sql",
+ "SQL": "https://python.langchain.com/docs/use_cases/qa_structured/sql",
"Multi-modal outputs: Image & Text": "https://python.langchain.com/docs/use_cases/multi_modal/image_agent",
"Agent Debates with Tools": "https://python.langchain.com/docs/use_cases/agent_simulations/two_agent_debate_tools",
"Multiple callback handlers": "https://python.langchain.com/docs/modules/callbacks/multiple_callbacks",
@@ -1268,7 +1268,7 @@
"SQL Database Agent": "https://python.langchain.com/docs/integrations/toolkits/sql_database",
"JSON Agent": "https://python.langchain.com/docs/integrations/toolkits/json",
"NIBittensorLLM": "https://python.langchain.com/docs/integrations/llms/bittensor",
- "SQL": "https://python.langchain.com/docs/use_cases/sql",
+ "SQL": "https://python.langchain.com/docs/use_cases/qa_structured/sql",
"BabyAGI with Tools": "https://python.langchain.com/docs/use_cases/agents/baby_agi_with_agent",
"Conversational Retrieval Agent": "https://python.langchain.com/docs/use_cases/question_answering/how_to/conversational_retrieval_agents",
"Plug-and-Plai": "https://python.langchain.com/docs/use_cases/agents/custom_agent_with_plugin_retrieval_using_plugnplai",
@@ -1832,12 +1832,12 @@
"create_sql_agent": {
"CnosDB": "https://python.langchain.com/docs/integrations/providers/cnosdb",
"SQL Database Agent": "https://python.langchain.com/docs/integrations/toolkits/sql_database",
- "SQL": "https://python.langchain.com/docs/use_cases/sql"
+ "SQL": "https://python.langchain.com/docs/use_cases/qa_structured/sql"
},
"SQLDatabaseToolkit": {
"CnosDB": "https://python.langchain.com/docs/integrations/providers/cnosdb",
"SQL Database Agent": "https://python.langchain.com/docs/integrations/toolkits/sql_database",
- "SQL": "https://python.langchain.com/docs/use_cases/sql",
+ "SQL": "https://python.langchain.com/docs/use_cases/qa_structured/sql",
"Use ToolKits with OpenAI Functions": "https://python.langchain.com/docs/modules/agents/how_to/use_toolkits_with_openai_functions"
},
"SageMakerCallbackHandler": {
@@ -1899,7 +1899,7 @@
"Rebuff": "https://python.langchain.com/docs/integrations/providers/rebuff",
"SQL Database Agent": "https://python.langchain.com/docs/integrations/toolkits/sql_database",
"Cookbook": "https://python.langchain.com/docs/guides/expression_language/cookbook",
- "SQL": "https://python.langchain.com/docs/use_cases/sql",
+ "SQL": "https://python.langchain.com/docs/use_cases/qa_structured/sql",
"Multiple Retrieval Sources": "https://python.langchain.com/docs/use_cases/question_answering/how_to/multiple_retrieval"
},
"Weaviate": {
@@ -3035,11 +3035,11 @@
"Interacting with APIs": "https://python.langchain.com/docs/use_cases/apis"
},
"create_sql_query_chain": {
- "SQL": "https://python.langchain.com/docs/use_cases/sql",
+ "SQL": "https://python.langchain.com/docs/use_cases/qa_structured/sql",
"Multiple Retrieval Sources": "https://python.langchain.com/docs/use_cases/question_answering/how_to/multiple_retrieval"
},
"ElasticsearchDatabaseChain": {
- "SQL": "https://python.langchain.com/docs/use_cases/sql"
+ "SQL": "https://python.langchain.com/docs/use_cases/qa_structured/sql"
},
"FileChatMessageHistory": {
"AutoGPT": "https://python.langchain.com/docs/use_cases/autonomous_agents/autogpt"
diff --git a/docs/docs_skeleton/docs/guides/langsmith/index.md b/docs/docs_skeleton/docs/guides/langsmith/index.md
index 9915e70c5fb..c4cfee065a4 100644
--- a/docs/docs_skeleton/docs/guides/langsmith/index.md
+++ b/docs/docs_skeleton/docs/guides/langsmith/index.md
@@ -2,11 +2,21 @@
import DocCardList from "@theme/DocCardList";
-LangSmith helps you trace and evaluate your language model applications and intelligent agents to help you
+[LangSmith](https://smith.langchain.com) helps you trace and evaluate your language model applications and intelligent agents so you can
move from prototype to production.
Check out the [interactive walkthrough](/docs/guides/langsmith/walkthrough) below to get started.
-For more information, please refer to the [LangSmith documentation](https://docs.smith.langchain.com/)
+For more information, please refer to the [LangSmith documentation](https://docs.smith.langchain.com/).
+
+For tutorials and other end-to-end examples demonstrating ways to integrate LangSmith in your workflow,
+check out the [LangSmith Cookbook](https://github.com/langchain-ai/langsmith-cookbook). Some of the guides therein include:
+
+- Leveraging user feedback in your JS application ([link](https://github.com/langchain-ai/langsmith-cookbook/blob/main/feedback-examples/nextjs/README.md)).
+- Building an automated feedback pipeline ([link](https://github.com/langchain-ai/langsmith-cookbook/blob/main/feedback-examples/algorithmic-feedback/algorithmic_feedback.ipynb)).
+- How to evaluate and audit your RAG workflows ([link](https://github.com/langchain-ai/langsmith-cookbook/tree/main/testing-examples/qa-correctness)).
+- How to fine-tune an LLM on real usage data ([link](https://github.com/langchain-ai/langsmith-cookbook/blob/main/fine-tuning-examples/export-to-openai/fine-tuning-on-chat-runs.ipynb)).
+- How to use the [LangChain Hub](https://smith.langchain.com/hub) to version your prompts ([link](https://github.com/langchain-ai/langsmith-cookbook/blob/main/hub-examples/retrieval-qa-chain/retrieval-qa.ipynb)).
+
diff --git a/docs/docs_skeleton/docs/modules/memory/types/buffer.mdx b/docs/docs_skeleton/docs/modules/memory/types/buffer.mdx
index d417b631746..0ffd152f738 100644
--- a/docs/docs_skeleton/docs/modules/memory/types/buffer.mdx
+++ b/docs/docs_skeleton/docs/modules/memory/types/buffer.mdx
@@ -1,6 +1,6 @@
# Conversation Buffer
-This notebook shows how to use `ConversationBufferMemory`. This memory allows for storing of messages and then extracts the messages in a variable.
+This notebook shows how to use `ConversationBufferMemory`. This memory allows for storing messages and then extracting them into a variable.
We can first extract it as a string.
diff --git a/docs/docs_skeleton/docs/use_cases/question_answering/_category_.yml b/docs/docs_skeleton/docs/use_cases/question_answering/_category_.yml
new file mode 100644
index 00000000000..75252fdc392
--- /dev/null
+++ b/docs/docs_skeleton/docs/use_cases/question_answering/_category_.yml
@@ -0,0 +1,2 @@
+position: 0
+collapsed: false
diff --git a/docs/docs_skeleton/docs/use_cases/web_scraping/index.mdx b/docs/docs_skeleton/docs/use_cases/web_scraping/index.mdx
deleted file mode 100644
index ce28ca38395..00000000000
--- a/docs/docs_skeleton/docs/use_cases/web_scraping/index.mdx
+++ /dev/null
@@ -1,9 +0,0 @@
----
-sidebar_position: 3
----
-
-# Web Scraping
-
-Web scraping has historically been a challenging endeavor due to the ever-changing nature of website structures, making it tedious for developers to maintain their scraping scripts. Traditional methods often rely on specific HTML tags and patterns which, when altered, can disrupt data extraction processes.
-
-Enter the LLM-based method for parsing HTML: By leveraging the capabilities of LLMs, and especially OpenAI Functions in LangChain's extraction chain, developers can instruct the model to extract only the desired data in a specified format. This method not only streamlines the extraction process but also significantly reduces the time spent on manual debugging and script modifications. Its adaptability means that even if websites undergo significant design changes, the extraction remains consistent and robust. This level of resilience translates to reduced maintenance efforts, cost savings, and ensures a higher quality of extracted data. Compared to its predecessors, the LLM-based approach wins out in the web scraping domain by transforming a historically cumbersome task into a more automated and efficient process.
diff --git a/docs/docs_skeleton/vercel.json b/docs/docs_skeleton/vercel.json
index dfa378952a5..2f560db73a0 100644
--- a/docs/docs_skeleton/vercel.json
+++ b/docs/docs_skeleton/vercel.json
@@ -3178,7 +3178,11 @@
},
{
"source": "/en/latest/use_cases/tabular.html",
- "destination": "/docs/use_cases/tabular"
+ "destination": "/docs/use_cases/qa_structured"
+ },
+ {
+ "source": "/docs/use_cases/sql(/?)",
+ "destination": "/docs/use_cases/qa_structured/sql"
},
{
"source": "/en/latest/youtube.html",
@@ -3370,7 +3374,7 @@
},
{
"source": "/docs/modules/chains/popular/sqlite",
- "destination": "/docs/use_cases/tabular/sqlite"
+ "destination": "/docs/use_cases/qa_structured/sql"
},
{
"source": "/docs/modules/chains/popular/openai_functions",
@@ -3582,7 +3586,7 @@
},
{
"source": "/docs/modules/chains/additional/elasticsearch_database",
- "destination": "/docs/use_cases/tabular/elasticsearch_database"
+ "destination": "/docs/use_cases/qa_structured/integrations/elasticsearch"
},
{
"source": "/docs/modules/chains/additional/tagging",
diff --git a/docs/extras/additional_resources/youtube.mdx b/docs/extras/additional_resources/youtube.mdx
index fc266bf48b0..78da30e453b 100644
--- a/docs/extras/additional_resources/youtube.mdx
+++ b/docs/extras/additional_resources/youtube.mdx
@@ -1,6 +1,6 @@
# YouTube videos
-⛓ icon marks a new addition [last update 2023-06-20]
+⛓ icon marks a new addition [last update 2023-09-05]
### [Official LangChain YouTube channel](https://www.youtube.com/@LangChain)
@@ -86,20 +86,20 @@
- [`Llama Index`: Chat with Documentation using URL Loader](https://youtu.be/XJRoDEctAwA) by [Merk](https://www.youtube.com/@merksworld)
- [Using OpenAI, LangChain, and `Gradio` to Build Custom GenAI Applications](https://youtu.be/1MsmqMg3yUc) by [David Hundley](https://www.youtube.com/@dkhundley)
- [LangChain, Chroma DB, OpenAI Beginner Guide | ChatGPT with your PDF](https://youtu.be/FuqdVNB_8c0)
-- ⛓ [Build AI chatbot with custom knowledge base using OpenAI API and GPT Index](https://youtu.be/vDZAZuaXf48) by [Irina Nik](https://www.youtube.com/@irina_nik)
-- ⛓ [Build Your Own Auto-GPT Apps with LangChain (Python Tutorial)](https://youtu.be/NYSWn1ipbgg) by [Dave Ebbelaar](https://www.youtube.com/@daveebbelaar)
-- ⛓ [Chat with Multiple `PDFs` | LangChain App Tutorial in Python (Free LLMs and Embeddings)](https://youtu.be/dXxQ0LR-3Hg) by [Alejandro AO - Software & Ai](https://www.youtube.com/@alejandro_ao)
-- ⛓ [Chat with a `CSV` | `LangChain Agents` Tutorial (Beginners)](https://youtu.be/tjeti5vXWOU) by [Alejandro AO - Software & Ai](https://www.youtube.com/@alejandro_ao)
-- ⛓ [Create Your Own ChatGPT with `PDF` Data in 5 Minutes (LangChain Tutorial)](https://youtu.be/au2WVVGUvc8) by [Liam Ottley](https://www.youtube.com/@LiamOttley)
-- ⛓ [Using ChatGPT with YOUR OWN Data. This is magical. (LangChain OpenAI API)](https://youtu.be/9AXP7tCI9PI) by [TechLead](https://www.youtube.com/@TechLead)
-- ⛓ [Build a Custom Chatbot with OpenAI: `GPT-Index` & LangChain | Step-by-Step Tutorial](https://youtu.be/FIDv6nc4CgU) by [Fabrikod](https://www.youtube.com/@fabrikod)
-- ⛓ [`Flowise` is an open source no-code UI visual tool to build 🦜🔗LangChain applications](https://youtu.be/CovAPtQPU0k) by [Cobus Greyling](https://www.youtube.com/@CobusGreylingZA)
-- ⛓ [LangChain & GPT 4 For Data Analysis: The `Pandas` Dataframe Agent](https://youtu.be/rFQ5Kmkd4jc) by [Rabbitmetrics](https://www.youtube.com/@rabbitmetrics)
-- ⛓ [`GirlfriendGPT` - AI girlfriend with LangChain](https://youtu.be/LiN3D1QZGQw) by [Toolfinder AI](https://www.youtube.com/@toolfinderai)
-- ⛓ [`PrivateGPT`: Chat to your FILES OFFLINE and FREE [Installation and Tutorial]](https://youtu.be/G7iLllmx4qc) by [Prompt Engineering](https://www.youtube.com/@engineerprompt)
-- ⛓ [How to build with Langchain 10x easier | ⛓️ LangFlow & `Flowise`](https://youtu.be/Ya1oGL7ZTvU) by [AI Jason](https://www.youtube.com/@AIJasonZ)
-- ⛓ [Getting Started With LangChain In 20 Minutes- Build Celebrity Search Application](https://youtu.be/_FpT1cwcSLg) by [Krish Naik](https://www.youtube.com/@krishnaik06)
-
+- [Build AI chatbot with custom knowledge base using OpenAI API and GPT Index](https://youtu.be/vDZAZuaXf48) by [Irina Nik](https://www.youtube.com/@irina_nik)
+- [Build Your Own Auto-GPT Apps with LangChain (Python Tutorial)](https://youtu.be/NYSWn1ipbgg) by [Dave Ebbelaar](https://www.youtube.com/@daveebbelaar)
+- [Chat with Multiple `PDFs` | LangChain App Tutorial in Python (Free LLMs and Embeddings)](https://youtu.be/dXxQ0LR-3Hg) by [Alejandro AO - Software & Ai](https://www.youtube.com/@alejandro_ao)
+- [Chat with a `CSV` | `LangChain Agents` Tutorial (Beginners)](https://youtu.be/tjeti5vXWOU) by [Alejandro AO - Software & Ai](https://www.youtube.com/@alejandro_ao)
+- [Create Your Own ChatGPT with `PDF` Data in 5 Minutes (LangChain Tutorial)](https://youtu.be/au2WVVGUvc8) by [Liam Ottley](https://www.youtube.com/@LiamOttley)
+- [Using ChatGPT with YOUR OWN Data. This is magical. (LangChain OpenAI API)](https://youtu.be/9AXP7tCI9PI) by [TechLead](https://www.youtube.com/@TechLead)
+- [Build a Custom Chatbot with OpenAI: `GPT-Index` & LangChain | Step-by-Step Tutorial](https://youtu.be/FIDv6nc4CgU) by [Fabrikod](https://www.youtube.com/@fabrikod)
+- [`Flowise` is an open source no-code UI visual tool to build 🦜🔗LangChain applications](https://youtu.be/CovAPtQPU0k) by [Cobus Greyling](https://www.youtube.com/@CobusGreylingZA)
+- [LangChain & GPT 4 For Data Analysis: The `Pandas` Dataframe Agent](https://youtu.be/rFQ5Kmkd4jc) by [Rabbitmetrics](https://www.youtube.com/@rabbitmetrics)
+- [`GirlfriendGPT` - AI girlfriend with LangChain](https://youtu.be/LiN3D1QZGQw) by [Toolfinder AI](https://www.youtube.com/@toolfinderai)
+- [`PrivateGPT`: Chat to your FILES OFFLINE and FREE [Installation and Tutorial]](https://youtu.be/G7iLllmx4qc) by [Prompt Engineering](https://www.youtube.com/@engineerprompt)
+- [How to build with Langchain 10x easier | ⛓️ LangFlow & `Flowise`](https://youtu.be/Ya1oGL7ZTvU) by [AI Jason](https://www.youtube.com/@AIJasonZ)
+- [Getting Started With LangChain In 20 Minutes- Build Celebrity Search Application](https://youtu.be/_FpT1cwcSLg) by [Krish Naik](https://www.youtube.com/@krishnaik06)
+- ⛓ [LangChain HowTo and Guides YouTube playlist](https://www.youtube.com/playlist?list=PL8motc6AQftk1Bs42EW45kwYbyJ4jOdiZ) by [Sam Witteveen](https://www.youtube.com/@samwitteveenai/)
### [Prompt Engineering and LangChain](https://www.youtube.com/watch?v=muXbPpG_ys4&list=PLEJK-H61Xlwzm5FYLDdKt_6yibO33zoMW) by [Venelin Valkov](https://www.youtube.com/@venelin_valkov)
diff --git a/docs/extras/expression_language/cookbook.ipynb b/docs/extras/expression_language/cookbook.ipynb
index 04b74164dde..c10d0a76722 100644
--- a/docs/extras/expression_language/cookbook.ipynb
+++ b/docs/extras/expression_language/cookbook.ipynb
@@ -33,7 +33,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 2,
"id": "3c634ef0",
"metadata": {},
"outputs": [],
@@ -70,7 +70,7 @@
{
"data": {
"text/plain": [
- "AIMessage(content='Why don\\'t bears use cell phones? \\n\\nBecause they always get terrible \"grizzly\" reception!', additional_kwargs={}, example=False)"
+ "AIMessage(content=\"Why don't bears wear shoes?\\n\\nBecause they have bear feet!\", additional_kwargs={}, example=False)"
]
},
"execution_count": 5,
@@ -117,7 +117,7 @@
{
"data": {
"text/plain": [
- "AIMessage(content=\"Why don't bears use cell phones?\", additional_kwargs={}, example=False)"
+ "AIMessage(content=\"Why don't bears wear shoes?\", additional_kwargs={}, example=False)"
]
},
"execution_count": 7,
@@ -210,7 +210,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 11,
"id": "cc194c78",
"metadata": {},
"outputs": [],
@@ -228,17 +228,17 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 12,
"id": "e3d69a18",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "\"Why don't bears wear shoes?\\n\\nBecause they have bear feet!\""
+ "\"Sure, here's a bear joke for you:\\n\\nWhy don't bears like fast food?\\n\\nBecause they can't catch it!\""
]
},
- "execution_count": 13,
+ "execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
@@ -259,7 +259,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 13,
"id": "ad0dd88e",
"metadata": {},
"outputs": [],
@@ -274,7 +274,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 14,
"id": "1e7aa8eb",
"metadata": {},
"outputs": [
@@ -285,7 +285,7 @@
" 'punchline': 'Because they have bear feet!'}"
]
},
- "execution_count": 15,
+ "execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@@ -296,7 +296,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 15,
"id": "d4aa1a01",
"metadata": {},
"outputs": [],
@@ -311,17 +311,17 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 16,
"id": "8b6df9ba",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "\"Why don't bears like fast food?\""
+ "\"Why don't bears wear shoes?\""
]
},
- "execution_count": 18,
+ "execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@@ -345,7 +345,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 17,
"id": "5d3d8ffe",
"metadata": {},
"outputs": [],
@@ -366,7 +366,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 18,
"id": "33be32af",
"metadata": {},
"outputs": [],
@@ -378,7 +378,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 19,
"id": "df3f3fa2",
"metadata": {},
"outputs": [],
@@ -390,7 +390,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 20,
"id": "bfc47ec1",
"metadata": {},
"outputs": [],
@@ -405,7 +405,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 21,
"id": "eae31755",
"metadata": {},
"outputs": [],
@@ -420,7 +420,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 22,
"id": "f3040b0c",
"metadata": {},
"outputs": [
@@ -437,7 +437,7 @@
"'Harrison worked at Kensho.'"
]
},
- "execution_count": 18,
+ "execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
@@ -448,7 +448,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 23,
"id": "e1d20c7c",
"metadata": {},
"outputs": [],
@@ -471,7 +471,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 24,
"id": "7ee8b2d4",
"metadata": {},
"outputs": [
@@ -488,7 +488,7 @@
"'Harrison ha lavorato a Kensho.'"
]
},
- "execution_count": 20,
+ "execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
@@ -509,7 +509,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 25,
"id": "3f30c348",
"metadata": {},
"outputs": [],
@@ -520,7 +520,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 26,
"id": "64ab1dbf",
"metadata": {},
"outputs": [],
@@ -538,7 +538,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 27,
"id": "7d628c97",
"metadata": {},
"outputs": [],
@@ -553,7 +553,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 28,
"id": "f60a5d0f",
"metadata": {},
"outputs": [],
@@ -566,7 +566,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 29,
"id": "7d007db6",
"metadata": {},
"outputs": [],
@@ -583,7 +583,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 30,
"id": "5c32cc89",
"metadata": {},
"outputs": [],
@@ -605,7 +605,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 31,
"id": "135c8205",
"metadata": {},
"outputs": [
@@ -622,7 +622,7 @@
"AIMessage(content='Harrison was employed at Kensho.', additional_kwargs={}, example=False)"
]
},
- "execution_count": 17,
+ "execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
@@ -636,7 +636,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 32,
"id": "424e7e7a",
"metadata": {},
"outputs": [
@@ -653,7 +653,7 @@
"AIMessage(content='Harrison worked at Kensho.', additional_kwargs={}, example=False)"
]
},
- "execution_count": 15,
+ "execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
@@ -677,7 +677,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 33,
"id": "e31dd17c",
"metadata": {},
"outputs": [],
@@ -687,7 +687,7 @@
},
{
"cell_type": "code",
- "execution_count": 44,
+ "execution_count": 34,
"id": "d4bffe94",
"metadata": {},
"outputs": [],
@@ -697,7 +697,7 @@
},
{
"cell_type": "code",
- "execution_count": 45,
+ "execution_count": 35,
"id": "733be985",
"metadata": {},
"outputs": [],
@@ -744,7 +744,7 @@
},
{
"cell_type": "code",
- "execution_count": 46,
+ "execution_count": 36,
"id": "806e390c",
"metadata": {},
"outputs": [
@@ -762,7 +762,7 @@
" 'docs': [Document(page_content='harrison worked at kensho', metadata={})]}"
]
},
- "execution_count": 46,
+ "execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
@@ -775,7 +775,7 @@
},
{
"cell_type": "code",
- "execution_count": 47,
+ "execution_count": 37,
"id": "977399fd",
"metadata": {},
"outputs": [],
@@ -788,7 +788,7 @@
},
{
"cell_type": "code",
- "execution_count": 48,
+ "execution_count": 38,
"id": "f94f7de4",
"metadata": {},
"outputs": [
@@ -799,7 +799,7 @@
" AIMessage(content='Harrison was employed at Kensho.', additional_kwargs={}, example=False)]}"
]
},
- "execution_count": 48,
+ "execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
@@ -820,17 +820,17 @@
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": 39,
"id": "d65d4e9e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "'El país en el que nació la ciudad de Honolulu, Hawái, donde nació Barack Obama, el 44º presidente de los Estados Unidos, es Estados Unidos.'"
+ "'El país en el que se encuentra la ciudad de Honolulu, Hawái, donde nació Barack Obama, el 44º presidente de los Estados Unidos, es Estados Unidos.'"
]
},
- "execution_count": 31,
+ "execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
@@ -850,7 +850,7 @@
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": 40,
"id": "878f8176",
"metadata": {},
"outputs": [],
@@ -869,17 +869,17 @@
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": 41,
"id": "d621a870",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "ChatPromptValue(messages=[HumanMessage(content=\"What is the color of A fruit that has a color similar to #7E7DE6 is the Peruvian Apple Cactus (Cereus repandus). It is a tropical fruit with a vibrant purple or violet exterior. and The country's flag that has the color #7E7DE6 is North Macedonia.\", additional_kwargs={}, example=False)])"
+ "ChatPromptValue(messages=[HumanMessage(content=\"What is the color of A fruit that is of color #FF4500 is typically an orange fruit. and The country's flag that has the color #FF4500 is the flag of India.\", additional_kwargs={}, example=False)])"
]
},
- "execution_count": 33,
+ "execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
@@ -888,6 +888,101 @@
"chain2.invoke({})"
]
},
+ {
+ "cell_type": "markdown",
+ "id": "6d75a313-f1c8-4e94-9a17-24e0bf4a2bdc",
+ "metadata": {},
+ "source": [
+ "### Branching and Merging\n",
+ "\n",
+ "You may want the output of one component to be processed by 2 or more other components. [RunnableMaps](https://api.python.langchain.com/en/latest/schema/langchain.schema.runnable.base.RunnableMap.html) let you split or fork the chain so multiple components can process the input in parallel. Later, other components can join or merge the results to synthesize a final response. This type of chain creates a computation graph that looks like the following:\n",
+ "\n",
+ "```text\n",
+ " Input\n",
+ " / \\\n",
+ " / \\\n",
+ " Branch1 Branch2\n",
+ " \\ /\n",
+ " \\ /\n",
+ " Combine\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "id": "247fa0bd-4596-4063-8cb3-1d7fc119d982",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "planner = (\n",
+ " ChatPromptTemplate.from_template(\n",
+ " \"Generate an argument about: {input}\"\n",
+ " )\n",
+ " | ChatOpenAI()\n",
+ " | StrOutputParser()\n",
+ " | {\"base_response\": RunnablePassthrough()}\n",
+ ")\n",
+ "\n",
+ "arguments_for = (\n",
+ " ChatPromptTemplate.from_template(\n",
+ " \"List the pros or positive aspects of {base_response}\"\n",
+ " )\n",
+ " | ChatOpenAI()\n",
+ " | StrOutputParser()\n",
+ ")\n",
+ "arguments_against = (\n",
+ " ChatPromptTemplate.from_template(\n",
+ " \"List the cons or negative aspects of {base_response}\"\n",
+ " )\n",
+ " | ChatOpenAI()\n",
+ " | StrOutputParser()\n",
+ ")\n",
+ "\n",
+ "final_responder = (\n",
+ " ChatPromptTemplate.from_messages(\n",
+ " [\n",
+ " (\"ai\", \"{original_response}\"),\n",
+ " (\"human\", \"Pros:\\n{results_1}\\n\\nCons:\\n{results_2}\"),\n",
+ " (\"system\", \"Generate a final response given the critique\"),\n",
+ " ]\n",
+ " )\n",
+ " | ChatOpenAI()\n",
+ " | StrOutputParser()\n",
+ ")\n",
+ "\n",
+ "chain = (\n",
+ " planner \n",
+ " | {\n",
+ " \"results_1\": arguments_for,\n",
+ " \"results_2\": arguments_against,\n",
+ " \"original_response\": itemgetter(\"base_response\"),\n",
+ " }\n",
+ " | final_responder\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 65,
+ "id": "2564f310-0674-4bb1-9c4e-d7848ca73511",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\"While Scrum has its limitations and potential drawbacks, it is important to note that these can be mitigated with proper understanding, implementation, and adaptation. Here are some ways to address the critique:\\n\\n1. Lack of structure: While Scrum promotes self-organization, it is essential to provide clear guidelines, define roles and responsibilities, and establish a shared understanding of the project's goals and expectations. This can be achieved through effective communication and regular alignment meetings.\\n\\n2. Time and resource constraints: Proper planning, prioritization, and resource allocation are crucial in managing the sprint cycles effectively. Teams can leverage tools and techniques such as backlog refinement, sprint planning, and capacity planning to ensure that workloads are manageable and realistic.\\n\\n3. Managing large teams: Scaling frameworks like Scrum of Scrums or LeSS (Large-Scale Scrum) can be implemented to coordinate the efforts of multiple Scrum teams. These frameworks provide mechanisms for communication, synchronization, and alignment across teams.\\n\\n4. Limited documentation: Although Scrum emphasizes working software over comprehensive documentation, it is important to strike a balance. Teams can adopt lightweight documentation practices such as user stories, acceptance criteria, and sprint reviews to capture relevant information and promote knowledge transfer.\\n\\n5. Resolving conflicts and fostering collaboration: Conflict resolution techniques and team-building activities can help address conflicts and foster a collaborative environment. Encouraging open and honest communication, promoting a culture of trust and respect, and providing opportunities for team members to share ideas and perspectives can contribute to better team dynamics.\\n\\n6. Long-term planning: While Scrum focuses on short-term goals, it is still important to have a long-term vision and roadmap. Teams can incorporate longer-term planning activities, such as release planning or product roadmapping, to align the project with broader strategic objectives and ensure a balance between adaptability and long-term goals.\\n\\n7. Skilled Scrum Master: Investing in the training and development of a skilled Scrum Master is crucial. Organizations can provide training and support for Scrum Masters to enhance their understanding of Scrum principles, facilitation skills, and ability to address challenges effectively.\\n\\n8. Scope management: To prevent scope creep, teams should establish a well-defined product backlog and prioritize requirements based on value and feasibility. Regular backlog refinement and stakeholder engagement can help ensure that changes are evaluated and incorporated in a controlled manner.\\n\\n9. Applicability to different domains: While Scrum originated in software development, it has been successfully applied in various industries and domains. Organizations can tailor Scrum practices to suit their specific needs, making necessary adaptations and incorporating domain-specific practices as required.\\n\\nBy addressing these concerns and adapting Scrum to the specific context, organizations can maximize the benefits of Scrum while mitigating potential drawbacks. It is important to continuously evaluate and improve the implementation to ensure the best outcomes for the project and the team.\""
+ ]
+ },
+ "execution_count": 65,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chain.invoke({\"input\": \"scrum\"})"
+ ]
+ },
{
"cell_type": "markdown",
"id": "d094d637",
@@ -900,7 +995,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 66,
"id": "252625fd",
"metadata": {},
"outputs": [],
@@ -916,7 +1011,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 67,
"id": "57886e84",
"metadata": {},
"outputs": [],
@@ -926,7 +1021,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 68,
"id": "a303b089",
"metadata": {},
"outputs": [],
@@ -937,7 +1032,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 69,
"id": "7aa9ea06",
"metadata": {},
"outputs": [],
@@ -948,7 +1043,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 70,
"id": "6a3d3f5d",
"metadata": {},
"outputs": [],
@@ -961,17 +1056,17 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 71,
"id": "8aeda930",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "AIMessage(content='Thank you for the compliment! The sum of 2 + 2 is equal to 4.', additional_kwargs={}, example=False)"
+ "AIMessage(content='Thank you for the compliment! The sum of 2 and 2 is 4.', additional_kwargs={}, example=False)"
]
},
- "execution_count": 9,
+ "execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
@@ -987,31 +1082,22 @@
"source": [
"## Tools\n",
"\n",
- "You can use any LangChain tool easily"
+ "You can use any LangChain tool easily."
]
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 72,
"id": "9232d2a9",
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/Users/harrisonchase/.pyenv/versions/3.9.1/envs/langchain/lib/python3.9/site-packages/deeplake/util/check_latest_version.py:32: UserWarning: A newer version of deeplake (3.6.14) is available. It's recommended that you update to the latest version using `pip install -U deeplake`.\n",
- " warnings.warn(\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"from langchain.tools import DuckDuckGoSearchRun"
]
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 73,
"id": "a0c64d2c",
"metadata": {},
"outputs": [],
@@ -1021,7 +1107,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 74,
"id": "391969b6",
"metadata": {},
"outputs": [],
@@ -1034,7 +1120,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 75,
"id": "e3d9d20d",
"metadata": {},
"outputs": [],
@@ -1044,17 +1130,17 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 76,
"id": "55f2967d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "\"What sports games are on TV today & tonight? Watch and stream live sports on TV today, tonight, tomorrow. Today's 2023 sports TV schedule includes football, basketball, baseball, hockey, motorsports, soccer and more. Watch on TV or stream online on ESPN, FOX, FS1, CBS, NBC, ABC, Peacock, Paramount+, fuboTV, local channels and many other networks. Weather Alerts Alerts Bar. Not all offers available in all states, please visit BetMGM for the latest promotions for your area. Must be 21+ to gamble, please wager responsibly. If you or someone ... Speak of the Devils. Good Morning Arizona. Happy Hour Spots. Jaime's Local Love. Surprise Squad. Silver Apple. Field Trip Friday. Seen on TV. Arizona Highways TV. MLB Games Tonight: How to Watch on TV, Streaming & Odds - Friday, July 28. San Diego Padres' Juan Soto plays during the first baseball game in a doubleheader, Saturday, July 15, 2023, in Philadelphia. (AP Photo/Matt Slocum) (APMedia) Today's MLB schedule features top teams in action. Among those games is the Texas Rangers playing the San Diego ... TV. Cleveland at Chi. White Sox. 1:10pm. Bally Sports. NBCS-CHI. Cleveland Guardians (50-51) are second place in AL Central and Chicago White Sox (41-61) are fourth place in AL Central. The Guardians are 23-27 on the road this season and White Sox are 21-26 at home. Chi. Cubs at St. Louis.\""
+ "\"What sports games are on TV today & tonight? Watch and stream live sports on TV today, tonight, tomorrow. Today's 2023 sports TV schedule includes football, basketball, baseball, hockey, motorsports, soccer and more. Watch on TV or stream online on ESPN, FOX, FS1, CBS, NBC, ABC, Peacock, Paramount+, fuboTV, local channels and many other networks. MLB Games Tonight: How to Watch on TV, Streaming & Odds - Wednesday, September 6. Texas Rangers second baseman Marcus Semien, left, tags out Houston Astros' Jose Altuve (27) who was attempting to stretch out a single in the seventh inning of a baseball game, Monday, Sept. 4, 2023, in Arlington, Texas. (AP Photo/Tony Gutierrez) (APMedia) There ... MLB Games Tonight: How to Watch on TV, Streaming & Odds - Sunday, September 3. Los Angeles Dodgers right fielder Mookie Betts, left, gives a thumbs up to Vanessa Bryant, right, widow of Kobe ... WEEK 16 NFL TV SCHEDULE. NFL Games Thursday, 12/21/23. TIME ET. TV. New Orleans at LA Rams. 8:15pm. AMZN. NFL Games Saturday, 12/23/23. TIME ET. The second half of tonight's college football schedule still has some good games remaining to watch on your television.. We've already seen an exciting one when Colorado upset TCU. And we saw some ...\""
]
},
- "execution_count": 10,
+ "execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
@@ -1077,7 +1163,7 @@
},
{
"cell_type": "code",
- "execution_count": 35,
+ "execution_count": 77,
"id": "6bb221b3",
"metadata": {},
"outputs": [],
@@ -1105,17 +1191,17 @@
},
{
"cell_type": "code",
- "execution_count": 36,
+ "execution_count": 78,
"id": "5488ec85",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "AIMessage(content='3 + 9 is equal to 12.', additional_kwargs={}, example=False)"
+ "AIMessage(content='3 + 9 equals 12.', additional_kwargs={}, example=False)"
]
},
- "execution_count": 36,
+ "execution_count": 78,
"metadata": {},
"output_type": "execute_result"
}
@@ -1124,6 +1210,78 @@
"chain.invoke({\"foo\": \"bar\", \"bar\": \"gah\"})"
]
},
+ {
+ "cell_type": "markdown",
+ "id": "4728ddd9-914d-42ce-ae9b-72c9ce8ec940",
+ "metadata": {},
+ "source": [
+ "## Accepting a Runnable Config\n",
+ "\n",
+ "Runnable lambdas can optionally accept a [RunnableConfig](https://api.python.langchain.com/en/latest/schema/langchain.schema.runnable.config.RunnableConfig.html?highlight=runnableconfig#langchain.schema.runnable.config.RunnableConfig), which they can use to pass callbacks, tags, and other configuration information to nested runs."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 139,
+ "id": "80b3b5f6-5d58-44b9-807e-cce9a46bf49f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain.schema.runnable import RunnableConfig"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 149,
+ "id": "ff0daf0c-49dd-4d21-9772-e5fa133c5f36",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import json\n",
+ "\n",
+ "def parse_or_fix(text: str, config: RunnableConfig):\n",
+ " fixing_chain = (\n",
+ " ChatPromptTemplate.from_template(\n",
+ " \"Fix the following text:\\n\\n```text\\n{input}\\n```\\nError: {error}\"\n",
+ " \" Don't narrate, just respond with the fixed data.\"\n",
+ " )\n",
+ " | ChatOpenAI()\n",
+ " | StrOutputParser()\n",
+ " )\n",
+ " for _ in range(3):\n",
+ " try:\n",
+ " return json.loads(text)\n",
+ " except Exception as e:\n",
+ " text = fixing_chain.invoke({\"input\": text, \"error\": e}, config)\n",
+ " return \"Failed to parse\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 152,
+ "id": "1a5e709e-9d75-48c7-bb9c-503251990505",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Tokens Used: 65\n",
+ "\tPrompt Tokens: 56\n",
+ "\tCompletion Tokens: 9\n",
+ "Successful Requests: 1\n",
+ "Total Cost (USD): $0.00010200000000000001\n"
+ ]
+ }
+ ],
+ "source": [
+ "from langchain.callbacks import get_openai_callback\n",
+ "\n",
+ "with get_openai_callback() as cb:\n",
+ " RunnableLambda(parse_or_fix).invoke(\"{foo: bar}\", {\"tags\": [\"my-tag\"], \"callbacks\": [cb]})\n",
+ " print(cb)"
+ ]
+ },
{
"cell_type": "markdown",
"id": "506e9636",
@@ -1136,7 +1294,7 @@
},
{
"cell_type": "code",
- "execution_count": 37,
+ "execution_count": 106,
"id": "7a927516",
"metadata": {},
"outputs": [],
@@ -1144,13 +1302,14 @@
"template = \"\"\"Based on the table schema below, write a SQL query that would answer the user's question:\n",
"{schema}\n",
"\n",
- "Question: {question}\"\"\"\n",
+ "Question: {question}\n",
+ "SQL Query:\"\"\"\n",
"prompt = ChatPromptTemplate.from_template(template)"
]
},
{
"cell_type": "code",
- "execution_count": 38,
+ "execution_count": 107,
"id": "3f51f386",
"metadata": {},
"outputs": [],
@@ -1160,7 +1319,7 @@
},
{
"cell_type": "code",
- "execution_count": 41,
+ "execution_count": 111,
"id": "2ccca6fc",
"metadata": {},
"outputs": [],
@@ -1170,7 +1329,7 @@
},
{
"cell_type": "code",
- "execution_count": 42,
+ "execution_count": 109,
"id": "05ba88ee",
"metadata": {},
"outputs": [],
@@ -1181,7 +1340,7 @@
},
{
"cell_type": "code",
- "execution_count": 43,
+ "execution_count": 112,
"id": "a4eda902",
"metadata": {},
"outputs": [],
@@ -1192,7 +1351,7 @@
},
{
"cell_type": "code",
- "execution_count": 47,
+ "execution_count": 113,
"id": "5046cb17",
"metadata": {},
"outputs": [],
@@ -1211,17 +1370,17 @@
},
{
"cell_type": "code",
- "execution_count": 48,
+ "execution_count": 114,
"id": "a5552039",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "'SELECT COUNT(*) \\nFROM Employee;'"
+ "'SELECT COUNT(EmployeeId) FROM Employee'"
]
},
- "execution_count": 48,
+ "execution_count": 114,
"metadata": {},
"output_type": "execute_result"
}
@@ -1232,7 +1391,7 @@
},
{
"cell_type": "code",
- "execution_count": 49,
+ "execution_count": 115,
"id": "d6fee130",
"metadata": {},
"outputs": [],
@@ -1248,7 +1407,7 @@
},
{
"cell_type": "code",
- "execution_count": 52,
+ "execution_count": 116,
"id": "923aa634",
"metadata": {},
"outputs": [],
@@ -1271,7 +1430,7 @@
},
{
"cell_type": "code",
- "execution_count": 53,
+ "execution_count": 117,
"id": "e94963d8",
"metadata": {},
"outputs": [
@@ -1281,7 +1440,7 @@
"AIMessage(content='There are 8 employees.', additional_kwargs={}, example=False)"
]
},
- "execution_count": 53,
+ "execution_count": 117,
"metadata": {},
"output_type": "execute_result"
}
@@ -1300,7 +1459,7 @@
},
{
"cell_type": "code",
- "execution_count": 57,
+ "execution_count": 118,
"id": "bd7c259a",
"metadata": {},
"outputs": [],
@@ -1311,7 +1470,7 @@
},
{
"cell_type": "code",
- "execution_count": 58,
+ "execution_count": 119,
"id": "73795d2d",
"metadata": {},
"outputs": [],
@@ -1331,7 +1490,7 @@
},
{
"cell_type": "code",
- "execution_count": 64,
+ "execution_count": 120,
"id": "42859e8a",
"metadata": {},
"outputs": [],
@@ -1343,7 +1502,7 @@
},
{
"cell_type": "code",
- "execution_count": 67,
+ "execution_count": 121,
"id": "5ded1a86",
"metadata": {},
"outputs": [],
@@ -1353,7 +1512,7 @@
},
{
"cell_type": "code",
- "execution_count": 68,
+ "execution_count": 122,
"id": "208c2b75",
"metadata": {},
"outputs": [
@@ -1370,7 +1529,7 @@
"'4\\n'"
]
},
- "execution_count": 68,
+ "execution_count": 122,
"metadata": {},
"output_type": "execute_result"
}
@@ -1391,7 +1550,7 @@
},
{
"cell_type": "code",
- "execution_count": 99,
+ "execution_count": 123,
"id": "7998efd8",
"metadata": {},
"outputs": [],
@@ -1409,7 +1568,7 @@
},
{
"cell_type": "code",
- "execution_count": 100,
+ "execution_count": 124,
"id": "fa0087f3",
"metadata": {},
"outputs": [],
@@ -1419,7 +1578,7 @@
},
{
"cell_type": "code",
- "execution_count": 101,
+ "execution_count": 125,
"id": "06b531ae",
"metadata": {},
"outputs": [
@@ -1429,7 +1588,7 @@
"{'history': []}"
]
},
- "execution_count": 101,
+ "execution_count": 125,
"metadata": {},
"output_type": "execute_result"
}
@@ -1440,7 +1599,7 @@
},
{
"cell_type": "code",
- "execution_count": 102,
+ "execution_count": 126,
"id": "d9437af6",
"metadata": {},
"outputs": [],
@@ -1456,7 +1615,7 @@
},
{
"cell_type": "code",
- "execution_count": 103,
+ "execution_count": 127,
"id": "bed1e260",
"metadata": {},
"outputs": [
@@ -1466,7 +1625,7 @@
"AIMessage(content='Hello Bob! How can I assist you today?', additional_kwargs={}, example=False)"
]
},
- "execution_count": 103,
+ "execution_count": 127,
"metadata": {},
"output_type": "execute_result"
}
@@ -1479,7 +1638,7 @@
},
{
"cell_type": "code",
- "execution_count": 104,
+ "execution_count": 128,
"id": "890475b4",
"metadata": {},
"outputs": [],
@@ -1489,7 +1648,7 @@
},
{
"cell_type": "code",
- "execution_count": 105,
+ "execution_count": 129,
"id": "e8fcb77f",
"metadata": {},
"outputs": [
@@ -1500,7 +1659,7 @@
" AIMessage(content='Hello Bob! How can I assist you today?', additional_kwargs={}, example=False)]}"
]
},
- "execution_count": 105,
+ "execution_count": 129,
"metadata": {},
"output_type": "execute_result"
}
@@ -1511,17 +1670,17 @@
},
{
"cell_type": "code",
- "execution_count": 106,
+ "execution_count": 130,
"id": "d837d5c3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "AIMessage(content='Your name is Bob. You mentioned it in your previous message. Is there anything else I can help you with, Bob?', additional_kwargs={}, example=False)"
+ "AIMessage(content='Your name is Bob.', additional_kwargs={}, example=False)"
]
},
- "execution_count": 106,
+ "execution_count": 130,
"metadata": {},
"output_type": "execute_result"
}
@@ -1544,7 +1703,7 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 131,
"id": "4f5f6449-940a-4f5c-97c0-39b71c3e2a68",
"metadata": {},
"outputs": [],
@@ -1555,7 +1714,7 @@
},
{
"cell_type": "code",
- "execution_count": 35,
+ "execution_count": 132,
"id": "fcb8312b-7e7a-424f-a3ec-76738c9a9d21",
"metadata": {},
"outputs": [],
@@ -1565,7 +1724,7 @@
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": 133,
"id": "b24b9148-f6b0-4091-8ea8-d3fb281bd950",
"metadata": {},
"outputs": [],
@@ -1578,7 +1737,7 @@
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": 134,
"id": "1c8ed87c-9ca6-4559-bf60-d40e94a0af08",
"metadata": {},
"outputs": [],
@@ -1588,7 +1747,7 @@
},
{
"cell_type": "code",
- "execution_count": 34,
+ "execution_count": 135,
"id": "5256b9bd-381a-42b0-bfa8-7e6d18f853cb",
"metadata": {},
"outputs": [
@@ -1598,7 +1757,7 @@
"'\\n\\nYou are stupid.'"
]
},
- "execution_count": 34,
+ "execution_count": 135,
"metadata": {},
"output_type": "execute_result"
}
@@ -1609,7 +1768,7 @@
},
{
"cell_type": "code",
- "execution_count": 36,
+ "execution_count": 136,
"id": "fe6e3b33-dc9a-49d5-b194-ba750c58a628",
"metadata": {},
"outputs": [],
@@ -1619,7 +1778,7 @@
},
{
"cell_type": "code",
- "execution_count": 37,
+ "execution_count": 137,
"id": "d8ba0cbd-c739-4d23-be9f-6ae092bd5ffb",
"metadata": {},
"outputs": [
@@ -1630,7 +1789,7 @@
" 'output': \"Text was found that violates OpenAI's content policy.\"}"
]
},
- "execution_count": 37,
+ "execution_count": 137,
"metadata": {},
"output_type": "execute_result"
}
@@ -1638,6 +1797,14 @@
"source": [
"moderated_chain.invoke({\"input\": \"you are stupid\"})"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f07b5300-8676-48ee-ab77-3f2dc2ecd415",
+ "metadata": {},
+ "outputs": [],
+ "source": []
}
],
"metadata": {
@@ -1656,7 +1823,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.1"
+ "version": "3.11.2"
}
},
"nbformat": 4,
diff --git a/docs/extras/guides/debugging.md b/docs/extras/guides/debugging.md
index 2527034dcb1..9b231372502 100644
--- a/docs/extras/guides/debugging.md
+++ b/docs/extras/guides/debugging.md
@@ -2,7 +2,7 @@
If you're building with LLMs, at some point something will break, and you'll need to debug. A model call will fail, or the model output will be misformatted, or there will be some nested model calls and it won't be clear where along the way an incorrect output was created.
-Here's a few different tools and functionalities to aid in debugging.
+Here are a few different tools and functionalities to aid in debugging.
@@ -18,9 +18,9 @@ For anyone building production-grade LLM applications, we highly recommend using
If you're prototyping in Jupyter Notebooks or running Python scripts, it can be helpful to print out the intermediate steps of a Chain run.
-There's a number of ways to enable printing at varying degrees of verbosity.
+There are a number of ways to enable printing at varying degrees of verbosity.
-Let's suppose we have a simple agent and want to visualize the actions it takes and tool outputs it receives. Without any debugging, here's what we see:
+Let's suppose we have a simple agent, and we want to visualize the actions it takes and the tool outputs it receives. Without any debugging, here's what we see:
```python
diff --git a/docs/extras/guides/privacy/presidio_data_anonymization.ipynb b/docs/extras/guides/privacy/presidio_data_anonymization.ipynb
index faa99292594..4b4b718e29b 100644
--- a/docs/extras/guides/privacy/presidio_data_anonymization.ipynb
+++ b/docs/extras/guides/privacy/presidio_data_anonymization.ipynb
@@ -28,7 +28,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@@ -47,16 +47,16 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "'My name is Mrs. Rachel Chen DDS, call me at 849-829-7628x073 or email me at christopherfrey@example.org'"
+ "'My name is Laura Ruiz, call me at +1-412-982-8374x13414 or email me at javierwatkins@example.net'"
]
},
- "execution_count": 14,
+ "execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
@@ -82,7 +82,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@@ -94,35 +94,53 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "text = f\"\"\"Slim Shady recently lost his wallet. \n",
+ "Inside is some cash and his credit card with the number 4916 0387 9536 0861. \n",
+ "If you would find it, please call at 313-666-7440 or write an email here: real.slim.shady@gmail.com.\"\"\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
"metadata": {},
"outputs": [
{
- "data": {
- "text/plain": [
- "AIMessage(content='You can find our super secret data at https://www.ross.com/', additional_kwargs={}, example=False)"
- ]
- },
- "execution_count": 16,
- "metadata": {},
- "output_type": "execute_result"
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Dear Sir/Madam,\n",
+ "\n",
+ "We regret to inform you that Richard Fields has recently misplaced his wallet, which contains a sum of cash and his credit card bearing the number 30479847307774. \n",
+ "\n",
+ "Should you happen to come across it, we kindly request that you contact us immediately at 6439182672 or via email at frank45@example.com.\n",
+ "\n",
+ "Thank you for your attention to this matter.\n",
+ "\n",
+ "Yours faithfully,\n",
+ "\n",
+ "[Your Name]\n"
+ ]
}
],
"source": [
"from langchain.prompts.prompt import PromptTemplate\n",
"from langchain.chat_models import ChatOpenAI\n",
- "from langchain.schema.runnable import RunnablePassthrough\n",
"\n",
- "template = \"\"\"According to this text, where can you find our super secret data?\n",
+ "anonymizer = PresidioAnonymizer()\n",
"\n",
- "{anonymized_text}\n",
+ "template = \"\"\"Rewrite this text into an official, short email:\n",
"\n",
- "Answer:\"\"\"\n",
+ "{anonymized_text}\"\"\"\n",
"prompt = PromptTemplate.from_template(template)\n",
- "llm = ChatOpenAI()\n",
+ "llm = ChatOpenAI(temperature=0)\n",
"\n",
"chain = {\"anonymized_text\": anonymizer.anonymize} | prompt | llm\n",
- "chain.invoke(\"You can find our super secret data at https://supersecretdata.com\")"
+ "response = chain.invoke(text)\n",
+ "print(response.content)"
]
},
{
@@ -135,16 +153,16 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "'My name is Gabrielle Edwards, call me at 313-666-7440 or email me at real.slim.shady@gmail.com'"
+ "'My name is Adrian Fleming, call me at 313-666-7440 or email me at real.slim.shady@gmail.com'"
]
},
- "execution_count": 18,
+ "execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -166,16 +184,16 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "'My name is Victoria Mckinney, call me at 713-549-8623 or email me at real.slim.shady@gmail.com'"
+ "'My name is Justin Miller, call me at 761-824-1889 or email me at real.slim.shady@gmail.com'"
]
},
- "execution_count": 3,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -201,16 +219,16 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "'My name is Billy Russo, call me at 970-996-9453x038 or email me at jamie80@example.org'"
+ "'My name is Dr. Jennifer Baker, call me at (508)839-9329x232 or email me at ehamilton@example.com'"
]
},
- "execution_count": 4,
+ "execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@@ -232,16 +250,16 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "'My polish phone number is EVIA70648911396944'"
+ "'My polish phone number is NRGN41434238921378'"
]
},
- "execution_count": 5,
+ "execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@@ -261,7 +279,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
@@ -291,7 +309,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
@@ -308,7 +326,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 12,
"metadata": {},
"outputs": [
{
@@ -337,16 +355,16 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "'+48 533 220 543'"
+ "'511 622 683'"
]
},
- "execution_count": 9,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@@ -374,7 +392,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
@@ -389,7 +407,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
@@ -398,16 +416,16 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "'My polish phone number is +48 692 715 636'"
+ "'My polish phone number is +48 734 630 977'"
]
},
- "execution_count": 12,
+ "execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@@ -443,7 +461,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.1"
+ "version": "3.11.4"
}
},
"nbformat": 4,
diff --git a/docs/extras/guides/privacy/presidio_reversible_anonymization.ipynb b/docs/extras/guides/privacy/presidio_reversible_anonymization.ipynb
new file mode 100644
index 00000000000..480b2632780
--- /dev/null
+++ b/docs/extras/guides/privacy/presidio_reversible_anonymization.ipynb
@@ -0,0 +1,461 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Reversible data anonymization with Microsoft Presidio\n",
+ "\n",
+ "[](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/extras/guides/privacy/presidio_reversible_anonymization.ipynb)\n",
+ "\n",
+ "\n",
+ "## Use case\n",
+ "\n",
+ "We have already written about the importance of anonymizing sensitive data in the previous section. **Reversible Anonymization** is an equally essential technology while sharing information with language models, as it balances data protection with data usability. This technique involves masking sensitive personally identifiable information (PII), yet it can be reversed and original data can be restored when authorized users need it. Its main advantage lies in the fact that while it conceals individual identities to prevent misuse, it also allows the concealed data to be accurately unmasked should it be necessary for legal or compliance purposes. \n",
+ "\n",
+ "## Overview\n",
+ "\n",
+ "We implemented the `PresidioReversibleAnonymizer`, which consists of two parts:\n",
+ "\n",
+ "1. anonymization - it works the same way as `PresidioAnonymizer`, plus the object itself stores a mapping of made-up values to original ones, for example:\n",
+ "```\n",
+ " {\n",
+ " \"PERSON\": {\n",
+ " \"\": \"\",\n",
+ " \"John Doe\": \"Slim Shady\"\n",
+ " },\n",
+ " \"PHONE_NUMBER\": {\n",
+ " \"111-111-1111\": \"555-555-5555\"\n",
+ " }\n",
+ " ...\n",
+ " }\n",
+ "```\n",
+ "\n",
+ "2. deanonymization - using the mapping described above, it matches fake data with original data and then substitutes it.\n",
+ "\n",
+ "Between anonymization and deanonymization user can perform different operations, for example, passing the output to LLM.\n",
+ "\n",
+ "## Quickstart\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Install necessary packages\n",
+ "# ! pip install langchain langchain-experimental openai presidio-analyzer presidio-anonymizer spacy Faker\n",
+ "# ! python -m spacy download en_core_web_lg"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "`PresidioReversibleAnonymizer` is not significantly different from its predecessor (`PresidioAnonymizer`) in terms of anonymization:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'My name is Maria Lynch, call me at 7344131647 or email me at jamesmichael@example.com. By the way, my card number is: 4838637940262'"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from langchain_experimental.data_anonymizer import PresidioReversibleAnonymizer\n",
+ "\n",
+ "anonymizer = PresidioReversibleAnonymizer(\n",
+ " analyzed_fields=[\"PERSON\", \"PHONE_NUMBER\", \"EMAIL_ADDRESS\", \"CREDIT_CARD\"],\n",
+ " # Faker seed is used here to make sure the same fake data is generated for the test purposes\n",
+ " # In production, it is recommended to remove the faker_seed parameter (it will default to None)\n",
+ " faker_seed=42,\n",
+ ")\n",
+ "\n",
+ "anonymizer.anonymize(\n",
+ " \"My name is Slim Shady, call me at 313-666-7440 or email me at real.slim.shady@gmail.com. \"\n",
+ " \"By the way, my card number is: 4916 0387 9536 0861\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This is what the full string we want to deanonymize looks like:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Maria Lynch recently lost his wallet. \n",
+ "Inside is some cash and his credit card with the number 4838637940262. \n",
+ "If you would find it, please call at 7344131647 or write an email here: jamesmichael@example.com.\n",
+ "Maria Lynch would be very grateful!\n"
+ ]
+ }
+ ],
+ "source": [
+ "# We know this data, as we set the faker_seed parameter\n",
+ "fake_name = \"Maria Lynch\"\n",
+ "fake_phone = \"7344131647\"\n",
+ "fake_email = \"jamesmichael@example.com\"\n",
+ "fake_credit_card = \"4838637940262\"\n",
+ "\n",
+ "anonymized_text = f\"\"\"{fake_name} recently lost his wallet. \n",
+ "Inside is some cash and his credit card with the number {fake_credit_card}. \n",
+ "If you would find it, please call at {fake_phone} or write an email here: {fake_email}.\n",
+ "{fake_name} would be very grateful!\"\"\"\n",
+ "\n",
+ "print(anonymized_text)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "And now, using the `deanonymize` method, we can reverse the process:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Slim Shady recently lost his wallet. \n",
+ "Inside is some cash and his credit card with the number 4916 0387 9536 0861. \n",
+ "If you would find it, please call at 313-666-7440 or write an email here: real.slim.shady@gmail.com.\n",
+ "Slim Shady would be very grateful!\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(anonymizer.deanonymize(anonymized_text))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Using with LangChain Expression Language\n",
+ "\n",
+ "With LCEL we can easily chain together anonymization and deanonymization with the rest of our application. This is an example of using the anonymization mechanism with a query to LLM (without deanonymization for now):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "text = f\"\"\"Slim Shady recently lost his wallet. \n",
+ "Inside is some cash and his credit card with the number 4916 0387 9536 0861. \n",
+ "If you would find it, please call at 313-666-7440 or write an email here: real.slim.shady@gmail.com.\"\"\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Dear Sir/Madam,\n",
+ "\n",
+ "We regret to inform you that Mr. Dana Rhodes has reported the loss of his wallet. The wallet contains a sum of cash and his credit card, bearing the number 4397528473885757. \n",
+ "\n",
+ "If you happen to come across the aforementioned wallet, we kindly request that you contact us immediately at 258-481-7074x714 or via email at laurengoodman@example.com.\n",
+ "\n",
+ "Your prompt assistance in this matter would be greatly appreciated.\n",
+ "\n",
+ "Yours faithfully,\n",
+ "\n",
+ "[Your Name]\n"
+ ]
+ }
+ ],
+ "source": [
+ "from langchain.prompts.prompt import PromptTemplate\n",
+ "from langchain.chat_models import ChatOpenAI\n",
+ "\n",
+ "anonymizer = PresidioReversibleAnonymizer()\n",
+ "\n",
+ "template = \"\"\"Rewrite this text into an official, short email:\n",
+ "\n",
+ "{anonymized_text}\"\"\"\n",
+ "prompt = PromptTemplate.from_template(template)\n",
+ "llm = ChatOpenAI(temperature=0)\n",
+ "\n",
+ "chain = {\"anonymized_text\": anonymizer.anonymize} | prompt | llm\n",
+ "response = chain.invoke(text)\n",
+ "print(response.content)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now, let's add **deanonymization step** to our sequence:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Dear Sir/Madam,\n",
+ "\n",
+ "We regret to inform you that Mr. Slim Shady has recently misplaced his wallet. The wallet contains a sum of cash and his credit card, bearing the number 4916 0387 9536 0861. \n",
+ "\n",
+ "If by any chance you come across the lost wallet, kindly contact us immediately at 313-666-7440 or send an email to real.slim.shady@gmail.com.\n",
+ "\n",
+ "Your prompt assistance in this matter would be greatly appreciated.\n",
+ "\n",
+ "Yours faithfully,\n",
+ "\n",
+ "[Your Name]\n"
+ ]
+ }
+ ],
+ "source": [
+ "chain = chain | (lambda ai_message: anonymizer.deanonymize(ai_message.content))\n",
+ "response = chain.invoke(text)\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Anonymized data was given to the model itself, and therefore it was protected from being leaked to the outside world. Then, the model's response was processed, and the factual value was replaced with the real one."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Extra knowledge"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "`PresidioReversibleAnonymizer` stores the mapping of the fake values to the original values in the `deanonymizer_mapping` parameter, where key is fake PII and value is the original one: "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'PERSON': {'Maria Lynch': 'Slim Shady'},\n",
+ " 'PHONE_NUMBER': {'7344131647': '313-666-7440'},\n",
+ " 'EMAIL_ADDRESS': {'jamesmichael@example.com': 'real.slim.shady@gmail.com'},\n",
+ " 'CREDIT_CARD': {'4838637940262': '4916 0387 9536 0861'}}"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from langchain_experimental.data_anonymizer import PresidioReversibleAnonymizer\n",
+ "\n",
+ "anonymizer = PresidioReversibleAnonymizer(\n",
+ " analyzed_fields=[\"PERSON\", \"PHONE_NUMBER\", \"EMAIL_ADDRESS\", \"CREDIT_CARD\"],\n",
+ " # Faker seed is used here to make sure the same fake data is generated for the test purposes\n",
+ " # In production, it is recommended to remove the faker_seed parameter (it will default to None)\n",
+ " faker_seed=42,\n",
+ ")\n",
+ "\n",
+ "anonymizer.anonymize(\n",
+ " \"My name is Slim Shady, call me at 313-666-7440 or email me at real.slim.shady@gmail.com. \"\n",
+ " \"By the way, my card number is: 4916 0387 9536 0861\"\n",
+ ")\n",
+ "\n",
+ "anonymizer.deanonymizer_mapping"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Anonymizing more texts will result in new mapping entries:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Do you have his VISA card number? Yep, it's 3537672423884966. I'm William Bowman by the way.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "{'PERSON': {'Maria Lynch': 'Slim Shady', 'William Bowman': 'John Doe'},\n",
+ " 'PHONE_NUMBER': {'7344131647': '313-666-7440'},\n",
+ " 'EMAIL_ADDRESS': {'jamesmichael@example.com': 'real.slim.shady@gmail.com'},\n",
+ " 'CREDIT_CARD': {'4838637940262': '4916 0387 9536 0861',\n",
+ " '3537672423884966': '4001 9192 5753 7193'}}"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "print(\n",
+ " anonymizer.anonymize(\n",
+ " \"Do you have his VISA card number? Yep, it's 4001 9192 5753 7193. I'm John Doe by the way.\"\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "anonymizer.deanonymizer_mapping"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can save the mapping itself to a file for future use: "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# We can save the deanonymizer mapping as a JSON or YAML file\n",
+ "\n",
+ "anonymizer.save_deanonymizer_mapping(\"deanonymizer_mapping.json\")\n",
+ "# anonymizer.save_deanonymizer_mapping(\"deanonymizer_mapping.yaml\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "And then, load it in another `PresidioReversibleAnonymizer` instance:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{}"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "anonymizer = PresidioReversibleAnonymizer()\n",
+ "\n",
+ "anonymizer.deanonymizer_mapping"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'PERSON': {'Maria Lynch': 'Slim Shady', 'William Bowman': 'John Doe'},\n",
+ " 'PHONE_NUMBER': {'7344131647': '313-666-7440'},\n",
+ " 'EMAIL_ADDRESS': {'jamesmichael@example.com': 'real.slim.shady@gmail.com'},\n",
+ " 'CREDIT_CARD': {'4838637940262': '4916 0387 9536 0861',\n",
+ " '3537672423884966': '4001 9192 5753 7193'}}"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "anonymizer.load_deanonymizer_mapping(\"deanonymizer_mapping.json\")\n",
+ "\n",
+ "anonymizer.deanonymizer_mapping"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Future works\n",
+ "\n",
+ "- **instance anonymization** - at this point, each occurrence of PII is treated as a separate entity and separately anonymized. Therefore, two occurrences of the name John Doe in the text will be changed to two different names. It is therefore worth introducing support for full instance detection, so that repeated occurrences are treated as a single object.\n",
+ "- **better matching and substitution of fake values for real ones** - currently the strategy is based on matching full strings and then substituting them. Due to the indeterminism of language models, it may happen that the value in the answer is slightly changed (e.g. *John Doe* -> *John* or *Main St, New York* -> *New York*) and such a substitution is then no longer possible. Therefore, it is worth adjusting the matching for your needs."
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/docs/extras/guides/safety/amazon_comprehend_chain.ipynb b/docs/extras/guides/safety/amazon_comprehend_chain.ipynb
index e7e1961d42d..69117b8257c 100644
--- a/docs/extras/guides/safety/amazon_comprehend_chain.ipynb
+++ b/docs/extras/guides/safety/amazon_comprehend_chain.ipynb
@@ -512,9 +512,9 @@
"# Examples\n",
"---\n",
"\n",
- "## With HuggingFace Hub Models\n",
+ "## With Hugging Face Hub Models\n",
"\n",
- "Get your API Key from Huggingface hub - https://huggingface.co/docs/api-inference/quicktour#get-your-api-token"
+ "Get your API Key from Hugging Face hub - https://huggingface.co/docs/api-inference/quicktour#get-your-api-token"
]
},
{
diff --git a/docs/extras/integrations/document_transformers/nuclia_transformer.ipynb b/docs/extras/integrations/document_transformers/nuclia_transformer.ipynb
index d4317c9bba6..468be00d94b 100644
--- a/docs/extras/integrations/document_transformers/nuclia_transformer.ipynb
+++ b/docs/extras/integrations/document_transformers/nuclia_transformer.ipynb
@@ -18,7 +18,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@@ -93,8 +93,22 @@
}
],
"metadata": {
+ "kernelspec": {
+ "display_name": "langchain",
+ "language": "python",
+ "name": "python3"
+ },
"language_info": {
- "name": "python"
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.5"
},
"orig_nbformat": 4
},
diff --git a/docs/extras/integrations/llms/banana.ipynb b/docs/extras/integrations/llms/banana.ipynb
index 44e51faafa2..b92db8dabab 100644
--- a/docs/extras/integrations/llms/banana.ipynb
+++ b/docs/extras/integrations/llms/banana.ipynb
@@ -31,11 +31,16 @@
"outputs": [],
"source": [
"# get new tokens: https://app.banana.dev/\n",
- "# We need two tokens, not just an `api_key`: `BANANA_API_KEY` and `YOUR_MODEL_KEY`\n",
+ "# We need three parameters to make a Banana.dev API call:\n",
+ "# * a team api key\n",
+ "# * the model's unique key\n",
+ "# * the model's url slug\n",
"\n",
"import os\n",
"from getpass import getpass\n",
"\n",
+ "# You can get this from the main dashboard\n",
+ "# at https://app.banana.dev\n",
"os.environ[\"BANANA_API_KEY\"] = \"YOUR_API_KEY\"\n",
"# OR\n",
"# BANANA_API_KEY = getpass()"
@@ -70,7 +75,9 @@
"metadata": {},
"outputs": [],
"source": [
- "llm = Banana(model_key=\"YOUR_MODEL_KEY\")"
+ "# Both of these are found in your model's \n",
+ "# detail page in https://app.banana.dev\n",
+ "llm = Banana(model_key=\"YOUR_MODEL_KEY\", model_url_slug=\"YOUR_MODEL_URL_SLUG\")"
]
},
{
diff --git a/docs/extras/integrations/llms/google_vertex_ai_palm.ipynb b/docs/extras/integrations/llms/google_vertex_ai_palm.ipynb
index e0bbd87a705..cff7d9bdd0e 100644
--- a/docs/extras/integrations/llms/google_vertex_ai_palm.ipynb
+++ b/docs/extras/integrations/llms/google_vertex_ai_palm.ipynb
@@ -236,7 +236,7 @@
"metadata": {},
"outputs": [],
"source": [
- "llm_oss = VertexAIModelGarden(\n",
+ "llm = VertexAIModelGarden(\n",
" project=\"YOUR PROJECT\",\n",
" endpoint_id=\"YOUR ENDPOINT_ID\"\n",
")"
@@ -248,14 +248,25 @@
"metadata": {},
"outputs": [],
"source": [
- "llm_oss(\"What is the meaning of life?\")"
+ "llm(\"What is the meaning of life?\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "You can also use it as a chain:"
+ "Like all LLMs, we can then compose it with other components:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain.prompts import PromptTemplate\n",
+ "\n",
+ "prompt = PromptTemplate.from_template(\"What is the meaning of {thing}?\")"
]
},
{
@@ -264,17 +275,17 @@
"metadata": {},
"outputs": [],
"source": [
- "llm_oss_chain = LLMChain(prompt=prompt, llm=llm_oss(\"What is the meaning of life?\")\n",
- ")\n",
- "llm_oss_chain.run(question)"
+ "llm_oss_chain = prompt | llm\n",
+ "\n",
+ "llm_oss_chain.invoke({\"thing\": \"life\"})"
]
}
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3 (ipykernel)",
+ "display_name": "poetry-venv",
"language": "python",
- "name": "python3"
+ "name": "poetry-venv"
},
"language_info": {
"codemirror_mode": {
@@ -286,7 +297,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.12"
+ "version": "3.9.1"
},
"vscode": {
"interpreter": {
diff --git a/docs/extras/integrations/providers/bananadev.mdx b/docs/extras/integrations/providers/bananadev.mdx
index 4961e5f88bb..ee7992be74c 100644
--- a/docs/extras/integrations/providers/bananadev.mdx
+++ b/docs/extras/integrations/providers/bananadev.mdx
@@ -1,79 +1,72 @@
# Banana
-This page covers how to use the Banana ecosystem within LangChain.
-It is broken into two parts: installation and setup, and then references to specific Banana wrappers.
+Banana provides serverless GPU inference for AI models, including a CI/CD build pipeline and a simple Python framework (Potassium) to serve your models.
+
+This page covers how to use the [Banana](https://www.banana.dev) ecosystem within LangChain.
+
+It is broken into two parts:
+* installation and setup
+* references to specific Banana wrappers
## Installation and Setup
- Install with `pip install banana-dev`
-- Get an Banana api key and set it as an environment variable (`BANANA_API_KEY`)
+- Get a Banana API key from the [Banana.dev dashboard](https://app.banana.dev) and set it as an environment variable (`BANANA_API_KEY`), as in the snippet below
+- Get your model's key and URL slug from the model's details page
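+
+For example, in Python you might set these up as follows (a minimal sketch; the values are placeholders to replace with your own):
+
+```python
+import os
+
+# Placeholder values; copy the real ones from the Banana.dev dashboard.
+os.environ["BANANA_API_KEY"] = "YOUR_API_KEY"
+MODEL_KEY = "YOUR_MODEL_KEY"
+MODEL_URL_SLUG = "YOUR_MODEL_URL_SLUG"
+```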
## Define your Banana Template
-If you want to use an available language model template you can find one [here](https://app.banana.dev/templates/conceptofmind/serverless-template-palmyra-base).
-This template uses the Palmyra-Base model by [Writer](https://writer.com/product/api/).
-You can check out an example Banana repository [here](https://github.com/conceptofmind/serverless-template-palmyra-base).
+You'll need to set up a GitHub repo for your Banana app. You can get started in 5 minutes using [this guide](https://docs.banana.dev/banana-docs/).
+
+Alternatively, for a ready-to-go LLM example, you can check out Banana's [CodeLlama-7B-Instruct-GPTQ](https://github.com/bananaml/demo-codellama-7b-instruct-gptq) GitHub repository. Just fork it and deploy it within Banana.
+
+Other starter repos are available [here](https://github.com/orgs/bananaml/repositories?q=demo-&type=all&language=&sort=).
## Build the Banana app
-Banana Apps must include the "output" key in the return json.
-There is a rigid response structure.
+To use Banana apps within LangChain, they must include the `outputs` key
+in the returned JSON, and the value must be a string.
```python
# Return the results as a dictionary
-result = {'output': result}
+result = {'outputs': result}
```
An example inference function would be:
```python
-def inference(model_inputs:dict) -> dict:
- global model
- global tokenizer
-
- # Parse out your arguments
- prompt = model_inputs.get('prompt', None)
- if prompt == None:
- return {'message': "No prompt provided"}
-
- # Run the model
- input_ids = tokenizer.encode(prompt, return_tensors='pt').cuda()
- output = model.generate(
- input_ids,
- max_length=100,
- do_sample=True,
- top_k=50,
- top_p=0.95,
- num_return_sequences=1,
- temperature=0.9,
- early_stopping=True,
- no_repeat_ngram_size=3,
- num_beams=5,
- length_penalty=1.5,
- repetition_penalty=1.5,
- bad_words_ids=[[tokenizer.encode(' ', add_prefix_space=True)[0]]]
- )
-
- result = tokenizer.decode(output[0], skip_special_tokens=True)
- # Return the results as a dictionary
- result = {'output': result}
- return result
+@app.handler("/")
+def handler(context: dict, request: Request) -> Response:
+ """Handle a request to generate code from a prompt."""
+ model = context.get("model")
+ tokenizer = context.get("tokenizer")
+ max_new_tokens = request.json.get("max_new_tokens", 512)
+ temperature = request.json.get("temperature", 0.7)
+ prompt = request.json.get("prompt")
+ prompt_template=f'''[INST] Write code to solve the following coding problem that obeys the constraints and passes the example test cases. Please wrap your code answer using ```:
+ {prompt}
+ [/INST]
+ '''
+ input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
+ output = model.generate(inputs=input_ids, temperature=temperature, max_new_tokens=max_new_tokens)
+ result = tokenizer.decode(output[0])
+ return Response(json={"outputs": result}, status=200)
```
-You can find a full example of a Banana app [here](https://github.com/conceptofmind/serverless-template-palmyra-base/blob/main/app.py).
+This example is from the `app.py` file in [CodeLlama-7B-Instruct-GPTQ](https://github.com/bananaml/demo-codellama-7b-instruct-gptq).
## Wrappers
### LLM
-There exists an Banana LLM wrapper, which you can access with
+Within LangChain, there is a Banana LLM wrapper, which you can access with
```python
from langchain.llms import Banana
```
-You need to provide a model key located in the dashboard:
+You need to provide a model key and model URL slug, which you can get from the model's details page in the [Banana.dev dashboard](https://app.banana.dev).
```python
-llm = Banana(model_key="YOUR_MODEL_KEY")
+llm = Banana(model_key="YOUR_MODEL_KEY", model_url_slug="YOUR_MODEL_URL_SLUG")
```
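+
+You can then call the wrapper like any other LangChain LLM, using the `llm` defined above (a minimal sketch; the prompt is purely illustrative):
+
+```python
+# The wrapper sends the prompt to your deployed Potassium app and
+# returns the string found under the `outputs` key of the response.
+print(llm("Tell me a joke"))
+```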
diff --git a/docs/extras/integrations/text_embedding/huggingfacehub.ipynb b/docs/extras/integrations/text_embedding/huggingfacehub.ipynb
index a86df86d742..cb897f86934 100644
--- a/docs/extras/integrations/text_embedding/huggingfacehub.ipynb
+++ b/docs/extras/integrations/text_embedding/huggingfacehub.ipynb
@@ -5,13 +5,23 @@
"id": "ed47bb62",
"metadata": {},
"source": [
- "# Hugging Face Hub\n",
+ "# Hugging Face\n",
"Let's load the Hugging Face Embedding class."
]
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
+ "id": "16b20335-da1d-46ba-aa23-fbf3e2c6fe60",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!pip install langchain sentence_transformers"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
"id": "861521a9",
"metadata": {},
"outputs": [],
@@ -21,7 +31,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 3,
"id": "ff9be586",
"metadata": {},
"outputs": [],
@@ -31,7 +41,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 3,
"id": "d0a98ae9",
"metadata": {},
"outputs": [],
@@ -41,7 +51,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 5,
"id": "5d6c682b",
"metadata": {},
"outputs": [],
@@ -51,7 +61,28 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 6,
+ "id": "b57b8ce9-ef7d-4e63-979e-aa8763d1f9a8",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[-0.04895168915390968, -0.03986193612217903, -0.021562768146395683]"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "query_result[:3]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
"id": "bb5e74c0",
"metadata": {},
"outputs": [],
@@ -60,19 +91,71 @@
]
},
{
- "cell_type": "code",
- "execution_count": null,
- "id": "aaad49f8",
+ "cell_type": "markdown",
+ "id": "92019ef1-5d30-4985-b4e6-c0d98bdfe265",
"metadata": {},
- "outputs": [],
- "source": []
+ "source": [
+ "## Hugging Face Inference API\n",
+ "We can also access embedding models via the Hugging Face Inference API, which does not require us to install ``sentence_transformers`` and download models locally."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "66f5c6ba-1446-43e1-b012-800d17cef300",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdin",
+ "output_type": "stream",
+ "text": [
+ "Enter your HF Inference API Key:\n",
+ "\n",
+ " ········\n"
+ ]
+ }
+ ],
+ "source": [
+ "import getpass\n",
+ "\n",
+ "inference_api_key = getpass.getpass(\"Enter your HF Inference API Key:\\n\\n\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "d0623c1f-cd82-4862-9bce-3655cb9b66ac",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[-0.038338541984558105, 0.1234646737575531, -0.028642963618040085]"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings\n",
+ "\n",
+ "embeddings = HuggingFaceInferenceAPIEmbeddings(\n",
+ " api_key=inference_api_key,\n",
+ " model_name=\"sentence-transformers/all-MiniLM-l6-v2\"\n",
+ ")\n",
+ "\n",
+ "query_result = embeddings.embed_query(text)\n",
+ "query_result[:3]"
+ ]
}
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3 (ipykernel)",
+ "display_name": "poetry-venv",
"language": "python",
- "name": "python3"
+ "name": "poetry-venv"
},
"language_info": {
"codemirror_mode": {
diff --git a/docs/extras/integrations/vectorstores/nucliadb.ipynb b/docs/extras/integrations/vectorstores/nucliadb.ipynb
new file mode 100644
index 00000000000..f5fe2299c85
--- /dev/null
+++ b/docs/extras/integrations/vectorstores/nucliadb.ipynb
@@ -0,0 +1,126 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# NucliaDB\n",
+ "\n",
+ "You can use a local NucliaDB instance or use [Nuclia Cloud](https://nuclia.cloud).\n",
+ "\n",
+ "When using a local instance, you need a Nuclia Understanding API key, so your texts are properly vectorized and indexed. You can get a key by creating a free account at [https://nuclia.cloud](https://nuclia.cloud), and then [create a NUA key](https://docs.nuclia.dev/docs/docs/using/understanding/intro)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#!pip install langchain nuclia"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Usage with nuclia.cloud"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain.vectorstores.nucliadb import NucliaDB\n",
+ "API_KEY = \"YOUR_API_KEY\"\n",
+ "\n",
+ "ndb = NucliaDB(knowledge_box=\"YOUR_KB_ID\", local=False, api_key=API_KEY)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Usage with a local instance\n",
+ "\n",
+ "Note: By default `backend` is set to `http://localhost:8080`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain.vectorstores.nucliadb import NucliaDB\n",
+ "\n",
+ "ndb = NucliaDB(knowledge_box=\"YOUR_KB_ID\", local=True, backend=\"http://my-local-server\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Add and delete texts to your Knowledge Box"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ids = ndb.add_texts([\"This is a new test\", \"This is a second test\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ndb.delete(ids=ids)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Search in your Knowledge Box"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "results = ndb.similarity_search(\"Who was inspired by Ada Lovelace?\")\n",
+ "print(res.page_content)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.1"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/docs/extras/integrations/vectorstores/sqlitevss.ipynb b/docs/extras/integrations/vectorstores/sqlitevss.ipynb
new file mode 100644
index 00000000000..e670d5683fd
--- /dev/null
+++ b/docs/extras/integrations/vectorstores/sqlitevss.ipynb
@@ -0,0 +1,207 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# sqlite-vss\n",
+ "\n",
+ ">[sqlite-vss](https://alexgarcia.xyz/sqlite-vss/) is an SQLite extension designed for vector search, emphasizing local-first operations and easy integration into applications without external servers. Leveraging the Faiss library, it offers efficient similarity search and clustering capabilities.\n",
+ "\n",
+ "This notebook shows how to use the `SQLiteVSS` vector database."
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "outputs": [],
+ "source": [
+ "# You need to install sqlite-vss as a dependency.\n",
+ "%pip install sqlite-vss"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### Quickstart"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "'Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.'"
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings\n",
+ "from langchain.text_splitter import CharacterTextSplitter\n",
+ "from langchain.vectorstores import SQLiteVSS\n",
+ "from langchain.document_loaders import TextLoader\n",
+ "\n",
+ "# load the document and split it into chunks\n",
+ "loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
+ "documents = loader.load()\n",
+ "\n",
+ "# split it into chunks\n",
+ "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
+ "docs = text_splitter.split_documents(documents)\n",
+ "texts = [doc.page_content for doc in docs]\n",
+ "\n",
+ "\n",
+ "# create the open-source embedding function\n",
+ "embedding_function = SentenceTransformerEmbeddings(model_name=\"all-MiniLM-L6-v2\")\n",
+ "\n",
+ "\n",
+ "# load it in sqlite-vss in a table named state_union.\n",
+ "# the db_file parameter is the name of the file you want\n",
+ "# as your sqlite database.\n",
+ "db = SQLiteVSS.from_texts(\n",
+ " texts=texts,\n",
+ " embedding=embedding_function,\n",
+ " table=\"state_union\",\n",
+ " db_file=\"/tmp/vss.db\"\n",
+ ")\n",
+ "\n",
+ "# query it\n",
+ "query = \"What did the president say about Ketanji Brown Jackson\"\n",
+ "data = db.similarity_search(query)\n",
+ "\n",
+ "# print results\n",
+ "data[0].page_content"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2023-09-06T14:55:55.370351Z",
+ "start_time": "2023-09-06T14:55:53.547755Z"
+ }
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### Using existing sqlite connection"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "'Ketanji Brown Jackson is awesome'"
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings\n",
+ "from langchain.text_splitter import CharacterTextSplitter\n",
+ "from langchain.vectorstores import SQLiteVSS\n",
+ "from langchain.document_loaders import TextLoader\n",
+ "\n",
+ "# load the document and split it into chunks\n",
+ "loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
+ "documents = loader.load()\n",
+ "\n",
+ "# split it into chunks\n",
+ "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
+ "docs = text_splitter.split_documents(documents)\n",
+ "texts = [doc.page_content for doc in docs]\n",
+ "\n",
+ "\n",
+ "# create the open-source embedding function\n",
+ "embedding_function = SentenceTransformerEmbeddings(model_name=\"all-MiniLM-L6-v2\")\n",
+ "connection = SQLiteVSS.create_connection(db_file=\"/tmp/vss.db\")\n",
+ "\n",
+ "db1 = SQLiteVSS(\n",
+ " table=\"state_union\",\n",
+ " embedding=embedding_function,\n",
+ " connection=connection\n",
+ ")\n",
+ "\n",
+ "db1.add_texts([\"Ketanji Brown Jackson is awesome\"])\n",
+ "# query it again\n",
+ "query = \"What did the president say about Ketanji Brown Jackson\"\n",
+ "data = db1.similarity_search(query)\n",
+ "\n",
+ "# print results\n",
+ "data[0].page_content"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2023-09-06T14:59:22.086252Z",
+ "start_time": "2023-09-06T14:59:21.693237Z"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "outputs": [],
+ "source": [
+ "# Cleaning up\n",
+ "import os\n",
+ "os.remove(\"/tmp/vss.db\")"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2023-09-06T15:01:15.550318Z",
+ "start_time": "2023-09-06T15:01:15.546428Z"
+ }
+ }
+  }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 2
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython2",
+ "version": "2.7.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/docs/extras/integrations/vectorstores/zep.ipynb b/docs/extras/integrations/vectorstores/zep.ipynb
index 0354da32105..4456aece0be 100644
--- a/docs/extras/integrations/vectorstores/zep.ipynb
+++ b/docs/extras/integrations/vectorstores/zep.ipynb
@@ -167,7 +167,7 @@
"Tables necessary to determine the places of the planets are not less\r\n",
"necessary than those for the sun, moon, and stars. Some notion of the\r\n",
"number and complexity of these tables may be formed, when we state that\r\n",
- "the positions of the two principal planets, (and these the most\r\n",
+ "the positions of the two principal planets, (and these are the most\r\n",
"necessary for the navigator,) Jupiter and Saturn, require each not less\r\n",
"than one hundred and sixteen tables. Yet it is not only necessary to\r\n",
"predict the position of these bodies, but it is likewise expedient to -> 0.8998482592744614 \n",
diff --git a/docs/extras/use_cases/apis.ipynb b/docs/extras/use_cases/apis.ipynb
index 1af0a7f3ceb..8d9259c3ca7 100644
--- a/docs/extras/use_cases/apis.ipynb
+++ b/docs/extras/use_cases/apis.ipynb
@@ -1,12 +1,21 @@
{
"cells": [
+ {
+ "cell_type": "raw",
+ "id": "ea5c61b2-8b52-4270-bdb0-c4df88608f15",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "sidebar_position: 1\n",
+ "title: Interacting with APIs\n",
+ "---"
+ ]
+ },
{
"cell_type": "markdown",
"id": "a15e6a18",
"metadata": {},
"source": [
- "# Interacting with APIs\n",
- "\n",
"[](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/extras/use_cases/apis.ipynb)\n",
"\n",
"## Use case \n",
@@ -69,9 +78,7 @@
"cell_type": "code",
"execution_count": 2,
"id": "30b780e3",
- "metadata": {
- "scrolled": false
- },
+ "metadata": {},
"outputs": [
{
"name": "stderr",
@@ -415,7 +422,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.16"
+ "version": "3.9.1"
}
},
"nbformat": 4,
diff --git a/docs/extras/use_cases/chatbots.ipynb b/docs/extras/use_cases/chatbots.ipynb
index 58e3ce5317d..c67d595c9f8 100644
--- a/docs/extras/use_cases/chatbots.ipynb
+++ b/docs/extras/use_cases/chatbots.ipynb
@@ -1,12 +1,21 @@
{
"cells": [
+ {
+ "cell_type": "raw",
+ "id": "22fd28c9-9b48-476c-bca8-20efef7fdb14",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "sidebar_position: 1\n",
+ "title: Chatbots\n",
+ "---"
+ ]
+ },
{
"cell_type": "markdown",
"id": "ee7f95e4",
"metadata": {},
"source": [
- "# Chatbots\n",
- "\n",
"[](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/extras/use_cases/chatbots.ipynb)\n",
"\n",
"## Use case\n",
diff --git a/docs/extras/use_cases/code_understanding.ipynb b/docs/extras/use_cases/code_understanding.ipynb
index 60a02b9bb3b..df0cfbf9d1b 100644
--- a/docs/extras/use_cases/code_understanding.ipynb
+++ b/docs/extras/use_cases/code_understanding.ipynb
@@ -1,11 +1,19 @@
{
"cells": [
+ {
+ "cell_type": "raw",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "sidebar_position: 1\n",
+ "title: Code understanding\n",
+ "---"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "# Code Understanding\n",
- "\n",
"[](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/extras/use_cases/code_understanding.ipynb)\n",
"\n",
"## Use case\n",
@@ -1047,7 +1055,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.16"
+ "version": "3.9.1"
}
},
"nbformat": 4,
diff --git a/docs/extras/use_cases/extraction.ipynb b/docs/extras/use_cases/extraction.ipynb
index 7aaa37f0464..628026127a4 100644
--- a/docs/extras/use_cases/extraction.ipynb
+++ b/docs/extras/use_cases/extraction.ipynb
@@ -1,12 +1,21 @@
{
"cells": [
+ {
+ "cell_type": "raw",
+ "id": "df29b30a-fd27-4e08-8269-870df5631f9e",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "sidebar_position: 1\n",
+ "title: Extraction\n",
+ "---"
+ ]
+ },
{
"cell_type": "markdown",
"id": "b84edb4e",
"metadata": {},
"source": [
- "# Extraction\n",
- "\n",
"[](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/extras/use_cases/extraction.ipynb)\n",
"\n",
"## Use case\n",
@@ -589,7 +598,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.16"
+ "version": "3.9.1"
}
},
"nbformat": 4,
diff --git a/docs/extras/use_cases/more/_category_.yml b/docs/extras/use_cases/more/_category_.yml
index 5e1490ecde0..53055fb940a 100644
--- a/docs/extras/use_cases/more/_category_.yml
+++ b/docs/extras/use_cases/more/_category_.yml
@@ -1,2 +1,2 @@
label: 'More'
-position: 1
+position: 2
\ No newline at end of file
diff --git a/docs/extras/use_cases/more/agents/agents.ipynb b/docs/extras/use_cases/more/agents/agents.ipynb
index 98b65d1bbec..54ba5c29dbd 100644
--- a/docs/extras/use_cases/more/agents/agents.ipynb
+++ b/docs/extras/use_cases/more/agents/agents.ipynb
@@ -584,7 +584,7 @@
"\n",
"Collectivly, this tells us: carefully inspect Agent traces and tool outputs. \n",
"\n",
- "As we saw with the [SQL use case](/docs/use_cases/sql), `ReAct agents` can be work very well for specific problems. \n",
+ "As we saw with the [SQL use case](/docs/use_cases/qa_structured/sql), `ReAct agents` can be work very well for specific problems. \n",
"\n",
"But, as shown here, the result is degraded relative to what we see with the OpenAI agent."
]
diff --git a/docs/extras/use_cases/more/code_writing/index.mdx b/docs/extras/use_cases/more/code_writing/index.mdx
index 4dd704b5a0a..218b4385159 100644
--- a/docs/extras/use_cases/more/code_writing/index.mdx
+++ b/docs/extras/use_cases/more/code_writing/index.mdx
@@ -1,7 +1,3 @@
----
-sidebar_position: 0
----
-
# Code writing
:::warning
diff --git a/docs/extras/use_cases/more/graph/diffbot_graphtransformer.ipynb b/docs/extras/use_cases/more/graph/diffbot_graphtransformer.ipynb
new file mode 100644
index 00000000000..da1c2fc020f
--- /dev/null
+++ b/docs/extras/use_cases/more/graph/diffbot_graphtransformer.ipynb
@@ -0,0 +1,307 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "7f0b0c06-ee70-468c-8bf5-b023f9e5e0a2",
+ "metadata": {},
+ "source": [
+ "# Diffbot Graph Transformer\n",
+ "\n",
+ "[](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/extras/use_cases/more/graph/diffbot_transformer.ipynb)\n",
+ "\n",
+ "## Use case\n",
+ "\n",
+ "Text data often contain rich relationships and insights that can be useful for various analytics, recommendation engines, or knowledge management applications.\n",
+ "\n",
+ "Diffbot's NLP API allows for the extraction of entities, relationships, and semantic meaning from unstructured text data.\n",
+ "\n",
+ "By coupling Diffbot's NLP API with Neo4j, a graph database, you can create powerful, dynamic graph structures based on the information extracted from text. These graph structures are fully queryable and can be integrated into various applications.\n",
+ "\n",
+ "This combination allows for use cases such as:\n",
+ "\n",
+ "* Building knowledge graphs from textual documents, websites, or social media feeds.\n",
+ "* Generating recommendations based on semantic relationships in the data.\n",
+ "* Creating advanced search features that understand the relationships between entities.\n",
+ "* Building analytics dashboards that allow users to explore the hidden relationships in data.\n",
+ "\n",
+ "## Overview\n",
+ "\n",
+ "LangChain provides tools to interact with Graph Databases:\n",
+ "\n",
+ "1. `Construct knowledge graphs from text` using graph transformer and store integrations \n",
+ "2. `Query a graph database` using chains for query creation and execution\n",
+ "3. `Interact with a graph database` using agents for robust and flexible querying \n",
+ "\n",
+ "## Quickstart\n",
+ "\n",
+ "First, get required packages and set environment variables:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "975648da-b24f-4164-a671-6772179e12df",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!pip install langchain langchain-experimental openai neo4j wikipedia"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "77718977-629e-46c2-b091-f9191b9ec569",
+ "metadata": {},
+ "source": [
+ "## Diffbot NLP Service\n",
+ "\n",
+ "Diffbot's NLP service is a tool for extracting entities, relationships, and semantic context from unstructured text data.\n",
+ "This extracted information can be used to construct a knowledge graph.\n",
+ "To use their service, you'll need to obtain an API key from [Diffbot](https://www.diffbot.com/products/natural-language/)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "2cbf97d0-3682-439b-8750-b695ff726789",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain_experimental.graph_transformers.diffbot import DiffbotGraphTransformer\n",
+ "\n",
+ "diffbot_api_key = \"DIFFBOT_API_KEY\"\n",
+ "diffbot_nlp = DiffbotGraphTransformer(diffbot_api_key=diffbot_api_key)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5e3b894a-e3ee-46c7-8116-f8377f8f0159",
+ "metadata": {},
+ "source": [
+ "This code fetches Wikipedia articles about \"Baldur's Gate 3\" and then uses `DiffbotGraphTransformer` to extract entities and relationships.\n",
+ "The `DiffbotGraphTransformer` outputs a structured data `GraphDocument`, which can be used to populate a graph database.\n",
+ "Note that text chunking is avoided due to Diffbot's [character limit per API request](https://docs.diffbot.com/reference/introduction-to-natural-language-api)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "53f8df86-47a1-44a1-9a0f-6725b90703bc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain.document_loaders import WikipediaLoader\n",
+ "\n",
+ "query = \"Warren Buffett\"\n",
+ "raw_documents = WikipediaLoader(query=query).load()\n",
+ "graph_documents = diffbot_nlp.convert_to_graph_documents(raw_documents)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "31bb851a-aab4-4b97-a6b7-fce397d32b47",
+ "metadata": {},
+ "source": [
+ "## Loading the data into a knowledge graph\n",
+ "\n",
+ "You will need to have a running Neo4j instance. One option is to create a [free Neo4j database instance in their Aura cloud service](https://neo4j.com/cloud/platform/aura-graph-database/). You can also run the database locally using the [Neo4j Desktop application](https://neo4j.com/download/), or running a docker container. You can run a local docker container by running the executing the following script:\n",
+ "```\n",
+ "docker run \\\n",
+ " --name neo4j \\\n",
+ " -p 7474:7474 -p 7687:7687 \\\n",
+ " -d \\\n",
+ " -e NEO4J_AUTH=neo4j/pleaseletmein \\\n",
+ " -e NEO4J_PLUGINS=\\[\\\"apoc\\\"\\] \\\n",
+ " neo4j:latest\n",
+ "``` \n",
+ "If you are using the docker container, you need to wait a couple of second for the database to start."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "0b2b6641-5a5d-467c-b148-e6aad5e4baa7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain.graphs import Neo4jGraph\n",
+ "\n",
+ "url=\"bolt://localhost:7687\"\n",
+ "username=\"neo4j\"\n",
+ "password=\"pleaseletmein\"\n",
+ "\n",
+ "graph = Neo4jGraph(\n",
+ " url=url,\n",
+ " username=username, \n",
+ " password=password\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0b15e840-fe6f-45db-9193-1b4e2df5c12c",
+ "metadata": {},
+ "source": [
+ "The `GraphDocuments` can be loaded into a knowledge graph using the `add_graph_documents` method."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "1a67c4a8-955c-42a2-9c5d-de3ac0e640ec",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "graph.add_graph_documents(graph_documents)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ed411e05-2b03-460d-997e-938482774f40",
+ "metadata": {},
+ "source": [
+ "## Refresh graph schema information\n",
+ "If the schema of database changes, you can refresh the schema information needed to generate Cypher statements"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "904c9ee3-787c-403f-857d-459ce5ad5a1b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "graph.refresh_schema()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f19d1387-5899-4258-8c94-8ef5fa7db464",
+ "metadata": {},
+ "source": [
+ "## Querying the graph\n",
+ "We can now use the graph cypher QA chain to ask question of the graph. It is advisable to use **gpt-4** to construct Cypher queries to get the best experience."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "9393b732-67c8-45c1-9ec2-089f49c62448",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain.chains import GraphCypherQAChain\n",
+ "from langchain.chat_models import ChatOpenAI\n",
+ "\n",
+ "chain = GraphCypherQAChain.from_llm(\n",
+ " cypher_llm=ChatOpenAI(temperature=0, model_name=\"gpt-4\"),\n",
+ " qa_llm=ChatOpenAI(temperature=0, model_name=\"gpt-3.5-turbo\"),\n",
+ " graph=graph, verbose=True,\n",
+ " \n",
+ ")\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "1a9b3652-b436-404d-aa25-5fb576f23dc0",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "\n",
+ "\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
+ "Generated Cypher:\n",
+ "\u001b[32;1m\u001b[1;3mMATCH (p:Person {name: \"Warren Buffett\"})-[:EDUCATED_AT]->(o:Organization)\n",
+ "RETURN o.name\u001b[0m\n",
+ "Full Context:\n",
+ "\u001b[32;1m\u001b[1;3m[{'o.name': 'New York Institute of Finance'}, {'o.name': 'Alice Deal Junior High School'}, {'o.name': 'Woodrow Wilson High School'}, {'o.name': 'University of Nebraska'}]\u001b[0m\n",
+ "\n",
+ "\u001b[1m> Finished chain.\u001b[0m\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "'Warren Buffett attended the University of Nebraska.'"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chain.run(\"Which university did Warren Buffett attend?\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "adc0ba0f-a62c-4875-89ce-da717f3ab148",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "\n",
+ "\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
+ "Generated Cypher:\n",
+ "\u001b[32;1m\u001b[1;3mMATCH (p:Person)-[r:EMPLOYEE_OR_MEMBER_OF]->(o:Organization) WHERE o.name = 'Berkshire Hathaway' RETURN p.name\u001b[0m\n",
+ "Full Context:\n",
+ "\u001b[32;1m\u001b[1;3m[{'p.name': 'Charlie Munger'}, {'p.name': 'Oliver Chace'}, {'p.name': 'Howard Buffett'}, {'p.name': 'Howard'}, {'p.name': 'Susan Buffett'}, {'p.name': 'Warren Buffett'}]\u001b[0m\n",
+ "\n",
+ "\u001b[1m> Finished chain.\u001b[0m\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "'Charlie Munger, Oliver Chace, Howard Buffett, Susan Buffett, and Warren Buffett are or were working at Berkshire Hathaway.'"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chain.run(\"Who is or was working at Berkshire Hathaway?\")"
+ ]
+  }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/extras/use_cases/more/self_check/index.mdx b/docs/extras/use_cases/more/self_check/index.mdx
index 9880394eb5a..a424ea43701 100644
--- a/docs/extras/use_cases/more/self_check/index.mdx
+++ b/docs/extras/use_cases/more/self_check/index.mdx
@@ -1,7 +1,3 @@
----
-sidebar_position: 0
----
-
# Self-checking
One of the main issues with using LLMs is that they can often hallucinate and make false claims. One of the surprisingly effective ways to remediate this is to use the LLM itself to check its own answers.
diff --git a/docs/extras/use_cases/qa_structured/_category_.yml b/docs/extras/use_cases/qa_structured/_category_.yml
new file mode 100644
index 00000000000..209e3895ff5
--- /dev/null
+++ b/docs/extras/use_cases/qa_structured/_category_.yml
@@ -0,0 +1,3 @@
+label: 'QA over structured data'
+collapsed: false
+position: 0.5
diff --git a/docs/extras/use_cases/qa_structured/integrations/_category_.yml b/docs/extras/use_cases/qa_structured/integrations/_category_.yml
new file mode 100644
index 00000000000..4a4b0b2f28a
--- /dev/null
+++ b/docs/extras/use_cases/qa_structured/integrations/_category_.yml
@@ -0,0 +1 @@
+label: 'Integration-specific'
diff --git a/docs/extras/use_cases/qa_structured/integrations/elasticsearch.ipynb b/docs/extras/use_cases/qa_structured/integrations/elasticsearch.ipynb
new file mode 100644
index 00000000000..e28bc6bf618
--- /dev/null
+++ b/docs/extras/use_cases/qa_structured/integrations/elasticsearch.ipynb
@@ -0,0 +1,158 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Elasticsearch\n",
+ "\n",
+ "[](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/extras/use_cases/qa_structured/integrations/elasticsearch.ipynb)\n",
+ "\n",
+ "We can use LLMs to interact with Elasticsearch analytics databases in natural language.\n",
+ "\n",
+ "This chain builds search queries via the Elasticsearch DSL API (filters and aggregations).\n",
+ "\n",
+ "The Elasticsearch client must have permissions for index listing, mapping description and search queries.\n",
+ "\n",
+ "See [here](https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html) for instructions on how to run Elasticsearch locally."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "! pip install langchain langchain-experimental openai elasticsearch\n",
+ "\n",
+ "# Set env var OPENAI_API_KEY or load from a .env file\n",
+ "# import dotenv\n",
+ "\n",
+ "# dotenv.load_dotenv()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from elasticsearch import Elasticsearch\n",
+ "\n",
+ "from langchain.chat_models import ChatOpenAI\n",
+ "from langchain.chains.elasticsearch_database import ElasticsearchDatabaseChain"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Initialize Elasticsearch python client.\n",
+ "# See https://elasticsearch-py.readthedocs.io/en/v8.8.2/api.html#elasticsearch.Elasticsearch\n",
+ "ELASTIC_SEARCH_SERVER = \"https://elastic:pass@localhost:9200\"\n",
+ "db = Elasticsearch(ELASTIC_SEARCH_SERVER)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Uncomment the next cell to initially populate your db."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# customers = [\n",
+ "# {\"firstname\": \"Jennifer\", \"lastname\": \"Walters\"},\n",
+ "# {\"firstname\": \"Monica\",\"lastname\":\"Rambeau\"},\n",
+ "# {\"firstname\": \"Carol\",\"lastname\":\"Danvers\"},\n",
+ "# {\"firstname\": \"Wanda\",\"lastname\":\"Maximoff\"},\n",
+ "# {\"firstname\": \"Jennifer\",\"lastname\":\"Takeda\"},\n",
+ "# ]\n",
+ "# for i, customer in enumerate(customers):\n",
+ "# db.create(index=\"customers\", document=customer, id=i)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "llm = ChatOpenAI(model_name=\"gpt-4\", temperature=0)\n",
+ "chain = ElasticsearchDatabaseChain.from_llm(llm=llm, database=db, verbose=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "question = \"What are the first names of all the customers?\"\n",
+ "chain.run(question)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can customize the prompt."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain.chains.elasticsearch_database.prompts import DEFAULT_DSL_TEMPLATE\n",
+ "from langchain.prompts.prompt import PromptTemplate\n",
+ "\n",
+ "PROMPT_TEMPLATE = \"\"\"Given an input question, create a syntactically correct Elasticsearch query to run. Unless the user specifies in their question a specific number of examples they wish to obtain, always limit your query to at most {top_k} results. You can order the results by a relevant column to return the most interesting examples in the database.\n",
+ "\n",
+ "Unless told to do not query for all the columns from a specific index, only ask for a the few relevant columns given the question.\n",
+ "\n",
+ "Pay attention to use only the column names that you can see in the mapping description. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which index. Return the query as valid json.\n",
+ "\n",
+ "Use the following format:\n",
+ "\n",
+ "Question: Question here\n",
+ "ESQuery: Elasticsearch Query formatted as json\n",
+ "\"\"\"\n",
+ "\n",
+ "PROMPT = PromptTemplate.from_template(\n",
+ " PROMPT_TEMPLATE,\n",
+ ")\n",
+ "chain = ElasticsearchDatabaseChain.from_llm(llm=llm, database=db, query_prompt=PROMPT)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.1"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
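
An aside, not in the notebook above: it can be useful to verify the Elasticsearch connection before building the chain, since `Elasticsearch.info()` raises immediately if the server is unreachable or authentication fails (the credentials below are placeholders):

```python
from elasticsearch import Elasticsearch

db = Elasticsearch("https://elastic:pass@localhost:9200")  # placeholder credentials
info = db.info()  # raises if the server is unreachable or auth fails
print(info["version"]["number"])
```
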
diff --git a/docs/extras/use_cases/qa_structured/integrations/myscale_vector_sql.ipynb b/docs/extras/use_cases/qa_structured/integrations/myscale_vector_sql.ipynb
new file mode 100644
index 00000000000..65bd8323ed0
--- /dev/null
+++ b/docs/extras/use_cases/qa_structured/integrations/myscale_vector_sql.ipynb
@@ -0,0 +1,200 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "245065c6",
+ "metadata": {},
+ "source": [
+ "# Vector SQL Retriever with MyScale\n",
+ "\n",
+ ">[MyScale](https://docs.myscale.com/en/) is an integrated vector database. You can access your database in SQL and also from here, LangChain. MyScale can make a use of [various data types and functions for filters](https://blog.myscale.com/2023/06/06/why-integrated-database-solution-can-boost-your-llm-apps/#filter-on-anything-without-constraints). It will boost up your LLM app no matter if you are scaling up your data or expand your system to broader application."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0246c5bf",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!pip3 install clickhouse-sqlalchemy InstructorEmbedding sentence_transformers openai langchain-experimental"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7585d2c3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "from os import environ\n",
+ "import getpass\n",
+ "from typing import Dict, Any\n",
+ "from langchain import OpenAI, SQLDatabase, LLMChain\n",
+ "from langchain_experimental.sql.vector_sql import VectorSQLDatabaseChain\n",
+ "from sqlalchemy import create_engine, Column, MetaData\n",
+ "from langchain import PromptTemplate\n",
+ "\n",
+ "\n",
+ "from sqlalchemy import create_engine\n",
+ "\n",
+ "MYSCALE_HOST = \"msc-1decbcc9.us-east-1.aws.staging.myscale.cloud\"\n",
+ "MYSCALE_PORT = 443\n",
+ "MYSCALE_USER = \"chatdata\"\n",
+ "MYSCALE_PASSWORD = \"myscale_rocks\"\n",
+ "OPENAI_API_KEY = getpass.getpass(\"OpenAI API Key:\")\n",
+ "\n",
+ "engine = create_engine(\n",
+ " f\"clickhouse://{MYSCALE_USER}:{MYSCALE_PASSWORD}@{MYSCALE_HOST}:{MYSCALE_PORT}/default?protocol=https\"\n",
+ ")\n",
+ "metadata = MetaData(bind=engine)\n",
+ "environ[\"OPENAI_API_KEY\"] = OPENAI_API_KEY"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e08d9ddc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain.embeddings import HuggingFaceInstructEmbeddings\n",
+ "from langchain_experimental.sql.vector_sql import VectorSQLOutputParser\n",
+ "\n",
+ "output_parser = VectorSQLOutputParser.from_embeddings(\n",
+ " model=HuggingFaceInstructEmbeddings(\n",
+ " model_name=\"hkunlp/instructor-xl\", model_kwargs={\"device\": \"cpu\"}\n",
+ " )\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "84b705b2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "from langchain.llms import OpenAI\n",
+ "from langchain.callbacks import StdOutCallbackHandler\n",
+ "\n",
+ "from langchain.utilities.sql_database import SQLDatabase\n",
+ "from langchain_experimental.sql.prompt import MYSCALE_PROMPT\n",
+ "from langchain_experimental.sql.vector_sql import VectorSQLDatabaseChain\n",
+ "\n",
+ "chain = VectorSQLDatabaseChain(\n",
+ " llm_chain=LLMChain(\n",
+ " llm=OpenAI(openai_api_key=OPENAI_API_KEY, temperature=0),\n",
+ " prompt=MYSCALE_PROMPT,\n",
+ " ),\n",
+ " top_k=10,\n",
+ " return_direct=True,\n",
+ " sql_cmd_parser=output_parser,\n",
+ " database=SQLDatabase(engine, None, metadata),\n",
+ ")\n",
+ "\n",
+ "import pandas as pd\n",
+ "\n",
+ "pd.DataFrame(\n",
+ " chain.run(\n",
+ " \"Please give me 10 papers to ask what is PageRank?\",\n",
+ " callbacks=[StdOutCallbackHandler()],\n",
+ " )\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6c09cda0",
+ "metadata": {},
+ "source": [
+ "## SQL Database as Retriever"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "734d7ff5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain.chat_models import ChatOpenAI\n",
+ "from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain\n",
+ "\n",
+ "from langchain_experimental.sql.vector_sql import VectorSQLDatabaseChain\n",
+ "from langchain_experimental.retrievers.vector_sql_database \\\n",
+ " import VectorSQLDatabaseChainRetriever\n",
+ "from langchain_experimental.sql.prompt import MYSCALE_PROMPT\n",
+ "from langchain_experimental.sql.vector_sql import VectorSQLRetrieveAllOutputParser\n",
+ "\n",
+ "output_parser_retrieve_all = VectorSQLRetrieveAllOutputParser.from_embeddings(\n",
+ " output_parser.model\n",
+ ")\n",
+ "\n",
+ "chain = VectorSQLDatabaseChain.from_llm(\n",
+ " llm=OpenAI(openai_api_key=OPENAI_API_KEY, temperature=0),\n",
+ " prompt=MYSCALE_PROMPT,\n",
+ " top_k=10,\n",
+ " return_direct=True,\n",
+ " db=SQLDatabase(engine, None, metadata),\n",
+ " sql_cmd_parser=output_parser_retrieve_all,\n",
+ " native_format=True,\n",
+ ")\n",
+ "\n",
+ "# You need all those keys to get docs\n",
+ "retriever = VectorSQLDatabaseChainRetriever(sql_db_chain=chain, page_content_key=\"abstract\")\n",
+ "\n",
+ "document_with_metadata_prompt = PromptTemplate(\n",
+ " input_variables=[\"page_content\", \"id\", \"title\", \"authors\", \"pubdate\", \"categories\"],\n",
+ " template=\"Content:\\n\\tTitle: {title}\\n\\tAbstract: {page_content}\\n\\tAuthors: {authors}\\n\\tDate of Publication: {pubdate}\\n\\tCategories: {categories}\\nSOURCE: {id}\",\n",
+ ")\n",
+ "\n",
+ "chain = RetrievalQAWithSourcesChain.from_chain_type(\n",
+ " ChatOpenAI(\n",
+ " model_name=\"gpt-3.5-turbo-16k\", openai_api_key=OPENAI_API_KEY, temperature=0.6\n",
+ " ),\n",
+ " retriever=retriever,\n",
+ " chain_type=\"stuff\",\n",
+ " chain_type_kwargs={\n",
+ " \"document_prompt\": document_with_metadata_prompt,\n",
+ " },\n",
+ " return_source_documents=True,\n",
+ ")\n",
+ "ans = chain(\"Please give me 10 papers to ask what is PageRank?\",\n",
+ " callbacks=[StdOutCallbackHandler()])\n",
+ "print(ans[\"answer\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4948ff25",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
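
One more note on the notebook above: `VectorSQLDatabaseChainRetriever` is a standard LangChain retriever, so it can also be queried directly to inspect what the `RetrievalQAWithSourcesChain` will receive. A sketch reusing the `retriever` object defined above:

```python
# Query the retriever directly and peek at the returned documents.
docs = retriever.get_relevant_documents("What is PageRank?")
for doc in docs[:3]:
    print(doc.metadata.get("title"), "->", doc.page_content[:80])
```
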
diff --git a/docs/extras/use_cases/sql.ipynb b/docs/extras/use_cases/qa_structured/sql.ipynb
similarity index 66%
rename from docs/extras/use_cases/sql.ipynb
rename to docs/extras/use_cases/qa_structured/sql.ipynb
index 7cd96a4f467..23bde6a2a53 100644
--- a/docs/extras/use_cases/sql.ipynb
+++ b/docs/extras/use_cases/qa_structured/sql.ipynb
@@ -1,12 +1,20 @@
{
"cells": [
+ {
+ "cell_type": "raw",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "title: SQL\n",
+ "sidebar_position: 2\n",
+ "---"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "# SQL\n",
- "\n",
- "[](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/extras/use_cases/sql.ipynb)\n",
+ "[](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/extras/use_cases/qa_structured/sql.ipynb)\n",
"\n",
"## Use case\n",
"\n",
@@ -713,6 +721,391 @@
"agent_executor.run(\"Describe the playlisttrack table\")"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Extending the SQL Toolkit\n",
+ "\n",
+ "Although the out-of-the-box SQL Toolkit contains the necessary tools to start working on a database, it is often the case that some extra tools may be useful for extending the agent's capabilities. This is particularly useful when trying to use **domain specific knowledge** in the solution, in order to improve its overall performance.\n",
+ "\n",
+ "Some examples include:\n",
+ "\n",
+ "- Including dynamic few shot examples\n",
+ "- Finding misspellings in proper nouns to use as column filters\n",
+ "\n",
+ "We can create separate tools which tackle these specific use cases and include them as a complement to the standard SQL Toolkit. Let's see how to include these two custom tools.\n",
+ "\n",
+ "#### Including dynamic few-shot examples\n",
+ "\n",
+ "In order to include dynamic few-shot examples, we need a custom **Retriever Tool** that handles the vector database in order to retrieve the examples that are semantically similar to the user’s question.\n",
+ "\n",
+ "Let's start by creating a dictionary with some examples: "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# few_shots = {'List all artists.': 'SELECT * FROM artists;',\n",
+ "# \"Find all albums for the artist 'AC/DC'.\": \"SELECT * FROM albums WHERE ArtistId = (SELECT ArtistId FROM artists WHERE Name = 'AC/DC');\",\n",
+ "# \"List all tracks in the 'Rock' genre.\": \"SELECT * FROM tracks WHERE GenreId = (SELECT GenreId FROM genres WHERE Name = 'Rock');\",\n",
+ "# 'Find the total duration of all tracks.': 'SELECT SUM(Milliseconds) FROM tracks;',\n",
+ "# 'List all customers from Canada.': \"SELECT * FROM customers WHERE Country = 'Canada';\",\n",
+ "# 'How many tracks are there in the album with ID 5?': 'SELECT COUNT(*) FROM tracks WHERE AlbumId = 5;',\n",
+ "# 'Find the total number of invoices.': 'SELECT COUNT(*) FROM invoices;',\n",
+ "# 'List all tracks that are longer than 5 minutes.': 'SELECT * FROM tracks WHERE Milliseconds > 300000;',\n",
+ "# 'Who are the top 5 customers by total purchase?': 'SELECT CustomerId, SUM(Total) AS TotalPurchase FROM invoices GROUP BY CustomerId ORDER BY TotalPurchase DESC LIMIT 5;',\n",
+ "# 'Which albums are from the year 2000?': \"SELECT * FROM albums WHERE strftime('%Y', ReleaseDate) = '2000';\",\n",
+ "# 'How many employees are there': 'SELECT COUNT(*) FROM \"employee\"'\n",
+ "# }"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can then create a retriever using the list of questions, assigning the target SQL query as metadata:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain.embeddings.openai import OpenAIEmbeddings\n",
+ "from langchain.vectorstores import FAISS\n",
+ "from langchain.schema import Document\n",
+ "\n",
+ "embeddings = OpenAIEmbeddings()\n",
+ "\n",
+ "few_shot_docs = [Document(page_content=question, metadata={'sql_query': few_shots[question]}) for question in few_shots.keys()]\n",
+ "vector_db = FAISS.from_documents(few_shot_docs, embeddings)\n",
+ "retriever = vector_db.as_retriever()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now we can create our own custom tool and append it as a new tool in the `create_sql_agent` function:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain.agents.agent_toolkits import create_retriever_tool\n",
+ "\n",
+ "tool_description = \"\"\"\n",
+ "This tool will help you understand similar examples to adapt them to the user question.\n",
+ "Input to this tool should be the user question.\n",
+ "\"\"\"\n",
+ "\n",
+ "retriever_tool = create_retriever_tool(\n",
+ " retriever,\n",
+ " name='sql_get_similar_examples',\n",
+ " description=tool_description\n",
+ " )\n",
+ "custom_tool_list = [retriever_tool]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now we can create the agent, adjusting the standard SQL Agent suffix to consider our use case. Although the most straightforward way to handle this would be to include it just in the tool description, this is often not enough and we need to specify it in the agent prompt using the `suffix` argument in the constructor."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain.agents import create_sql_agent, AgentType\n",
+ "from langchain.agents.agent_toolkits import SQLDatabaseToolkit\n",
+ "from langchain.utilities import SQLDatabase\n",
+ "from langchain.chat_models import ChatOpenAI\n",
+ "\n",
+ "db = SQLDatabase.from_uri(\"sqlite:///Chinook.db\")\n",
+ "llm = ChatOpenAI(model_name='gpt-4',temperature=0)\n",
+ "\n",
+ "toolkit = SQLDatabaseToolkit(db=db, llm=llm)\n",
+ "\n",
+ "custom_suffix = \"\"\"\n",
+ "I should first get the similar examples I know.\n",
+ "If the examples are enough to construct the query, I can build it.\n",
+ "Otherwise, I can then look at the tables in the database to see what I can query.\n",
+ "Then I should query the schema of the most relevant tables\n",
+ "\"\"\"\n",
+ "\n",
+ "agent = create_sql_agent(llm=llm,\n",
+ " toolkit=toolkit,\n",
+ " verbose=True,\n",
+ " agent_type=AgentType.OPENAI_FUNCTIONS,\n",
+ " extra_tools=custom_tool_list,\n",
+ " suffix=custom_suffix\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's try it out:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "\n",
+ "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
+ "\u001b[32;1m\u001b[1;3m\n",
+ "Invoking: `sql_get_similar_examples` with `How many employees do we have?`\n",
+ "\n",
+ "\n",
+ "\u001b[0m\u001b[33;1m\u001b[1;3m[Document(page_content='How many employees are there', metadata={'sql_query': 'SELECT COUNT(*) FROM \"employee\"'}), Document(page_content='Find the total number of invoices.', metadata={'sql_query': 'SELECT COUNT(*) FROM invoices;'})]\u001b[0m\u001b[32;1m\u001b[1;3m\n",
+ "Invoking: `sql_db_query_checker` with `SELECT COUNT(*) FROM employee`\n",
+ "responded: {content}\n",
+ "\n",
+ "\u001b[0m\u001b[36;1m\u001b[1;3mSELECT COUNT(*) FROM employee\u001b[0m\u001b[32;1m\u001b[1;3m\n",
+ "Invoking: `sql_db_query` with `SELECT COUNT(*) FROM employee`\n",
+ "\n",
+ "\n",
+ "\u001b[0m\u001b[36;1m\u001b[1;3m[(8,)]\u001b[0m\u001b[32;1m\u001b[1;3mWe have 8 employees.\u001b[0m\n",
+ "\n",
+ "\u001b[1m> Finished chain.\u001b[0m\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "'We have 8 employees.'"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "agent.run(\"How many employees do we have?\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As we can see, the agent first used the `sql_get_similar_examples` tool in order to retrieve similar examples. As the question was very similar to other few shot examples, the agent **didn't need to use any other tool** from the standard Toolkit, thus **saving time and tokens**."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Finding and correcting misspellings for proper nouns\n",
+ "\n",
+ "In order to filter columns that contain proper nouns such as addresses, song names or artists, we first need to double-check the spelling in order to filter the data correctly. \n",
+ "\n",
+ "We can achieve this by creating a vector store using all the distinct proper nouns that exist in the database. We can then have the agent query that vector store each time the user includes a proper noun in their question, to find the correct spelling for that word. In this way, the agent can make sure it understands which entity the user is referring to before building the target query.\n",
+ "\n",
+ "Let's follow a similar approach to the few shots, but without metadata: just embedding the proper nouns and then querying to get the most similar one to the misspelled user question.\n",
+ "\n",
+ "First we need the unique values for each entity we want, for which we define a function that parses the result into a list of elements:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import ast\n",
+ "import re\n",
+ "\n",
+ "def run_query_save_results(db, query):\n",
+ " res = db.run(query)\n",
+ " res = [el for sub in ast.literal_eval(res) for el in sub if el]\n",
+ " res = [re.sub(r'\\b\\d+\\b', '', string).strip() for string in res]\n",
+ " return res\n",
+ "\n",
+ "artists = run_query_save_results(db, \"SELECT Name FROM Artist\")\n",
+ "albums = run_query_save_results(db, \"SELECT Title FROM Album\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now we can proceed with creating the custom **retreiver tool** and the final agent:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain.agents.agent_toolkits import create_retriever_tool\n",
+ "from langchain.embeddings.openai import OpenAIEmbeddings\n",
+ "from langchain.vectorstores import FAISS\n",
+ "\n",
+ "\n",
+ "texts = (artists + albums)\n",
+ "\n",
+ "embeddings = OpenAIEmbeddings()\n",
+ "vector_db = FAISS.from_texts(texts, embeddings)\n",
+ "retriever = vector_db.as_retriever()\n",
+ "\n",
+ "retriever_tool = create_retriever_tool(\n",
+ " retriever,\n",
+ " name='name_search',\n",
+ " description='use to learn how a piece of data is actually written, can be from names, surnames addresses etc'\n",
+ " )\n",
+ "\n",
+ "custom_tool_list = [retriever_tool]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain.agents import create_sql_agent, AgentType\n",
+ "from langchain.agents.agent_toolkits import SQLDatabaseToolkit\n",
+ "from langchain.utilities import SQLDatabase\n",
+ "from langchain.chat_models import ChatOpenAI\n",
+ "\n",
+ "# db = SQLDatabase.from_uri(\"sqlite:///Chinook.db\")\n",
+ "llm = ChatOpenAI(model_name='gpt-4', temperature=0)\n",
+ "\n",
+ "toolkit = SQLDatabaseToolkit(db=db, llm=llm)\n",
+ "\n",
+ "custom_suffix = \"\"\"\n",
+ "If a user asks for me to filter based on proper nouns, I should first check the spelling using the name_search tool.\n",
+ "Otherwise, I can then look at the tables in the database to see what I can query.\n",
+ "Then I should query the schema of the most relevant tables\n",
+ "\"\"\"\n",
+ "\n",
+ "agent = create_sql_agent(llm=llm,\n",
+ " toolkit=toolkit,\n",
+ " verbose=True,\n",
+ " agent_type=AgentType.OPENAI_FUNCTIONS,\n",
+ " extra_tools=custom_tool_list,\n",
+ " suffix=custom_suffix\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's try it out:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "\n",
+ "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
+ "\u001b[32;1m\u001b[1;3m\n",
+ "Invoking: `name_search` with `alis in pains`\n",
+ "\n",
+ "\n",
+ "\u001b[0m\u001b[33;1m\u001b[1;3m[Document(page_content='House of Pain', metadata={}), Document(page_content='Alice In Chains', metadata={}), Document(page_content='Aisha Duo', metadata={}), Document(page_content='House Of Pain', metadata={})]\u001b[0m\u001b[32;1m\u001b[1;3m\n",
+ "Invoking: `sql_db_list_tables` with ``\n",
+ "responded: {content}\n",
+ "\n",
+ "\u001b[0m\u001b[38;5;200m\u001b[1;3mAlbum, Artist, Customer, Employee, Genre, Invoice, InvoiceLine, MediaType, Playlist, PlaylistTrack, Track\u001b[0m\u001b[32;1m\u001b[1;3m\n",
+ "Invoking: `sql_db_schema` with `Album, Artist`\n",
+ "responded: {content}\n",
+ "\n",
+ "\u001b[0m\u001b[33;1m\u001b[1;3m\n",
+ "CREATE TABLE \"Album\" (\n",
+ "\t\"AlbumId\" INTEGER NOT NULL, \n",
+ "\t\"Title\" NVARCHAR(160) NOT NULL, \n",
+ "\t\"ArtistId\" INTEGER NOT NULL, \n",
+ "\tPRIMARY KEY (\"AlbumId\"), \n",
+ "\tFOREIGN KEY(\"ArtistId\") REFERENCES \"Artist\" (\"ArtistId\")\n",
+ ")\n",
+ "\n",
+ "/*\n",
+ "3 rows from Album table:\n",
+ "AlbumId\tTitle\tArtistId\n",
+ "1\tFor Those About To Rock We Salute You\t1\n",
+ "2\tBalls to the Wall\t2\n",
+ "3\tRestless and Wild\t2\n",
+ "*/\n",
+ "\n",
+ "\n",
+ "CREATE TABLE \"Artist\" (\n",
+ "\t\"ArtistId\" INTEGER NOT NULL, \n",
+ "\t\"Name\" NVARCHAR(120), \n",
+ "\tPRIMARY KEY (\"ArtistId\")\n",
+ ")\n",
+ "\n",
+ "/*\n",
+ "3 rows from Artist table:\n",
+ "ArtistId\tName\n",
+ "1\tAC/DC\n",
+ "2\tAccept\n",
+ "3\tAerosmith\n",
+ "*/\u001b[0m\u001b[32;1m\u001b[1;3m\n",
+ "Invoking: `sql_db_query_checker` with `SELECT COUNT(*) FROM Album JOIN Artist ON Album.ArtistId = Artist.ArtistId WHERE Artist.Name = 'Alice In Chains'`\n",
+ "responded: {content}\n",
+ "\n",
+ "\u001b[0m\u001b[36;1m\u001b[1;3mSELECT COUNT(*) FROM Album JOIN Artist ON Album.ArtistId = Artist.ArtistId WHERE Artist.Name = 'Alice In Chains'\u001b[0m\u001b[32;1m\u001b[1;3m\n",
+ "Invoking: `sql_db_query` with `SELECT COUNT(*) FROM Album JOIN Artist ON Album.ArtistId = Artist.ArtistId WHERE Artist.Name = 'Alice In Chains'`\n",
+ "\n",
+ "\n",
+ "\u001b[0m\u001b[36;1m\u001b[1;3m[(1,)]\u001b[0m\u001b[32;1m\u001b[1;3mAlice In Chains has 1 album in the database.\u001b[0m\n",
+ "\n",
+ "\u001b[1m> Finished chain.\u001b[0m\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "'Alice In Chains has 1 album in the database.'"
+ ]
+ },
+ "execution_count": 55,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "agent.run(\"How many albums does alis in pains have?\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As we can see, the agent used the `name_search` tool in order to check how to correctly query the database for this specific artist."
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -867,7 +1260,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.16"
+ "version": "3.9.1"
}
},
"nbformat": 4,
diff --git a/docs/extras/use_cases/question_answering/how_to/local_retrieval_qa.ipynb b/docs/extras/use_cases/question_answering/how_to/local_retrieval_qa.ipynb
index 8bb5bf71f3b..a1fe71e8b50 100644
--- a/docs/extras/use_cases/question_answering/how_to/local_retrieval_qa.ipynb
+++ b/docs/extras/use_cases/question_answering/how_to/local_retrieval_qa.ipynb
@@ -42,7 +42,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 1,
"id": "f8cf5765",
"metadata": {},
"outputs": [],
@@ -68,7 +68,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 2,
"id": "fdce8923",
"metadata": {},
"outputs": [
@@ -83,7 +83,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "objc[31511]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libreplit-mainline-metal.dylib (0x14f4e8208) and /Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libllamamodel-mainline-metal.dylib (0x14f5fc208). One of the two will be used. Which one is undefined.\n"
+ "objc[49534]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libreplit-mainline-metal.dylib (0x131614208) and /Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libllamamodel-mainline-metal.dylib (0x131988208). One of the two will be used. Which one is undefined.\n"
]
}
],
@@ -104,7 +104,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 3,
"id": "b0c55e98",
"metadata": {},
"outputs": [
@@ -114,7 +114,7 @@
"4"
]
},
- "execution_count": 6,
+ "execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -204,7 +204,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 4,
"id": "cd7164e3",
"metadata": {},
"outputs": [],
@@ -225,7 +225,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "56158f83-6490-49b8-9f04-2e2e6ec3524b",
+ "id": "af1176bb-d52a-4cf0-b983-8b7433d45b4f",
"metadata": {},
"outputs": [],
"source": [
@@ -459,12 +459,11 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "4ae37573-63a7-4564-90e1-196a8ea9b526",
+ "id": "cc638992-0924-41c0-8dae-8cf683e72b16",
"metadata": {},
"outputs": [],
"source": [
- "from langchain import hub\n",
- "rag_prompt = hub.pull(\"rlm/rag-prompt-default\")"
+ "pip install langchainhub"
]
},
{
@@ -512,6 +511,9 @@
}
],
"source": [
+ "# Prompt \n",
+ "from langchain import hub\n",
+ "rag_prompt = hub.pull(\"rlm/rag-prompt\")\n",
"from langchain.chains.question_answering import load_qa_chain\n",
"# Chain\n",
"chain = load_qa_chain(llm, chain_type=\"stuff\", prompt=rag_prompt)\n",
@@ -529,7 +531,7 @@
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": 3,
"id": "78f6862d-b7a6-4e03-84e4-45667185bf9b",
"metadata": {},
"outputs": [
@@ -539,12 +541,13 @@
"ChatPromptTemplate(input_variables=['question', 'context'], output_parser=None, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question', 'context'], output_parser=None, partial_variables={}, template=\"[INST]<> You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.<> \\nQuestion: {question} \\nContext: {context} \\nAnswer: [/INST]\", template_format='f-string', validate_template=True), additional_kwargs={})])"
]
},
- "execution_count": 31,
+ "execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
+ "# Prompt\n",
"rag_prompt_llama = hub.pull(\"rlm/rag-prompt-llama\")\n",
"rag_prompt_llama"
]
diff --git a/docs/extras/use_cases/question_answering/question_answering.ipynb b/docs/extras/use_cases/question_answering/question_answering.ipynb
index ccadf6debd5..5eda2bd40ae 100644
--- a/docs/extras/use_cases/question_answering/question_answering.ipynb
+++ b/docs/extras/use_cases/question_answering/question_answering.ipynb
@@ -52,7 +52,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 1,
"id": "046cefc0",
"metadata": {},
"outputs": [],
@@ -269,28 +269,10 @@
},
{
"cell_type": "code",
- "execution_count": 9,
- "id": "c690f01a",
+ "execution_count": null,
+ "id": "9cfe3270-4e89-4c60-a2e5-9026b021bf76",
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "INFO:langchain.retrievers.multi_query:Generated queries: ['1. How can Task Decomposition be approached?', '2. What are the different methods for Task Decomposition?', '3. What are the various approaches to decomposing tasks?']\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "4"
- ]
- },
- "execution_count": 9,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"import logging\n",
"\n",
@@ -318,7 +300,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 9,
"id": "99fa1aec",
"metadata": {},
"outputs": [
@@ -326,10 +308,10 @@
"data": {
"text/plain": [
"{'query': 'What are the approaches to Task Decomposition?',\n",
- " 'result': 'There are three approaches to task decomposition:\\n\\n1. Using Language Model with simple prompting: This approach involves using a Language Model (LLM) with simple prompts like \"Steps for XYZ\" or \"What are the subgoals for achieving XYZ?\" to guide the task decomposition process.\\n\\n2. Using task-specific instructions: In this approach, task-specific instructions are provided to guide the task decomposition. For example, for the task of writing a novel, an instruction like \"Write a story outline\" can be given to help decompose the task into smaller subtasks.\\n\\n3. Human inputs: Task decomposition can also be done with the help of human inputs. This involves getting input and guidance from humans to break down a complex task into smaller, more manageable subtasks.'}"
+ " 'result': 'The approaches to task decomposition include:\\n\\n1. Simple prompting: This approach involves using simple prompts or questions to guide the agent in breaking down a task into smaller subgoals. For example, the agent can be prompted with \"Steps for XYZ\" or \"What are the subgoals for achieving XYZ?\" to facilitate task decomposition.\\n\\n2. Task-specific instructions: In this approach, task-specific instructions are provided to the agent to guide the decomposition process. For example, if the task is to write a novel, the agent can be instructed to \"Write a story outline\" as a step in the task decomposition.\\n\\n3. Human inputs: This approach involves incorporating human inputs in the task decomposition process. Humans can provide guidance, feedback, and assistance to the agent in breaking down complex tasks into manageable subgoals.\\n\\nThese approaches aim to enable efficient handling of complex tasks by breaking them down into smaller, more manageable subgoals.'}"
]
},
- "execution_count": 10,
+ "execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@@ -355,97 +337,7 @@
"#### Choosing LLMs\n",
"- Browse the > 55 LLM and chat model integrations [here](https://integrations.langchain.com/).\n",
"- See further documentation on LLMs and chat models [here](/docs/modules/model_io/models/).\n",
- "- Use local LLMS: The popularity of [PrivateGPT](https://github.com/imartinez/privateGPT) and [GPT4All](https://github.com/nomic-ai/gpt4all) underscore the importance of running LLMs locally.\n",
- "Using `GPT4All` is as simple as [downloading the binary]((/docs/integrations/llms/gpt4all)) and then:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "id": "02d6c9dc",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Found model file at /Users/rlm/Desktop/Code/gpt4all/models/nous-hermes-13b.ggmlv3.q4_0.bin\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "objc[61331]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libreplit-mainline-metal.dylib (0x2e3384208) and /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libllamamodel-mainline-metal.dylib (0x2e37b0208). One of the two will be used. Which one is undefined.\n",
- "llama.cpp: using Metal\n",
- "llama.cpp: loading model from /Users/rlm/Desktop/Code/gpt4all/models/nous-hermes-13b.ggmlv3.q4_0.bin\n",
- "llama_model_load_internal: format = ggjt v3 (latest)\n",
- "llama_model_load_internal: n_vocab = 32001\n",
- "llama_model_load_internal: n_ctx = 2048\n",
- "llama_model_load_internal: n_embd = 5120\n",
- "llama_model_load_internal: n_mult = 256\n",
- "llama_model_load_internal: n_head = 40\n",
- "llama_model_load_internal: n_layer = 40\n",
- "llama_model_load_internal: n_rot = 128\n",
- "llama_model_load_internal: ftype = 2 (mostly Q4_0)\n",
- "llama_model_load_internal: n_ff = 13824\n",
- "llama_model_load_internal: n_parts = 1\n",
- "llama_model_load_internal: model size = 13B\n",
- "llama_model_load_internal: ggml ctx size = 0.09 MB\n",
- "llama_model_load_internal: mem required = 9031.71 MB (+ 1608.00 MB per state)\n",
- "llama_new_context_with_model: kv self size = 1600.00 MB\n",
- "ggml_metal_init: allocating\n",
- "ggml_metal_init: using MPS\n",
- "ggml_metal_init: loading '/Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/ggml-metal.metal'\n",
- "ggml_metal_init: loaded kernel_add 0x2bbbbc2f0\n",
- "ggml_metal_init: loaded kernel_mul 0x2bbbba840\n",
- "ggml_metal_init: loaded kernel_mul_row 0x2bb917dd0\n",
- "ggml_metal_init: loaded kernel_scale 0x2bb918150\n",
- "ggml_metal_init: loaded kernel_silu 0x2bb9184d0\n",
- "ggml_metal_init: loaded kernel_relu 0x2bb918850\n",
- "ggml_metal_init: loaded kernel_gelu 0x2bbbc3f10\n",
- "ggml_metal_init: loaded kernel_soft_max 0x2bbbc5840\n",
- "ggml_metal_init: loaded kernel_diag_mask_inf 0x2bbbc4c70\n",
- "ggml_metal_init: loaded kernel_get_rows_f16 0x2bbbc5fc0\n",
- "ggml_metal_init: loaded kernel_get_rows_q4_0 0x2bbbc6720\n",
- "ggml_metal_init: loaded kernel_get_rows_q4_1 0x2bb918c10\n",
- "ggml_metal_init: loaded kernel_get_rows_q2_k 0x2bbbc51b0\n",
- "ggml_metal_init: loaded kernel_get_rows_q3_k 0x2bbbc7630\n",
- "ggml_metal_init: loaded kernel_get_rows_q4_k 0x2d4394e30\n",
- "ggml_metal_init: loaded kernel_get_rows_q5_k 0x2bbbc7890\n",
- "ggml_metal_init: loaded kernel_get_rows_q6_k 0x2d4395210\n",
- "ggml_metal_init: loaded kernel_rms_norm 0x2bbbc8740\n",
- "ggml_metal_init: loaded kernel_norm 0x2bbbc8b30\n",
- "ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x2d4395470\n",
- "ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x2d4395a70\n",
- "ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x1242b1a00\n",
- "ggml_metal_init: loaded kernel_mul_mat_q2_k_f32 0x29f17d1c0\n",
- "ggml_metal_init: loaded kernel_mul_mat_q3_k_f32 0x2d4396050\n",
- "ggml_metal_init: loaded kernel_mul_mat_q4_k_f32 0x2bbbc98a0\n",
- "ggml_metal_init: loaded kernel_mul_mat_q5_k_f32 0x2bbbca4a0\n",
- "ggml_metal_init: loaded kernel_mul_mat_q6_k_f32 0x2bbbcae90\n",
- "ggml_metal_init: loaded kernel_rope 0x2bbbca700\n",
- "ggml_metal_init: loaded kernel_alibi_f32 0x2bbbcc6e0\n",
- "ggml_metal_init: loaded kernel_cpy_f32_f16 0x2bbbccf90\n",
- "ggml_metal_init: loaded kernel_cpy_f32_f32 0x2bbbcd900\n",
- "ggml_metal_init: loaded kernel_cpy_f16_f16 0x2bbbce1f0\n",
- "ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n",
- "ggml_metal_init: hasUnifiedMemory = true\n",
- "ggml_metal_init: maxTransferRate = built-in GPU\n",
- "ggml_metal_add_buffer: allocated 'data ' buffer, size = 6984.06 MB, ( 6984.45 / 21845.34)\n",
- "ggml_metal_add_buffer: allocated 'eval ' buffer, size = 1024.00 MB, ( 8008.45 / 21845.34)\n",
- "ggml_metal_add_buffer: allocated 'kv ' buffer, size = 1602.00 MB, ( 9610.45 / 21845.34)\n",
- "ggml_metal_add_buffer: allocated 'scr0 ' buffer, size = 512.00 MB, (10122.45 / 21845.34)\n",
- "ggml_metal_add_buffer: allocated 'scr1 ' buffer, size = 512.00 MB, (10634.45 / 21845.34)\n"
- ]
- }
- ],
- "source": [
- "from langchain.llms import GPT4All\n",
- "from langchain.chains import RetrievalQA\n",
- "\n",
- "llm = GPT4All(model=\"/Users/rlm/Desktop/Code/gpt4all/models/nous-hermes-13b.ggmlv3.q4_0.bin\",max_tokens=2048)\n",
- "qa_chain = RetrievalQA.from_chain_type(llm, retriever=vectorstore.as_retriever())"
+ "- See a guide on local LLMS [here](/docs/modules/use_cases/question_answering/how_to/local_retrieval_qa)."
]
},
{
@@ -460,24 +352,17 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 10,
"id": "e4fee704",
"metadata": {},
"outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "ggml_metal_free: deallocating\n"
- ]
- },
{
"data": {
"text/plain": [
- "'The approaches to task decomposition include using LLM with simple prompting, task-specific instructions, or human inputs. Thanks for asking!'"
+ "'The approaches to Task Decomposition are (1) using simple prompting by LLM, (2) using task-specific instructions, and (3) incorporating human inputs. Thanks for asking!'"
]
},
- "execution_count": 13,
+ "execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
@@ -507,8 +392,65 @@
},
{
"cell_type": "markdown",
- "id": "ff40e8db",
+ "id": "c825e9bf-6a56-46e4-8bbb-05441f76cb96",
"metadata": {},
+ "source": [
+ "We can also store and fetch prompts from the LangChain prompt hub.\n",
+ "\n",
+ "This will work with your [LangSmith API key](https://docs.smith.langchain.com/).\n",
+ "\n",
+ "For example, see [here](https://smith.langchain.com/hub/rlm/rag-prompt) is a common prompt for RAG.\n",
+ "\n",
+ "We can load this."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a896060f-ebc4-4236-a4ad-32960601c6e8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pip install langchainhub"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "aef8e734-ba54-48ae-b959-1898618f2d90",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'The approaches to task decomposition include using LLM with simple prompting, task-specific instructions, and human inputs.'"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# RAG prompt\n",
+ "from langchain import hub\n",
+ "QA_CHAIN_PROMPT_HUB = hub.pull(\"rlm/rag-prompt\")\n",
+ "\n",
+ "qa_chain = RetrievalQA.from_chain_type(\n",
+ " llm,\n",
+ " retriever=vectorstore.as_retriever(),\n",
+ " chain_type_kwargs={\"prompt\": QA_CHAIN_PROMPT_HUB}\n",
+ ")\n",
+ "result = qa_chain({\"query\": question})\n",
+ "result[\"result\"]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ff40e8db",
+ "metadata": {
+ "jp-MarkdownHeadingCollapsed": true
+ },
"source": [
"#### Return source documents\n",
"\n",
diff --git a/docs/extras/use_cases/summarization.ipynb b/docs/extras/use_cases/summarization.ipynb
index 000ba481249..6d7e118ab75 100644
--- a/docs/extras/use_cases/summarization.ipynb
+++ b/docs/extras/use_cases/summarization.ipynb
@@ -1,12 +1,21 @@
{
"cells": [
+ {
+ "cell_type": "raw",
+ "id": "2aca8168-62ec-4bba-93f0-73da08cd1920",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "sidebar_position: 1\n",
+ "title: Summarization\n",
+ "---"
+ ]
+ },
{
"cell_type": "markdown",
"id": "cf13f702",
"metadata": {},
"source": [
- "# Summarization\n",
- "\n",
"[](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/extras/use_cases/summarization.ipynb)\n",
"\n",
"## Use case\n",
@@ -548,7 +557,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.16"
+ "version": "3.9.1"
}
},
"nbformat": 4,
diff --git a/docs/extras/use_cases/tagging.ipynb b/docs/extras/use_cases/tagging.ipynb
index 235f9d06cb1..37242a84f5e 100644
--- a/docs/extras/use_cases/tagging.ipynb
+++ b/docs/extras/use_cases/tagging.ipynb
@@ -1,12 +1,21 @@
{
"cells": [
+ {
+ "cell_type": "raw",
+ "id": "cb6f552e-775f-4d84-bc7c-dca94c06a33c",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "sidebar_position: 1\n",
+ "title: Tagging\n",
+ "---"
+ ]
+ },
{
"cell_type": "markdown",
"id": "a0507a4b",
"metadata": {},
"source": [
- "# Tagging\n",
- "\n",
"[](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/extras/use_cases/tagging.ipynb)\n",
"\n",
"## Use case\n",
@@ -408,7 +417,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.16"
+ "version": "3.9.1"
}
},
"nbformat": 4,
diff --git a/docs/extras/use_cases/web_scraping.ipynb b/docs/extras/use_cases/web_scraping.ipynb
index 57c9e8387a1..41bb28703ed 100644
--- a/docs/extras/use_cases/web_scraping.ipynb
+++ b/docs/extras/use_cases/web_scraping.ipynb
@@ -1,12 +1,21 @@
{
"cells": [
+ {
+ "cell_type": "raw",
+ "id": "e254cf03-49fc-4051-a4df-3a8e4e7d2688",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "sidebar_position: 1\n",
+ "title: Web scraping\n",
+ "---"
+ ]
+ },
{
"cell_type": "markdown",
"id": "6605e7f7",
"metadata": {},
"source": [
- "# Web Scraping\n",
- "\n",
"[](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/extras/use_cases/web_scraping.ipynb)\n",
"\n",
"## Use case\n",
@@ -306,9 +315,7 @@
"cell_type": "code",
"execution_count": 7,
"id": "977560ba",
- "metadata": {
- "scrolled": false
- },
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -591,7 +598,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.16"
+ "version": "3.9.1"
}
},
"nbformat": 4,
diff --git a/docs/snippets/modules/agents/agent_types/openai_functions_agent.mdx b/docs/snippets/modules/agents/agent_types/openai_functions_agent.mdx
index a04e2ac8c7b..aaf208dc056 100644
--- a/docs/snippets/modules/agents/agent_types/openai_functions_agent.mdx
+++ b/docs/snippets/modules/agents/agent_types/openai_functions_agent.mdx
@@ -5,10 +5,12 @@ pip install openai google-search-results
```
```python
-from langchain import LLMMathChain, OpenAI, SerpAPIWrapper, SQLDatabase, SQLDatabaseChain
-from langchain.agents import initialize_agent, Tool
-from langchain.agents import AgentType
+from langchain.agents import initialize_agent, AgentType, Tool
+from langchain.chains import LLMMathChain
from langchain.chat_models import ChatOpenAI
+from langchain.llms import OpenAI
+from langchain.utilities import SerpAPIWrapper, SQLDatabase
+from langchain_experimental.sql import SQLDatabaseChain
```
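
For context (not part of this diff), these imports are typically used along the following lines; the tool names and model are illustrative:

```python
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613")
search = SerpAPIWrapper()
llm_math_chain = LLMMathChain.from_llm(llm=llm, verbose=True)

tools = [
    Tool(
        name="Search",
        func=search.run,
        description="useful for answering questions about current events",
    ),
    Tool(
        name="Calculator",
        func=llm_math_chain.run,
        description="useful for answering math questions",
    ),
]

agent = initialize_agent(tools, llm, agent=AgentType.OPENAI_FUNCTIONS, verbose=True)
```
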
diff --git a/libs/experimental/langchain_experimental/data_anonymizer/__init__.py b/libs/experimental/langchain_experimental/data_anonymizer/__init__.py
index 69babad859a..f43d6d98df5 100644
--- a/libs/experimental/langchain_experimental/data_anonymizer/__init__.py
+++ b/libs/experimental/langchain_experimental/data_anonymizer/__init__.py
@@ -1,4 +1,7 @@
"""Data anonymizer package"""
-from langchain_experimental.data_anonymizer.presidio import PresidioAnonymizer
+from langchain_experimental.data_anonymizer.presidio import (
+ PresidioAnonymizer,
+ PresidioReversibleAnonymizer,
+)
-__all__ = ["PresidioAnonymizer"]
+__all__ = ["PresidioAnonymizer", "PresidioReversibleAnonymizer"]
diff --git a/libs/experimental/langchain_experimental/data_anonymizer/base.py b/libs/experimental/langchain_experimental/data_anonymizer/base.py
index 3f9905375e0..875032342a7 100644
--- a/libs/experimental/langchain_experimental/data_anonymizer/base.py
+++ b/libs/experimental/langchain_experimental/data_anonymizer/base.py
@@ -15,3 +15,17 @@ class AnonymizerBase(ABC):
@abstractmethod
def _anonymize(self, text: str) -> str:
"""Abstract method to anonymize text"""
+
+
+class ReversibleAnonymizerBase(AnonymizerBase):
+ """
+ Base abstract class for reversible anonymizers.
+ """
+
+ def deanonymize(self, text: str) -> str:
+ """Deanonymize text"""
+ return self._deanonymize(text)
+
+ @abstractmethod
+ def _deanonymize(self, text: str) -> str:
+ """Abstract method to deanonymize text"""
diff --git a/libs/experimental/langchain_experimental/data_anonymizer/deanonymizer_mapping.py b/libs/experimental/langchain_experimental/data_anonymizer/deanonymizer_mapping.py
new file mode 100644
index 00000000000..2ee03eb2080
--- /dev/null
+++ b/libs/experimental/langchain_experimental/data_anonymizer/deanonymizer_mapping.py
@@ -0,0 +1,21 @@
+from collections import defaultdict
+from dataclasses import dataclass, field
+from typing import Dict
+
+MappingDataType = Dict[str, Dict[str, str]]
+
+
+@dataclass
+class DeanonymizerMapping:
+ mapping: MappingDataType = field(
+ default_factory=lambda: defaultdict(lambda: defaultdict(str))
+ )
+
+ @property
+ def data(self) -> MappingDataType:
+ """Return the deanonymizer mapping"""
+ return {k: dict(v) for k, v in self.mapping.items()}
+
+ def update(self, new_mapping: MappingDataType) -> None:
+ for entity_type, values in new_mapping.items():
+ self.mapping[entity_type].update(values)
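
A quick sketch of the merge semantics: `update()` merges per entity type rather than overwriting, and `.data` returns a plain-dict snapshot of the nested defaultdicts:

```python
from langchain_experimental.data_anonymizer.deanonymizer_mapping import (
    DeanonymizerMapping,
)

mapping = DeanonymizerMapping()
mapping.update({"PERSON": {"John Doe": "Slim Shady"}})
mapping.update({"PERSON": {"Jane Roe": "Mary Major"}, "PHONE_NUMBER": {"111": "555"}})

assert mapping.data == {
    "PERSON": {"John Doe": "Slim Shady", "Jane Roe": "Mary Major"},
    "PHONE_NUMBER": {"111": "555"},
}
```
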
diff --git a/libs/experimental/langchain_experimental/data_anonymizer/deanonymizer_matching_strategies.py b/libs/experimental/langchain_experimental/data_anonymizer/deanonymizer_matching_strategies.py
new file mode 100644
index 00000000000..e5d9e8581b6
--- /dev/null
+++ b/libs/experimental/langchain_experimental/data_anonymizer/deanonymizer_matching_strategies.py
@@ -0,0 +1,17 @@
+from langchain_experimental.data_anonymizer.deanonymizer_mapping import MappingDataType
+
+
+def default_matching_strategy(text: str, deanonymizer_mapping: MappingDataType) -> str:
+ """
+ Default matching strategy for deanonymization.
+ It replaces all the anonymized entities with the original ones.
+
+ Args:
+ text: text to deanonymize
+        deanonymizer_mapping: mapping between anonymized entities and original ones
+    """
+
+ # Iterate over all the entities (PERSON, EMAIL_ADDRESS, etc.)
+ for entity_type in deanonymizer_mapping:
+ for anonymized, original in deanonymizer_mapping[entity_type].items():
+ text = text.replace(anonymized, original)
+ return text
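
A sketch of the strategy in isolation: it performs plain substring replacement of each anonymized value (mapping key) with its original value:

```python
from langchain_experimental.data_anonymizer.deanonymizer_matching_strategies import (
    default_matching_strategy,
)

# Keys are anonymized values; values are the originals they stand in for.
deanonymizer_mapping = {"PERSON": {"Mary Major": "John Doe"}}
print(default_matching_strategy("Hi, I'm Mary Major.", deanonymizer_mapping))
# -> Hi, I'm John Doe.
```
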
diff --git a/libs/experimental/langchain_experimental/data_anonymizer/faker_presidio_mapping.py b/libs/experimental/langchain_experimental/data_anonymizer/faker_presidio_mapping.py
index 8db4f94c2fd..c2a339088e9 100644
--- a/libs/experimental/langchain_experimental/data_anonymizer/faker_presidio_mapping.py
+++ b/libs/experimental/langchain_experimental/data_anonymizer/faker_presidio_mapping.py
@@ -1,8 +1,8 @@
import string
-from typing import Callable, Dict
+from typing import Callable, Dict, Optional
-def get_pseudoanonymizer_mapping() -> Dict[str, Callable]:
+def get_pseudoanonymizer_mapping(seed: Optional[int] = None) -> Dict[str, Callable]:
try:
from faker import Faker
except ImportError as e:
@@ -11,6 +11,7 @@ def get_pseudoanonymizer_mapping() -> Dict[str, Callable]:
) from e
fake = Faker()
+ fake.seed_instance(seed)
# Listed entities supported by Microsoft Presidio (for now, global and US only)
# Source: https://microsoft.github.io/presidio/supported_entities/
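
A sketch of what the new `seed` parameter buys: two independently created mappings seeded identically generate the same fake values, which makes anonymization reproducible across runs (this assumes the mapping values accept the matched text as an argument and ignore it):

```python
from langchain_experimental.data_anonymizer.faker_presidio_mapping import (
    get_pseudoanonymizer_mapping,
)

first = get_pseudoanonymizer_mapping(seed=42)
second = get_pseudoanonymizer_mapping(seed=42)

# Each mapping value is a callable producing a fake value for its entity type.
assert first["PERSON"](None) == second["PERSON"](None)
```
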
diff --git a/libs/experimental/langchain_experimental/data_anonymizer/presidio.py b/libs/experimental/langchain_experimental/data_anonymizer/presidio.py
index 298e3de1d56..d4886eb32c1 100644
--- a/libs/experimental/langchain_experimental/data_anonymizer/presidio.py
+++ b/libs/experimental/langchain_experimental/data_anonymizer/presidio.py
@@ -1,24 +1,56 @@
from __future__ import annotations
-from typing import TYPE_CHECKING, Dict, List, Optional
+import json
+from collections import defaultdict
+from pathlib import Path
+from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Union
-from langchain_experimental.data_anonymizer.base import AnonymizerBase
+import yaml
+
+from langchain_experimental.data_anonymizer.base import (
+ AnonymizerBase,
+ ReversibleAnonymizerBase,
+)
+from langchain_experimental.data_anonymizer.deanonymizer_mapping import (
+ DeanonymizerMapping,
+ MappingDataType,
+)
+from langchain_experimental.data_anonymizer.deanonymizer_matching_strategies import (
+ default_matching_strategy,
+)
from langchain_experimental.data_anonymizer.faker_presidio_mapping import (
get_pseudoanonymizer_mapping,
)
-if TYPE_CHECKING:
- from presidio_analyzer import EntityRecognizer
+try:
+ from presidio_analyzer import AnalyzerEngine
+except ImportError as e:
+ raise ImportError(
+ "Could not import presidio_analyzer, please install with "
+ "`pip install presidio-analyzer`. You will also need to download a "
+ "spaCy model to use the analyzer, e.g. "
+ "`python -m spacy download en_core_web_lg`."
+ ) from e
+try:
+ from presidio_anonymizer import AnonymizerEngine
from presidio_anonymizer.entities import OperatorConfig
+except ImportError as e:
+ raise ImportError(
+ "Could not import presidio_anonymizer, please install with "
+ "`pip install presidio-anonymizer`."
+ ) from e
+
+if TYPE_CHECKING:
+ from presidio_analyzer import EntityRecognizer, RecognizerResult
+ from presidio_anonymizer.entities import EngineResult
-class PresidioAnonymizer(AnonymizerBase):
- """Anonymizer using Microsoft Presidio."""
-
+class PresidioAnonymizerBase(AnonymizerBase):
def __init__(
self,
analyzed_fields: Optional[List[str]] = None,
operators: Optional[Dict[str, OperatorConfig]] = None,
+ faker_seed: Optional[int] = None,
):
"""
Args:
@@ -28,25 +60,10 @@ class PresidioAnonymizer(AnonymizerBase):
Operators allow for custom anonymization of detected PII.
Learn more:
https://microsoft.github.io/presidio/tutorial/10_simple_anonymization/
+ faker_seed: Seed used to initialize faker.
+ Defaults to None, in which case faker will be seeded randomly
+ and provide random values.
"""
- try:
- from presidio_analyzer import AnalyzerEngine
- except ImportError as e:
- raise ImportError(
- "Could not import presidio_analyzer, please install with "
- "`pip install presidio-analyzer`. You will also need to download a "
- "spaCy model to use the analyzer, e.g. "
- "`python -m spacy download en_core_web_lg`."
- ) from e
- try:
- from presidio_anonymizer import AnonymizerEngine
- from presidio_anonymizer.entities import OperatorConfig
- except ImportError as e:
- raise ImportError(
- "Could not import presidio_anonymizer, please install with "
- "`pip install presidio-anonymizer`."
- ) from e
-
self.analyzed_fields = (
analyzed_fields
if analyzed_fields is not None
@@ -59,13 +76,41 @@ class PresidioAnonymizer(AnonymizerBase):
field: OperatorConfig(
operator_name="custom", params={"lambda": faker_function}
)
- for field, faker_function in get_pseudoanonymizer_mapping().items()
+ for field, faker_function in get_pseudoanonymizer_mapping(
+ faker_seed
+ ).items()
}
)
self._analyzer = AnalyzerEngine()
self._anonymizer = AnonymizerEngine()
+ def add_recognizer(self, recognizer: EntityRecognizer) -> None:
+ """Add a recognizer to the analyzer
+
+ Args:
+ recognizer: Recognizer to add to the analyzer.
+ """
+ self._analyzer.registry.add_recognizer(recognizer)
+ self.analyzed_fields.extend(recognizer.supported_entities)
+
+ def add_operators(self, operators: Dict[str, OperatorConfig]) -> None:
+ """Add operators to the anonymizer
+
+ Args:
+ operators: Operators to add to the anonymizer.
+ """
+ self.operators.update(operators)
+
+
+class PresidioAnonymizer(PresidioAnonymizerBase):
def _anonymize(self, text: str) -> str:
+ """Anonymize text.
+ Each PII entity is replaced with a fake value.
+        The fake values will be different each time, as they are generated randomly.
+
+ Args:
+ text: text to anonymize
+ """
results = self._analyzer.analyze(
text,
entities=self.analyzed_fields,
@@ -78,11 +123,185 @@ class PresidioAnonymizer(AnonymizerBase):
operators=self.operators,
).text
- def add_recognizer(self, recognizer: EntityRecognizer) -> None:
- """Add a recognizer to the analyzer"""
- self._analyzer.registry.add_recognizer(recognizer)
- self.analyzed_fields.extend(recognizer.supported_entities)
- def add_operators(self, operators: Dict[str, OperatorConfig]) -> None:
- """Add operators to the anonymizer"""
- self.operators.update(operators)
+class PresidioReversibleAnonymizer(PresidioAnonymizerBase, ReversibleAnonymizerBase):
+ def __init__(
+ self,
+ analyzed_fields: Optional[List[str]] = None,
+ operators: Optional[Dict[str, OperatorConfig]] = None,
+ faker_seed: Optional[int] = None,
+ ):
+ super().__init__(analyzed_fields, operators, faker_seed)
+ self._deanonymizer_mapping = DeanonymizerMapping()
+
+ @property
+ def deanonymizer_mapping(self) -> MappingDataType:
+ """Return the deanonymizer mapping"""
+ return self._deanonymizer_mapping.data
+
+ def _update_deanonymizer_mapping(
+ self,
+ original_text: str,
+ analyzer_results: List[RecognizerResult],
+ anonymizer_results: EngineResult,
+ ) -> None:
+ """Creates or updates the mapping used to de-anonymize text.
+
+ This method exploits the results returned by the
+ analysis and anonymization processes.
+
+ It constructs a mapping from each anonymized entity
+ back to its original text value.
+
+ Mapping will be stored as "deanonymizer_mapping" property.
+
+ Example of "deanonymizer_mapping":
+ {
+ "PERSON": {
+ "": "",
+ "John Doe": "Slim Shady"
+ },
+ "PHONE_NUMBER": {
+ "111-111-1111": "555-555-5555"
+ }
+ ...
+ }
+ """
+
+ # We are able to zip and loop through both lists because we expect
+ # them to return corresponding entities for each identified piece
+ # of analyzable data from our input.
+
+ # We sort them by their 'start' attribute because it allows us to
+ # match corresponding entities by their position in the input text.
+ analyzer_results = sorted(analyzer_results, key=lambda d: d.start)
+ anonymizer_results.items = sorted(
+ anonymizer_results.items, key=lambda d: d.start
+ )
+
+ new_deanonymizer_mapping: MappingDataType = defaultdict(dict)
+
+ for analyzed_entity, anonymized_entity in zip(
+ analyzer_results, anonymizer_results.items
+ ):
+ original_value = original_text[analyzed_entity.start : analyzed_entity.end]
+ new_deanonymizer_mapping[anonymized_entity.entity_type][
+ anonymized_entity.text
+ ] = original_value
+
+ self._deanonymizer_mapping.update(new_deanonymizer_mapping)
+
+ def _anonymize(self, text: str) -> str:
+ """Anonymize text.
+ Each PII entity is replaced with a fake value.
+        The fake values differ on each call, as they are generated randomly.
+ At the same time, we will create a mapping from each anonymized entity
+ back to its original text value.
+
+ Args:
+ text: text to anonymize
+ """
+ analyzer_results = self._analyzer.analyze(
+ text,
+ entities=self.analyzed_fields,
+ language="en",
+ )
+
+ filtered_analyzer_results = (
+ self._anonymizer._remove_conflicts_and_get_text_manipulation_data(
+ analyzer_results
+ )
+ )
+
+ anonymizer_results = self._anonymizer.anonymize(
+ text,
+ analyzer_results=analyzer_results,
+ operators=self.operators,
+ )
+
+ self._update_deanonymizer_mapping(
+ text, filtered_analyzer_results, anonymizer_results
+ )
+
+ return anonymizer_results.text
+
+ def _deanonymize(
+ self,
+ text_to_deanonymize: str,
+ deanonymizer_matching_strategy: Callable[
+ [str, MappingDataType], str
+ ] = default_matching_strategy,
+ ) -> str:
+ """Deanonymize text.
+ Each anonymized entity is replaced with its original value.
+        This method uses the mapping created during the anonymization process.
+
+ Args:
+ text_to_deanonymize: text to deanonymize
+ deanonymizer_matching_strategy: function to use to match
+ anonymized entities with their original values and replace them.
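+
+        Example (illustrative sketch):
+            .. code-block:: python
+
+                anonymized = anonymizer.anonymize("My name is John Doe.")
+                anonymizer.deanonymize(anonymized)
+                # -> "My name is John Doe."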
+ """
+ if not self._deanonymizer_mapping:
+ raise ValueError(
+ "Deanonymizer mapping is empty.",
+ "Please call anonymize() and anonymize some text first.",
+ )
+
+ text_to_deanonymize = deanonymizer_matching_strategy(
+ text_to_deanonymize, self.deanonymizer_mapping
+ )
+
+ return text_to_deanonymize
+
+ def save_deanonymizer_mapping(self, file_path: Union[Path, str]) -> None:
+ """Save the deanonymizer mapping to a JSON or YAML file.
+
+ Args:
+ file_path: Path to file to save the mapping to.
+
+ Example:
+ .. code-block:: python
+
+ anonymizer.save_deanonymizer_mapping(file_path="path/mapping.json")
+ """
+
+ save_path = Path(file_path)
+
+ if save_path.suffix not in [".json", ".yaml"]:
+ raise ValueError(f"{save_path} must have an extension of .json or .yaml")
+
+ # Make sure parent directories exist
+ save_path.parent.mkdir(parents=True, exist_ok=True)
+
+ if save_path.suffix == ".json":
+ with open(save_path, "w") as f:
+ json.dump(self.deanonymizer_mapping, f, indent=2)
+ elif save_path.suffix == ".yaml":
+ with open(save_path, "w") as f:
+ yaml.dump(self.deanonymizer_mapping, f, default_flow_style=False)
+
+ def load_deanonymizer_mapping(self, file_path: Union[Path, str]) -> None:
+ """Load the deanonymizer mapping from a JSON or YAML file.
+
+ Args:
+ file_path: Path to file to load the mapping from.
+
+ Example:
+ .. code-block:: python
+
+ anonymizer.load_deanonymizer_mapping(file_path="path/mapping.json")
+ """
+
+ load_path = Path(file_path)
+
+ if load_path.suffix not in [".json", ".yaml"]:
+ raise ValueError(f"{load_path} must have an extension of .json or .yaml")
+
+ if load_path.suffix == ".json":
+ with open(load_path, "r") as f:
+ loaded_mapping = json.load(f)
+ elif load_path.suffix == ".yaml":
+ with open(load_path, "r") as f:
+ loaded_mapping = yaml.load(f, Loader=yaml.FullLoader)
+
+ self._deanonymizer_mapping.update(loaded_mapping)
diff --git a/libs/experimental/langchain_experimental/graph_transformers/__init__.py b/libs/experimental/langchain_experimental/graph_transformers/__init__.py
new file mode 100644
index 00000000000..3f6c8a665ef
--- /dev/null
+++ b/libs/experimental/langchain_experimental/graph_transformers/__init__.py
@@ -0,0 +1,5 @@
+from langchain_experimental.graph_transformers.diffbot import DiffbotGraphTransformer
+
+__all__ = [
+ "DiffbotGraphTransformer",
+]
diff --git a/libs/experimental/langchain_experimental/graph_transformers/diffbot.py b/libs/experimental/langchain_experimental/graph_transformers/diffbot.py
new file mode 100644
index 00000000000..000c70de4b3
--- /dev/null
+++ b/libs/experimental/langchain_experimental/graph_transformers/diffbot.py
@@ -0,0 +1,316 @@
+from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
+
+import requests
+from langchain.graphs.graph_document import GraphDocument, Node, Relationship
+from langchain.schema import Document
+from langchain.utils import get_from_env
+
+
+def format_property_key(s: str) -> str:
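+    """Convert a whitespace-separated name to lowerCamelCase,
+    e.g. "Cause of death" -> "causeOfDeath"."""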
+ words = s.split()
+ if not words:
+ return s
+ first_word = words[0].lower()
+ capitalized_words = [word.capitalize() for word in words[1:]]
+ return "".join([first_word] + capitalized_words)
+
+
+class NodesList:
+ """
+ Manages a list of nodes with associated properties.
+
+ Attributes:
+ nodes (Dict[Tuple, Any]): Stores nodes as keys and their properties as values.
+ Each key is a tuple where the first element is the
+ node ID and the second is the node type.
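+
+    Example (illustrative sketch):
+        .. code-block:: python
+
+            nodes = NodesList()
+            nodes.add_node_property(("u1", "Person"), {"name": "Ada"})
+            nodes.add_node_property(("u1", "Person"), {"age": 36})
+            nodes.return_node_list()  # one Node with both properties merged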
+ """
+
+ def __init__(self) -> None:
+ self.nodes: Dict[Tuple[Union[str, int], str], Any] = dict()
+
+ def add_node_property(
+ self, node: Tuple[Union[str, int], str], properties: Dict[str, Any]
+ ) -> None:
+ """
+ Adds or updates node properties.
+
+ If the node does not exist in the list, it's added along with its properties.
+ If the node already exists, its properties are updated with the new values.
+
+ Args:
+ node (Tuple): A tuple containing the node ID and node type.
+ properties (Dict): A dictionary of properties to add or update for the node.
+ """
+ if node not in self.nodes:
+ self.nodes[node] = properties
+ else:
+ self.nodes[node].update(properties)
+
+ def return_node_list(self) -> List[Node]:
+ """
+ Returns the nodes as a list of Node objects.
+
+ Each Node object will have its ID, type, and properties populated.
+
+ Returns:
+ List[Node]: A list of Node objects.
+ """
+ nodes = [
+ Node(id=key[0], type=key[1], properties=self.nodes[key])
+ for key in self.nodes
+ ]
+ return nodes
+
+
+# Properties that should be treated as node properties instead of relationships
+FACT_TO_PROPERTY_TYPE = [
+ "Date",
+ "Number",
+ "Job title",
+ "Cause of death",
+ "Organization type",
+ "Academic title",
+]
+
+
+schema_mapping = [
+ ("HEADQUARTERS", "ORGANIZATION_LOCATIONS"),
+ ("RESIDENCE", "PERSON_LOCATION"),
+ ("ALL_PERSON_LOCATIONS", "PERSON_LOCATION"),
+ ("CHILD", "HAS_CHILD"),
+ ("PARENT", "HAS_PARENT"),
+ ("CUSTOMERS", "HAS_CUSTOMER"),
+ ("SKILLED_AT", "INTERESTED_IN"),
+]
+
+
+class SimplifiedSchema:
+ """
+ Provides functionality for working with a simplified schema mapping.
+
+ Attributes:
+ schema (Dict): A dictionary containing the mapping to simplified schema types.
+ """
+
+ def __init__(self) -> None:
+ """Initializes the schema dictionary based on the predefined list."""
+ self.schema = dict()
+ for row in schema_mapping:
+ self.schema[row[0]] = row[1]
+
+ def get_type(self, type: str) -> str:
+ """
+ Retrieves the simplified schema type for a given original type.
+
+ Args:
+ type (str): The original schema type to find the simplified type for.
+
+ Returns:
+ str: The simplified schema type if it exists;
+ otherwise, returns the original type.
+ """
+ try:
+ return self.schema[type]
+ except KeyError:
+ return type
+
+
+class DiffbotGraphTransformer:
+ """Transforms documents into graph documents using Diffbot's NLP API.
+
+ A graph document transformation system takes a sequence of Documents and returns a
+ sequence of Graph Documents.
+
+ Example:
+ .. code-block:: python
+
+ class DiffbotGraphTransformer(BaseGraphDocumentTransformer):
+
+ def transform_documents(
+ self, documents: Sequence[Document], **kwargs: Any
+ ) -> Sequence[GraphDocument]:
+ results = []
+
+ for document in documents:
+ raw_results = self.nlp_request(document.page_content)
+ graph_document = self.process_response(raw_results, document)
+ results.append(graph_document)
+ return results
+
+ async def atransform_documents(
+ self, documents: Sequence[Document], **kwargs: Any
+ ) -> Sequence[Document]:
+ raise NotImplementedError
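+
+    A minimal usage sketch (illustrative; assumes a valid Diffbot API key):
+        .. code-block:: python
+
+            from langchain.schema import Document
+
+            transformer = DiffbotGraphTransformer(diffbot_api_key="...")
+            docs = [Document(page_content="Warren Buffett runs Berkshire Hathaway.")]
+            graph_docs = transformer.convert_to_graph_documents(docs)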
+ """
+
+ def __init__(
+ self,
+ diffbot_api_key: Optional[str] = None,
+ fact_confidence_threshold: float = 0.7,
+ include_qualifiers: bool = True,
+ include_evidence: bool = True,
+ simplified_schema: bool = True,
+ ) -> None:
+ """
+ Initialize the graph transformer with various options.
+
+ Args:
+ diffbot_api_key (str):
+ The API key for Diffbot's NLP services.
+
+ fact_confidence_threshold (float):
+ Minimum confidence level for facts to be included.
+ include_qualifiers (bool):
+ Whether to include qualifiers in the relationships.
+ include_evidence (bool):
+ Whether to include evidence for the relationships.
+ simplified_schema (bool):
+ Whether to use a simplified schema for relationships.
+ """
+ self.diffbot_api_key = diffbot_api_key or get_from_env(
+ "diffbot_api_key", "DIFFBOT_API_KEY"
+ )
+ self.fact_threshold_confidence = fact_confidence_threshold
+ self.include_qualifiers = include_qualifiers
+ self.include_evidence = include_evidence
+ self.simplified_schema = None
+ if simplified_schema:
+ self.simplified_schema = SimplifiedSchema()
+
+ def nlp_request(self, text: str) -> Dict[str, Any]:
+ """
+ Make an API request to the Diffbot NLP endpoint.
+
+ Args:
+ text (str): The text to be processed.
+
+ Returns:
+ Dict[str, Any]: The JSON response from the API.
+ """
+
+ # Relationship extraction only works for English
+ payload = {
+ "content": text,
+ "lang": "en",
+ }
+
+ FIELDS = "facts"
+ HOST = "nl.diffbot.com"
+ url = (
+ f"https://{HOST}/v1/?fields={FIELDS}&"
+ f"token={self.diffbot_api_key}&language=en"
+ )
+ result = requests.post(url, data=payload)
+ return result.json()
+
+ def process_response(
+ self, payload: Dict[str, Any], document: Document
+ ) -> GraphDocument:
+ """
+ Transform the Diffbot NLP response into a GraphDocument.
+
+ Args:
+ payload (Dict[str, Any]): The JSON response from Diffbot's NLP API.
+ document (Document): The original document.
+
+ Returns:
+ GraphDocument: The transformed document as a graph.
+ """
+
+ # Return empty result if there are no facts
+ if "facts" not in payload or not payload["facts"]:
+ return GraphDocument(nodes=[], relationships=[], source=document)
+
+ # Nodes are a custom class because we need to deduplicate
+ nodes_list = NodesList()
+ # Relationships are a list because we don't deduplicate nor anything else
+ relationships = list()
+ for record in payload["facts"]:
+ # Skip if the fact is below the threshold confidence
+ if record["confidence"] < self.fact_threshold_confidence:
+ continue
+
+ # TODO: It should probably be treated as a node property
+ if not record["value"]["allTypes"]:
+ continue
+
+ # Define source node
+ source_id = (
+ record["entity"]["allUris"][0]
+ if record["entity"]["allUris"]
+ else record["entity"]["name"]
+ )
+ source_label = record["entity"]["allTypes"][0]["name"].capitalize()
+ source_name = record["entity"]["name"]
+ source_node = Node(id=source_id, type=source_label)
+ nodes_list.add_node_property(
+ (source_id, source_label), {"name": source_name}
+ )
+
+ # Define target node
+ target_id = (
+ record["value"]["allUris"][0]
+ if record["value"]["allUris"]
+ else record["value"]["name"]
+ )
+ target_label = record["value"]["allTypes"][0]["name"].capitalize()
+ target_name = record["value"]["name"]
+ # Some facts are better suited as node properties
+ if target_label in FACT_TO_PROPERTY_TYPE:
+ nodes_list.add_node_property(
+ (source_id, source_label),
+ {format_property_key(record["property"]["name"]): target_name},
+ )
+ else: # Define relationship
+ # Define target node object
+ target_node = Node(id=target_id, type=target_label)
+ nodes_list.add_node_property(
+ (target_id, target_label), {"name": target_name}
+ )
+ # Define relationship type
+ rel_type = record["property"]["name"].replace(" ", "_").upper()
+ if self.simplified_schema:
+ rel_type = self.simplified_schema.get_type(rel_type)
+
+ # Relationship qualifiers/properties
+ rel_properties = dict()
+ relationship_evidence = [el["passage"] for el in record["evidence"]][0]
+ if self.include_evidence:
+ rel_properties.update({"evidence": relationship_evidence})
+ if self.include_qualifiers and record.get("qualifiers"):
+ for property in record["qualifiers"]:
+ prop_key = format_property_key(property["property"]["name"])
+ rel_properties[prop_key] = property["value"]["name"]
+
+ relationship = Relationship(
+ source=source_node,
+ target=target_node,
+ type=rel_type,
+ properties=rel_properties,
+ )
+ relationships.append(relationship)
+
+ return GraphDocument(
+ nodes=nodes_list.return_node_list(),
+ relationships=relationships,
+ source=document,
+ )
+
+ def convert_to_graph_documents(
+ self, documents: Sequence[Document]
+ ) -> List[GraphDocument]:
+ """Convert a sequence of documents into graph documents.
+
+ Args:
+ documents (Sequence[Document]): The original documents.
+
+        Returns:
+            List[GraphDocument]: The transformed documents as graphs.
+ """
+ results = []
+ for document in documents:
+ raw_results = self.nlp_request(document.page_content)
+ graph_document = self.process_response(raw_results, document)
+ results.append(graph_document)
+ return results
diff --git a/libs/experimental/langchain_experimental/retrievers/vector_sql_database.py b/libs/experimental/langchain_experimental/retrievers/vector_sql_database.py
new file mode 100644
index 00000000000..1ec088dbc51
--- /dev/null
+++ b/libs/experimental/langchain_experimental/retrievers/vector_sql_database.py
@@ -0,0 +1,38 @@
+"""Vector SQL Database Chain Retriever"""
+from typing import Any, Dict, List
+
+from langchain.callbacks.manager import (
+ AsyncCallbackManagerForRetrieverRun,
+ CallbackManagerForRetrieverRun,
+)
+from langchain.schema import BaseRetriever, Document
+
+from langchain_experimental.sql.vector_sql import VectorSQLDatabaseChain
+
+
+class VectorSQLDatabaseChainRetriever(BaseRetriever):
+ """Retriever that uses SQLDatabase as Retriever"""
+
+ sql_db_chain: VectorSQLDatabaseChain
+ """SQL Database Chain"""
+ page_content_key: str = "content"
+ """column name for page content of documents"""
+
+ def _get_relevant_documents(
+ self,
+ query: str,
+ *,
+ run_manager: CallbackManagerForRetrieverRun,
+ **kwargs: Any,
+ ) -> List[Document]:
+ ret: List[Dict[str, Any]] = self.sql_db_chain(
+ query, callbacks=run_manager.get_child(), **kwargs
+ )["result"]
+ return [
+ Document(page_content=r[self.page_content_key], metadata=r) for r in ret
+ ]
+
+ async def _aget_relevant_documents(
+ self, query: str, *, run_manager: AsyncCallbackManagerForRetrieverRun
+ ) -> List[Document]:
+ raise NotImplementedError
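+
+
+# Illustrative usage sketch (assumes `chain` is a configured
+# VectorSQLDatabaseChain):
+#
+#     retriever = VectorSQLDatabaseChainRetriever(
+#         sql_db_chain=chain, page_content_key="abstract"
+#     )
+#     docs = retriever.get_relevant_documents("papers about CNNs")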
diff --git a/libs/experimental/langchain_experimental/sql/prompt.py b/libs/experimental/langchain_experimental/sql/prompt.py
new file mode 100644
index 00000000000..5f4c9b8a4fd
--- /dev/null
+++ b/libs/experimental/langchain_experimental/sql/prompt.py
@@ -0,0 +1,85 @@
+# flake8: noqa
+from langchain.prompts.prompt import PromptTemplate
+
+
+PROMPT_SUFFIX = """Only use the following tables:
+{table_info}
+
+Question: {input}"""
+
+_VECTOR_SQL_DEFAULT_TEMPLATE = """You are a {dialect} expert. Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer to the input question.
+{dialect} queries have a vector distance function called `DISTANCE(column, array)` to compute relevance to the user's question and sort the feature array column by the relevance.
+When the query is asking for the {top_k} closest rows, you have to use this distance function to calculate the distance to the entity's array on the vector column and order by the distance to retrieve relevant rows.
+
+*NOTICE*: `DISTANCE(column, array)` only accepts an array column as its first argument and a `NeuralArray(entity)` as its second argument. You also need a user-defined function called `NeuralArray(entity)` to retrieve the entity's array.
+
+Unless the user specifies in the question a specific number of examples to obtain, query for at most {top_k} results using the LIMIT clause as per {dialect}. You should only order according to the distance function.
+Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in double quotes (") to denote them as delimited identifiers.
+Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
+Pay attention to use today() function to get the current date, if the question involves "today". `ORDER BY` clause should always be after `WHERE` clause. DO NOT add semicolon to the end of SQL. Pay attention to the comment in table schema.
+
+Use the following format:
+
+Question: "Question here"
+SQLQuery: "SQL Query to run"
+SQLResult: "Result of the SQLQuery"
+Answer: "Final answer here"
+"""
+
+VECTOR_SQL_PROMPT = PromptTemplate(
+ input_variables=["input", "table_info", "dialect", "top_k"],
+ template=_VECTOR_SQL_DEFAULT_TEMPLATE + PROMPT_SUFFIX,
+)
+
+
+_myscale_prompt = """You are a MyScale expert. Given an input question, first create a syntactically correct MyScale query to run, then look at the results of the query and return the answer to the input question.
+MyScale queries have a vector distance function called `DISTANCE(column, array)` to compute relevance to the user's question and sort the feature array column by the relevance.
+When the query is asking for the {top_k} closest rows, you have to use this distance function to calculate the distance to the entity's array on the vector column and order by the distance to retrieve relevant rows.
+
+*NOTICE*: `DISTANCE(column, array)` only accepts an array column as its first argument and a `NeuralArray(entity)` as its second argument. You also need a user-defined function called `NeuralArray(entity)` to retrieve the entity's array.
+
+Unless the user specifies in the question a specific number of examples to obtain, query for at most {top_k} results using the LIMIT clause as per MyScale. You should only order according to the distance function.
+Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in double quotes (") to denote them as delimited identifiers.
+Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
+Pay attention to use today() function to get the current date, if the question involves "today". `ORDER BY` clause should always be after `WHERE` clause. DO NOT add semicolon to the end of SQL. Pay attention to the comment in table schema.
+
+Use the following format:
+
+======== table info ========
+<some table schema>
+
+Question: "Question here"
+SQLQuery: "SQL Query to run"
+
+
+Here are some examples:
+
+======== table info ========
+CREATE TABLE "ChatPaper" (
+ abstract String,
+ id String,
+ vector Array(Float32),
+) ENGINE = ReplicatedReplacingMergeTree()
+ ORDER BY id
+ PRIMARY KEY id
+
+Question: What is Feature Pyramid Network?
+SQLQuery: SELECT ChatPaper.title, ChatPaper.id, ChatPaper.authors FROM ChatPaper ORDER BY DISTANCE(vector, NeuralArray(PaperRank contribution)) LIMIT {top_k}
+
+
+Let's begin:
+======== table info ========
+{table_info}
+
+Question: {input}
+SQLQuery: """
+
+MYSCALE_PROMPT = PromptTemplate(
+ input_variables=["input", "table_info", "top_k"],
+ template=_myscale_prompt + PROMPT_SUFFIX,
+)
+
+
+VECTOR_SQL_PROMPTS = {
+ "myscale": MYSCALE_PROMPT,
+}
diff --git a/libs/experimental/langchain_experimental/sql/vector_sql.py b/libs/experimental/langchain_experimental/sql/vector_sql.py
new file mode 100644
index 00000000000..98f3c2dee0c
--- /dev/null
+++ b/libs/experimental/langchain_experimental/sql/vector_sql.py
@@ -0,0 +1,237 @@
+"""Vector SQL Database Chain Retriever"""
+from __future__ import annotations
+
+from typing import Any, Dict, List, Optional, Union
+
+from langchain.callbacks.manager import CallbackManagerForChainRun
+from langchain.chains.llm import LLMChain
+from langchain.chains.sql_database.prompt import PROMPT, SQL_PROMPTS
+from langchain.embeddings.base import Embeddings
+from langchain.prompts.prompt import PromptTemplate
+from langchain.schema import BaseOutputParser, BasePromptTemplate
+from langchain.schema.language_model import BaseLanguageModel
+from langchain.tools.sql_database.prompt import QUERY_CHECKER
+from langchain.utilities.sql_database import SQLDatabase
+
+from langchain_experimental.sql.base import INTERMEDIATE_STEPS_KEY, SQLDatabaseChain
+
+
+class VectorSQLOutputParser(BaseOutputParser[str]):
+ """Output Parser for Vector SQL
+ 1. finds for `NeuralArray()` and replace it with the embedding
+ 2. finds for `DISTANCE()` and replace it with the distance name in backend SQL
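+
+    Example (illustrative sketch; `emb` is any `Embeddings` implementation):
+        .. code-block:: python
+
+            parser = VectorSQLOutputParser.from_embeddings(model=emb)
+            parser.parse(
+                "SELECT id FROM t ORDER BY DISTANCE(vector, NeuralArray(cats))"
+            )
+            # -> "SELECT id FROM t ORDER BY distance(vector, [0.12,0.07,...])"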
+ """
+
+ model: Embeddings
+ """Embedding model to extract embedding for entity"""
+ distance_func_name: str = "distance"
+ """Distance name for Vector SQL"""
+
+ class Config:
+        arbitrary_types_allowed = True
+
+ @property
+ def _type(self) -> str:
+ return "vector_sql_parser"
+
+ @classmethod
+ def from_embeddings(
+ cls, model: Embeddings, distance_func_name: str = "distance", **kwargs: Any
+ ) -> BaseOutputParser:
+ return cls(model=model, distance_func_name=distance_func_name, **kwargs)
+
+ def parse(self, text: str) -> str:
+ text = text.strip()
+ start = text.find("NeuralArray(")
+ _sql_str_compl = text
+        # str.find returns -1 when `NeuralArray(` is absent.
+        if start >= 0:
+ _matched = text[text.find("NeuralArray(") + len("NeuralArray(") :]
+ end = _matched.find(")") + start + len("NeuralArray(") + 1
+ entity = _matched[: _matched.find(")")]
+ vecs = self.model.embed_query(entity)
+ vecs_str = "[" + ",".join(map(str, vecs)) + "]"
+ _sql_str_compl = text.replace("DISTANCE", self.distance_func_name).replace(
+ text[start:end], vecs_str
+ )
+ if _sql_str_compl[-1] == ";":
+ _sql_str_compl = _sql_str_compl[:-1]
+ return _sql_str_compl
+
+
+class VectorSQLRetrieveAllOutputParser(VectorSQLOutputParser):
+ """Based on VectorSQLOutputParser
+ It also modify the SQL to get all columns
+ """
+
+ @property
+ def _type(self) -> str:
+ return "vector_sql_retrieve_all_parser"
+
+ def parse(self, text: str) -> str:
+ text = text.strip()
+ start = text.upper().find("SELECT")
+ if start >= 0:
+ end = text.upper().find("FROM")
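+            # Swap the column list between SELECT and FROM for `*`, so the
+            # generated query returns every column of the matched rows.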
+ text = text.replace(text[start + len("SELECT") + 1 : end - 1], "*")
+ return super().parse(text)
+
+
+def _try_eval(x: Any) -> Any:
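+    # Best-effort: convert string cells (e.g. "[1.0, 2.0]") back into Python
+    # objects, falling back to the raw value if evaluation fails.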
+ try:
+ return eval(x)
+ except Exception:
+ return x
+
+
+def get_result_from_sqldb(
+ db: SQLDatabase, cmd: str
+) -> Union[str, List[Dict[str, Any]], Dict[str, Any]]:
+ result = db._execute(cmd, fetch="all") # type: ignore
+ if isinstance(result, list):
+ return [{k: _try_eval(v) for k, v in dict(d._asdict()).items()} for d in result]
+ else:
+ return {
+ k: _try_eval(v) for k, v in dict(result._asdict()).items() # type: ignore
+ }
+
+
+class VectorSQLDatabaseChain(SQLDatabaseChain):
+ """Chain for interacting with Vector SQL Database.
+
+ Example:
+ .. code-block:: python
+
+            from langchain_experimental.sql.vector_sql import (
+                VectorSQLDatabaseChain, VectorSQLOutputParser)
+            from langchain import OpenAI, SQLDatabase, OpenAIEmbeddings
+            db = SQLDatabase(...)
+            parser = VectorSQLOutputParser.from_embeddings(OpenAIEmbeddings())
+            db_chain = VectorSQLDatabaseChain.from_llm(OpenAI(), db,
+                                                       sql_cmd_parser=parser)
+
+ *Security note*: Make sure that the database connection uses credentials
+ that are narrowly-scoped to only include the permissions this chain needs.
+ Failure to do so may result in data corruption or loss, since this chain may
+ attempt commands like `DROP TABLE` or `INSERT` if appropriately prompted.
+ The best way to guard against such negative outcomes is to (as appropriate)
+ limit the permissions granted to the credentials used with this chain.
+ This issue shows an example negative outcome if these steps are not taken:
+ https://github.com/langchain-ai/langchain/issues/5923
+ """
+
+ sql_cmd_parser: VectorSQLOutputParser
+ """Parser for Vector SQL"""
+ native_format: bool = False
+ """If return_direct, controls whether to return in python native format"""
+
+ def _call(
+ self,
+ inputs: Dict[str, Any],
+ run_manager: Optional[CallbackManagerForChainRun] = None,
+ ) -> Dict[str, Any]:
+ _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
+ input_text = f"{inputs[self.input_key]}\nSQLQuery:"
+ _run_manager.on_text(input_text, verbose=self.verbose)
+ # If not present, then defaults to None which is all tables.
+ table_names_to_use = inputs.get("table_names_to_use")
+ table_info = self.database.get_table_info(table_names=table_names_to_use)
+ llm_inputs = {
+ "input": input_text,
+ "top_k": str(self.top_k),
+ "dialect": self.database.dialect,
+ "table_info": table_info,
+ "stop": ["\nSQLResult:"],
+ }
+ intermediate_steps: List = []
+ try:
+ intermediate_steps.append(llm_inputs) # input: sql generation
+ llm_out = self.llm_chain.predict(
+ callbacks=_run_manager.get_child(),
+ **llm_inputs,
+ )
+ sql_cmd = self.sql_cmd_parser.parse(llm_out)
+ if self.return_sql:
+ return {self.output_key: sql_cmd}
+ if not self.use_query_checker:
+ _run_manager.on_text(llm_out, color="green", verbose=self.verbose)
+ intermediate_steps.append(
+ llm_out
+ ) # output: sql generation (no checker)
+ intermediate_steps.append({"sql_cmd": llm_out}) # input: sql exec
+ result = get_result_from_sqldb(self.database, sql_cmd)
+ intermediate_steps.append(str(result)) # output: sql exec
+ else:
+ query_checker_prompt = self.query_checker_prompt or PromptTemplate(
+ template=QUERY_CHECKER, input_variables=["query", "dialect"]
+ )
+ query_checker_chain = LLMChain(
+ llm=self.llm_chain.llm,
+ prompt=query_checker_prompt,
+ output_parser=self.llm_chain.output_parser,
+ )
+ query_checker_inputs = {
+ "query": llm_out,
+ "dialect": self.database.dialect,
+ }
+ checked_llm_out = query_checker_chain.predict(
+ callbacks=_run_manager.get_child(), **query_checker_inputs
+ )
+ checked_sql_command = self.sql_cmd_parser.parse(checked_llm_out)
+ intermediate_steps.append(
+ checked_llm_out
+ ) # output: sql generation (checker)
+ _run_manager.on_text(
+ checked_llm_out, color="green", verbose=self.verbose
+ )
+ intermediate_steps.append(
+ {"sql_cmd": checked_llm_out}
+ ) # input: sql exec
+ result = get_result_from_sqldb(self.database, checked_sql_command)
+ intermediate_steps.append(str(result)) # output: sql exec
+ llm_out = checked_llm_out
+ sql_cmd = checked_sql_command
+
+ _run_manager.on_text("\nSQLResult: ", verbose=self.verbose)
+ _run_manager.on_text(str(result), color="yellow", verbose=self.verbose)
+ # If return direct, we just set the final result equal to
+ # the result of the sql query result, otherwise try to get a human readable
+ # final answer
+ if self.return_direct:
+ final_result = result
+ else:
+ _run_manager.on_text("\nAnswer:", verbose=self.verbose)
+ input_text += f"{llm_out}\nSQLResult: {result}\nAnswer:"
+ llm_inputs["input"] = input_text
+ intermediate_steps.append(llm_inputs) # input: final answer
+ final_result = self.llm_chain.predict(
+ callbacks=_run_manager.get_child(),
+ **llm_inputs,
+ ).strip()
+ intermediate_steps.append(final_result) # output: final answer
+ _run_manager.on_text(final_result, color="green", verbose=self.verbose)
+ chain_result: Dict[str, Any] = {self.output_key: final_result}
+ if self.return_intermediate_steps:
+ chain_result[INTERMEDIATE_STEPS_KEY] = intermediate_steps
+ return chain_result
+ except Exception as exc:
+ # Append intermediate steps to exception, to aid in logging and later
+ # improvement of few shot prompt seeds
+ exc.intermediate_steps = intermediate_steps # type: ignore
+ raise exc
+
+ @property
+ def _chain_type(self) -> str:
+ return "vector_sql_database_chain"
+
+ @classmethod
+ def from_llm(
+ cls,
+ llm: BaseLanguageModel,
+ db: SQLDatabase,
+ prompt: Optional[BasePromptTemplate] = None,
+ sql_cmd_parser: Optional[VectorSQLOutputParser] = None,
+ **kwargs: Any,
+ ) -> VectorSQLDatabaseChain:
+ assert sql_cmd_parser, "`sql_cmd_parser` must be set in VectorSQLDatabaseChain."
+ prompt = prompt or SQL_PROMPTS.get(db.dialect, PROMPT)
+ llm_chain = LLMChain(llm=llm, prompt=prompt)
+ return cls(
+ llm_chain=llm_chain, database=db, sql_cmd_parser=sql_cmd_parser, **kwargs
+ )
diff --git a/libs/experimental/poetry.lock b/libs/experimental/poetry.lock
index b0d5b9139af..9e8cf9f1aff 100644
--- a/libs/experimental/poetry.lock
+++ b/libs/experimental/poetry.lock
@@ -1245,6 +1245,7 @@ optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*"
files = [
{file = "jsonpointer-2.4-py2.py3-none-any.whl", hash = "sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a"},
+ {file = "jsonpointer-2.4.tar.gz", hash = "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88"},
]
[[package]]
@@ -3752,6 +3753,31 @@ files = [
{file = "types_PyYAML-6.0.12.11-py3-none-any.whl", hash = "sha256:a461508f3096d1d5810ec5ab95d7eeecb651f3a15b71959999988942063bf01d"},
]
+[[package]]
+name = "types-requests"
+version = "2.31.0.2"
+description = "Typing stubs for requests"
+optional = false
+python-versions = "*"
+files = [
+ {file = "types-requests-2.31.0.2.tar.gz", hash = "sha256:6aa3f7faf0ea52d728bb18c0a0d1522d9bfd8c72d26ff6f61bfc3d06a411cf40"},
+ {file = "types_requests-2.31.0.2-py3-none-any.whl", hash = "sha256:56d181c85b5925cbc59f4489a57e72a8b2166f18273fd8ba7b6fe0c0b986f12a"},
+]
+
+[package.dependencies]
+types-urllib3 = "*"
+
+[[package]]
+name = "types-urllib3"
+version = "1.26.25.14"
+description = "Typing stubs for urllib3"
+optional = false
+python-versions = "*"
+files = [
+ {file = "types-urllib3-1.26.25.14.tar.gz", hash = "sha256:229b7f577c951b8c1b92c1bc2b2fdb0b49847bd2af6d1cc2a2e3dd340f3bda8f"},
+ {file = "types_urllib3-1.26.25.14-py3-none-any.whl", hash = "sha256:9683bbb7fb72e32bfe9d2be6e04875fbe1b3eeec3cbb4ea231435aa7fd6b4f0e"},
+]
+
[[package]]
name = "typing-extensions"
version = "4.7.1"
@@ -3995,4 +4021,4 @@ extended-testing = ["faker", "presidio-analyzer", "presidio-anonymizer"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<4.0"
-content-hash = "66ac482bd05eb74414210ac28fc1e8dae1a9928a4a1314e1326fada3551aa8ad"
+content-hash = "443e88f690572715cf58671e4480a006574c7141a1258dff0a0818b954184901"
diff --git a/libs/experimental/pyproject.toml b/libs/experimental/pyproject.toml
index 1e342bf1f82..930883ebf87 100644
--- a/libs/experimental/pyproject.toml
+++ b/libs/experimental/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "langchain-experimental"
-version = "0.0.13"
+version = "0.0.15"
description = "Building applications with LLMs through composability"
authors = []
license = "MIT"
@@ -23,6 +23,7 @@ black = "^23.1.0"
[tool.poetry.group.typing.dependencies]
mypy = "^0.991"
types-pyyaml = "^6.0.12.2"
+types-requests = "^2.28.11.5"
[tool.poetry.group.dev.dependencies]
jupyter = "^1.0.0"
diff --git a/libs/experimental/tests/unit_tests/test_reversible_data_anonymizer.py b/libs/experimental/tests/unit_tests/test_reversible_data_anonymizer.py
new file mode 100644
index 00000000000..9484a0e9dca
--- /dev/null
+++ b/libs/experimental/tests/unit_tests/test_reversible_data_anonymizer.py
@@ -0,0 +1,154 @@
+import os
+from typing import Iterator, List
+
+import pytest
+
+
+@pytest.fixture(scope="module", autouse=True)
+def check_spacy_model() -> Iterator[None]:
+ import spacy
+
+ if not spacy.util.is_package("en_core_web_lg"):
+ pytest.skip(reason="Spacy model 'en_core_web_lg' not installed")
+ yield
+
+
+@pytest.mark.requires("presidio_analyzer", "presidio_anonymizer", "faker")
+@pytest.mark.parametrize(
+ "analyzed_fields,should_contain",
+ [(["PERSON"], False), (["PHONE_NUMBER"], True), (None, False)],
+)
+def test_anonymize(analyzed_fields: List[str], should_contain: bool) -> None:
+ """Test anonymizing a name in a simple sentence"""
+ from langchain_experimental.data_anonymizer import PresidioReversibleAnonymizer
+
+ text = "Hello, my name is John Doe."
+ anonymizer = PresidioReversibleAnonymizer(analyzed_fields=analyzed_fields)
+ anonymized_text = anonymizer.anonymize(text)
+ assert ("John Doe" in anonymized_text) == should_contain
+
+
+@pytest.mark.requires("presidio_analyzer", "presidio_anonymizer", "faker")
+def test_anonymize_multiple() -> None:
+ """Test anonymizing multiple items in a sentence"""
+ from langchain_experimental.data_anonymizer import PresidioReversibleAnonymizer
+
+ text = "John Smith's phone number is 313-666-7440 and email is johnsmith@gmail.com"
+ anonymizer = PresidioReversibleAnonymizer()
+ anonymized_text = anonymizer.anonymize(text)
+ for phrase in ["John Smith", "313-666-7440", "johnsmith@gmail.com"]:
+ assert phrase not in anonymized_text
+
+
+@pytest.mark.requires("presidio_analyzer", "presidio_anonymizer", "faker")
+def test_anonymize_with_custom_operator() -> None:
+ """Test anonymize a name with a custom operator"""
+ from presidio_anonymizer.entities import OperatorConfig
+
+ from langchain_experimental.data_anonymizer import PresidioReversibleAnonymizer
+
+ custom_operator = {"PERSON": OperatorConfig("replace", {"new_value": ""})}
+ anonymizer = PresidioReversibleAnonymizer(operators=custom_operator)
+
+ text = "Jane Doe was here."
+
+ anonymized_text = anonymizer.anonymize(text)
+ assert anonymized_text == " was here."
+
+
+@pytest.mark.requires("presidio_analyzer", "presidio_anonymizer", "faker")
+def test_add_recognizer_operator() -> None:
+ """
+    Test adding a recognizer and anonymizing a new entity type with a custom operator
+ """
+ from presidio_analyzer import PatternRecognizer
+ from presidio_anonymizer.entities import OperatorConfig
+
+ from langchain_experimental.data_anonymizer import PresidioReversibleAnonymizer
+
+ anonymizer = PresidioReversibleAnonymizer(analyzed_fields=[])
+ titles_list = ["Sir", "Madam", "Professor"]
+ custom_recognizer = PatternRecognizer(
+ supported_entity="TITLE", deny_list=titles_list
+ )
+ anonymizer.add_recognizer(custom_recognizer)
+
+ # anonymizing with custom recognizer
+ text = "Madam Jane Doe was here."
+ anonymized_text = anonymizer.anonymize(text)
+ assert anonymized_text == " Jane Doe was here."
+
+ # anonymizing with custom recognizer and operator
+ custom_operator = {"TITLE": OperatorConfig("replace", {"new_value": "Dear"})}
+ anonymizer.add_operators(custom_operator)
+ anonymized_text = anonymizer.anonymize(text)
+ assert anonymized_text == "Dear Jane Doe was here."
+
+
+@pytest.mark.requires("presidio_analyzer", "presidio_anonymizer", "faker")
+def test_deanonymizer_mapping() -> None:
+ """Test if deanonymizer mapping is correctly populated"""
+ from langchain_experimental.data_anonymizer import PresidioReversibleAnonymizer
+
+ anonymizer = PresidioReversibleAnonymizer(
+ analyzed_fields=["PERSON", "PHONE_NUMBER", "EMAIL_ADDRESS", "CREDIT_CARD"]
+ )
+
+ anonymizer.anonymize("Hello, my name is John Doe and my number is 444 555 6666.")
+
+ # ["PERSON", "PHONE_NUMBER"]
+ assert len(anonymizer.deanonymizer_mapping.keys()) == 2
+ assert "John Doe" in anonymizer.deanonymizer_mapping.get("PERSON", {}).values()
+ assert (
+ "444 555 6666"
+ in anonymizer.deanonymizer_mapping.get("PHONE_NUMBER", {}).values()
+ )
+
+ text_to_anonymize = (
+ "And my name is Jane Doe, my email is jane@gmail.com and "
+ "my credit card is 4929 5319 6292 5362."
+ )
+ anonymizer.anonymize(text_to_anonymize)
+
+ # ["PERSON", "PHONE_NUMBER", "EMAIL_ADDRESS", "CREDIT_CARD"]
+ assert len(anonymizer.deanonymizer_mapping.keys()) == 4
+ assert "Jane Doe" in anonymizer.deanonymizer_mapping.get("PERSON", {}).values()
+ assert (
+ "jane@gmail.com"
+ in anonymizer.deanonymizer_mapping.get("EMAIL_ADDRESS", {}).values()
+ )
+ assert (
+ "4929 5319 6292 5362"
+ in anonymizer.deanonymizer_mapping.get("CREDIT_CARD", {}).values()
+ )
+
+
+@pytest.mark.requires("presidio_analyzer", "presidio_anonymizer", "faker")
+def test_deanonymize() -> None:
+ """Test deanonymizing a name in a simple sentence"""
+ from langchain_experimental.data_anonymizer import PresidioReversibleAnonymizer
+
+ text = "Hello, my name is John Doe."
+ anonymizer = PresidioReversibleAnonymizer(analyzed_fields=["PERSON"])
+ anonymized_text = anonymizer.anonymize(text)
+ deanonymized_text = anonymizer.deanonymize(anonymized_text)
+ assert deanonymized_text == text
+
+
+@pytest.mark.requires("presidio_analyzer", "presidio_anonymizer", "faker")
+def test_save_load_deanonymizer_mapping() -> None:
+ from langchain_experimental.data_anonymizer import PresidioReversibleAnonymizer
+
+ anonymizer = PresidioReversibleAnonymizer(analyzed_fields=["PERSON"])
+ anonymizer.anonymize("Hello, my name is John Doe.")
+ try:
+ anonymizer.save_deanonymizer_mapping("test_file.json")
+ assert os.path.isfile("test_file.json")
+
+ anonymizer = PresidioReversibleAnonymizer()
+ anonymizer.load_deanonymizer_mapping("test_file.json")
+
+ assert "John Doe" in anonymizer.deanonymizer_mapping.get("PERSON", {}).values()
+
+ finally:
+ os.remove("test_file.json")
diff --git a/libs/langchain/langchain/agents/agent_toolkits/sql/base.py b/libs/langchain/langchain/agents/agent_toolkits/sql/base.py
index 0d516622bde..4b0531b8c77 100644
--- a/libs/langchain/langchain/agents/agent_toolkits/sql/base.py
+++ b/libs/langchain/langchain/agents/agent_toolkits/sql/base.py
@@ -1,5 +1,5 @@
"""SQL agent."""
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Sequence
from langchain.agents.agent import AgentExecutor, BaseSingleActionAgent
from langchain.agents.agent_toolkits.sql.prompt import (
@@ -21,6 +21,7 @@ from langchain.prompts.chat import (
)
from langchain.schema.language_model import BaseLanguageModel
from langchain.schema.messages import AIMessage, SystemMessage
+from langchain.tools import BaseTool
def create_sql_agent(
@@ -38,10 +39,11 @@ def create_sql_agent(
early_stopping_method: str = "force",
verbose: bool = False,
agent_executor_kwargs: Optional[Dict[str, Any]] = None,
+ extra_tools: Sequence[BaseTool] = (),
**kwargs: Dict[str, Any],
) -> AgentExecutor:
"""Construct an SQL agent from an LLM and tools."""
- tools = toolkit.get_tools()
+ tools = toolkit.get_tools() + list(extra_tools)
prefix = prefix.format(dialect=toolkit.dialect, top_k=top_k)
agent: BaseSingleActionAgent
diff --git a/libs/langchain/langchain/chains/graph_qa/sparql.py b/libs/langchain/langchain/chains/graph_qa/sparql.py
index eb8a365d76a..2e1c017748b 100644
--- a/libs/langchain/langchain/chains/graph_qa/sparql.py
+++ b/libs/langchain/langchain/chains/graph_qa/sparql.py
@@ -84,17 +84,17 @@ class GraphSparqlQAChain(Chain):
_intent = self.sparql_intent_chain.run({"prompt": prompt}, callbacks=callbacks)
intent = _intent.strip()
- if "SELECT" not in intent and "UPDATE" not in intent:
+ if "SELECT" in intent and "UPDATE" not in intent:
+ sparql_generation_chain = self.sparql_generation_select_chain
+ intent = "SELECT"
+ elif "UPDATE" in intent and "SELECT" not in intent:
+ sparql_generation_chain = self.sparql_generation_update_chain
+ intent = "UPDATE"
+ else:
raise ValueError(
"I am sorry, but this prompt seems to fit none of the currently "
"supported SPARQL query types, i.e., SELECT and UPDATE."
)
- elif intent.find("SELECT") < intent.find("UPDATE"):
- sparql_generation_chain = self.sparql_generation_select_chain
- intent = "SELECT"
- else:
- sparql_generation_chain = self.sparql_generation_update_chain
- intent = "UPDATE"
_run_manager.on_text("Identified intent:", end="\n", verbose=self.verbose)
_run_manager.on_text(intent, color="green", end="\n", verbose=self.verbose)
diff --git a/libs/langchain/langchain/chat_loaders/telegram.py b/libs/langchain/langchain/chat_loaders/telegram.py
index 786dad7278e..f55fd714761 100644
--- a/libs/langchain/langchain/chat_loaders/telegram.py
+++ b/libs/langchain/langchain/chat_loaders/telegram.py
@@ -1,6 +1,7 @@
import json
import logging
import os
+import tempfile
import zipfile
from pathlib import Path
from typing import Iterator, List, Union
@@ -136,7 +137,8 @@ class TelegramChatLoader(chat_loaders.BaseChatLoader):
with zipfile.ZipFile(path) as zip_file:
for file in zip_file.namelist():
if file.endswith((".html", ".json")):
- yield zip_file.extract(file)
+ with tempfile.TemporaryDirectory() as temp_dir:
+ yield zip_file.extract(file, path=temp_dir)
def lazy_load(self) -> Iterator[chat_loaders.ChatSession]:
"""Lazy load the messages from the chat file and yield them
diff --git a/libs/langchain/langchain/document_loaders/async_html.py b/libs/langchain/langchain/document_loaders/async_html.py
index 286319a5ee6..80436bfec60 100644
--- a/libs/langchain/langchain/document_loaders/async_html.py
+++ b/libs/langchain/langchain/document_loaders/async_html.py
@@ -1,7 +1,8 @@
import asyncio
import logging
import warnings
-from typing import Any, Dict, Iterator, List, Optional, Union
+from concurrent.futures import ThreadPoolExecutor
+from typing import Any, Dict, Iterator, List, Optional, Union, cast
import aiohttp
import requests
@@ -129,9 +130,18 @@ class AsyncHtmlLoader(BaseLoader):
def load(self) -> List[Document]:
"""Load text from the url(s) in web_path."""
- results = asyncio.run(self.fetch_all(self.web_paths))
+ try:
+ # Raises RuntimeError if there is no current event loop.
+ asyncio.get_running_loop()
+ # If there is a current event loop, we need to run the async code
+ # in a separate loop, in a separate thread.
+ with ThreadPoolExecutor(max_workers=1) as executor:
+ future = executor.submit(asyncio.run, self.fetch_all(self.web_paths))
+ results = future.result()
+ except RuntimeError:
+ results = asyncio.run(self.fetch_all(self.web_paths))
docs = []
- for i, text in enumerate(results):
+ for i, text in enumerate(cast(List[str], results)):
metadata = {"source": self.web_paths[i]}
docs.append(Document(page_content=text, metadata=metadata))
diff --git a/libs/langchain/langchain/document_loaders/parsers/pdf.py b/libs/langchain/langchain/document_loaders/parsers/pdf.py
index 07681a67671..2ec7a684be6 100644
--- a/libs/langchain/langchain/document_loaders/parsers/pdf.py
+++ b/libs/langchain/langchain/document_loaders/parsers/pdf.py
@@ -1,11 +1,16 @@
"""Module contains common parsers for PDFs."""
-from typing import Any, Iterator, Mapping, Optional, Sequence, Union
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Iterator, Mapping, Optional, Sequence, Union
from urllib.parse import urlparse
from langchain.document_loaders.base import BaseBlobParser
from langchain.document_loaders.blob_loaders import Blob
from langchain.schema import Document
+if TYPE_CHECKING:
+ import pdfplumber.page
+
class PyPDFParser(BaseBlobParser):
"""Load `PDF` using `pypdf` and chunk at character level."""
@@ -116,13 +121,17 @@ class PyPDFium2Parser(BaseBlobParser):
class PDFPlumberParser(BaseBlobParser):
"""Parse `PDF` with `PDFPlumber`."""
- def __init__(self, text_kwargs: Optional[Mapping[str, Any]] = None) -> None:
+ def __init__(
+ self, text_kwargs: Optional[Mapping[str, Any]] = None, dedupe: bool = False
+ ) -> None:
"""Initialize the parser.
Args:
text_kwargs: Keyword arguments to pass to ``pdfplumber.Page.extract_text()``
+            dedupe: Remove duplicated characters from the extracted text if `dedupe=True`.
"""
self.text_kwargs = text_kwargs or {}
+ self.dedupe = dedupe
def lazy_parse(self, blob: Blob) -> Iterator[Document]:
"""Lazily parse the blob."""
@@ -133,7 +142,7 @@ class PDFPlumberParser(BaseBlobParser):
yield from [
Document(
- page_content=page.extract_text(**self.text_kwargs),
+ page_content=self._process_page_content(page),
metadata=dict(
{
"source": blob.source,
@@ -151,6 +160,12 @@ class PDFPlumberParser(BaseBlobParser):
for page in doc.pages
]
+ def _process_page_content(self, page: pdfplumber.page.Page) -> str:
+ """Process the page content based on dedupe."""
+ if self.dedupe:
+ return page.dedupe_chars().extract_text(**self.text_kwargs)
+ return page.extract_text(**self.text_kwargs)
+
class AmazonTextractPDFParser(BaseBlobParser):
"""Send `PDF` files to `Amazon Textract` and parse them.
diff --git a/libs/langchain/langchain/document_loaders/pdf.py b/libs/langchain/langchain/document_loaders/pdf.py
index d907494d458..801a426a76b 100644
--- a/libs/langchain/langchain/document_loaders/pdf.py
+++ b/libs/langchain/langchain/document_loaders/pdf.py
@@ -437,7 +437,10 @@ class PDFPlumberLoader(BasePDFLoader):
"""Load `PDF` files using `pdfplumber`."""
def __init__(
- self, file_path: str, text_kwargs: Optional[Mapping[str, Any]] = None
+ self,
+ file_path: str,
+ text_kwargs: Optional[Mapping[str, Any]] = None,
+ dedupe: bool = False,
) -> None:
"""Initialize with a file path."""
try:
@@ -450,11 +453,12 @@ class PDFPlumberLoader(BasePDFLoader):
super().__init__(file_path)
self.text_kwargs = text_kwargs or {}
+ self.dedupe = dedupe
def load(self) -> List[Document]:
"""Load file."""
- parser = PDFPlumberParser(text_kwargs=self.text_kwargs)
+ parser = PDFPlumberParser(text_kwargs=self.text_kwargs, dedupe=self.dedupe)
blob = Blob.from_path(self.file_path)
return parser.parse(blob)
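+
+
+# Illustrative usage sketch (hypothetical file path):
+#
+#     loader = PDFPlumberLoader("example.pdf", dedupe=True)
+#     docs = loader.load()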
diff --git a/libs/langchain/langchain/document_loaders/s3_directory.py b/libs/langchain/langchain/document_loaders/s3_directory.py
index 5c964fed53b..1363c826471 100644
--- a/libs/langchain/langchain/document_loaders/s3_directory.py
+++ b/libs/langchain/langchain/document_loaders/s3_directory.py
@@ -114,7 +114,7 @@ class S3DirectoryLoader(BaseLoader):
aws_access_key_id=self.aws_access_key_id,
aws_secret_access_key=self.aws_secret_access_key,
aws_session_token=self.aws_session_token,
- boto_config=self.boto_config,
+ config=self.boto_config,
)
bucket = s3.Bucket(self.bucket)
docs = []
diff --git a/libs/langchain/langchain/document_loaders/url_playwright.py b/libs/langchain/langchain/document_loaders/url_playwright.py
index 16f5b00fd3c..7aa60cf1883 100644
--- a/libs/langchain/langchain/document_loaders/url_playwright.py
+++ b/libs/langchain/langchain/document_loaders/url_playwright.py
@@ -8,7 +8,9 @@ from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader
if TYPE_CHECKING:
- from playwright.async_api import AsyncBrowser, AsyncPage, AsyncResponse
+ from playwright.async_api import Browser as AsyncBrowser
+ from playwright.async_api import Page as AsyncPage
+ from playwright.async_api import Response as AsyncResponse
from playwright.sync_api import Browser, Page, Response
@@ -155,6 +157,9 @@ class PlaywrightURLLoader(BaseLoader):
try:
page = browser.new_page()
response = page.goto(url)
+ if response is None:
+ raise ValueError(f"page.goto() returned None for url {url}")
+
text = self.evaluator.evaluate(page, browser, response)
metadata = {"source": url}
docs.append(Document(page_content=text, metadata=metadata))
@@ -185,6 +190,9 @@ class PlaywrightURLLoader(BaseLoader):
try:
page = await browser.new_page()
response = await page.goto(url)
+ if response is None:
+ raise ValueError(f"page.goto() returned None for url {url}")
+
text = await self.evaluator.evaluate_async(page, browser, response)
metadata = {"source": url}
docs.append(Document(page_content=text, metadata=metadata))
diff --git a/libs/langchain/langchain/embeddings/__init__.py b/libs/langchain/langchain/embeddings/__init__.py
index 87cb5e90d5a..e8aa683a9a0 100644
--- a/libs/langchain/langchain/embeddings/__init__.py
+++ b/libs/langchain/langchain/embeddings/__init__.py
@@ -35,6 +35,7 @@ from langchain.embeddings.gpt4all import GPT4AllEmbeddings
from langchain.embeddings.huggingface import (
HuggingFaceBgeEmbeddings,
HuggingFaceEmbeddings,
+ HuggingFaceInferenceAPIEmbeddings,
HuggingFaceInstructEmbeddings,
)
from langchain.embeddings.huggingface_hub import HuggingFaceHubEmbeddings
@@ -69,6 +70,7 @@ __all__ = [
"CohereEmbeddings",
"ElasticsearchEmbeddings",
"HuggingFaceEmbeddings",
+ "HuggingFaceInferenceAPIEmbeddings",
"JinaEmbeddings",
"LlamaCppEmbeddings",
"HuggingFaceHubEmbeddings",
diff --git a/libs/langchain/langchain/embeddings/huggingface.py b/libs/langchain/langchain/embeddings/huggingface.py
index 52afabd79bf..a91d6437934 100644
--- a/libs/langchain/langchain/embeddings/huggingface.py
+++ b/libs/langchain/langchain/embeddings/huggingface.py
@@ -1,5 +1,7 @@
from typing import Any, Dict, List, Optional
+import requests
+
from langchain.embeddings.base import Embeddings
from langchain.pydantic_v1 import BaseModel, Extra, Field
@@ -58,7 +60,7 @@ class HuggingFaceEmbeddings(BaseModel, Embeddings):
except ImportError as exc:
raise ImportError(
"Could not import sentence_transformers python package. "
- "Please install it with `pip install sentence_transformers`."
+ "Please install it with `pip install sentence-transformers`."
) from exc
self.client = sentence_transformers.SentenceTransformer(
@@ -266,3 +268,71 @@ class HuggingFaceBgeEmbeddings(BaseModel, Embeddings):
self.query_instruction + text, **self.encode_kwargs
)
return embedding.tolist()
+
+
+class HuggingFaceInferenceAPIEmbeddings(BaseModel, Embeddings):
+ """Embed texts using the HuggingFace API.
+
+ Requires a HuggingFace Inference API key and a model name.
+ """
+
+ api_key: str
+ """Your API key for the HuggingFace Inference API."""
+ model_name: str = "sentence-transformers/all-MiniLM-L6-v2"
+ """The name of the model to use for text embeddings."""
+
+ @property
+ def _api_url(self) -> str:
+ return (
+ "https://api-inference.huggingface.co"
+ "/pipeline"
+ "/feature-extraction"
+ f"/{self.model_name}"
+ )
+
+ @property
+ def _headers(self) -> dict:
+ return {"Authorization": f"Bearer {self.api_key}"}
+
+ def embed_documents(self, texts: List[str]) -> List[List[float]]:
+ """Get the embeddings for a list of texts.
+
+ Args:
+ texts (Documents): A list of texts to get embeddings for.
+
+ Returns:
+ Embedded texts as List[List[float]], where each inner List[float]
+ corresponds to a single input text.
+
+ Example:
+ .. code-block:: python
+
+ from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings
+
+ hf_embeddings = HuggingFaceInferenceAPIEmbeddings(
+ api_key="your_api_key",
+                    model_name="sentence-transformers/all-MiniLM-L6-v2"
+ )
+ texts = ["Hello, world!", "How are you?"]
+ hf_embeddings.embed_documents(texts)
+ """
+ response = requests.post(
+ self._api_url,
+ headers=self._headers,
+ json={
+ "inputs": texts,
+ "options": {"wait_for_model": True, "use_cache": True},
+ },
+ )
+ return response.json()
+
+ def embed_query(self, text: str) -> List[float]:
+ """Compute query embeddings using a HuggingFace transformer model.
+
+ Args:
+ text: The text to embed.
+
+ Returns:
+ Embeddings for the text.
+ """
+ return self.embed_documents([text])[0]
diff --git a/libs/langchain/langchain/embeddings/openai.py b/libs/langchain/langchain/embeddings/openai.py
index 976c879f95e..88cb7c93326 100644
--- a/libs/langchain/langchain/embeddings/openai.py
+++ b/libs/langchain/langchain/embeddings/openai.py
@@ -87,8 +87,8 @@ def _async_retry_decorator(embeddings: OpenAIEmbeddings) -> Any:
# https://stackoverflow.com/questions/76469415/getting-embeddings-of-length-1-from-langchain-openaiembeddings
-def _check_response(response: dict) -> dict:
- if any(len(d["embedding"]) == 1 for d in response["data"]):
+def _check_response(response: dict, skip_empty: bool = False) -> dict:
+ if any(len(d["embedding"]) == 1 for d in response["data"]) and not skip_empty:
import openai
raise openai.error.APIError("OpenAI API returned an empty embedding")
@@ -102,7 +102,7 @@ def embed_with_retry(embeddings: OpenAIEmbeddings, **kwargs: Any) -> Any:
@retry_decorator
def _embed_with_retry(**kwargs: Any) -> Any:
response = embeddings.client.create(**kwargs)
- return _check_response(response)
+ return _check_response(response, skip_empty=embeddings.skip_empty)
return _embed_with_retry(**kwargs)
@@ -113,7 +113,7 @@ async def async_embed_with_retry(embeddings: OpenAIEmbeddings, **kwargs: Any) ->
@_async_retry_decorator(embeddings)
async def _async_embed_with_retry(**kwargs: Any) -> Any:
response = await embeddings.client.acreate(**kwargs)
- return _check_response(response)
+ return _check_response(response, skip_empty=embeddings.skip_empty)
return await _async_embed_with_retry(**kwargs)
@@ -196,6 +196,9 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
"""Whether to show a progress bar when embedding."""
model_kwargs: Dict[str, Any] = Field(default_factory=dict)
"""Holds any model parameters valid for `create` call not explicitly specified."""
+ skip_empty: bool = False
+ """Whether to skip empty strings when embedding or raise an error.
+ Defaults to not skipping."""
class Config:
"""Configuration for this pydantic object."""
@@ -371,6 +374,8 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
results: List[List[List[float]]] = [[] for _ in range(len(texts))]
num_tokens_in_batch: List[List[int]] = [[] for _ in range(len(texts))]
for i in range(len(indices)):
+ if self.skip_empty and len(batched_embeddings[i]) == 1:
+ continue
results[indices[i]].append(batched_embeddings[i])
num_tokens_in_batch[indices[i]].append(len(tokens[i]))
diff --git a/libs/langchain/langchain/graphs/graph_document.py b/libs/langchain/langchain/graphs/graph_document.py
new file mode 100644
index 00000000000..9f72a3ad8e0
--- /dev/null
+++ b/libs/langchain/langchain/graphs/graph_document.py
@@ -0,0 +1,51 @@
+from __future__ import annotations
+
+from typing import List, Union
+
+from langchain.load.serializable import Serializable
+from langchain.pydantic_v1 import Field
+from langchain.schema import Document
+
+
+class Node(Serializable):
+ """Represents a node in a graph with associated properties.
+
+ Attributes:
+ id (Union[str, int]): A unique identifier for the node.
+ type (str): The type or label of the node, default is "Node".
+ properties (dict): Additional properties and metadata associated with the node.
+ """
+
+ id: Union[str, int]
+ type: str = "Node"
+ properties: dict = Field(default_factory=dict)
+
+
+class Relationship(Serializable):
+ """Represents a directed relationship between two nodes in a graph.
+
+ Attributes:
+ source (Node): The source node of the relationship.
+ target (Node): The target node of the relationship.
+ type (str): The type of the relationship.
+ properties (dict): Additional properties associated with the relationship.
+ """
+
+ source: Node
+ target: Node
+ type: str
+ properties: dict = Field(default_factory=dict)
+
+
+class GraphDocument(Serializable):
+ """Represents a graph document consisting of nodes and relationships.
+
+ Attributes:
+ nodes (List[Node]): A list of nodes in the graph.
+ relationships (List[Relationship]): A list of relationships in the graph.
+ source (Document): The document from which the graph information is derived.
+ """
+
+ nodes: List[Node]
+ relationships: List[Relationship]
+ source: Document
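+
+
+# Illustrative construction sketch:
+#
+#     alice = Node(id="alice", type="Person", properties={"name": "Alice"})
+#     acme = Node(id="acme", type="Organization")
+#     works_at = Relationship(source=alice, target=acme, type="WORKS_AT")
+#     doc = GraphDocument(nodes=[alice, acme], relationships=[works_at],
+#                         source=Document(page_content="Alice works at Acme."))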
diff --git a/libs/langchain/langchain/graphs/neo4j_graph.py b/libs/langchain/langchain/graphs/neo4j_graph.py
index 02572b2d1a1..256df9d26bd 100644
--- a/libs/langchain/langchain/graphs/neo4j_graph.py
+++ b/libs/langchain/langchain/graphs/neo4j_graph.py
@@ -1,5 +1,7 @@
from typing import Any, Dict, List
+from langchain.graphs.graph_document import GraphDocument
+
node_properties_query = """
CALL apoc.meta.data()
YIELD label, other, elementType, type, property
@@ -99,3 +101,56 @@ class Neo4jGraph:
The relationships are the following:
{[el['output'] for el in relationships]}
"""
+
+ def add_graph_documents(
+ self, graph_documents: List[GraphDocument], include_source: bool = False
+ ) -> None:
+ """
+        Take a list of GraphDocument objects and use them to construct a graph.
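+
+        Example (illustrative sketch; assumes APOC is installed on the server):
+            .. code-block:: python
+
+                graph = Neo4jGraph(url="bolt://localhost:7687",
+                                   username="neo4j", password="...")
+                graph.add_graph_documents(graph_docs, include_source=True)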
+ """
+ for document in graph_documents:
+ include_docs_query = (
+ "CREATE (d:Document) "
+ "SET d.text = $document.page_content "
+ "SET d += $document.metadata "
+ "WITH d "
+ )
+ # Import nodes
+ self.query(
+ (
+ f"{include_docs_query if include_source else ''}"
+ "UNWIND $data AS row "
+ "CALL apoc.merge.node([row.type], {id: row.id}, "
+ "row.properties, {}) YIELD node "
+ f"{'MERGE (d)-[:MENTIONS]->(node) ' if include_source else ''}"
+ "RETURN distinct 'done' AS result"
+ ),
+ {
+ "data": [el.__dict__ for el in document.nodes],
+ "document": document.source.__dict__,
+ },
+ )
+ # Import relationships
+ self.query(
+ "UNWIND $data AS row "
+ "CALL apoc.merge.node([row.source_label], {id: row.source},"
+ "{}, {}) YIELD node as source "
+ "CALL apoc.merge.node([row.target_label], {id: row.target},"
+ "{}, {}) YIELD node as target "
+ "CALL apoc.merge.relationship(source, row.type, "
+ "{}, row.properties, target) YIELD rel "
+ "RETURN distinct 'done'",
+ {
+ "data": [
+ {
+ "source": el.source.id,
+ "source_label": el.source.type,
+ "target": el.target.id,
+ "target_label": el.target.type,
+ "type": el.type.replace(" ", "_").upper(),
+ "properties": el.properties,
+ }
+ for el in document.relationships
+ ]
+ },
+ )
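
Building on the sketch above, a hedged example of calling the new method; the connection details are placeholders, and the Cypher relies on the APOC procedures being installed on the Neo4j server:

```python
from langchain.graphs import Neo4jGraph

graph = Neo4jGraph(
    url="bolt://localhost:7687",  # placeholder connection details
    username="neo4j",
    password="password",
)

# `graph_doc` is the GraphDocument from the previous sketch; with
# include_source=True, a (:Document)-[:MENTIONS]->(node) link is
# created for every imported node.
graph.add_graph_documents([graph_doc], include_source=True)
```
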
diff --git a/libs/langchain/langchain/llms/bananadev.py b/libs/langchain/langchain/llms/bananadev.py
index f0659118d63..3a984a3cb2f 100644
--- a/libs/langchain/langchain/llms/bananadev.py
+++ b/libs/langchain/langchain/llms/bananadev.py
@@ -15,6 +15,7 @@ class Banana(LLM):
To use, you should have the ``banana-dev`` python package installed,
and the environment variable ``BANANA_API_KEY`` set with your API key.
+ This is the team API key available in the Banana dashboard.
Any parameters that are valid to be passed to the call can be passed
in, even if not explicitly saved on this class.
@@ -23,10 +24,13 @@ class Banana(LLM):
.. code-block:: python
from langchain.llms import Banana
- banana = Banana(model_key="")
+ banana = Banana(model_key="", model_url_slug="")
"""
model_key: str = ""
+ """model key to use"""
+
+ model_url_slug: str = ""
"""model endpoint to use"""
model_kwargs: Dict[str, Any] = Field(default_factory=dict)
@@ -72,6 +76,7 @@ class Banana(LLM):
"""Get the identifying parameters."""
return {
**{"model_key": self.model_key},
+ **{"model_url_slug": self.model_url_slug},
**{"model_kwargs": self.model_kwargs},
}
@@ -89,7 +94,7 @@ class Banana(LLM):
) -> str:
"""Call to Banana endpoint."""
try:
- import banana_dev as banana
+ from banana_dev import Client
except ImportError:
raise ImportError(
"Could not import banana-dev python package. "
@@ -99,19 +104,25 @@ class Banana(LLM):
params = {**params, **kwargs}
api_key = self.banana_api_key
model_key = self.model_key
+ model_url_slug = self.model_url_slug
model_inputs = {
# a json specific to your model.
"prompt": prompt,
**params,
}
- response = banana.run(api_key, model_key, model_inputs)
+ model = Client(
+ # Found in main dashboard
+ api_key=api_key,
+ # Both found in model details page
+ model_key=model_key,
+ url=f"https://{model_url_slug}.run.banana.dev",
+ )
+ response, meta = model.call("/", model_inputs)
try:
- text = response["modelOutputs"][0]["output"]
+ text = response["outputs"]
except (KeyError, TypeError):
- returned = response["modelOutputs"][0]
raise ValueError(
- "Response should be of schema: {'output': 'text'}."
- f"\nResponse was: {returned}"
+ "Response should be of schema: {'outputs': 'text'}."
"\nTo fix this:"
"\n- fork the source repo of the Banana model"
"\n- modify app.py to return the above schema"
diff --git a/libs/langchain/langchain/llms/huggingface_text_gen_inference.py b/libs/langchain/langchain/llms/huggingface_text_gen_inference.py
index 6545078f169..683b2f4dde3 100644
--- a/libs/langchain/langchain/llms/huggingface_text_gen_inference.py
+++ b/libs/langchain/langchain/llms/huggingface_text_gen_inference.py
@@ -65,7 +65,7 @@ class HuggingFaceTextGenInference(LLM):
typical_p: Optional[float] = 0.95
"""Typical Decoding mass. See [Typical Decoding for Natural Language
Generation](https://arxiv.org/abs/2202.00666) for more information."""
- temperature: float = 0.8
+ temperature: Optional[float] = 0.8
"""The value used to module the logits distribution."""
repetition_penalty: Optional[float] = None
"""The parameter for repetition penalty. 1.0 means no penalty.
diff --git a/libs/langchain/langchain/llms/pipelineai.py b/libs/langchain/langchain/llms/pipelineai.py
index 0d257e33647..cff6c0f5b18 100644
--- a/libs/langchain/langchain/llms/pipelineai.py
+++ b/libs/langchain/langchain/llms/pipelineai.py
@@ -91,7 +91,7 @@ class PipelineAI(LLM, BaseModel):
try:
from pipeline import PipelineCloud
except ImportError:
- raise ValueError(
+ raise ImportError(
"Could not import pipeline-ai python package. "
"Please install it with `pip install pipeline-ai`."
)
diff --git a/libs/langchain/langchain/llms/rwkv.py b/libs/langchain/langchain/llms/rwkv.py
index bb54c9d5deb..8072b2b91b6 100644
--- a/libs/langchain/langchain/llms/rwkv.py
+++ b/libs/langchain/langchain/llms/rwkv.py
@@ -121,7 +121,7 @@ class RWKV(LLM, BaseModel):
values["pipeline"] = PIPELINE(values["client"], values["tokens_path"])
except ImportError:
- raise ValueError(
+ raise ImportError(
"Could not import rwkv python package. "
"Please install it with `pip install rwkv`."
)
diff --git a/libs/langchain/langchain/llms/vllm.py b/libs/langchain/langchain/llms/vllm.py
index 1a6e1a5910b..537a9bbb6f3 100644
--- a/libs/langchain/langchain/llms/vllm.py
+++ b/libs/langchain/langchain/llms/vllm.py
@@ -62,6 +62,10 @@ class VLLM(BaseLLM):
dtype: str = "auto"
"""The data type for the model weights and activations."""
+ download_dir: Optional[str] = None
+ """Directory in which to download and load the weights (defaults to the
+ Hugging Face cache directory)."""
+
vllm_kwargs: Dict[str, Any] = Field(default_factory=dict)
"""Holds any model parameters valid for `vllm.LLM` call not explicitly specified."""
@@ -84,6 +88,7 @@ class VLLM(BaseLLM):
tensor_parallel_size=values["tensor_parallel_size"],
trust_remote_code=values["trust_remote_code"],
dtype=values["dtype"],
+ download_dir=values["download_dir"],
**values["vllm_kwargs"],
)
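
A short sketch of the new knob; the model id and path are placeholders:

```python
from langchain.llms import VLLM

# download_dir is forwarded to vllm.LLM, overriding the default
# Hugging Face cache location.
llm = VLLM(model="mosaicml/mpt-7b", download_dir="/mnt/models/vllm")
```
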
diff --git a/libs/langchain/langchain/output_parsers/__init__.py b/libs/langchain/langchain/output_parsers/__init__.py
index f029f52bfb8..d3d49c6bd2f 100644
--- a/libs/langchain/langchain/output_parsers/__init__.py
+++ b/libs/langchain/langchain/output_parsers/__init__.py
@@ -20,6 +20,7 @@ from langchain.output_parsers.fix import OutputFixingParser
from langchain.output_parsers.list import (
CommaSeparatedListOutputParser,
ListOutputParser,
+ NumberedListOutputParser,
)
from langchain.output_parsers.pydantic import PydanticOutputParser
from langchain.output_parsers.rail_parser import GuardrailsOutputParser
@@ -36,6 +37,7 @@ __all__ = [
"EnumOutputParser",
"GuardrailsOutputParser",
"ListOutputParser",
+ "NumberedListOutputParser",
"OutputFixingParser",
"PydanticOutputParser",
"RegexDictParser",
diff --git a/libs/langchain/langchain/schema/runnable/base.py b/libs/langchain/langchain/schema/runnable/base.py
index 9b23e504389..e8a18eb2199 100644
--- a/libs/langchain/langchain/schema/runnable/base.py
+++ b/libs/langchain/langchain/schema/runnable/base.py
@@ -39,6 +39,8 @@ from langchain.load.serializable import Serializable
from langchain.pydantic_v1 import Field
from langchain.schema.runnable.config import (
RunnableConfig,
+ acall_func_with_variable_args,
+ call_func_with_variable_args,
ensure_config,
get_async_callback_manager_for_config,
get_callback_manager_for_config,
@@ -47,16 +49,15 @@ from langchain.schema.runnable.config import (
patch_config,
)
from langchain.schema.runnable.utils import (
+ Input,
+ Output,
+ accepts_config,
accepts_run_manager,
- accepts_run_manager_and_config,
gather_with_concurrency,
)
from langchain.utils.aiter import atee, py_anext
from langchain.utils.iter import safetee
-Input = TypeVar("Input")
-# Output type should implement __concat__, as eg str, list, dict do
-Output = TypeVar("Output")
Other = TypeVar("Other")
@@ -311,16 +312,7 @@ class Runnable(Generic[Input, Output], ABC):
name=config.get("run_name"),
)
try:
- if accepts_run_manager_and_config(func):
- output = func(
- input,
- run_manager=run_manager,
- config=config,
- ) # type: ignore[call-arg]
- elif accepts_run_manager(func):
- output = func(input, run_manager=run_manager) # type: ignore[call-arg]
- else:
- output = func(input) # type: ignore[call-arg]
+ output = call_func_with_variable_args(func, input, run_manager, config)
except Exception as e:
run_manager.on_chain_error(e)
raise
@@ -353,19 +345,9 @@ class Runnable(Generic[Input, Output], ABC):
name=config.get("run_name"),
)
try:
- if accepts_run_manager_and_config(func):
- output = await func(
- input,
- run_manager=run_manager,
- config=config,
- ) # type: ignore[call-arg]
- elif accepts_run_manager(func):
- output = await func(
- input,
- run_manager=run_manager,
- ) # type: ignore[call-arg]
- else:
- output = await func(input) # type: ignore[call-arg]
+ output = await acall_func_with_variable_args(
+ func, input, run_manager, config
+ )
except Exception as e:
await run_manager.on_chain_error(e)
raise
@@ -408,16 +390,15 @@ class Runnable(Generic[Input, Output], ABC):
)
]
try:
- if accepts_run_manager_and_config(func):
- output = func(
- input,
- run_manager=run_managers,
- config=configs,
- ) # type: ignore[call-arg]
- elif accepts_run_manager(func):
- output = func(input, run_manager=run_managers) # type: ignore[call-arg]
- else:
- output = func(input) # type: ignore[call-arg]
+ kwargs: Dict[str, Any] = {}
+ if accepts_config(func):
+ kwargs["config"] = [
+ patch_config(c, callbacks=rm.get_child())
+ for c, rm in zip(configs, run_managers)
+ ]
+ if accepts_run_manager(func):
+ kwargs["run_manager"] = run_managers
+ output = func(input, **kwargs) # type: ignore[call-arg]
except Exception as e:
for run_manager in run_managers:
run_manager.on_chain_error(e)
@@ -479,16 +460,15 @@ class Runnable(Generic[Input, Output], ABC):
)
)
try:
- if accepts_run_manager_and_config(func):
- output = await func(
- input,
- run_manager=run_managers,
- config=configs,
- ) # type: ignore[call-arg]
- elif accepts_run_manager(func):
- output = await func(input, run_manager=run_managers) # type: ignore
- else:
- output = await func(input) # type: ignore[call-arg]
+ kwargs: Dict[str, Any] = {}
+ if accepts_config(func):
+ kwargs["config"] = [
+ patch_config(c, callbacks=rm.get_child())
+ for c, rm in zip(configs, run_managers)
+ ]
+ if accepts_run_manager(func):
+ kwargs["run_manager"] = run_managers
+ output = await func(input, **kwargs) # type: ignore[call-arg]
except Exception as e:
await asyncio.gather(
*(run_manager.on_chain_error(e) for run_manager in run_managers)
@@ -550,19 +530,16 @@ class Runnable(Generic[Input, Output], ABC):
name=config.get("run_name"),
)
try:
- if accepts_run_manager_and_config(transformer):
- iterator = transformer(
- input_for_transform,
- run_manager=run_manager,
- config=config,
- ) # type: ignore[call-arg]
- elif accepts_run_manager(transformer):
- iterator = transformer(
- input_for_transform,
- run_manager=run_manager,
- ) # type: ignore[call-arg]
- else:
- iterator = transformer(input_for_transform) # type: ignore[call-arg]
+ kwargs: Dict[str, Any] = {}
+ if accepts_config(transformer):
+ kwargs["config"] = patch_config(
+ config, callbacks=run_manager.get_child()
+ )
+ if accepts_run_manager(transformer):
+ kwargs["run_manager"] = run_manager
+ iterator = transformer(
+ input_for_transform, **kwargs
+ ) # type: ignore[call-arg]
for chunk in iterator:
yield chunk
if final_output_supported:
@@ -631,21 +608,16 @@ class Runnable(Generic[Input, Output], ABC):
name=config.get("run_name"),
)
try:
- # mypy can't quite work out thew type guard here, but this is safe,
- # check implementations of the accepts_* functions
- if accepts_run_manager_and_config(transformer):
- iterator = transformer(
- input_for_transform,
- run_manager=run_manager,
- config=config,
- ) # type: ignore[call-arg]
- elif accepts_run_manager(transformer):
- iterator = transformer(
- input_for_transform,
- run_manager=run_manager,
- ) # type: ignore[call-arg]
- else:
- iterator = transformer(input_for_transform) # type: ignore[call-arg]
+ kwargs: Dict[str, Any] = {}
+ if accepts_config(transformer):
+ kwargs["config"] = patch_config(
+ config, callbacks=run_manager.get_child()
+ )
+ if accepts_run_manager(transformer):
+ kwargs["run_manager"] = run_manager
+ iterator = transformer(
+ input_for_transform, **kwargs
+ ) # type: ignore[call-arg]
async for chunk in iterator:
yield chunk
if final_output_supported:
@@ -1756,7 +1728,7 @@ class RunnableLambda(Runnable[Input, Output]):
run_manager: CallbackManagerForChainRun,
config: RunnableConfig,
) -> Output:
- output = self.func(input)
+ output = call_func_with_variable_args(self.func, input, run_manager, config)
# If the output is a runnable, invoke it
if isinstance(output, Runnable):
recursion_limit = config["recursion_limit"]
@@ -1780,7 +1752,9 @@ class RunnableLambda(Runnable[Input, Output]):
run_manager: AsyncCallbackManagerForChainRun,
config: RunnableConfig,
) -> Output:
- output = await self.afunc(input)
+ output = await acall_func_with_variable_args(
+ self.afunc, input, run_manager, config
+ )
# If the output is a runnable, invoke it
if isinstance(output, Runnable):
recursion_limit = config["recursion_limit"]
@@ -1798,6 +1772,21 @@ class RunnableLambda(Runnable[Input, Output]):
)
return output
+ def _config(
+ self, config: Optional[RunnableConfig], callable: Callable[..., Any]
+ ) -> RunnableConfig:
+ config = config or {}
+
+ if config.get("run_name") is None:
+ try:
+ run_name = callable.__name__
+ except AttributeError:
+ run_name = None
+ if run_name is not None:
+ return patch_config(config, run_name=run_name)
+
+ return config
+
def invoke(
self,
input: Input,
@@ -1805,7 +1794,11 @@ class RunnableLambda(Runnable[Input, Output]):
**kwargs: Optional[Any],
) -> Output:
if hasattr(self, "func"):
- return self._call_with_config(self._invoke, input, config)
+ return self._call_with_config(
+ self._invoke,
+ input,
+ self._config(config, self.func),
+ )
else:
raise TypeError(
"Cannot invoke a coroutine function synchronously."
@@ -1819,7 +1812,11 @@ class RunnableLambda(Runnable[Input, Output]):
**kwargs: Optional[Any],
) -> Output:
if hasattr(self, "afunc"):
- return await self._acall_with_config(self._ainvoke, input, config)
+ return await self._acall_with_config(
+ self._ainvoke,
+ input,
+ self._config(config, self.afunc),
+ )
else:
# Delegating to super implementation of ainvoke.
# Uses asyncio executor to run the sync version (invoke)
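
Net effect of this refactor for RunnableLambda: the wrapped function may now optionally declare `run_manager` and/or `config` parameters and have them injected, and runs are named after the callable by default. A minimal sketch:

```python
from langchain.schema.runnable import RunnableLambda
from langchain.schema.runnable.config import RunnableConfig

def double(x: int, config: RunnableConfig) -> int:
    # `config` is in the signature, so accepts_config() is True and
    # call_func_with_variable_args passes the patched config through.
    return x * 2

runnable = RunnableLambda(double)
assert runnable.invoke(3) == 6  # traced with run_name "double" by default
```
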
diff --git a/libs/langchain/langchain/schema/runnable/config.py b/libs/langchain/langchain/schema/runnable/config.py
index 3f87f044039..987a2c7d2fa 100644
--- a/libs/langchain/langchain/schema/runnable/config.py
+++ b/libs/langchain/langchain/schema/runnable/config.py
@@ -3,13 +3,35 @@ from __future__ import annotations
from concurrent.futures import Executor, ThreadPoolExecutor
from contextlib import contextmanager
from copy import deepcopy
-from typing import TYPE_CHECKING, Any, Dict, Generator, List, Optional, Union
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Awaitable,
+ Callable,
+ Dict,
+ Generator,
+ List,
+ Optional,
+ Union,
+)
from typing_extensions import TypedDict
+from langchain.schema.runnable.utils import (
+ Input,
+ Output,
+ accepts_config,
+ accepts_run_manager,
+)
+
if TYPE_CHECKING:
from langchain.callbacks.base import BaseCallbackManager, Callbacks
- from langchain.callbacks.manager import AsyncCallbackManager, CallbackManager
+ from langchain.callbacks.manager import (
+ AsyncCallbackManager,
+ AsyncCallbackManagerForChainRun,
+ CallbackManager,
+ CallbackManagerForChainRun,
+ )
class RunnableConfig(TypedDict, total=False):
@@ -117,6 +139,47 @@ def patch_config(
return config
+def call_func_with_variable_args(
+ func: Union[
+ Callable[[Input], Output],
+ Callable[[Input, CallbackManagerForChainRun], Output],
+ Callable[[Input, CallbackManagerForChainRun, RunnableConfig], Output],
+ ],
+ input: Input,
+ run_manager: CallbackManagerForChainRun,
+ config: RunnableConfig,
+) -> Output:
+ """Call function that may optionally accept a run_manager and/or config."""
+ kwargs: Dict[str, Any] = {}
+ if accepts_config(func):
+ kwargs["config"] = patch_config(config, callbacks=run_manager.get_child())
+ if accepts_run_manager(func):
+ kwargs["run_manager"] = run_manager
+ return func(input, **kwargs) # type: ignore[call-arg]
+
+
+async def acall_func_with_variable_args(
+ func: Union[
+ Callable[[Input], Awaitable[Output]],
+ Callable[[Input, AsyncCallbackManagerForChainRun], Awaitable[Output]],
+ Callable[
+ [Input, AsyncCallbackManagerForChainRun, RunnableConfig],
+ Awaitable[Output],
+ ],
+ ],
+ input: Input,
+ run_manager: AsyncCallbackManagerForChainRun,
+ config: RunnableConfig,
+) -> Output:
+ """Call function that may optionally accept a run_manager and/or config."""
+ kwargs: Dict[str, Any] = {}
+ if accepts_config(func):
+ kwargs["config"] = patch_config(config, callbacks=run_manager.get_child())
+ if accepts_run_manager(func):
+ kwargs["run_manager"] = run_manager
+ return await func(input, **kwargs) # type: ignore[call-arg]
+
+
def get_callback_manager_for_config(config: RunnableConfig) -> CallbackManager:
from langchain.callbacks.manager import CallbackManager
diff --git a/libs/langchain/langchain/schema/runnable/utils.py b/libs/langchain/langchain/schema/runnable/utils.py
index 2afa3705c4c..43d9b325fd9 100644
--- a/libs/langchain/langchain/schema/runnable/utils.py
+++ b/libs/langchain/langchain/schema/runnable/utils.py
@@ -2,7 +2,11 @@ from __future__ import annotations
import asyncio
from inspect import signature
-from typing import Any, Callable, Coroutine, Union
+from typing import Any, Callable, Coroutine, TypeVar, Union
+
+Input = TypeVar("Input")
+# Output type should implement __concat__, as eg str, list, dict do
+Output = TypeVar("Output")
async def gated_coro(semaphore: asyncio.Semaphore, coro: Coroutine) -> Any:
@@ -26,8 +30,8 @@ def accepts_run_manager(callable: Callable[..., Any]) -> bool:
return False
-def accepts_run_manager_and_config(callable: Callable[..., Any]) -> bool:
- return (
- accepts_run_manager(callable)
- and signature(callable).parameters.get("config") is not None
- )
+def accepts_config(callable: Callable[..., Any]) -> bool:
+ try:
+ return signature(callable).parameters.get("config") is not None
+ except ValueError:
+ return False
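
The try/except is needed because `inspect.signature()` raises ValueError for some C builtins; a sketch of what the predicate now reports:

```python
from inspect import signature

def with_config(input, config=None):
    return input

def plain(input):
    return input

assert signature(with_config).parameters.get("config") is not None
assert signature(plain).parameters.get("config") is None

# signature(dict) raises ValueError ("no signature found for builtin type"),
# which accepts_config() now swallows, reporting False instead of crashing.
```
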
diff --git a/libs/langchain/langchain/text_splitter.py b/libs/langchain/langchain/text_splitter.py
index e804b93be9e..d0bf6fca1bc 100644
--- a/libs/langchain/langchain/text_splitter.py
+++ b/libs/langchain/langchain/text_splitter.py
@@ -100,6 +100,7 @@ class TextSplitter(BaseDocumentTransformer, ABC):
length_function: Callable[[str], int] = len,
keep_separator: bool = False,
add_start_index: bool = False,
+ strip_whitespace: bool = True,
) -> None:
"""Create a new TextSplitter.
@@ -109,6 +110,8 @@ class TextSplitter(BaseDocumentTransformer, ABC):
length_function: Function that measures the length of given chunks
keep_separator: Whether to keep the separator in the chunks
add_start_index: If `True`, includes chunk's start index in metadata
+ strip_whitespace: If `True`, strips whitespace from the start and end of
+ every document
"""
if chunk_overlap > chunk_size:
raise ValueError(
@@ -120,6 +123,7 @@ class TextSplitter(BaseDocumentTransformer, ABC):
self._length_function = length_function
self._keep_separator = keep_separator
self._add_start_index = add_start_index
+ self._strip_whitespace = strip_whitespace
@abstractmethod
def split_text(self, text: str) -> List[str]:
@@ -152,7 +156,8 @@ class TextSplitter(BaseDocumentTransformer, ABC):
def _join_docs(self, docs: List[str], separator: str) -> Optional[str]:
text = separator.join(docs)
- text = text.strip()
+ if self._strip_whitespace:
+ text = text.strip()
if text == "":
return None
else:
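
A sketch of the new flag: with `strip_whitespace=False`, joined chunks keep their leading and trailing whitespace instead of being `str.strip()`'d:

```python
from langchain.text_splitter import CharacterTextSplitter

splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=20,
    chunk_overlap=0,
    strip_whitespace=False,  # the new flag; the default True keeps old behavior
)
splitter.split_text("  indented line\nanother line  ")
```
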
diff --git a/libs/langchain/langchain/utilities/redis.py b/libs/langchain/langchain/utilities/redis.py
index e6c5cb13883..a45391c8bcd 100644
--- a/libs/langchain/langchain/utilities/redis.py
+++ b/libs/langchain/langchain/utilities/redis.py
@@ -108,7 +108,7 @@ def get_client(redis_url: str, **kwargs: Any) -> RedisType:
try:
import redis
except ImportError:
- raise ValueError(
+ raise ImportError(
"Could not import redis python package. "
"Please install it with `pip install redis>=4.1.0`."
)
diff --git a/libs/langchain/langchain/utilities/sql_database.py b/libs/langchain/langchain/utilities/sql_database.py
index 110f081d3c0..13718c8c0c7 100644
--- a/libs/langchain/langchain/utilities/sql_database.py
+++ b/libs/langchain/langchain/utilities/sql_database.py
@@ -9,6 +9,7 @@ from sqlalchemy import MetaData, Table, create_engine, inspect, select, text
from sqlalchemy.engine import Engine
from sqlalchemy.exc import ProgrammingError, SQLAlchemyError
from sqlalchemy.schema import CreateTable
+from sqlalchemy.types import NullType
from langchain.utils import get_from_env
@@ -314,6 +315,11 @@ class SQLDatabase:
tables.append(self._custom_table_info[table.name])
continue
+ # Ignore columns reflected as NullType (e.g. JSON), which can't be rendered in CREATE TABLE
+ for k, v in table.columns.items():
+ if type(v.type) is NullType:
+ table._columns.remove(v)
+
# add create table command
create_table = str(CreateTable(table).compile(self._engine))
table_info = f"{create_table.rstrip()}"
@@ -384,6 +390,8 @@ class SQLDatabase:
connection.exec_driver_sql(f"SET @@dataset_id='{self._schema}'")
elif self.dialect == "mssql":
pass
+ elif self.dialect == "trino":
+ connection.exec_driver_sql(f"USE {self._schema}")
else: # postgresql and compatible dialects
connection.exec_driver_sql(f"SET search_path TO {self._schema}")
cursor = connection.execute(text(command))
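
A hedged usage sketch for the new Trino branch; it assumes a SQLAlchemy Trino dialect (e.g. the `trino` package) is installed, and the connection string is a placeholder:

```python
from langchain.utilities import SQLDatabase

# With a schema set, each connection now runs `USE my_schema` before queries.
db = SQLDatabase.from_uri(
    "trino://user@localhost:8080/my_catalog",  # placeholder DSN
    schema="my_schema",
)
print(db.get_usable_table_names())
```
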
diff --git a/libs/langchain/langchain/vectorstores/myscale.py b/libs/langchain/langchain/vectorstores/myscale.py
index 3c4361fcdcd..81812550c74 100644
--- a/libs/langchain/langchain/vectorstores/myscale.py
+++ b/libs/langchain/langchain/vectorstores/myscale.py
@@ -147,7 +147,12 @@ class MyScale(VectorStore):
)
for k in ["id", "vector", "text", "metadata"]:
assert k in self.config.column_map
- assert self.config.metric in ["ip", "cosine", "l2"]
+ assert self.config.metric.upper() in ["IP", "COSINE", "L2"]
+ if self.config.metric in ["ip", "cosine", "l2"]:
+ logger.warning(
+ "Lower case metric types will be deprecated "
+ "in the future. Please use one of ('IP', 'Cosine', 'L2')"
+ )
# initialize the schema
dim = len(embedding.embed_query("try this out"))
@@ -174,7 +179,9 @@ class MyScale(VectorStore):
self.BS = "\\"
self.must_escape = ("\\", "'")
self._embeddings = embedding
- self.dist_order = "ASC" if self.config.metric in ["cosine", "l2"] else "DESC"
+ self.dist_order = (
+ "ASC" if self.config.metric.upper() in ["COSINE", "L2"] else "DESC"
+ )
# Create a connection to myscale
self.client = get_client(
diff --git a/libs/langchain/langchain/vectorstores/nucliadb.py b/libs/langchain/langchain/vectorstores/nucliadb.py
new file mode 100644
index 00000000000..8ba9d4454c4
--- /dev/null
+++ b/libs/langchain/langchain/vectorstores/nucliadb.py
@@ -0,0 +1,159 @@
+import os
+from typing import Any, Dict, Iterable, List, Optional, Type
+
+from langchain.embeddings.base import Embeddings
+from langchain.schema.document import Document
+from langchain.vectorstores.base import VST, VectorStore
+
+FIELD_TYPES = {
+ "f": "files",
+ "t": "texts",
+ "l": "links",
+}
+
+
+class NucliaDB(VectorStore):
+ """NucliaDB vector store."""
+
+ _config: Dict[str, Any] = {}
+
+ def __init__(
+ self,
+ knowledge_box: str,
+ local: bool,
+ api_key: Optional[str] = None,
+ backend: Optional[str] = None,
+ ) -> None:
+ """Initialize the NucliaDB client.
+
+ Args:
+ knowledge_box: the Knowledge Box id.
+ local: Whether to use a local NucliaDB instance or Nuclia Cloud
+ api_key: A contributor API key for the kb (needed when local is False)
+ backend: The backend url to use when local is True, defaults to
+ http://localhost:8080
+ """
+ try:
+ from nuclia.sdk import NucliaAuth
+ except ImportError:
+ raise ImportError(
+ "nuclia python package not found. "
+ "Please install it with `pip install nuclia`."
+ )
+ self._config["LOCAL"] = local
+ zone = os.environ.get("NUCLIA_ZONE", "europe-1")
+ self._kb = knowledge_box
+ if local:
+ if not backend:
+ backend = "http://localhost:8080"
+ self._config["BACKEND"] = f"{backend}/api/v1"
+ self._config["TOKEN"] = None
+ NucliaAuth().nucliadb(url=backend)
+ NucliaAuth().kb(url=self.kb_url, interactive=False)
+ else:
+ self._config["BACKEND"] = f"https://{zone}.nuclia.cloud/api/v1"
+ self._config["TOKEN"] = api_key
+ NucliaAuth().kb(
+ url=self.kb_url, token=self._config["TOKEN"], interactive=False
+ )
+
+ @property
+ def is_local(self) -> bool:
+ return self._config["LOCAL"]
+
+ @property
+ def kb_url(self) -> str:
+ return f"{self._config['BACKEND']}/kb/{self._kb}"
+
+ def add_texts(
+ self,
+ texts: Iterable[str],
+ metadatas: Optional[List[dict]] = None,
+ **kwargs: Any,
+ ) -> List[str]:
+ """Upload texts to NucliaDB"""
+ ids = []
+ from nuclia.sdk import NucliaResource
+
+ factory = NucliaResource()
+ for i, text in enumerate(texts):
+ extra: Dict[str, Any] = {"metadata": ""}
+ if metadatas:
+ extra = {"metadata": metadatas[i]}
+ id = factory.create(
+ texts={"text": {"body": text}},
+ extra=extra,
+ url=self.kb_url,
+ api_key=self._config["TOKEN"],
+ )
+ ids.append(id)
+ return ids
+
+ def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]:
+ if not ids:
+ return None
+ from nuclia.sdk import NucliaResource
+
+ factory = NucliaResource()
+ results: List[bool] = []
+ for id in ids:
+ try:
+ factory.delete(rid=id, url=self.kb_url, api_key=self._config["TOKEN"])
+ results.append(True)
+ except ValueError:
+ results.append(False)
+ return all(results)
+
+ def similarity_search(
+ self, query: str, k: int = 4, **kwargs: Any
+ ) -> List[Document]:
+ from nuclia.sdk import NucliaSearch
+ from nucliadb_models.search import FindRequest, ResourceProperties
+
+ request = FindRequest(
+ query=query,
+ page_size=k,
+ show=[ResourceProperties.VALUES, ResourceProperties.EXTRA],
+ )
+ search = NucliaSearch()
+ results = search.find(
+ query=request, url=self.kb_url, api_key=self._config["TOKEN"]
+ )
+ paragraphs = []
+ for resource in results.resources.values():
+ for field in resource.fields.values():
+ for paragraph_id, paragraph in field.paragraphs.items():
+ info = paragraph_id.split("/")
+ field_type = FIELD_TYPES.get(info[1], None)
+ field_id = info[2]
+ if not field_type:
+ continue
+ value = getattr(resource.data, field_type, {}).get(field_id, None)
+ paragraphs.append(
+ {
+ "text": paragraph.text,
+ "metadata": {
+ "extra": getattr(
+ getattr(resource, "extra", {}), "metadata", None
+ ),
+ "value": value,
+ },
+ "order": paragraph.order,
+ }
+ )
+ sorted_paragraphs = sorted(paragraphs, key=lambda x: x["order"])
+ return [
+ Document(page_content=paragraph["text"], metadata=paragraph["metadata"])
+ for paragraph in sorted_paragraphs
+ ]
+
+ @classmethod
+ def from_texts(
+ cls: Type[VST],
+ texts: List[str],
+ embedding: Embeddings,
+ metadatas: Optional[List[dict]] = None,
+ **kwargs: Any,
+ ) -> VST:
+ """Return VectorStore initialized from texts and embeddings."""
+ raise NotImplementedError
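
An end-to-end sketch of the new store; the Knowledge Box id and API key are placeholders, and embeddings are computed server-side by Nuclia, which is why `from_texts` is not implemented:

```python
from langchain.vectorstores.nucliadb import NucliaDB

ndb = NucliaDB(
    knowledge_box="my-kb-id",      # placeholder Knowledge Box id
    local=False,
    api_key="my-contributor-key",  # placeholder contributor API key
)

ids = ndb.add_texts(
    ["NucliaDB indexes text server-side.", "LangChain can query it."],
    metadatas=[{"topic": "indexing"}, {"topic": "integration"}],
)
docs = ndb.similarity_search("How is text indexed?", k=2)
```
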
diff --git a/libs/langchain/langchain/vectorstores/pgvector.py b/libs/langchain/langchain/vectorstores/pgvector.py
index 6b02fc19c0a..2fc66c0a0e0 100644
--- a/libs/langchain/langchain/vectorstores/pgvector.py
+++ b/libs/langchain/langchain/vectorstores/pgvector.py
@@ -349,16 +349,16 @@ class PGVector(VectorStore):
@property
def distance_strategy(self) -> Any:
- if self._distance_strategy == "l2":
+ if self._distance_strategy == DistanceStrategy.EUCLIDEAN:
return self.EmbeddingStore.embedding.l2_distance
- elif self._distance_strategy == "cosine":
+ elif self._distance_strategy == DistanceStrategy.COSINE:
return self.EmbeddingStore.embedding.cosine_distance
- elif self._distance_strategy == "inner":
+ elif self._distance_strategy == DistanceStrategy.MAX_INNER_PRODUCT:
return self.EmbeddingStore.embedding.max_inner_product
else:
raise ValueError(
f"Got unexpected value for distance: {self._distance_strategy}. "
- f"Should be one of `l2`, `cosine`, `inner`."
+ f"Should be one of {', '.join([ds.value for ds in DistanceStrategy])}."
)
def similarity_search_with_score_by_vector(
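
The property now compares against the DistanceStrategy enum defined in this module rather than bare strings; a small sketch of the distinction (member names are taken from the diff, and the "l2"/"cosine"/"inner" values are implied by the old string checks):

```python
from langchain.vectorstores.pgvector import DistanceStrategy

strategy = DistanceStrategy.COSINE
assert strategy == DistanceStrategy.COSINE  # the new, unambiguous comparison

# The string joined into the new error message, e.g. "l2, cosine, inner".
print(", ".join(ds.value for ds in DistanceStrategy))
```
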
diff --git a/libs/langchain/poetry.lock b/libs/langchain/poetry.lock
index d742e5a8965..399dcc2cabd 100644
--- a/libs/langchain/poetry.lock
+++ b/libs/langchain/poetry.lock
@@ -1,10 +1,9 @@
-# This file is automatically @generated by Poetry and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
[[package]]
name = "absl-py"
version = "1.4.0"
description = "Abseil Python Common Libraries, see https://github.com/abseil/abseil-py."
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -16,7 +15,6 @@ files = [
name = "aioboto3"
version = "11.3.0"
description = "Async boto3 wrapper"
-category = "main"
optional = true
python-versions = ">=3.7,<4.0"
files = [
@@ -35,7 +33,6 @@ s3cse = ["cryptography (>=2.3.1)"]
name = "aiobotocore"
version = "2.6.0"
description = "Async client for aws services using botocore and aiohttp"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -58,7 +55,6 @@ boto3 = ["boto3 (>=1.28.17,<1.28.18)"]
name = "aiodns"
version = "3.0.0"
description = "Simple DNS resolver for asyncio"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -73,7 +69,6 @@ pycares = ">=4.0.0"
name = "aiofiles"
version = "23.2.1"
description = "File support for asyncio."
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -85,7 +80,6 @@ files = [
name = "aiohttp"
version = "3.8.5"
description = "Async http client/server framework (asyncio)"
-category = "main"
optional = false
python-versions = ">=3.6"
files = [
@@ -194,7 +188,6 @@ speedups = ["Brotli", "aiodns", "cchardet"]
name = "aiohttp-retry"
version = "2.8.3"
description = "Simple retry client for aiohttp"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -209,7 +202,6 @@ aiohttp = "*"
name = "aioitertools"
version = "0.11.0"
description = "itertools and builtins for AsyncIO and mixed iterables"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -224,7 +216,6 @@ typing_extensions = {version = ">=4.0", markers = "python_version < \"3.10\""}
name = "aiosignal"
version = "1.3.1"
description = "aiosignal: a list of registered asynchronous callbacks"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -239,7 +230,6 @@ frozenlist = ">=1.1.0"
name = "aleph-alpha-client"
version = "2.17.0"
description = "python client to interact with Aleph Alpha api endpoints"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -267,7 +257,6 @@ types = ["mypy", "types-Pillow", "types-requests"]
name = "altair"
version = "4.2.2"
description = "Altair: A declarative statistical visualization library for Python."
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -290,7 +279,6 @@ dev = ["black", "docutils", "flake8", "ipython", "m2r", "mistune (<2.0.0)", "pyt
name = "amadeus"
version = "8.1.0"
description = "Python module for the Amadeus travel APIs"
-category = "main"
optional = true
python-versions = ">=3.4.8"
files = [
@@ -301,7 +289,6 @@ files = [
name = "amazon-textract-caller"
version = "0.0.29"
description = "Amazon Textract Caller tools"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -321,7 +308,6 @@ testing = ["amazon-textract-response-parser", "pytest"]
name = "amazon-textract-response-parser"
version = "1.0.0"
description = "Easily parse JSON returned by Amazon Textract."
-category = "main"
optional = true
python-versions = ">=3.8"
files = [
@@ -337,7 +323,6 @@ marshmallow = ">=3.14,<4"
name = "anyio"
version = "3.7.1"
description = "High level compatibility layer for multiple asynchronous event loop implementations"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -359,7 +344,6 @@ trio = ["trio (<0.22)"]
name = "appnope"
version = "0.1.3"
description = "Disable App Nap on macOS >= 10.9"
-category = "dev"
optional = false
python-versions = "*"
files = [
@@ -371,7 +355,6 @@ files = [
name = "argon2-cffi"
version = "23.1.0"
description = "Argon2 for Python"
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -392,7 +375,6 @@ typing = ["mypy"]
name = "argon2-cffi-bindings"
version = "21.2.0"
description = "Low-level CFFI bindings for Argon2"
-category = "dev"
optional = false
python-versions = ">=3.6"
files = [
@@ -430,7 +412,6 @@ tests = ["pytest"]
name = "arrow"
version = "1.2.3"
description = "Better dates & times for Python"
-category = "dev"
optional = false
python-versions = ">=3.6"
files = [
@@ -445,7 +426,6 @@ python-dateutil = ">=2.7.0"
name = "arxiv"
version = "1.4.8"
description = "Python wrapper for the arXiv API: http://arxiv.org/help/api/"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -460,7 +440,6 @@ feedparser = "*"
name = "assemblyai"
version = "0.17.0"
description = "AssemblyAI Python SDK"
-category = "main"
optional = true
python-versions = ">=3.8"
files = [
@@ -481,7 +460,6 @@ extras = ["pyaudio (>=0.2.13)"]
name = "asttokens"
version = "2.2.1"
description = "Annotate AST trees with source code positions"
-category = "dev"
optional = false
python-versions = "*"
files = [
@@ -499,7 +477,6 @@ test = ["astroid", "pytest"]
name = "astunparse"
version = "1.6.3"
description = "An AST unparser for Python"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -515,7 +492,6 @@ wheel = ">=0.23.0,<1.0"
name = "async-lru"
version = "2.0.4"
description = "Simple LRU cache for asyncio"
-category = "dev"
optional = false
python-versions = ">=3.8"
files = [
@@ -530,7 +506,6 @@ typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""}
name = "async-timeout"
version = "4.0.3"
description = "Timeout context manager for asyncio programs"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -542,7 +517,6 @@ files = [
name = "atlassian-python-api"
version = "3.41.0"
description = "Python Atlassian REST API Wrapper"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -564,7 +538,6 @@ kerberos = ["requests-kerberos"]
name = "attr"
version = "0.3.2"
description = "Simple decorator to set attributes of target function or class in a DRY way."
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -576,7 +549,6 @@ files = [
name = "attrs"
version = "23.1.0"
description = "Classes Without Boilerplate"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -595,7 +567,6 @@ tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pyte
name = "audioread"
version = "3.0.0"
description = "multi-library, cross-platform audio decoding"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -606,7 +577,6 @@ files = [
name = "authlib"
version = "1.2.1"
description = "The ultimate Python library in building OAuth and OpenID Connect servers and clients."
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -621,7 +591,6 @@ cryptography = ">=3.2"
name = "awadb"
version = "0.3.10"
description = "AI Native database for embedding vectors"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -648,7 +617,6 @@ test = ["pytest (>=6.0)"]
name = "azure-ai-formrecognizer"
version = "3.3.0"
description = "Microsoft Azure Form Recognizer Client Library for Python"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -666,7 +634,6 @@ typing-extensions = ">=4.0.1"
name = "azure-ai-vision"
version = "0.11.1b1"
description = "Microsoft Azure AI Vision SDK for Python"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -678,7 +645,6 @@ files = [
name = "azure-cognitiveservices-speech"
version = "1.31.0"
description = "Microsoft Cognitive Services Speech SDK for Python"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -694,7 +660,6 @@ files = [
name = "azure-common"
version = "1.1.28"
description = "Microsoft Azure Client Library for Python (Common)"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -706,7 +671,6 @@ files = [
name = "azure-core"
version = "1.29.1"
description = "Microsoft Azure Core Library for Python"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -726,7 +690,6 @@ aio = ["aiohttp (>=3.0)"]
name = "azure-cosmos"
version = "4.5.0"
description = "Microsoft Azure Cosmos Client Library for Python"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -741,7 +704,6 @@ azure-core = ">=1.23.0,<2.0.0"
name = "azure-identity"
version = "1.14.0"
description = "Microsoft Azure Identity Library for Python"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -759,7 +721,6 @@ msal-extensions = ">=0.3.0,<2.0.0"
name = "azure-search-documents"
version = "11.4.0b8"
description = "Microsoft Azure Cognitive Search Client Library for Python"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -776,7 +737,6 @@ isodate = ">=0.6.0"
name = "babel"
version = "2.12.1"
description = "Internationalization utilities"
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -791,7 +751,6 @@ pytz = {version = ">=2015.7", markers = "python_version < \"3.9\""}
name = "backcall"
version = "0.2.0"
description = "Specifications for callback functions passed in to an API"
-category = "dev"
optional = false
python-versions = "*"
files = [
@@ -803,7 +762,6 @@ files = [
name = "backoff"
version = "2.2.1"
description = "Function decoration for backoff and retry"
-category = "main"
optional = true
python-versions = ">=3.7,<4.0"
files = [
@@ -815,7 +773,6 @@ files = [
name = "backports-zoneinfo"
version = "0.2.1"
description = "Backport of the standard library zoneinfo module"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -844,7 +801,6 @@ tzdata = ["tzdata"]
name = "beautifulsoup4"
version = "4.12.2"
description = "Screen-scraping library"
-category = "main"
optional = false
python-versions = ">=3.6.0"
files = [
@@ -863,7 +819,6 @@ lxml = ["lxml"]
name = "bibtexparser"
version = "1.4.0"
description = "Bibtex parser for python 3"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -877,7 +832,6 @@ pyparsing = ">=2.0.3"
name = "black"
version = "23.7.0"
description = "The uncompromising code formatter."
-category = "dev"
optional = false
python-versions = ">=3.8"
files = [
@@ -924,7 +878,6 @@ uvloop = ["uvloop (>=0.15.2)"]
name = "bleach"
version = "6.0.0"
description = "An easy safelist-based HTML-sanitizing tool."
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -943,7 +896,6 @@ css = ["tinycss2 (>=1.1.0,<1.2)"]
name = "blinker"
version = "1.6.2"
description = "Fast, simple object-to-object and broadcast signaling"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -955,7 +907,6 @@ files = [
name = "boto3"
version = "1.28.17"
description = "The AWS SDK for Python"
-category = "main"
optional = true
python-versions = ">= 3.7"
files = [
@@ -975,7 +926,6 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]
name = "botocore"
version = "1.31.17"
description = "Low-level, data-driven core of boto 3."
-category = "main"
optional = true
python-versions = ">= 3.7"
files = [
@@ -995,7 +945,6 @@ crt = ["awscrt (==0.16.26)"]
name = "brotli"
version = "1.0.9"
description = "Python bindings for the Brotli compression library"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -1087,7 +1036,6 @@ files = [
name = "brotlicffi"
version = "1.0.9.2"
description = "Python CFFI bindings to the Brotli library"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -1130,7 +1078,6 @@ cffi = ">=1.0.0"
name = "build"
version = "0.10.0"
description = "A simple, correct Python build frontend"
-category = "main"
optional = true
python-versions = ">= 3.7"
files = [
@@ -1154,7 +1101,6 @@ virtualenv = ["virtualenv (>=20.0.35)"]
name = "cachetools"
version = "5.3.1"
description = "Extensible memoizing collections and decorators"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -1166,7 +1112,6 @@ files = [
name = "cassandra-driver"
version = "3.28.0"
description = "DataStax Driver for Apache Cassandra"
-category = "main"
optional = false
python-versions = "*"
files = [
@@ -1218,7 +1163,6 @@ graph = ["gremlinpython (==3.4.6)"]
name = "cassio"
version = "0.1.0"
description = "A framework-agnostic Python library to seamlessly integrate Apache Cassandra(R) with ML/LLM/genAI workloads."
-category = "main"
optional = false
python-versions = ">=3.8"
files = [
@@ -1234,7 +1178,6 @@ numpy = ">=1.0"
name = "certifi"
version = "2023.7.22"
description = "Python package for providing Mozilla's CA Bundle."
-category = "main"
optional = false
python-versions = ">=3.6"
files = [
@@ -1246,7 +1189,6 @@ files = [
name = "cffi"
version = "1.15.1"
description = "Foreign Function Interface for Python calling C code."
-category = "main"
optional = false
python-versions = "*"
files = [
@@ -1323,7 +1265,6 @@ pycparser = "*"
name = "chardet"
version = "5.2.0"
description = "Universal encoding detector for Python 3"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -1335,7 +1276,6 @@ files = [
name = "charset-normalizer"
version = "3.2.0"
description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
-category = "main"
optional = false
python-versions = ">=3.7.0"
files = [
@@ -1420,7 +1360,6 @@ files = [
name = "clarifai"
version = "9.7.1"
description = "Clarifai Python Utilities"
-category = "main"
optional = true
python-versions = ">=3.8"
files = [
@@ -1437,7 +1376,6 @@ tritonclient = "2.34.0"
name = "clarifai-grpc"
version = "9.7.3"
description = "Clarifai gRPC API Client"
-category = "main"
optional = true
python-versions = ">=3.8"
files = [
@@ -1455,7 +1393,6 @@ requests = ">=2.25.1"
name = "click"
version = "8.1.7"
description = "Composable command line interface toolkit"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -1470,7 +1407,6 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""}
name = "click-plugins"
version = "1.1.1"
description = "An extension module for click to enable registering CLI commands via setuptools entry-points."
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -1488,7 +1424,6 @@ dev = ["coveralls", "pytest (>=3.6)", "pytest-cov", "wheel"]
name = "clickhouse-connect"
version = "0.5.25"
description = "ClickHouse core driver, SqlAlchemy, and Superset libraries"
-category = "main"
optional = true
python-versions = "~=3.7"
files = [
@@ -1578,7 +1513,6 @@ superset = ["apache-superset (>=1.4.1)"]
name = "cligj"
version = "0.7.2"
description = "Click params for commmand line interfaces to GeoJSON"
-category = "main"
optional = true
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, <4"
files = [
@@ -1596,7 +1530,6 @@ test = ["pytest-cov"]
name = "codespell"
version = "2.2.5"
description = "Codespell"
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -1614,7 +1547,6 @@ types = ["chardet (>=5.1.0)", "mypy", "pytest", "pytest-cov", "pytest-dependency
name = "cohere"
version = "4.21"
description = ""
-category = "main"
optional = true
python-versions = ">=3.7,<4.0"
files = [
@@ -1634,7 +1566,6 @@ urllib3 = ">=1.26,<3"
name = "colorama"
version = "0.4.6"
description = "Cross-platform colored terminal text."
-category = "main"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
files = [
@@ -1646,7 +1577,6 @@ files = [
name = "colored"
version = "1.4.4"
description = "Simple library for color and formatting to terminal"
-category = "dev"
optional = false
python-versions = "*"
files = [
@@ -1657,7 +1587,6 @@ files = [
name = "comm"
version = "0.1.4"
description = "Jupyter Python Comm implementation, for usage in ipykernel, xeus-python etc."
-category = "dev"
optional = false
python-versions = ">=3.6"
files = [
@@ -1677,7 +1606,6 @@ typing = ["mypy (>=0.990)"]
name = "coverage"
version = "7.3.0"
description = "Code coverage measurement for Python"
-category = "dev"
optional = false
python-versions = ">=3.8"
files = [
@@ -1745,7 +1673,6 @@ toml = ["tomli"]
name = "cryptography"
version = "41.0.3"
description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers."
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -1791,7 +1718,6 @@ test-randomorder = ["pytest-randomly"]
name = "cssselect"
version = "1.2.0"
description = "cssselect parses CSS3 Selectors and translates them to XPath 1.0"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -1803,7 +1729,6 @@ files = [
name = "dashvector"
version = "1.0.1"
description = "DashVector Client Python Sdk Library"
-category = "main"
optional = true
python-versions = ">=3.7.0"
files = [
@@ -1823,7 +1748,6 @@ protobuf = ">=3.8.0,<4.0.0"
name = "dataclasses-json"
version = "0.5.9"
description = "Easily serialize dataclasses to and from JSON"
-category = "main"
optional = false
python-versions = ">=3.6"
files = [
@@ -1843,7 +1767,6 @@ dev = ["flake8", "hypothesis", "ipython", "mypy (>=0.710)", "portray", "pytest (
name = "debugpy"
version = "1.6.7.post1"
description = "An implementation of the Debug Adapter Protocol for Python"
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -1871,7 +1794,6 @@ files = [
name = "decorator"
version = "5.1.1"
description = "Decorators for Humans"
-category = "main"
optional = false
python-versions = ">=3.5"
files = [
@@ -1883,7 +1805,6 @@ files = [
name = "deeplake"
version = "3.6.19"
description = "Activeloop Deep Lake"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -1921,7 +1842,6 @@ visualizer = ["IPython", "flask"]
name = "defusedxml"
version = "0.7.1"
description = "XML bomb protection for Python stdlib modules"
-category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
files = [
@@ -1933,7 +1853,6 @@ files = [
name = "deprecated"
version = "1.2.14"
description = "Python @deprecated decorator to deprecate old python classes, functions or methods."
-category = "main"
optional = true
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
files = [
@@ -1951,7 +1870,6 @@ dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"]
name = "deprecation"
version = "2.1.0"
description = "A library to handle automated deprecations"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -1966,7 +1884,6 @@ packaging = "*"
name = "dill"
version = "0.3.7"
description = "serialize all of Python"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -1981,7 +1898,6 @@ graph = ["objgraph (>=1.7.2)"]
name = "dnspython"
version = "2.4.2"
description = "DNS toolkit"
-category = "main"
optional = true
python-versions = ">=3.8,<4.0"
files = [
@@ -2001,7 +1917,6 @@ wmi = ["wmi (>=1.5.1,<2.0.0)"]
name = "docarray"
version = "0.32.1"
description = "The data structure for multimodal data"
-category = "main"
optional = true
python-versions = ">=3.7,<4.0"
files = [
@@ -2040,7 +1955,6 @@ web = ["fastapi (>=0.87.0)"]
name = "docker"
version = "6.1.3"
description = "A Python library for the Docker Engine API."
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -2062,7 +1976,6 @@ ssh = ["paramiko (>=2.4.3)"]
name = "docopt"
version = "0.6.2"
description = "Pythonic argument parser, that will make you smile"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -2073,7 +1986,6 @@ files = [
name = "duckdb"
version = "0.8.1"
description = "DuckDB embedded database"
-category = "dev"
optional = false
python-versions = "*"
files = [
@@ -2135,7 +2047,6 @@ files = [
name = "duckdb-engine"
version = "0.7.3"
description = "SQLAlchemy driver for duckdb"
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -2152,7 +2063,6 @@ sqlalchemy = ">=1.3.22"
name = "duckduckgo-search"
version = "3.8.5"
description = "Search for words, documents, images, news, maps and text translation using the DuckDuckGo.com search engine."
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -2170,7 +2080,6 @@ lxml = ">=4.9.2"
name = "elastic-transport"
version = "8.4.0"
description = "Transport classes and utilities shared among Python Elastic client libraries"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -2189,7 +2098,6 @@ develop = ["aiohttp", "mock", "pytest", "pytest-asyncio", "pytest-cov", "pytest-
name = "elasticsearch"
version = "8.9.0"
description = "Python client for Elasticsearch"
-category = "main"
optional = true
python-versions = ">=3.6, <4"
files = [
@@ -2208,7 +2116,6 @@ requests = ["requests (>=2.4.0,<3.0.0)"]
name = "entrypoints"
version = "0.4"
description = "Discover and load entry points from installed packages."
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -2220,7 +2127,6 @@ files = [
name = "esprima"
version = "4.0.1"
description = "ECMAScript parsing infrastructure for multipurpose analysis in Python"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -2231,7 +2137,6 @@ files = [
name = "exceptiongroup"
version = "1.1.3"
description = "Backport of PEP 654 (exception groups)"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -2246,7 +2151,6 @@ test = ["pytest (>=6)"]
name = "executing"
version = "1.2.0"
description = "Get the currently executing AST node of a frame, and other information"
-category = "dev"
optional = false
python-versions = "*"
files = [
@@ -2261,7 +2165,6 @@ tests = ["asttokens", "littleutils", "pytest", "rich"]
name = "faiss-cpu"
version = "1.7.4"
description = "A library for efficient similarity search and clustering of dense vectors."
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -2296,7 +2199,6 @@ files = [
name = "fastavro"
version = "1.8.2"
description = "Fast read/write of AVRO files"
-category = "main"
optional = true
python-versions = ">=3.8"
files = [
@@ -2337,7 +2239,6 @@ zstandard = ["zstandard"]
name = "fastjsonschema"
version = "2.18.0"
description = "Fastest Python implementation of JSON schema"
-category = "dev"
optional = false
python-versions = "*"
files = [
@@ -2352,7 +2253,6 @@ devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benc
name = "feedfinder2"
version = "0.0.4"
description = "Find the feed URLs for a website."
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -2368,7 +2268,6 @@ six = "*"
name = "feedparser"
version = "6.0.10"
description = "Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -2383,7 +2282,6 @@ sgmllib3k = "*"
name = "filelock"
version = "3.12.2"
description = "A platform independent file lock."
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -2399,7 +2297,6 @@ testing = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "diff-cover (>=7.5)", "p
name = "fiona"
version = "1.9.4.post1"
description = "Fiona reads and writes spatial data files"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -2444,7 +2341,6 @@ test = ["Fiona[s3]", "pytest (>=7)", "pytest-cov", "pytz"]
name = "flatbuffers"
version = "23.5.26"
description = "The FlatBuffers serialization format for Python"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -2456,7 +2352,6 @@ files = [
name = "fqdn"
version = "1.5.1"
description = "Validates fully-qualified domain names against RFC 1123, so that they are acceptable to modern bowsers"
-category = "dev"
optional = false
python-versions = ">=2.7, !=3.0, !=3.1, !=3.2, !=3.3, !=3.4, <4"
files = [
@@ -2468,7 +2363,6 @@ files = [
name = "freezegun"
version = "1.2.2"
description = "Let your Python tests travel through time"
-category = "dev"
optional = false
python-versions = ">=3.6"
files = [
@@ -2483,7 +2377,6 @@ python-dateutil = ">=2.7"
name = "frozenlist"
version = "1.4.0"
description = "A list-like structure which implements collections.abc.MutableSequence"
-category = "main"
optional = false
python-versions = ">=3.8"
files = [
@@ -2554,7 +2447,6 @@ files = [
name = "fsspec"
version = "2023.6.0"
description = "File-system specification"
-category = "main"
optional = true
python-versions = ">=3.8"
files = [
@@ -2590,7 +2482,6 @@ tqdm = ["tqdm"]
name = "future"
version = "0.18.3"
description = "Clean single-source support for Python 3 and 2"
-category = "main"
optional = true
python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
files = [
@@ -2601,7 +2492,6 @@ files = [
name = "gast"
version = "0.4.0"
description = "Python AST that abstracts the underlying Python version"
-category = "main"
optional = true
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
files = [
@@ -2613,7 +2503,6 @@ files = [
name = "geojson"
version = "2.5.0"
description = "Python bindings and utilities for GeoJSON"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -2625,7 +2514,6 @@ files = [
name = "geomet"
version = "0.2.1.post1"
description = "GeoJSON <-> WKT/WKB conversion utilities"
-category = "main"
optional = false
python-versions = ">2.6, !=3.3.*, <4"
files = [
@@ -2641,7 +2529,6 @@ six = "*"
name = "geopandas"
version = "0.13.2"
description = "Geographic pandas extensions"
-category = "main"
optional = true
python-versions = ">=3.8"
files = [
@@ -2660,7 +2547,6 @@ shapely = ">=1.7.1"
name = "gitdb"
version = "4.0.10"
description = "Git Object Database"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -2675,7 +2561,6 @@ smmap = ">=3.0.1,<6"
name = "gitpython"
version = "3.1.32"
description = "GitPython is a Python library used to interact with Git repositories"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -2690,7 +2575,6 @@ gitdb = ">=4.0.1,<5"
name = "google-api-core"
version = "2.11.1"
description = "Google API client core library"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -2713,7 +2597,6 @@ grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"]
name = "google-api-python-client"
version = "2.70.0"
description = "Google API Client Library for Python"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -2722,7 +2605,7 @@ files = [
]
[package.dependencies]
-google-api-core = ">=1.31.5,<2.0.0 || >2.3.0,<3.0.0dev"
+google-api-core = ">=1.31.5,<2.0.dev0 || >2.3.0,<3.0.0dev"
google-auth = ">=1.19.0,<3.0.0dev"
google-auth-httplib2 = ">=0.1.0"
httplib2 = ">=0.15.0,<1dev"
@@ -2732,7 +2615,6 @@ uritemplate = ">=3.0.1,<5"
name = "google-auth"
version = "2.22.0"
description = "Google Authentication Library"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -2758,7 +2640,6 @@ requests = ["requests (>=2.20.0,<3.0.0.dev0)"]
name = "google-auth-httplib2"
version = "0.1.0"
description = "Google Authentication Library: httplib2 transport"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -2775,7 +2656,6 @@ six = "*"
name = "google-auth-oauthlib"
version = "1.0.0"
description = "Google Authentication Library"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -2794,7 +2674,6 @@ tool = ["click (>=6.0.0)"]
name = "google-pasta"
version = "0.2.0"
description = "pasta is an AST-based Python refactoring library"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -2810,7 +2689,6 @@ six = "*"
name = "google-search-results"
version = "2.4.2"
description = "Scrape and search localized results from Google, Bing, Baidu, Yahoo, Yandex, Ebay, Homedepot, youtube at scale using SerpApi.com"
-category = "main"
optional = true
python-versions = ">=3.5"
files = [
@@ -2824,7 +2702,6 @@ requests = "*"
name = "googleapis-common-protos"
version = "1.60.0"
description = "Common protobufs used in Google APIs"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -2842,7 +2719,6 @@ grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"]
name = "gptcache"
version = "0.1.39.1"
description = "GPTCache, a powerful caching library that can be used to speed up and lower the cost of chat applications that rely on the LLM service. GPTCache works as a memcache for AIGC applications, similar to how Redis works for traditional applications."
-category = "main"
optional = true
python-versions = ">=3.8.1"
files = [
@@ -2859,7 +2735,6 @@ requests = "*"
name = "gql"
version = "3.4.1"
description = "GraphQL client for Python"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -2886,7 +2761,6 @@ websockets = ["websockets (>=10,<11)", "websockets (>=9,<10)"]
name = "graphql-core"
version = "3.2.3"
description = "GraphQL implementation for Python, a port of GraphQL.js, the JavaScript reference implementation for GraphQL."
-category = "main"
optional = true
python-versions = ">=3.6,<4"
files = [
@@ -2898,7 +2772,6 @@ files = [
name = "greenlet"
version = "2.0.2"
description = "Lightweight in-process concurrent programming"
-category = "main"
optional = false
python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*"
files = [
@@ -2972,7 +2845,6 @@ test = ["objgraph", "psutil"]
name = "grpcio"
version = "1.57.0"
description = "HTTP/2-based RPC framework"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -3030,7 +2902,6 @@ protobuf = ["grpcio-tools (>=1.57.0)"]
name = "grpcio-tools"
version = "1.48.2"
description = "Protobuf code generator for gRPC"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -3091,7 +2962,6 @@ setuptools = "*"
name = "h11"
version = "0.14.0"
description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -3103,7 +2973,6 @@ files = [
name = "h2"
version = "4.1.0"
description = "HTTP/2 State-Machine based protocol implementation"
-category = "main"
optional = true
python-versions = ">=3.6.1"
files = [
@@ -3119,7 +2988,6 @@ hyperframe = ">=6.0,<7"
name = "h5py"
version = "3.9.0"
description = "Read and write HDF5 files from Python"
-category = "main"
optional = true
python-versions = ">=3.8"
files = [
@@ -3153,7 +3021,6 @@ numpy = ">=1.17.3"
name = "hnswlib"
version = "0.7.0"
description = "hnswlib"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -3167,7 +3034,6 @@ numpy = "*"
name = "hpack"
version = "4.0.0"
description = "Pure-Python HPACK header compression"
-category = "main"
optional = true
python-versions = ">=3.6.1"
files = [
@@ -3179,7 +3045,6 @@ files = [
name = "html2text"
version = "2020.1.16"
description = "Turn HTML into equivalent Markdown-structured text."
-category = "main"
optional = true
python-versions = ">=3.5"
files = [
@@ -3191,7 +3056,6 @@ files = [
name = "httpcore"
version = "0.17.3"
description = "A minimal low-level HTTP client."
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -3203,17 +3067,16 @@ files = [
anyio = ">=3.0,<5.0"
certifi = "*"
h11 = ">=0.13,<0.15"
-sniffio = ">=1.0.0,<2.0.0"
+sniffio = "==1.*"
[package.extras]
http2 = ["h2 (>=3,<5)"]
-socks = ["socksio (>=1.0.0,<2.0.0)"]
+socks = ["socksio (==1.*)"]
[[package]]
name = "httplib2"
version = "0.22.0"
description = "A comprehensive HTTP client library."
-category = "main"
optional = true
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
files = [
@@ -3228,7 +3091,6 @@ pyparsing = {version = ">=2.4.2,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.0.2 || >3.0
name = "httpx"
version = "0.24.1"
description = "The next generation HTTP client."
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -3244,19 +3106,18 @@ h2 = {version = ">=3,<5", optional = true, markers = "extra == \"http2\""}
httpcore = ">=0.15.0,<0.18.0"
idna = "*"
sniffio = "*"
-socksio = {version = ">=1.0.0,<2.0.0", optional = true, markers = "extra == \"socks\""}
+socksio = {version = "==1.*", optional = true, markers = "extra == \"socks\""}
[package.extras]
brotli = ["brotli", "brotlicffi"]
-cli = ["click (>=8.0.0,<9.0.0)", "pygments (>=2.0.0,<3.0.0)", "rich (>=10,<14)"]
+cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"]
http2 = ["h2 (>=3,<5)"]
-socks = ["socksio (>=1.0.0,<2.0.0)"]
+socks = ["socksio (==1.*)"]
[[package]]
name = "huggingface-hub"
version = "0.16.4"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
-category = "main"
optional = true
python-versions = ">=3.7.0"
files = [
@@ -3289,7 +3150,6 @@ typing = ["pydantic", "types-PyYAML", "types-requests", "types-simplejson", "typ
name = "humbug"
version = "0.3.2"
description = "Humbug: Do you build developer tools? Humbug helps you know your users."
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -3309,7 +3169,6 @@ profile = ["GPUtil", "psutil", "types-psutil"]
name = "hyperframe"
version = "6.0.1"
description = "HTTP/2 framing layer for Python"
-category = "main"
optional = true
python-versions = ">=3.6.1"
files = [
@@ -3321,7 +3180,6 @@ files = [
name = "idna"
version = "3.4"
description = "Internationalized Domain Names in Applications (IDNA)"
-category = "main"
optional = false
python-versions = ">=3.5"
files = [
@@ -3333,7 +3191,6 @@ files = [
name = "importlib-metadata"
version = "6.8.0"
description = "Read metadata from Python packages"
-category = "main"
optional = false
python-versions = ">=3.8"
files = [
@@ -3353,7 +3210,6 @@ testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs
name = "importlib-resources"
version = "6.0.1"
description = "Read resources from Python packages"
-category = "main"
optional = false
python-versions = ">=3.8"
files = [
@@ -3372,7 +3228,6 @@ testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)",
name = "iniconfig"
version = "2.0.0"
description = "brain-dead simple config-ini parsing"
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -3384,7 +3239,6 @@ files = [
name = "ipykernel"
version = "6.25.1"
description = "IPython Kernel for Jupyter"
-category = "dev"
optional = false
python-versions = ">=3.8"
files = [
@@ -3398,7 +3252,7 @@ comm = ">=0.1.1"
debugpy = ">=1.6.5"
ipython = ">=7.23.1"
jupyter-client = ">=6.1.12"
-jupyter-core = ">=4.12,<5.0.0 || >=5.1.0"
+jupyter-core = ">=4.12,<5.0.dev0 || >=5.1.dev0"
matplotlib-inline = ">=0.1"
nest-asyncio = "*"
packaging = "*"
@@ -3418,7 +3272,6 @@ test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio"
name = "ipython"
version = "8.12.2"
description = "IPython: Productive Interactive Computing"
-category = "dev"
optional = false
python-versions = ">=3.8"
files = [
@@ -3458,7 +3311,6 @@ test-extra = ["curio", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.21)", "pa
name = "ipython-genutils"
version = "0.2.0"
description = "Vestigial utilities from IPython"
-category = "dev"
optional = false
python-versions = "*"
files = [
@@ -3470,7 +3322,6 @@ files = [
name = "ipywidgets"
version = "8.1.0"
description = "Jupyter interactive widgets"
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -3492,7 +3343,6 @@ test = ["ipykernel", "jsonschema", "pytest (>=3.6.0)", "pytest-cov", "pytz"]
name = "isodate"
version = "0.6.1"
description = "An ISO 8601 date/time/duration parser and formatter"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -3507,7 +3357,6 @@ six = "*"
name = "isoduration"
version = "20.11.0"
description = "Operations with ISO 8601 durations"
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -3522,7 +3371,6 @@ arrow = ">=0.15.0"
name = "jaraco-context"
version = "4.3.0"
description = "Context managers by jaraco"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -3538,7 +3386,6 @@ testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-chec
name = "jedi"
version = "0.19.0"
description = "An autocompletion tool for Python that can be used for text editors."
-category = "dev"
optional = false
python-versions = ">=3.6"
files = [
@@ -3558,7 +3405,6 @@ testing = ["Django (<3.1)", "attrs", "colorama", "docopt", "pytest (<7.0.0)"]
name = "jieba3k"
version = "0.35.1"
description = "Chinese Words Segementation Utilities"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -3569,7 +3415,6 @@ files = [
name = "jinja2"
version = "3.1.2"
description = "A very fast and expressive template engine."
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -3587,7 +3432,6 @@ i18n = ["Babel (>=2.7)"]
name = "jmespath"
version = "1.0.1"
description = "JSON Matching Expressions"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -3599,7 +3443,6 @@ files = [
name = "joblib"
version = "1.3.2"
description = "Lightweight pipelining with Python functions"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -3611,7 +3454,6 @@ files = [
name = "jq"
version = "1.4.1"
description = "jq is a lightweight and flexible JSON processor."
-category = "main"
optional = true
python-versions = ">=3.5"
files = [
@@ -3676,7 +3518,6 @@ files = [
name = "json5"
version = "0.9.14"
description = "A Python implementation of the JSON5 data format."
-category = "dev"
optional = false
python-versions = "*"
files = [
@@ -3691,7 +3532,6 @@ dev = ["hypothesis"]
name = "jsonable"
version = "0.3.1"
description = "An abstract class that supports jsonserialization/deserialization."
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -3703,7 +3543,6 @@ files = [
name = "jsonlines"
version = "3.1.0"
description = "Library with helpers for the jsonlines file format"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -3718,7 +3557,6 @@ attrs = ">=19.2.0"
name = "jsonpointer"
version = "2.4"
description = "Identify specific nodes in a JSON document (RFC 6901)"
-category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*"
files = [
@@ -3729,7 +3567,6 @@ files = [
name = "jsonschema"
version = "4.19.0"
description = "An implementation of JSON Schema validation for Python"
-category = "main"
optional = false
python-versions = ">=3.8"
files = [
@@ -3761,7 +3598,6 @@ format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-
name = "jsonschema-specifications"
version = "2023.7.1"
description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry"
-category = "main"
optional = false
python-versions = ">=3.8"
files = [
@@ -3777,7 +3613,6 @@ referencing = ">=0.28.0"
name = "jupyter"
version = "1.0.0"
description = "Jupyter metapackage. Install all the Jupyter components in one go."
-category = "dev"
optional = false
python-versions = "*"
files = [
@@ -3798,7 +3633,6 @@ qtconsole = "*"
name = "jupyter-client"
version = "8.3.0"
description = "Jupyter protocol implementation and client libraries"
-category = "dev"
optional = false
python-versions = ">=3.8"
files = [
@@ -3808,7 +3642,7 @@ files = [
[package.dependencies]
importlib-metadata = {version = ">=4.8.3", markers = "python_version < \"3.10\""}
-jupyter-core = ">=4.12,<5.0.0 || >=5.1.0"
+jupyter-core = ">=4.12,<5.0.dev0 || >=5.1.dev0"
python-dateutil = ">=2.8.2"
pyzmq = ">=23.0"
tornado = ">=6.2"
@@ -3822,7 +3656,6 @@ test = ["coverage", "ipykernel (>=6.14)", "mypy", "paramiko", "pre-commit", "pyt
name = "jupyter-console"
version = "6.6.3"
description = "Jupyter terminal console"
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -3834,7 +3667,7 @@ files = [
ipykernel = ">=6.14"
ipython = "*"
jupyter-client = ">=7.0.0"
-jupyter-core = ">=4.12,<5.0.0 || >=5.1.0"
+jupyter-core = ">=4.12,<5.0.dev0 || >=5.1.dev0"
prompt-toolkit = ">=3.0.30"
pygments = "*"
pyzmq = ">=17"
@@ -3847,7 +3680,6 @@ test = ["flaky", "pexpect", "pytest"]
name = "jupyter-core"
version = "5.3.1"
description = "Jupyter core package. A base package on which Jupyter projects rely."
-category = "dev"
optional = false
python-versions = ">=3.8"
files = [
@@ -3868,7 +3700,6 @@ test = ["ipykernel", "pre-commit", "pytest", "pytest-cov", "pytest-timeout"]
name = "jupyter-events"
version = "0.7.0"
description = "Jupyter Event System library"
-category = "dev"
optional = false
python-versions = ">=3.8"
files = [
@@ -3894,7 +3725,6 @@ test = ["click", "pre-commit", "pytest (>=7.0)", "pytest-asyncio (>=0.19.0)", "p
name = "jupyter-lsp"
version = "2.2.0"
description = "Multi-Language Server WebSocket proxy for Jupyter Notebook/Lab server"
-category = "dev"
optional = false
python-versions = ">=3.8"
files = [
@@ -3910,7 +3740,6 @@ jupyter-server = ">=1.1.2"
name = "jupyter-server"
version = "2.7.2"
description = "The backend—i.e. core services, APIs, and REST endpoints—to Jupyter web applications."
-category = "dev"
optional = false
python-versions = ">=3.8"
files = [
@@ -3923,7 +3752,7 @@ anyio = ">=3.1.0"
argon2-cffi = "*"
jinja2 = "*"
jupyter-client = ">=7.4.4"
-jupyter-core = ">=4.12,<5.0.0 || >=5.1.0"
+jupyter-core = ">=4.12,<5.0.dev0 || >=5.1.dev0"
jupyter-events = ">=0.6.0"
jupyter-server-terminals = "*"
nbconvert = ">=6.4.4"
@@ -3947,7 +3776,6 @@ test = ["flaky", "ipykernel", "pre-commit", "pytest (>=7.0)", "pytest-console-sc
name = "jupyter-server-terminals"
version = "0.4.4"
description = "A Jupyter Server Extension Providing Terminals."
-category = "dev"
optional = false
python-versions = ">=3.8"
files = [
@@ -3967,7 +3795,6 @@ test = ["coverage", "jupyter-server (>=2.0.0)", "pytest (>=7.0)", "pytest-cov",
name = "jupyterlab"
version = "4.0.5"
description = "JupyterLab computational environment"
-category = "dev"
optional = false
python-versions = ">=3.8"
files = [
@@ -4001,7 +3828,6 @@ test = ["coverage", "pytest (>=7.0)", "pytest-check-links (>=0.7)", "pytest-cons
name = "jupyterlab-pygments"
version = "0.2.2"
description = "Pygments theme using JupyterLab CSS variables"
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -4013,7 +3839,6 @@ files = [
name = "jupyterlab-server"
version = "2.24.0"
description = "A set of server components for JupyterLab and JupyterLab like applications."
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -4040,7 +3865,6 @@ test = ["hatch", "ipykernel", "jupyterlab-server[openapi]", "openapi-spec-valida
name = "jupyterlab-widgets"
version = "3.0.8"
description = "Jupyter interactive widgets for JupyterLab"
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -4052,7 +3876,6 @@ files = [
name = "keras"
version = "2.13.1"
description = "Deep learning for humans."
-category = "main"
optional = true
python-versions = ">=3.8"
files = [
@@ -4064,7 +3887,6 @@ files = [
name = "lancedb"
version = "0.1.16"
description = "lancedb"
-category = "main"
optional = true
python-versions = ">=3.8"
files = [
@@ -4091,7 +3913,6 @@ tests = ["pandas (>=1.4)", "pytest", "pytest-asyncio", "pytest-mock"]
name = "langkit"
version = "0.0.15"
description = "A collection of text metric udfs for whylogs profiling and monitoring in WhyLabs"
-category = "main"
optional = true
python-versions = ">=3.8,<4.0"
files = [
@@ -4111,7 +3932,6 @@ all = ["datasets (>=2.12.0,<3.0.0)", "evaluate (>=0.4.0,<0.5.0)", "nltk (>=3.8.1
name = "langsmith"
version = "0.0.25"
description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
-category = "main"
optional = false
python-versions = ">=3.8.1,<4.0"
files = [
@@ -4127,7 +3947,6 @@ requests = ">=2,<3"
name = "lark"
version = "1.1.7"
description = "a modern parsing library"
-category = "main"
optional = false
python-versions = ">=3.6"
files = [
@@ -4145,7 +3964,6 @@ regex = ["regex"]
name = "lazy-loader"
version = "0.3"
description = "lazy_loader"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -4161,10 +3979,11 @@ test = ["pytest (>=7.4)", "pytest-cov (>=4.1)"]
name = "libclang"
version = "16.0.6"
description = "Clang Python Bindings, mirrored from the official LLVM repo: https://github.com/llvm/llvm-project/tree/main/clang/bindings/python, to make the installation process easier."
-category = "main"
optional = true
python-versions = "*"
files = [
+ {file = "libclang-16.0.6-1-py2.py3-none-manylinux2014_aarch64.whl", hash = "sha256:88bc7e7b393c32e41e03ba77ef02fdd647da1f764c2cd028e69e0837080b79f6"},
+ {file = "libclang-16.0.6-1-py2.py3-none-manylinux2014_armv7l.whl", hash = "sha256:d80ed5827736ed5ec2bcedf536720476fd9d4fa4c79ef0cb24aea4c59332f361"},
{file = "libclang-16.0.6-py2.py3-none-macosx_10_9_x86_64.whl", hash = "sha256:da9e47ebc3f0a6d90fb169ef25f9fbcd29b4a4ef97a8b0e3e3a17800af1423f4"},
{file = "libclang-16.0.6-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:e1a5ad1e895e5443e205568c85c04b4608e4e973dae42f4dfd9cb46c81d1486b"},
{file = "libclang-16.0.6-py2.py3-none-manylinux2010_x86_64.whl", hash = "sha256:9dcdc730939788b8b69ffd6d5d75fe5366e3ee007f1e36a99799ec0b0c001492"},
@@ -4180,7 +3999,6 @@ files = [
name = "libdeeplake"
version = "0.0.60"
description = "C++ backend for Deep Lake"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -4213,7 +4031,6 @@ numpy = "*"
name = "librosa"
version = "0.10.1"
description = "Python module for audio and music processing"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -4245,7 +4062,6 @@ tests = ["matplotlib (>=3.3.0)", "packaging (>=20.0)", "pytest", "pytest-cov", "
name = "llvmlite"
version = "0.40.1"
description = "lightweight wrapper around basic LLVM functionality"
-category = "main"
optional = true
python-versions = ">=3.8"
files = [
@@ -4279,7 +4095,6 @@ files = [
name = "loguru"
version = "0.7.0"
description = "Python logging made (stupidly) simple"
-category = "main"
optional = true
python-versions = ">=3.5"
files = [
@@ -4298,7 +4113,6 @@ dev = ["Sphinx (==5.3.0)", "colorama (==0.4.5)", "colorama (==0.4.6)", "freezegu
name = "lxml"
version = "4.9.3"
description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API."
-category = "main"
optional = true
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*"
files = [
@@ -4406,7 +4220,6 @@ source = ["Cython (>=0.29.35)"]
name = "lz4"
version = "4.3.2"
description = "LZ4 Bindings for Python"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -4456,7 +4269,6 @@ tests = ["psutil", "pytest (!=3.3.0)", "pytest-cov"]
name = "manifest-ml"
version = "0.0.1"
description = "Manifest for Prompt Programming Foundation Models."
-category = "main"
optional = true
python-versions = ">=3.8.0"
files = [
@@ -4480,7 +4292,6 @@ dev = ["autopep8 (>=1.6.0)", "black (>=22.3.0)", "docformatter (>=1.4)", "flake8
name = "markdown"
version = "3.4.4"
description = "Python implementation of John Gruber's Markdown."
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -4496,7 +4307,6 @@ testing = ["coverage", "pyyaml"]
name = "markdown-it-py"
version = "3.0.0"
description = "Python port of markdown-it. Markdown parsing, done right!"
-category = "main"
optional = true
python-versions = ">=3.8"
files = [
@@ -4521,7 +4331,6 @@ testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"]
name = "markdownify"
version = "0.11.6"
description = "Convert HTML to markdown."
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -4537,7 +4346,6 @@ six = ">=1.15,<2"
name = "markupsafe"
version = "2.1.3"
description = "Safely add untrusted strings to HTML/XML markup."
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -4597,7 +4405,6 @@ files = [
name = "marqo"
version = "1.2.4"
description = "Tensor search for humans"
-category = "main"
optional = true
python-versions = ">=3"
files = [
@@ -4616,7 +4423,6 @@ urllib3 = "*"
name = "marshmallow"
version = "3.20.1"
description = "A lightweight library for converting complex datatypes to and from native Python datatypes."
-category = "main"
optional = false
python-versions = ">=3.8"
files = [
@@ -4637,7 +4443,6 @@ tests = ["pytest", "pytz", "simplejson"]
name = "marshmallow-enum"
version = "1.5.1"
description = "Enum field for Marshmallow"
-category = "main"
optional = false
python-versions = "*"
files = [
@@ -4652,7 +4457,6 @@ marshmallow = ">=2.0.0"
name = "matplotlib-inline"
version = "0.1.6"
description = "Inline Matplotlib backend for Jupyter"
-category = "dev"
optional = false
python-versions = ">=3.5"
files = [
@@ -4667,7 +4471,6 @@ traitlets = "*"
name = "mdurl"
version = "0.1.2"
description = "Markdown URL utilities"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -4679,7 +4482,6 @@ files = [
name = "mistune"
version = "3.0.1"
description = "A sane and fast Markdown parser with useful plugins and renderers"
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -4691,7 +4493,6 @@ files = [
name = "mmh3"
version = "3.1.0"
description = "Python wrapper for MurmurHash (MurmurHash3), a set of fast and robust hash functions."
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -4736,7 +4537,6 @@ files = [
name = "momento"
version = "1.7.1"
description = "SDK for Momento"
-category = "main"
optional = true
python-versions = ">=3.7,<4.0"
files = [
@@ -4753,7 +4553,6 @@ pyjwt = ">=2.4.0,<3.0.0"
name = "momento-wire-types"
version = "0.67.0"
description = "Momento Client Proto Generated Files"
-category = "main"
optional = true
python-versions = ">=3.7,<4.0"
files = [
@@ -4769,7 +4568,6 @@ protobuf = ">=3,<5"
name = "more-itertools"
version = "10.1.0"
description = "More routines for operating on iterables, beyond itertools"
-category = "main"
optional = true
python-versions = ">=3.8"
files = [
@@ -4781,7 +4579,6 @@ files = [
name = "mpmath"
version = "1.3.0"
description = "Python library for arbitrary-precision floating-point arithmetic"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -4799,7 +4596,6 @@ tests = ["pytest (>=4.6)"]
name = "msal"
version = "1.23.0"
description = "The Microsoft Authentication Library (MSAL) for Python library enables your app to access the Microsoft Cloud by supporting authentication of users with Microsoft Azure Active Directory accounts (AAD) and Microsoft Accounts (MSA) using industry standard OAuth2 and OpenID Connect."
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -4819,7 +4615,6 @@ broker = ["pymsalruntime (>=0.13.2,<0.14)"]
name = "msal-extensions"
version = "1.0.0"
description = "Microsoft Authentication Library extensions (MSAL EX) provides a persistence API that can save your data on disk, encrypted on Windows, macOS and Linux. Concurrent data access will be coordinated by a file lock mechanism."
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -4838,7 +4633,6 @@ portalocker = [
name = "msgpack"
version = "1.0.5"
description = "MessagePack serializer"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -4911,7 +4705,6 @@ files = [
name = "msrest"
version = "0.7.1"
description = "AutoRest swagger generator Python client runtime."
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -4933,7 +4726,6 @@ async = ["aiodns", "aiohttp (>=3.0)"]
name = "multidict"
version = "6.0.4"
description = "multidict implementation"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -5017,7 +4809,6 @@ files = [
name = "multiprocess"
version = "0.70.15"
description = "better multiprocessing and multithreading in Python"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -5046,7 +4837,6 @@ dill = ">=0.3.7"
name = "mwcli"
version = "0.0.3"
description = "Utilities for processing MediaWiki on the command line."
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -5063,7 +4853,6 @@ para = "*"
name = "mwparserfromhell"
version = "0.6.4"
description = "MWParserFromHell is a parser for MediaWiki wikicode."
-category = "main"
optional = true
python-versions = ">= 3.6"
files = [
@@ -5101,7 +4890,6 @@ files = [
name = "mwtypes"
version = "0.3.2"
description = "A set of types for processing MediaWiki data."
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -5116,7 +4904,6 @@ jsonable = ">=0.3.0"
name = "mwxml"
version = "0.3.3"
description = "A set of utilities for processing MediaWiki XML dump data."
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -5134,7 +4921,6 @@ para = ">=0.0.1"
name = "mypy"
version = "0.991"
description = "Optional static typing for Python"
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -5185,7 +4971,6 @@ reports = ["lxml"]
name = "mypy-extensions"
version = "1.0.0"
description = "Type system extensions for programs checked with the mypy type checker."
-category = "main"
optional = false
python-versions = ">=3.5"
files = [
@@ -5197,7 +4982,6 @@ files = [
name = "mypy-protobuf"
version = "3.3.0"
description = "Generate mypy stub files from protobuf specs"
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -5213,7 +4997,6 @@ types-protobuf = ">=3.19.12"
name = "nbclient"
version = "0.8.0"
description = "A client library for executing notebooks. Formerly nbconvert's ExecutePreprocessor."
-category = "dev"
optional = false
python-versions = ">=3.8.0"
files = [
@@ -5223,7 +5006,7 @@ files = [
[package.dependencies]
jupyter-client = ">=6.1.12"
-jupyter-core = ">=4.12,<5.0.0 || >=5.1.0"
+jupyter-core = ">=4.12,<5.0.dev0 || >=5.1.dev0"
nbformat = ">=5.1"
traitlets = ">=5.4"
@@ -5236,7 +5019,6 @@ test = ["flaky", "ipykernel (>=6.19.3)", "ipython", "ipywidgets", "nbconvert (>=
name = "nbconvert"
version = "7.7.4"
description = "Converting Jupyter Notebooks"
-category = "dev"
optional = false
python-versions = ">=3.8"
files = [
@@ -5275,7 +5057,6 @@ webpdf = ["playwright"]
name = "nbformat"
version = "5.9.2"
description = "The Jupyter Notebook format"
-category = "dev"
optional = false
python-versions = ">=3.8"
files = [
@@ -5297,7 +5078,6 @@ test = ["pep440", "pre-commit", "pytest", "testpath"]
name = "nebula3-python"
version = "3.4.0"
description = "Python client for NebulaGraph V3.4"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -5315,7 +5095,6 @@ six = ">=1.16.0"
name = "neo4j"
version = "5.11.0"
description = "Neo4j Bolt driver for Python"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -5333,7 +5112,6 @@ pandas = ["numpy (>=1.7.0,<2.0.0)", "pandas (>=1.1.0,<3.0.0)"]
name = "nest-asyncio"
version = "1.5.7"
description = "Patch asyncio to allow nested event loops"
-category = "main"
optional = false
python-versions = ">=3.5"
files = [
@@ -5345,7 +5123,6 @@ files = [
name = "networkx"
version = "2.8.8"
description = "Python package for creating and manipulating graphs and networks"
-category = "main"
optional = true
python-versions = ">=3.8"
files = [
@@ -5364,7 +5141,6 @@ test = ["codecov (>=2.1)", "pytest (>=7.2)", "pytest-cov (>=4.0)"]
name = "newspaper3k"
version = "0.2.8"
description = "Simplified python article discovery & extraction."
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -5391,7 +5167,6 @@ tldextract = ">=2.0.1"
name = "nlpcloud"
version = "1.1.44"
description = "Python client for the NLP Cloud API"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -5406,7 +5181,6 @@ requests = "*"
name = "nltk"
version = "3.8.1"
description = "Natural Language Toolkit"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -5432,7 +5206,6 @@ twitter = ["twython"]
name = "nomic"
version = "1.1.14"
description = "The offical Nomic python client."
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -5460,7 +5233,6 @@ gpt4all = ["peft (==0.3.0.dev0)", "sentencepiece", "torch", "transformers (==4.2
name = "notebook"
version = "7.0.2"
description = "Jupyter Notebook - A web-based notebook environment for interactive computing"
-category = "dev"
optional = false
python-versions = ">=3.8"
files = [
@@ -5485,7 +5257,6 @@ test = ["ipykernel", "jupyter-server[test] (>=2.4.0,<3)", "jupyterlab-server[tes
name = "notebook-shim"
version = "0.2.3"
description = "A shim layer for notebook traits and config"
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -5503,7 +5274,6 @@ test = ["pytest", "pytest-console-scripts", "pytest-jupyter", "pytest-tornasync"
name = "numba"
version = "0.57.1"
description = "compiling Python code using LLVM"
-category = "main"
optional = true
python-versions = ">=3.8"
files = [
@@ -5535,14 +5305,13 @@ files = [
[package.dependencies]
importlib-metadata = {version = "*", markers = "python_version < \"3.9\""}
-llvmlite = ">=0.40.0dev0,<0.41"
+llvmlite = "==0.40.*"
numpy = ">=1.21,<1.25"
[[package]]
name = "numcodecs"
version = "0.11.0"
description = "A Python package providing buffer compression and transformation codecs for use"
-category = "main"
optional = true
python-versions = ">=3.8"
files = [
@@ -5575,7 +5344,6 @@ zfpy = ["zfpy (>=1.0.0)"]
name = "numexpr"
version = "2.8.5"
description = "Fast numerical expression evaluator for NumPy"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -5618,7 +5386,6 @@ numpy = ">=1.13.3"
name = "numpy"
version = "1.24.3"
description = "Fundamental package for array computing in Python"
-category = "main"
optional = false
python-versions = ">=3.8"
files = [
@@ -5656,7 +5423,6 @@ files = [
name = "nvidia-cublas-cu11"
version = "11.10.3.66"
description = "CUBLAS native runtime libraries"
-category = "main"
optional = true
python-versions = ">=3"
files = [
@@ -5672,7 +5438,6 @@ wheel = "*"
name = "nvidia-cuda-nvrtc-cu11"
version = "11.7.99"
description = "NVRTC native runtime libraries"
-category = "main"
optional = true
python-versions = ">=3"
files = [
@@ -5689,7 +5454,6 @@ wheel = "*"
name = "nvidia-cuda-runtime-cu11"
version = "11.7.99"
description = "CUDA Runtime native Libraries"
-category = "main"
optional = true
python-versions = ">=3"
files = [
@@ -5705,7 +5469,6 @@ wheel = "*"
name = "nvidia-cudnn-cu11"
version = "8.5.0.96"
description = "cuDNN runtime libraries"
-category = "main"
optional = true
python-versions = ">=3"
files = [
@@ -5721,7 +5484,6 @@ wheel = "*"
name = "o365"
version = "2.0.27"
description = "Microsoft Graph and Office 365 API made easy"
-category = "main"
optional = true
python-versions = ">=3.4"
files = [
@@ -5742,7 +5504,6 @@ tzlocal = ">=4.0,<5.0"
name = "oauthlib"
version = "3.2.2"
description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -5759,7 +5520,6 @@ signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"]
name = "openai"
version = "0.27.8"
description = "Python client library for the OpenAI API"
-category = "main"
optional = false
python-versions = ">=3.7.1"
files = [
@@ -5774,7 +5534,7 @@ tqdm = "*"
[package.extras]
datalib = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]
-dev = ["black (>=21.6b0,<22.0)", "pytest (>=6.0.0,<7.0.0)", "pytest-asyncio", "pytest-mock"]
+dev = ["black (>=21.6b0,<22.0)", "pytest (==6.*)", "pytest-asyncio", "pytest-mock"]
embeddings = ["matplotlib", "numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "plotly", "scikit-learn (>=1.0.2)", "scipy", "tenacity (>=8.0.1)"]
wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "wandb"]
@@ -5782,7 +5542,6 @@ wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1
name = "openapi-schema-pydantic"
version = "1.2.4"
description = "OpenAPI (v3) specification schema as pydantic class"
-category = "main"
optional = true
python-versions = ">=3.6.1"
files = [
@@ -5797,7 +5556,6 @@ pydantic = ">=1.8.2"
name = "openlm"
version = "0.0.5"
description = "Drop-in OpenAI-compatible that can call LLMs from other providers"
-category = "main"
optional = true
python-versions = ">=3.8.1,<4.0"
files = [
@@ -5812,7 +5570,6 @@ requests = ">=2,<3"
name = "opensearch-py"
version = "2.3.1"
description = "Python client for OpenSearch"
-category = "main"
optional = true
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, <4"
files = [
@@ -5837,7 +5594,6 @@ kerberos = ["requests-kerberos"]
name = "opt-einsum"
version = "3.3.0"
description = "Optimizing numpys einsum function"
-category = "main"
optional = true
python-versions = ">=3.5"
files = [
@@ -5856,7 +5612,6 @@ tests = ["pytest", "pytest-cov", "pytest-pep8"]
name = "orjson"
version = "3.9.5"
description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -5926,7 +5681,6 @@ files = [
name = "overrides"
version = "7.4.0"
description = "A decorator to automatically detect mismatch when overriding a method."
-category = "dev"
optional = false
python-versions = ">=3.6"
files = [
@@ -5938,7 +5692,6 @@ files = [
name = "packaging"
version = "23.1"
description = "Core utilities for Python packages"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -5950,7 +5703,6 @@ files = [
name = "pandas"
version = "2.0.3"
description = "Powerful data structures for data analysis, time series, and statistics"
-category = "main"
optional = false
python-versions = ">=3.8"
files = [
@@ -6018,7 +5770,6 @@ xml = ["lxml (>=4.6.3)"]
name = "pandocfilters"
version = "1.5.0"
description = "Utilities for writing pandoc filters in python"
-category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
files = [
@@ -6030,7 +5781,6 @@ files = [
name = "para"
version = "0.0.8"
description = "a set utilities that ake advantage of python's 'multiprocessing' module to distribute CPU-intensive tasks"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -6042,7 +5792,6 @@ files = [
name = "parso"
version = "0.8.3"
description = "A Python Parser"
-category = "dev"
optional = false
python-versions = ">=3.6"
files = [
@@ -6058,7 +5807,6 @@ testing = ["docopt", "pytest (<6.0.0)"]
name = "pathos"
version = "0.3.1"
description = "parallel graph management and execution in heterogeneous computing"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -6076,7 +5824,6 @@ ppft = ">=1.7.6.7"
name = "pathspec"
version = "0.11.2"
description = "Utility library for gitignore style pattern matching of file paths."
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -6088,7 +5835,6 @@ files = [
name = "pdfminer-six"
version = "20221105"
description = "PDF parser and analyzer"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -6109,7 +5855,6 @@ image = ["Pillow"]
name = "pexpect"
version = "4.8.0"
description = "Pexpect allows easy control of interactive console applications."
-category = "main"
optional = false
python-versions = "*"
files = [
@@ -6124,7 +5869,6 @@ ptyprocess = ">=0.5"
name = "pgvector"
version = "0.1.8"
description = "pgvector support for Python"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -6138,7 +5882,6 @@ numpy = "*"
name = "pickleshare"
version = "0.7.5"
description = "Tiny 'shelve'-like database with concurrency support"
-category = "dev"
optional = false
python-versions = "*"
files = [
@@ -6148,68 +5891,77 @@ files = [
[[package]]
name = "pillow"
-version = "10.0.0"
+version = "9.5.0"
description = "Python Imaging Library (Fork)"
-category = "main"
optional = true
-python-versions = ">=3.8"
+python-versions = ">=3.7"
files = [
- {file = "Pillow-10.0.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:1f62406a884ae75fb2f818694469519fb685cc7eaff05d3451a9ebe55c646891"},
- {file = "Pillow-10.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d5db32e2a6ccbb3d34d87c87b432959e0db29755727afb37290e10f6e8e62614"},
- {file = "Pillow-10.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edf4392b77bdc81f36e92d3a07a5cd072f90253197f4a52a55a8cec48a12483b"},
- {file = "Pillow-10.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:520f2a520dc040512699f20fa1c363eed506e94248d71f85412b625026f6142c"},
- {file = "Pillow-10.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:8c11160913e3dd06c8ffdb5f233a4f254cb449f4dfc0f8f4549eda9e542c93d1"},
- {file = "Pillow-10.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a74ba0c356aaa3bb8e3eb79606a87669e7ec6444be352870623025d75a14a2bf"},
- {file = "Pillow-10.0.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d5d0dae4cfd56969d23d94dc8e89fb6a217be461c69090768227beb8ed28c0a3"},
- {file = "Pillow-10.0.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:22c10cc517668d44b211717fd9775799ccec4124b9a7f7b3635fc5386e584992"},
- {file = "Pillow-10.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:dffe31a7f47b603318c609f378ebcd57f1554a3a6a8effbc59c3c69f804296de"},
- {file = "Pillow-10.0.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:9fb218c8a12e51d7ead2a7c9e101a04982237d4855716af2e9499306728fb485"},
- {file = "Pillow-10.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d35e3c8d9b1268cbf5d3670285feb3528f6680420eafe35cccc686b73c1e330f"},
- {file = "Pillow-10.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ed64f9ca2f0a95411e88a4efbd7a29e5ce2cea36072c53dd9d26d9c76f753b3"},
- {file = "Pillow-10.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b6eb5502f45a60a3f411c63187db83a3d3107887ad0d036c13ce836f8a36f1d"},
- {file = "Pillow-10.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:c1fbe7621c167ecaa38ad29643d77a9ce7311583761abf7836e1510c580bf3dd"},
- {file = "Pillow-10.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:cd25d2a9d2b36fcb318882481367956d2cf91329f6892fe5d385c346c0649629"},
- {file = "Pillow-10.0.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3b08d4cc24f471b2c8ca24ec060abf4bebc6b144cb89cba638c720546b1cf538"},
- {file = "Pillow-10.0.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d737a602fbd82afd892ca746392401b634e278cb65d55c4b7a8f48e9ef8d008d"},
- {file = "Pillow-10.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:3a82c40d706d9aa9734289740ce26460a11aeec2d9c79b7af87bb35f0073c12f"},
- {file = "Pillow-10.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:bc2ec7c7b5d66b8ec9ce9f720dbb5fa4bace0f545acd34870eff4a369b44bf37"},
- {file = "Pillow-10.0.0-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:d80cf684b541685fccdd84c485b31ce73fc5c9b5d7523bf1394ce134a60c6883"},
- {file = "Pillow-10.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:76de421f9c326da8f43d690110f0e79fe3ad1e54be811545d7d91898b4c8493e"},
- {file = "Pillow-10.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81ff539a12457809666fef6624684c008e00ff6bf455b4b89fd00a140eecd640"},
- {file = "Pillow-10.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce543ed15570eedbb85df19b0a1a7314a9c8141a36ce089c0a894adbfccb4568"},
- {file = "Pillow-10.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:685ac03cc4ed5ebc15ad5c23bc555d68a87777586d970c2c3e216619a5476223"},
- {file = "Pillow-10.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:d72e2ecc68a942e8cf9739619b7f408cc7b272b279b56b2c83c6123fcfa5cdff"},
- {file = "Pillow-10.0.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d50b6aec14bc737742ca96e85d6d0a5f9bfbded018264b3b70ff9d8c33485551"},
- {file = "Pillow-10.0.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:00e65f5e822decd501e374b0650146063fbb30a7264b4d2744bdd7b913e0cab5"},
- {file = "Pillow-10.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:f31f9fdbfecb042d046f9d91270a0ba28368a723302786c0009ee9b9f1f60199"},
- {file = "Pillow-10.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:1ce91b6ec08d866b14413d3f0bbdea7e24dfdc8e59f562bb77bc3fe60b6144ca"},
- {file = "Pillow-10.0.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:349930d6e9c685c089284b013478d6f76e3a534e36ddfa912cde493f235372f3"},
- {file = "Pillow-10.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3a684105f7c32488f7153905a4e3015a3b6c7182e106fe3c37fbb5ef3e6994c3"},
- {file = "Pillow-10.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b4f69b3700201b80bb82c3a97d5e9254084f6dd5fb5b16fc1a7b974260f89f43"},
- {file = "Pillow-10.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f07ea8d2f827d7d2a49ecf1639ec02d75ffd1b88dcc5b3a61bbb37a8759ad8d"},
- {file = "Pillow-10.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:040586f7d37b34547153fa383f7f9aed68b738992380ac911447bb78f2abe530"},
- {file = "Pillow-10.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:f88a0b92277de8e3ca715a0d79d68dc82807457dae3ab8699c758f07c20b3c51"},
- {file = "Pillow-10.0.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:c7cf14a27b0d6adfaebb3ae4153f1e516df54e47e42dcc073d7b3d76111a8d86"},
- {file = "Pillow-10.0.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:3400aae60685b06bb96f99a21e1ada7bc7a413d5f49bce739828ecd9391bb8f7"},
- {file = "Pillow-10.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:dbc02381779d412145331789b40cc7b11fdf449e5d94f6bc0b080db0a56ea3f0"},
- {file = "Pillow-10.0.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:9211e7ad69d7c9401cfc0e23d49b69ca65ddd898976d660a2fa5904e3d7a9baa"},
- {file = "Pillow-10.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:faaf07ea35355b01a35cb442dd950d8f1bb5b040a7787791a535de13db15ed90"},
- {file = "Pillow-10.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9f72a021fbb792ce98306ffb0c348b3c9cb967dce0f12a49aa4c3d3fdefa967"},
- {file = "Pillow-10.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f7c16705f44e0504a3a2a14197c1f0b32a95731d251777dcb060aa83022cb2d"},
- {file = "Pillow-10.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:76edb0a1fa2b4745fb0c99fb9fb98f8b180a1bbceb8be49b087e0b21867e77d3"},
- {file = "Pillow-10.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:368ab3dfb5f49e312231b6f27b8820c823652b7cd29cfbd34090565a015e99ba"},
- {file = "Pillow-10.0.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:608bfdee0d57cf297d32bcbb3c728dc1da0907519d1784962c5f0c68bb93e5a3"},
- {file = "Pillow-10.0.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5c6e3df6bdd396749bafd45314871b3d0af81ff935b2d188385e970052091017"},
- {file = "Pillow-10.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:7be600823e4c8631b74e4a0d38384c73f680e6105a7d3c6824fcf226c178c7e6"},
- {file = "Pillow-10.0.0-pp310-pypy310_pp73-macosx_10_10_x86_64.whl", hash = "sha256:92be919bbc9f7d09f7ae343c38f5bb21c973d2576c1d45600fce4b74bafa7ac0"},
- {file = "Pillow-10.0.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f8182b523b2289f7c415f589118228d30ac8c355baa2f3194ced084dac2dbba"},
- {file = "Pillow-10.0.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:38250a349b6b390ee6047a62c086d3817ac69022c127f8a5dc058c31ccef17f3"},
- {file = "Pillow-10.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:88af2003543cc40c80f6fca01411892ec52b11021b3dc22ec3bc9d5afd1c5334"},
- {file = "Pillow-10.0.0-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:c189af0545965fa8d3b9613cfdb0cd37f9d71349e0f7750e1fd704648d475ed2"},
- {file = "Pillow-10.0.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce7b031a6fc11365970e6a5686d7ba8c63e4c1cf1ea143811acbb524295eabed"},
- {file = "Pillow-10.0.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:db24668940f82321e746773a4bc617bfac06ec831e5c88b643f91f122a785684"},
- {file = "Pillow-10.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:efe8c0681042536e0d06c11f48cebe759707c9e9abf880ee213541c5b46c5bf3"},
- {file = "Pillow-10.0.0.tar.gz", hash = "sha256:9c82b5b3e043c7af0d95792d0d20ccf68f61a1fec6b3530e718b688422727396"},
+ {file = "Pillow-9.5.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:ace6ca218308447b9077c14ea4ef381ba0b67ee78d64046b3f19cf4e1139ad16"},
+ {file = "Pillow-9.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d3d403753c9d5adc04d4694d35cf0391f0f3d57c8e0030aac09d7678fa8030aa"},
+ {file = "Pillow-9.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ba1b81ee69573fe7124881762bb4cd2e4b6ed9dd28c9c60a632902fe8db8b38"},
+ {file = "Pillow-9.5.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fe7e1c262d3392afcf5071df9afa574544f28eac825284596ac6db56e6d11062"},
+ {file = "Pillow-9.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f36397bf3f7d7c6a3abdea815ecf6fd14e7fcd4418ab24bae01008d8d8ca15e"},
+ {file = "Pillow-9.5.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:252a03f1bdddce077eff2354c3861bf437c892fb1832f75ce813ee94347aa9b5"},
+ {file = "Pillow-9.5.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:85ec677246533e27770b0de5cf0f9d6e4ec0c212a1f89dfc941b64b21226009d"},
+ {file = "Pillow-9.5.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b416f03d37d27290cb93597335a2f85ed446731200705b22bb927405320de903"},
+ {file = "Pillow-9.5.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1781a624c229cb35a2ac31cc4a77e28cafc8900733a864870c49bfeedacd106a"},
+ {file = "Pillow-9.5.0-cp310-cp310-win32.whl", hash = "sha256:8507eda3cd0608a1f94f58c64817e83ec12fa93a9436938b191b80d9e4c0fc44"},
+ {file = "Pillow-9.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:d3c6b54e304c60c4181da1c9dadf83e4a54fd266a99c70ba646a9baa626819eb"},
+ {file = "Pillow-9.5.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:7ec6f6ce99dab90b52da21cf0dc519e21095e332ff3b399a357c187b1a5eee32"},
+ {file = "Pillow-9.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:560737e70cb9c6255d6dcba3de6578a9e2ec4b573659943a5e7e4af13f298f5c"},
+ {file = "Pillow-9.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:96e88745a55b88a7c64fa49bceff363a1a27d9a64e04019c2281049444a571e3"},
+ {file = "Pillow-9.5.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d9c206c29b46cfd343ea7cdfe1232443072bbb270d6a46f59c259460db76779a"},
+ {file = "Pillow-9.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cfcc2c53c06f2ccb8976fb5c71d448bdd0a07d26d8e07e321c103416444c7ad1"},
+ {file = "Pillow-9.5.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:a0f9bb6c80e6efcde93ffc51256d5cfb2155ff8f78292f074f60f9e70b942d99"},
+ {file = "Pillow-9.5.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:8d935f924bbab8f0a9a28404422da8af4904e36d5c33fc6f677e4c4485515625"},
+ {file = "Pillow-9.5.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:fed1e1cf6a42577953abbe8e6cf2fe2f566daebde7c34724ec8803c4c0cda579"},
+ {file = "Pillow-9.5.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c1170d6b195555644f0616fd6ed929dfcf6333b8675fcca044ae5ab110ded296"},
+ {file = "Pillow-9.5.0-cp311-cp311-win32.whl", hash = "sha256:54f7102ad31a3de5666827526e248c3530b3a33539dbda27c6843d19d72644ec"},
+ {file = "Pillow-9.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:cfa4561277f677ecf651e2b22dc43e8f5368b74a25a8f7d1d4a3a243e573f2d4"},
+ {file = "Pillow-9.5.0-cp311-cp311-win_arm64.whl", hash = "sha256:965e4a05ef364e7b973dd17fc765f42233415974d773e82144c9bbaaaea5d089"},
+ {file = "Pillow-9.5.0-cp312-cp312-win32.whl", hash = "sha256:22baf0c3cf0c7f26e82d6e1adf118027afb325e703922c8dfc1d5d0156bb2eeb"},
+ {file = "Pillow-9.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:432b975c009cf649420615388561c0ce7cc31ce9b2e374db659ee4f7d57a1f8b"},
+ {file = "Pillow-9.5.0-cp37-cp37m-macosx_10_10_x86_64.whl", hash = "sha256:5d4ebf8e1db4441a55c509c4baa7a0587a0210f7cd25fcfe74dbbce7a4bd1906"},
+ {file = "Pillow-9.5.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:375f6e5ee9620a271acb6820b3d1e94ffa8e741c0601db4c0c4d3cb0a9c224bf"},
+ {file = "Pillow-9.5.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:99eb6cafb6ba90e436684e08dad8be1637efb71c4f2180ee6b8f940739406e78"},
+ {file = "Pillow-9.5.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2dfaaf10b6172697b9bceb9a3bd7b951819d1ca339a5ef294d1f1ac6d7f63270"},
+ {file = "Pillow-9.5.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:763782b2e03e45e2c77d7779875f4432e25121ef002a41829d8868700d119392"},
+ {file = "Pillow-9.5.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:35f6e77122a0c0762268216315bf239cf52b88865bba522999dc38f1c52b9b47"},
+ {file = "Pillow-9.5.0-cp37-cp37m-win32.whl", hash = "sha256:aca1c196f407ec7cf04dcbb15d19a43c507a81f7ffc45b690899d6a76ac9fda7"},
+ {file = "Pillow-9.5.0-cp37-cp37m-win_amd64.whl", hash = "sha256:322724c0032af6692456cd6ed554bb85f8149214d97398bb80613b04e33769f6"},
+ {file = "Pillow-9.5.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:a0aa9417994d91301056f3d0038af1199eb7adc86e646a36b9e050b06f526597"},
+ {file = "Pillow-9.5.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f8286396b351785801a976b1e85ea88e937712ee2c3ac653710a4a57a8da5d9c"},
+ {file = "Pillow-9.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c830a02caeb789633863b466b9de10c015bded434deb3ec87c768e53752ad22a"},
+ {file = "Pillow-9.5.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fbd359831c1657d69bb81f0db962905ee05e5e9451913b18b831febfe0519082"},
+ {file = "Pillow-9.5.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8fc330c3370a81bbf3f88557097d1ea26cd8b019d6433aa59f71195f5ddebbf"},
+ {file = "Pillow-9.5.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:7002d0797a3e4193c7cdee3198d7c14f92c0836d6b4a3f3046a64bd1ce8df2bf"},
+ {file = "Pillow-9.5.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:229e2c79c00e85989a34b5981a2b67aa079fd08c903f0aaead522a1d68d79e51"},
+ {file = "Pillow-9.5.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9adf58f5d64e474bed00d69bcd86ec4bcaa4123bfa70a65ce72e424bfb88ed96"},
+ {file = "Pillow-9.5.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:662da1f3f89a302cc22faa9f14a262c2e3951f9dbc9617609a47521c69dd9f8f"},
+ {file = "Pillow-9.5.0-cp38-cp38-win32.whl", hash = "sha256:6608ff3bf781eee0cd14d0901a2b9cc3d3834516532e3bd673a0a204dc8615fc"},
+ {file = "Pillow-9.5.0-cp38-cp38-win_amd64.whl", hash = "sha256:e49eb4e95ff6fd7c0c402508894b1ef0e01b99a44320ba7d8ecbabefddcc5569"},
+ {file = "Pillow-9.5.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:482877592e927fd263028c105b36272398e3e1be3269efda09f6ba21fd83ec66"},
+ {file = "Pillow-9.5.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3ded42b9ad70e5f1754fb7c2e2d6465a9c842e41d178f262e08b8c85ed8a1d8e"},
+ {file = "Pillow-9.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c446d2245ba29820d405315083d55299a796695d747efceb5717a8b450324115"},
+ {file = "Pillow-9.5.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8aca1152d93dcc27dc55395604dcfc55bed5f25ef4c98716a928bacba90d33a3"},
+ {file = "Pillow-9.5.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:608488bdcbdb4ba7837461442b90ea6f3079397ddc968c31265c1e056964f1ef"},
+ {file = "Pillow-9.5.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:60037a8db8750e474af7ffc9faa9b5859e6c6d0a50e55c45576bf28be7419705"},
+ {file = "Pillow-9.5.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:07999f5834bdc404c442146942a2ecadd1cb6292f5229f4ed3b31e0a108746b1"},
+ {file = "Pillow-9.5.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a127ae76092974abfbfa38ca2d12cbeddcdeac0fb71f9627cc1135bedaf9d51a"},
+ {file = "Pillow-9.5.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:489f8389261e5ed43ac8ff7b453162af39c3e8abd730af8363587ba64bb2e865"},
+ {file = "Pillow-9.5.0-cp39-cp39-win32.whl", hash = "sha256:9b1af95c3a967bf1da94f253e56b6286b50af23392a886720f563c547e48e964"},
+ {file = "Pillow-9.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:77165c4a5e7d5a284f10a6efaa39a0ae8ba839da344f20b111d62cc932fa4e5d"},
+ {file = "Pillow-9.5.0-pp38-pypy38_pp73-macosx_10_10_x86_64.whl", hash = "sha256:833b86a98e0ede388fa29363159c9b1a294b0905b5128baf01db683672f230f5"},
+ {file = "Pillow-9.5.0-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aaf305d6d40bd9632198c766fb64f0c1a83ca5b667f16c1e79e1661ab5060140"},
+ {file = "Pillow-9.5.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0852ddb76d85f127c135b6dd1f0bb88dbb9ee990d2cd9aa9e28526c93e794fba"},
+ {file = "Pillow-9.5.0-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:91ec6fe47b5eb5a9968c79ad9ed78c342b1f97a091677ba0e012701add857829"},
+ {file = "Pillow-9.5.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:cb841572862f629b99725ebaec3287fc6d275be9b14443ea746c1dd325053cbd"},
+ {file = "Pillow-9.5.0-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:c380b27d041209b849ed246b111b7c166ba36d7933ec6e41175fd15ab9eb1572"},
+ {file = "Pillow-9.5.0-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7c9af5a3b406a50e313467e3565fc99929717f780164fe6fbb7704edba0cebbe"},
+ {file = "Pillow-9.5.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5671583eab84af046a397d6d0ba25343c00cd50bce03787948e0fff01d4fd9b1"},
+ {file = "Pillow-9.5.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:84a6f19ce086c1bf894644b43cd129702f781ba5751ca8572f08aa40ef0ab7b7"},
+ {file = "Pillow-9.5.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:1e7723bd90ef94eda669a3c2c19d549874dd5badaeefabefd26053304abe5799"},
+ {file = "Pillow-9.5.0.tar.gz", hash = "sha256:bf548479d336726d7a0eceb6e767e179fbde37833ae42794602631a070d630f1"},
]
[package.extras]
@@ -6220,7 +5972,6 @@ tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "pa
name = "pinecone-client"
version = "2.2.2"
description = "Pinecone client and SDK"
-category = "main"
optional = true
python-versions = ">=3.8"
files = [
@@ -6246,7 +5997,6 @@ grpc = ["googleapis-common-protos (>=1.53.0)", "grpc-gateway-protoc-gen-openapiv
name = "pinecone-text"
version = "0.4.2"
description = "Text utilities library by Pinecone.io"
-category = "main"
optional = true
python-versions = ">=3.8,<4.0"
files = [
@@ -6266,7 +6016,6 @@ wget = ">=3.2,<4.0"
name = "pkgutil-resolve-name"
version = "1.3.10"
description = "Resolve a name to an object."
-category = "main"
optional = false
python-versions = ">=3.6"
files = [
@@ -6278,7 +6027,6 @@ files = [
name = "platformdirs"
version = "3.10.0"
description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -6294,7 +6042,6 @@ test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-co
name = "playwright"
version = "1.37.0"
description = "A high-level API to automate web browsers"
-category = "dev"
optional = false
python-versions = ">=3.8"
files = [
@@ -6316,7 +6063,6 @@ typing-extensions = {version = "*", markers = "python_version <= \"3.8\""}
name = "pluggy"
version = "1.2.0"
description = "plugin and hook calling mechanisms for python"
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -6332,7 +6078,6 @@ testing = ["pytest", "pytest-benchmark"]
name = "pooch"
version = "1.7.0"
description = "\"Pooch manages your Python library's sample data files: it automatically downloads and stores them in a local directory, with support for versioning and corruption checks.\""
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -6354,7 +6099,6 @@ xxhash = ["xxhash (>=1.4.3)"]
name = "portalocker"
version = "2.7.0"
description = "Wraps the portalocker recipe for easy usage"
-category = "main"
optional = true
python-versions = ">=3.5"
files = [
@@ -6374,7 +6118,6 @@ tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "p
name = "pox"
version = "0.3.3"
description = "utilities for filesystem exploration and automated builds"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -6386,7 +6129,6 @@ files = [
name = "ppft"
version = "1.7.6.7"
description = "distributed and parallel Python"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -6401,7 +6143,6 @@ dill = ["dill (>=0.3.7)"]
name = "prometheus-client"
version = "0.17.1"
description = "Python client for the Prometheus monitoring system."
-category = "dev"
optional = false
python-versions = ">=3.6"
files = [
@@ -6416,7 +6157,6 @@ twisted = ["twisted"]
name = "prompt-toolkit"
version = "3.0.39"
description = "Library for building powerful interactive command lines in Python"
-category = "dev"
optional = false
python-versions = ">=3.7.0"
files = [
@@ -6431,7 +6171,6 @@ wcwidth = "*"
name = "protobuf"
version = "3.20.3"
description = "Protocol Buffers"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -6463,7 +6202,6 @@ files = [
name = "psutil"
version = "5.9.5"
description = "Cross-platform lib for process and system monitoring in Python."
-category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
files = [
@@ -6490,7 +6228,6 @@ test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"]
name = "psychicapi"
version = "0.8.4"
description = "Psychic.dev is an open-source data integration platform for LLMs. This is the Python client for Psychic"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -6505,7 +6242,6 @@ requests = "*"
name = "psycopg2-binary"
version = "2.9.7"
description = "psycopg2 - Python-PostgreSQL Database Adapter"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -6575,7 +6311,6 @@ files = [
name = "ptyprocess"
version = "0.7.0"
description = "Run a subprocess in a pseudo terminal"
-category = "main"
optional = false
python-versions = "*"
files = [
@@ -6587,7 +6322,6 @@ files = [
name = "pure-eval"
version = "0.2.2"
description = "Safely evaluate AST nodes without side effects"
-category = "dev"
optional = false
python-versions = "*"
files = [
@@ -6602,7 +6336,6 @@ tests = ["pytest"]
name = "py"
version = "1.11.0"
description = "library with cross-python path, ini-parsing, io, code, log facilities"
-category = "main"
optional = true
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
files = [
@@ -6614,7 +6347,6 @@ files = [
name = "py-trello"
version = "0.19.0"
description = "Python wrapper around the Trello API"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -6631,7 +6363,6 @@ requests-oauthlib = ">=0.4.1"
name = "py4j"
version = "0.10.9.7"
description = "Enables Python programs to dynamically access arbitrary Java objects"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -6643,7 +6374,6 @@ files = [
name = "pyaes"
version = "1.6.1"
description = "Pure-Python Implementation of the AES block-cipher and common modes of operation"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -6654,7 +6384,6 @@ files = [
name = "pyarrow"
version = "12.0.1"
description = "Python library for Apache Arrow"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -6692,7 +6421,6 @@ numpy = ">=1.16.6"
name = "pyasn1"
version = "0.5.0"
description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)"
-category = "main"
optional = true
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"
files = [
@@ -6704,7 +6432,6 @@ files = [
name = "pyasn1-modules"
version = "0.3.0"
description = "A collection of ASN.1-based protocols modules"
-category = "main"
optional = true
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"
files = [
@@ -6719,7 +6446,6 @@ pyasn1 = ">=0.4.6,<0.6.0"
name = "pycares"
version = "4.3.0"
description = "Python interface for c-ares"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -6787,7 +6513,6 @@ idna = ["idna (>=2.1)"]
name = "pycparser"
version = "2.21"
description = "C parser in Python"
-category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
files = [
@@ -6799,7 +6524,6 @@ files = [
name = "pydantic"
version = "1.10.12"
description = "Data validation and settings management using python type hints"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -6852,7 +6576,6 @@ email = ["email-validator (>=1.0.3)"]
name = "pydeck"
version = "0.8.0"
description = "Widget for deck.gl maps"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -6872,7 +6595,6 @@ jupyter = ["ipykernel (>=5.1.2)", "ipython (>=5.8.0)", "ipywidgets (>=7,<8)", "t
name = "pyee"
version = "9.0.4"
description = "A port of node.js's EventEmitter to python."
-category = "dev"
optional = false
python-versions = "*"
files = [
@@ -6887,7 +6609,6 @@ typing-extensions = "*"
name = "pygments"
version = "2.16.1"
description = "Pygments is a syntax highlighting package written in Python."
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -6902,7 +6623,6 @@ plugins = ["importlib-metadata"]
name = "pyjwt"
version = "2.8.0"
description = "JSON Web Token implementation in Python"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -6923,7 +6643,6 @@ tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"]
name = "pylance"
version = "0.5.10"
description = "python wrapper for lance-rs"
-category = "main"
optional = true
python-versions = ">=3.8"
files = [
@@ -6945,7 +6664,6 @@ tests = ["duckdb", "ml_dtypes", "pandas (>=1.4)", "polars[pandas,pyarrow]", "pyt
name = "pymongo"
version = "4.5.0"
description = "Python driver for MongoDB "
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -7047,7 +6765,6 @@ zstd = ["zstandard"]
name = "pympler"
version = "1.0.1"
description = "A development tool to measure, monitor and analyze the memory behavior of Python objects."
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -7059,7 +6776,6 @@ files = [
name = "pymupdf"
version = "1.22.5"
description = "Python bindings for the PDF toolkit and renderer MuPDF"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -7099,7 +6815,6 @@ files = [
name = "pyowm"
version = "3.3.0"
description = "A Python wrapper around OpenWeatherMap web APIs"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -7119,7 +6834,6 @@ requests = [
name = "pyparsing"
version = "3.1.1"
description = "pyparsing module - Classes and methods to define and execute parsing grammars"
-category = "main"
optional = true
python-versions = ">=3.6.8"
files = [
@@ -7134,7 +6848,6 @@ diagrams = ["jinja2", "railroad-diagrams"]
name = "pypdf"
version = "3.15.2"
description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -7156,7 +6869,6 @@ image = ["Pillow (>=8.0.0)"]
name = "pypdfium2"
version = "4.18.0"
description = "Python bindings to PDFium"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -7178,7 +6890,6 @@ files = [
name = "pyphen"
version = "0.14.0"
description = "Pure Python module to hyphenate text"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -7194,7 +6905,6 @@ test = ["flake8", "isort", "pytest"]
name = "pyproj"
version = "3.5.0"
description = "Python interface to PROJ (cartographic projections and coordinate transformations library)"
-category = "main"
optional = true
python-versions = ">=3.8"
files = [
@@ -7242,7 +6952,6 @@ certifi = "*"
name = "pyproject-hooks"
version = "1.0.0"
description = "Wrappers to call pyproject.toml-based build backend hooks."
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -7257,7 +6966,6 @@ tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
name = "pysocks"
version = "1.7.1"
description = "A Python SOCKS client module. See https://github.com/Anorov/PySocks for more information."
-category = "main"
optional = true
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
files = [
@@ -7270,7 +6978,6 @@ files = [
name = "pyspark"
version = "3.4.1"
description = "Apache Spark Python API"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -7291,7 +6998,6 @@ sql = ["numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=1.0.0)"]
name = "pytesseract"
version = "0.3.10"
description = "Python-tesseract is a python wrapper for Google's Tesseract-OCR"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -7307,7 +7013,6 @@ Pillow = ">=8.0.0"
name = "pytest"
version = "7.4.0"
description = "pytest: simple powerful testing with Python"
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -7330,7 +7035,6 @@ testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "no
name = "pytest-asyncio"
version = "0.20.3"
description = "Pytest support for asyncio"
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -7349,7 +7053,6 @@ testing = ["coverage (>=6.2)", "flaky (>=3.5.0)", "hypothesis (>=5.7.1)", "mypy
name = "pytest-cov"
version = "4.1.0"
description = "Pytest plugin for measuring coverage."
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -7368,7 +7071,6 @@ testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtuale
name = "pytest-dotenv"
version = "0.5.2"
description = "A py.test plugin that parses environment files before running tests"
-category = "dev"
optional = false
python-versions = "*"
files = [
@@ -7384,7 +7086,6 @@ python-dotenv = ">=0.9.1"
name = "pytest-mock"
version = "3.11.1"
description = "Thin-wrapper around the mock package for easier use with pytest"
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -7402,7 +7103,6 @@ dev = ["pre-commit", "pytest-asyncio", "tox"]
name = "pytest-socket"
version = "0.6.0"
description = "Pytest Plugin to disable socket calls during tests"
-category = "dev"
optional = false
python-versions = ">=3.7,<4.0"
files = [
@@ -7417,7 +7117,6 @@ pytest = ">=3.6.3"
name = "pytest-vcr"
version = "1.0.2"
description = "Plugin for managing VCR.py cassettes"
-category = "dev"
optional = false
python-versions = "*"
files = [
@@ -7433,7 +7132,6 @@ vcrpy = "*"
name = "pytest-watcher"
version = "0.2.6"
description = "Continiously runs pytest on changes in *.py files"
-category = "dev"
optional = false
python-versions = ">=3.7.0,<4.0.0"
files = [
@@ -7448,7 +7146,6 @@ watchdog = ">=2.0.0"
name = "python-arango"
version = "7.6.0"
description = "Python Driver for ArangoDB"
-category = "main"
optional = true
python-versions = ">=3.8"
files = [
@@ -7472,7 +7169,6 @@ dev = ["black (>=22.3.0)", "flake8 (>=4.0.1)", "isort (>=5.10.1)", "mock", "mypy
name = "python-dateutil"
version = "2.8.2"
description = "Extensions to the standard Python datetime module"
-category = "main"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
files = [
@@ -7487,7 +7183,6 @@ six = ">=1.5"
name = "python-dotenv"
version = "1.0.0"
description = "Read key-value pairs from a .env file and set them as environment variables"
-category = "main"
optional = false
python-versions = ">=3.8"
files = [
@@ -7502,7 +7197,6 @@ cli = ["click (>=5.0)"]
name = "python-json-logger"
version = "2.0.7"
description = "A python library adding a json log formatter"
-category = "dev"
optional = false
python-versions = ">=3.6"
files = [
@@ -7514,7 +7208,6 @@ files = [
name = "python-rapidjson"
version = "1.10"
description = "Python wrapper around rapidjson"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -7580,7 +7273,6 @@ files = [
name = "pytz"
version = "2023.3"
description = "World timezone definitions, modern and historical"
-category = "main"
optional = false
python-versions = "*"
files = [
@@ -7592,7 +7284,6 @@ files = [
name = "pytz-deprecation-shim"
version = "0.1.0.post0"
description = "Shims to make deprecation of pytz easier"
-category = "main"
optional = true
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"
files = [
@@ -7608,7 +7299,6 @@ tzdata = {version = "*", markers = "python_version >= \"3.6\""}
name = "pyvespa"
version = "0.33.0"
description = "Python API for vespa.ai"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -7633,7 +7323,6 @@ ml = ["keras-tuner", "tensorflow", "tensorflow-ranking", "torch (<1.13)", "trans
name = "pywin32"
version = "306"
description = "Python for Window Extensions"
-category = "main"
optional = false
python-versions = "*"
files = [
@@ -7657,7 +7346,6 @@ files = [
name = "pywinpty"
version = "2.0.11"
description = "Pseudo terminal support for Windows from Python."
-category = "dev"
optional = false
python-versions = ">=3.8"
files = [
@@ -7672,7 +7360,6 @@ files = [
name = "pyyaml"
version = "6.0.1"
description = "YAML parser and emitter for Python"
-category = "main"
optional = false
python-versions = ">=3.6"
files = [
@@ -7722,7 +7409,6 @@ files = [
name = "pyzmq"
version = "25.1.1"
description = "Python bindings for 0MQ"
-category = "dev"
optional = false
python-versions = ">=3.6"
files = [
@@ -7828,7 +7514,6 @@ cffi = {version = "*", markers = "implementation_name == \"pypy\""}
name = "qdrant-client"
version = "1.4.0"
description = "Client library for the Qdrant vector search engine"
-category = "main"
optional = true
python-versions = ">=3.7,<3.12"
files = [
@@ -7849,7 +7534,6 @@ urllib3 = ">=1.26.14,<2.0.0"
name = "qtconsole"
version = "5.4.3"
description = "Jupyter Qt console"
-category = "dev"
optional = false
python-versions = ">= 3.7"
files = [
@@ -7876,7 +7560,6 @@ test = ["flaky", "pytest", "pytest-qt"]
name = "qtpy"
version = "2.3.1"
description = "Provides an abstraction layer on top of the various Qt bindings (PyQt5/6 and PySide2/6)."
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -7894,7 +7577,6 @@ test = ["pytest (>=6,!=7.0.0,!=7.0.1)", "pytest-cov (>=3.0.0)", "pytest-qt"]
name = "rank-bm25"
version = "0.2.2"
description = "Various BM25 algorithms for document ranking"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -7912,7 +7594,6 @@ dev = ["pytest"]
name = "rapidfuzz"
version = "3.2.0"
description = "rapid fuzzy string matching"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -8017,7 +7698,6 @@ full = ["numpy"]
name = "ratelimiter"
version = "1.2.0.post0"
description = "Simple python rate limiting object"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -8032,7 +7712,6 @@ test = ["pytest (>=3.0)", "pytest-asyncio"]
name = "rdflib"
version = "6.3.2"
description = "RDFLib is a Python library for working with RDF, a simple yet powerful language for representing information."
-category = "main"
optional = true
python-versions = ">=3.7,<4.0"
files = [
@@ -8054,7 +7733,6 @@ networkx = ["networkx (>=2.0.0,<3.0.0)"]
name = "redis"
version = "4.6.0"
description = "Python client for Redis database and key-value store"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -8073,7 +7751,6 @@ ocsp = ["cryptography (>=36.0.1)", "pyopenssl (==20.0.1)", "requests (>=2.26.0)"
name = "referencing"
version = "0.30.2"
description = "JSON Referencing + Python"
-category = "main"
optional = false
python-versions = ">=3.8"
files = [
@@ -8089,7 +7766,6 @@ rpds-py = ">=0.7.0"
name = "regex"
version = "2023.8.8"
description = "Alternative regular expression module, to replace re."
-category = "main"
optional = false
python-versions = ">=3.6"
files = [
@@ -8187,7 +7863,6 @@ files = [
name = "requests"
version = "2.31.0"
description = "Python HTTP for Humans."
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -8210,7 +7885,6 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
name = "requests-file"
version = "1.5.1"
description = "File transport adapter for Requests"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -8226,7 +7900,6 @@ six = "*"
name = "requests-oauthlib"
version = "1.3.1"
description = "OAuthlib authentication support for Requests."
-category = "main"
optional = true
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
files = [
@@ -8245,7 +7918,6 @@ rsa = ["oauthlib[signedtoken] (>=3.0.0)"]
name = "requests-toolbelt"
version = "1.0.0"
description = "A utility belt for advanced users of python-requests"
-category = "main"
optional = true
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
files = [
@@ -8260,7 +7932,6 @@ requests = ">=2.0.1,<3.0.0"
name = "responses"
version = "0.22.0"
description = "A utility library for mocking out the `requests` Python library."
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -8281,7 +7952,6 @@ tests = ["coverage (>=6.0.0)", "flake8", "mypy", "pytest (>=7.0.0)", "pytest-asy
name = "retry"
version = "0.9.2"
description = "Easy to use retry decorator."
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -8297,7 +7967,6 @@ py = ">=1.4.26,<2.0.0"
name = "rfc3339-validator"
version = "0.1.4"
description = "A pure python RFC3339 validator"
-category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
files = [
@@ -8312,7 +7981,6 @@ six = "*"
name = "rfc3986-validator"
version = "0.1.1"
description = "Pure python rfc3986 validator"
-category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
files = [
@@ -8324,7 +7992,6 @@ files = [
name = "rich"
version = "13.5.2"
description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
-category = "main"
optional = true
python-versions = ">=3.7.0"
files = [
@@ -8344,7 +8011,6 @@ jupyter = ["ipywidgets (>=7.5.1,<9)"]
name = "rpds-py"
version = "0.9.2"
description = "Python bindings to Rust's persistent data structures (rpds)"
-category = "main"
optional = false
python-versions = ">=3.8"
files = [
@@ -8451,7 +8117,6 @@ files = [
name = "rsa"
version = "4.9"
description = "Pure-Python RSA implementation"
-category = "main"
optional = true
python-versions = ">=3.6,<4"
files = [
@@ -8466,7 +8131,6 @@ pyasn1 = ">=0.1.3"
name = "ruff"
version = "0.0.249"
description = "An extremely fast Python linter, written in Rust."
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -8493,7 +8157,6 @@ files = [
name = "s3transfer"
version = "0.6.2"
description = "An Amazon S3 Transfer Manager"
-category = "main"
optional = true
python-versions = ">= 3.7"
files = [
@@ -8511,7 +8174,6 @@ crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"]
name = "safetensors"
version = "0.3.2"
description = "Fast and Safe Tensor serialization"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -8585,7 +8247,6 @@ torch = ["torch (>=1.10)"]
name = "scikit-learn"
version = "1.3.0"
description = "A set of python modules for machine learning and data mining"
-category = "main"
optional = true
python-versions = ">=3.8"
files = [
@@ -8628,7 +8289,6 @@ tests = ["black (>=23.3.0)", "matplotlib (>=3.1.3)", "mypy (>=1.3)", "numpydoc (
name = "scipy"
version = "1.9.3"
description = "Fundamental algorithms for scientific computing in Python"
-category = "main"
optional = true
python-versions = ">=3.8"
files = [
@@ -8667,7 +8327,6 @@ test = ["asv", "gmpy2", "mpmath", "pytest", "pytest-cov", "pytest-xdist", "sciki
name = "semver"
version = "3.0.1"
description = "Python helper for Semantic Versioning (https://semver.org)"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -8679,7 +8338,6 @@ files = [
name = "send2trash"
version = "1.8.2"
description = "Send file to trash natively under Mac OS X, Windows and Linux"
-category = "dev"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7"
files = [
@@ -8696,7 +8354,6 @@ win32 = ["pywin32"]
name = "sentence-transformers"
version = "2.2.2"
description = "Multilingual text embeddings"
-category = "main"
optional = true
python-versions = ">=3.6.0"
files = [
@@ -8719,7 +8376,6 @@ transformers = ">=4.6.0,<5.0.0"
name = "sentencepiece"
version = "0.1.99"
description = "SentencePiece python wrapper"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -8774,7 +8430,6 @@ files = [
name = "setuptools"
version = "67.8.0"
description = "Easily download, build, install, upgrade, and uninstall Python packages"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -8791,7 +8446,6 @@ testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (
name = "sgmllib3k"
version = "1.0.0"
description = "Py3k port of sgmllib."
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -8802,7 +8456,6 @@ files = [
name = "shapely"
version = "2.0.1"
description = "Manipulation and analysis of geometric objects"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -8850,14 +8503,13 @@ files = [
numpy = ">=1.14"
[package.extras]
-docs = ["matplotlib", "numpydoc (>=1.1.0,<1.2.0)", "sphinx", "sphinx-book-theme", "sphinx-remove-toctrees"]
+docs = ["matplotlib", "numpydoc (==1.1.*)", "sphinx", "sphinx-book-theme", "sphinx-remove-toctrees"]
test = ["pytest", "pytest-cov"]
[[package]]
name = "singlestoredb"
version = "0.7.1"
description = "Interface to the SingleStore database and cluster management APIs"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -8890,7 +8542,6 @@ sqlalchemy = ["sqlalchemy-singlestoredb"]
name = "six"
version = "1.16.0"
description = "Python 2 and 3 compatibility utilities"
-category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
files = [
@@ -8902,7 +8553,6 @@ files = [
name = "smmap"
version = "5.0.0"
description = "A pure Python implementation of a sliding window memory map manager"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -8914,7 +8564,6 @@ files = [
name = "sniffio"
version = "1.3.0"
description = "Sniff out which async library your code is running under"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -8926,7 +8575,6 @@ files = [
name = "socksio"
version = "1.0.0"
description = "Sans-I/O implementation of SOCKS4, SOCKS4A, and SOCKS5."
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -8938,7 +8586,6 @@ files = [
name = "soundfile"
version = "0.12.1"
description = "An audio library based on libsndfile, CFFI and NumPy"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -8962,7 +8609,6 @@ numpy = ["numpy"]
name = "soupsieve"
version = "2.4.1"
description = "A modern CSS selector implementation for Beautiful Soup."
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -8974,7 +8620,6 @@ files = [
name = "soxr"
version = "0.3.6"
description = "High quality, one-dimensional sample-rate conversion library"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -9016,7 +8661,6 @@ test = ["pytest"]
name = "sqlalchemy"
version = "2.0.20"
description = "Database Abstraction Library"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -9064,7 +8708,7 @@ files = [
]
[package.dependencies]
-greenlet = {version = "!=0.4.17", markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\""}
+greenlet = {version = "!=0.4.17", markers = "platform_machine == \"win32\" or platform_machine == \"WIN32\" or platform_machine == \"AMD64\" or platform_machine == \"amd64\" or platform_machine == \"x86_64\" or platform_machine == \"ppc64le\" or platform_machine == \"aarch64\""}
typing-extensions = ">=4.2.0"
[package.extras]
@@ -9095,7 +8739,6 @@ sqlcipher = ["sqlcipher3-binary"]
name = "sqlite-vss"
version = "0.1.2"
description = ""
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -9111,7 +8754,6 @@ test = ["pytest"]
name = "sqlitedict"
version = "2.1.0"
description = "Persistent dict in Python, backed up by sqlite3 and pickle, multithread-safe."
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -9122,7 +8764,6 @@ files = [
name = "sqlparams"
version = "5.1.0"
description = "Convert between various DB API 2.0 parameter styles."
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -9134,7 +8775,6 @@ files = [
name = "stack-data"
version = "0.6.2"
description = "Extract data from python stack frames and tracebacks for informative displays"
-category = "dev"
optional = false
python-versions = "*"
files = [
@@ -9152,41 +8792,40 @@ tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"]
[[package]]
name = "streamlit"
-version = "1.22.0"
+version = "1.26.0"
description = "A faster way to build and share data apps"
-category = "main"
optional = true
-python-versions = ">=3.7, !=3.9.7"
+python-versions = ">=3.8, !=3.9.7"
files = [
- {file = "streamlit-1.22.0-py2.py3-none-any.whl", hash = "sha256:520dd9b9e6efb559b5a9a22feadb48b1e6f0340ec83da3514810059fdecd4167"},
- {file = "streamlit-1.22.0.tar.gz", hash = "sha256:5bef9bf8deef32814d9565c9df48331e6357eb0b90dabc3ec4f53c44fb34fc73"},
+ {file = "streamlit-1.26.0-py2.py3-none-any.whl", hash = "sha256:2bfdac041816e2e1ba27f061d40112afe61e0d4e72d25f354b38ba81107b4cb3"},
+ {file = "streamlit-1.26.0.tar.gz", hash = "sha256:25475fb15a3cc9fb184945f3fc936f011998bd8386e0c892febe14c9625bf47a"},
]
[package.dependencies]
-altair = ">=3.2.0,<5"
-blinker = ">=1.0.0"
-cachetools = ">=4.0"
-click = ">=7.0"
-gitpython = "!=3.1.19"
-importlib-metadata = ">=1.4"
-numpy = "*"
-packaging = ">=14.1"
-pandas = ">=0.25,<3"
-pillow = ">=6.2.0"
-protobuf = ">=3.12,<4"
-pyarrow = ">=4.0"
-pydeck = ">=0.1.dev5"
-pympler = ">=0.9"
-python-dateutil = "*"
-requests = ">=2.4"
-rich = ">=10.11.0"
-tenacity = ">=8.0.0,<9"
-toml = "*"
-tornado = ">=6.0.3"
-typing-extensions = ">=3.10.0.0"
-tzlocal = ">=1.1"
-validators = ">=0.2"
-watchdog = {version = "*", markers = "platform_system != \"Darwin\""}
+altair = ">=4.0,<6"
+blinker = ">=1.0.0,<2"
+cachetools = ">=4.0,<6"
+click = ">=7.0,<9"
+gitpython = ">=3.0.7,<3.1.19 || >3.1.19,<4"
+importlib-metadata = ">=1.4,<7"
+numpy = ">=1.19.3,<2"
+packaging = ">=16.8,<24"
+pandas = ">=1.3.0,<3"
+pillow = ">=7.1.0,<10"
+protobuf = ">=3.20,<5"
+pyarrow = ">=6.0"
+pydeck = ">=0.8,<1"
+pympler = ">=0.9,<2"
+python-dateutil = ">=2.7.3,<3"
+requests = ">=2.18,<3"
+rich = ">=10.14.0,<14"
+tenacity = ">=8.1.0,<9"
+toml = ">=0.10.1,<2"
+tornado = ">=6.0.3,<7"
+typing-extensions = ">=4.1.0,<5"
+tzlocal = ">=1.1,<5"
+validators = ">=0.2,<1"
+watchdog = {version = ">=2.1.5", markers = "platform_system != \"Darwin\""}
[package.extras]
snowflake = ["snowflake-snowpark-python"]
@@ -9195,7 +8834,6 @@ snowflake = ["snowflake-snowpark-python"]
name = "stringcase"
version = "1.2.0"
description = "String case converter."
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -9206,7 +8844,6 @@ files = [
name = "sympy"
version = "1.12"
description = "Computer algebra system (CAS) in Python"
-category = "main"
optional = true
python-versions = ">=3.8"
files = [
@@ -9221,7 +8858,6 @@ mpmath = ">=0.19"
name = "syrupy"
version = "4.2.1"
description = "Pytest Snapshot Test Utility"
-category = "dev"
optional = false
python-versions = ">=3.8.1,<4"
files = [
@@ -9237,7 +8873,6 @@ pytest = ">=7.0.0,<8.0.0"
name = "telethon"
version = "1.29.3"
description = "Full-featured Telegram client library for Python 3"
-category = "main"
optional = true
python-versions = ">=3.5"
files = [
@@ -9255,7 +8890,6 @@ cryptg = ["cryptg"]
name = "tenacity"
version = "8.2.3"
description = "Retry code until it succeeds"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -9270,7 +8904,6 @@ doc = ["reno", "sphinx", "tornado (>=4.5)"]
name = "tensorboard"
version = "2.13.0"
description = "TensorBoard lets you watch Tensors Flow"
-category = "main"
optional = true
python-versions = ">=3.8"
files = [
@@ -9295,7 +8928,6 @@ wheel = ">=0.26"
name = "tensorboard-data-server"
version = "0.7.1"
description = "Fast data loading for TensorBoard"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -9308,7 +8940,6 @@ files = [
name = "tensorflow"
version = "2.13.0"
description = "TensorFlow is an open source machine learning framework for everyone."
-category = "main"
optional = true
python-versions = ">=3.8"
files = [
@@ -9361,7 +8992,6 @@ wrapt = ">=1.11.0"
name = "tensorflow-estimator"
version = "2.13.0"
description = "TensorFlow Estimator."
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -9372,7 +9002,6 @@ files = [
name = "tensorflow-hub"
version = "0.14.0"
description = "TensorFlow Hub is a library to foster the publication, discovery, and consumption of reusable parts of machine learning models."
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -9387,7 +9016,6 @@ protobuf = ">=3.19.6"
name = "tensorflow-io-gcs-filesystem"
version = "0.33.0"
description = "TensorFlow IO"
-category = "main"
optional = true
python-versions = ">=3.7, <3.12"
files = [
@@ -9418,7 +9046,6 @@ tensorflow-rocm = ["tensorflow-rocm (>=2.13.0,<2.14.0)"]
name = "tensorflow-macos"
version = "2.13.0"
description = "TensorFlow is an open source machine learning framework for everyone."
-category = "main"
optional = true
python-versions = ">=3.8"
files = [
@@ -9454,7 +9081,6 @@ wrapt = ">=1.11.0"
name = "tensorflow-text"
version = "2.13.0"
description = "TF.Text is a TensorFlow library of text related ops, modules, and subgraphs."
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -9479,7 +9105,6 @@ tests = ["absl-py", "pytest", "tensorflow-datasets (>=3.2.0)"]
name = "termcolor"
version = "2.3.0"
description = "ANSI color formatting for output in terminal"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -9494,7 +9119,6 @@ tests = ["pytest", "pytest-cov"]
name = "terminado"
version = "0.17.1"
description = "Tornado websocket backend for the Xterm.js Javascript terminal emulator library."
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -9515,7 +9139,6 @@ test = ["pre-commit", "pytest (>=7.0)", "pytest-timeout"]
name = "textstat"
version = "0.7.3"
description = "Calculate statistical features from text"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -9530,7 +9153,6 @@ pyphen = "*"
name = "threadpoolctl"
version = "3.2.0"
description = "threadpoolctl"
-category = "main"
optional = true
python-versions = ">=3.8"
files = [
@@ -9542,7 +9164,6 @@ files = [
name = "tigrisdb"
version = "1.0.0b6"
description = "Python SDK for Tigris "
-category = "main"
optional = true
python-versions = ">=3.8,<4.0"
files = [
@@ -9558,7 +9179,6 @@ protobuf = ">=3.19.6"
name = "tiktoken"
version = "0.3.3"
description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models"
-category = "main"
optional = false
python-versions = ">=3.8"
files = [
@@ -9604,7 +9224,6 @@ blobfile = ["blobfile (>=2)"]
name = "tinycss2"
version = "1.2.1"
description = "A tiny CSS parser"
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -9623,7 +9242,6 @@ test = ["flake8", "isort", "pytest"]
name = "tinysegmenter"
version = "0.3"
description = "Very compact Japanese tokenizer"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -9634,7 +9252,6 @@ files = [
name = "tldextract"
version = "3.4.4"
description = "Accurately separates a URL's subdomain, domain, and public suffix, using the Public Suffix List (PSL). By default, this includes the public ICANN TLDs and their exceptions. You can optionally support the Public Suffix List's private domains as well."
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -9652,7 +9269,6 @@ requests-file = ">=1.4"
name = "tokenizers"
version = "0.13.3"
description = "Fast and Customizable Tokenizers"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -9707,7 +9323,6 @@ testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"]
name = "toml"
version = "0.10.2"
description = "Python Library for Tom's Obvious, Minimal Language"
-category = "main"
optional = false
python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
files = [
@@ -9719,7 +9334,6 @@ files = [
name = "tomli"
version = "2.0.1"
description = "A lil' TOML parser"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -9731,7 +9345,6 @@ files = [
name = "toolz"
version = "0.12.0"
description = "List processing tools and functional utilities"
-category = "main"
optional = true
python-versions = ">=3.5"
files = [
@@ -9743,7 +9356,6 @@ files = [
name = "torch"
version = "1.13.1"
description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
-category = "main"
optional = true
python-versions = ">=3.7.0"
files = [
@@ -9784,7 +9396,6 @@ opt-einsum = ["opt-einsum (>=3.3)"]
name = "torchvision"
version = "0.14.1"
description = "image and video datasets and models for torch deep learning"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -9811,7 +9422,7 @@ files = [
[package.dependencies]
numpy = "*"
-pillow = ">=5.3.0,<8.3.0 || >=8.4.0"
+pillow = ">=5.3.0,<8.3.dev0 || >=8.4.dev0"
requests = "*"
torch = "1.13.1"
typing-extensions = "*"
@@ -9823,7 +9434,6 @@ scipy = ["scipy"]
name = "tornado"
version = "6.3.3"
description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed."
-category = "main"
optional = false
python-versions = ">= 3.8"
files = [
@@ -9844,7 +9454,6 @@ files = [
name = "tqdm"
version = "4.66.1"
description = "Fast, Extensible Progress Meter"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -9865,7 +9474,6 @@ telegram = ["requests"]
name = "traitlets"
version = "5.9.0"
description = "Traitlets Python configuration system"
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -9881,7 +9489,6 @@ test = ["argcomplete (>=2.0)", "pre-commit", "pytest", "pytest-mock"]
name = "transformers"
version = "4.32.0"
description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow"
-category = "main"
optional = true
python-versions = ">=3.8.0"
files = [
@@ -9951,7 +9558,6 @@ vision = ["Pillow (<10.0.0)"]
name = "tritonclient"
version = "2.34.0"
description = "Python client library and utilities for communicating with Triton Inference Server"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -9973,7 +9579,6 @@ http = ["aiohttp (>=3.8.1,<4.0.0)", "geventhttpclient (>=1.4.4,<=2.0.2)", "numpy
name = "types-chardet"
version = "5.0.4.6"
description = "Typing stubs for chardet"
-category = "dev"
optional = false
python-versions = "*"
files = [
@@ -9985,7 +9590,6 @@ files = [
name = "types-protobuf"
version = "4.24.0.1"
description = "Typing stubs for protobuf"
-category = "dev"
optional = false
python-versions = "*"
files = [
@@ -9997,7 +9601,6 @@ files = [
name = "types-pyopenssl"
version = "23.2.0.2"
description = "Typing stubs for pyOpenSSL"
-category = "dev"
optional = false
python-versions = "*"
files = [
@@ -10012,7 +9615,6 @@ cryptography = ">=35.0.0"
name = "types-pytz"
version = "2023.3.0.1"
description = "Typing stubs for pytz"
-category = "dev"
optional = false
python-versions = "*"
files = [
@@ -10024,7 +9626,6 @@ files = [
name = "types-pyyaml"
version = "6.0.12.11"
description = "Typing stubs for PyYAML"
-category = "dev"
optional = false
python-versions = "*"
files = [
@@ -10036,7 +9637,6 @@ files = [
name = "types-redis"
version = "4.6.0.5"
description = "Typing stubs for redis"
-category = "dev"
optional = false
python-versions = "*"
files = [
@@ -10052,7 +9652,6 @@ types-pyOpenSSL = "*"
name = "types-requests"
version = "2.31.0.2"
description = "Typing stubs for requests"
-category = "main"
optional = false
python-versions = "*"
files = [
@@ -10067,7 +9666,6 @@ types-urllib3 = "*"
name = "types-toml"
version = "0.10.8.7"
description = "Typing stubs for toml"
-category = "dev"
optional = false
python-versions = "*"
files = [
@@ -10079,7 +9677,6 @@ files = [
name = "types-urllib3"
version = "1.26.25.14"
description = "Typing stubs for urllib3"
-category = "main"
optional = false
python-versions = "*"
files = [
@@ -10091,7 +9688,6 @@ files = [
name = "typing-extensions"
version = "4.5.0"
description = "Backported and Experimental Type Hints for Python 3.7+"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -10103,7 +9699,6 @@ files = [
name = "typing-inspect"
version = "0.9.0"
description = "Runtime inspection utilities for typing module."
-category = "main"
optional = false
python-versions = "*"
files = [
@@ -10119,7 +9714,6 @@ typing-extensions = ">=3.7.4"
name = "tzdata"
version = "2023.3"
description = "Provider of IANA time zone data"
-category = "main"
optional = false
python-versions = ">=2"
files = [
@@ -10131,7 +9725,6 @@ files = [
name = "tzlocal"
version = "4.3.1"
description = "tzinfo object for the local timezone"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -10151,7 +9744,6 @@ devenv = ["black", "check-manifest", "flake8", "pyroma", "pytest (>=4.3)", "pyte
name = "uri-template"
version = "1.3.0"
description = "RFC 6570 URI Template Processor"
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -10166,7 +9758,6 @@ dev = ["flake8", "flake8-annotations", "flake8-bandit", "flake8-bugbear", "flake
name = "uritemplate"
version = "4.1.1"
description = "Implementation of RFC 6570 URI Templates"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -10178,7 +9769,6 @@ files = [
name = "urllib3"
version = "1.26.16"
description = "HTTP library with thread-safe connection pooling, file post, and more."
-category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
files = [
@@ -10195,7 +9785,6 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
name = "validators"
version = "0.21.0"
description = "Python Data Validation for Humans™"
-category = "main"
optional = true
python-versions = ">=3.8,<4.0"
files = [
@@ -10207,7 +9796,6 @@ files = [
name = "vcrpy"
version = "5.1.0"
description = "Automatically mock your HTTP interactions to simplify and speed up testing"
-category = "dev"
optional = false
python-versions = ">=3.8"
files = [
@@ -10225,7 +9813,6 @@ yarl = "*"
name = "watchdog"
version = "3.0.0"
description = "Filesystem events monitoring"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -10265,7 +9852,6 @@ watchmedo = ["PyYAML (>=3.10)"]
name = "wcwidth"
version = "0.2.6"
description = "Measures the displayed width of unicode strings in a terminal"
-category = "dev"
optional = false
python-versions = "*"
files = [
@@ -10277,7 +9863,6 @@ files = [
name = "weaviate-client"
version = "3.23.0"
description = "A python native Weaviate client"
-category = "main"
optional = true
python-versions = ">=3.8"
files = [
@@ -10298,7 +9883,6 @@ grpc = ["grpcio", "grpcio-tools"]
name = "webcolors"
version = "1.13"
description = "A library for working with the color formats defined by HTML and CSS."
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -10314,7 +9898,6 @@ tests = ["pytest", "pytest-cov"]
name = "webencodings"
version = "0.5.1"
description = "Character encoding aliases for legacy web content"
-category = "dev"
optional = false
python-versions = "*"
files = [
@@ -10326,7 +9909,6 @@ files = [
name = "websocket-client"
version = "1.6.2"
description = "WebSocket client for Python with low level API options"
-category = "main"
optional = false
python-versions = ">=3.8"
files = [
@@ -10343,7 +9925,6 @@ test = ["websockets"]
name = "websockets"
version = "11.0.3"
description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -10423,7 +10004,6 @@ files = [
name = "werkzeug"
version = "2.3.7"
description = "The comprehensive WSGI web application library."
-category = "main"
optional = true
python-versions = ">=3.8"
files = [
@@ -10441,7 +10021,6 @@ watchdog = ["watchdog (>=2.3)"]
name = "wget"
version = "3.2"
description = "pure python download utility"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -10452,7 +10031,6 @@ files = [
name = "wheel"
version = "0.41.2"
description = "A built-package format for Python"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -10467,7 +10045,6 @@ test = ["pytest (>=6.0.0)", "setuptools (>=65)"]
name = "whylabs-client"
version = "0.5.4"
description = "WhyLabs API client"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -10483,7 +10060,6 @@ urllib3 = ">=1.25.3"
name = "whylogs"
version = "1.2.6"
description = "Profile and monitor your ML data pipeline end-to-end"
-category = "main"
optional = true
python-versions = ">=3.7.1,<4"
files = [
@@ -10517,7 +10093,6 @@ viz = ["Pillow (>=9.2.0,<10.0.0)", "ipython", "numpy", "numpy (>=1.23.2)", "pyba
name = "whylogs-sketching"
version = "3.4.1.dev3"
description = "sketching library of whylogs"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -10558,7 +10133,6 @@ files = [
name = "widgetsnbextension"
version = "4.0.8"
description = "Jupyter interactive widgets for Jupyter Notebook"
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -10570,7 +10144,6 @@ files = [
name = "wikipedia"
version = "1.4.0"
description = "Wikipedia API for Python"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -10585,7 +10158,6 @@ requests = ">=2.0.0,<3.0.0"
name = "win32-setctime"
version = "1.1.0"
description = "A small Python utility to set file creation time on Windows"
-category = "main"
optional = true
python-versions = ">=3.5"
files = [
@@ -10600,7 +10172,6 @@ dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"]
name = "wolframalpha"
version = "5.0.0"
description = "Wolfram|Alpha 2.0 API client"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -10621,7 +10192,6 @@ testing = ["keyring", "pmxbot", "pytest (>=3.5,!=3.7.3)", "pytest-black (>=0.3.7
name = "wonderwords"
version = "2.2.0"
description = "A python package for random words and sentences in the english language"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -10636,7 +10206,6 @@ cli = ["rich (==9.10.0)"]
name = "wrapt"
version = "1.15.0"
description = "Module for decorators, wrappers and monkey patching."
-category = "main"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7"
files = [
@@ -10721,7 +10290,6 @@ files = [
name = "xata"
version = "1.0.0b0"
description = "Python client for Xata.io"
-category = "main"
optional = true
python-versions = ">=3.8,<4.0"
files = [
@@ -10739,7 +10307,6 @@ requests = ">=2.28.1,<3.0.0"
name = "xmltodict"
version = "0.13.0"
description = "Makes working with XML feel like you are working with JSON"
-category = "main"
optional = true
python-versions = ">=3.4"
files = [
@@ -10751,7 +10318,6 @@ files = [
name = "yarl"
version = "1.9.2"
description = "Yet another URL library"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -10839,7 +10405,6 @@ multidict = ">=4.0"
name = "zipp"
version = "3.16.2"
description = "Backport of pathlib-compatible object wrapper for zip files"
-category = "main"
optional = false
python-versions = ">=3.8"
files = [
@@ -10855,7 +10420,6 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
name = "zstandard"
version = "0.21.0"
description = "Zstandard bindings for Python"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -10911,15 +10475,15 @@ cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\
cffi = ["cffi (>=1.11)"]
[extras]
-all = ["clarifai", "cohere", "openai", "nlpcloud", "huggingface_hub", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence-transformers", "transformers", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "marqo", "pymongo", "weaviate-client", "redis", "google-api-python-client", "google-auth", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "libdeeplake", "pgvector", "psycopg2-binary", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache", "duckduckgo-search", "arxiv", "azure-identity", "clickhouse-connect", "azure-cosmos", "lancedb", "langkit", "lark", "pexpect", "pyvespa", "O365", "jq", "docarray", "pdfminer-six", "lxml", "requests-toolbelt", "neo4j", "openlm", "azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech", "momento", "singlestoredb", "tigrisdb", "nebula3-python", "awadb", "esprima", "rdflib", "amadeus", "librosa", "python-arango"]
-azure = ["azure-identity", "azure-cosmos", "openai", "azure-core", "azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech", "azure-search-documents"]
+all = ["O365", "aleph-alpha-client", "amadeus", "arxiv", "atlassian-python-api", "awadb", "azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech", "azure-cosmos", "azure-identity", "beautifulsoup4", "clarifai", "clickhouse-connect", "cohere", "deeplake", "docarray", "duckduckgo-search", "elasticsearch", "esprima", "faiss-cpu", "google-api-python-client", "google-auth", "google-search-results", "gptcache", "html2text", "huggingface_hub", "jinja2", "jq", "lancedb", "langkit", "lark", "libdeeplake", "librosa", "lxml", "manifest-ml", "marqo", "momento", "nebula3-python", "neo4j", "networkx", "nlpcloud", "nltk", "nomic", "openai", "openlm", "opensearch-py", "pdfminer-six", "pexpect", "pgvector", "pinecone-client", "pinecone-text", "psycopg2-binary", "pymongo", "pyowm", "pypdf", "pytesseract", "python-arango", "pyvespa", "qdrant-client", "rdflib", "redis", "requests-toolbelt", "sentence-transformers", "singlestoredb", "tensorflow-text", "tigrisdb", "tiktoken", "torch", "transformers", "weaviate-client", "wikipedia", "wolframalpha"]
+azure = ["azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech", "azure-core", "azure-cosmos", "azure-identity", "azure-search-documents", "openai"]
clarifai = ["clarifai"]
cohere = ["cohere"]
docarray = ["docarray"]
embeddings = ["sentence-transformers"]
-extended-testing = ["amazon-textract-caller", "assemblyai", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "esprima", "jq", "pdfminer-six", "pgvector", "pypdf", "pymupdf", "pypdfium2", "tqdm", "lxml", "atlassian-python-api", "mwparserfromhell", "mwxml", "pandas", "telethon", "psychicapi", "gql", "requests-toolbelt", "html2text", "py-trello", "scikit-learn", "streamlit", "pyspark", "openai", "sympy", "rapidfuzz", "openai", "rank-bm25", "geopandas", "jinja2", "gitpython", "newspaper3k", "feedparser", "xata", "xmltodict", "faiss-cpu", "openapi-schema-pydantic", "markdownify", "dashvector", "sqlite-vss"]
+extended-testing = ["amazon-textract-caller", "assemblyai", "atlassian-python-api", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "dashvector", "esprima", "faiss-cpu", "feedparser", "geopandas", "gitpython", "gql", "html2text", "jinja2", "jq", "lxml", "markdownify", "mwparserfromhell", "mwxml", "newspaper3k", "openai", "openai", "openapi-schema-pydantic", "pandas", "pdfminer-six", "pgvector", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "requests-toolbelt", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "tqdm", "xata", "xmltodict"]
javascript = ["esprima"]
-llms = ["clarifai", "cohere", "openai", "openlm", "nlpcloud", "huggingface_hub", "manifest-ml", "torch", "transformers"]
+llms = ["clarifai", "cohere", "huggingface_hub", "manifest-ml", "nlpcloud", "openai", "openlm", "torch", "transformers"]
openai = ["openai", "tiktoken"]
qdrant = ["qdrant-client"]
text-helpers = ["chardet"]
diff --git a/libs/langchain/pyproject.toml b/libs/langchain/pyproject.toml
index 9fda48eb817..2296bb461d9 100644
--- a/libs/langchain/pyproject.toml
+++ b/libs/langchain/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "langchain"
-version = "0.0.280"
+version = "0.0.284"
description = "Building applications with LLMs through composability"
authors = []
license = "MIT"
diff --git a/libs/langchain/tests/data.py b/libs/langchain/tests/data.py
index 228a9b212e1..c3b240bbc57 100644
--- a/libs/langchain/tests/data.py
+++ b/libs/langchain/tests/data.py
@@ -8,3 +8,4 @@ _EXAMPLES_DIR = _THIS_DIR / "integration_tests" / "examples"
# Paths to test PDF files
HELLO_PDF = _EXAMPLES_DIR / "hello.pdf"
LAYOUT_PARSER_PAPER_PDF = _EXAMPLES_DIR / "layout-parser-paper.pdf"
+DUPLICATE_CHARS = _EXAMPLES_DIR / "duplicate-chars.pdf"
diff --git a/libs/langchain/tests/integration_tests/document_loaders/parsers/test_pdf_parsers.py b/libs/langchain/tests/integration_tests/document_loaders/parsers/test_pdf_parsers.py
index 7b76e0f721f..408498c126a 100644
--- a/libs/langchain/tests/integration_tests/document_loaders/parsers/test_pdf_parsers.py
+++ b/libs/langchain/tests/integration_tests/document_loaders/parsers/test_pdf_parsers.py
@@ -19,6 +19,10 @@ LAYOUT_PARSER_PAPER_PDF = (
Path(__file__).parent.parent.parent / "examples" / "layout-parser-paper.pdf"
)
+DUPLICATE_CHARS = (
+ Path(__file__).parent.parent.parent / "examples" / "duplicate-chars.pdf"
+)
+
def _assert_with_parser(parser: BaseBlobParser, splits_by_page: bool = True) -> None:
"""Standard tests to verify that the given parser works.
@@ -59,6 +63,26 @@ def _assert_with_parser(parser: BaseBlobParser, splits_by_page: bool = True) ->
assert metadata["page"] == 0
+def _assert_with_duplicate_parser(parser: BaseBlobParser, dedupe: bool = False) -> None:
+ """PDFPlumber tests to verify that duplicate characters appear or not
+ Args:
+ parser (BaseBlobParser): The parser to test.
+ splits_by_page (bool): Whether the parser splits by page or not by default.
+ dedupe: Avoiding the error of duplicate characters if `dedupe=True`.
+ """
+ blob = Blob.from_path(DUPLICATE_CHARS)
+ doc_generator = parser.lazy_parse(blob)
+ assert isinstance(doc_generator, Iterator)
+ docs = list(doc_generator)
+
+ if dedupe:
+        # With dedupe enabled, duplicate characters are removed.
+ assert "1000 Series" == docs[0].page_content.split("\n")[0]
+ else:
+        # Without dedupe, duplicate characters remain in the parsed text.
+ assert "11000000 SSeerriieess" == docs[0].page_content.split("\n")[0]
+
+
def test_pymupdf_loader() -> None:
"""Test PyMuPDF loader."""
_assert_with_parser(PyMuPDFParser())
@@ -84,3 +108,5 @@ def test_pypdfium2_parser() -> None:
def test_pdfplumber_parser() -> None:
"""Test PDFPlumber parser."""
_assert_with_parser(PDFPlumberParser())
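+    # Exercise dedupe both disabled (the default) and enabled.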
+ _assert_with_duplicate_parser(PDFPlumberParser())
+ _assert_with_duplicate_parser(PDFPlumberParser(dedupe=True), dedupe=True)
diff --git a/libs/langchain/tests/integration_tests/document_loaders/test_url_playwright.py b/libs/langchain/tests/integration_tests/document_loaders/test_url_playwright.py
index 7bea1c6dee7..eb53682d75b 100644
--- a/libs/langchain/tests/integration_tests/document_loaders/test_url_playwright.py
+++ b/libs/langchain/tests/integration_tests/document_loaders/test_url_playwright.py
@@ -7,7 +7,9 @@ from langchain.document_loaders import PlaywrightURLLoader
from langchain.document_loaders.url_playwright import PlaywrightEvaluator
if TYPE_CHECKING:
- from playwright.async_api import AsyncBrowser, AsyncPage, AsyncResponse
+ from playwright.async_api import Browser as AsyncBrowser
+ from playwright.async_api import Page as AsyncPage
+ from playwright.async_api import Response as AsyncResponse
from playwright.sync_api import Browser, Page, Response
diff --git a/libs/langchain/tests/integration_tests/examples/duplicate-chars.pdf b/libs/langchain/tests/integration_tests/examples/duplicate-chars.pdf
new file mode 100644
index 00000000000..47467cd035d
Binary files /dev/null and b/libs/langchain/tests/integration_tests/examples/duplicate-chars.pdf differ
diff --git a/libs/langchain/tests/integration_tests/vectorstores/test_nucliadb.py b/libs/langchain/tests/integration_tests/vectorstores/test_nucliadb.py
new file mode 100644
index 00000000000..1cfeea0da43
--- /dev/null
+++ b/libs/langchain/tests/integration_tests/vectorstores/test_nucliadb.py
@@ -0,0 +1,98 @@
+from typing import Any
+from unittest import mock
+
+from langchain.vectorstores.nucliadb import NucliaDB
+
+
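+# Minimal dict subclass that also supports attribute access (d.key),
+# approximating the response objects returned by the nuclia SDK.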
+class attrdict(dict):
+ def __getitem__(self, key: str) -> Any:
+ value = dict.__getitem__(self, key)
+ return attrdict(value) if isinstance(value, dict) else value
+
+ __getattr__ = __getitem__
+
+
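+# Factories for mock.patch(..., new_callable=...): each returns a function
+# that stands in for the corresponding nuclia SDK method.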
+def FakeCreate(**args: Any) -> Any:
+ def fn(self: Any, **kwargs: Any) -> str:
+ return "fake_uuid"
+
+ return fn
+
+
+def FakeDelete(**args: Any) -> Any:
+ def fn(self: Any, **kwargs: Any) -> None:
+ return None
+
+ return fn
+
+
+def FakeFind(**args: Any) -> Any:
+ def fn(self: Any, **kwargs: Any) -> Any:
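+        # Nested payload shaped like a NucliaDB find response:
+        # resources -> fields -> paragraphs, plus text bodies and extra metadata.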
+ return attrdict(
+ {
+ "resources": {
+ "123": attrdict(
+ {
+ "fields": {
+ "456": attrdict(
+ {
+ "paragraphs": {
+ "123/t/text/0-14": attrdict(
+ {
+ "text": "This is a test",
+ "order": 0,
+ }
+ ),
+ }
+ }
+ )
+ },
+ "data": {
+ "texts": {
+ "text": {
+ "body": "This is a test",
+ }
+ }
+ },
+ "extra": attrdict({"metadata": {"some": "metadata"}}),
+ }
+ )
+ }
+ }
+ )
+
+ return fn
+
+
+def test_add_texts() -> None:
+ with mock.patch(
+ "nuclia.sdk.resource.NucliaResource.create",
+ new_callable=FakeCreate,
+ ):
+ ndb = NucliaDB(knowledge_box="YOUR_KB_ID", local=False, api_key="YOUR_API_KEY")
+ assert ndb.is_local is False
+ ids = ndb.add_texts(["This is a new test", "This is a second test"])
+ assert len(ids) == 2
+
+
+def test_delete() -> None:
+ with mock.patch(
+ "nuclia.sdk.resource.NucliaResource.delete",
+ new_callable=FakeDelete,
+ ):
+ ndb = NucliaDB(knowledge_box="YOUR_KB_ID", local=False, api_key="YOUR_API_KEY")
+ success = ndb.delete(["123", "456"])
+ assert success
+
+
+def test_search() -> None:
+ with mock.patch(
+ "nuclia.sdk.search.NucliaSearch.find",
+ new_callable=FakeFind,
+ ):
+ ndb = NucliaDB(knowledge_box="YOUR_KB_ID", local=False, api_key="YOUR_API_KEY")
+ results = ndb.similarity_search("Who was inspired by Ada Lovelace?")
+ assert len(results) == 1
+ assert results[0].page_content == "This is a test"
+ assert results[0].metadata["extra"]["some"] == "metadata"
+ assert results[0].metadata["value"]["body"] == "This is a test"
diff --git a/libs/langchain/tests/unit_tests/schema/runnable/__snapshots__/test_runnable.ambr b/libs/langchain/tests/unit_tests/schema/runnable/__snapshots__/test_runnable.ambr
index 5ea21b13d61..63c0acc38d7 100644
--- a/libs/langchain/tests/unit_tests/schema/runnable/__snapshots__/test_runnable.ambr
+++ b/libs/langchain/tests/unit_tests/schema/runnable/__snapshots__/test_runnable.ambr
@@ -467,7 +467,7 @@
# ---
# name: test_combining_sequences.3
list([
- Run(id=UUID('00000000-0000-4000-8000-000000000000'), name='RunnableSequence', start_time=FakeDatetime(2023, 1, 1, 0, 0), run_type='chain', end_time=FakeDatetime(2023, 1, 1, 0, 0), extra={}, error=None, serialized={'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'runnable', 'RunnableSequence'], 'kwargs': {'first': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'ChatPromptTemplate'], 'kwargs': {'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'SystemMessagePromptTemplate'], 'kwargs': {'prompt': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'prompt', 'PromptTemplate'], 'kwargs': {'input_variables': [], 'template': 'You are a nice assistant.', 'template_format': 'f-string', 'partial_variables': {}}}}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'HumanMessagePromptTemplate'], 'kwargs': {'prompt': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'prompt', 'PromptTemplate'], 'kwargs': {'input_variables': ['question'], 'template': '{question}', 'template_format': 'f-string', 'partial_variables': {}}}}}], 'input_variables': ['question']}}, 'middle': [{'lc': 1, 'type': 'not_implemented', 'id': ['langchain', 'chat_models', 'fake', 'FakeListChatModel'], 'repr': "FakeListChatModel(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, responses=['foo, bar'], sleep=None, i=0)"}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'output_parsers', 'list', 'CommaSeparatedListOutputParser'], 'kwargs': {}}, {'lc': 1, 'type': 'not_implemented', 'id': ['langchain', 'schema', 'runnable', 'base', 'RunnableLambda'], 'repr': 'RunnableLambda(...)'}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'ChatPromptTemplate'], 'kwargs': {'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'SystemMessagePromptTemplate'], 'kwargs': {'prompt': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'prompt', 'PromptTemplate'], 'kwargs': {'input_variables': [], 'template': 'You are a nicer assistant.', 'template_format': 'f-string', 'partial_variables': {}}}}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'HumanMessagePromptTemplate'], 'kwargs': {'prompt': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'prompt', 'PromptTemplate'], 'kwargs': {'input_variables': ['question'], 'template': '{question}', 'template_format': 'f-string', 'partial_variables': {}}}}}], 'input_variables': ['question']}}, {'lc': 1, 'type': 'not_implemented', 'id': ['langchain', 'chat_models', 'fake', 'FakeListChatModel'], 'repr': "FakeListChatModel(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, responses=['baz, qux'], sleep=None, i=0)"}], 'last': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'output_parsers', 'list', 'CommaSeparatedListOutputParser'], 'kwargs': {}}}}, events=[{'name': 'start', 'time': FakeDatetime(2023, 1, 1, 0, 0)}, {'name': 'end', 'time': FakeDatetime(2023, 1, 1, 0, 0)}], inputs={'question': 'What is your name?'}, outputs={'output': ['baz', 'qux']}, reference_example_id=None, parent_run_id=None, tags=[], execution_order=None, child_execution_order=None, child_runs=[Run(id=UUID('00000000-0000-4000-8000-000000000001'), name='ChatPromptTemplate', start_time=FakeDatetime(2023, 1, 1, 0, 0), run_type='prompt', end_time=FakeDatetime(2023, 1, 1, 0, 0), extra={}, error=None, serialized={'lc': 1, 'type': 
'constructor', 'id': ['langchain', 'prompts', 'chat', 'ChatPromptTemplate'], 'kwargs': {'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'SystemMessagePromptTemplate'], 'kwargs': {'prompt': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'prompt', 'PromptTemplate'], 'kwargs': {'input_variables': [], 'template': 'You are a nice assistant.', 'template_format': 'f-string', 'partial_variables': {}}}}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'HumanMessagePromptTemplate'], 'kwargs': {'prompt': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'prompt', 'PromptTemplate'], 'kwargs': {'input_variables': ['question'], 'template': '{question}', 'template_format': 'f-string', 'partial_variables': {}}}}}], 'input_variables': ['question']}}, events=[{'name': 'start', 'time': FakeDatetime(2023, 1, 1, 0, 0)}, {'name': 'end', 'time': FakeDatetime(2023, 1, 1, 0, 0)}], inputs={'question': 'What is your name?'}, outputs={'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'ChatPromptValue'], 'kwargs': {'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'SystemMessage'], 'kwargs': {'content': 'You are a nice assistant.', 'additional_kwargs': {}}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is your name?', 'additional_kwargs': {}}}]}}, reference_example_id=None, parent_run_id=UUID('00000000-0000-4000-8000-000000000000'), tags=['seq:step:1'], execution_order=None, child_execution_order=None, child_runs=[]), Run(id=UUID('00000000-0000-4000-8000-000000000002'), name='FakeListChatModel', start_time=FakeDatetime(2023, 1, 1, 0, 0), run_type='llm', end_time=FakeDatetime(2023, 1, 1, 0, 0), extra={'invocation_params': {'responses': ['foo, bar'], '_type': 'fake-list-chat-model', 'stop': None}, 'options': {'stop': None}}, error=None, serialized={'lc': 1, 'type': 'not_implemented', 'id': ['langchain', 'chat_models', 'fake', 'FakeListChatModel'], 'repr': "FakeListChatModel(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, responses=['foo, bar'], sleep=None, i=0)"}, events=[{'name': 'start', 'time': FakeDatetime(2023, 1, 1, 0, 0)}, {'name': 'end', 'time': FakeDatetime(2023, 1, 1, 0, 0)}], inputs={'prompts': ['System: You are a nice assistant.\nHuman: What is your name?']}, outputs={'generations': [[{'text': 'foo, bar', 'generation_info': None, 'message': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': 'foo, bar'}}}]], 'llm_output': None, 'run': None}, reference_example_id=None, parent_run_id=UUID('00000000-0000-4000-8000-000000000000'), tags=['seq:step:2'], execution_order=None, child_execution_order=None, child_runs=[]), Run(id=UUID('00000000-0000-4000-8000-000000000003'), name='CommaSeparatedListOutputParser', start_time=FakeDatetime(2023, 1, 1, 0, 0), run_type='parser', end_time=FakeDatetime(2023, 1, 1, 0, 0), extra={}, error=None, serialized={'lc': 1, 'type': 'constructor', 'id': ['langchain', 'output_parsers', 'list', 'CommaSeparatedListOutputParser'], 'kwargs': {}}, events=[{'name': 'start', 'time': FakeDatetime(2023, 1, 1, 0, 0)}, {'name': 'end', 'time': FakeDatetime(2023, 1, 1, 0, 0)}], inputs={'input': AIMessage(content='foo, bar', additional_kwargs={}, example=False)}, outputs={'output': ['foo', 'bar']}, reference_example_id=None, 
parent_run_id=UUID('00000000-0000-4000-8000-000000000000'), tags=['seq:step:3'], execution_order=None, child_execution_order=None, child_runs=[]), Run(id=UUID('00000000-0000-4000-8000-000000000004'), name='RunnableLambda', start_time=FakeDatetime(2023, 1, 1, 0, 0), run_type='chain', end_time=FakeDatetime(2023, 1, 1, 0, 0), extra={}, error=None, serialized={'lc': 1, 'type': 'not_implemented', 'id': ['langchain', 'schema', 'runnable', 'base', 'RunnableLambda'], 'repr': 'RunnableLambda(...)'}, events=[{'name': 'start', 'time': FakeDatetime(2023, 1, 1, 0, 0)}, {'name': 'end', 'time': FakeDatetime(2023, 1, 1, 0, 0)}], inputs={'input': ['foo', 'bar']}, outputs={'question': 'foobar'}, reference_example_id=None, parent_run_id=UUID('00000000-0000-4000-8000-000000000000'), tags=['seq:step:4'], execution_order=None, child_execution_order=None, child_runs=[]), Run(id=UUID('00000000-0000-4000-8000-000000000005'), name='ChatPromptTemplate', start_time=FakeDatetime(2023, 1, 1, 0, 0), run_type='prompt', end_time=FakeDatetime(2023, 1, 1, 0, 0), extra={}, error=None, serialized={'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'ChatPromptTemplate'], 'kwargs': {'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'SystemMessagePromptTemplate'], 'kwargs': {'prompt': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'prompt', 'PromptTemplate'], 'kwargs': {'input_variables': [], 'template': 'You are a nicer assistant.', 'template_format': 'f-string', 'partial_variables': {}}}}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'HumanMessagePromptTemplate'], 'kwargs': {'prompt': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'prompt', 'PromptTemplate'], 'kwargs': {'input_variables': ['question'], 'template': '{question}', 'template_format': 'f-string', 'partial_variables': {}}}}}], 'input_variables': ['question']}}, events=[{'name': 'start', 'time': FakeDatetime(2023, 1, 1, 0, 0)}, {'name': 'end', 'time': FakeDatetime(2023, 1, 1, 0, 0)}], inputs={'question': 'foobar'}, outputs={'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'ChatPromptValue'], 'kwargs': {'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'SystemMessage'], 'kwargs': {'content': 'You are a nicer assistant.', 'additional_kwargs': {}}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'foobar', 'additional_kwargs': {}}}]}}, reference_example_id=None, parent_run_id=UUID('00000000-0000-4000-8000-000000000000'), tags=['seq:step:5'], execution_order=None, child_execution_order=None, child_runs=[]), Run(id=UUID('00000000-0000-4000-8000-000000000006'), name='FakeListChatModel', start_time=FakeDatetime(2023, 1, 1, 0, 0), run_type='llm', end_time=FakeDatetime(2023, 1, 1, 0, 0), extra={'invocation_params': {'responses': ['baz, qux'], '_type': 'fake-list-chat-model', 'stop': None}, 'options': {'stop': None}}, error=None, serialized={'lc': 1, 'type': 'not_implemented', 'id': ['langchain', 'chat_models', 'fake', 'FakeListChatModel'], 'repr': "FakeListChatModel(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, responses=['baz, qux'], sleep=None, i=0)"}, events=[{'name': 'start', 'time': FakeDatetime(2023, 1, 1, 0, 0)}, {'name': 'end', 'time': FakeDatetime(2023, 1, 1, 0, 0)}], inputs={'prompts': ['System: You are a nicer assistant.\nHuman: foobar']}, outputs={'generations': [[{'text': 'baz, 
qux', 'generation_info': None, 'message': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': 'baz, qux'}}}]], 'llm_output': None, 'run': None}, reference_example_id=None, parent_run_id=UUID('00000000-0000-4000-8000-000000000000'), tags=['seq:step:6'], execution_order=None, child_execution_order=None, child_runs=[]), Run(id=UUID('00000000-0000-4000-8000-000000000007'), name='CommaSeparatedListOutputParser', start_time=FakeDatetime(2023, 1, 1, 0, 0), run_type='parser', end_time=FakeDatetime(2023, 1, 1, 0, 0), extra={}, error=None, serialized={'lc': 1, 'type': 'constructor', 'id': ['langchain', 'output_parsers', 'list', 'CommaSeparatedListOutputParser'], 'kwargs': {}}, events=[{'name': 'start', 'time': FakeDatetime(2023, 1, 1, 0, 0)}, {'name': 'end', 'time': FakeDatetime(2023, 1, 1, 0, 0)}], inputs={'input': AIMessage(content='baz, qux', additional_kwargs={}, example=False)}, outputs={'output': ['baz', 'qux']}, reference_example_id=None, parent_run_id=UUID('00000000-0000-4000-8000-000000000000'), tags=['seq:step:7'], execution_order=None, child_execution_order=None, child_runs=[])]),
+ Run(id=UUID('00000000-0000-4000-8000-000000000000'), name='RunnableSequence', start_time=FakeDatetime(2023, 1, 1, 0, 0), run_type='chain', end_time=FakeDatetime(2023, 1, 1, 0, 0), extra={}, error=None, serialized={'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'runnable', 'RunnableSequence'], 'kwargs': {'first': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'ChatPromptTemplate'], 'kwargs': {'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'SystemMessagePromptTemplate'], 'kwargs': {'prompt': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'prompt', 'PromptTemplate'], 'kwargs': {'input_variables': [], 'template': 'You are a nice assistant.', 'template_format': 'f-string', 'partial_variables': {}}}}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'HumanMessagePromptTemplate'], 'kwargs': {'prompt': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'prompt', 'PromptTemplate'], 'kwargs': {'input_variables': ['question'], 'template': '{question}', 'template_format': 'f-string', 'partial_variables': {}}}}}], 'input_variables': ['question']}}, 'middle': [{'lc': 1, 'type': 'not_implemented', 'id': ['langchain', 'chat_models', 'fake', 'FakeListChatModel'], 'repr': "FakeListChatModel(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, responses=['foo, bar'], sleep=None, i=0)"}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'output_parsers', 'list', 'CommaSeparatedListOutputParser'], 'kwargs': {}}, {'lc': 1, 'type': 'not_implemented', 'id': ['langchain', 'schema', 'runnable', 'base', 'RunnableLambda'], 'repr': 'RunnableLambda(...)'}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'ChatPromptTemplate'], 'kwargs': {'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'SystemMessagePromptTemplate'], 'kwargs': {'prompt': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'prompt', 'PromptTemplate'], 'kwargs': {'input_variables': [], 'template': 'You are a nicer assistant.', 'template_format': 'f-string', 'partial_variables': {}}}}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'HumanMessagePromptTemplate'], 'kwargs': {'prompt': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'prompt', 'PromptTemplate'], 'kwargs': {'input_variables': ['question'], 'template': '{question}', 'template_format': 'f-string', 'partial_variables': {}}}}}], 'input_variables': ['question']}}, {'lc': 1, 'type': 'not_implemented', 'id': ['langchain', 'chat_models', 'fake', 'FakeListChatModel'], 'repr': "FakeListChatModel(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, responses=['baz, qux'], sleep=None, i=0)"}], 'last': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'output_parsers', 'list', 'CommaSeparatedListOutputParser'], 'kwargs': {}}}}, events=[{'name': 'start', 'time': FakeDatetime(2023, 1, 1, 0, 0)}, {'name': 'end', 'time': FakeDatetime(2023, 1, 1, 0, 0)}], inputs={'question': 'What is your name?'}, outputs={'output': ['baz', 'qux']}, reference_example_id=None, parent_run_id=None, tags=[], execution_order=None, child_execution_order=None, child_runs=[Run(id=UUID('00000000-0000-4000-8000-000000000001'), name='ChatPromptTemplate', start_time=FakeDatetime(2023, 1, 1, 0, 0), run_type='prompt', end_time=FakeDatetime(2023, 1, 1, 0, 0), extra={}, error=None, serialized={'lc': 1, 'type': 
'constructor', 'id': ['langchain', 'prompts', 'chat', 'ChatPromptTemplate'], 'kwargs': {'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'SystemMessagePromptTemplate'], 'kwargs': {'prompt': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'prompt', 'PromptTemplate'], 'kwargs': {'input_variables': [], 'template': 'You are a nice assistant.', 'template_format': 'f-string', 'partial_variables': {}}}}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'HumanMessagePromptTemplate'], 'kwargs': {'prompt': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'prompt', 'PromptTemplate'], 'kwargs': {'input_variables': ['question'], 'template': '{question}', 'template_format': 'f-string', 'partial_variables': {}}}}}], 'input_variables': ['question']}}, events=[{'name': 'start', 'time': FakeDatetime(2023, 1, 1, 0, 0)}, {'name': 'end', 'time': FakeDatetime(2023, 1, 1, 0, 0)}], inputs={'question': 'What is your name?'}, outputs={'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'ChatPromptValue'], 'kwargs': {'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'SystemMessage'], 'kwargs': {'content': 'You are a nice assistant.', 'additional_kwargs': {}}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is your name?', 'additional_kwargs': {}}}]}}, reference_example_id=None, parent_run_id=UUID('00000000-0000-4000-8000-000000000000'), tags=['seq:step:1'], execution_order=None, child_execution_order=None, child_runs=[]), Run(id=UUID('00000000-0000-4000-8000-000000000002'), name='FakeListChatModel', start_time=FakeDatetime(2023, 1, 1, 0, 0), run_type='llm', end_time=FakeDatetime(2023, 1, 1, 0, 0), extra={'invocation_params': {'responses': ['foo, bar'], '_type': 'fake-list-chat-model', 'stop': None}, 'options': {'stop': None}}, error=None, serialized={'lc': 1, 'type': 'not_implemented', 'id': ['langchain', 'chat_models', 'fake', 'FakeListChatModel'], 'repr': "FakeListChatModel(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, responses=['foo, bar'], sleep=None, i=0)"}, events=[{'name': 'start', 'time': FakeDatetime(2023, 1, 1, 0, 0)}, {'name': 'end', 'time': FakeDatetime(2023, 1, 1, 0, 0)}], inputs={'prompts': ['System: You are a nice assistant.\nHuman: What is your name?']}, outputs={'generations': [[{'text': 'foo, bar', 'generation_info': None, 'message': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': 'foo, bar'}}}]], 'llm_output': None, 'run': None}, reference_example_id=None, parent_run_id=UUID('00000000-0000-4000-8000-000000000000'), tags=['seq:step:2'], execution_order=None, child_execution_order=None, child_runs=[]), Run(id=UUID('00000000-0000-4000-8000-000000000003'), name='CommaSeparatedListOutputParser', start_time=FakeDatetime(2023, 1, 1, 0, 0), run_type='parser', end_time=FakeDatetime(2023, 1, 1, 0, 0), extra={}, error=None, serialized={'lc': 1, 'type': 'constructor', 'id': ['langchain', 'output_parsers', 'list', 'CommaSeparatedListOutputParser'], 'kwargs': {}}, events=[{'name': 'start', 'time': FakeDatetime(2023, 1, 1, 0, 0)}, {'name': 'end', 'time': FakeDatetime(2023, 1, 1, 0, 0)}], inputs={'input': AIMessage(content='foo, bar', additional_kwargs={}, example=False)}, outputs={'output': ['foo', 'bar']}, reference_example_id=None, 
parent_run_id=UUID('00000000-0000-4000-8000-000000000000'), tags=['seq:step:3'], execution_order=None, child_execution_order=None, child_runs=[]), Run(id=UUID('00000000-0000-4000-8000-000000000004'), name='<lambda>', start_time=FakeDatetime(2023, 1, 1, 0, 0), run_type='chain', end_time=FakeDatetime(2023, 1, 1, 0, 0), extra={}, error=None, serialized={'lc': 1, 'type': 'not_implemented', 'id': ['langchain', 'schema', 'runnable', 'base', 'RunnableLambda'], 'repr': 'RunnableLambda(...)'}, events=[{'name': 'start', 'time': FakeDatetime(2023, 1, 1, 0, 0)}, {'name': 'end', 'time': FakeDatetime(2023, 1, 1, 0, 0)}], inputs={'input': ['foo', 'bar']}, outputs={'question': 'foobar'}, reference_example_id=None, parent_run_id=UUID('00000000-0000-4000-8000-000000000000'), tags=['seq:step:4'], execution_order=None, child_execution_order=None, child_runs=[]), Run(id=UUID('00000000-0000-4000-8000-000000000005'), name='ChatPromptTemplate', start_time=FakeDatetime(2023, 1, 1, 0, 0), run_type='prompt', end_time=FakeDatetime(2023, 1, 1, 0, 0), extra={}, error=None, serialized={'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'ChatPromptTemplate'], 'kwargs': {'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'SystemMessagePromptTemplate'], 'kwargs': {'prompt': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'prompt', 'PromptTemplate'], 'kwargs': {'input_variables': [], 'template': 'You are a nicer assistant.', 'template_format': 'f-string', 'partial_variables': {}}}}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'HumanMessagePromptTemplate'], 'kwargs': {'prompt': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'prompt', 'PromptTemplate'], 'kwargs': {'input_variables': ['question'], 'template': '{question}', 'template_format': 'f-string', 'partial_variables': {}}}}}], 'input_variables': ['question']}}, events=[{'name': 'start', 'time': FakeDatetime(2023, 1, 1, 0, 0)}, {'name': 'end', 'time': FakeDatetime(2023, 1, 1, 0, 0)}], inputs={'question': 'foobar'}, outputs={'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'ChatPromptValue'], 'kwargs': {'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'SystemMessage'], 'kwargs': {'content': 'You are a nicer assistant.', 'additional_kwargs': {}}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'foobar', 'additional_kwargs': {}}}]}}, reference_example_id=None, parent_run_id=UUID('00000000-0000-4000-8000-000000000000'), tags=['seq:step:5'], execution_order=None, child_execution_order=None, child_runs=[]), Run(id=UUID('00000000-0000-4000-8000-000000000006'), name='FakeListChatModel', start_time=FakeDatetime(2023, 1, 1, 0, 0), run_type='llm', end_time=FakeDatetime(2023, 1, 1, 0, 0), extra={'invocation_params': {'responses': ['baz, qux'], '_type': 'fake-list-chat-model', 'stop': None}, 'options': {'stop': None}}, error=None, serialized={'lc': 1, 'type': 'not_implemented', 'id': ['langchain', 'chat_models', 'fake', 'FakeListChatModel'], 'repr': "FakeListChatModel(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, responses=['baz, qux'], sleep=None, i=0)"}, events=[{'name': 'start', 'time': FakeDatetime(2023, 1, 1, 0, 0)}, {'name': 'end', 'time': FakeDatetime(2023, 1, 1, 0, 0)}], inputs={'prompts': ['System: You are a nicer assistant.\nHuman: foobar']}, outputs={'generations': [[{'text': 'baz, qux', 
'generation_info': None, 'message': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': 'baz, qux'}}}]], 'llm_output': None, 'run': None}, reference_example_id=None, parent_run_id=UUID('00000000-0000-4000-8000-000000000000'), tags=['seq:step:6'], execution_order=None, child_execution_order=None, child_runs=[]), Run(id=UUID('00000000-0000-4000-8000-000000000007'), name='CommaSeparatedListOutputParser', start_time=FakeDatetime(2023, 1, 1, 0, 0), run_type='parser', end_time=FakeDatetime(2023, 1, 1, 0, 0), extra={}, error=None, serialized={'lc': 1, 'type': 'constructor', 'id': ['langchain', 'output_parsers', 'list', 'CommaSeparatedListOutputParser'], 'kwargs': {}}, events=[{'name': 'start', 'time': FakeDatetime(2023, 1, 1, 0, 0)}, {'name': 'end', 'time': FakeDatetime(2023, 1, 1, 0, 0)}], inputs={'input': AIMessage(content='baz, qux', additional_kwargs={}, example=False)}, outputs={'output': ['baz', 'qux']}, reference_example_id=None, parent_run_id=UUID('00000000-0000-4000-8000-000000000000'), tags=['seq:step:7'], execution_order=None, child_execution_order=None, child_runs=[])]),
])
# ---
# name: test_each
@@ -1407,7 +1407,7 @@
# ---
# name: test_prompt_with_llm_and_async_lambda.1
list([
- Run(id=UUID('00000000-0000-4000-8000-000000000000'), name='RunnableSequence', start_time=FakeDatetime(2023, 1, 1, 0, 0), run_type='chain', end_time=FakeDatetime(2023, 1, 1, 0, 0), extra={}, error=None, serialized={'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'runnable', 'RunnableSequence'], 'kwargs': {'first': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'ChatPromptTemplate'], 'kwargs': {'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'SystemMessagePromptTemplate'], 'kwargs': {'prompt': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'prompt', 'PromptTemplate'], 'kwargs': {'input_variables': [], 'template': 'You are a nice assistant.', 'template_format': 'f-string', 'partial_variables': {}}}}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'HumanMessagePromptTemplate'], 'kwargs': {'prompt': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'prompt', 'PromptTemplate'], 'kwargs': {'input_variables': ['question'], 'template': '{question}', 'template_format': 'f-string', 'partial_variables': {}}}}}], 'input_variables': ['question']}}, 'middle': [{'lc': 1, 'type': 'not_implemented', 'id': ['langchain', 'llms', 'fake', 'FakeListLLM'], 'repr': "FakeListLLM(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, responses=['foo', 'bar'], sleep=None, i=0)"}], 'last': {'lc': 1, 'type': 'not_implemented', 'id': ['langchain', 'schema', 'runnable', 'base', 'RunnableLambda'], 'repr': 'RunnableLambda(...)'}}}, events=[{'name': 'start', 'time': FakeDatetime(2023, 1, 1, 0, 0)}, {'name': 'end', 'time': FakeDatetime(2023, 1, 1, 0, 0)}], inputs={'question': 'What is your name?'}, outputs={'output': 'foo'}, reference_example_id=None, parent_run_id=None, tags=[], execution_order=None, child_execution_order=None, child_runs=[Run(id=UUID('00000000-0000-4000-8000-000000000001'), name='ChatPromptTemplate', start_time=FakeDatetime(2023, 1, 1, 0, 0), run_type='prompt', end_time=FakeDatetime(2023, 1, 1, 0, 0), extra={}, error=None, serialized={'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'ChatPromptTemplate'], 'kwargs': {'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'SystemMessagePromptTemplate'], 'kwargs': {'prompt': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'prompt', 'PromptTemplate'], 'kwargs': {'input_variables': [], 'template': 'You are a nice assistant.', 'template_format': 'f-string', 'partial_variables': {}}}}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'HumanMessagePromptTemplate'], 'kwargs': {'prompt': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'prompt', 'PromptTemplate'], 'kwargs': {'input_variables': ['question'], 'template': '{question}', 'template_format': 'f-string', 'partial_variables': {}}}}}], 'input_variables': ['question']}}, events=[{'name': 'start', 'time': FakeDatetime(2023, 1, 1, 0, 0)}, {'name': 'end', 'time': FakeDatetime(2023, 1, 1, 0, 0)}], inputs={'question': 'What is your name?'}, outputs={'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'ChatPromptValue'], 'kwargs': {'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'SystemMessage'], 'kwargs': {'content': 'You are a nice assistant.', 'additional_kwargs': {}}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 
'What is your name?', 'additional_kwargs': {}}}]}}, reference_example_id=None, parent_run_id=UUID('00000000-0000-4000-8000-000000000000'), tags=['seq:step:1'], execution_order=None, child_execution_order=None, child_runs=[]), Run(id=UUID('00000000-0000-4000-8000-000000000002'), name='FakeListLLM', start_time=FakeDatetime(2023, 1, 1, 0, 0), run_type='llm', end_time=FakeDatetime(2023, 1, 1, 0, 0), extra={'invocation_params': {'responses': ['foo', 'bar'], '_type': 'fake-list', 'stop': None}, 'options': {'stop': None}}, error=None, serialized={'lc': 1, 'type': 'not_implemented', 'id': ['langchain', 'llms', 'fake', 'FakeListLLM'], 'repr': "FakeListLLM(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, responses=['foo', 'bar'], sleep=None, i=0)"}, events=[{'name': 'start', 'time': FakeDatetime(2023, 1, 1, 0, 0)}, {'name': 'end', 'time': FakeDatetime(2023, 1, 1, 0, 0)}], inputs={'prompts': ['System: You are a nice assistant.\nHuman: What is your name?']}, outputs={'generations': [[{'text': 'foo', 'generation_info': None}]], 'llm_output': None, 'run': None}, reference_example_id=None, parent_run_id=UUID('00000000-0000-4000-8000-000000000000'), tags=['seq:step:2'], execution_order=None, child_execution_order=None, child_runs=[]), Run(id=UUID('00000000-0000-4000-8000-000000000003'), name='RunnableLambda', start_time=FakeDatetime(2023, 1, 1, 0, 0), run_type='chain', end_time=FakeDatetime(2023, 1, 1, 0, 0), extra={}, error=None, serialized={'lc': 1, 'type': 'not_implemented', 'id': ['langchain', 'schema', 'runnable', 'base', 'RunnableLambda'], 'repr': 'RunnableLambda(...)'}, events=[{'name': 'start', 'time': FakeDatetime(2023, 1, 1, 0, 0)}, {'name': 'end', 'time': FakeDatetime(2023, 1, 1, 0, 0)}], inputs={'input': 'foo'}, outputs={'output': 'foo'}, reference_example_id=None, parent_run_id=UUID('00000000-0000-4000-8000-000000000000'), tags=['seq:step:3'], execution_order=None, child_execution_order=None, child_runs=[])]),
+ Run(id=UUID('00000000-0000-4000-8000-000000000000'), name='RunnableSequence', start_time=FakeDatetime(2023, 1, 1, 0, 0), run_type='chain', end_time=FakeDatetime(2023, 1, 1, 0, 0), extra={}, error=None, serialized={'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'runnable', 'RunnableSequence'], 'kwargs': {'first': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'ChatPromptTemplate'], 'kwargs': {'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'SystemMessagePromptTemplate'], 'kwargs': {'prompt': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'prompt', 'PromptTemplate'], 'kwargs': {'input_variables': [], 'template': 'You are a nice assistant.', 'template_format': 'f-string', 'partial_variables': {}}}}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'HumanMessagePromptTemplate'], 'kwargs': {'prompt': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'prompt', 'PromptTemplate'], 'kwargs': {'input_variables': ['question'], 'template': '{question}', 'template_format': 'f-string', 'partial_variables': {}}}}}], 'input_variables': ['question']}}, 'middle': [{'lc': 1, 'type': 'not_implemented', 'id': ['langchain', 'llms', 'fake', 'FakeListLLM'], 'repr': "FakeListLLM(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, responses=['foo', 'bar'], sleep=None, i=0)"}], 'last': {'lc': 1, 'type': 'not_implemented', 'id': ['langchain', 'schema', 'runnable', 'base', 'RunnableLambda'], 'repr': 'RunnableLambda(...)'}}}, events=[{'name': 'start', 'time': FakeDatetime(2023, 1, 1, 0, 0)}, {'name': 'end', 'time': FakeDatetime(2023, 1, 1, 0, 0)}], inputs={'question': 'What is your name?'}, outputs={'output': 'foo'}, reference_example_id=None, parent_run_id=None, tags=[], execution_order=None, child_execution_order=None, child_runs=[Run(id=UUID('00000000-0000-4000-8000-000000000001'), name='ChatPromptTemplate', start_time=FakeDatetime(2023, 1, 1, 0, 0), run_type='prompt', end_time=FakeDatetime(2023, 1, 1, 0, 0), extra={}, error=None, serialized={'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'ChatPromptTemplate'], 'kwargs': {'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'SystemMessagePromptTemplate'], 'kwargs': {'prompt': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'prompt', 'PromptTemplate'], 'kwargs': {'input_variables': [], 'template': 'You are a nice assistant.', 'template_format': 'f-string', 'partial_variables': {}}}}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'HumanMessagePromptTemplate'], 'kwargs': {'prompt': {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'prompt', 'PromptTemplate'], 'kwargs': {'input_variables': ['question'], 'template': '{question}', 'template_format': 'f-string', 'partial_variables': {}}}}}], 'input_variables': ['question']}}, events=[{'name': 'start', 'time': FakeDatetime(2023, 1, 1, 0, 0)}, {'name': 'end', 'time': FakeDatetime(2023, 1, 1, 0, 0)}], inputs={'question': 'What is your name?'}, outputs={'lc': 1, 'type': 'constructor', 'id': ['langchain', 'prompts', 'chat', 'ChatPromptValue'], 'kwargs': {'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'SystemMessage'], 'kwargs': {'content': 'You are a nice assistant.', 'additional_kwargs': {}}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 
'What is your name?', 'additional_kwargs': {}}}]}}, reference_example_id=None, parent_run_id=UUID('00000000-0000-4000-8000-000000000000'), tags=['seq:step:1'], execution_order=None, child_execution_order=None, child_runs=[]), Run(id=UUID('00000000-0000-4000-8000-000000000002'), name='FakeListLLM', start_time=FakeDatetime(2023, 1, 1, 0, 0), run_type='llm', end_time=FakeDatetime(2023, 1, 1, 0, 0), extra={'invocation_params': {'responses': ['foo', 'bar'], '_type': 'fake-list', 'stop': None}, 'options': {'stop': None}}, error=None, serialized={'lc': 1, 'type': 'not_implemented', 'id': ['langchain', 'llms', 'fake', 'FakeListLLM'], 'repr': "FakeListLLM(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, responses=['foo', 'bar'], sleep=None, i=0)"}, events=[{'name': 'start', 'time': FakeDatetime(2023, 1, 1, 0, 0)}, {'name': 'end', 'time': FakeDatetime(2023, 1, 1, 0, 0)}], inputs={'prompts': ['System: You are a nice assistant.\nHuman: What is your name?']}, outputs={'generations': [[{'text': 'foo', 'generation_info': None}]], 'llm_output': None, 'run': None}, reference_example_id=None, parent_run_id=UUID('00000000-0000-4000-8000-000000000000'), tags=['seq:step:2'], execution_order=None, child_execution_order=None, child_runs=[]), Run(id=UUID('00000000-0000-4000-8000-000000000003'), name='passthrough', start_time=FakeDatetime(2023, 1, 1, 0, 0), run_type='chain', end_time=FakeDatetime(2023, 1, 1, 0, 0), extra={}, error=None, serialized={'lc': 1, 'type': 'not_implemented', 'id': ['langchain', 'schema', 'runnable', 'base', 'RunnableLambda'], 'repr': 'RunnableLambda(...)'}, events=[{'name': 'start', 'time': FakeDatetime(2023, 1, 1, 0, 0)}, {'name': 'end', 'time': FakeDatetime(2023, 1, 1, 0, 0)}], inputs={'input': 'foo'}, outputs={'output': 'foo'}, reference_example_id=None, parent_run_id=UUID('00000000-0000-4000-8000-000000000000'), tags=['seq:step:3'], execution_order=None, child_execution_order=None, child_runs=[])]),
])
# ---
# name: test_router_runnable
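The updated snapshots above all capture the same behavior change: the traced run for a RunnableLambda step is now named after the wrapped Python function rather than the generic class name (see name='passthrough' at seq:step:3 in the test_prompt_with_llm_and_async_lambda snapshot). As a minimal sketch, reconstructed from the serialized snapshot rather than copied from the PR, here is the kind of chain that would produce that trace; the prompt text, fake responses, and the passthrough helper are read off the snapshot, while everything else is an assumption:

    from langchain.llms.fake import FakeListLLM
    from langchain.prompts.chat import (
        ChatPromptTemplate,
        HumanMessagePromptTemplate,
        SystemMessagePromptTemplate,
    )

    # Prompt matching the serialized snapshot: a fixed system message plus a
    # single {question} human message.
    prompt = ChatPromptTemplate.from_messages(
        [
            SystemMessagePromptTemplate.from_template("You are a nice assistant."),
            HumanMessagePromptTemplate.from_template("{question}"),
        ]
    )

    llm = FakeListLLM(responses=["foo", "bar"])

    # An async step; under the new naming behavior its traced run is called
    # "passthrough" (the function's __name__), tagged seq:step:3.
    async def passthrough(value: str) -> str:
        return value

    # The | operator coerces the bare async function into a RunnableLambda.
    chain = prompt | llm | passthrough

Invoking the chain asynchronously with a tracer attached, e.g. `await chain.ainvoke({"question": "What is your name?"}, config={"callbacks": [tracer]})`, should yield a Run tree shaped like the snapshot: one parent RunnableSequence run with prompt, llm, and lambda children tagged seq:step:1 through seq:step:3.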
diff --git a/libs/langchain/tests/unit_tests/schema/runnable/test_runnable.py b/libs/langchain/tests/unit_tests/schema/runnable/test_runnable.py
index 2e0be35ddc9..8dd871ee4e1 100644
--- a/libs/langchain/tests/unit_tests/schema/runnable/test_runnable.py
+++ b/libs/langchain/tests/unit_tests/schema/runnable/test_runnable.py
@@ -948,7 +948,7 @@ async def test_higher_order_lambda_runnable(
parent_run = next(r for r in tracer.runs if r.parent_run_id is None)
assert len(parent_run.child_runs) == 2
router_run = parent_run.child_runs[1]
- assert router_run.name == "RunnableLambda"
+ assert router_run.name == "router"
assert len(router_run.child_runs) == 1
math_run = router_run.child_runs[0]
assert math_run.name == "RunnableSequence"
@@ -980,7 +980,7 @@ async def test_higher_order_lambda_runnable(
parent_run = next(r for r in tracer.runs if r.parent_run_id is None)
assert len(parent_run.child_runs) == 2
router_run = parent_run.child_runs[1]
- assert router_run.name == "RunnableLambda"
+ assert router_run.name == "arouter"
assert len(router_run.child_runs) == 1
math_run = router_run.child_runs[0]
assert math_run.name == "RunnableSequence"
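Both assertion updates encode the naming convention the snapshot changes above rely on: a RunnableLambda's traced run takes the wrapped function's __name__ ("router" for the sync branch, "arouter" for the async one) instead of the class name. A minimal sketch of that convention, using a hypothetical placeholder function rather than the test's real routing logic:

    from langchain.schema.runnable import RunnableLambda

    def router(value: dict) -> dict:
        # Hypothetical stand-in for the test's branch-selection logic; only
        # the function's __name__ matters for the traced run name.
        return value

    runnable = RunnableLambda(router)
    # Under the behavior asserted above, a tracer recording an invocation of
    # this runnable reports the run name as "router", not "RunnableLambda".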