Mirror of https://github.com/hwchase17/langchain.git (synced 2026-02-17 20:14:05 +00:00)

Compare commits: 20 commits, v0.0.106 ... harrison/k
| SHA1 |
|---|
| 92dc7b5f20 |
| c68d3f699b |
| 15de3e8137 |
| f95d551f7a |
| c6bfa00178 |
| 01a57198b8 |
| 8dba30f31e |
| 9f78717b3c |
| 90846dcc28 |
| 6ed16e13b1 |
| c1dc784a3d |
| 5b0e747f9a |
| 624c72c266 |
| a950287206 |
| 30383abb12 |
| cdb97f3dfb |
| b44c8bd969 |
| c9189d354a |
| 622578a022 |
| 7018806a92 |
@@ -30,6 +30,7 @@ version = data["tool"]["poetry"]["version"]
 release = version
 
 html_title = project + " " + version
+html_last_updated_fmt = "%b %d, %Y"
 
 
 # -- General configuration ---------------------------------------------------
@@ -13,7 +13,9 @@ It is broken into two parts: installation and setup, and then references to spec
 
 There exists a wrapper around the Atlas neural database, allowing you to use it as a vectorstore.
 This vectorstore also gives you full access to the underlying AtlasProject object, which will allow you to use the full range of Atlas map interactions, such as bulk tagging and automatic topic modeling.
-Please see [the Nomic docs](https://docs.nomic.ai/atlas_api.html) for more detailed information.
+Please see [the Atlas docs](https://docs.nomic.ai/atlas_api.html) for more detailed information.
 
@@ -22,4 +24,4 @@ To import this vectorstore:
 from langchain.vectorstores import AtlasDB
 ```
 
-For a more detailed walkthrough of the Chroma wrapper, see [this notebook](../modules/indexes/examples/vectorstores.ipynb)
+For a more detailed walkthrough of the AtlasDB wrapper, see [this notebook](../modules/indexes/vectorstore_examples/atlas.ipynb)
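For orientation, here is a minimal usage sketch assembled from the AtlasDB pieces changed in this comparison (the import above and the `from_texts` call in the `atlas.ipynb` diff below); the texts and API key are placeholders.

```python
import time

from langchain.vectorstores import AtlasDB

texts = ["first document", "second document"]  # placeholder corpus
ATLAS_TEST_API_KEY = "..."  # placeholder; supply your own Nomic API key

db = AtlasDB.from_texts(
    texts=texts,
    name="test_index_" + str(time.time()),  # unique name for your vector store
    description="test_index",  # a description for your vector store
    api_key=ATLAS_TEST_API_KEY,
    index_kwargs={"build_topic_model": True},
)

# the wrapper exposes the underlying AtlasProject object directly
db.project.wait_for_project_lock()
```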
@@ -86,7 +86,7 @@
     "\n",
     "\n",
     "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
-    "\u001b[32;1m\u001b[1;3mThought: I need to use the search tool to find out who is Leo DiCaprio's girlfriend and then use the calculator tool to raise her current age to the 0.43 power.\n",
+    "\u001b[32;1m\u001b[1;3mThought: The first question requires a search, while the second question requires a calculator.\n",
     "Action:\n",
     "```\n",
     "{\n",
@@ -96,32 +96,32 @@
     "```\n",
     "\u001b[0m\n",
     "Observation: \u001b[36;1m\u001b[1;3mCamila Morrone\u001b[0m\n",
-    "Thought:\u001b[32;1m\u001b[1;3mNow I need to use the calculator tool to raise Camila Morrone's current age to the 0.43 power.\n",
+    "Thought:\u001b[32;1m\u001b[1;3mFor the second question, I need to use the calculator tool to raise her current age to the 0.43 power.\n",
     "Action:\n",
     "```\n",
     "{\n",
     " \"action\": \"Calculator\",\n",
-    " \"action_input\": \"22.5^(0.43)\"\n",
+    " \"action_input\": \"22.0^(0.43)\"\n",
     "}\n",
     "```\n",
     "\n",
     "\u001b[0m\n",
     "\n",
     "\u001b[1m> Entering new LLMMathChain chain...\u001b[0m\n",
-    "22.5^(0.43)\u001b[32;1m\u001b[1;3m\n",
+    "22.0^(0.43)\u001b[32;1m\u001b[1;3m\n",
     "```python\n",
     "import math\n",
-    "print(math.pow(22.5, 0.43))\n",
+    "print(math.pow(22.0, 0.43))\n",
     "```\n",
     "\u001b[0m\n",
-    "Answer: \u001b[33;1m\u001b[1;3m3.8145075848063126\n",
+    "Answer: \u001b[33;1m\u001b[1;3m3.777824273683966\n",
     "\u001b[0m\n",
     "\u001b[1m> Finished chain.\u001b[0m\n",
     "\n",
-    "Observation: \u001b[33;1m\u001b[1;3mAnswer: 3.8145075848063126\n",
+    "Observation: \u001b[33;1m\u001b[1;3mAnswer: 3.777824273683966\n",
     "\u001b[0m\n",
-    "Thought:\u001b[32;1m\u001b[1;3mI now know the final answer\n",
-    "Final Answer: 3.8145075848063126\u001b[0m\n",
+    "Thought:\u001b[32;1m\u001b[1;3mI now know the final answer.\n",
+    "Final Answer: Camila Morrone, 3.777824273683966.\u001b[0m\n",
     "\n",
     "\u001b[1m> Finished chain.\u001b[0m\n"
    ]
@@ -129,7 +129,7 @@
   {
    "data": {
     "text/plain": [
-     "'3.8145075848063126'"
+     "'Camila Morrone, 3.777824273683966.'"
    ]
   },
   "execution_count": 4,
@@ -154,29 +154,30 @@
     "\n",
     "\n",
     "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
-    "\u001b[32;1m\u001b[1;3mThought: I need to use the Search tool to find the name of the artist who recently released an album called 'The Storm Before the Calm'. Then, I can use the FooBar DB tool to check if they are in the database and what albums of theirs are in it.\n",
-    "Action: \n",
+    "\u001b[32;1m\u001b[1;3mQuestion: What is the full name of the artist who recently released an album called 'The Storm Before the Calm' and are they in the FooBar database? If so, what albums of theirs are in the FooBar database?\n",
+    "Thought: I should use the Search tool to find the answer to the first part of the question and then use the FooBar DB tool to find the answer to the second part of the question.\n",
+    "Action:\n",
     "```\n",
     "{\n",
     " \"action\": \"Search\",\n",
-    " \"action_input\": \"Who is the artist that recently released an album called 'The Storm Before the Calm'?\"\n",
+    " \"action_input\": \"Who recently released an album called 'The Storm Before the Calm'\"\n",
     "}\n",
     "```\n",
     "\u001b[0m\n",
-    "Observation: \u001b[36;1m\u001b[1;3mstudio album by Canadian-American singer-songwriter Alanis Morissette, released June 17, 2022, via Epiphany Music and Thirty Tigers, as well as by RCA Records ...\u001b[0m\n",
-    "Thought:\u001b[32;1m\u001b[1;3mNow that I know the artist is Alanis Morissette, I can use the FooBar DB tool to check if she is in the database and what albums of hers are in it.\n",
-    "Action: \n",
+    "Observation: \u001b[36;1m\u001b[1;3mAlanis Morissette\u001b[0m\n",
+    "Thought:\u001b[32;1m\u001b[1;3mNow that I have the name of the artist, I can use the FooBar DB tool to find their albums in the database.\n",
+    "Action:\n",
     "```\n",
     "{\n",
     " \"action\": \"FooBar DB\",\n",
-    " \"action_input\": \"What albums by Alanis Morissette are in the database?\"\n",
+    " \"action_input\": \"What albums does Alanis Morissette have in the database?\"\n",
     "}\n",
     "```\n",
     "\n",
     "\u001b[0m\n",
     "\n",
     "\u001b[1m> Entering new SQLDatabaseChain chain...\u001b[0m\n",
-    "What albums by Alanis Morissette are in the database? \n",
+    "What albums does Alanis Morissette have in the database? \n",
     "SQLQuery:"
    ]
   },
@@ -194,12 +195,12 @@
    "text": [
     "\u001b[32;1m\u001b[1;3m SELECT Title FROM Album WHERE ArtistId IN (SELECT ArtistId FROM Artist WHERE Name = 'Alanis Morissette') LIMIT 5;\u001b[0m\n",
     "SQLResult: \u001b[33;1m\u001b[1;3m[('Jagged Little Pill',)]\u001b[0m\n",
-    "Answer:\u001b[32;1m\u001b[1;3m The albums by Alanis Morissette in the database are Jagged Little Pill.\u001b[0m\n",
+    "Answer:\u001b[32;1m\u001b[1;3m Alanis Morissette has the album 'Jagged Little Pill' in the database.\u001b[0m\n",
     "\u001b[1m> Finished chain.\u001b[0m\n",
     "\n",
-    "Observation: \u001b[38;5;200m\u001b[1;3m The albums by Alanis Morissette in the database are Jagged Little Pill.\u001b[0m\n",
-    "Thought:\u001b[32;1m\u001b[1;3mThe only album by Alanis Morissette in the FooBar DB is Jagged Little Pill.\n",
-    "Final Answer: The artist who recently released an album called 'The Storm Before the Calm' is Alanis Morissette. The only album of hers in the FooBar DB is Jagged Little Pill.\u001b[0m\n",
+    "Observation: \u001b[38;5;200m\u001b[1;3m Alanis Morissette has the album 'Jagged Little Pill' in the database.\u001b[0m\n",
+    "Thought:\u001b[32;1m\u001b[1;3mI have found the answer to both parts of the question.\n",
+    "Final Answer: The artist who recently released an album called 'The Storm Before the Calm' is Alanis Morissette. The album 'Jagged Little Pill' is in the FooBar database.\u001b[0m\n",
     "\n",
     "\u001b[1m> Finished chain.\u001b[0m\n"
    ]
@@ -207,7 +208,7 @@
   {
    "data": {
     "text/plain": [
-     "\"The artist who recently released an album called 'The Storm Before the Calm' is Alanis Morissette. The only album of hers in the FooBar DB is Jagged Little Pill.\""
+     "\"The artist who recently released an album called 'The Storm Before the Calm' is Alanis Morissette. The album 'Jagged Little Pill' is in the FooBar database.\""
    ]
   },
   "execution_count": 5,
@@ -136,3 +136,12 @@ Below is a list of all supported tools and relevant information:
 - Requires LLM: No
 - Extra Parameters: `serper_api_key`
 - For more information on this, see [this page](../../ecosystem/google_serper.md)
+
+**wikipedia**
+
+- Tool Name: Wikipedia
+- Tool Description: A wrapper around Wikipedia. Useful for when you need to answer general questions about people, places, companies, historical events, or other subjects. Input should be a search query.
+- Notes: Uses the [wikipedia](https://pypi.org/project/wikipedia/) Python package to call the MediaWiki API and then parses results.
+- Requires LLM: No
+- Extra Parameters: `top_k_results`
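A minimal sketch of exercising this tool entry, assuming the `wikipedia` package is installed; per the `load_tools` wiring later in this comparison, `top_k_results` is the one extra parameter forwarded to `WikipediaAPIWrapper`.

```python
from langchain.agents import load_tools

# no LLM is required for this tool
tools = load_tools(["wikipedia"], top_k_results=1)
print(tools[0].run("Alanis Morissette"))
```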
docs/modules/chains/examples/structured_data_extraction.ipynb (new file, 101 lines)

@@ -0,0 +1,101 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "9bb1293b",
   "metadata": {},
   "source": [
    "# Structured Data Extraction\n",
    "\n",
    "This notebook goes over how to use a chain to extract structured data from text.\n",
    "\n",
    "This heavily utilizes the fabulous [kor library](https://eyurtsev.github.io/kor/index.html). As a result, it only works for Python 3.10+"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "82ba478c",
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install kor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "d4b12abd",
   "metadata": {},
   "outputs": [],
   "source": [
    "from kor.extraction import Extractor\n",
    "from kor.nodes import Object, Text, Number\n",
    "from kor.llms import OpenAIChatCompletion, OpenAICompletion"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "15726529",
   "metadata": {},
   "outputs": [],
   "source": [
    "llm = OpenAIChatCompletion(model=\"gpt-3.5-turbo\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "8c49e1ba",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = Extractor(llm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "752e408d",
   "metadata": {},
   "outputs": [],
   "source": [
    "schema = Text(\n",
    "    id=\"first_name\",\n",
    "    description=\"The first name of a person\",\n",
    "    examples=[(\"I am billy.\", \"billy\"), (\"John Smith is 33 years old\", \"John\")],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "50fe683e",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
docs/modules/document_loaders/examples/csv.ipynb (new file, 126 lines)

File diff suppressed because one or more lines are too long
@@ -0,0 +1,32 @@
"Team", "Payroll (millions)", "Wins"
"Nationals", 81.34, 98
"Reds", 82.20, 97
"Yankees", 197.96, 95
"Giants", 117.62, 94
"Braves", 83.31, 94
"Athletics", 55.37, 94
"Rangers", 120.51, 93
"Orioles", 81.43, 93
"Rays", 64.17, 90
"Angels", 154.49, 89
"Tigers", 132.30, 88
"Cardinals", 110.30, 88
"Dodgers", 95.14, 86
"White Sox", 96.92, 85
"Brewers", 97.65, 83
"Phillies", 174.54, 81
"Diamondbacks", 74.28, 81
"Pirates", 63.43, 79
"Padres", 55.24, 76
"Mariners", 81.97, 75
"Mets", 93.35, 74
"Blue Jays", 75.48, 73
"Royals", 60.91, 72
"Marlins", 118.07, 69
"Red Sox", 173.18, 69
"Indians", 78.43, 68
"Twins", 94.08, 66
"Rockies", 78.06, 64
"Cubs", 88.19, 61
"Astros", 60.65, 55
docs/modules/document_loaders/examples/markdown.ipynb (new file, 145 lines)

@@ -0,0 +1,145 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "39af9ecd",
   "metadata": {},
   "source": [
    "# Markdown\n",
    "\n",
    "This covers how to load markdown documents into a document format that we can use downstream."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "721c48aa",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.document_loaders import UnstructuredMarkdownLoader"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "9d3d0e35",
   "metadata": {},
   "outputs": [],
   "source": [
    "loader = UnstructuredMarkdownLoader(\"../../../../README.md\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "06073f91",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = loader.load()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "c9adc5cb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
"[Document(page_content=\"ð\\x9f¦\\x9cï¸\\x8fð\\x9f”\\x97 LangChain\\n\\nâ\\x9a¡ Building applications with LLMs through composability â\\x9a¡\\n\\nProduction Support: As you move your LangChains into production, we'd love to offer more comprehensive support.\\nPlease fill out this form and we'll set up a dedicated support Slack channel.\\n\\nQuick Install\\n\\npip install langchain\\n\\nð\\x9f¤” What is this?\\n\\nLarge language models (LLMs) are emerging as a transformative technology, enabling\\ndevelopers to build applications that they previously could not.\\nBut using these LLMs in isolation is often not enough to\\ncreate a truly powerful app - the real power comes when you can combine them with other sources of computation or knowledge.\\n\\nThis library is aimed at assisting in the development of those types of applications. Common examples of these types of applications include:\\n\\nâ\\x9d“ Question Answering over specific documents\\n\\nDocumentation\\n\\nEnd-to-end Example: Question Answering over Notion Database\\n\\nð\\x9f’¬ Chatbots\\n\\nDocumentation\\n\\nEnd-to-end Example: Chat-LangChain\\n\\nð\\x9f¤\\x96 Agents\\n\\nDocumentation\\n\\nEnd-to-end Example: GPT+WolframAlpha\\n\\nð\\x9f“\\x96 Documentation\\n\\nPlease see here for full documentation on:\\n\\nGetting started (installation, setting up the environment, simple examples)\\n\\nHow-To examples (demos, integrations, helper functions)\\n\\nReference (full API docs)\\n Resources (high-level explanation of core concepts)\\n\\nð\\x9f\\x9a\\x80 What can this help with?\\n\\nThere are six main areas that LangChain is designed to help with.\\nThese are, in increasing order of complexity:\\n\\nð\\x9f“\\x83 LLMs and Prompts:\\n\\nThis includes prompt management, prompt optimization, generic interface for all LLMs, and common utilities for working with LLMs.\\n\\nð\\x9f”\\x97 Chains:\\n\\nChains go beyond just a single LLM call, and are sequences of calls (whether to an LLM or a different utility). LangChain provides a standard interface for chains, lots of integrations with other tools, and end-to-end chains for common applications.\\n\\nð\\x9f“\\x9a Data Augmented Generation:\\n\\nData Augmented Generation involves specific types of chains that first interact with an external datasource to fetch data to use in the generation step. Examples of this include summarization of long pieces of text and question/answering over specific data sources.\\n\\nð\\x9f¤\\x96 Agents:\\n\\nAgents involve an LLM making decisions about which Actions to take, taking that Action, seeing an Observation, and repeating that until done. LangChain provides a standard interface for agents, a selection of agents to choose from, and examples of end to end agents.\\n\\nð\\x9f§\\xa0 Memory:\\n\\nMemory is the concept of persisting state between calls of a chain/agent. LangChain provides a standard interface for memory, a collection of memory implementations, and examples of chains/agents that use memory.\\n\\nð\\x9f§\\x90 Evaluation:\\n\\n[BETA] Generative models are notoriously hard to evaluate with traditional metrics. One new way of evaluating them is using language models themselves to do the evaluation. 
LangChain provides some prompts/chains for assisting in this.\\n\\nFor more information on these concepts, please see our full documentation.\\n\\nð\\x9f’\\x81 Contributing\\n\\nAs an open source project in a rapidly developing field, we are extremely open to contributions, whether it be in the form of a new feature, improved infra, or better documentation.\\n\\nFor detailed information on how to contribute, see here.\", lookup_str='', metadata={'source': '../../../../README.md'}, lookup_index=0)]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "525d6b67",
   "metadata": {},
   "source": [
    "## Retain Elements\n",
    "\n",
    "Under the hood, Unstructured creates different \"elements\" for different chunks of text. By default we combine those together, but you can easily keep that separation by specifying `mode=\"elements\"`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "064f9162",
   "metadata": {},
   "outputs": [],
   "source": [
    "loader = UnstructuredMarkdownLoader(\"../../../../README.md\", mode=\"elements\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "abefbbdb",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = loader.load()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "a547c534",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Document(page_content='ð\\x9f¦\\x9cï¸\\x8fð\\x9f”\\x97 LangChain', lookup_str='', metadata={'source': '../../../../README.md', 'page_number': 1, 'category': 'UncategorizedText'}, lookup_index=0)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "381d4139",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
@@ -36,6 +36,8 @@ In the below guides, we cover different types of vectorstores and how to use the
 
 `Chroma <./vectorstore_examples/chroma.html>`_: A walkthrough of how to use the Chroma vectorstore wrapper.
 
+`AtlasDB <./vectorstore_examples/atlas.html>`_: A walkthrough of how to use the AtlasDB vectorstore and visualizer wrapper.
+
 `DeepLake <./vectorstore_examples/deeplake.html>`_: A walkthrough of how to use the Deep Lake, data lake, wrapper.
 
 `FAISS <./vectorstore_examples/faiss.html>`_: A walkthrough of how to use the FAISS vectorstore wrapper.
@@ -2,11 +2,7 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "metadata": {
-    "pycharm": {
-     "name": "#%% md\n"
-    }
-   },
+   "metadata": {},
    "source": [
     "# AtlasDB\n",
     "\n",
@@ -15,10 +11,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {
     "pycharm": {
-     "name": "#%%\n"
+     "is_executing": true
     }
    },
    "outputs": [],
@@ -32,56 +28,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Collecting en-core-web-sm==3.5.0\n",
-      "  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0-py3-none-any.whl (12.8 MB)\n",
-      "\u001B[2K     \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m12.8/12.8 MB\u001B[0m \u001B[31m90.8 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m00:01\u001B[0m00:01\u001B[0m\n",
-      "\u001B[?25hRequirement already satisfied: spacy<3.6.0,>=3.5.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from en-core-web-sm==3.5.0) (3.5.0)\n",
-      "Requirement already satisfied: packaging>=20.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (23.0)\n",
-      "Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (1.1.1)\n",
-      "Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (3.3.0)\n",
-      "Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (2.4.5)\n",
-      "Requirement already satisfied: pathy>=0.10.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (0.10.1)\n",
-      "Requirement already satisfied: setuptools in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (67.4.0)\n",
-      "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (4.64.1)\n",
-      "Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (1.0.4)\n",
-      "Requirement already satisfied: smart-open<7.0.0,>=5.2.1 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (6.3.0)\n",
-      "Requirement already satisfied: thinc<8.2.0,>=8.1.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (8.1.7)\n",
-      "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (2.0.7)\n",
-      "Requirement already satisfied: typer<0.8.0,>=0.3.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (0.7.0)\n",
-      "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (2.28.2)\n",
-      "Requirement already satisfied: jinja2 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (3.1.2)\n",
-      "Requirement already satisfied: pydantic!=1.8,!=1.8.1,<1.11.0,>=1.7.4 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (1.10.5)\n",
-      "Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (2.0.8)\n",
-      "Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (3.0.12)\n",
-      "Requirement already satisfied: numpy>=1.15.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (1.24.2)\n",
-      "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (1.0.9)\n",
-      "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (3.0.8)\n",
-      "Requirement already satisfied: typing-extensions>=4.2.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from pydantic!=1.8,!=1.8.1,<1.11.0,>=1.7.4->spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (4.5.0)\n",
-      "Requirement already satisfied: charset-normalizer<4,>=2 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (3.0.1)\n",
-      "Requirement already satisfied: idna<4,>=2.5 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (3.4)\n",
-      "Requirement already satisfied: certifi>=2017.4.17 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (2022.12.7)\n",
-      "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (1.26.14)\n",
-      "Requirement already satisfied: blis<0.8.0,>=0.7.8 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from thinc<8.2.0,>=8.1.0->spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (0.7.9)\n",
-      "Requirement already satisfied: confection<1.0.0,>=0.0.1 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from thinc<8.2.0,>=8.1.0->spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (0.0.4)\n",
-      "Requirement already satisfied: click<9.0.0,>=7.1.1 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from typer<0.8.0,>=0.3.0->spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (8.1.3)\n",
-      "Requirement already satisfied: MarkupSafe>=2.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from jinja2->spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (2.1.2)\n",
-      "\n",
-      "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m A new release of pip is available: \u001B[0m\u001B[31;49m23.0\u001B[0m\u001B[39;49m -> \u001B[0m\u001B[32;49m23.0.1\u001B[0m\n",
-      "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m To update, run: \u001B[0m\u001B[32;49mpip install --upgrade pip\u001B[0m\n",
-      "\u001B[38;5;2m✔ Download and installation successful\u001B[0m\n",
-      "You can now load the package via spacy.load('en_core_web_sm')\n"
-     ]
-    }
-   ],
+    "scrolled": true,
+    "pycharm": {
+     "is_executing": true
+    }
+   },
+   "outputs": [],
    "source": [
     "!python -m spacy download en_core_web_sm"
    ]
@@ -113,51 +67,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2023-02-24 16:13:49.696 | INFO | nomic.project:_create_project:884 - Creating project `test_index_1677255228.136989` in organization `Atlas Demo`\n",
-      "2023-02-24 16:13:51.087 | INFO | nomic.project:wait_for_project_lock:993 - test_index_1677255228.136989: Project lock is released.\n",
-      "2023-02-24 16:13:51.225 | INFO | nomic.project:wait_for_project_lock:993 - test_index_1677255228.136989: Project lock is released.\n",
-      "2023-02-24 16:13:51.481 | INFO | nomic.project:add_text:1351 - Uploading text to Atlas.\n",
-      "1it [00:00, 1.20it/s]\n",
-      "2023-02-24 16:13:52.318 | INFO | nomic.project:add_text:1422 - Text upload succeeded.\n",
-      "2023-02-24 16:13:52.628 | INFO | nomic.project:wait_for_project_lock:993 - test_index_1677255228.136989: Project lock is released.\n",
-      "2023-02-24 16:13:53.380 | INFO | nomic.project:create_index:1192 - Created map `test_index_1677255228.136989_index` in project `test_index_1677255228.136989`: https://atlas.nomic.ai/map/ee2354a3-7f9a-4c6b-af43-b0cda09d7198/db996d77-8981-48a0-897a-ff2c22bbf541\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "metadata": {
+    "pycharm": {
+     "is_executing": true
+    }
+   },
+   "outputs": [],
    "source": [
     "db = AtlasDB.from_texts(texts=texts,\n",
-    "                        name='test_index_'+str(time.time()),\n",
-    "                        description='test_index',\n",
+    "                        name='test_index_'+str(time.time()),  # unique name for your vector store\n",
+    "                        description='test_index', #a description for your vector store\n",
    "                        api_key=ATLAS_TEST_API_KEY,\n",
    "                        index_kwargs={'build_topic_model': True})"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "scrolled": false
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2023-02-24 16:14:09.106 | INFO | nomic.project:wait_for_project_lock:993 - test_index_1677255228.136989: Project lock is released.\n"
-     ]
-    }
-   ],
-   "source": [
-    "with db.project.wait_for_project_lock():\n",
-    "     time.sleep(1)"
-   ]
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "db.project.wait_for_project_lock()"
+   ],
+   "metadata": {
+    "collapsed": false
+   }
   },
   {
    "cell_type": "code",
@@ -263,4 +197,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 1
 }
@@ -35,12 +35,28 @@
     "\n",
     "import langchain\n",
     "from langchain.agents import Tool, initialize_agent, load_tools\n",
+    "from langchain.chat_models import ChatOpenAI\n",
     "from langchain.llms import OpenAI"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "1b62cd48",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Agent run with tracing. Ensure that OPENAI_API_KEY is set appropriately to run this example.\n",
+    "\n",
+    "llm = OpenAI(temperature=0)\n",
+    "tools = load_tools([\"llm-math\"], llm=llm)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 3,
    "id": "bfa16b79-aa4b-4d41-a067-70d1f593f667",
    "metadata": {
     "tags": []
@@ -70,16 +86,12 @@
      "'1.0891804557407723'"
     ]
    },
-   "execution_count": 2,
+   "execution_count": 3,
    "metadata": {},
    "output_type": "execute_result"
   }
  ],
  "source": [
-   "# Agent run with tracing. Ensure that OPENAI_API_KEY is set appropriately to run this example.\n",
-   "\n",
-   "llm = OpenAI(temperature=0)\n",
-   "tools = load_tools([\"llm-math\"], llm=llm)\n",
    "agent = initialize_agent(\n",
    "    tools, llm, agent=\"zero-shot-react-description\", verbose=True\n",
    ")\n",
@@ -87,10 +99,94 @@
    "agent.run(\"What is 2 raised to .123243 power?\")"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "4829eb1d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "\n",
+      "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
+      "\u001b[32;1m\u001b[1;3mQuestion: What is 2 raised to .123243 power?\n",
+      "Thought: I need a calculator to solve this problem.\n",
+      "Action:\n",
+      "```\n",
+      "{\n",
+      " \"action\": \"calculator\",\n",
+      " \"action_input\": \"2^0.123243\"\n",
+      "}\n",
+      "```\n",
+      "\u001b[0m\n",
+      "Observation: calculator is not a valid tool, try another one.\n",
+      "\u001b[32;1m\u001b[1;3mI made a mistake, I need to use the correct tool for this question.\n",
+      "Action:\n",
+      "```\n",
+      "{\n",
+      " \"action\": \"calculator\",\n",
+      " \"action_input\": \"2^0.123243\"\n",
+      "}\n",
+      "```\n",
+      "\n",
+      "\u001b[0m\n",
+      "Observation: calculator is not a valid tool, try another one.\n",
+      "\u001b[32;1m\u001b[1;3mI made a mistake, the tool name is actually \"calc\" instead of \"calculator\".\n",
+      "Action:\n",
+      "```\n",
+      "{\n",
+      " \"action\": \"calc\",\n",
+      " \"action_input\": \"2^0.123243\"\n",
+      "}\n",
+      "```\n",
+      "\n",
+      "\u001b[0m\n",
+      "Observation: calc is not a valid tool, try another one.\n",
+      "\u001b[32;1m\u001b[1;3mI made another mistake, the tool name is actually \"Calculator\" instead of \"calc\".\n",
+      "Action:\n",
+      "```\n",
+      "{\n",
+      " \"action\": \"Calculator\",\n",
+      " \"action_input\": \"2^0.123243\"\n",
+      "}\n",
+      "```\n",
+      "\n",
+      "\u001b[0m\n",
+      "Observation: \u001b[36;1m\u001b[1;3mAnswer: 1.0891804557407723\n",
+      "\u001b[0m\n",
+      "Thought:\u001b[32;1m\u001b[1;3mThe final answer is 1.0891804557407723.\n",
+      "Final Answer: 1.0891804557407723\u001b[0m\n",
+      "\n",
+      "\u001b[1m> Finished chain.\u001b[0m\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'1.0891804557407723'"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Agent run with tracing using a chat model\n",
+    "agent = initialize_agent(\n",
+    "    tools, ChatOpenAI(temperature=0), agent=\"chat-zero-shot-react-description\", verbose=True\n",
+    ")\n",
+    "\n",
+    "agent.run(\"What is 2 raised to .123243 power?\")"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
-  "id": "25addd7f",
+  "id": "76abfd82",
  "metadata": {},
  "outputs": [],
  "source": []
@@ -112,7 +208,7 @@
  "name": "python",
  "nbconvert_exporter": "python",
  "pygments_lexer": "ipython3",
-  "version": "3.10.9"
+  "version": "3.9.1"
 }
},
"nbformat": 4,
@@ -48,6 +48,7 @@ from langchain.utilities.google_search import GoogleSearchAPIWrapper
 from langchain.utilities.google_serper import GoogleSerperAPIWrapper
 from langchain.utilities.searx_search import SearxSearchWrapper
 from langchain.utilities.serpapi import SerpAPIWrapper
+from langchain.utilities.wikipedia import WikipediaAPIWrapper
 from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper
 from langchain.vectorstores import FAISS, ElasticVectorSearch
 
@@ -70,6 +71,7 @@ __all__ = [
     "GoogleSearchAPIWrapper",
     "GoogleSerperAPIWrapper",
     "WolframAlphaAPIWrapper",
+    "WikipediaAPIWrapper",
     "Anthropic",
     "Banana",
     "CerebriumAI",
@@ -47,7 +47,10 @@ class Agent(BaseModel):
 
     @property
     def _stop(self) -> List[str]:
-        return [f"\n{self.observation_prefix}", f"\n\t{self.observation_prefix}"]
+        return [
+            f"\n{self.observation_prefix.rstrip()}",
+            f"\n\t{self.observation_prefix.rstrip()}",
+        ]
 
     def _construct_scratchpad(
         self, intermediate_steps: List[Tuple[AgentAction, str]]
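A quick illustration of why the `rstrip()` matters: models (chat models in particular) may emit `\nObservation:` without the trailing space, so stripping the prefix makes the stop sequence match both variants.

```python
observation_prefix = "Observation: "

# before: the stop tokens kept the trailing space and missed "\nObservation:"
old_stop = [f"\n{observation_prefix}", f"\n\t{observation_prefix}"]

# after: stripped prefixes match with or without a trailing space
new_stop = [f"\n{observation_prefix.rstrip()}", f"\n\t{observation_prefix.rstrip()}"]
print(new_stop)  # ['\nObservation:', '\n\tObservation:']
```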
@@ -3,13 +3,15 @@ PREFIX = """Answer the following questions as best you can. You have access to t
 FORMAT_INSTRUCTIONS = """The way you use the tools is by specifying a json blob.
 Specifically, this json should have a `action` key (with the name of the tool to use) and a `action_input` key (with the input to the tool going here).
 
 The only values that should be in the "action" field are: {tool_names}
 
+The $JSON_BLOB should only contain a SINGLE action, do NOT return a list of multiple actions. Here is an example of a valid $JSON_BLOB:
+
 ```
-{{
-    "action": "calculator",
-    "action_input": "1 + 2"
-}}
+{{{{
+    "action": $TOOL_NAME,
+    "action_input": $INPUT
+}}}}
 ```
 
 ALWAYS use the following format:
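The jump from doubled to quadrupled braces suggests this template text now passes through `str.format` twice (once to fill in `{tool_names}`, once when the final prompt is rendered), and each pass halves the braces. A small sketch of that escaping:

```python
blob = '{{{{\n    "action": $TOOL_NAME,\n    "action_input": $INPUT\n}}}}'
after_first_pass = blob.format()          # braces become {{ ... }}
after_second_pass = after_first_pass.format()  # braces become { ... }
print(after_second_pass)
```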
@@ -9,12 +9,13 @@ from langchain.chains.api.base import APIChain
 from langchain.chains.llm_math.base import LLMMathChain
 from langchain.chains.pal.base import PALChain
 from langchain.llms.base import BaseLLM
-from langchain.tools.python.tool import PythonREPLTool
 from langchain.requests import RequestsWrapper
 from langchain.tools.base import BaseTool
 from langchain.tools.bing_search.tool import BingSearchRun
 from langchain.tools.google_search.tool import GoogleSearchResults, GoogleSearchRun
+from langchain.tools.python.tool import PythonREPLTool
 from langchain.tools.requests.tool import RequestsGetTool
+from langchain.tools.wikipedia.tool import WikipediaQueryRun
 from langchain.tools.wolfram_alpha.tool import WolframAlphaQueryRun
 from langchain.utilities.bash import BashProcess
 from langchain.utilities.bing_search import BingSearchAPIWrapper
@@ -22,6 +23,7 @@ from langchain.utilities.google_search import GoogleSearchAPIWrapper
 from langchain.utilities.google_serper import GoogleSerperAPIWrapper
 from langchain.utilities.searx_search import SearxSearchWrapper
 from langchain.utilities.serpapi import SerpAPIWrapper
+from langchain.utilities.wikipedia import WikipediaAPIWrapper
 from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper
 
 
@@ -124,6 +126,10 @@ def _get_google_search(**kwargs: Any) -> BaseTool:
     return GoogleSearchRun(api_wrapper=GoogleSearchAPIWrapper(**kwargs))
 
 
+def _get_wikipedia(**kwargs: Any) -> BaseTool:
+    return WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(**kwargs))
+
+
 def _get_google_serper(**kwargs: Any) -> BaseTool:
     return Tool(
         name="Serper Search",
@@ -173,6 +179,7 @@ _EXTRA_OPTIONAL_TOOLS = {
     "google-serper": (_get_google_serper, ["serper_api_key"]),
     "serpapi": (_get_serpapi, ["serpapi_api_key", "aiosession"]),
     "searx-search": (_get_searx_search, ["searx_host"]),
+    "wikipedia": (_get_wikipedia, ["top_k_results"]),
 }
@@ -25,6 +25,7 @@ from langchain.chains.sql_database.base import (
 )
 from langchain.chains.transform import TransformChain
 from langchain.chains.vector_db_qa.base import VectorDBQA
+from langchain.chains.structured_data_extraction.base import StructuredDataExtractionChain
 
 __all__ = [
     "ConversationChain",
@@ -52,4 +53,5 @@ __all__ = [
     "ChatVectorDBChain",
     "GraphQAChain",
     "ConstitutionalChain",
+    "StructuredDataExtractionChain"
 ]
@@ -1,6 +1,9 @@
 """Memory modules for conversation prompts."""
 
-from langchain.memory.buffer import ConversationBufferMemory
+from langchain.memory.buffer import (
+    ConversationBufferMemory,
+    ConversationStringBufferMemory,
+)
 from langchain.memory.buffer_window import ConversationBufferWindowMemory
 from langchain.memory.combined import CombinedMemory
 from langchain.memory.entity import ConversationEntityMemory
@@ -18,4 +21,5 @@ __all__ = [
     "ConversationEntityMemory",
     "ConversationBufferMemory",
     "CombinedMemory",
+    "ConversationStringBufferMemory",
 ]
langchain/chains/structured_data_extraction/base.py (new file, 45 lines)

@@ -0,0 +1,45 @@
from __future__ import annotations

from typing import Any, Dict, List

from pydantic import root_validator

from langchain.chains.base import Chain


class StructuredDataExtractionChain(Chain):
    """Chain that uses the kor library to extract structured data from text."""

    kor_extractor: Any
    kor_schema: Any
    input_key: str = "text"
    output_key: str = "info"

    @property
    def _chain_type(self) -> str:
        raise NotImplementedError

    @property
    def input_keys(self) -> List[str]:
        return [self.input_key]

    @property
    def output_keys(self) -> List[str]:
        return [self.output_key]

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the kor python package is installed."""
        try:
            import kor  # noqa: F401
        except ImportError:
            raise ValueError(
                "Could not import kor python package. "
                "Please install it with `pip install kor`."
            )
        return values

    def _call(self, inputs: Dict[str, str]) -> Dict[str, str]:
        result = self.kor_extractor(inputs[self.input_key], self.kor_schema)
        return {self.output_key: result}

    async def _acall(self, inputs: Dict[str, str]) -> Dict[str, str]:
        pass
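A hedged end-to-end sketch combining this chain with the setup from the new notebook above; whether a kor `Extractor` accepts `(text, schema)` positionally, as `_call` assumes, depends on the kor version.

```python
from kor.extraction import Extractor
from kor.llms import OpenAIChatCompletion
from kor.nodes import Text

from langchain.chains import StructuredDataExtractionChain

llm = OpenAIChatCompletion(model="gpt-3.5-turbo")
schema = Text(
    id="first_name",
    description="The first name of a person",
    examples=[("I am billy.", "billy")],
)

chain = StructuredDataExtractionChain(
    kor_extractor=Extractor(llm),  # stored as-is; invoked as extractor(text, schema)
    kor_schema=schema,
)
print(chain.run("John Smith is 33 years old"))  # returned under the "info" key
```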
@@ -60,13 +60,44 @@ class BaseChatModel(BaseLanguageModel, BaseModel, ABC):
         self, prompts: List[PromptValue], stop: Optional[List[str]] = None
     ) -> LLMResult:
         prompt_messages = [p.to_messages() for p in prompts]
-        return self.generate(prompt_messages, stop=stop)
+        prompt_strings = [p.to_string() for p in prompts]
+        self.callback_manager.on_llm_start(
+            {"name": self.__class__.__name__}, prompt_strings, verbose=self.verbose
+        )
+        try:
+            output = self.generate(prompt_messages, stop=stop)
+        except (KeyboardInterrupt, Exception) as e:
+            self.callback_manager.on_llm_error(e, verbose=self.verbose)
+            raise e
+        self.callback_manager.on_llm_end(output, verbose=self.verbose)
+        return output
 
     async def agenerate_prompt(
         self, prompts: List[PromptValue], stop: Optional[List[str]] = None
     ) -> LLMResult:
         prompt_messages = [p.to_messages() for p in prompts]
-        return await self.agenerate(prompt_messages, stop=stop)
+        prompt_strings = [p.to_string() for p in prompts]
+        if self.callback_manager.is_async:
+            await self.callback_manager.on_llm_start(
+                {"name": self.__class__.__name__}, prompt_strings, verbose=self.verbose
+            )
+        else:
+            self.callback_manager.on_llm_start(
+                {"name": self.__class__.__name__}, prompt_strings, verbose=self.verbose
+            )
+        try:
+            output = await self.agenerate(prompt_messages, stop=stop)
+        except (KeyboardInterrupt, Exception) as e:
+            if self.callback_manager.is_async:
+                await self.callback_manager.on_llm_error(e, verbose=self.verbose)
+            else:
+                self.callback_manager.on_llm_error(e, verbose=self.verbose)
+            raise e
+        if self.callback_manager.is_async:
+            await self.callback_manager.on_llm_end(output, verbose=self.verbose)
+        else:
+            self.callback_manager.on_llm_end(output, verbose=self.verbose)
+        return output
 
     @abstractmethod
     def _generate(
@@ -4,6 +4,7 @@ from langchain.document_loaders.airbyte_json import AirbyteJSONLoader
 from langchain.document_loaders.azlyrics import AZLyricsLoader
 from langchain.document_loaders.college_confidential import CollegeConfidentialLoader
 from langchain.document_loaders.conllu import CoNLLULoader
+from langchain.document_loaders.csv import CSVLoader
 from langchain.document_loaders.directory import DirectoryLoader
 from langchain.document_loaders.docx import UnstructuredDocxLoader
 from langchain.document_loaders.email import UnstructuredEmailLoader
@@ -19,6 +20,7 @@ from langchain.document_loaders.html import UnstructuredHTMLLoader
 from langchain.document_loaders.ifixit import IFixitLoader
 from langchain.document_loaders.image import UnstructuredImageLoader
 from langchain.document_loaders.imsdb import IMSDbLoader
+from langchain.document_loaders.markdown import UnstructuredMarkdownLoader
 from langchain.document_loaders.notebook import NotebookLoader
 from langchain.document_loaders.notion import NotionDirectoryLoader
 from langchain.document_loaders.obsidian import ObsidianLoader
@@ -66,6 +68,7 @@ __all__ = [
     "ObsidianLoader",
     "UnstructuredDocxLoader",
     "UnstructuredEmailLoader",
+    "UnstructuredMarkdownLoader",
     "RoamLoader",
     "YoutubeLoader",
     "S3FileLoader",
@@ -94,4 +97,5 @@ __all__ = [
     "CoNLLULoader",
     "GoogleApiYoutubeLoader",
     "GoogleApiClient",
+    "CSVLoader",
 ]
langchain/document_loaders/csv.py (new file, 47 lines)

@@ -0,0 +1,47 @@
from csv import DictReader
from typing import Dict, List, Optional

from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader


class CSVLoader(BaseLoader):
    """Loads a CSV file into a list of documents.

    Each document represents one row of the CSV file. Every row is converted into a
    key/value pair and outputted to a new line in the document's page_content.

    Output Example:
        .. code-block:: txt

            column1: value1
            column2: value2
            column3: value3
    """

    def __init__(self, file_path: str, csv_args: Optional[Dict] = None):
        self.file_path = file_path
        if csv_args is None:
            self.csv_args = {
                "delimiter": ",",
                "quotechar": '"',
            }
        else:
            self.csv_args = csv_args

    def load(self) -> List[Document]:
        docs = []

        with open(self.file_path, newline="") as csvfile:
            csv = DictReader(csvfile, **self.csv_args)  # type: ignore
            for row in csv:
                docs.append(
                    Document(
                        page_content="\n".join(
                            f"{k.strip()}: {v.strip()}" for k, v in row.items()
                        ),
                        metadata={"source": self.file_path},
                    )
                )

        return docs
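A short usage sketch of this loader against the MLB payroll CSV added earlier in this comparison; the file path is illustrative.

```python
from langchain.document_loaders import CSVLoader

loader = CSVLoader("mlb_teams_2012.csv")  # illustrative path
docs = loader.load()

# one Document per CSV row; page_content holds "column: value" lines
print(docs[0].page_content)
print(docs[0].metadata)  # {'source': 'mlb_teams_2012.csv'}
```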
@@ -12,9 +12,26 @@ class GitbookLoader(WebBaseLoader):
     2. load all (relative) paths in the navbar.
     """
 
-    def __init__(self, web_page: str, load_all_paths: bool = False):
-        """Initialize with web page and whether to load all paths."""
+    def __init__(
+        self,
+        web_page: str,
+        load_all_paths: bool = False,
+        base_url: Optional[str] = None,
+    ):
+        """Initialize with web page and whether to load all paths.
+
+        Args:
+            web_page: The web page to load or the starting point from where
+                relative paths are discovered.
+            load_all_paths: If set to True, all relative paths in the navbar
+                are loaded instead of only `web_page`.
+            base_url: If `load_all_paths` is True, the relative paths are
+                appended to this base url. Defaults to `web_page` if not set.
+        """
         super().__init__(web_page)
+        self.base_url = base_url or web_page
+        if self.base_url.endswith("/"):
+            self.base_url = self.base_url[:-1]
         self.load_all_paths = load_all_paths
 
     def load(self) -> List[Document]:
@@ -24,7 +41,7 @@ class GitbookLoader(WebBaseLoader):
         relative_paths = self._get_paths(soup_info)
         documents = []
         for path in relative_paths:
-            url = self.web_path + path
+            url = self.base_url + path
             print(f"Fetching text from {url}")
             soup_info = self._scrape(url)
             documents.append(self._get_document(soup_info, url))
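A usage sketch of the new `base_url` parameter; the site URLs are hypothetical.

```python
from langchain.document_loaders import GitbookLoader

# start from one docs page, crawl every path in the navbar, and resolve
# those relative paths against base_url (a trailing "/" would be stripped)
loader = GitbookLoader(
    "https://docs.example.com/getting-started",
    load_all_paths=True,
    base_url="https://docs.example.com",
)
docs = loader.load()
```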
@@ -1,4 +1,4 @@
-"""Loader that loads PDF files."""
+"""Loader that uses unstructured to load HTML files."""
 from typing import List
 
 from langchain.document_loaders.unstructured import UnstructuredFileLoader
langchain/document_loaders/markdown.py (new file, 25 lines)

@@ -0,0 +1,25 @@
"""Loader that loads Markdown files."""
from typing import List

from langchain.document_loaders.unstructured import UnstructuredFileLoader


class UnstructuredMarkdownLoader(UnstructuredFileLoader):
    """Loader that uses unstructured to load markdown files."""

    def _get_elements(self) -> List:
        from unstructured.__version__ import __version__ as __unstructured_version__
        from unstructured.partition.md import partition_md

        # NOTE(MthwRobinson) - enables the loader to work when you're using pre-release
        # versions of unstructured like 0.4.17-dev1
        _unstructured_version = __unstructured_version__.split("-")[0]
        unstructured_version = tuple([int(x) for x in _unstructured_version.split(".")])

        if unstructured_version < (0, 4, 16):
            raise ValueError(
                f"You are on unstructured version {__unstructured_version__}. "
                "Partitioning markdown files is only supported in unstructured>=0.4.16."
            )

        return partition_md(filename=self.file_path)
@@ -1,4 +1,4 @@
-"""Loader that loads PDF files."""
+"""Loader that uses unstructured to load HTML files."""
 from typing import List
 
 from langchain.docstore.document import Document
@@ -50,8 +50,9 @@ class VectorstoreIndexCreator(BaseModel):
     """Logic for creating indexes."""
 
     vectorstore_cls: Type[VectorStore] = Chroma
-    embedding: Embeddings = Field(default_factory=OpenAIEmbeddings)
     text_splitter: TextSplitter = Field(default_factory=_get_default_text_splitter)
+    embedding: Embeddings = Field(default_factory=OpenAIEmbeddings)
+    vectorstore_kwargs: dict = Field(default_factory=dict)
 
     class Config:
         """Configuration for this pydantic object."""
@@ -65,5 +66,7 @@ class VectorstoreIndexCreator(BaseModel):
         for loader in loaders:
             docs.extend(loader.load())
         sub_docs = self.text_splitter.split_documents(docs)
-        vectorstore = self.vectorstore_cls.from_documents(sub_docs, self.embedding)
+        vectorstore = self.vectorstore_cls.from_documents(
+            sub_docs, self.embedding, **self.vectorstore_kwargs
+        )
         return VectorStoreIndexWrapper(vectorstore=vectorstore)
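A sketch of the new `vectorstore_kwargs` passthrough, assuming the creator's `from_loaders` entry point; `persist_directory` is a hypothetical Chroma-style keyword shown only to illustrate the plumbing.

```python
from langchain.indexes import VectorstoreIndexCreator

index_creator = VectorstoreIndexCreator(
    vectorstore_kwargs={"persist_directory": "./db"}  # forwarded to from_documents
)
# index = index_creator.from_loaders([some_loader])  # some_loader: any BaseLoader
```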
@@ -1,4 +1,7 @@
-from langchain.memory.buffer import ConversationBufferMemory
+from langchain.memory.buffer import (
+    ConversationBufferMemory,
+    ConversationStringBufferMemory,
+)
 from langchain.memory.buffer_window import ConversationBufferWindowMemory
 from langchain.memory.chat_memory import ChatMessageHistory
 from langchain.memory.combined import CombinedMemory
@@ -18,4 +21,5 @@ __all__ = [
     "ConversationEntityMemory",
     "ConversationSummaryMemory",
     "ChatMessageHistory",
+    "ConversationStringBufferMemory",
 ]
@@ -1,9 +1,9 @@
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional
 
-from pydantic import BaseModel
+from pydantic import BaseModel, root_validator
 
-from langchain.memory.chat_memory import BaseChatMemory
-from langchain.memory.utils import get_buffer_string
+from langchain.memory.chat_memory import BaseChatMemory, BaseMemory
+from langchain.memory.utils import get_buffer_string, get_prompt_input_key
 
 
 class ConversationBufferMemory(BaseChatMemory, BaseModel):
@@ -36,3 +36,55 @@ class ConversationBufferMemory(BaseChatMemory, BaseModel):
     def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
         """Return history buffer."""
         return {self.memory_key: self.buffer}
+
+
+class ConversationStringBufferMemory(BaseMemory, BaseModel):
+    """Buffer for storing conversation memory."""
+
+    human_prefix: str = "Human"
+    ai_prefix: str = "AI"
+    """Prefix to use for AI generated responses."""
+    buffer: str = ""
+    output_key: Optional[str] = None
+    input_key: Optional[str] = None
+    memory_key: str = "history"  #: :meta private:
+
+    @root_validator()
+    def validate_chains(cls, values: Dict) -> Dict:
+        """Validate that return messages is not True."""
+        if values.get("return_messages", False):
+            raise ValueError(
+                "return_messages must be False for ConversationStringBufferMemory"
+            )
+        return values
+
+    @property
+    def memory_variables(self) -> List[str]:
+        """Will always return list of memory variables.
+
+        :meta private:
+        """
+        return [self.memory_key]
+
+    def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, str]:
+        """Return history buffer."""
+        return {self.memory_key: self.buffer}
+
+    def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None:
+        """Save context from this conversation to buffer."""
+        if self.input_key is None:
+            prompt_input_key = get_prompt_input_key(inputs, self.memory_variables)
+        else:
+            prompt_input_key = self.input_key
+        if self.output_key is None:
+            if len(outputs) != 1:
+                raise ValueError(f"One output key expected, got {outputs.keys()}")
+            output_key = list(outputs.keys())[0]
+        else:
+            output_key = self.output_key
+        human = f"{self.human_prefix}: " + inputs[prompt_input_key]
+        ai = f"{self.ai_prefix}: " + outputs[output_key]
+        self.buffer += "\n" + "\n".join([human, ai])
+
+    def clear(self) -> None:
+        """Clear memory contents."""
+        self.buffer = ""
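A small usage sketch of the restored string-buffer memory, following the `save_context` logic above.

```python
from langchain.memory import ConversationStringBufferMemory

memory = ConversationStringBufferMemory()
memory.save_context({"input": "hi"}, {"output": "hello!"})

# the buffer is a plain string with Human/AI prefixed turns
print(memory.load_memory_variables({}))
# {'history': '\nHuman: hi\nAI: hello!'}
```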
@@ -7,6 +7,7 @@ from typing import Any, Callable, List, Sequence, Tuple, Type, Union
 
 from pydantic import BaseModel, Field
 
+from langchain.memory.buffer import get_buffer_string
 from langchain.prompts.base import BasePromptTemplate, StringPromptTemplate
 from langchain.prompts.prompt import PromptTemplate
 from langchain.schema import (
@@ -111,7 +112,7 @@ class ChatPromptValue(PromptValue):
 
     def to_string(self) -> str:
         """Return prompt as string."""
-        return str(self.messages)
+        return get_buffer_string(self.messages)
 
     def to_messages(self) -> List[BaseMessage]:
         """Return prompt as messages."""
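The practical effect: `to_string` now renders a readable transcript instead of the `repr` of the message list. A sketch, assuming `get_buffer_string` uses the usual `Human:`/`AI:` prefixes:

```python
from langchain.schema import AIMessage, HumanMessage

messages = [HumanMessage(content="hi"), AIMessage(content="hello")]
# old: str(messages)               -> "[HumanMessage(content='hi', ...), ...]"
# new: get_buffer_string(messages) -> "Human: hi\nAI: hello"
```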
langchain/text_splitter.py
@@ -1,6 +1,7 @@
 """Functionality for splitting text."""
 from __future__ import annotations

+import copy
 import logging
 from abc import ABC, abstractmethod
 from typing import (
@@ -51,7 +52,10 @@ class TextSplitter(ABC):
         documents = []
         for i, text in enumerate(texts):
             for chunk in self.split_text(text):
-                documents.append(Document(page_content=chunk, metadata=_metadatas[i]))
+                new_doc = Document(
+                    page_content=chunk, metadata=copy.deepcopy(_metadatas[i])
+                )
+                documents.append(new_doc)
         return documents

     def split_documents(self, documents: List[Document]) -> List[Document]:
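Why the `deepcopy` matters: every chunk split from one input text previously received a reference to the same metadata dict, so mutating one document's metadata silently changed its siblings. A stand-in illustration with plain dicts (the new `test_metadata_not_shallow` further down checks the real behavior):

```python
import copy

# Old behavior: chunks from one text shared a single metadata dict.
meta = {"source": "1"}
shared = [{"page_content": "foo", "metadata": meta},
          {"page_content": "bar", "metadata": meta}]
shared[0]["metadata"]["seen"] = True
print(shared[1]["metadata"])    # {'source': '1', 'seen': True} -- mutation leaks

# New behavior: each chunk gets its own deep copy of the metadata.
meta = {"source": "1"}
isolated = [{"page_content": "foo", "metadata": copy.deepcopy(meta)},
            {"page_content": "bar", "metadata": copy.deepcopy(meta)}]
isolated[0]["metadata"]["seen"] = True
print(isolated[1]["metadata"])  # {'source': '1'} -- unaffected
```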
1 langchain/tools/wikipedia/__init__.py Normal file
@@ -0,0 +1 @@
+"""Wikipedia API toolkit."""
25 langchain/tools/wikipedia/tool.py Normal file
@@ -0,0 +1,25 @@
+"""Tool for the Wikipedia API."""
+
+from langchain.tools.base import BaseTool
+from langchain.utilities.wikipedia import WikipediaAPIWrapper
+
+
+class WikipediaQueryRun(BaseTool):
+    """Tool that adds the capability to search using the Wikipedia API."""
+
+    name = "Wikipedia"
+    description = (
+        "A wrapper around Wikipedia. "
+        "Useful for when you need to answer general questions about "
+        "people, places, companies, historical events, or other subjects. "
+        "Input should be a search query."
+    )
+    api_wrapper: WikipediaAPIWrapper
+
+    def _run(self, query: str) -> str:
+        """Use the Wikipedia tool."""
+        return self.api_wrapper.run(query)
+
+    async def _arun(self, query: str) -> str:
+        """Use the Wikipedia tool asynchronously."""
+        raise NotImplementedError("WikipediaQueryRun does not support async")
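A sketch of wiring the new tool to the wrapper it delegates to (assumes the `wikipedia` package is installed; results depend on live Wikipedia content):

```python
from langchain.tools.wikipedia.tool import WikipediaQueryRun
from langchain.utilities.wikipedia import WikipediaAPIWrapper

tool = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
# BaseTool.run wraps _run with callback/verbosity handling;
# the tool itself just forwards the query to the wrapper.
print(tool.run("LangChain"))
```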
langchain/utilities/__init__.py
@@ -7,6 +7,7 @@ from langchain.utilities.google_search import GoogleSearchAPIWrapper
 from langchain.utilities.google_serper import GoogleSerperAPIWrapper
 from langchain.utilities.searx_search import SearxSearchWrapper
 from langchain.utilities.serpapi import SerpAPIWrapper
+from langchain.utilities.wikipedia import WikipediaAPIWrapper
 from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper

 __all__ = [
@@ -19,4 +20,5 @@ __all__ = [
     "SerpAPIWrapper",
     "SearxSearchWrapper",
     "BingSearchAPIWrapper",
+    "WikipediaAPIWrapper",
 ]
langchain/utilities/searx_search.py
@@ -1,4 +1,4 @@
-"""Chain that calls SearxNG meta search API.
+"""Utility for using SearxNG meta search API.

 SearxNG is a privacy-friendly free metasearch engine that aggregates results from
 `multiple search engines
@@ -15,7 +15,7 @@ Quick Start
 -----------


-In order to use this chain you need to provide the searx host. This can be done
+In order to use this tool you need to provide the searx host. This can be done
 by passing the named parameter :attr:`searx_host <SearxSearchWrapper.searx_host>`
 or exporting the environment variable SEARX_HOST.
 Note: this is the only required parameter.
56 langchain/utilities/wikipedia.py Normal file
@@ -0,0 +1,56 @@
+"""Util that calls Wikipedia."""
+from typing import Any, Dict, Optional
+
+from pydantic import BaseModel, Extra, root_validator
+
+
+class WikipediaAPIWrapper(BaseModel):
+    """Wrapper around WikipediaAPI.
+
+    To use, you should have the ``wikipedia`` python package installed.
+    This wrapper will use the Wikipedia API to conduct searches and
+    fetch page summaries. By default, it will return the page summaries
+    of the top-k results of an input search.
+    """
+
+    wiki_client: Any  #: :meta private:
+    top_k_results: int = 3
+
+    class Config:
+        """Configuration for this pydantic object."""
+
+        extra = Extra.forbid
+
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate that the python package exists in environment."""
+        try:
+            import wikipedia
+
+            values["wiki_client"] = wikipedia
+        except ImportError:
+            raise ValueError(
+                "Could not import wikipedia python package. "
+                "Please install it with `pip install wikipedia`."
+            )
+        return values
+
+    def run(self, query: str) -> str:
+        """Run Wikipedia search and get page summaries."""
+        search_results = self.wiki_client.search(query)
+        summaries = []
+        for i in range(min(self.top_k_results, len(search_results))):
+            summary = self.fetch_formatted_page_summary(search_results[i])
+            if summary is not None:
+                summaries.append(summary)
+        return "\n\n".join(summaries)
+
+    def fetch_formatted_page_summary(self, page: str) -> Optional[str]:
+        try:
+            wiki_page = self.wiki_client.page(title=page)
+            return f"Page: {page}\nSummary: {wiki_page.summary}"
+        except (
+            self.wiki_client.exceptions.PageError,
+            self.wiki_client.exceptions.DisambiguationError,
+        ):
+            return None
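Direct use of the wrapper, independent of the tool layer (assumes the `wikipedia` package is installed; `top_k_results` is the only knob besides the query):

```python
from langchain.utilities import WikipediaAPIWrapper

# top_k_results controls how many page summaries are concatenated.
wiki = WikipediaAPIWrapper(top_k_results=1)
print(wiki.run("Python (programming language)"))
# Page: Python (programming language)
# Summary: ... (live content, will vary)
```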
langchain/vectorstores/chroma.py
@@ -16,6 +16,23 @@ if TYPE_CHECKING:
 logger = logging.getLogger()


+def _results_to_docs(results: Any) -> List[Document]:
+    return [doc for doc, _ in _results_to_docs_and_scores(results)]
+
+
+def _results_to_docs_and_scores(results: Any) -> List[Tuple[Document, float]]:
+    return [
+        # TODO: Chroma can do batch querying,
+        # we shouldn't hard code to the 1st result
+        (Document(page_content=result[0], metadata=result[1]), result[2])
+        for result in zip(
+            results["documents"][0],
+            results["metadatas"][0],
+            results["distances"][0],
+        )
+    ]
+
+
 class Chroma(VectorStore):
     """Wrapper around ChromaDB embeddings platform.
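For orientation, the nested shape of a chromadb query result that these helpers unpack: results come back as one list per query, and since LangChain issues a single query at a time the helpers index `[0]` (the hard-coding the TODO refers to):

```python
# Illustrative shape for a single query with k=2 (values are made up):
results = {
    "documents": [["foo", "bar"]],                      # results["documents"][0]
    "metadatas": [[{"source": "a"}, {"source": "b"}]],  # results["metadatas"][0]
    "distances": [[0.12, 0.37]],                        # results["distances"][0]
}
```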
@@ -61,22 +78,12 @@ class Chroma(VectorStore):
         self._client = chromadb.Client(self._client_settings)
         self._embedding_function = embedding_function
         self._persist_directory = persist_directory
-
-        # Check if the collection exists, create it if not
-        if collection_name in [col.name for col in self._client.list_collections()]:
-            self._collection = self._client.get_collection(name=collection_name)
-            # TODO: Persist the user's embedding function
-            logger.warning(
-                f"Collection {collection_name} already exists,"
-                " Do you have the right embedding function?"
-            )
-        else:
-            self._collection = self._client.create_collection(
-                name=collection_name,
-                embedding_function=self._embedding_function.embed_documents
-                if self._embedding_function is not None
-                else None,
-            )
+        self._collection = self._client.get_or_create_collection(
+            name=collection_name,
+            embedding_function=self._embedding_function.embed_documents
+            if self._embedding_function is not None
+            else None,
+        )

     def add_texts(
         self,
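Design note: `get_or_create_collection` is chromadb's idempotent API, so the manual existence check (and the race window between `list_collections` and `create_collection`) goes away, along with the mismatched-embedding-function warning. A minimal sketch of the caller-visible behavior (hypothetical collection name; requires `chromadb` and an embeddings backend such as OpenAI with an API key set):

```python
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma

emb = OpenAIEmbeddings()
# Both constructions now resolve to the same collection; no branching,
# no warning on the second call.
db_first = Chroma(collection_name="demo", embedding_function=emb)
db_again = Chroma(collection_name="demo", embedding_function=emb)
```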
@@ -126,6 +133,22 @@ class Chroma(VectorStore):
         docs_and_scores = self.similarity_search_with_score(query, k)
         return [doc for doc, _ in docs_and_scores]

+    def similarity_search_by_vector(
+        self,
+        embedding: List[float],
+        k: int = 4,
+        **kwargs: Any,
+    ) -> List[Document]:
+        """Return docs most similar to embedding vector.
+        Args:
+            embedding: Embedding to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+        Returns:
+            List of Documents most similar to the query vector.
+        """
+        results = self._collection.query(query_embeddings=embedding, n_results=k)
+        return _results_to_docs(results)
+
     def similarity_search_with_score(
         self,
         query: str,
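A sketch of the new entry point (assumes `chromadb` is installed and an OpenAI key for the embeddings; any `Embeddings` implementation works):

```python
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma

embeddings = OpenAIEmbeddings()
db = Chroma.from_texts(["foo", "bar", "baz"], embeddings)

# similarity_search embeds the query string for you; the new method skips
# that step when you already hold a vector (e.g. precomputed or cached).
vector = embeddings.embed_query("foo")
docs = db.similarity_search_by_vector(vector, k=2)
```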
@@ -154,17 +177,7 @@ class Chroma(VectorStore):
             query_embeddings=[query_embedding], n_results=k, where=filter
         )

-        docs = [
-            # TODO: Chroma can do batch querying,
-            # we shouldn't hard code to the 1st result
-            (Document(page_content=result[0], metadata=result[1]), result[2])
-            for result in zip(
-                results["documents"][0],
-                results["metadatas"][0],
-                results["distances"][0],
-            )
-        ]
-        return docs
+        return _results_to_docs_and_scores(results)

     def delete_collection(self) -> None:
         """Delete the collection."""
@@ -201,12 +214,13 @@ class Chroma(VectorStore):
         Otherwise, the data will be ephemeral in-memory.

         Args:
+            texts (List[str]): List of texts to add to the collection.
             collection_name (str): Name of the collection to create.
             persist_directory (Optional[str]): Directory to persist the collection.
-            documents (List[Document]): List of documents to add.
             embedding (Optional[Embeddings]): Embedding function. Defaults to None.
             metadatas (Optional[List[dict]]): List of metadatas. Defaults to None.
             ids (Optional[List[str]]): List of document IDs. Defaults to None.
             client_settings (Optional[chromadb.config.Settings]): Chroma client settings

         Returns:
             Chroma: Chroma vectorstore.
@@ -239,9 +253,10 @@ class Chroma(VectorStore):
         Args:
             collection_name (str): Name of the collection to create.
             persist_directory (Optional[str]): Directory to persist the collection.
+            ids (Optional[List[str]]): List of document IDs. Defaults to None.
             documents (List[Document]): List of documents to add to the vectorstore.
             embedding (Optional[Embeddings]): Embedding function. Defaults to None.

             client_settings (Optional[chromadb.config.Settings]): Chroma client settings
         Returns:
             Chroma: Chroma vectorstore.
         """
pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "langchain"
-version = "0.0.106"
+version = "0.0.107"
 description = "Building applications with LLMs through composability"
 authors = []
 license = "MIT"
tests/unit_tests/prompts/test_chat.py
@@ -70,12 +70,10 @@ def test_chat_prompt_template() -> None:

     string = prompt.to_string()
     expected = (
-        '[SystemMessage(content="Here\'s some context: context", '
-        'additional_kwargs={}), HumanMessage(content="Hello foo, '
-        "I'm bar. Thanks for the context\", additional_kwargs={}), "
-        "AIMessage(content=\"I'm an AI. I'm foo. I'm bar.\", additional_kwargs={}), "
-        "ChatMessage(content=\"I'm a generic message. I'm foo. I'm bar.\","
-        " additional_kwargs={}, role='test')]"
+        "System: Here's some context: context\n"
+        "Human: Hello foo, I'm bar. Thanks for the context\n"
+        "AI: I'm an AI. I'm foo. I'm bar.\n"
+        "test: I'm a generic message. I'm foo. I'm bar."
     )
     assert string == expected
tests/unit_tests/test_text_splitter.py
@@ -94,6 +94,21 @@ def test_create_documents_with_metadata() -> None:
     assert docs == expected_docs


+def test_metadata_not_shallow() -> None:
+    """Test that metadatas are not shallow."""
+    texts = ["foo bar"]
+    splitter = CharacterTextSplitter(separator=" ", chunk_size=3, chunk_overlap=0)
+    docs = splitter.create_documents(texts, [{"source": "1"}])
+    expected_docs = [
+        Document(page_content="foo", metadata={"source": "1"}),
+        Document(page_content="bar", metadata={"source": "1"}),
+    ]
+    assert docs == expected_docs
+    docs[0].metadata["foo"] = 1
+    assert docs[0].metadata == {"source": "1", "foo": 1}
+    assert docs[1].metadata == {"source": "1"}
+
+
 def test_iterative_text_splitter() -> None:
     """Test iterative text splitter."""
     text = """Hi.\n\nI'm Harrison.\n\nHow? Are? You?\nOkay then f f f f.