mirror of
https://github.com/hwchase17/langchain.git
synced 2026-02-12 04:01:05 +00:00
Compare commits
111 Commits
eugene/com
...
isaac/more
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
91c2dd92b1 | ||
|
|
dfab23f931 | ||
|
|
91ea4b7449 | ||
|
|
652b3fa4a4 | ||
|
|
7040013140 | ||
|
|
dc7423e88f | ||
|
|
25f2e25be1 | ||
|
|
786ef021a3 | ||
|
|
429a0ee7fd | ||
|
|
da9281feb2 | ||
|
|
c6ece6a96d | ||
|
|
86355640c3 | ||
|
|
b9f65e5038 | ||
|
|
30fb345342 | ||
|
|
98779797fe | ||
|
|
663638d6a8 | ||
|
|
2f209d84fa | ||
|
|
c72e522e96 | ||
|
|
bf5193bb99 | ||
|
|
11adc09e02 | ||
|
|
6b32810b68 | ||
|
|
3da2713172 | ||
|
|
425f6ffa5b | ||
|
|
15a36dd0a2 | ||
|
|
249945a572 | ||
|
|
59b8850909 | ||
|
|
4828c441a7 | ||
|
|
725e4912ae | ||
|
|
d895db11d6 | ||
|
|
d77c7c4236 | ||
|
|
7b1a132aff | ||
|
|
df99b832a7 | ||
|
|
803eba3163 | ||
|
|
6e9a8b188f | ||
|
|
f337408b0f | ||
|
|
0b4608f71e | ||
|
|
a4086119f8 | ||
|
|
b4c12346cc | ||
|
|
dff83cce66 | ||
|
|
130e80b60f | ||
|
|
09fbce13c5 | ||
|
|
0ba125c3cd | ||
|
|
28e0958ff4 | ||
|
|
a2e9910268 | ||
|
|
7e7fcf5b1f | ||
|
|
04dd8d3b0a | ||
|
|
63d84e93b9 | ||
|
|
4d28c70000 | ||
|
|
46a47710b0 | ||
|
|
35ebd2620c | ||
|
|
23c9aba575 | ||
|
|
1331e8589c | ||
|
|
7882d5c978 | ||
|
|
70677202c7 | ||
|
|
78403a3746 | ||
|
|
5d10139fc7 | ||
|
|
d283f452cc | ||
|
|
264ab96980 | ||
|
|
267855b3c1 | ||
|
|
71c0698ee4 | ||
|
|
a72fddbf8d | ||
|
|
2c798622cd | ||
|
|
3abf1b6905 | ||
|
|
1028af17e7 | ||
|
|
061ed250f6 | ||
|
|
293a4a78de | ||
|
|
ffa0c838d8 | ||
|
|
111c7df117 | ||
|
|
6eb42c657e | ||
|
|
88a9a6a758 | ||
|
|
8f33fce871 | ||
|
|
423d286546 | ||
|
|
e572521f2a | ||
|
|
63ddf0afb4 | ||
|
|
4bcd2aad6c | ||
|
|
427a04151c | ||
|
|
d166967003 | ||
|
|
a74e466507 | ||
|
|
a02a09c973 | ||
|
|
41dfad5104 | ||
|
|
e7b95e0802 | ||
|
|
16bd0697dc | ||
|
|
6890daa90c | ||
|
|
335894893b | ||
|
|
c5cb52a3c6 | ||
|
|
cda79dbb6c | ||
|
|
cea3f72485 | ||
|
|
02c35da445 | ||
|
|
208042e0f2 | ||
|
|
f5da0d6d87 | ||
|
|
2c3e3dc6b1 | ||
|
|
7de62abc91 | ||
|
|
f9a11a9197 | ||
|
|
1dcee68cb8 | ||
|
|
e81ddb32a6 | ||
|
|
57747892ce | ||
|
|
679843abb0 | ||
|
|
73570873ab | ||
|
|
2ae76cecde | ||
|
|
4305f78e40 | ||
|
|
64ccddf3cb | ||
|
|
dd8e4cd020 | ||
|
|
0de0cd2d31 | ||
|
|
8e2316b8c2 | ||
|
|
c2538e7834 | ||
|
|
acba38a18e | ||
|
|
22c1a4041b | ||
|
|
4797b806c2 | ||
|
|
7061869aec | ||
|
|
98c22e9082 | ||
|
|
c04d95b962 |
@@ -52,7 +52,7 @@ Now:
|
||||
|
||||
`from langchain_experimental.sql import SQLDatabaseChain`
|
||||
|
||||
Alternatively, if you are just interested in using the query generation part of the SQL chain, you can check out [`create_sql_query_chain`](https://github.com/langchain-ai/langchain/blob/master/docs/extras/use_cases/tabular/sql_query.ipynb)
|
||||
Alternatively, if you are just interested in using the query generation part of the SQL chain, you can check out this [`SQL question-answering tutorial`](https://python.langchain.com/v0.2/docs/tutorials/sql_qa/#convert-question-to-sql-query)
|
||||
|
||||
`from langchain.chains import create_sql_query_chain`
|
||||
|
||||
|
||||
@@ -166,7 +166,7 @@
|
||||
"source": [
|
||||
"### SQL Database Agent example\n",
|
||||
"\n",
|
||||
"This example demonstrates the use of the [SQL Database Agent](/docs/integrations/toolkits/sql_database.html) for answering questions over a Databricks database."
|
||||
"This example demonstrates the use of the [SQL Database Agent](/docs/integrations/tools/sql_database) for answering questions over a Databricks database."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -13,7 +13,12 @@ OUTPUT_NEW_DOCS_DIR = $(OUTPUT_NEW_DIR)/docs
|
||||
|
||||
PYTHON = .venv/bin/python
|
||||
|
||||
PARTNER_DEPS_LIST := $(shell find ../libs/partners -mindepth 1 -maxdepth 1 -type d -exec test -e "{}/pyproject.toml" \; -print | grep -vE "airbyte|ibm|couchbase" | tr '\n' ' ')
|
||||
PARTNER_DEPS_LIST := $(shell find ../libs/partners -mindepth 1 -maxdepth 1 -type d -exec sh -c ' \
|
||||
for dir; do \
|
||||
if find "$$dir" -maxdepth 1 -type f \( -name "pyproject.toml" -o -name "setup.py" \) | grep -q .; then \
|
||||
echo "$$dir"; \
|
||||
fi \
|
||||
done' sh {} + | grep -vE "airbyte|ibm|couchbase" | tr '\n' ' ')
|
||||
|
||||
PORT ?= 3001
|
||||
|
||||
|
||||
@@ -15,6 +15,8 @@ from pathlib import Path
|
||||
|
||||
import toml
|
||||
from docutils import nodes
|
||||
from docutils.parsers.rst.directives.admonitions import BaseAdmonition
|
||||
from docutils.statemachine import StringList
|
||||
from sphinx.util.docutils import SphinxDirective
|
||||
|
||||
# If extensions (or modules to document with autodoc) are in another directory,
|
||||
@@ -66,8 +68,23 @@ class ExampleLinksDirective(SphinxDirective):
|
||||
return [list_node]
|
||||
|
||||
|
||||
class Beta(BaseAdmonition):
|
||||
required_arguments = 0
|
||||
node_class = nodes.admonition
|
||||
|
||||
def run(self):
|
||||
self.content = self.content or StringList(
|
||||
[
|
||||
"This feature is in beta. It is actively being worked on, so the API may change."
|
||||
]
|
||||
)
|
||||
self.arguments = self.arguments or ["Beta"]
|
||||
return super().run()
|
||||
|
||||
|
||||
def setup(app):
|
||||
app.add_directive("example_links", ExampleLinksDirective)
|
||||
app.add_directive("beta", Beta)
|
||||
|
||||
|
||||
# -- Project information -----------------------------------------------------
|
||||
|
||||
@@ -38,6 +38,8 @@ class ClassInfo(TypedDict):
|
||||
"""The kind of the class."""
|
||||
is_public: bool
|
||||
"""Whether the class is public or not."""
|
||||
is_deprecated: bool
|
||||
"""Whether the class is deprecated."""
|
||||
|
||||
|
||||
class FunctionInfo(TypedDict):
|
||||
@@ -49,6 +51,8 @@ class FunctionInfo(TypedDict):
|
||||
"""The fully qualified name of the function."""
|
||||
is_public: bool
|
||||
"""Whether the function is public or not."""
|
||||
is_deprecated: bool
|
||||
"""Whether the function is deprecated."""
|
||||
|
||||
|
||||
class ModuleMembers(TypedDict):
|
||||
@@ -121,6 +125,7 @@ def _load_module_members(module_path: str, namespace: str) -> ModuleMembers:
|
||||
qualified_name=f"{namespace}.{name}",
|
||||
kind=kind,
|
||||
is_public=not name.startswith("_"),
|
||||
is_deprecated=".. deprecated::" in (type_.__doc__ or ""),
|
||||
)
|
||||
)
|
||||
elif inspect.isfunction(type_):
|
||||
@@ -129,6 +134,7 @@ def _load_module_members(module_path: str, namespace: str) -> ModuleMembers:
|
||||
name=name,
|
||||
qualified_name=f"{namespace}.{name}",
|
||||
is_public=not name.startswith("_"),
|
||||
is_deprecated=".. deprecated::" in (type_.__doc__ or ""),
|
||||
)
|
||||
)
|
||||
else:
|
||||
@@ -255,8 +261,24 @@ def _construct_doc(
|
||||
|
||||
for module in namespaces:
|
||||
_members = members_by_namespace[module]
|
||||
classes = [el for el in _members["classes_"] if el["is_public"]]
|
||||
functions = [el for el in _members["functions"] if el["is_public"]]
|
||||
classes = [
|
||||
el
|
||||
for el in _members["classes_"]
|
||||
if el["is_public"] and not el["is_deprecated"]
|
||||
]
|
||||
functions = [
|
||||
el
|
||||
for el in _members["functions"]
|
||||
if el["is_public"] and not el["is_deprecated"]
|
||||
]
|
||||
deprecated_classes = [
|
||||
el for el in _members["classes_"] if el["is_public"] and el["is_deprecated"]
|
||||
]
|
||||
deprecated_functions = [
|
||||
el
|
||||
for el in _members["functions"]
|
||||
if el["is_public"] and el["is_deprecated"]
|
||||
]
|
||||
if not (classes or functions):
|
||||
continue
|
||||
section = f":mod:`{package_namespace}.{module}`"
|
||||
@@ -310,6 +332,54 @@ Functions
|
||||
--------------
|
||||
.. currentmodule:: {package_namespace}
|
||||
|
||||
.. autosummary::
|
||||
:toctree: {module}
|
||||
:template: function.rst
|
||||
|
||||
{fstring}
|
||||
|
||||
"""
|
||||
if deprecated_classes:
|
||||
full_doc += f"""\
|
||||
Deprecated classes
|
||||
--------------
|
||||
|
||||
.. currentmodule:: {package_namespace}
|
||||
|
||||
.. autosummary::
|
||||
:toctree: {module}
|
||||
"""
|
||||
|
||||
for class_ in sorted(deprecated_classes, key=lambda c: c["qualified_name"]):
|
||||
if class_["kind"] == "TypedDict":
|
||||
template = "typeddict.rst"
|
||||
elif class_["kind"] == "enum":
|
||||
template = "enum.rst"
|
||||
elif class_["kind"] == "Pydantic":
|
||||
template = "pydantic.rst"
|
||||
elif class_["kind"] == "RunnablePydantic":
|
||||
template = "runnable_pydantic.rst"
|
||||
elif class_["kind"] == "RunnableNonPydantic":
|
||||
template = "runnable_non_pydantic.rst"
|
||||
else:
|
||||
template = "class.rst"
|
||||
|
||||
full_doc += f"""\
|
||||
:template: {template}
|
||||
|
||||
{class_["qualified_name"]}
|
||||
|
||||
"""
|
||||
|
||||
if deprecated_functions:
|
||||
_functions = [f["qualified_name"] for f in deprecated_functions]
|
||||
fstring = "\n ".join(sorted(_functions))
|
||||
full_doc += f"""\
|
||||
Deprecated functions
|
||||
--------------
|
||||
|
||||
.. currentmodule:: {package_namespace}
|
||||
|
||||
.. autosummary::
|
||||
:toctree: {module}
|
||||
:template: function.rst
|
||||
|
||||
@@ -897,6 +897,13 @@ div.admonition {
|
||||
background-color: #eee;
|
||||
}
|
||||
|
||||
div.admonition-beta {
|
||||
color: #d35400; /* A darker rich orange color */
|
||||
background-color: #FDF2E9; /* A light orange-tinted background color */
|
||||
border-color: #E59866; /* A darker soft orange border color */
|
||||
}
|
||||
|
||||
|
||||
div.admonition p:last-child,
|
||||
div.admonition dl:last-child,
|
||||
div.admonition dd:last-child,
|
||||
@@ -912,6 +919,13 @@ div.deprecated {
|
||||
border-color: #eed3d7;
|
||||
}
|
||||
|
||||
|
||||
div.warning {
|
||||
color: #b94a48;
|
||||
background-color: #F3E5E5;
|
||||
border-color: #eed3d7;
|
||||
}
|
||||
|
||||
div.seealso {
|
||||
background-color: #FFFBE8;
|
||||
border-color: #fbeed5;
|
||||
|
||||
@@ -542,7 +542,8 @@ Typical usage may look like the following:
|
||||
```python
|
||||
tools = [...] # Define a list of tools
|
||||
llm_with_tools = llm.bind_tools(tools)
|
||||
ai_msg = llm_with_tools.invoke("do xyz...") # AIMessage(tool_calls=[ToolCall(...), ...], ...)
|
||||
ai_msg = llm_with_tools.invoke("do xyz...")
|
||||
# -> AIMessage(tool_calls=[ToolCall(...), ...], ...)
|
||||
```
|
||||
|
||||
The `AIMessage` returned from the model MAY have `tool_calls` associated with it.
|
||||
@@ -559,9 +560,14 @@ This generally looks like:
|
||||
|
||||
```python
|
||||
# You will want to previously check that the LLM returned tool calls
|
||||
tool_call = ai_msg.tool_calls[0] # ToolCall(args={...}, id=..., ...)
|
||||
tool_call = ai_msg.tool_calls[0]
|
||||
# ToolCall(args={...}, id=..., ...)
|
||||
tool_output = tool.invoke(tool_call["args"])
|
||||
tool_message = ToolMessage(content=tool_output, tool_call_id=tool_call["id"], name=tool_call["name"])
|
||||
tool_message = ToolMessage(
|
||||
content=tool_output,
|
||||
tool_call_id=tool_call["id"],
|
||||
name=tool_call["name"]
|
||||
)
|
||||
```
|
||||
|
||||
Note that the `content` field will generally be passed back to the model.
|
||||
@@ -571,7 +577,12 @@ you can transform the tool output but also pass it as an artifact (read more abo
|
||||
```python
|
||||
... # Same code as above
|
||||
response_for_llm = transform(response)
|
||||
tool_message = ToolMessage(content=response_for_llm, tool_call_id=tool_call["id"], name=tool_call["name"], artifact=tool_output)
|
||||
tool_message = ToolMessage(
|
||||
content=response_for_llm,
|
||||
tool_call_id=tool_call["id"],
|
||||
name=tool_call["name"],
|
||||
artifact=tool_output
|
||||
)
|
||||
```
|
||||
|
||||
#### Invoke with `ToolCall`
|
||||
@@ -582,9 +593,14 @@ The benefits of this are that you don't have to write the logic yourself to tran
|
||||
This generally looks like:
|
||||
|
||||
```python
|
||||
tool_call = ai_msg.tool_calls[0] # ToolCall(args={...}, id=..., ...)
|
||||
tool_call = ai_msg.tool_calls[0]
|
||||
# -> ToolCall(args={...}, id=..., ...)
|
||||
tool_message = tool.invoke(tool_call)
|
||||
# -> ToolMessage(content="tool result foobar...", tool_call_id=..., name="tool_name")
|
||||
# -> ToolMessage(
|
||||
content="tool result foobar...",
|
||||
tool_call_id=...,
|
||||
name="tool_name"
|
||||
)
|
||||
```
|
||||
|
||||
If you are invoking the tool this way and want to include an [artifact](/docs/concepts/#toolmessage) for the ToolMessage, you will need to have the tool return two things.
|
||||
|
||||
@@ -409,7 +409,7 @@
|
||||
" # When configuring the end runnable, we can then use this id to configure this field\n",
|
||||
" ConfigurableField(id=\"prompt\"),\n",
|
||||
" # This sets a default_key.\n",
|
||||
" # If we specify this key, the default LLM (ChatAnthropic initialized above) will be used\n",
|
||||
" # If we specify this key, the default prompt (asking for a joke, as initialized above) will be used\n",
|
||||
" default_key=\"joke\",\n",
|
||||
" # This adds a new option, with name `poem`\n",
|
||||
" poem=PromptTemplate.from_template(\"Write a short poem about {topic}\"),\n",
|
||||
@@ -494,7 +494,7 @@
|
||||
" # When configuring the end runnable, we can then use this id to configure this field\n",
|
||||
" ConfigurableField(id=\"prompt\"),\n",
|
||||
" # This sets a default_key.\n",
|
||||
" # If we specify this key, the default LLM (ChatAnthropic initialized above) will be used\n",
|
||||
" # If we specify this key, the default prompt (asking for a joke, as initialized above) will be used\n",
|
||||
" default_key=\"joke\",\n",
|
||||
" # This adds a new option, with name `poem`\n",
|
||||
" poem=PromptTemplate.from_template(\"Write a short poem about {topic}\"),\n",
|
||||
|
||||
@@ -13,9 +13,14 @@ the v1 namespace of Pydantic 2.
|
||||
Because Pydantic does not support mixing .v1 and .v2 objects, users should be aware of a number of issues
|
||||
when using LangChain with Pydantic.
|
||||
|
||||
:::caution
|
||||
While LangChain supports Pydantic V2 objects in some APIs (listed below), it's suggested that users keep using Pydantic V1 objects until LangChain 0.3 is released.
|
||||
:::
|
||||
|
||||
|
||||
## 1. Passing Pydantic objects to LangChain APIs
|
||||
|
||||
Most LangChain APIs that accept Pydantic objects have been updated to accept both Pydantic v1 and v2 objects.
|
||||
Most LangChain APIs for *tool usage* (see list below) have been updated to accept either Pydantic v1 or v2 objects.
|
||||
|
||||
* Pydantic v1 objects correspond to subclasses of `pydantic.BaseModel` if `pydantic 1` is installed or subclasses of `pydantic.v1.BaseModel` if `pydantic 2` is installed.
|
||||
* Pydantic v2 objects correspond to subclasses of `pydantic.BaseModel` if `pydantic 2` is installed.
|
||||
@@ -38,6 +43,7 @@ Partner packages that accept pydantic v2 objects via `bind_tools` or `with_struc
|
||||
| langchain-robocorp | Yes | >=0.0.10 |
|
||||
| langchain-openai | Yes | >=0.1.19 |
|
||||
| langchain-fireworks | Yes | >=0.1.5 |
|
||||
| langchain-aws | Yes | >=0.1.15 |
|
||||
|
||||
Additional partner packages will be updated to accept Pydantic v2 objects in the future.
|
||||
|
||||
@@ -169,4 +175,4 @@ If you need OpenAPI docs, your options are to either install Pydantic 1:
|
||||
or else to use the `APIHandler` object in LangChain to manually create the
|
||||
routes for your API.
|
||||
|
||||
See: https://python.langchain.com/v0.2/docs/langserve/#pydantic
|
||||
See: https://python.langchain.com/v0.2/docs/langserve/#pydantic
|
||||
|
||||
@@ -14,7 +14,9 @@
|
||||
"We will cover two approaches:\n",
|
||||
"\n",
|
||||
"1. Using the built-in [create_retrieval_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.retrieval.create_retrieval_chain.html), which returns sources by default;\n",
|
||||
"2. Using a simple [LCEL](/docs/concepts#langchain-expression-language-lcel) implementation, to show the operating principle."
|
||||
"2. Using a simple [LCEL](/docs/concepts#langchain-expression-language-lcel) implementation, to show the operating principle.\n",
|
||||
"\n",
|
||||
"We will also show how to structure sources into the model response, such that a model can report what specific sources it used in generating its answer."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -130,8 +132,8 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "820244ae-74b4-4593-b392-822979dd91b8",
|
||||
"execution_count": null,
|
||||
"id": "24a69b8c-024e-4e34-b827-9c9de46512a3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -211,11 +213,11 @@
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'What is Task Decomposition?',\n",
|
||||
" 'context': [Document(page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\\nComponent One: Planning#\\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\\nTask Decomposition#\\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n",
|
||||
" Document(page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\\nTask decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\", \"What are the subgoals for achieving XYZ?\", (2) by using task-specific instructions; e.g. \"Write a story outline.\" for writing a novel, or (3) with human inputs.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n",
|
||||
" Document(page_content='Resources:\\n1. Internet access for searches and information gathering.\\n2. Long Term memory management.\\n3. GPT-3.5 powered Agents for delegation of simple tasks.\\n4. File output.\\n\\nPerformance Evaluation:\\n1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities.\\n2. Constructively self-criticize your big-picture behavior constantly.\\n3. Reflect on past decisions and strategies to refine your approach.\\n4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n",
|
||||
" Document(page_content=\"(3) Task execution: Expert models execute on the specific tasks and log results.\\nInstruction:\\n\\nWith the input and the inference results, the AI assistant needs to describe the process and results. The previous stages can be formed as - User Input: {{ User Input }}, Task Planning: {{ Tasks }}, Model Selection: {{ Model Assignment }}, Task Execution: {{ Predictions }}. You must first answer the user's request in a straightforward manner. Then describe the task process and show your analysis and model inference results to the user in the first person. If inference results contain a file path, must tell the user the complete file path.\", metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'})],\n",
|
||||
" 'answer': 'Task decomposition involves breaking down a complex task into smaller and simpler steps. This process helps agents or models handle challenging tasks by dividing them into more manageable subtasks. Techniques like Chain of Thought and Tree of Thoughts are used to decompose tasks into multiple steps for better problem-solving.'}"
|
||||
" 'context': [Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\\nComponent One: Planning#\\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\\nTask Decomposition#\\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.'),\n",
|
||||
" Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\\nTask decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\", \"What are the subgoals for achieving XYZ?\", (2) by using task-specific instructions; e.g. \"Write a story outline.\" for writing a novel, or (3) with human inputs.'),\n",
|
||||
" Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Resources:\\n1. Internet access for searches and information gathering.\\n2. Long Term memory management.\\n3. GPT-3.5 powered Agents for delegation of simple tasks.\\n4. File output.\\n\\nPerformance Evaluation:\\n1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities.\\n2. Constructively self-criticize your big-picture behavior constantly.\\n3. Reflect on past decisions and strategies to refine your approach.\\n4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.'),\n",
|
||||
" Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content=\"(3) Task execution: Expert models execute on the specific tasks and log results.\\nInstruction:\\n\\nWith the input and the inference results, the AI assistant needs to describe the process and results. The previous stages can be formed as - User Input: {{ User Input }}, Task Planning: {{ Tasks }}, Model Selection: {{ Model Assignment }}, Task Execution: {{ Predictions }}. You must first answer the user's request in a straightforward manner. Then describe the task process and show your analysis and model inference results to the user in the first person. If inference results contain a file path, must tell the user the complete file path.\")],\n",
|
||||
" 'answer': 'Task decomposition involves breaking down a complex task into smaller and more manageable steps. This process helps agents or models tackle difficult tasks by dividing them into simpler subtasks or components. Task decomposition can be achieved through techniques like Chain of Thought or Tree of Thoughts, which guide the agent in breaking down tasks into sequential or branching steps.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
@@ -251,18 +253,18 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "22ea137c-1a7a-44dd-ac73-281213979957",
|
||||
"id": "1950953a-e6f1-439d-b7b9-c3bd456e388d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'What is Task Decomposition',\n",
|
||||
" 'context': [Document(page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\\nComponent One: Planning#\\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\\nTask Decomposition#\\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n",
|
||||
" Document(page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\\nTask decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\", \"What are the subgoals for achieving XYZ?\", (2) by using task-specific instructions; e.g. \"Write a story outline.\" for writing a novel, or (3) with human inputs.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n",
|
||||
" Document(page_content='The AI assistant can parse user input to several tasks: [{\"task\": task, \"id\", task_id, \"dep\": dependency_task_ids, \"args\": {\"text\": text, \"image\": URL, \"audio\": URL, \"video\": URL}}]. The \"dep\" field denotes the id of the previous task which generates a new resource that the current task relies on. A special tag \"-task_id\" refers to the generated text image, audio and video in the dependency task with id as task_id. The task MUST be selected from the following options: {{ Available Task List }}. There is a logical relationship between tasks, please note their order. If the user input can\\'t be parsed, you need to reply empty JSON. Here are several cases for your reference: {{ Demonstrations }}. The chat history is recorded as {{ Chat History }}. From this chat history, you can find the path of the user-mentioned resources for your task planning.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n",
|
||||
" Document(page_content='Fig. 11. Illustration of how HuggingGPT works. (Image source: Shen et al. 2023)\\nThe system comprises of 4 stages:\\n(1) Task planning: LLM works as the brain and parses the user requests into multiple tasks. There are four attributes associated with each task: task type, ID, dependencies, and arguments. They use few-shot examples to guide LLM to do task parsing and planning.\\nInstruction:', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'})],\n",
|
||||
" 'answer': 'Task decomposition involves breaking down complex tasks into smaller and simpler steps to make them more manageable for autonomous agents or models. This process can be achieved by techniques like Chain of Thought (CoT) or Tree of Thoughts, which guide the model to think step by step or explore multiple reasoning possibilities at each step. Task decomposition can be done through simple prompting with language models, task-specific instructions, or human inputs.'}"
|
||||
" 'context': [Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\\nComponent One: Planning#\\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\\nTask Decomposition#\\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.'),\n",
|
||||
" Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\\nTask decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\", \"What are the subgoals for achieving XYZ?\", (2) by using task-specific instructions; e.g. \"Write a story outline.\" for writing a novel, or (3) with human inputs.'),\n",
|
||||
" Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='The AI assistant can parse user input to several tasks: [{\"task\": task, \"id\", task_id, \"dep\": dependency_task_ids, \"args\": {\"text\": text, \"image\": URL, \"audio\": URL, \"video\": URL}}]. The \"dep\" field denotes the id of the previous task which generates a new resource that the current task relies on. A special tag \"-task_id\" refers to the generated text image, audio and video in the dependency task with id as task_id. The task MUST be selected from the following options: {{ Available Task List }}. There is a logical relationship between tasks, please note their order. If the user input can\\'t be parsed, you need to reply empty JSON. Here are several cases for your reference: {{ Demonstrations }}. The chat history is recorded as {{ Chat History }}. From this chat history, you can find the path of the user-mentioned resources for your task planning.'),\n",
|
||||
" Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Fig. 11. Illustration of how HuggingGPT works. (Image source: Shen et al. 2023)\\nThe system comprises of 4 stages:\\n(1) Task planning: LLM works as the brain and parses the user requests into multiple tasks. There are four attributes associated with each task: task type, ID, dependencies, and arguments. They use few-shot examples to guide LLM to do task parsing and planning.\\nInstruction:')],\n",
|
||||
" 'answer': 'Task decomposition is a technique used in artificial intelligence to break down complex tasks into smaller and more manageable subtasks. This approach helps agents or models to tackle difficult problems by dividing them into simpler steps, improving performance and interpretability. Different methods like Chain of Thought and Tree of Thoughts have been developed to enhance task decomposition in AI systems.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
@@ -279,15 +281,25 @@
|
||||
" return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# This Runnable takes a dict with keys 'input' and 'context',\n",
|
||||
"# formats them into a prompt, and generates a response.\n",
|
||||
"rag_chain_from_docs = (\n",
|
||||
" RunnablePassthrough.assign(context=(lambda x: format_docs(x[\"context\"])))\n",
|
||||
" | prompt\n",
|
||||
" | llm\n",
|
||||
" | StrOutputParser()\n",
|
||||
" {\n",
|
||||
" \"input\": lambda x: x[\"input\"], # input query\n",
|
||||
" \"context\": lambda x: format_docs(x[\"context\"]), # context\n",
|
||||
" }\n",
|
||||
" | prompt # format query and context into prompt\n",
|
||||
" | llm # generate response\n",
|
||||
" | StrOutputParser() # coerce to string\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Pass input query to retriever\n",
|
||||
"retrieve_docs = (lambda x: x[\"input\"]) | retriever\n",
|
||||
"\n",
|
||||
"# Below, we chain `.assign` calls. This takes a dict and successively\n",
|
||||
"# adds keys-- \"context\" and \"answer\"-- where the value for each key\n",
|
||||
"# is determined by a Runnable. The Runnable operates on all existing\n",
|
||||
"# keys in the dict.\n",
|
||||
"chain = RunnablePassthrough.assign(context=retrieve_docs).assign(\n",
|
||||
" answer=rag_chain_from_docs\n",
|
||||
")\n",
|
||||
@@ -302,7 +314,105 @@
|
||||
"source": [
|
||||
":::{.callout-tip}\n",
|
||||
"\n",
|
||||
"Check out the [LangSmith trace](https://smith.langchain.com/public/0cb42685-e29e-4280-a503-bef2014d7ba2/r)\n",
|
||||
"Check out the [LangSmith trace](https://smith.langchain.com/public/1c055a3b-0236-4670-a3fb-023d418ba796/r)\n",
|
||||
"\n",
|
||||
":::"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c1c17797-d965-4fd2-b8d4-d386f25dd352",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Structure sources in model response\n",
|
||||
"\n",
|
||||
"Up to this point, we've simply propagated the documents returned from the retrieval step through to the final response. But this may not illustrate what subset of information the model relied on when generating its answer. Below, we show how to structure sources into the model response, allowing the model to report what specific context it relied on for its answer.\n",
|
||||
"\n",
|
||||
"Because the above LCEL implementation is composed of [Runnable](/docs/concepts/#runnable-interface) primitives, it is straightforward to extend. Below, we make a simple change:\n",
|
||||
"\n",
|
||||
"- We use the model's tool-calling features to generate [structured output](/docs/how_to/structured_output/), consisting of an answer and list of sources. The schema for the response is represented in the `AnswerWithSources` TypedDict, below.\n",
|
||||
"- We remove the `StrOutputParser()`, as we expect `dict` output in this scenario."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "8f916b14-1b0a-4975-a62f-52f1353bde15",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from typing import List\n",
|
||||
"\n",
|
||||
"from langchain_core.runnables import RunnablePassthrough\n",
|
||||
"from typing_extensions import Annotated, TypedDict\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Desired schema for response\n",
|
||||
"class AnswerWithSources(TypedDict):\n",
|
||||
" \"\"\"An answer to the question, with sources.\"\"\"\n",
|
||||
"\n",
|
||||
" answer: str\n",
|
||||
" sources: Annotated[\n",
|
||||
" List[str],\n",
|
||||
" ...,\n",
|
||||
" \"List of sources (author + year) used to answer the question\",\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Our rag_chain_from_docs has the following changes:\n",
|
||||
"# - add `.with_structured_output` to the LLM;\n",
|
||||
"# - remove the output parser\n",
|
||||
"rag_chain_from_docs = (\n",
|
||||
" {\n",
|
||||
" \"input\": lambda x: x[\"input\"],\n",
|
||||
" \"context\": lambda x: format_docs(x[\"context\"]),\n",
|
||||
" }\n",
|
||||
" | prompt\n",
|
||||
" | llm.with_structured_output(AnswerWithSources)\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"retrieve_docs = (lambda x: x[\"input\"]) | retriever\n",
|
||||
"\n",
|
||||
"chain = RunnablePassthrough.assign(context=retrieve_docs).assign(\n",
|
||||
" answer=rag_chain_from_docs\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"response = chain.invoke({\"input\": \"What is Chain of Thought?\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "7a8fc0c5-afb3-4012-a467-3951996a6850",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{\n",
|
||||
" \"answer\": \"Chain of Thought (CoT) is a prompting technique that enhances model performance on complex tasks by instructing the model to \\\"think step by step\\\" to decompose hard tasks into smaller and simpler steps. It transforms big tasks into multiple manageable tasks and sheds light on the interpretation of the model's thinking process.\",\n",
|
||||
" \"sources\": [\n",
|
||||
" \"Wei et al. 2022\"\n",
|
||||
" ]\n",
|
||||
"}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"\n",
|
||||
"print(json.dumps(response[\"answer\"], indent=2))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7440f785-29c5-4c6b-9656-0d9d5efbac05",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
":::{.callout-tip}\n",
|
||||
"\n",
|
||||
"View [LangSmith trace](https://smith.langchain.com/public/0eeddf06-3a7b-4f27-974c-310ca8160f60/r)\n",
|
||||
"\n",
|
||||
":::"
|
||||
]
|
||||
|
||||
@@ -761,7 +761,7 @@
|
||||
"* [SQL tutorial](/docs/tutorials/sql_qa): Many of the challenges of working with SQL db's and CSV's are generic to any structured data type, so it's useful to read the SQL techniques even if you're using Pandas for CSV data analysis.\n",
|
||||
"* [Tool use](/docs/how_to/tool_calling): Guides on general best practices when working with chains and agents that invoke tools\n",
|
||||
"* [Agents](/docs/tutorials/agents): Understand the fundamentals of building LLM agents.\n",
|
||||
"* Integrations: Sandboxed envs like [E2B](/docs/integrations/tools/e2b_data_analysis) and [Bearly](/docs/integrations/tools/bearly), utilities like [SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase), related agents like [Spark DataFrame agent](/docs/integrations/toolkits/spark)."
|
||||
"* Integrations: Sandboxed envs like [E2B](/docs/integrations/tools/e2b_data_analysis) and [Bearly](/docs/integrations/tools/bearly), utilities like [SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase), related agents like [Spark DataFrame agent](/docs/integrations/tools/spark_sql)."
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -5,7 +5,6 @@ sidebar_position: 3
|
||||
|
||||
|
||||
Toolkits are collections of tools that are designed to be used together for specific tasks. They have convenient loading methods.
|
||||
For a complete list of available ready-made toolkits, visit [Integrations](/docs/integrations/toolkits/).
|
||||
|
||||
All Toolkits expose a `get_tools` method which returns a list of tools.
|
||||
You can therefore do:
|
||||
|
||||
@@ -196,8 +196,6 @@
|
||||
"\n",
|
||||
"Toolkits are collections of tools that are designed to be used together for specific tasks. They have convenient loading methods.\n",
|
||||
"\n",
|
||||
"For a complete list of available ready-made toolkits, visit [Integrations](/docs/integrations/toolkits/).\n",
|
||||
"\n",
|
||||
"All Toolkits expose a `get_tools` method which returns a list of tools.\n",
|
||||
"\n",
|
||||
"You're usually meant to use them this way:\n",
|
||||
|
||||
@@ -1,29 +1,15 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Hugging Face\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ChatHuggingFace\n",
|
||||
"\n",
|
||||
"This will help you getting started with `langchain_huggingface` [chat models](/docs/concepts/#chat-models). For detailed documentation of all `ChatHuggingFace` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/chat_models/langchain_huggingface.chat_models.huggingface.ChatHuggingFace.html). For a list of models supported by Hugging Face check out [this page](https://huggingface.co/models).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"This notebook shows how to get started using Hugging Face LLMs as chat models.\n",
|
||||
"\n",
|
||||
"In particular, we will:\n",
|
||||
"1. Utilize the [HuggingFaceEndpoint](https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/llms/huggingface_endpoint.py) integrations to instantiate an LLM.\n",
|
||||
"2. Utilize the `ChatHuggingFace` class to enable any of these LLMs to interface with LangChain's [Chat Messages](/docs/concepts/#message-types) abstraction.\n",
|
||||
"3. Explore tool calling with the `ChatHuggingFace`.\n",
|
||||
"4. Demonstrate how to use an open-source LLM to power an `ChatAgent` pipeline\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
@@ -64,7 +50,22 @@
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"Below we install additional packages as well for demonstration purposes:"
|
||||
"| Class | Package | Local | Serializable | JS support | Package downloads | Package latest |\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| [ChatHuggingFace](https://api.python.langchain.com/en/latest/chat_models/langchain_huggingface.chat_models.huggingface.ChatHuggingFace.html) | [langchain_huggingface](https://api.python.langchain.com/en/latest/huggingface_api_reference.html) | ✅ | ❌ | ❌ |  |  |\n",
|
||||
"\n",
|
||||
"### Model features\n",
|
||||
"| [Tool calling](/docs/how_to/tool_calling) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
|
||||
"| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access `langchain_huggingface` models you'll need to create a/an `Hugging Face` account, get an API key, and install the `langchain_huggingface` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"You'll need to have a [Hugging Face Access Token](https://huggingface.co/docs/hub/security-tokens) saved as an environment variable: `HUGGINGFACEHUB_API_TOKEN`."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -73,14 +74,41 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain-huggingface text-generation transformers google-search-results numexpr langchainhub sentencepiece jinja2"
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"HUGGINGFACEHUB_API_TOKEN\"] = getpass.getpass(\n",
|
||||
" \"Enter your Hugging Face API key: \"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.1.2\u001b[0m\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain-huggingface text-generation transformers google-search-results numexpr langchainhub sentencepiece jinja2 bitsandbytes accelerate"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation"
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"You can instantiate a `ChatHuggingFace` model in two different ways, either from a `HuggingFaceEndpoint` or from a `HuggingFacePipeline`."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -92,19 +120,32 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\n",
|
||||
"Token is valid (permission: fineGrained).\n",
|
||||
"Your token has been saved to /Users/isaachershenson/.cache/huggingface/token\n",
|
||||
"Login successful\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_huggingface import HuggingFaceEndpoint\n",
|
||||
"from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint\n",
|
||||
"\n",
|
||||
"llm = HuggingFaceEndpoint(\n",
|
||||
" repo_id=\"meta-llama/Meta-Llama-3-70B-Instruct\",\n",
|
||||
" repo_id=\"HuggingFaceH4/zephyr-7b-beta\",\n",
|
||||
" task=\"text-generation\",\n",
|
||||
" max_new_tokens=512,\n",
|
||||
" do_sample=False,\n",
|
||||
" repetition_penalty=1.03,\n",
|
||||
")"
|
||||
")\n",
|
||||
"\n",
|
||||
"chat_model = ChatHuggingFace(llm=llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -116,11 +157,194 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "da32ae8ec8864ccfb480044fe2eec065",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"config.json: 0%| | 0.00/638 [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "ee1891b7e5f64fba88ba35f444e598fb",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"model.safetensors.index.json: 0%| | 0.00/23.9k [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "9ff1ec7f575b42adb608c15955de7888",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"Downloading shards: 0%| | 0/8 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "5214696698814b919f561647a684d1e4",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"model-00001-of-00008.safetensors: 0%| | 0.00/1.89G [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "9ac334c69a2048a0a77340cca44d8c80",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"model-00002-of-00008.safetensors: 0%| | 0.00/1.95G [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "465ad1a51d414e0daf1cd9308455be94",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"model-00003-of-00008.safetensors: 0%| | 0.00/1.98G [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "a329c43c3d574df0afd38c7457cc639c",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"model-00004-of-00008.safetensors: 0%| | 0.00/1.95G [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "a736a6c4023542af8c6ecc232b823d18",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"model-00005-of-00008.safetensors: 0%| | 0.00/1.98G [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "8bdee70b843d433e8236fff83ecda022",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"model-00006-of-00008.safetensors: 0%| | 0.00/1.95G [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "5ecb6103e0304ae188a14d598119a361",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"model-00007-of-00008.safetensors: 0%| | 0.00/1.98G [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "174e3cb487bd453c9c70d7614254a35e",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"model-00008-of-00008.safetensors: 0%| | 0.00/816M [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "28f8c233b04b45d7800e12c785a8c4bc",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"Loading checkpoint shards: 0%| | 0/8 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "449dfa023dc8430fbcde94544ba01c4f",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"generation_config.json: 0%| | 0.00/111 [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_huggingface import HuggingFacePipeline\n",
|
||||
"from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline\n",
|
||||
"\n",
|
||||
"llm = HuggingFacePipeline.from_model_id(\n",
|
||||
" model_id=\"HuggingFaceH4/zephyr-7b-beta\",\n",
|
||||
@@ -129,8 +353,34 @@
|
||||
" max_new_tokens=512,\n",
|
||||
" do_sample=False,\n",
|
||||
" repetition_penalty=1.03,\n",
|
||||
" return_full_text=False,\n",
|
||||
" ),\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chat_model = ChatHuggingFace(llm=llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Instatiating with Quantization\n",
|
||||
"\n",
|
||||
"To run a quantized version of your model, you can specify a `bitsandbytes` quantization config as follows:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from transformers import BitsAndBytesConfig\n",
|
||||
"\n",
|
||||
"quantization_config = BitsAndBytesConfig(\n",
|
||||
" load_in_4bit=True,\n",
|
||||
" bnb_4bit_quant_type=\"nf4\",\n",
|
||||
" bnb_4bit_compute_dtype=\"float16\",\n",
|
||||
" bnb_4bit_use_double_quant=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -138,30 +388,27 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To run a quantized version, you might specify a `bitsandbytes` quantization config as follows:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"from transformers import BitsAndBytesConfig\n",
|
||||
"\n",
|
||||
"quantization_config = BitsAndBytesConfig(\n",
|
||||
" load_in_4bit=True,\n",
|
||||
" bnb_4bit_quant_type=\"nf4\",\n",
|
||||
" bnb_4bit_compute_dtype=\"float16\",\n",
|
||||
" bnb_4bit_use_double_quant=True\n",
|
||||
")\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"and pass it to the `HuggingFacePipeline` as a part of its `model_kwargs`:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"pipeline = HuggingFacePipeline(\n",
|
||||
" ...\n",
|
||||
"\n",
|
||||
"and pass it to the `HuggingFacePipeline` as a part of its `model_kwargs`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = HuggingFacePipeline.from_model_id(\n",
|
||||
" model_id=\"HuggingFaceH4/zephyr-7b-beta\",\n",
|
||||
" task=\"text-generation\",\n",
|
||||
" pipeline_kwargs=dict(\n",
|
||||
" max_new_tokens=512,\n",
|
||||
" do_sample=False,\n",
|
||||
" repetition_penalty=1.03,\n",
|
||||
" ),\n",
|
||||
" model_kwargs={\"quantization_config\": quantization_config},\n",
|
||||
" \n",
|
||||
" ...\n",
|
||||
")\n",
|
||||
"```"
|
||||
"\n",
|
||||
"chat_model = ChatHuggingFace(llm=llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -171,34 +418,16 @@
|
||||
"## Invocation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Instantiate the chat model and some messages to pass. \n",
|
||||
"\n",
|
||||
"**Note**: you need to pass the `model_id` explicitly if you are using self-hosted `text-generation-inference`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.messages import (\n",
|
||||
" HumanMessage,\n",
|
||||
" SystemMessage,\n",
|
||||
")\n",
|
||||
"from langchain_huggingface import ChatHuggingFace\n",
|
||||
"\n",
|
||||
"messages = [\n",
|
||||
" SystemMessage(content=\"You're a helpful assistant\"),\n",
|
||||
@@ -207,343 +436,35 @@
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"chat_model = ChatHuggingFace(llm=llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Check the `model_id`"
|
||||
"ai_msg = chat_model.invoke(messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'meta-llama/Meta-Llama-3-70B-Instruct'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chat_model.model_id"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Inspect how the chat messages are formatted for the LLM call."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\\n\\nYou're a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\\n\\nWhat happens when an unstoppable force meets an immovable object?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\\n\\n\""
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chat_model._to_chat_prompt(messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Call the model."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"One of the classic thought experiments in physics!\n",
|
||||
"According to the popular phrase and hypothetical scenario, when an unstoppable force meets an immovable object, a paradoxical situation arises as both forces are seemingly contradictory. On one hand, an unstoppable force is an entity that cannot be stopped or prevented from moving forward, while on the other hand, an immovable object is something that cannot be moved or displaced from its position. \n",
|
||||
"\n",
|
||||
"The concept of an unstoppable force meeting an immovable object is a paradox that has puzzled philosophers and physicists for centuries. It's a mind-bending scenario that challenges our understanding of the fundamental laws of physics.\n",
|
||||
"\n",
|
||||
"In essence, an unstoppable force is something that cannot be halted or slowed down, while an immovable object is something that cannot be moved or displaced. If we assume that both entities exist in the same universe, we run into a logical contradiction.\n",
|
||||
"\n",
|
||||
"Here\n"
|
||||
"In this scenario, it is un\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"res = chat_model.invoke(messages)\n",
|
||||
"print(res.content)"
|
||||
"print(ai_msg.content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Chaining\n",
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"We can [chain](/docs/how_to/sequence/) our model with a prompt template like so:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate(\n",
|
||||
" [\n",
|
||||
" (\n",
|
||||
" \"system\",\n",
|
||||
" \"You are a helpful assistant that translates {input_language} to {output_language}.\",\n",
|
||||
" ),\n",
|
||||
" (\"human\", \"{input}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chain = prompt | llm\n",
|
||||
"chain.invoke(\n",
|
||||
" {\n",
|
||||
" \"input_language\": \"English\",\n",
|
||||
" \"output_language\": \"German\",\n",
|
||||
" \"input\": \"I love programming.\",\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tool calling with `ChatHuggingFace`\n",
|
||||
"\n",
|
||||
"`text-generation-inference` supports tool with open source LLMs starting from v2.0.1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Create a basic tool (`Calculator`):"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class Calculator(BaseModel):\n",
|
||||
" \"\"\"Multiply two integers together.\"\"\"\n",
|
||||
"\n",
|
||||
" a: int = Field(..., description=\"First integer\")\n",
|
||||
" b: int = Field(..., description=\"Second integer\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Bind the tool to the `chat_model` and give it a try:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Calculator(a=3, b=12)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_core.output_parsers.openai_tools import PydanticToolsParser\n",
|
||||
"\n",
|
||||
"llm_with_multiply = chat_model.bind_tools([Calculator], tool_choice=\"auto\")\n",
|
||||
"parser = PydanticToolsParser(tools=[Calculator])\n",
|
||||
"tool_chain = llm_with_multiply | parser\n",
|
||||
"tool_chain.invoke(\"How much is 3 multiplied by 12?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use with agents\n",
|
||||
"\n",
|
||||
"Here we'll test out `Zephyr-7B-beta` as a zero-shot `ReAct` Agent. \n",
|
||||
"\n",
|
||||
"The agent is based on the paper [ReAct: Synergizing Reasoning and Acting in Language Models](https://arxiv.org/abs/2210.03629)\n",
|
||||
"\n",
|
||||
"The example below is taken from [here](https://python.langchain.com/v0.1/docs/modules/agents/agent_types/react/#using-chat-models).\n",
|
||||
"\n",
|
||||
"> Note: To run this section, you'll need to have a [SerpAPI Token](https://serpapi.com/) saved as an environment variable: `SERPAPI_API_KEY`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import hub\n",
|
||||
"from langchain.agents import AgentExecutor, load_tools\n",
|
||||
"from langchain.agents.format_scratchpad import format_log_to_str\n",
|
||||
"from langchain.agents.output_parsers import (\n",
|
||||
" ReActJsonSingleInputOutputParser,\n",
|
||||
")\n",
|
||||
"from langchain.tools.render import render_text_description\n",
|
||||
"from langchain_community.utilities import SerpAPIWrapper"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Configure the agent with a `react-json` style prompt and access to a search engine and calculator."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# setup tools\n",
|
||||
"tools = load_tools([\"serpapi\", \"llm-math\"], llm=llm)\n",
|
||||
"\n",
|
||||
"# setup ReAct style prompt\n",
|
||||
"prompt = hub.pull(\"hwchase17/react-json\")\n",
|
||||
"prompt = prompt.partial(\n",
|
||||
" tools=render_text_description(tools),\n",
|
||||
" tool_names=\", \".join([t.name for t in tools]),\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# define the agent\n",
|
||||
"chat_model_with_stop = chat_model.bind(stop=[\"\\nObservation\"])\n",
|
||||
"agent = (\n",
|
||||
" {\n",
|
||||
" \"input\": lambda x: x[\"input\"],\n",
|
||||
" \"agent_scratchpad\": lambda x: format_log_to_str(x[\"intermediate_steps\"]),\n",
|
||||
" }\n",
|
||||
" | prompt\n",
|
||||
" | chat_model_with_stop\n",
|
||||
" | ReActJsonSingleInputOutputParser()\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# instantiate AgentExecutor\n",
|
||||
"agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mQuestion: Who is Leo DiCaprio's girlfriend? What is her current age raised to the 0.43 power?\n",
|
||||
"\n",
|
||||
"Thought: I need to use the Search tool to find out who Leo DiCaprio's current girlfriend is. Then, I can use the Calculator tool to raise her current age to the power of 0.43.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Search\",\n",
|
||||
" \"action_input\": \"leo dicaprio girlfriend\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3mLeonardo DiCaprio may have found The One in Vittoria Ceretti. “They are in love,” a source exclusively reveals in the latest issue of Us Weekly. “Leo was clearly very proud to be showing Vittoria off and letting everyone see how happy they are together.”\u001b[0m\u001b[32;1m\u001b[1;3mNow that we know Leo DiCaprio's current girlfriend is Vittoria Ceretti, let's find out her current age.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Search\",\n",
|
||||
" \"action_input\": \"vittoria ceretti age\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m25 years\u001b[0m\u001b[32;1m\u001b[1;3mNow that we know Vittoria Ceretti's current age is 25, let's use the Calculator tool to raise it to the power of 0.43.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Calculator\",\n",
|
||||
" \"action_input\": \"25^0.43\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\u001b[0m\u001b[33;1m\u001b[1;3mAnswer: 3.991298452658078\u001b[0m\u001b[32;1m\u001b[1;3mFinal Answer: Vittoria Ceretti, Leo DiCaprio's current girlfriend, when raised to the power of 0.43 is approximately 4.0 rounded to two decimal places. Her current age is 25 years old.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': \"Who is Leo DiCaprio's girlfriend? What is her current age raised to the 0.43 power?\",\n",
|
||||
" 'output': \"Vittoria Ceretti, Leo DiCaprio's current girlfriend, when raised to the power of 0.43 is approximately 4.0 rounded to two decimal places. Her current age is 25 years old.\"}"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.invoke(\n",
|
||||
" {\n",
|
||||
" \"input\": \"Who is Leo DiCaprio's girlfriend? What is her current age raised to the 0.43 power?\"\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Wahoo! Our open-source 7b parameter Zephyr model was able to:\n",
|
||||
"\n",
|
||||
"1. Plan out a series of actions: `I need to use the Search tool to find out who Leo DiCaprio's current girlfriend is. Then, I can use the Calculator tool to raise her current age to the power of 0.43.`\n",
|
||||
"2. Then execute a search using the SerpAPI tool to find who Leo DiCaprio's current girlfriend is\n",
|
||||
"3. Execute another search to find her age\n",
|
||||
"4. And finally use a calculator tool to calculate her age raised to the power of 0.43\n",
|
||||
"\n",
|
||||
"It's exciting to see how far open-source LLM's can go as general purpose reasoning agents. Give it a try yourself!"
|
||||
"For detailed documentation of all `ChatHuggingFace` features and configurations head to the API reference: https://api.python.langchain.com/en/latest/chat_models/langchain_huggingface.chat_models.huggingface.ChatHuggingFace.html"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -572,7 +493,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Kinetica SqlAssist LLM Demo\n",
|
||||
"# Kinetica Language To SQL Chat Model\n",
|
||||
"\n",
|
||||
"This notebook demonstrates how to use Kinetica to transform natural language into SQL\n",
|
||||
"and simplify the process of data retrieval. This demo is intended to show the mechanics\n",
|
||||
|
||||
@@ -12,12 +12,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a14c83bf-af26-4f22-8c1a-d632c5795ecf",
|
||||
"id": "d295c2a2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# MistralAI\n",
|
||||
"# ChatMistralAI\n",
|
||||
"\n",
|
||||
"This will help you get started with Mistral [chat models](/docs/concepts/#chat-models), accessed via their [API](https://docs.mistral.ai/api/). For detailed documentation of all ChatMistralAI features and configurations, head to the [API reference](https://api.python.langchain.com/en/latest/chat_models/langchain_mistralai.chat_models.ChatMistralAI.html).\n",
|
||||
"This will help you get started with Mistral [chat models](/docs/concepts/#chat-models). For detailed documentation of all `ChatMistralAI` features and configurations, head to the [API reference](https://api.python.langchain.com/en/latest/chat_models/langchain_mistralai.chat_models.ChatMistralAI.html). The `ChatMistralAI` class is built on top of the [Mistral API](https://docs.mistral.ai/api/). For a list of all the models supported by Mistral, check out [this page](https://docs.mistral.ai/getting-started/models/).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
@@ -29,36 +29,35 @@
|
||||
"### Model features\n",
|
||||
"| [Tool calling](/docs/how_to/tool_calling) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
|
||||
"| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | \n",
|
||||
"| ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access Mistral models you'll need to create a Mistral account, get an API key, and install the `langchain-mistralai` integration package.\n",
|
||||
"\n",
|
||||
"To access `ChatMistralAI` models you'll need to create a Mistral account, get an API key, and install the `langchain_mistralai` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"A valid [API key](https://console.mistral.ai/users/api-keys/) is needed to communicate with the API. Once you've obtained an API key, store it in the `MISTRAL_API_KEY` environment variable:"
|
||||
"\n",
|
||||
"A valid [API key](https://console.mistral.ai/users/api-keys/) is needed to communicate with the API. Once you have one, set the `MISTRAL_API_KEY` environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9acd8340-09d4-4ece-871a-a35b0732c7d8",
|
||||
"id": "2461605e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"__MODULE_NAME___API_KEY\"):\n",
|
||||
" os.environ[\"__MODULE_NAME___API_KEY\"] = getpass.getpass(\n",
|
||||
" \"Enter your __ModuleName__ API key: \"\n",
|
||||
" )"
|
||||
"os.environ[\"MISTRAL_API_KEY\"] = getpass.getpass(\"Enter your Mistral API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "42c979b1-df49-4f6c-9fe6-d9dbf3ea8c2a",
|
||||
"id": "788f37ac",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
@@ -67,37 +66,37 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cc4f11ec-5cb3-4caf-b3cd-7a20c41b0cfe",
|
||||
"id": "007209d5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0fc42221-97b2-466b-95db-10368e17ca56",
|
||||
"id": "0f5c74f9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain MistralAI integration lives in the `langchain-mistralai` package:"
|
||||
"The LangChain Mistral integration lives in the `langchain_mistralai` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "85cb1ab8-9f2c-4b93-8415-ad65819dcb38",
|
||||
"id": "1ab11a65",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-mistralai"
|
||||
"%pip install -qU langchain_mistralai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "502127fd",
|
||||
"id": "fb1a335e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
@@ -107,19 +106,24 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "2dfa801a-d040-4c09-9634-58604e8eaf16",
|
||||
"execution_count": 5,
|
||||
"id": "e6c38580",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_mistralai.chat_models import ChatMistralAI\n",
|
||||
"from langchain_mistralai import ChatMistralAI\n",
|
||||
"\n",
|
||||
"llm = ChatMistralAI(model=\"mistral-large-latest\")"
|
||||
"llm = ChatMistralAI(\n",
|
||||
" model=\"mistral-large-latest\",\n",
|
||||
" temperature=0,\n",
|
||||
" max_retries=2,\n",
|
||||
" # other params...\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f668acff-eb14-4b3a-959a-df5bfc02968b",
|
||||
"id": "aec79099",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Invocation"
|
||||
@@ -127,17 +131,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "86e3f9e6-67ec-4fbf-8ff1-85331200f412",
|
||||
"execution_count": 6,
|
||||
"id": "8838c3cc",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"J'adore la programmation.\", response_metadata={'token_usage': {'prompt_tokens': 27, 'total_tokens': 36, 'completion_tokens': 9}, 'model': 'mistral-large-latest', 'finish_reason': 'stop'}, id='run-d6196c33-9410-413b-b454-4ed0bec1f0c7-0', usage_metadata={'input_tokens': 27, 'output_tokens': 9, 'total_tokens': 36})"
|
||||
"AIMessage(content='Sure, I\\'d be happy to help you translate that sentence into French! The English sentence \"I love programming\" translates to \"J\\'aime programmer\" in French. Let me know if you have any other questions or need further assistance!', response_metadata={'token_usage': {'prompt_tokens': 32, 'total_tokens': 84, 'completion_tokens': 52}, 'model': 'mistral-small', 'finish_reason': 'stop'}, id='run-64bac156-7160-4b68-b67e-4161f63e021f-0', usage_metadata={'input_tokens': 32, 'output_tokens': 52, 'total_tokens': 84})"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -156,15 +160,15 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "8f8a24bc-b7f0-4d3a-b310-8a4e0ba125dd",
|
||||
"execution_count": 7,
|
||||
"id": "bbf6a048",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"J'adore la programmation.\n"
|
||||
"Sure, I'd be happy to help you translate that sentence into French! The English sentence \"I love programming\" translates to \"J'aime programmer\" in French. Let me know if you have any other questions or need further assistance!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -174,116 +178,27 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c361ab1e-8c0c-4206-9e3c-9d1424a12b9c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Async"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "c5fac0e9-05a4-4fc1-a3b3-e5bbb24b971b",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"J'aime programmer.\", response_metadata={'token_usage': {'prompt_tokens': 27, 'total_tokens': 34, 'completion_tokens': 7}, 'model': 'mistral-large-latest', 'finish_reason': 'stop'}, id='run-1873888a-186f-49a8-ab81-24335bd3099b-0', usage_metadata={'input_tokens': 27, 'output_tokens': 7, 'total_tokens': 34})"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"await llm.ainvoke(messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "86ccef97",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Streaming\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "025be980-e50d-4a68-93dc-c9c7b500ce34",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"J'adore programmer."
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for chunk in llm.stream(messages):\n",
|
||||
" print(chunk.content, end=\"\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f6189577",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Batch"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "e63aebcb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[AIMessage(content=\"J'adore la programmation.\", response_metadata={'token_usage': {'prompt_tokens': 27, 'total_tokens': 36, 'completion_tokens': 9}, 'model': 'mistral-large-latest', 'finish_reason': 'stop'}, id='run-2aa2a189-c405-4cf5-bd31-e9025e4c8536-0', usage_metadata={'input_tokens': 27, 'output_tokens': 9, 'total_tokens': 36})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm.batch([messages])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "38e39e71",
|
||||
"id": "32b87f87",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Chaining\n",
|
||||
"\n",
|
||||
"You can also easily combine with a prompt template for easy structuring of user input. We can do this using [LCEL](/docs/concepts#langchain-expression-language-lcel)"
|
||||
"We can [chain](/docs/how_to/sequence/) our model with a prompt template like so:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "ee43a1ae",
|
||||
"execution_count": 8,
|
||||
"id": "24e2c51c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='Ich liebe Programmieren.', response_metadata={'token_usage': {'prompt_tokens': 21, 'total_tokens': 28, 'completion_tokens': 7}, 'model': 'mistral-large-latest', 'finish_reason': 'stop'}, id='run-409ebc9a-b4a0-4734-ab6f-e11f6b4f808f-0', usage_metadata={'input_tokens': 21, 'output_tokens': 7, 'total_tokens': 28})"
|
||||
"AIMessage(content='Ich liebe Programmierung. (German translation)', response_metadata={'token_usage': {'prompt_tokens': 26, 'total_tokens': 38, 'completion_tokens': 12}, 'model': 'mistral-small', 'finish_reason': 'stop'}, id='run-dfd4094f-e347-47b0-9056-8ebd7ea35fe7-0', usage_metadata={'input_tokens': 26, 'output_tokens': 12, 'total_tokens': 38})"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -291,7 +206,7 @@
|
||||
"source": [
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate(\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\n",
|
||||
" \"system\",\n",
|
||||
@@ -313,12 +228,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "eb7e01fb-a433-48b1-a4c2-e6009523a896",
|
||||
"id": "cb9b5834",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all ChatMistralAI features and configurations head to the API reference: https://api.python.langchain.com/en/latest/chat_models/langchain_mistralai.chat_models.ChatMistralAI.html"
|
||||
"Head to the [API reference](https://api.python.langchain.com/en/latest/chat_models/langchain_mistralai.chat_models.ChatMistralAI.html) for detailed documentation of all attributes and methods."
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -338,7 +253,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -56,23 +56,16 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 1,
|
||||
"id": "e817fe2e-4f1d-4533-b19e-2400b1cf6ce8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Enter your OpenAI API key: ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"Enter your OpenAI API key: \")"
|
||||
"if not os.environ.get(\"OPENAI_API_KEY\"):\n",
|
||||
" os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"Enter your OpenAI API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -126,7 +119,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 2,
|
||||
"id": "522686de",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -281,12 +274,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 4,
|
||||
"id": "b7ea7690-ec7a-4337-b392-e87d1f39a6ec",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
|
||||
"from pydantic import BaseModel, Field\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class GetWeather(BaseModel):\n",
|
||||
@@ -322,6 +315,47 @@
|
||||
"ai_msg"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "67b0f63d-15e6-45e0-9e86-2852ddcff54f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### ``strict=True``\n",
|
||||
"\n",
|
||||
":::info Requires ``langchain-openai>=0.1.21rc1``\n",
|
||||
"\n",
|
||||
":::\n",
|
||||
"\n",
|
||||
"As of Aug 6, 2024, OpenAI supports a `strict` argument when calling tools that will enforce that the tool argument schema is respected by the model. See more here: https://platform.openai.com/docs/guides/function-calling\n",
|
||||
"\n",
|
||||
"**Note**: If ``strict=True``, the tool definition will also be validated, and only a subset of JSON schema is accepted. Crucially, the schema cannot have optional args (those with default values). Read the full docs on what types of schema are supported here: https://platform.openai.com/docs/guides/structured-outputs/supported-schemas. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "dc8ac4f1-4039-4392-90c1-2d8331cd6910",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_VYEfpPDh3npMQ95J9EWmWvSn', 'function': {'arguments': '{\"location\":\"San Francisco, CA\"}', 'name': 'GetWeather'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 68, 'total_tokens': 85}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-a4c6749b-adbb-45c7-8b17-8d6835d5c443-0', tool_calls=[{'name': 'GetWeather', 'args': {'location': 'San Francisco, CA'}, 'id': 'call_VYEfpPDh3npMQ95J9EWmWvSn', 'type': 'tool_call'}], usage_metadata={'input_tokens': 68, 'output_tokens': 17, 'total_tokens': 85})"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm_with_tools = llm.bind_tools([GetWeather], strict=True)\n",
|
||||
"ai_msg = llm_with_tools.invoke(\n",
|
||||
" \"what is the weather like in San Francisco\",\n",
|
||||
")\n",
|
||||
"ai_msg"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "768d1ae4-4b1a-48eb-a329-c8d5051067a3",
|
||||
@@ -412,9 +446,9 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "poetry-venv-2",
|
||||
"display_name": "poetry-venv-311",
|
||||
"language": "python",
|
||||
"name": "poetry-venv-2"
|
||||
"name": "poetry-venv-311"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
"\n",
|
||||
"## Prerequisites\n",
|
||||
"\n",
|
||||
"You need to have an existing dataset on the Apify platform. If you don't have one, please first check out [this notebook](/docs/integrations/tools/apify) on how to use Apify to extract content from documentation, knowledge bases, help centers, or blogs. This example shows how to load a dataset produced by the [Website Content Crawler](https://apify.com/apify/website-content-crawler)."
|
||||
"You need to have an existing dataset on the Apify platform. This example shows how to load a dataset produced by the [Website Content Crawler](https://apify.com/apify/website-content-crawler)."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
File diff suppressed because one or more lines are too long
182
docs/docs/integrations/document_loaders/pypdfloader.ipynb
Normal file
182
docs/docs/integrations/document_loaders/pypdfloader.ipynb
Normal file
@@ -0,0 +1,182 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# PyPDFLoader\n",
|
||||
"\n",
|
||||
"This notebook provides a quick overview for getting started with the `PyPDF` [document loader](https://python.langchain.com/v0.2/docs/concepts/#document-loaders). For detailed documentation of all `PyPDFLoader` features and configurations, head to the [API reference](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFLoader.html).\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | JS support|\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: |\n",
|
||||
"| [PyPDFLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ❌ | \n",
|
||||
"### Loader features\n",
|
||||
"| Source | Document Lazy Loading | Native Async Support\n",
|
||||
"| :---: | :---: | :---: | \n",
|
||||
"| PyPDFLoader | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"No credentials are required to use `PyPDFLoader`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"To use `PyPDFLoader` you need to have the `langchain-community` Python package installed:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain_community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"Now we can instantiate our loader object and load documents:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import PyPDFLoader\n",
|
||||
"\n",
|
||||
"loader = PyPDFLoader(\n",
|
||||
" \"./example_data/layout-parser-paper.pdf\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'page': 0}, page_content='LayoutParser : A Unified Toolkit for Deep\\nLearning Based Document Image Analysis\\nZejiang Shen1( \\x00), Ruochen Zhang2, Melissa Dell3, Benjamin Charles Germain\\nLee4, Jacob Carlson3, and Weining Li5\\n1Allen Institute for AI\\nshannons@allenai.org\\n2Brown University\\nruochen zhang@brown.edu\\n3Harvard University\\n{melissadell,jacob carlson }@fas.harvard.edu\\n4University of Washington\\nbcgl@cs.washington.edu\\n5University of Waterloo\\nw422li@uwaterloo.ca\\nAbstract. Recent advances in document image analysis (DIA) have been\\nprimarily driven by the application of neural networks. Ideally, research\\noutcomes could be easily deployed in production and extended for further\\ninvestigation. However, various factors like loosely organized codebases\\nand sophisticated model configurations complicate the easy reuse of im-\\nportant innovations by a wide audience. Though there have been on-going\\nefforts to improve reusability and simplify deep learning (DL) model\\ndevelopment in disciplines like natural language processing and computer\\nvision, none of them are optimized for challenges in the domain of DIA.\\nThis represents a major gap in the existing toolkit, as DIA is central to\\nacademic research across a wide range of disciplines in the social sciences\\nand humanities. This paper introduces LayoutParser , an open-source\\nlibrary for streamlining the usage of DL in DIA research and applica-\\ntions. The core LayoutParser library comes with a set of simple and\\nintuitive interfaces for applying and customizing DL models for layout de-\\ntection, character recognition, and many other document processing tasks.\\nTo promote extensibility, LayoutParser also incorporates a community\\nplatform for sharing both pre-trained models and full document digiti-\\nzation pipelines. 
We demonstrate that LayoutParser is helpful for both\\nlightweight and large-scale digitization pipelines in real-word use cases.\\nThe library is publicly available at https://layout-parser.github.io .\\nKeywords: Document Image Analysis ·Deep Learning ·Layout Analysis\\n·Character Recognition ·Open Source library ·Toolkit.\\n1 Introduction\\nDeep Learning(DL)-based approaches are the state-of-the-art for a wide range of\\ndocument image analysis (DIA) tasks including document image classification [ 11,arXiv:2103.15348v2 [cs.CV] 21 Jun 2021')"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'source': './example_data/layout-parser-paper.pdf', 'page': 0}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Lazy Load\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"6"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"page = []\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" page.append(doc)\n",
|
||||
" if len(page) >= 10:\n",
|
||||
" # do some paged operation, e.g.\n",
|
||||
" # index.upsert(page)\n",
|
||||
"\n",
|
||||
" page = []\n",
|
||||
"len(page)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `PyPDFLoader` features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFLoader.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -7,7 +7,18 @@
|
||||
"source": [
|
||||
"# Recursive URL\n",
|
||||
"\n",
|
||||
"The `RecursiveUrlLoader` lets you recursively scrape all child links from a root URL and parse them into Documents."
|
||||
"The `RecursiveUrlLoader` lets you recursively scrape all child links from a root URL and parse them into Documents.\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/v0.2/docs/integrations/document_loaders/web_loaders/recursive_url_loader/)|\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: |\n",
|
||||
"| [RecursiveUrlLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.recursive_url_loader.RecursiveUrlLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ✅ | \n",
|
||||
"### Loader features\n",
|
||||
"| Source | Document Lazy Loading | Native Async Support\n",
|
||||
"| :---: | :---: | :---: | \n",
|
||||
"| RecursiveUrlLoader | ✅ | ❌ | \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -17,6 +28,12 @@
|
||||
"source": [
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"No credentials are required to use the `RecursiveUrlLoader`.\n",
|
||||
"\n",
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The `RecursiveUrlLoader` lives in the `langchain-community` package. There are no other required packages, though you will get richer default Document metadata if you have `beautifulsoup4` installed as well."
|
||||
]
|
||||
},
|
||||
@@ -186,6 +203,50 @@
|
||||
"That certainly looks like HTML that comes from the url https://docs.python.org/3.9/, which is what we expected. Let's now look at some variations we can make to our basic example that can be helpful in different situations. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b17b7202",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Lazy loading\n",
|
||||
"\n",
|
||||
"If we're loading a large number of Documents and our downstream operations can be done over subsets of all loaded Documents, we can lazily load our Documents one at a time to minimize our memory footprint:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4b13e4d1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/var/folders/4j/2rz3865x6qg07tx43146py8h0000gn/T/ipykernel_73962/2110507528.py:6: XMLParsedAsHTMLWarning: It looks like you're parsing an XML document using an HTML parser. If this really is an HTML document (maybe it's XHTML?), you can ignore or filter this warning. If it's XML, you should know that using an XML parser will be more reliable. To parse this document as XML, make sure you have the lxml package installed, and pass the keyword argument `features=\"xml\"` into the BeautifulSoup constructor.\n",
|
||||
" soup = BeautifulSoup(html, \"lxml\")\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"page = []\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" page.append(doc)\n",
|
||||
" if len(page) >= 10:\n",
|
||||
" # do some paged operation, e.g.\n",
|
||||
" # index.upsert(page)\n",
|
||||
"\n",
|
||||
" page = []"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fb039682",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In this example we never have more than 10 Documents loaded into memory at a time."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8f41cc89",
|
||||
@@ -256,50 +317,6 @@
|
||||
"You can similarly pass in a `metadata_extractor` to customize how Document metadata is extracted from the HTTP response. See the [API reference](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.recursive_url_loader.RecursiveUrlLoader.html) for more on this."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1dddbc94",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Lazy loading\n",
|
||||
"\n",
|
||||
"If we're loading a large number of Documents and our downstream operations can be done over subsets of all loaded Documents, we can lazily load our Documents one at a time to minimize our memory footprint:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "7d0114fc",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/var/folders/4j/2rz3865x6qg07tx43146py8h0000gn/T/ipykernel_73962/2110507528.py:6: XMLParsedAsHTMLWarning: It looks like you're parsing an XML document using an HTML parser. If this really is an HTML document (maybe it's XHTML?), you can ignore or filter this warning. If it's XML, you should know that using an XML parser will be more reliable. To parse this document as XML, make sure you have the lxml package installed, and pass the keyword argument `features=\"xml\"` into the BeautifulSoup constructor.\n",
|
||||
" soup = BeautifulSoup(html, \"lxml\")\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"page = []\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" page.append(doc)\n",
|
||||
" if len(page) >= 10:\n",
|
||||
" # do some paged operation, e.g.\n",
|
||||
" # index.upsert(page)\n",
|
||||
"\n",
|
||||
" page = []"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f88a7c2f-35df-4c3a-b238-f91be2674b96",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In this example we never have more than 10 Documents loaded into memory at a time."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3e4d1c8f",
|
||||
|
||||
@@ -7,20 +7,41 @@
|
||||
"source": [
|
||||
"# Unstructured\n",
|
||||
"\n",
|
||||
"This notebook covers how to use `Unstructured` package to load files of many types. `Unstructured` currently supports loading of text files, powerpoints, html, pdfs, images, and more.\n",
|
||||
"This notebook covers how to use the `Unstructured` [document loader](https://python.langchain.com/v0.2/docs/concepts/#document-loaders) to load files of many types. `Unstructured` currently supports loading of text files, powerpoints, html, pdfs, images, and more.\n",
|
||||
"\n",
|
||||
"Please see [this guide](/docs/integrations/providers/unstructured/) for more instructions on setting up Unstructured locally, including setting up required system dependencies."
|
||||
"Please see [this guide](../../integrations/providers/unstructured.mdx) for more instructions on setting up Unstructured locally, including setting up required system dependencies.\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/v0.2/docs/integrations/document_loaders/file_loaders/unstructured/)|\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: |\n",
|
||||
"| [UnstructuredLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_unstructured.document_loaders.UnstructuredLoader.html) | [langchain_unstructured](https://api.python.langchain.com/en/latest/unstructured_api_reference.html) | ✅ | ❌ | ✅ | \n",
|
||||
"### Loader features\n",
|
||||
"| Source | Document Lazy Loading | Native Async Support |\n",
|
||||
"| :---: | :---: | :---: | \n",
|
||||
"| UnstructuredLoader | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"By default, `langchain-unstructured` installs a smaller footprint that requires offloading of the partitioning logic to the Unstructured API, which requires an API key. If you use the local installation, you do not need an API key. To get your API key, head over to [this site](https://unstructured.io) and get an API key, and then set it in the cell below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"id": "2886982e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Install package, compatible with API partitioning\n",
|
||||
"%pip install --upgrade --quiet \"langchain-unstructured\""
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"UNSTRUCTURED_API_KEY\"] = getpass.getpass(\n",
|
||||
" \"Enter your Unstructured API key: \"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -28,15 +49,32 @@
|
||||
"id": "e75e2a6d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Local Partitioning (Optional)\n",
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"By default, `langchain-unstructured` installs a smaller footprint that requires\n",
|
||||
"offloading of the partitioning logic to the Unstructured API, which requires an `api_key`. For\n",
|
||||
"partitioning using the API, refer to the Unstructured API section below.\n",
|
||||
"#### Normal Installation\n",
|
||||
"\n",
|
||||
"If you would like to run the partitioning logic locally, you will need to install\n",
|
||||
"a combination of system dependencies, as outlined in the \n",
|
||||
"[Unstructured documentation here](https://docs.unstructured.io/open-source/installation/full-installation).\n",
|
||||
"The following packages are required to run the rest of this notebook."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d9de83b3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Install package, compatible with API partitioning\n",
|
||||
"%pip install --upgrade --quiet langchain-unstructured unstructured-client unstructured \"unstructured[pdf]\" python-magic"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "637eda35",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Installation for Local\n",
|
||||
"\n",
|
||||
"If you would like to run the partitioning logic locally, you will need to install a combination of system dependencies, as outlined in the [Unstructured documentation here](https://docs.unstructured.io/open-source/installation/full-installation).\n",
|
||||
"\n",
|
||||
"For example, on Macs you can install the required dependencies with:\n",
|
||||
"\n",
|
||||
@@ -48,7 +86,7 @@
|
||||
"brew install libxml2 libxslt\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"You can install the required `pip` dependencies with:\n",
|
||||
"You can install the required `pip` dependencies needed for local partitioning with:\n",
|
||||
"\n",
|
||||
"```bash\n",
|
||||
"pip install \"langchain-unstructured[local]\"\n",
|
||||
@@ -60,120 +98,117 @@
|
||||
"id": "a9c1c775",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Quickstart\n",
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"To simply load a file as a document, you can use the LangChain `DocumentLoader.load` \n",
|
||||
"interface:"
|
||||
"The `UnstructuredLoader` allows loading from a variety of different file types. To read all about the `unstructured` package please refer to their [documentation](https://docs.unstructured.io/open-source/introduction/overview). In this example, we show loading from both a text file and a PDF file."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"id": "79d3e549",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_unstructured import UnstructuredLoader\n",
|
||||
"\n",
|
||||
"loader = UnstructuredLoader(\"./example_data/state_of_the_union.txt\")\n",
|
||||
"file_paths = [\n",
|
||||
" \"./example_data/layout-parser-paper.pdf\",\n",
|
||||
" \"./example_data/state_of_the_union.txt\",\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"docs = loader.load()"
|
||||
"\n",
|
||||
"loader = UnstructuredLoader(file_paths)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b4ab0a79",
|
||||
"id": "8b68dcab",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load list of files"
|
||||
"## Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "092d9a0b",
|
||||
"execution_count": 2,
|
||||
"id": "8da59ef8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"INFO: NumExpr defaulting to 12 threads.\n",
|
||||
"INFO: pikepdf C++ to Python logger bridge initialized\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((16.34, 213.36), (16.34, 253.36), (36.34, 253.36), (36.34, 213.36)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-07-25T21:28:58', 'page_number': 1, 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': 'd3ce55f220dfb75891b4394a18bcb973'}, page_content='1 2 0 2')"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"\n",
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "97f7aa1f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"whatsapp_chat.txt : 1/22/23, 6:30 PM - User 1: Hi! Im interested in your bag. Im offering $50. Let me know if you are in\n",
|
||||
"state_of_the_union.txt : May God bless you all. May God protect our troops.\n"
|
||||
"{'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((16.34, 213.36), (16.34, 253.36), (36.34, 253.36), (36.34, 213.36)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-07-25T21:28:58', 'page_number': 1, 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': 'd3ce55f220dfb75891b4394a18bcb973'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"file_paths = [\n",
|
||||
" \"./example_data/whatsapp_chat.txt\",\n",
|
||||
" \"./example_data/state_of_the_union.txt\",\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"loader = UnstructuredLoader(file_paths)\n",
|
||||
"\n",
|
||||
"docs = loader.load()\n",
|
||||
"\n",
|
||||
"print(docs[0].metadata.get(\"filename\"), \": \", docs[0].page_content[:100])\n",
|
||||
"print(docs[-1].metadata.get(\"filename\"), \": \", docs[-1].page_content[:100])"
|
||||
"print(docs[0].metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8de9ef16",
|
||||
"id": "0d7f991b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## PDF Example\n",
|
||||
"\n",
|
||||
"Processing PDF documents works exactly the same way. Unstructured detects the file type and extracts the same types of elements."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "672733fd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Define a Partitioning Strategy\n",
|
||||
"\n",
|
||||
"Unstructured document loader allow users to pass in a `strategy` parameter that lets Unstructured\n",
|
||||
"know how to partition pdf and other OCR'd documents. Currently supported strategies are `\"auto\"`,\n",
|
||||
"`\"hi_res\"`, `\"ocr_only\"`, and `\"fast\"`. Learn more about the different strategies\n",
|
||||
"[here](https://docs.unstructured.io/open-source/core-functionality/partitioning#partition-pdf). \n",
|
||||
"\n",
|
||||
"Not all document types have separate hi res and fast partitioning strategies. For those document types, the `strategy` kwarg is\n",
|
||||
"ignored. In some cases, the high res strategy will fallback to fast if there is a dependency missing\n",
|
||||
"(i.e. a model for document partitioning). You can see how to apply a strategy to an\n",
|
||||
"`UnstructuredLoader` below."
|
||||
"## Lazy Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "60685353",
|
||||
"execution_count": 4,
|
||||
"id": "b05604d2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((16.34, 393.9), (16.34, 560.0), (36.34, 560.0), (36.34, 393.9)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-02-27T15:49:27', 'page_number': 1, 'parent_id': '89565df026a24279aaea20dc08cedbec', 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': 'e9fa370aef7ee5c05744eb7bb7d9981b'}, page_content='2 v 8 4 3 5 1 . 3 0 1 2 : v i X r a'),\n",
|
||||
" Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((157.62199999999999, 114.23496279999995), (157.62199999999999, 146.5141628), (457.7358962799999, 146.5141628), (457.7358962799999, 114.23496279999995)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-02-27T15:49:27', 'page_number': 1, 'filetype': 'application/pdf', 'category': 'Title', 'element_id': 'bde0b230a1aa488e3ce837d33015181b'}, page_content='LayoutParser: A Unified Toolkit for Deep Learning Based Document Image Analysis'),\n",
|
||||
" Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((134.809, 168.64029940800003), (134.809, 192.2517444), (480.5464199080001, 192.2517444), (480.5464199080001, 168.64029940800003)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-02-27T15:49:27', 'page_number': 1, 'parent_id': 'bde0b230a1aa488e3ce837d33015181b', 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': '54700f902899f0c8c90488fa8d825bce'}, page_content='Zejiang Shen1 ((cid:0)), Ruochen Zhang2, Melissa Dell3, Benjamin Charles Germain Lee4, Jacob Carlson3, and Weining Li5'),\n",
|
||||
" Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((207.23000000000002, 202.57205439999996), (207.23000000000002, 311.8195408), (408.12676, 311.8195408), (408.12676, 202.57205439999996)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-02-27T15:49:27', 'page_number': 1, 'parent_id': 'bde0b230a1aa488e3ce837d33015181b', 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': 'b650f5867bad9bb4e30384282c79bcfe'}, page_content='1 Allen Institute for AI shannons@allenai.org 2 Brown University ruochen zhang@brown.edu 3 Harvard University {melissadell,jacob carlson}@fas.harvard.edu 4 University of Washington bcgl@cs.washington.edu 5 University of Waterloo w422li@uwaterloo.ca'),\n",
|
||||
" Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((162.779, 338.45008160000003), (162.779, 566.8455408), (454.0372021523199, 566.8455408), (454.0372021523199, 338.45008160000003)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-02-27T15:49:27', 'links': [{'text': ':// layout - parser . github . io', 'url': 'https://layout-parser.github.io', 'start_index': 1477}], 'page_number': 1, 'parent_id': 'bde0b230a1aa488e3ce837d33015181b', 'filetype': 'application/pdf', 'category': 'NarrativeText', 'element_id': 'cfc957c94fe63c8fd7c7f4bcb56e75a7'}, page_content='Abstract. Recent advances in document image analysis (DIA) have been primarily driven by the application of neural networks. Ideally, research outcomes could be easily deployed in production and extended for further investigation. However, various factors like loosely organized codebases and sophisticated model configurations complicate the easy reuse of im- portant innovations by a wide audience. Though there have been on-going efforts to improve reusability and simplify deep learning (DL) model development in disciplines like natural language processing and computer vision, none of them are optimized for challenges in the domain of DIA. This represents a major gap in the existing toolkit, as DIA is central to academic research across a wide range of disciplines in the social sciences and humanities. This paper introduces LayoutParser, an open-source library for streamlining the usage of DL in DIA research and applica- tions. The core LayoutParser library comes with a set of simple and intuitive interfaces for applying and customizing DL models for layout de- tection, character recognition, and many other document processing tasks. 
To promote extensibility, LayoutParser also incorporates a community platform for sharing both pre-trained models and full document digiti- zation pipelines. We demonstrate that LayoutParser is helpful for both lightweight and large-scale digitization pipelines in real-word use cases. The library is publicly available at https://layout-parser.github.io.')]"
|
||||
"Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((16.34, 213.36), (16.34, 253.36), (36.34, 253.36), (36.34, 213.36)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-07-25T21:28:58', 'page_number': 1, 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': 'd3ce55f220dfb75891b4394a18bcb973'}, page_content='1 2 0 2')"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_unstructured import UnstructuredLoader\n",
|
||||
"pages = []\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" pages.append(doc)\n",
|
||||
"\n",
|
||||
"loader = UnstructuredLoader(\"./example_data/layout-parser-paper.pdf\", strategy=\"fast\")\n",
|
||||
"\n",
|
||||
"docs = loader.load()\n",
|
||||
"\n",
|
||||
"docs[5:10]"
|
||||
"pages[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -242,23 +277,6 @@
|
||||
"if you’d like to self-host the Unstructured API or run it locally."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6e5fde16",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Install package\n",
|
||||
"%pip install \"langchain-unstructured\"\n",
|
||||
"%pip install \"unstructured-client\"\n",
|
||||
"\n",
|
||||
"# Set API key\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"UNSTRUCTURED_API_KEY\"] = \"FAKE_API_KEY\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
@@ -496,6 +514,16 @@
|
||||
"print(\"Number of LangChain documents:\", len(docs))\n",
|
||||
"print(\"Length of text in the document:\", len(docs[0].page_content))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ce01aa40",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `UnstructuredLoader` features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_unstructured.document_loaders.UnstructuredLoader.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -514,7 +542,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.13"
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -334,6 +334,121 @@
|
||||
"llm.invoke(\"Tell me a joke\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b29dd776",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Semantic Cache\n",
|
||||
"Use [Upstash Vector](https://upstash.com/docs/vector/overall/whatisvector) to do a semantic similarity search and cache the most similar response in the database. The vectorization is automatically done by the selected embedding model while creating Upstash Vector database. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b37fb3c9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install upstash-semantic-cache"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "8470eedc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.globals import set_llm_cache\n",
|
||||
"from upstash_semantic_cache import SemanticCache"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "16b9fb03",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"UPSTASH_VECTOR_REST_URL = \"<UPSTASH_VECTOR_REST_URL>\"\n",
|
||||
"UPSTASH_VECTOR_REST_TOKEN = \"<UPSTASH_VECTOR_REST_TOKEN>\"\n",
|
||||
"\n",
|
||||
"cache = SemanticCache(\n",
|
||||
" url=UPSTASH_VECTOR_REST_URL, token=UPSTASH_VECTOR_REST_TOKEN, min_proximity=0.7\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "8d37104b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"set_llm_cache(cache)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "926a08b3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 28.4 ms, sys: 3.93 ms, total: 32.3 ms\n",
|
||||
"Wall time: 1.89 s\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nNew York City is the most crowded city in the USA.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"llm.invoke(\"Which city is the most crowded city in the USA?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "0ce37d57",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 3.22 ms, sys: 940 μs, total: 4.16 ms\n",
|
||||
"Wall time: 97.7 ms\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nNew York City is the most crowded city in the USA.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"llm.invoke(\"Which city has the highest population in the USA?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "278ad7ae",
|
||||
@@ -2684,7 +2799,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.5"
|
||||
"version": "3.12.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -787,7 +787,7 @@ We need to install `langchain-google-community` with required dependencies:
|
||||
pip install langchain-google-community[gmail]
|
||||
```
|
||||
|
||||
See a [usage example and authorization instructions](/docs/integrations/toolkits/gmail).
|
||||
See a [usage example and authorization instructions](/docs/integrations/tools/gmail).
|
||||
|
||||
```python
|
||||
from langchain_google_community import GmailToolkit
|
||||
|
||||
@@ -370,7 +370,7 @@ We need to install several python packages.
|
||||
pip install azure-ai-formrecognizer azure-cognitiveservices-speech azure-ai-vision-imageanalysis
|
||||
```
|
||||
|
||||
See a [usage example](/docs/integrations/toolkits/azure_ai_services).
|
||||
See a [usage example](/docs/integrations/tools/azure_ai_services).
|
||||
|
||||
```python
|
||||
from langchain_community.agent_toolkits import azure_ai_services
|
||||
@@ -385,7 +385,7 @@ pip install O365
|
||||
```
|
||||
|
||||
|
||||
See a [usage example](/docs/integrations/toolkits/office365).
|
||||
See a [usage example](/docs/integrations/tools/office365).
|
||||
|
||||
```python
|
||||
from langchain_community.agent_toolkits import O365Toolkit
|
||||
@@ -399,7 +399,7 @@ We need to install `azure-identity` python package.
|
||||
pip install azure-identity
|
||||
```
|
||||
|
||||
See a [usage example](/docs/integrations/toolkits/powerbi).
|
||||
See a [usage example](/docs/integrations/tools/powerbi).
|
||||
|
||||
```python
|
||||
from langchain_community.agent_toolkits import PowerBIToolkit
|
||||
|
||||
@@ -15,7 +15,7 @@ pip install ain-py
|
||||
You need to set the `AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY` environmental variable to your AIN Blockchain Account Private Key.
|
||||
## Toolkit
|
||||
|
||||
See a [usage example](/docs/integrations/toolkits/ainetwork).
|
||||
See a [usage example](/docs/integrations/tools/ainetwork).
|
||||
|
||||
```python
|
||||
from langchain_community.agent_toolkits.ainetwork.toolkit import AINetworkToolkit
|
||||
|
||||
@@ -27,7 +27,7 @@ You can use the `ApifyWrapper` to run Actors on the Apify platform.
|
||||
from langchain_community.utilities import ApifyWrapper
|
||||
```
|
||||
|
||||
For a more detailed walkthrough of this wrapper, see [this notebook](/docs/integrations/tools/apify).
|
||||
For more information on this wrapper, see [the API reference](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.apify.ApifyWrapper.html).
|
||||
|
||||
|
||||
## Document loader
|
||||
|
||||
@@ -80,6 +80,6 @@ from langchain_community.agent_toolkits.cassandra_database.toolkit import (
|
||||
)
|
||||
```
|
||||
|
||||
Learn more in the [example notebook](/docs/integrations/toolkits/cassandra_database).
|
||||
Learn more in the [example notebook](/docs/integrations/tools/cassandra_database).
|
||||
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ The Kinetica LLM wrapper uses the [Kinetica SqlAssist
|
||||
LLM](https://docs.kinetica.com/7.2/sql-gpt/concepts/) to transform natural language into
|
||||
SQL to simplify the process of data retrieval.
|
||||
|
||||
See [Kinetica SqlAssist LLM Demo](/docs/integrations/chat/kinetica) for usage.
|
||||
See [Kinetica Language To SQL Chat Model](/docs/integrations/chat/kinetica) for usage.
|
||||
|
||||
```python
|
||||
from langchain_community.chat_models.kinetica import ChatKinetica
|
||||
|
||||
@@ -30,7 +30,7 @@ from langchain_robocorp.toolkits import ActionServerRequestTool
|
||||
|
||||
## Toolkit
|
||||
|
||||
See a [usage example](/docs/integrations/toolkits/robocorp).
|
||||
See a [usage example](/docs/integrations/tools/robocorp).
|
||||
|
||||
```python
|
||||
from langchain_robocorp import ActionServerToolkit
|
||||
|
||||
@@ -17,7 +17,7 @@ from langchain_community.document_loaders import SlackDirectoryLoader
|
||||
|
||||
## Toolkit
|
||||
|
||||
See a [usage example](/docs/integrations/toolkits/slack).
|
||||
See a [usage example](/docs/integrations/tools/slack).
|
||||
|
||||
```python
|
||||
from langchain_community.agent_toolkits import SlackToolkit
|
||||
|
||||
@@ -1,129 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Airbyte Question Answering\n",
|
||||
"This notebook shows how to do question answering over structured data, in this case using the `AirbyteStripeLoader`.\n",
|
||||
"\n",
|
||||
"Vectorstores often have a hard time answering questions that require computing, grouping and filtering structured data, so the high-level idea is to use a `pandas` dataframe to help with these types of questions. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"1. Load data from Stripe using Airbyte. Use the `record_handler` parameter to return JSON from the data loader."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"import pandas as pd\n",
|
||||
"from langchain.agents import AgentType\n",
|
||||
"from langchain_community.document_loaders.airbyte import AirbyteStripeLoader\n",
|
||||
"from langchain_experimental.agents import create_pandas_dataframe_agent\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"stream_name = \"customers\"\n",
|
||||
"config = {\n",
|
||||
" \"client_secret\": os.getenv(\"STRIPE_CLIENT_SECRET\"),\n",
|
||||
" \"account_id\": os.getenv(\"STRIPE_ACCOUNT_D\"),\n",
|
||||
" \"start_date\": \"2023-01-20T00:00:00Z\",\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def handle_record(record: dict, _id: str):\n",
|
||||
" return record.data\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"loader = AirbyteStripeLoader(\n",
|
||||
" config=config,\n",
|
||||
" record_handler=handle_record,\n",
|
||||
" stream_name=stream_name,\n",
|
||||
")\n",
|
||||
"data = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"2. Pass the data to `pandas` dataframe."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df = pd.DataFrame(data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"3. Pass the dataframe `df` to the `create_pandas_dataframe_agent` and invoke\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = create_pandas_dataframe_agent(\n",
|
||||
" ChatOpenAI(temperature=0, model=\"gpt-4\"),\n",
|
||||
" df,\n",
|
||||
" verbose=True,\n",
|
||||
" agent_type=AgentType.OPENAI_FUNCTIONS,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"4. Run the agent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"output = agent.run(\"How many rows are there?\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,301 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7094e328",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# CSV\n",
|
||||
"\n",
|
||||
"This notebook shows how to use agents to interact with data in `CSV` format. It is mostly optimized for question answering.\n",
|
||||
"\n",
|
||||
"**NOTE: this agent calls the Pandas DataFrame agent under the hood, which in turn calls the Python agent, which executes LLM generated Python code - this can be bad if the LLM generated Python code is harmful. Use cautiously.**\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "caae0bec",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents.agent_types import AgentType\n",
|
||||
"from langchain_experimental.agents.agent_toolkits import create_csv_agent\n",
|
||||
"from langchain_openai import ChatOpenAI, OpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bd806175",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using `ZERO_SHOT_REACT_DESCRIPTION`\n",
|
||||
"\n",
|
||||
"This shows how to initialize the agent using the `ZERO_SHOT_REACT_DESCRIPTION` agent type."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "a1717204",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = create_csv_agent(\n",
|
||||
" OpenAI(temperature=0),\n",
|
||||
" \"titanic.csv\",\n",
|
||||
" verbose=True,\n",
|
||||
" agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c31bb8a6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using OpenAI Functions\n",
|
||||
"\n",
|
||||
"This shows how to initialize the agent using the OPENAI_FUNCTIONS agent type. Note that this is an alternative to the above."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "16c4dc59",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = create_csv_agent(\n",
|
||||
" ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\"),\n",
|
||||
" \"titanic.csv\",\n",
|
||||
" verbose=True,\n",
|
||||
" agent_type=AgentType.OPENAI_FUNCTIONS,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "46b9489d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error in on_chain_start callback: 'name'\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `python_repl_ast` with `df.shape[0]`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m891\u001b[0m\u001b[32;1m\u001b[1;3mThere are 891 rows in the dataframe.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'There are 891 rows in the dataframe.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"how many rows are there?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "a96309be",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error in on_chain_start callback: 'name'\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `python_repl_ast` with `df[df['SibSp'] > 3]['PassengerId'].count()`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m30\u001b[0m\u001b[32;1m\u001b[1;3mThere are 30 people in the dataframe who have more than 3 siblings.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'There are 30 people in the dataframe who have more than 3 siblings.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"how many people have more than 3 siblings\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "964a09f7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error in on_chain_start callback: 'name'\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `python_repl_ast` with `import pandas as pd\n",
|
||||
"import math\n",
|
||||
"\n",
|
||||
"# Create a dataframe\n",
|
||||
"data = {'Age': [22, 38, 26, 35, 35]}\n",
|
||||
"df = pd.DataFrame(data)\n",
|
||||
"\n",
|
||||
"# Calculate the average age\n",
|
||||
"average_age = df['Age'].mean()\n",
|
||||
"\n",
|
||||
"# Calculate the square root of the average age\n",
|
||||
"square_root = math.sqrt(average_age)\n",
|
||||
"\n",
|
||||
"square_root`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m5.585696017507576\u001b[0m\u001b[32;1m\u001b[1;3mThe square root of the average age is approximately 5.59.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The square root of the average age is approximately 5.59.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"whats the square root of the average age?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "09539c18",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Multi CSV Example\n",
|
||||
"\n",
|
||||
"This next part shows how the agent can interact with multiple csv files passed in as a list."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "15f11fbd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error in on_chain_start callback: 'name'\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `python_repl_ast` with `df1['Age'].nunique() - df2['Age'].nunique()`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m-1\u001b[0m\u001b[32;1m\u001b[1;3mThere is 1 row in the age column that is different between the two dataframes.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'There is 1 row in the age column that is different between the two dataframes.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent = create_csv_agent(\n",
|
||||
" ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\"),\n",
|
||||
" [\"titanic.csv\", \"titanic_age_fillna.csv\"],\n",
|
||||
" verbose=True,\n",
|
||||
" agent_type=AgentType.OPENAI_FUNCTIONS,\n",
|
||||
")\n",
|
||||
"agent.run(\"how many rows in the age column are different between the two dfs?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f2909808",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,977 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Github\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# GithubToolkit\n",
|
||||
"\n",
|
||||
"The `Github` toolkit contains tools that enable an LLM agent to interact with a github repository. \n",
|
||||
"The tool is a wrapper for the [PyGitHub](https://github.com/PyGithub/PyGithub) library. \n",
|
||||
"\n",
|
||||
"For detailed documentation of all GithubToolkit features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.github.toolkit.GitHubToolkit.html).\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"At a high-level, we will:\n",
|
||||
"\n",
|
||||
"1. Install the pygithub library\n",
|
||||
"2. Create a Github app\n",
|
||||
"3. Set your environmental variables\n",
|
||||
"4. Pass the tools to your agent with `toolkit.get_tools()`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated tracing from runs of individual tools, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"#### 1. Install dependencies\n",
|
||||
"\n",
|
||||
"This integration is implemented in `langchain-community`. We will also need the `pygithub` dependency:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet pygithub langchain-community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### 2. Create a Github App\n",
|
||||
"\n",
|
||||
"[Follow the instructions here](https://docs.github.com/en/apps/creating-github-apps/registering-a-github-app/registering-a-github-app) to create and register a Github app. Make sure your app has the following [repository permissions:](https://docs.github.com/en/rest/overview/permissions-required-for-github-apps?apiVersion=2022-11-28)\n",
|
||||
"\n",
|
||||
"* Commit statuses (read only)\n",
|
||||
"* Contents (read and write)\n",
|
||||
"* Issues (read and write)\n",
|
||||
"* Metadata (read only)\n",
|
||||
"* Pull requests (read and write)\n",
|
||||
"\n",
|
||||
"Once the app has been registered, you must give your app permission to access each of the repositories you wish it to act upon. Use the App settings on [github.com here](https://github.com/settings/installations).\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"#### 3. Set Environment Variables\n",
|
||||
"\n",
|
||||
"Before initializing your agent, the following environment variables need to be set:\n",
|
||||
"\n",
|
||||
"* **GITHUB_APP_ID**- A six digit number found in your app's general settings\n",
|
||||
"* **GITHUB_APP_PRIVATE_KEY**- The location of your app's private key .pem file, or the full text of that file as a string.\n",
|
||||
"* **GITHUB_REPOSITORY**- The name of the Github repository you want your bot to act upon. Must follow the format {username}/{repo-name}. *Make sure the app has been added to this repository first!*\n",
|
||||
"* Optional: **GITHUB_BRANCH**- The branch where the bot will make its commits. Defaults to `repo.default_branch`.\n",
|
||||
"* Optional: **GITHUB_BASE_BRANCH**- The base branch of your repo upon which PRs will be based. Defaults to `repo.default_branch`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"for env_var in [\n",
|
||||
" \"GITHUB_APP_ID\",\n",
|
||||
" \"GITHUB_APP_PRIVATE_KEY\",\n",
|
||||
" \"GITHUB_REPOSITORY\",\n",
|
||||
"]:\n",
|
||||
" if not os.getenv(env_var):\n",
|
||||
" os.environ[env_var] = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our toolkit:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.agent_toolkits.github.toolkit import GitHubToolkit\n",
|
||||
"from langchain_community.utilities.github import GitHubAPIWrapper\n",
|
||||
"\n",
|
||||
"github = GitHubAPIWrapper()\n",
|
||||
"toolkit = GitHubToolkit.from_github_api_wrapper(github)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tools\n",
|
||||
"\n",
|
||||
"View available tools:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Get Issues\n",
|
||||
"Get Issue\n",
|
||||
"Comment on Issue\n",
|
||||
"List open pull requests (PRs)\n",
|
||||
"Get Pull Request\n",
|
||||
"Overview of files included in PR\n",
|
||||
"Create Pull Request\n",
|
||||
"List Pull Requests' Files\n",
|
||||
"Create File\n",
|
||||
"Read File\n",
|
||||
"Update File\n",
|
||||
"Delete File\n",
|
||||
"Overview of existing files in Main branch\n",
|
||||
"Overview of files in current working branch\n",
|
||||
"List branches in this repository\n",
|
||||
"Set active branch\n",
|
||||
"Create a new branch\n",
|
||||
"Get files from a directory\n",
|
||||
"Search issues and pull requests\n",
|
||||
"Search code\n",
|
||||
"Create review request\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tools = toolkit.get_tools()\n",
|
||||
"\n",
|
||||
"for tool in tools:\n",
|
||||
" print(tool.name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The purpose of these tools is as follows:\n",
|
||||
"\n",
|
||||
"Each of these steps will be explained in great detail below.\n",
|
||||
"\n",
|
||||
"1. **Get Issues**- fetches issues from the repository.\n",
|
||||
"\n",
|
||||
"2. **Get Issue**- fetches details about a specific issue.\n",
|
||||
"\n",
|
||||
"3. **Comment on Issue**- posts a comment on a specific issue.\n",
|
||||
"\n",
|
||||
"4. **Create Pull Request**- creates a pull request from the bot's working branch to the base branch.\n",
|
||||
"\n",
|
||||
"5. **Create File**- creates a new file in the repository.\n",
|
||||
"\n",
|
||||
"6. **Read File**- reads a file from the repository.\n",
|
||||
"\n",
|
||||
"7. **Update File**- updates a file in the repository.\n",
|
||||
"\n",
|
||||
"8. **Delete File**- deletes a file from the repository."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use within an agent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from langchain.agents import AgentType, initialize_agent\n",
|
||||
"from langchain_community.agent_toolkits.github.toolkit import GitHubToolkit\n",
|
||||
"from langchain_community.utilities.github import GitHubAPIWrapper\n",
|
||||
"from langchain_openai import ChatOpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Set your environment variables using os.environ\n",
|
||||
"os.environ[\"GITHUB_APP_ID\"] = \"123456\"\n",
|
||||
"os.environ[\"GITHUB_APP_PRIVATE_KEY\"] = \"path/to/your/private-key.pem\"\n",
|
||||
"os.environ[\"GITHUB_REPOSITORY\"] = \"username/repo-name\"\n",
|
||||
"os.environ[\"GITHUB_BRANCH\"] = \"bot-branch-name\"\n",
|
||||
"os.environ[\"GITHUB_BASE_BRANCH\"] = \"main\"\n",
|
||||
"\n",
|
||||
"# This example also requires an OpenAI API key\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Available tools:\n",
|
||||
"\tGet Issues\n",
|
||||
"\tGet Issue\n",
|
||||
"\tComment on Issue\n",
|
||||
"\tList open pull requests (PRs)\n",
|
||||
"\tGet Pull Request\n",
|
||||
"\tOverview of files included in PR\n",
|
||||
"\tCreate Pull Request\n",
|
||||
"\tList Pull Requests' Files\n",
|
||||
"\tCreate File\n",
|
||||
"\tRead File\n",
|
||||
"\tUpdate File\n",
|
||||
"\tDelete File\n",
|
||||
"\tOverview of existing files in Main branch\n",
|
||||
"\tOverview of files in current working branch\n",
|
||||
"\tList branches in this repository\n",
|
||||
"\tSet active branch\n",
|
||||
"\tCreate a new branch\n",
|
||||
"\tGet files from a directory\n",
|
||||
"\tSearch issues and pull requests\n",
|
||||
"\tSearch code\n",
|
||||
"\tCreate review request\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm = ChatOpenAI(temperature=0, model=\"gpt-4-1106-preview\")\n",
|
||||
"github = GitHubAPIWrapper()\n",
|
||||
"toolkit = GitHubToolkit.from_github_api_wrapper(github)\n",
|
||||
"tools = toolkit.get_tools()\n",
|
||||
"\n",
|
||||
"# STRUCTURED_CHAT includes args_schema for each tool, which helps avoid tool args parsing errors.\n",
|
||||
"agent = initialize_agent(\n",
|
||||
" tools,\n",
|
||||
" llm,\n",
|
||||
" agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,\n",
|
||||
" verbose=True,\n",
|
||||
")\n",
|
||||
"print(\"Available tools:\")\n",
|
||||
"for tool in tools:\n",
|
||||
" print(\"\\t\" + tool.name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 55,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m I need to figure out what issues need to be completed.\n",
|
||||
"Action: Get Issues\n",
|
||||
"Action Input: N/A\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mFound 1 issues:\n",
|
||||
"[{'title': 'Update README file', 'number': 9}]\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to get more information about this issue.\n",
|
||||
"Action: Get Issue\n",
|
||||
"Action Input: 9\u001b[0m\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3m{\"title\": \"Update README file\", \"body\": \"Find what the most popular frontend framework is right now and add a short blurb to the readme.md file about how this website will take advantage of it.\", \"comments\": \"[]\"}\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to update the README file.\n",
|
||||
"Action: Create File\n",
|
||||
"Action Input: README.md\u001b[0m\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mFile already exists at README.md. Use update_file instead\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to update the existing README file.\n",
|
||||
"Action: Update File\n",
|
||||
"Action Input: README.md\n",
|
||||
"OLD <<<<\n",
|
||||
"This is a sample website\n",
|
||||
">>>> OLD\n",
|
||||
"NEW <<<<\n",
|
||||
"This is a sample website that uses the most popular frontend framework.\n",
|
||||
">>>> NEW\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mFile content was not updated because old content was not found.It may be helpful to use the read_file action to get the current file contents.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to get the current file contents.\n",
|
||||
"Action: Read File\n",
|
||||
"Action Input: README.md\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3mThis is my awesome website!\n",
|
||||
"\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to update the README file with the new content.\n",
|
||||
"Action: Update File\n",
|
||||
"Action Input: README.md\n",
|
||||
"OLD <<<<\n",
|
||||
"This is my awesome website!\n",
|
||||
">>>> OLD\n",
|
||||
"NEW <<<<\n",
|
||||
"This is my awesome website that uses the most popular frontend framework.\n",
|
||||
">>>> NEW\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mUpdated file README.md\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer.\n",
|
||||
"Final Answer: The README.md file has been updated with the new content.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The README.md file has been updated with the new content.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 55,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\n",
|
||||
" \"You have the software engineering capabilities of a Google Principle engineer. You are tasked with completing issues on a github repository. Please look at the existing issues and complete them.\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example: Read an issue, open a pull request\n",
|
||||
"\n",
|
||||
"Workflow: \n",
|
||||
"1. Read issues, either a specific one or just ask it to look at recent ones. \n",
|
||||
"2. Write code, commit it to a new branch.\n",
|
||||
"3. Open a PR\n",
|
||||
"4. \"Request review\" on the PR from the original author of the issue.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"### Input data and LangSmith Trace\n",
|
||||
"* LangSmith trace for this run: https://smith.langchain.com/public/fee6643c-b214-42d0-967b-d24dcdd690fe/r\n",
|
||||
"* Input issue: https://github.com/KastanDay/ML4Bio/issues/33\n",
|
||||
"* Final PR created by bot: https://github.com/KastanDay/ML4Bio/pull/40"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Please implement these changes by creating or editing the necessary files. \n",
|
||||
"\n",
|
||||
"1. First use read_file to read any files in the repo that seem relevant. \n",
|
||||
"2. Then, when you're ready, start implementing changes by creating and updating files. Implement any and all remaining code to make the project work as the commenter intended. \n",
|
||||
"2. The last step is to create a PR with a clear and concise title and description, list any concerns or final changes necessary in the PR body.\n",
|
||||
"3. After opening the PR, comment on the original issue and mention the new PR your just opened, you must comment \"I opened a PR for you to review here #<PR_NUMBER>\" (it'll be something like #30). That hashtag syntax will automatically link to the PR, as necessary. Thanks.\n",
|
||||
"4. If you feel the PR is satisfactory for completing your assignment, create a review request for the original user that opened the issue. Use their username to tag them.\n",
|
||||
"\n",
|
||||
"Feel free to ask for help or leave a comment on the Issue or PR if you're stuck.\n",
|
||||
"\n",
|
||||
"Here's your latest assignment: {issue_description}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain import hub\n",
|
||||
"\n",
|
||||
"gh_issue_prompt_template = hub.pull(\"kastanday/new-github-issue\")\n",
|
||||
"print(gh_issue_prompt_template.template)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Please implement these changes by creating or editing the necessary files. \n",
|
||||
"\n",
|
||||
"1. First use read_file to read any files in the repo that seem relevant. \n",
|
||||
"2. Then, when you're ready, start implementing changes by creating and updating files. Implement any and all remaining code to make the project work as the commenter intended. \n",
|
||||
"2. The last step is to create a PR with a clear and concise title and description, list any concerns or final changes necessary in the PR body.\n",
|
||||
"3. After opening the PR, comment on the original issue and mention the new PR your just opened, you must comment \"I opened a PR for you to review here #<PR_NUMBER>\" (it'll be something like #30). That hashtag syntax will automatically link to the PR, as necessary. Thanks.\n",
|
||||
"4. If you feel the PR is satisfactory for completing your assignment, create a review request for the original user that opened the issue. Use their username to tag them.\n",
|
||||
"\n",
|
||||
"Feel free to ask for help or leave a comment on the Issue or PR if you're stuck.\n",
|
||||
"\n",
|
||||
"Here's your latest assignment: Title: Create a full command line executable workflow for RNA-Seq on PBMC Samples. Open a new pull request (on a separate branch) and comment the PR number here when you're done..\n",
|
||||
"Opened by user: KastanDay\n",
|
||||
"Body: Experiment Type:\n",
|
||||
"RNA-Seq\n",
|
||||
"Sequencing of total cellular RNA\n",
|
||||
"\n",
|
||||
"Workflow Management:\n",
|
||||
"Bash/SLURM\n",
|
||||
"Scripting and job scheduling\n",
|
||||
"\n",
|
||||
"Software Stack:\n",
|
||||
"FastQC\n",
|
||||
"MultiQC\n",
|
||||
"STAR\n",
|
||||
"RSEM\n",
|
||||
"samtools\n",
|
||||
"DESeq2\n",
|
||||
"\n",
|
||||
"What else to know about the pipeline?\n",
|
||||
"I am working PBMC samples collected from patients that are undergoing immunotherapy.\n",
|
||||
"\n",
|
||||
"Use the data files existing in [Report_WholeBrain](https://github.com/KastanDay/ML4Bio/tree/main/Report_WholeBrain) as input for this workflow.\n",
|
||||
"\n",
|
||||
"You should write a series of bash scripts and R scripts that can accomplish this task. Open a PR with those scripts when you're done.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def format_issue(issue):\n",
|
||||
" title = f\"Title: {issue.get('title')}.\"\n",
|
||||
" opened_by = f\"Opened by user: {issue.get('opened_by')}\"\n",
|
||||
" body = f\"Body: {issue.get('body')}\"\n",
|
||||
" comments = issue.get(\"comments\") # often too long\n",
|
||||
" return \"\\n\".join([title, opened_by, body])\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"issue = github.get_issue(33) # task to implement a RNA-seq pipeline (bioinformatics)\n",
|
||||
"final_gh_issue_prompt = gh_issue_prompt_template.format(\n",
|
||||
" issue_description=format_issue(issue)\n",
|
||||
")\n",
|
||||
"print(final_gh_issue_prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.memory.summary_buffer import ConversationSummaryBufferMemory\n",
|
||||
"from langchain_core.prompts.chat import MessagesPlaceholder\n",
|
||||
"\n",
|
||||
"summarizer_llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo\") # type: ignore\n",
|
||||
"chat_history = MessagesPlaceholder(variable_name=\"chat_history\")\n",
|
||||
"memory = ConversationSummaryBufferMemory(\n",
|
||||
" memory_key=\"chat_history\",\n",
|
||||
" return_messages=True,\n",
|
||||
" llm=summarizer_llm,\n",
|
||||
" max_token_limit=2_000,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"agent = initialize_agent(\n",
|
||||
" tools,\n",
|
||||
" llm,\n",
|
||||
" agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,\n",
|
||||
" verbose=True,\n",
|
||||
" handle_parsing_errors=True, # or pass a function that accepts the error and returns a string\n",
|
||||
" max_iterations=30,\n",
|
||||
" max_execution_time=None,\n",
|
||||
" early_stopping_method=\"generate\",\n",
|
||||
" memory=memory,\n",
|
||||
" # trim_intermediate_steps=fancier_trim_intermediate_steps,\n",
|
||||
" agent_kwargs={\n",
|
||||
" \"memory_prompts\": [chat_history],\n",
|
||||
" \"input_variables\": [\"input\", \"agent_scratchpad\", \"chat_history\"],\n",
|
||||
" \"prefix\": final_gh_issue_prompt,\n",
|
||||
" },\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Get files from a directory\",\n",
|
||||
" \"action_input\": \"ML4Bio/tree/main/Report_WholeBrain\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3mError: status code 404, None\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mThe previous action to get files from a directory failed because the path provided does not exist or is not accessible. I need to correct the path to access the files in the `Report_WholeBrain` directory. Let's try to fetch the list of files from the correct directory path. \n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Get files from a directory\",\n",
|
||||
" \"action_input\": \"Report_WholeBrain\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3m['Report_WholeBrain/MDSclustering_WholeBrain.html', 'Report_WholeBrain/MDSclustering_WholeBrain_RUVremoved.html', 'Report_WholeBrain/Report_Antonson_WholeBrain_2022Mar.Rmd', 'Report_WholeBrain/Report_WholeBrain_files/figure-html/Figure 1-1.png', 'Report_WholeBrain/Report_WholeBrain_files/figure-html/Figure 2-1.png', 'Report_WholeBrain/Report_WholeBrain_files/figure-html/Figure 3-1.png', 'Report_WholeBrain/Report_WholeBrain_files/figure-html/Figure 4-1.png', 'Report_WholeBrain/Report_WholeBrain_files/figure-html/Figure 6-1.png', 'Report_WholeBrain/Report_WholeBrain_files/figure-html/Figure 7-1.png', 'Report_WholeBrain/Report_WholeBrain_files/figure-html/Figure 8-1.png', 'Report_WholeBrain/Report_WholeBrain_files/figure-html/Figure 9-1.png', 'Report_WholeBrain/SalmonSummarizedOutput.RData', 'Report_WholeBrain/SampleInfo_RUVvariables_WholeBrain_2022-05-12.csv', 'Report_WholeBrain/Targets_Final.txt', 'Report_WholeBrain/WholeBrain_GeneResults_2022-05-12.xlsx', 'Report_WholeBrain/WholeBrain_GeneResults_RUV_2022-05-12.xlsx', 'Report_WholeBrain/WholeBrain_Gene_level_counts_2022-05-12.xlsx', 'Report_WholeBrain/WholeBrain_RUV_FDR0.1.html', 'Report_WholeBrain/WholeBrain_logCPMValues_RUVcorrected_2022-05-12.xlsx', 'Report_WholeBrain/WholeBrain_logCPMvalues_2022-05-12.xlsx', 'Report_WholeBrain/WholeBrain_rawP05.html', 'Report_WholeBrain/getGO.R', 'Report_WholeBrain/getPath.R', 'Report_WholeBrain/interactive_plots/css/glimma.min.css', 'Report_WholeBrain/interactive_plots/css/src/images/animated-overlay.gif', 'Report_WholeBrain/interactive_plots/css/src/images/favicon.ico', 'Report_WholeBrain/interactive_plots/css/src/images/sort_asc.png', 'Report_WholeBrain/interactive_plots/css/src/images/sort_asc_disabled.png', 'Report_WholeBrain/interactive_plots/css/src/images/sort_both.png', 'Report_WholeBrain/interactive_plots/css/src/images/sort_desc.png', 'Report_WholeBrain/interactive_plots/css/src/images/sort_desc_disabled.png', 
'Report_WholeBrain/interactive_plots/css/src/images/ui-bg_flat_0_aaaaaa_40x100.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-bg_flat_75_ffffff_40x100.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-bg_glass_55_fbf9ee_1x400.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-bg_glass_65_ffffff_1x400.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-bg_glass_75_dadada_1x400.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-bg_glass_75_e6e6e6_1x400.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-bg_glass_95_fef1ec_1x400.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-bg_highlight-soft_75_cccccc_1x100.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-icons_222222_256x240.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-icons_2e83ff_256x240.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-icons_454545_256x240.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-icons_888888_256x240.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-icons_cd0a0a_256x240.png', 'Report_WholeBrain/interactive_plots/js/glimma.min.js', 'Report_WholeBrain/interactive_plots/js/old_MDSclustering_Microglia.js', 'Report_WholeBrain/interactive_plots/js/old_MDSclustering_Microglia_RUV.js', 'Report_WholeBrain/interactive_plots/js/old_MDSclustering_WholeBrain.js', 'Report_WholeBrain/interactive_plots/js/old_MDSclustering_WholeBrain_RUV.js', 'Report_WholeBrain/interactive_plots/js/old_MDSclustering_WholeBrain_noOUT.js', 'Report_WholeBrain/interactive_plots/js/old_Microglia_rawP05.js', 'Report_WholeBrain/interactive_plots/js/old_WholeBrain_RUV_FDR0.1.js', 'Report_WholeBrain/interactive_plots/js/old_WholeBrain_rawP05.js', 'Report_WholeBrain/interactive_plots/old_MDSclustering_Microglia.html', 'Report_WholeBrain/interactive_plots/old_MDSclustering_Microglia_RUV.html', 'Report_WholeBrain/interactive_plots/old_MDSclustering_WholeBrain.html', 
'Report_WholeBrain/interactive_plots/old_MDSclustering_WholeBrain_RUV.html', 'Report_WholeBrain/interactive_plots/old_MDSclustering_WholeBrain_noOUT.html', 'Report_WholeBrain/interactive_plots/old_Microglia_rawP05.html', 'Report_WholeBrain/interactive_plots/old_WholeBrain_RUV_FDR0.1.html', 'Report_WholeBrain/interactive_plots/old_WholeBrain_rawP05.html', 'Report_WholeBrain/trx_EGids_combined.txt', 'Report_WholeBrain/⭐️____Report_Antonson_WholeBrain_2022Mar.html']\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mThe files in the `Report_WholeBrain` directory seem to be mostly reports, images, and some R scripts. However, none of these files appear to be raw RNA-Seq data files or scripts directly related to the RNA-Seq workflow requested by the user. To proceed with creating the workflow, I will need to create new bash and R scripts that will handle the RNA-Seq analysis using the software stack mentioned (FastQC, MultiQC, STAR, RSEM, samtools, DESeq2).\n",
|
||||
"\n",
|
||||
"Before creating the scripts, I should create a new branch to work on this feature.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Create a new branch\",\n",
|
||||
" \"action_input\": \"rna-seq-workflow\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mBranch 'rna-seq-workflow_v1' created successfully, and set as current active branch.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mNow that the new branch `rna-seq-workflow_v1` is created and active, I can start creating the necessary bash and R scripts for the RNA-Seq workflow. I will begin by creating a bash script for quality control using FastQC and MultiQC.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Create File\",\n",
|
||||
" \"action_input\": \"rna-seq-workflow/01_quality_control.sh\\n#!/bin/bash\\n#PBS -N quality_control\\n#PBS -l walltime=04:00:00\\n#PBS -l nodes=1:ppn=8\\n#PBS -q batch\\n\\n# Load modules for FastQC and MultiQC\\nmodule load FastQC/0.11.8\\nmodule load MultiQC/1.9\\n\\n# Define input and output directories\\nINPUT_DIR='/path/to/raw_data'\\nOUTPUT_DIR='/path/to/output/quality_control'\\n\\n# Create output directory if it doesn't exist\\nmkdir -p $OUTPUT_DIR\\n\\n# Run FastQC on all files in the input directory\\nfastqc -o $OUTPUT_DIR -t 8 $INPUT_DIR/*.fastq.gz\\n\\n# Run MultiQC to aggregate results\\nmultiqc -o $OUTPUT_DIR $OUTPUT_DIR\\n\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3mCreated file rna-seq-workflow/01_quality_control.sh\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mThe quality control bash script has been created successfully. Next, I will create a bash script for the alignment step using STAR.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Create File\",\n",
|
||||
" \"action_input\": \"rna-seq-workflow/02_alignment.sh\\n#!/bin/bash\\n#PBS -N alignment\\n#PBS -l walltime=08:00:00\\n#PBS -l nodes=1:ppn=16\\n#PBS -q batch\\n\\n# Load module for STAR\\nmodule load STAR/2.7.3a\\n\\n# Define input and output directories\\nINPUT_DIR='/path/to/quality_control'\\nOUTPUT_DIR='/path/to/output/alignment'\\nGENOME_DIR='/path/to/genome_indices'\\n\\n# Create output directory if it doesn't exist\\nmkdir -p $OUTPUT_DIR\\n\\n# Run STAR for each sample\\nfor sample in $INPUT_DIR/*.fastq.gz\\ndo\\n # Define output file names\\n prefix=$(basename $sample .fastq.gz)\\n output_prefix=$OUTPUT_DIR/$prefix\\n\\n # Run STAR\\n STAR --runThreadN 16 --genomeDir $GENOME_DIR --readFilesIn $sample --readFilesCommand zcat --outFileNamePrefix $output_prefix --outSAMtype BAM SortedByCoordinate\\n\\ndone\\n\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3mCreated file rna-seq-workflow/02_alignment.sh\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mThe alignment bash script has been created successfully. Next, I will create a bash script for transcript quantification using RSEM.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Create File\",\n",
|
||||
" \"action_input\": \"rna-seq-workflow/03_transcript_quantification.sh\\n#!/bin/bash\\n#PBS -N transcript_quantification\\n#PBS -l walltime=12:00:00\\n#PBS -l nodes=1:ppn=8\\n#PBS -q batch\\n\\n# Load module for RSEM\\nmodule load RSEM/1.3.3\\n\\n# Define input and output directories\\nINPUT_DIR='/path/to/alignment'\\nOUTPUT_DIR='/path/to/output/transcript_quantification'\\nREFERENCE_DIR='/path/to/reference'\\n\\n# Create output directory if it doesn't exist\\nmkdir -p $OUTPUT_DIR\\n\\n# Run RSEM for each sample\\nfor bam in $INPUT_DIR/*.bam\\ndo\\n # Define output file names\\n prefix=$(basename $bam .bam)\\n output_prefix=$OUTPUT_DIR/$prefix\\n\\n # Run RSEM\\n rsem-calculate-expression --bam --no-bam-output --estimate-rspd --append-names --output-genome-bam -p 8 $bam $REFERENCE_DIR $output_prefix\\n\\ndone\\n\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3mCreated file rna-seq-workflow/03_transcript_quantification.sh\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mThe transcript quantification bash script has been created successfully. Now, I will create a bash script for sorting and indexing BAM files using samtools, which is a common step before differential expression analysis.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Create File\",\n",
|
||||
" \"action_input\": \"rna-seq-workflow/04_sort_index_bam.sh\\n#!/bin/bash\\n#PBS -N sort_index_bam\\n#PBS -l walltime=02:00:00\\n#PBS -l nodes=1:ppn=4\\n#PBS -q batch\\n\\n# Load module for samtools\\nmodule load samtools/1.10\\n\\n# Define input and output directories\\nINPUT_DIR='/path/to/alignment'\\nOUTPUT_DIR='/path/to/output/sorted_bams'\\n\\n# Create output directory if it doesn't exist\\nmkdir -p $OUTPUT_DIR\\n\\n# Sort and index BAM files\\nfor bam in $INPUT_DIR/*.bam\\ndo\\n # Define output file names\\n sorted_bam=$OUTPUT_DIR/$(basename $bam .bam).sorted.bam\\n\\n # Sort BAM file\\n samtools sort -@ 4 -o $sorted_bam $bam\\n\\n # Index sorted BAM file\\n samtools index $sorted_bam\\ndone\\n\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3mCreated file rna-seq-workflow/04_sort_index_bam.sh\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mThe sorting and indexing of BAM files script has been created successfully. The next step is to create an R script for differential expression analysis using DESeq2. This script will be executed after the previous steps in the workflow have been completed.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Create File\",\n",
|
||||
" \"action_input\": \"rna-seq-workflow/05_differential_expression.R\\n# Load DESeq2 library\\nlibrary(DESeq2)\\n\\n# Set input directory\\ninput_dir <- '/path/to/transcript_quantification'\\n\\n# Read in the count matrix and the sample information\\n# Assuming countData is a matrix with count data and colData is a DataFrame with sample information\\ncountData <- as.matrix(read.csv(file.path(input_dir, 'count_matrix.csv'), row.names=1))\\ncolData <- read.csv(file.path(input_dir, 'colData.csv'))\\n\\n# Create a DESeqDataSet object\\ndds <- DESeqDataSetFromMatrix(countData = countData,\\n colData = colData,\\n design = ~ condition)\\n\\n# Run the differential expression analysis\\ndds <- DESeq(dds)\\n\\n# Get the results\\nres <- results(dds)\\n\\n# Order the results by the adjusted p-value\\nresOrdered <- res[order(res$padj),]\\n\\n# Write the results to a CSV file\\nwrite.csv(as.data.frame(resOrdered), file.path(input_dir, 'differential_expression_results.csv'))\\n\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3mCreated file rna-seq-workflow/05_differential_expression.R\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mThe R script for differential expression analysis using DESeq2 has been created successfully. Now that all the necessary scripts for the RNA-Seq workflow have been created, I should create a master bash script that will call each of these scripts in sequence to run the entire workflow.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Create File\",\n",
|
||||
" \"action_input\": \"rna-seq-workflow/run_workflow.sh\\n#!/bin/bash\\n# Master script to run the RNA-Seq workflow\\n\\n# Run quality control\\nbash 01_quality_control.sh\\n\\n# Run alignment\\nbash 02_alignment.sh\\n\\n# Run transcript quantification\\nbash 03_transcript_quantification.sh\\n\\n# Sort and index BAM files\\nbash 04_sort_index_bam.sh\\n\\n# Run differential expression analysis\\nRscript 05_differential_expression.R\\n\\n# Workflow completed\\necho \\\"RNA-Seq workflow completed successfully.\\\"\\n\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3mCreated file rna-seq-workflow/run_workflow.sh\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mThe master bash script to run the entire RNA-Seq workflow has been created successfully. The next step is to create a pull request with all the scripts that have been created in the `rna-seq-workflow_v1` branch.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Create Pull Request\",\n",
|
||||
" \"action_input\": \"Create a full command line executable workflow for RNA-Seq on PBMC Samples\\n\\nThis pull request includes a series of bash and R scripts to perform RNA-Seq analysis on PBMC samples. The workflow includes steps for quality control, alignment, transcript quantification, sorting and indexing BAM files, and differential expression analysis using the following software stack: FastQC, MultiQC, STAR, RSEM, samtools, and DESeq2.\\n\\nPlease review the scripts and provide feedback or request any additional changes as necessary.\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mSuccessfully created PR number 40\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mThe pull request has been successfully created. Now, I need to comment on the original issue to inform the user about the new PR.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Comment on Issue\",\n",
|
||||
" \"action_input\": \"2\\n\\nI opened a PR for you to review here #40\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3mCommented on issue 2\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mThe comment has been posted on the original issue. Now, I will create a review request for the user KastanDay who opened the issue.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Create review request\",\n",
|
||||
" \"action_input\": \"KastanDay\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3mReview request created for user KastanDay on PR #40\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mAction:\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Final Answer\",\n",
|
||||
" \"action_input\": \"The tasks have been completed: a new branch was created, the necessary scripts for the RNA-Seq workflow were written, a pull request was opened, the original issue was commented on with the PR number, and a review request was sent to the user KastanDay.\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_core.tracers.context import tracing_v2_enabled\n",
|
||||
"\n",
|
||||
"# To use langsmith (recommended for these long tasks):\n",
|
||||
"os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"os.environ[\"LANGCHAIN_ENDPOINT\"] = \"https://api.smith.langchain.com\"\n",
|
||||
"os.environ[\"LANGCHAIN_API_KEY\"] = \"ls__......\"\n",
|
||||
"os.environ[\"LANGCHAIN_PROJECT\"] = \"Github_Demo_PR\"\n",
|
||||
"os.environ[\"LANGCHAIN_WANDB_TRACING\"] = \"false\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"with tracing_v2_enabled(project_name=\"Github_Demo_PR\", tags=[\"PR_bot\"]) as cb:\n",
|
||||
" agent.run(final_gh_issue_prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Full text of tools\n",
|
||||
"\n",
|
||||
"When using or building tools, it's always helpful to inspect what the model sees.\n",
|
||||
"\n",
|
||||
"On OpenAI models, tool descriptions are part of the `SystemPrompt`.\n",
|
||||
"\n",
|
||||
"The `args` are added to the prompt in structured chats, e.g. `AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION`, but not in `AgentType.ZERO_SHOT_REACT_DESCRIPTION`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Get Issues: \n",
|
||||
"This tool will fetch a list of the repository's issues. It will return the title, and issue number of 5 issues. It takes no input., args: {'no_input': {'title': 'No Input', 'description': 'No input required, e.g. `` (empty string).', 'default': '', 'type': 'string'}}\n",
|
||||
"Get Issue: \n",
|
||||
"This tool will fetch the title, body, and comment thread of a specific issue. **VERY IMPORTANT**: You must specify the issue number as an integer., args: {'issue_number': {'title': 'Issue Number', 'description': 'Issue number as an integer, e.g. `42`', 'default': 0, 'type': 'integer'}}\n",
|
||||
"Comment on Issue: \n",
|
||||
"This tool is useful when you need to comment on a GitHub issue. Simply pass in the issue number and the comment you would like to make. Please use this sparingly as we don't want to clutter the comment threads. **VERY IMPORTANT**: Your input to this tool MUST strictly follow these rules:\n",
|
||||
"\n",
|
||||
"- First you must specify the issue number as an integer\n",
|
||||
"- Then you must place two newlines\n",
|
||||
"- Then you must specify your comment, args: {'input': {'title': 'Input', 'description': 'Follow the required formatting.', 'type': 'string'}}\n",
|
||||
"List open pull requests (PRs): \n",
|
||||
"This tool will fetch a list of the repository's Pull Requests (PRs). It will return the title, and PR number of 5 PRs. It takes no input., args: {'no_input': {'title': 'No Input', 'description': 'No input required, e.g. `` (empty string).', 'default': '', 'type': 'string'}}\n",
|
||||
"Get Pull Request: \n",
|
||||
"This tool will fetch the title, body, comment thread and commit history of a specific Pull Request (by PR number). **VERY IMPORTANT**: You must specify the PR number as an integer., args: {'pr_number': {'title': 'Pr Number', 'description': 'The PR number as an integer, e.g. `12`', 'default': 0, 'type': 'integer'}}\n",
|
||||
"Overview of files included in PR: \n",
|
||||
"This tool will fetch the full text of all files in a pull request (PR) given the PR number as an input. This is useful for understanding the code changes in a PR or contributing to it. **VERY IMPORTANT**: You must specify the PR number as an integer input parameter., args: {'pr_number': {'title': 'Pr Number', 'description': 'The PR number as an integer, e.g. `12`', 'default': 0, 'type': 'integer'}}\n",
|
||||
"Create Pull Request: \n",
|
||||
"This tool is useful when you need to create a new pull request in a GitHub repository. **VERY IMPORTANT**: Your input to this tool MUST strictly follow these rules:\n",
|
||||
"\n",
|
||||
"- First you must specify the title of the pull request\n",
|
||||
"- Then you must place two newlines\n",
|
||||
"- Then you must write the body or description of the pull request\n",
|
||||
"\n",
|
||||
"When appropriate, always reference relevant issues in the body by using the syntax `closes #<issue_number` like `closes #3, closes #6`.\n",
|
||||
"For example, if you would like to create a pull request called \"README updates\" with contents \"added contributors' names, closes #3\", you would pass in the following string:\n",
|
||||
"\n",
|
||||
"README updates\n",
|
||||
"\n",
|
||||
"added contributors' names, closes #3, args: {'formatted_pr': {'title': 'Formatted Pr', 'description': 'Follow the required formatting.', 'type': 'string'}}\n",
|
||||
"List Pull Requests' Files: \n",
|
||||
"This tool will fetch the full text of all files in a pull request (PR) given the PR number as an input. This is useful for understanding the code changes in a PR or contributing to it. **VERY IMPORTANT**: You must specify the PR number as an integer input parameter., args: {'pr_number': {'title': 'Pr Number', 'description': 'The PR number as an integer, e.g. `12`', 'default': 0, 'type': 'integer'}}\n",
|
||||
"Create File: \n",
|
||||
"This tool is a wrapper for the GitHub API, useful when you need to create a file in a GitHub repository. **VERY IMPORTANT**: Your input to this tool MUST strictly follow these rules:\n",
|
||||
"\n",
|
||||
"- First you must specify which file to create by passing a full file path (**IMPORTANT**: the path must not start with a slash)\n",
|
||||
"- Then you must specify the contents of the file\n",
|
||||
"\n",
|
||||
"For example, if you would like to create a file called /test/test.txt with contents \"test contents\", you would pass in the following string:\n",
|
||||
"\n",
|
||||
"test/test.txt\n",
|
||||
"\n",
|
||||
"test contents, args: {'formatted_file': {'title': 'Formatted File', 'description': 'Follow the required formatting.', 'type': 'string'}}\n",
|
||||
"Read File: \n",
|
||||
"This tool is a wrapper for the GitHub API, useful when you need to read the contents of a file. Simply pass in the full file path of the file you would like to read. **IMPORTANT**: the path must not start with a slash, args: {'formatted_filepath': {'title': 'Formatted Filepath', 'description': 'The full file path of the file you would like to read where the path must NOT start with a slash, e.g. `some_dir/my_file.py`.', 'type': 'string'}}\n",
|
||||
"Update File: \n",
|
||||
"This tool is a wrapper for the GitHub API, useful when you need to update the contents of a file in a GitHub repository. **VERY IMPORTANT**: Your input to this tool MUST strictly follow these rules:\n",
|
||||
"\n",
|
||||
"- First you must specify which file to modify by passing a full file path (**IMPORTANT**: the path must not start with a slash)\n",
|
||||
"- Then you must specify the old contents which you would like to replace wrapped in OLD <<<< and >>>> OLD\n",
|
||||
"- Then you must specify the new contents which you would like to replace the old contents with wrapped in NEW <<<< and >>>> NEW\n",
|
||||
"\n",
|
||||
"For example, if you would like to replace the contents of the file /test/test.txt from \"old contents\" to \"new contents\", you would pass in the following string:\n",
|
||||
"\n",
|
||||
"test/test.txt\n",
|
||||
"\n",
|
||||
"This is text that will not be changed\n",
|
||||
"OLD <<<<\n",
|
||||
"old contents\n",
|
||||
">>>> OLD\n",
|
||||
"NEW <<<<\n",
|
||||
"new contents\n",
|
||||
">>>> NEW, args: {'formatted_file_update': {'title': 'Formatted File Update', 'description': 'Strictly follow the provided rules.', 'type': 'string'}}\n",
|
||||
"Delete File: \n",
|
||||
"This tool is a wrapper for the GitHub API, useful when you need to delete a file in a GitHub repository. Simply pass in the full file path of the file you would like to delete. **IMPORTANT**: the path must not start with a slash, args: {'formatted_filepath': {'title': 'Formatted Filepath', 'description': 'The full file path of the file you would like to delete where the path must NOT start with a slash, e.g. `some_dir/my_file.py`. Only input a string, not the param name.', 'type': 'string'}}\n",
|
||||
"Overview of existing files in Main branch: \n",
|
||||
"This tool will provide an overview of all existing files in the main branch of the repository. It will list the file names, their respective paths, and a brief summary of their contents. This can be useful for understanding the structure and content of the repository, especially when navigating through large codebases. No input parameters are required., args: {'no_input': {'title': 'No Input', 'description': 'No input required, e.g. `` (empty string).', 'default': '', 'type': 'string'}}\n",
|
||||
"Overview of files in current working branch: \n",
|
||||
"This tool will provide an overview of all files in your current working branch where you should implement changes. This is great for getting a high level overview of the structure of your code. No input parameters are required., args: {'no_input': {'title': 'No Input', 'description': 'No input required, e.g. `` (empty string).', 'default': '', 'type': 'string'}}\n",
|
||||
"List branches in this repository: \n",
|
||||
"This tool will fetch a list of all branches in the repository. It will return the name of each branch. No input parameters are required., args: {'no_input': {'title': 'No Input', 'description': 'No input required, e.g. `` (empty string).', 'default': '', 'type': 'string'}}\n",
|
||||
"Set active branch: \n",
|
||||
"This tool will set the active branch in the repository, similar to `git checkout <branch_name>` and `git switch -c <branch_name>`. **VERY IMPORTANT**: You must specify the name of the branch as a string input parameter., args: {'branch_name': {'title': 'Branch Name', 'description': 'The name of the branch, e.g. `my_branch`.', 'type': 'string'}}\n",
|
||||
"Create a new branch: \n",
|
||||
"This tool will create a new branch in the repository. **VERY IMPORTANT**: You must specify the name of the new branch as a string input parameter., args: {'branch_name': {'title': 'Branch Name', 'description': 'The name of the branch, e.g. `my_branch`.', 'type': 'string'}}\n",
|
||||
"Get files from a directory: \n",
|
||||
"This tool will fetch a list of all files in a specified directory. **VERY IMPORTANT**: You must specify the path of the directory as a string input parameter., args: {'input': {'title': 'Input', 'description': 'The path of the directory, e.g. `some_dir/inner_dir`. Only input a string, do not include the parameter name.', 'default': '', 'type': 'string'}}\n",
|
||||
"Search issues and pull requests: \n",
|
||||
"This tool will search for issues and pull requests in the repository. **VERY IMPORTANT**: You must specify the search query as a string input parameter., args: {'search_query': {'title': 'Search Query', 'description': 'Natural language search query, e.g. `My issue title or topic`.', 'type': 'string'}}\n",
|
||||
"Search code: \n",
|
||||
"This tool will search for code in the repository. **VERY IMPORTANT**: You must specify the search query as a string input parameter., args: {'search_query': {'title': 'Search Query', 'description': 'A keyword-focused natural language search query for code, e.g. `MyFunctionName()`.', 'type': 'string'}}\n",
|
||||
"Create review request: \n",
|
||||
"This tool will create a review request on the open pull request that matches the current active branch. **VERY IMPORTANT**: You must specify the username of the person who is being requested as a string input parameter., args: {'username': {'title': 'Username', 'description': 'GitHub username of the user being requested, e.g. `my_username`.', 'type': 'string'}}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.tools.render import render_text_description_and_args\n",
|
||||
"\n",
|
||||
"print(render_text_description_and_args(tools))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example: Agent with Search\n",
|
||||
"\n",
|
||||
"If your agent does not need to use all 8 tools, you can build tools individually to use. For this example, we'll make an agent that does not use the create_file, delete_file or create_pull_request tools, but can also use duckduckgo-search."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet duckduckgo-search"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 72,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.tools import DuckDuckGoSearchRun\n",
|
||||
"from langchain_core.tools import Tool\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"tools = []\n",
|
||||
"unwanted_tools = [\"Get Issue\", \"Delete File\", \"Create File\", \"Create Pull Request\"]\n",
|
||||
"\n",
|
||||
"for tool in toolkit.get_tools():\n",
|
||||
" if tool.name not in unwanted_tools:\n",
|
||||
" tools.append(tool)\n",
|
||||
"tools += [\n",
|
||||
" Tool(\n",
|
||||
" name=\"Search\",\n",
|
||||
" func=DuckDuckGoSearchRun().run,\n",
|
||||
" description=\"useful for when you need to search the web\",\n",
|
||||
" )\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"agent = initialize_agent(\n",
|
||||
" tools=tools,\n",
|
||||
" llm=ChatOpenAI(temperature=0.1),\n",
|
||||
" agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,\n",
|
||||
" verbose=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Finally let's build a prompt and test it out!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 73,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mTo complete this issue, I need to find the most popular frontend framework and add a blurb about how this website will utilize it to the readme.md file. I should start by researching the most popular frontend frameworks and then update the readme file accordingly. I will use the \"Search\" tool to research the most popular frontend framework.\n",
|
||||
"\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: \"most popular frontend framework\"\u001b[0m\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mAlex Ivanovs February 25, 2023 Table of Contents What are the current Front-end trends? Top Front-end Frameworks for 2023 #1 - React #2 - Angular #3 - Vue #4 - Svelte #5 - Preact #6 - Ember #7 - Solid #8 - Lit #9 - Alpine #10 - Stencil #11 - Qwik Front-end Frameworks: A Summary Top 6 Frontend Frameworks To Use in 2022 by Nwose Lotanna Victor August 26, 2022 Web 0 Comments This post reveals the top six frontend libraries to use in 2022. The list is fresh and very different from the previous years. State of JS Though React is the most popular framework for frontend development, it also has some shortcomings. Due to its limitations, the idea was to design a small-size framework that will offer the same features as React. This is how a tiny version of React — Preact — appeared. Top 10 Popular Frontend Frameworks to Use in 2023 Sep 26, 2022 10 min Сontents 1. What is a framework? 2. Front-end frameworks vs backend frameworks 3. The best front-end frameworks in 2023 React Vue.js Angular Svelte JQuery Ember Backbone Semantic UI 4. Final words Technostacks Jan 11 2023 Top Frontend Frameworks of 2023 for Web Development Developing what the users see on their screens is the role of a front-end web developer. Unarguably, front-end developers worldwide are trying to use the best front-end frameworks to provide the best user experience.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mBased on my research, the most popular frontend framework right now is React. I will now update the readme.md file to include a blurb about how this website will take advantage of React.\n",
|
||||
"\n",
|
||||
"Action: Update File\n",
|
||||
"Action Input:\n",
|
||||
"README.md\n",
|
||||
"OLD <<<<\n",
|
||||
"This is the readme file for the website.\n",
|
||||
">>>> OLD\n",
|
||||
"NEW <<<<\n",
|
||||
"This is the readme file for the website.\n",
|
||||
"\n",
|
||||
"This website takes advantage of the React framework, which allows for efficient and reusable UI components. With React, we can easily manage the state of our application and create interactive user interfaces. It provides a smooth and seamless user experience, making this website highly responsive and dynamic.\n",
|
||||
">>>> NEW\n",
|
||||
"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mFile content was not updated because old content was not found.It may be helpful to use the read_file action to get the current file contents.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mI need to first read the contents of the README.md file to get the current content. Then I can update the file with the new content.\n",
|
||||
"\n",
|
||||
"Action: Read File\n",
|
||||
"Action Input: README.md\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3mThis is my awesome website that uses the most popular frontend framework.\n",
|
||||
"\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mThe current content of the README.md file is \"This is my awesome website that uses the most popular frontend framework.\" I can now update the file with the new content.\n",
|
||||
"\n",
|
||||
"Action: Update File\n",
|
||||
"Action Input:\n",
|
||||
"README.md\n",
|
||||
"OLD <<<<\n",
|
||||
"This is my awesome website that uses the most popular frontend framework.\n",
|
||||
">>>> OLD\n",
|
||||
"NEW <<<<\n",
|
||||
"This is my awesome website that uses the most popular frontend framework.\n",
|
||||
"\n",
|
||||
"This website takes advantage of the React framework, which allows for efficient and reusable UI components. With React, we can easily manage the state of our application and create interactive user interfaces. It provides a smooth and seamless user experience, making this website highly responsive and dynamic.\n",
|
||||
">>>> NEW\n",
|
||||
"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mUpdated file README.md\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mI have successfully updated the README.md file with the blurb about how this website will take advantage of the React framework.\n",
|
||||
"\n",
|
||||
"Final Answer: The most popular frontend framework right now is React. This website takes advantage of React to create efficient and reusable UI components, manage application state, and provide a smooth and seamless user experience.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The most popular frontend framework right now is React. This website takes advantage of React to create efficient and reusable UI components, manage application state, and provide a smooth and seamless user experience.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 73,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# The GitHubAPIWrapper can be used outside of an agent, too\n",
|
||||
"# This gets the info about issue number 9, since we want to\n",
|
||||
"# force the agent to address this specific issue.\n",
|
||||
"\n",
|
||||
"issue = github.get_issue(9)\n",
|
||||
"\n",
|
||||
"prompt = f\"\"\"\n",
|
||||
"You are a senior frontend developer who is experienced in HTML, CSS, and JS- especially React.\n",
|
||||
"You have been assigned the below issue. Complete it to the best of your ability.\n",
|
||||
"Remember to first make a plan and pay attention to details like file names and commonsense.\n",
|
||||
"Then execute the plan and use tools appropriately.\n",
|
||||
"Finally, make a pull request to merge your changes.\n",
|
||||
"Issue: {issue[\"title\"]}\n",
|
||||
"Issue Description: {issue['body']}\n",
|
||||
"Comments: {issue['comments']}\"\"\"\n",
|
||||
"\n",
|
||||
"agent.run(prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `GithubToolkit` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.github.toolkit.GitHubToolkit.html)."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -1,326 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: GMail\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# GmailToolkit\n",
|
||||
"\n",
|
||||
"This will help you get started with the Gmail [toolkit](/docs/concepts/#toolkits). This toolkit interacts with the Gmail API to read messages, draft and send messages, and more. For detailed documentation of all GmailToolkit features and configurations, head to the [API reference](https://api.python.langchain.com/en/latest/gmail/langchain_google_community.gmail.toolkit.GmailToolkit.html).\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To use this toolkit, you will need to set up your credentials as explained in the [Gmail API docs](https://developers.google.com/gmail/api/quickstart/python#authorize_credentials_for_a_desktop_application). Once you've downloaded the `credentials.json` file, you can start using the Gmail API."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"This toolkit lives in the `langchain-google-community` package. We'll need the `gmail` extra:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-google-community\\[gmail\\]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated tracing from runs of individual tools, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"By default the toolkit reads the local `credentials.json` file. You can also manually provide a `Credentials` object."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_community import GmailToolkit\n",
|
||||
"\n",
|
||||
"toolkit = GmailToolkit()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Customizing Authentication\n",
|
||||
"\n",
|
||||
"Behind the scenes, a `googleapi` resource is created using the following methods. \n",
|
||||
"You can manually build a `googleapi` resource for more auth control. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_community.gmail.utils import (\n",
|
||||
" build_resource_service,\n",
|
||||
" get_gmail_credentials,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Can review scopes here https://developers.google.com/gmail/api/auth/scopes\n",
|
||||
"# For instance, readonly scope is 'https://www.googleapis.com/auth/gmail.readonly'\n",
|
||||
"credentials = get_gmail_credentials(\n",
|
||||
" token_file=\"token.json\",\n",
|
||||
" scopes=[\"https://mail.google.com/\"],\n",
|
||||
" client_secrets_file=\"credentials.json\",\n",
|
||||
")\n",
|
||||
"api_resource = build_resource_service(credentials=credentials)\n",
|
||||
"toolkit = GmailToolkit(api_resource=api_resource)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tools\n",
|
||||
"\n",
|
||||
"View available tools:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[GmailCreateDraft(name='create_gmail_draft', description='Use this tool to create a draft email with the provided message fields.', args_schema=<class 'langchain_community.tools.gmail.create_draft.CreateDraftSchema'>, return_direct=False, verbose=False, callbacks=None, callback_manager=None, api_resource=<googleapiclient.discovery.Resource object at 0x10e5c6d10>),\n",
|
||||
" GmailSendMessage(name='send_gmail_message', description='Use this tool to send email messages. The input is the message, recipents', args_schema=None, return_direct=False, verbose=False, callbacks=None, callback_manager=None, api_resource=<googleapiclient.discovery.Resource object at 0x10e5c6d10>),\n",
|
||||
" GmailSearch(name='search_gmail', description=('Use this tool to search for email messages or threads. The input must be a valid Gmail query. The output is a JSON list of the requested resource.',), args_schema=<class 'langchain_community.tools.gmail.search.SearchArgsSchema'>, return_direct=False, verbose=False, callbacks=None, callback_manager=None, api_resource=<googleapiclient.discovery.Resource object at 0x10e5c6d10>),\n",
|
||||
" GmailGetMessage(name='get_gmail_message', description='Use this tool to fetch an email by message ID. Returns the thread ID, snipet, body, subject, and sender.', args_schema=<class 'langchain_community.tools.gmail.get_message.SearchArgsSchema'>, return_direct=False, verbose=False, callbacks=None, callback_manager=None, api_resource=<googleapiclient.discovery.Resource object at 0x10e5c6d10>),\n",
|
||||
" GmailGetThread(name='get_gmail_thread', description=('Use this tool to search for email messages. The input must be a valid Gmail query. The output is a JSON list of messages.',), args_schema=<class 'langchain_community.tools.gmail.get_thread.GetThreadSchema'>, return_direct=False, verbose=False, callbacks=None, callback_manager=None, api_resource=<googleapiclient.discovery.Resource object at 0x10e5c6d10>)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tools = toolkit.get_tools()\n",
|
||||
"tools"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"- [GmailCreateDraft](https://api.python.langchain.com/en/latest/gmail/langchain_google_community.gmail.create_draft.GmailCreateDraft.html)\n",
|
||||
"- [GmailSendMessage](https://api.python.langchain.com/en/latest/gmail/langchain_google_community.gmail.send_message.GmailSendMessage.html)\n",
|
||||
"- [GmailSearch](https://api.python.langchain.com/en/latest/gmail/langchain_google_community.gmail.search.GmailSearch.html)\n",
|
||||
"- [GmailGetMessage](https://api.python.langchain.com/en/latest/gmail/langchain_google_community.gmail.get_message.GmailGetMessage.html)\n",
|
||||
"- [GmailGetThread](https://api.python.langchain.com/en/latest/gmail/langchain_google_community.gmail.get_thread.GmailGetThread.html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use within an agent\n",
|
||||
"\n",
|
||||
"We show here how to use it as part of an [agent](/docs/tutorials/agents). We use the OpenAI Functions Agent, so we will need to set up and install the required dependencies for that. We will also use [LangSmith Hub](https://smith.langchain.com/hub) to pull the prompt from, so we will need to install that.\n",
|
||||
"\n",
|
||||
"```bash\n",
|
||||
"pip install -U langchain-openai langchainhub\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import hub\n",
|
||||
"from langchain.agents import AgentExecutor, create_openai_functions_agent\n",
|
||||
"from langchain_openai import ChatOpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"instructions = \"\"\"You are an assistant.\"\"\"\n",
|
||||
"base_prompt = hub.pull(\"langchain-ai/openai-functions-template\")\n",
|
||||
"prompt = base_prompt.partial(instructions=instructions)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = ChatOpenAI(temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = create_openai_functions_agent(llm, toolkit.get_tools(), prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor = AgentExecutor(\n",
|
||||
" agent=agent,\n",
|
||||
" tools=toolkit.get_tools(),\n",
|
||||
" # This is set to False to prevent information about my email showing up on the screen\n",
|
||||
" # Normally, it is helpful to have it set to True however.\n",
|
||||
" verbose=False,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'Create a gmail draft for me to edit of a letter from the perspective of a sentient parrot who is looking to collaborate on some research with her estranged friend, a cat. Under no circumstances may you send the message, however.',\n",
|
||||
" 'output': 'I have created a draft email for you to edit. Please find the draft in your Gmail drafts folder. Remember, under no circumstances should you send the message.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.invoke(\n",
|
||||
" {\n",
|
||||
" \"input\": \"Create a gmail draft for me to edit of a letter from the perspective of a sentient parrot\"\n",
|
||||
" \" who is looking to collaborate on some research with her\"\n",
|
||||
" \" estranged friend, a cat. Under no circumstances may you send the message, however.\"\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'Could you search in my drafts for the latest email? what is the title?',\n",
|
||||
" 'output': 'The latest email in your drafts is titled \"Collaborative Research Proposal\".'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.invoke(\n",
|
||||
" {\"input\": \"Could you search in my drafts for the latest email? what is the title?\"}\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
---
|
||||
sidebar_position: 0
|
||||
sidebar_class_name: hidden
|
||||
---
|
||||
|
||||
# Toolkits
|
||||
|
||||
**Toolkits** are collections of tools that are designed to be used together for specific tasks. They include conveniences for loading tools
|
||||
that share common authentication, services, or other objects. They can be implemented by subclassing the
|
||||
[BaseToolkit](https://api.python.langchain.com/en/latest/tools/langchain_core.tools.BaseToolkit.html#langchain_core.tools.BaseToolkit) class.
|
||||
|
||||
This table lists common toolkits.
|
||||
|
||||
|
||||
| Toolkit | Package |
|
||||
|------|---------------|
|
||||
| [GitHubToolkit](/docs/integrations/toolkits/github) | [langchain_community.agent_toolkits.github](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.github.toolkit.GitHubToolkit.html) |
|
||||
| [GmailToolkit](/docs/integrations/toolkits/gmail) | [langchain_google_community.gmail.toolkit](https://api.python.langchain.com/en/latest/gmail/langchain_google_community.gmail.toolkit.GmailToolkit.html) |
|
||||
| [RequestsToolkit](/docs/integrations/toolkits/requests) | [langchain_community.agent_toolkits.openapi](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.openapi.toolkit.RequestsToolkit.html) |
|
||||
| [SlackToolkit](/docs/integrations/toolkits/slack) | [langchain_community.agent_toolkits.slack](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.slack.toolkit.SlackToolkit.html) |
|
||||
| [SQLDatabaseToolkit](/docs/integrations/toolkits/sql_database) | [langchain_community.agent_toolkits.sql](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.sql.toolkit.SQLDatabaseToolkit.html) |
|
||||
@@ -1,302 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c81da886",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Pandas Dataframe\n",
|
||||
"\n",
|
||||
"This notebook shows how to use agents to interact with a `Pandas DataFrame`. It is mostly optimized for question answering.\n",
|
||||
"\n",
|
||||
"**NOTE: this agent calls the `Python` agent under the hood, which executes LLM-generated Python code - this can be bad if the LLM-generated Python code is harmful. Use cautiously.**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "0cdd9bf5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents.agent_types import AgentType\n",
|
||||
"from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent\n",
|
||||
"from langchain_openai import ChatOpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "051ebe84",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"from langchain_openai import OpenAI\n",
|
||||
"\n",
|
||||
"df = pd.read_csv(\n",
|
||||
" \"https://raw.githubusercontent.com/pandas-dev/pandas/main/doc/data/titanic.csv\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a62858e2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using `ZERO_SHOT_REACT_DESCRIPTION`\n",
|
||||
"\n",
|
||||
"This shows how to initialize the agent using the `ZERO_SHOT_REACT_DESCRIPTION` agent type."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "4185ff46",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = create_pandas_dataframe_agent(OpenAI(temperature=0), df, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7233ab56",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using OpenAI Functions\n",
|
||||
"\n",
|
||||
"This shows how to initialize the agent using the OPENAI_FUNCTIONS agent type. Note that this is an alternative to the above."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "a8ea710e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = create_pandas_dataframe_agent(\n",
|
||||
" ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\"),\n",
|
||||
" df,\n",
|
||||
" verbose=True,\n",
|
||||
" agent_type=AgentType.OPENAI_FUNCTIONS,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "a9207a2e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `python_repl_ast` with `df.shape[0]`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m891\u001b[0m\u001b[32;1m\u001b[1;3mThere are 891 rows in the dataframe.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'There are 891 rows in the dataframe.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.invoke(\"how many rows are there?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "bd43617c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to count the number of people with more than 3 siblings\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: df[df['SibSp'] > 3].shape[0]\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m30\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: 30 people have more than 3 siblings.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'30 people have more than 3 siblings.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.invoke(\"how many people have more than 3 siblings\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "94e64b58",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to calculate the average age first\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: df['Age'].mean()\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m29.69911764705882\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now need to calculate the square root of the average age\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: math.sqrt(df['Age'].mean())\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mNameError(\"name 'math' is not defined\")\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to import the math library\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: import math\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now need to calculate the square root of the average age\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: math.sqrt(df['Age'].mean())\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m5.449689683556195\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: The square root of the average age is 5.449689683556195.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The square root of the average age is 5.449689683556195.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.invoke(\"whats the square root of the average age?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c4bc0584",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Multi DataFrame Example\n",
|
||||
"\n",
|
||||
"This next part shows how the agent can interact with multiple dataframes passed in as a list."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "42a15bd9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df1 = df.copy()\n",
|
||||
"df1[\"Age\"] = df1[\"Age\"].fillna(df1[\"Age\"].mean())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "eba13b4d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to compare the age columns in both dataframes\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: len(df1[df1['Age'] != df2['Age']])\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m177\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: 177 rows in the age column are different.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'177 rows in the age column are different.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent = create_pandas_dataframe_agent(OpenAI(temperature=0), [df, df1], verbose=True)\n",
|
||||
"agent.invoke(\"how many rows in the age column are different?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "60d08a56",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
@@ -1,382 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "be75cb7e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"keywords: [PythonREPLTool]\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "82a4c2cc-20ea-4b20-a565-63e905dee8ff",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Python\n",
|
||||
"\n",
|
||||
"This notebook showcases an agent designed to write and execute `Python` code to answer a question."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "f98e9c90-5c37-4fb9-af3e-d09693af8543",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import hub\n",
|
||||
"from langchain.agents import AgentExecutor\n",
|
||||
"from langchain_experimental.tools import PythonREPLTool"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ba9adf51",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create the tool(s)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "003bce04",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tools = [PythonREPLTool()]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4aceaeaf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using OpenAI Functions Agent\n",
|
||||
"\n",
|
||||
"This is probably the most reliable type of agent, but it is only compatible with models that support function calling."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "3a054d1d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import create_openai_functions_agent\n",
|
||||
"from langchain_openai import ChatOpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "3454514b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"instructions = \"\"\"You are an agent designed to write and execute python code to answer questions.\n",
|
||||
"You have access to a python REPL, which you can use to execute python code.\n",
|
||||
"If you get an error, debug your code and try again.\n",
|
||||
"Only use the output of your code to answer the question. \n",
|
||||
"You might know the answer without running any code, but you should still run the code to get the answer.\n",
|
||||
"If it does not seem like you can write code to answer the question, just return \"I don't know\" as the answer.\n",
|
||||
"\"\"\"\n",
|
||||
"base_prompt = hub.pull(\"langchain-ai/openai-functions-template\")\n",
|
||||
"prompt = base_prompt.partial(instructions=instructions)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "2a573e95",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = create_openai_functions_agent(ChatOpenAI(temperature=0), tools, prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "cae41550",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ca30d64c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using ReAct Agent\n",
|
||||
"\n",
|
||||
"This is a less reliable type, but is compatible with most models"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "bcaa0b18",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import create_react_agent\n",
|
||||
"from langchain_anthropic import ChatAnthropic"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"id": "d2470880",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"instructions = \"\"\"You are an agent designed to write and execute python code to answer questions.\n",
|
||||
"You have access to a python REPL, which you can use to execute python code.\n",
|
||||
"If you get an error, debug your code and try again.\n",
|
||||
"Only use the output of your code to answer the question. \n",
|
||||
"You might know the answer without running any code, but you should still run the code to get the answer.\n",
|
||||
"If it does not seem like you can write code to answer the question, just return \"I don't know\" as the answer.\n",
|
||||
"\"\"\"\n",
|
||||
"base_prompt = hub.pull(\"langchain-ai/react-agent-template\")\n",
|
||||
"prompt = base_prompt.partial(instructions=instructions)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"id": "cc422f53-c51c-4694-a834-72ecd1e68363",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = create_react_agent(ChatAnthropic(temperature=0), tools, prompt)\n",
|
||||
"agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c16161de",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Fibonacci Example\n",
|
||||
"This example was created by [John Wiseman](https://twitter.com/lemonodor/status/1628270074074398720?s=20)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"id": "25cd4f92-ea9b-4fe6-9838-a4f85f81eebe",
|
||||
"metadata": {
|
||||
"scrolled": false,
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m Sure, I can write some Python code to get the 10th Fibonacci number.\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"Thought: Do I need to use a tool? Yes\n",
|
||||
"Action: Python_REPL \n",
|
||||
"Action Input: \n",
|
||||
"def fib(n):\n",
|
||||
" a, b = 0, 1\n",
|
||||
" for i in range(n):\n",
|
||||
" a, b = b, a + b\n",
|
||||
" return a\n",
|
||||
"\n",
|
||||
"print(fib(10))\n",
|
||||
"```\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m55\n",
|
||||
"\u001b[0m\u001b[32;1m\u001b[1;3m Let me break this down step-by-step:\n",
|
||||
"\n",
|
||||
"1. I defined a fibonacci function called `fib` that takes in a number `n`. \n",
|
||||
"2. Inside the function, I initialized two variables `a` and `b` to 0 and 1, which are the first two Fibonacci numbers.\n",
|
||||
"3. Then I used a for loop to iterate up to `n`, updating `a` and `b` each iteration to the next Fibonacci numbers.\n",
|
||||
"4. Finally, I return `a`, which after `n` iterations, contains the `n`th Fibonacci number.\n",
|
||||
"\n",
|
||||
"5. I called `fib(10)` to get the 10th Fibonacci number and printed the result.\n",
|
||||
"\n",
|
||||
"The key parts are defining the fibonacci calculation in the function, and then calling it with the desired input index to print the output.\n",
|
||||
"\n",
|
||||
"The observation shows the 10th Fibonacci number is 55, so that is the final answer.\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"Thought: Do I need to use a tool? No\n",
|
||||
"Final Answer: 55\n",
|
||||
"```\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'What is the 10th fibonacci number?', 'output': '55\\n```'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 31,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.invoke({\"input\": \"What is the 10th fibonacci number?\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7caa30de",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Training neural net\n",
|
||||
"This example was created by [Samee Ur Rehman](https://twitter.com/sameeurehman/status/1630130518133207046?s=20)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "4b9f60e7-eb6a-4f14-8604-498d863d4482",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mCould not parse tool input: {'name': 'python', 'arguments': 'import torch\\nimport torch.nn as nn\\nimport torch.optim as optim\\n\\n# Define the neural network\\nclass SingleNeuron(nn.Module):\\n def __init__(self):\\n super(SingleNeuron, self).__init__()\\n self.linear = nn.Linear(1, 1)\\n \\n def forward(self, x):\\n return self.linear(x)\\n\\n# Create the synthetic data\\nx_train = torch.tensor([[1.0], [2.0], [3.0], [4.0]], dtype=torch.float32)\\ny_train = torch.tensor([[2.0], [4.0], [6.0], [8.0]], dtype=torch.float32)\\n\\n# Create the neural network\\nmodel = SingleNeuron()\\n\\n# Define the loss function and optimizer\\ncriterion = nn.MSELoss()\\noptimizer = optim.SGD(model.parameters(), lr=0.01)\\n\\n# Train the neural network\\nfor epoch in range(1, 1001):\\n # Forward pass\\n y_pred = model(x_train)\\n \\n # Compute loss\\n loss = criterion(y_pred, y_train)\\n \\n # Backward pass and optimization\\n optimizer.zero_grad()\\n loss.backward()\\n optimizer.step()\\n \\n # Print the loss every 100 epochs\\n if epoch % 100 == 0:\\n print(f\"Epoch {epoch}: Loss = {loss.item()}\")\\n\\n# Make a prediction for x = 5\\nx_test = torch.tensor([[5.0]], dtype=torch.float32)\\ny_pred = model(x_test)\\ny_pred.item()'} because the `arguments` is not valid JSON.\u001b[0mInvalid or incomplete response\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `Python_REPL` with `import torch\n",
|
||||
"import torch.nn as nn\n",
|
||||
"import torch.optim as optim\n",
|
||||
"\n",
|
||||
"# Define the neural network\n",
|
||||
"class SingleNeuron(nn.Module):\n",
|
||||
" def __init__(self):\n",
|
||||
" super(SingleNeuron, self).__init__()\n",
|
||||
" self.linear = nn.Linear(1, 1)\n",
|
||||
" \n",
|
||||
" def forward(self, x):\n",
|
||||
" return self.linear(x)\n",
|
||||
"\n",
|
||||
"# Create the synthetic data\n",
|
||||
"x_train = torch.tensor([[1.0], [2.0], [3.0], [4.0]], dtype=torch.float32)\n",
|
||||
"y_train = torch.tensor([[2.0], [4.0], [6.0], [8.0]], dtype=torch.float32)\n",
|
||||
"\n",
|
||||
"# Create the neural network\n",
|
||||
"model = SingleNeuron()\n",
|
||||
"\n",
|
||||
"# Define the loss function and optimizer\n",
|
||||
"criterion = nn.MSELoss()\n",
|
||||
"optimizer = optim.SGD(model.parameters(), lr=0.01)\n",
|
||||
"\n",
|
||||
"# Train the neural network\n",
|
||||
"for epoch in range(1, 1001):\n",
|
||||
" # Forward pass\n",
|
||||
" y_pred = model(x_train)\n",
|
||||
" \n",
|
||||
" # Compute loss\n",
|
||||
" loss = criterion(y_pred, y_train)\n",
|
||||
" \n",
|
||||
" # Backward pass and optimization\n",
|
||||
" optimizer.zero_grad()\n",
|
||||
" loss.backward()\n",
|
||||
" optimizer.step()\n",
|
||||
" \n",
|
||||
" # Print the loss every 100 epochs\n",
|
||||
" if epoch % 100 == 0:\n",
|
||||
" print(f\"Epoch {epoch}: Loss = {loss.item()}\")\n",
|
||||
"\n",
|
||||
"# Make a prediction for x = 5\n",
|
||||
"x_test = torch.tensor([[5.0]], dtype=torch.float32)\n",
|
||||
"y_pred = model(x_test)\n",
|
||||
"y_pred.item()`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3mEpoch 100: Loss = 0.03825576975941658\n",
|
||||
"Epoch 200: Loss = 0.02100197970867157\n",
|
||||
"Epoch 300: Loss = 0.01152981910854578\n",
|
||||
"Epoch 400: Loss = 0.006329738534986973\n",
|
||||
"Epoch 500: Loss = 0.0034749575424939394\n",
|
||||
"Epoch 600: Loss = 0.0019077073084190488\n",
|
||||
"Epoch 700: Loss = 0.001047312980517745\n",
|
||||
"Epoch 800: Loss = 0.0005749554838985205\n",
|
||||
"Epoch 900: Loss = 0.0003156439634039998\n",
|
||||
"Epoch 1000: Loss = 0.00017328384274151176\n",
|
||||
"\u001b[0m\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `Python_REPL` with `x_test.item()`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m\u001b[0m\u001b[32;1m\u001b[1;3mThe prediction for x = 5 is 10.000173568725586.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The prediction for x = 5 is 10.000173568725586.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.invoke(\n",
|
||||
" {\n",
|
||||
" \"input\": \"\"\"Understand, write a single neuron neural network in PyTorch.\n",
|
||||
"Take synthetic data for y=2x. Train for 1000 epochs and print every 100 epochs.\n",
|
||||
"Return prediction for x = 5\"\"\"\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "eb654671",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,361 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "050c5580-2c85-4763-8783-59dbd20395a5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Requests\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cfe4185a-34dc-4cdc-b831-001954f2d6e8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Requests Toolkit\n",
|
||||
"\n",
|
||||
"We can use the Requests [toolkit](/docs/concepts/#toolkits) to construct agents that generate HTTP requests.\n",
|
||||
"\n",
|
||||
"For detailed documentation of all API toolkit features and configurations head to the API reference for [RequestsToolkit](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.openapi.toolkit.RequestsToolkit.html).\n",
|
||||
"\n",
|
||||
"## ⚠️ Security note ⚠️\n",
|
||||
"There are inherent risks in giving models discretion to execute real-world actions. Take precautions to mitigate these risks:\n",
|
||||
"\n",
|
||||
"- Make sure that permissions associated with the tools are narrowly-scoped (e.g., for database operations or API requests);\n",
|
||||
"- When desired, make use of human-in-the-loop workflows."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d968e982-f370-4614-8469-c1bc71ee3e32",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"This toolkit lives in the `langchain-community` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f74f05fb-3f24-4c0b-a17f-cf4edeedbb9a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "36a178eb-1f2c-411e-bf25-0240ead4c62a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Note that if you want to get automated tracing from runs of individual tools, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8e68d0cd-6233-481c-b048-e8d95cba4c35",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a7e2f64a-a72e-4fef-be52-eaf7c5072d24",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"First we will demonstrate a minimal example.\n",
|
||||
"\n",
|
||||
"**NOTE**: There are inherent risks in giving models discretion to execute real-world actions. We must \"opt in\" to these risks by setting `allow_dangerous_requests=True` to use these tools.\n",
|
||||
"**This can be dangerous, because the model may issue unwanted requests**. Please make sure your custom OpenAPI spec (YAML) is safe and that permissions associated with the tools are narrowly scoped."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "018bd070-9fc8-459b-8d28-b4a3e283e640",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ALLOW_DANGEROUS_REQUEST = True"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a024f7b3-5437-4878-bd16-c4783bff394c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can use the [JSONPlaceholder](https://jsonplaceholder.typicode.com) API as a testing ground.\n",
|
||||
"\n",
|
||||
"Let's create (a subset of) its API spec:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "2dcbcf92-2ad5-49c3-94ac-91047ccc8c5b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from typing import Any, Dict, Union\n",
|
||||
"\n",
|
||||
"import requests\n",
|
||||
"import yaml\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def _get_schema(response_json: Union[dict, list]) -> dict:\n",
|
||||
" if isinstance(response_json, list):\n",
|
||||
" response_json = response_json[0] if response_json else {}\n",
|
||||
" return {key: type(value).__name__ for key, value in response_json.items()}\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def _get_api_spec() -> str:\n",
|
||||
" base_url = \"https://jsonplaceholder.typicode.com\"\n",
|
||||
" endpoints = [\n",
|
||||
" \"/posts\",\n",
|
||||
" \"/comments\",\n",
|
||||
" ]\n",
|
||||
" common_query_parameters = [\n",
|
||||
" {\n",
|
||||
" \"name\": \"_limit\",\n",
|
||||
" \"in\": \"query\",\n",
|
||||
" \"required\": False,\n",
|
||||
" \"schema\": {\"type\": \"integer\", \"example\": 2},\n",
|
||||
" \"description\": \"Limit the number of results\",\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
" openapi_spec: Dict[str, Any] = {\n",
|
||||
" \"openapi\": \"3.0.0\",\n",
|
||||
" \"info\": {\"title\": \"JSONPlaceholder API\", \"version\": \"1.0.0\"},\n",
|
||||
" \"servers\": [{\"url\": base_url}],\n",
|
||||
" \"paths\": {},\n",
|
||||
" }\n",
|
||||
" # Iterate over the endpoints to construct the paths\n",
|
||||
" for endpoint in endpoints:\n",
|
||||
" response = requests.get(base_url + endpoint)\n",
|
||||
" if response.status_code == 200:\n",
|
||||
" schema = _get_schema(response.json())\n",
|
||||
" openapi_spec[\"paths\"][endpoint] = {\n",
|
||||
" \"get\": {\n",
|
||||
" \"summary\": f\"Get {endpoint[1:]}\",\n",
|
||||
" \"parameters\": common_query_parameters,\n",
|
||||
" \"responses\": {\n",
|
||||
" \"200\": {\n",
|
||||
" \"description\": \"Successful response\",\n",
|
||||
" \"content\": {\n",
|
||||
" \"application/json\": {\n",
|
||||
" \"schema\": {\"type\": \"object\", \"properties\": schema}\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" return yaml.dump(openapi_spec, sort_keys=False)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"api_spec = _get_api_spec()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "db3d6148-ae65-4a1d-91a6-59ee3e4e6efa",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Next we can instantiate the toolkit. We require no authorization or other headers for this API:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "63a630b3-45bb-4525-865b-083f322b944b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.agent_toolkits.openapi.toolkit import RequestsToolkit\n",
|
||||
"from langchain_community.utilities.requests import TextRequestsWrapper\n",
|
||||
"\n",
|
||||
"toolkit = RequestsToolkit(\n",
|
||||
" requests_wrapper=TextRequestsWrapper(headers={}),\n",
|
||||
" allow_dangerous_requests=ALLOW_DANGEROUS_REQUEST,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f4224a64-843a-479d-8a7b-84719e4b9d0c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tools\n",
|
||||
"\n",
|
||||
"View available tools:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "70ea0f4e-9f10-4906-894b-08df832fd515",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[RequestsGetTool(requests_wrapper=TextRequestsWrapper(headers={}, aiosession=None, auth=None, response_content_type='text', verify=True), allow_dangerous_requests=True),\n",
|
||||
" RequestsPostTool(requests_wrapper=TextRequestsWrapper(headers={}, aiosession=None, auth=None, response_content_type='text', verify=True), allow_dangerous_requests=True),\n",
|
||||
" RequestsPatchTool(requests_wrapper=TextRequestsWrapper(headers={}, aiosession=None, auth=None, response_content_type='text', verify=True), allow_dangerous_requests=True),\n",
|
||||
" RequestsPutTool(requests_wrapper=TextRequestsWrapper(headers={}, aiosession=None, auth=None, response_content_type='text', verify=True), allow_dangerous_requests=True),\n",
|
||||
" RequestsDeleteTool(requests_wrapper=TextRequestsWrapper(headers={}, aiosession=None, auth=None, response_content_type='text', verify=True), allow_dangerous_requests=True)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tools = toolkit.get_tools()\n",
|
||||
"\n",
|
||||
"tools"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a21a6ca4-d650-4b7d-a944-1a8771b5293a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"- [RequestsGetTool](https://api.python.langchain.com/en/latest/tools/langchain_community.tools.requests.tool.RequestsGetTool.html)\n",
|
||||
"- [RequestsPostTool](https://api.python.langchain.com/en/latest/tools/langchain_community.tools.requests.tool.RequestsPostTool.html)\n",
|
||||
"- [RequestsPatchTool](https://api.python.langchain.com/en/latest/tools/langchain_community.tools.requests.tool.RequestsPatchTool.html)\n",
|
||||
"- [RequestsPutTool](https://api.python.langchain.com/en/latest/tools/langchain_community.tools.requests.tool.RequestsPutTool.html)\n",
|
||||
"- [RequestsDeleteTool](https://api.python.langchain.com/en/latest/tools/langchain_community.tools.requests.tool.RequestsDeleteTool.html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e2dbb304-abf2-472a-9130-f03150a40549",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use within an agent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "db062da7-f22c-4f36-9df8-1da96c9f7538",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"from langgraph.prebuilt import create_react_agent\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\")\n",
|
||||
"\n",
|
||||
"system_message = \"\"\"\n",
|
||||
"You have access to an API to help answer user queries.\n",
|
||||
"Here is documentation on the API:\n",
|
||||
"{api_spec}\n",
|
||||
"\"\"\".format(api_spec=api_spec)\n",
|
||||
"\n",
|
||||
"agent_executor = create_react_agent(llm, tools, state_modifier=system_message)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "c1e47be9-374a-457c-928a-48f02b5530e3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"================================\u001b[1m Human Message \u001b[0m=================================\n",
|
||||
"\n",
|
||||
"Fetch the top two posts. What are their titles?\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" requests_get (call_RV2SOyzCnV5h2sm4WPgG8fND)\n",
|
||||
" Call ID: call_RV2SOyzCnV5h2sm4WPgG8fND\n",
|
||||
" Args:\n",
|
||||
" url: https://jsonplaceholder.typicode.com/posts?_limit=2\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: requests_get\n",
|
||||
"\n",
|
||||
"[\n",
|
||||
" {\n",
|
||||
" \"userId\": 1,\n",
|
||||
" \"id\": 1,\n",
|
||||
" \"title\": \"sunt aut facere repellat provident occaecati excepturi optio reprehenderit\",\n",
|
||||
" \"body\": \"quia et suscipit\\nsuscipit recusandae consequuntur expedita et cum\\nreprehenderit molestiae ut ut quas totam\\nnostrum rerum est autem sunt rem eveniet architecto\"\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"userId\": 1,\n",
|
||||
" \"id\": 2,\n",
|
||||
" \"title\": \"qui est esse\",\n",
|
||||
" \"body\": \"est rerum tempore vitae\\nsequi sint nihil reprehenderit dolor beatae ea dolores neque\\nfugiat blanditiis voluptate porro vel nihil molestiae ut reiciendis\\nqui aperiam non debitis possimus qui neque nisi nulla\"\n",
|
||||
" }\n",
|
||||
"]\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"\n",
|
||||
"The titles of the top two posts are:\n",
|
||||
"1. \"sunt aut facere repellat provident occaecati excepturi optio reprehenderit\"\n",
|
||||
"2. \"qui est esse\"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"example_query = \"Fetch the top two posts. What are their titles?\"\n",
|
||||
"\n",
|
||||
"events = agent_executor.stream(\n",
|
||||
" {\"messages\": [(\"user\", example_query)]},\n",
|
||||
" stream_mode=\"values\",\n",
|
||||
")\n",
|
||||
"for event in events:\n",
|
||||
" event[\"messages\"][-1].pretty_print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "01ec4886-de3d-4fda-bd05-e3f254810969",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all API toolkit features and configurations head to the API reference for [RequestsToolkit](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.openapi.toolkit.RequestsToolkit.html)."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,419 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Spark Dataframe\n",
|
||||
"\n",
|
||||
"This notebook shows how to use agents to interact with a `Spark DataFrame` and `Spark Connect`. It is mostly optimized for question answering.\n",
|
||||
"\n",
|
||||
"**NOTE: this agent calls the Python agent under the hood, which executes LLM-generated Python code - this can be dangerous if the LLM-generated Python code is harmful. Use cautiously.**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"...input your openai api key here...\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## `Spark DataFrame` example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"23/05/15 20:33:10 WARN Utils: Your hostname, Mikes-Mac-mini.local resolves to a loopback address: 127.0.0.1; using 192.168.68.115 instead (on interface en1)\n",
|
||||
"23/05/15 20:33:10 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address\n",
|
||||
"Setting default log level to \"WARN\".\n",
|
||||
"To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n",
|
||||
"23/05/15 20:33:10 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"+-----------+--------+------+--------------------+------+----+-----+-----+----------------+-------+-----+--------+\n",
|
||||
"|PassengerId|Survived|Pclass| Name| Sex| Age|SibSp|Parch| Ticket| Fare|Cabin|Embarked|\n",
|
||||
"+-----------+--------+------+--------------------+------+----+-----+-----+----------------+-------+-----+--------+\n",
|
||||
"| 1| 0| 3|Braund, Mr. Owen ...| male|22.0| 1| 0| A/5 21171| 7.25| null| S|\n",
|
||||
"| 2| 1| 1|Cumings, Mrs. Joh...|female|38.0| 1| 0| PC 17599|71.2833| C85| C|\n",
|
||||
"| 3| 1| 3|Heikkinen, Miss. ...|female|26.0| 0| 0|STON/O2. 3101282| 7.925| null| S|\n",
|
||||
"| 4| 1| 1|Futrelle, Mrs. Ja...|female|35.0| 1| 0| 113803| 53.1| C123| S|\n",
|
||||
"| 5| 0| 3|Allen, Mr. Willia...| male|35.0| 0| 0| 373450| 8.05| null| S|\n",
|
||||
"| 6| 0| 3| Moran, Mr. James| male|null| 0| 0| 330877| 8.4583| null| Q|\n",
|
||||
"| 7| 0| 1|McCarthy, Mr. Tim...| male|54.0| 0| 0| 17463|51.8625| E46| S|\n",
|
||||
"| 8| 0| 3|Palsson, Master. ...| male| 2.0| 3| 1| 349909| 21.075| null| S|\n",
|
||||
"| 9| 1| 3|Johnson, Mrs. Osc...|female|27.0| 0| 2| 347742|11.1333| null| S|\n",
|
||||
"| 10| 1| 2|Nasser, Mrs. Nich...|female|14.0| 1| 0| 237736|30.0708| null| C|\n",
|
||||
"| 11| 1| 3|Sandstrom, Miss. ...|female| 4.0| 1| 1| PP 9549| 16.7| G6| S|\n",
|
||||
"| 12| 1| 1|Bonnell, Miss. El...|female|58.0| 0| 0| 113783| 26.55| C103| S|\n",
|
||||
"| 13| 0| 3|Saundercock, Mr. ...| male|20.0| 0| 0| A/5. 2151| 8.05| null| S|\n",
|
||||
"| 14| 0| 3|Andersson, Mr. An...| male|39.0| 1| 5| 347082| 31.275| null| S|\n",
|
||||
"| 15| 0| 3|Vestrom, Miss. Hu...|female|14.0| 0| 0| 350406| 7.8542| null| S|\n",
|
||||
"| 16| 1| 2|Hewlett, Mrs. (Ma...|female|55.0| 0| 0| 248706| 16.0| null| S|\n",
|
||||
"| 17| 0| 3|Rice, Master. Eugene| male| 2.0| 4| 1| 382652| 29.125| null| Q|\n",
|
||||
"| 18| 1| 2|Williams, Mr. Cha...| male|null| 0| 0| 244373| 13.0| null| S|\n",
|
||||
"| 19| 0| 3|Vander Planke, Mr...|female|31.0| 1| 0| 345763| 18.0| null| S|\n",
|
||||
"| 20| 1| 3|Masselmani, Mrs. ...|female|null| 0| 0| 2649| 7.225| null| C|\n",
|
||||
"+-----------+--------+------+--------------------+------+----+-----+-----+----------------+-------+-----+--------+\n",
|
||||
"only showing top 20 rows\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_experimental.agents.agent_toolkits import create_spark_dataframe_agent\n",
|
||||
"from langchain_openai import OpenAI\n",
|
||||
"from pyspark.sql import SparkSession\n",
|
||||
"\n",
|
||||
"spark = SparkSession.builder.getOrCreate()\n",
|
||||
"csv_file_path = \"titanic.csv\"\n",
|
||||
"df = spark.read.csv(csv_file_path, header=True, inferSchema=True)\n",
|
||||
"df.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = create_spark_dataframe_agent(llm=OpenAI(temperature=0), df=df, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to find out how many rows are in the dataframe\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: df.count()\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m891\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: There are 891 rows in the dataframe.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'There are 891 rows in the dataframe.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"how many rows are there?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to find out how many people have more than 3 siblings\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: df.filter(df.SibSp > 3).count()\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m30\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: 30 people have more than 3 siblings.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'30 people have more than 3 siblings.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"how many people have more than 3 siblings\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to get the average age first\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: df.agg({\"Age\": \"mean\"}).collect()[0][0]\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m29.69911764705882\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now have the average age, I need to get the square root\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: math.sqrt(29.69911764705882)\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mname 'math' is not defined\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to import math first\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: import math\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now have the math library imported, I can get the square root\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: math.sqrt(29.69911764705882)\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m5.449689683556195\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: 5.449689683556195\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'5.449689683556195'"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"whats the square root of the average age?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"spark.stop()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## `Spark Connect` example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Run this from the apache-spark root directory (tested here with \"spark-3.4.0-bin-hadoop3\" and later).\n",
|
||||
"# To launch Spark with support for Spark Connect sessions, run the start-connect-server.sh script.\n",
|
||||
"!./sbin/start-connect-server.sh --packages org.apache.spark:spark-connect_2.12:3.4.0"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"23/05/08 10:06:09 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from pyspark.sql import SparkSession\n",
|
||||
"\n",
|
||||
"# Now that the Spark server is running, we can connect to it remotely using Spark Connect. We do this by\n",
|
||||
"# creating a remote Spark session on the client where our application runs. Before we can do that, we need\n",
|
||||
"# to make sure to stop the existing regular Spark session because it cannot coexist with the remote\n",
|
||||
"# Spark Connect session we are about to create.\n",
|
||||
"SparkSession.builder.master(\"local[*]\").getOrCreate().stop()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# The command we used above to launch the server configured Spark to run as localhost:15002.\n",
|
||||
"# So now we can create a remote Spark session on the client using the following command.\n",
|
||||
"spark = SparkSession.builder.remote(\"sc://localhost:15002\").getOrCreate()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"+-----------+--------+------+--------------------+------+----+-----+-----+----------------+-------+-----+--------+\n",
|
||||
"|PassengerId|Survived|Pclass| Name| Sex| Age|SibSp|Parch| Ticket| Fare|Cabin|Embarked|\n",
|
||||
"+-----------+--------+------+--------------------+------+----+-----+-----+----------------+-------+-----+--------+\n",
|
||||
"| 1| 0| 3|Braund, Mr. Owen ...| male|22.0| 1| 0| A/5 21171| 7.25| null| S|\n",
|
||||
"| 2| 1| 1|Cumings, Mrs. Joh...|female|38.0| 1| 0| PC 17599|71.2833| C85| C|\n",
|
||||
"| 3| 1| 3|Heikkinen, Miss. ...|female|26.0| 0| 0|STON/O2. 3101282| 7.925| null| S|\n",
|
||||
"| 4| 1| 1|Futrelle, Mrs. Ja...|female|35.0| 1| 0| 113803| 53.1| C123| S|\n",
|
||||
"| 5| 0| 3|Allen, Mr. Willia...| male|35.0| 0| 0| 373450| 8.05| null| S|\n",
|
||||
"| 6| 0| 3| Moran, Mr. James| male|null| 0| 0| 330877| 8.4583| null| Q|\n",
|
||||
"| 7| 0| 1|McCarthy, Mr. Tim...| male|54.0| 0| 0| 17463|51.8625| E46| S|\n",
|
||||
"| 8| 0| 3|Palsson, Master. ...| male| 2.0| 3| 1| 349909| 21.075| null| S|\n",
|
||||
"| 9| 1| 3|Johnson, Mrs. Osc...|female|27.0| 0| 2| 347742|11.1333| null| S|\n",
|
||||
"| 10| 1| 2|Nasser, Mrs. Nich...|female|14.0| 1| 0| 237736|30.0708| null| C|\n",
|
||||
"| 11| 1| 3|Sandstrom, Miss. ...|female| 4.0| 1| 1| PP 9549| 16.7| G6| S|\n",
|
||||
"| 12| 1| 1|Bonnell, Miss. El...|female|58.0| 0| 0| 113783| 26.55| C103| S|\n",
|
||||
"| 13| 0| 3|Saundercock, Mr. ...| male|20.0| 0| 0| A/5. 2151| 8.05| null| S|\n",
|
||||
"| 14| 0| 3|Andersson, Mr. An...| male|39.0| 1| 5| 347082| 31.275| null| S|\n",
|
||||
"| 15| 0| 3|Vestrom, Miss. Hu...|female|14.0| 0| 0| 350406| 7.8542| null| S|\n",
|
||||
"| 16| 1| 2|Hewlett, Mrs. (Ma...|female|55.0| 0| 0| 248706| 16.0| null| S|\n",
|
||||
"| 17| 0| 3|Rice, Master. Eugene| male| 2.0| 4| 1| 382652| 29.125| null| Q|\n",
|
||||
"| 18| 1| 2|Williams, Mr. Cha...| male|null| 0| 0| 244373| 13.0| null| S|\n",
|
||||
"| 19| 0| 3|Vander Planke, Mr...|female|31.0| 1| 0| 345763| 18.0| null| S|\n",
|
||||
"| 20| 1| 3|Masselmani, Mrs. ...|female|null| 0| 0| 2649| 7.225| null| C|\n",
|
||||
"+-----------+--------+------+--------------------+------+----+-----+-----+----------------+-------+-----+--------+\n",
|
||||
"only showing top 20 rows\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"csv_file_path = \"titanic.csv\"\n",
|
||||
"df = spark.read.csv(csv_file_path, header=True, inferSchema=True)\n",
|
||||
"df.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from langchain_experimental.agents import create_spark_dataframe_agent\n",
|
||||
"from langchain_openai import OpenAI\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"...input your openai api key here...\"\n",
|
||||
"\n",
|
||||
"agent = create_spark_dataframe_agent(llm=OpenAI(temperature=0), df=df, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Thought: I need to find the row with the highest fare\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: df.sort(df.Fare.desc()).first()\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mRow(PassengerId=259, Survived=1, Pclass=1, Name='Ward, Miss. Anna', Sex='female', Age=35.0, SibSp=0, Parch=0, Ticket='PC 17755', Fare=512.3292, Cabin=None, Embarked='C')\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the name of the person who bought the most expensive ticket\n",
|
||||
"Final Answer: Miss. Anna Ward\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Miss. Anna Ward'"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\n",
|
||||
" \"\"\"\n",
|
||||
"who bought the most expensive ticket?\n",
|
||||
"You can find all supported function types in https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/dataframe\n",
|
||||
"\"\"\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"spark.stop()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -1,624 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "757e7780-b89a-4a87-b10c-cfa42337a8e0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: SQLDatabaseToolkit\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0e499e90-7a6d-4fab-8aab-31a4df417601",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# SQLDatabaseToolkit\n",
|
||||
"\n",
|
||||
"This will help you get started with the SQL Database [toolkit](/docs/concepts/#toolkits). For detailed documentation of all `SQLDatabaseToolkit` features and configurations, head to the [API reference](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.sql.toolkit.SQLDatabaseToolkit.html).\n",
|
||||
"\n",
|
||||
"Tools within the `SQLDatabaseToolkit` are designed to interact with a `SQL` database. \n",
|
||||
"\n",
|
||||
"A common application is to enable agents to answer questions using data in a relational database, potentially in an iterative fashion (e.g., recovering from errors).\n",
|
||||
"\n",
|
||||
"**⚠️ Security note ⚠️**\n",
|
||||
"\n",
|
||||
"Building Q&A systems over SQL databases requires executing model-generated SQL queries. There are inherent risks in doing this. Make sure that your database connection permissions are always scoped as narrowly as possible for your chain/agent's needs. This will mitigate, though not eliminate, the risks of building a model-driven system. For more on general security best practices, [see here](/docs/security).\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"If you want to get automated tracing from runs of individual tools, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3de6e3be-1fd9-42a3-8564-8ca7dca11e1c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "31896b61-68d2-4b4d-be9d-b829eda327d1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"This toolkit lives in the `langchain-community` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c4933e04-9120-4ccc-9ef7-369987823b0e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain-community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6ad08dbe-1642-448c-b58d-153810024375",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For demonstration purposes, we will access a prompt in the LangChain [Hub](https://smith.langchain.com/hub). We will also require `langgraph` to demonstrate the use of the toolkit with an agent. This is not required to use the toolkit."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f3dead45-9908-497d-a5a3-bce30642e88f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchainhub langgraph"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "804533b1-2f16-497b-821b-c82d67fcf7b6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"The `SQLDatabaseToolkit` toolkit requires:\n",
|
||||
"\n",
|
||||
"- a [SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html) object;\n",
|
||||
"- an LLM or chat model (for instantiating the [QuerySQLCheckerTool](https://api.python.langchain.com/en/latest/tools/langchain_community.tools.sql_database.tool.QuerySQLCheckerTool.html) tool).\n",
|
||||
"\n",
|
||||
"Below, we instantiate the toolkit with these objects. Let's first create a database object.\n",
|
||||
"\n",
|
||||
"This guide uses the example `Chinook` database based on [these instructions](https://database.guide/2-sample-databases-sqlite/).\n",
|
||||
"\n",
|
||||
"Below we will use the `requests` library to pull the `.sql` file and create an in-memory SQLite database. Note that this approach is lightweight, but ephemeral and not thread-safe. If you'd prefer, you can follow the instructions to save the file locally as `Chinook.db` and instantiate the database via `db = SQLDatabase.from_uri(\"sqlite:///Chinook.db\")`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "40d05f9b-5a8f-4307-8f8b-4153db0fdfa9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sqlite3\n",
|
||||
"\n",
|
||||
"import requests\n",
|
||||
"from langchain_community.utilities.sql_database import SQLDatabase\n",
|
||||
"from sqlalchemy import create_engine\n",
|
||||
"from sqlalchemy.pool import StaticPool\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_engine_for_chinook_db():\n",
|
||||
" \"\"\"Pull sql file, populate in-memory database, and create engine.\"\"\"\n",
|
||||
" url = \"https://raw.githubusercontent.com/lerocha/chinook-database/master/ChinookDatabase/DataSources/Chinook_Sqlite.sql\"\n",
|
||||
" response = requests.get(url)\n",
|
||||
" sql_script = response.text\n",
|
||||
"\n",
|
||||
" connection = sqlite3.connect(\":memory:\", check_same_thread=False)\n",
|
||||
" connection.executescript(sql_script)\n",
|
||||
" return create_engine(\n",
|
||||
" \"sqlite://\",\n",
|
||||
" creator=lambda: connection,\n",
|
||||
" poolclass=StaticPool,\n",
|
||||
" connect_args={\"check_same_thread\": False},\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"engine = get_engine_for_chinook_db()\n",
|
||||
"\n",
|
||||
"db = SQLDatabase(engine)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2b9a6326-78fd-4c42-a1cb-4316619ac449",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We will also need an LLM or chat model:\n",
|
||||
"\n",
|
||||
"```{=mdx}\n",
|
||||
"import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
|
||||
"\n",
|
||||
"<ChatModelTabs customVarName=\"llm\" />\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "cc6e6108-83d9-404f-8f31-474c2fbf5f6c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# | output: false\n",
|
||||
"# | echo: false\n",
|
||||
"\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "77925e72-4730-43c3-8726-d68cedf635f4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can now instantiate the toolkit:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "42bd5a41-672a-4a53-b70a-2f0c0555758c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit\n",
|
||||
"\n",
|
||||
"toolkit = SQLDatabaseToolkit(db=db, llm=llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b2f882cf-4156-4a9f-a714-db97ec8ccc37",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tools\n",
|
||||
"\n",
|
||||
"View available tools:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "a18c3e69-bee0-4f5d-813e-eeb540f41b98",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[QuerySQLDataBaseTool(description=\"Input to this tool is a detailed and correct SQL query, output is a result from the database. If the query is not correct, an error message will be returned. If an error is returned, rewrite the query, check the query, and try again. If you encounter an issue with Unknown column 'xxxx' in 'field list', use sql_db_schema to query the correct table fields.\", db=<langchain_community.utilities.sql_database.SQLDatabase object at 0x105e02860>),\n",
|
||||
" InfoSQLDatabaseTool(description='Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables. Be sure that the tables actually exist by calling sql_db_list_tables first! Example Input: table1, table2, table3', db=<langchain_community.utilities.sql_database.SQLDatabase object at 0x105e02860>),\n",
|
||||
" ListSQLDatabaseTool(db=<langchain_community.utilities.sql_database.SQLDatabase object at 0x105e02860>),\n",
|
||||
" QuerySQLCheckerTool(description='Use this tool to double check if your query is correct before executing it. Always use this tool before executing a query with sql_db_query!', db=<langchain_community.utilities.sql_database.SQLDatabase object at 0x105e02860>, llm=ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x1148a97b0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x1148aaec0>, temperature=0.0, openai_api_key=SecretStr('**********'), openai_proxy=''), llm_chain=LLMChain(prompt=PromptTemplate(input_variables=['dialect', 'query'], template='\\n{query}\\nDouble check the {dialect} query above for common mistakes, including:\\n- Using NOT IN with NULL values\\n- Using UNION when UNION ALL should have been used\\n- Using BETWEEN for exclusive ranges\\n- Data type mismatch in predicates\\n- Properly quoting identifiers\\n- Using the correct number of arguments for functions\\n- Casting to the correct data type\\n- Using the proper columns for joins\\n\\nIf there are any of the above mistakes, rewrite the query. If there are no mistakes, just reproduce the original query.\\n\\nOutput the final SQL query only.\\n\\nSQL Query: '), llm=ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x1148a97b0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x1148aaec0>, temperature=0.0, openai_api_key=SecretStr('**********'), openai_proxy='')))]"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"toolkit.get_tools()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5f5751e3-2e98-485f-8164-db8094039c25",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"API references:\n",
|
||||
"\n",
|
||||
"- [QuerySQLDataBaseTool](https://api.python.langchain.com/en/latest/tools/langchain_community.tools.sql_database.tool.QuerySQLDataBaseTool.html)\n",
|
||||
"- [InfoSQLDatabaseTool](https://api.python.langchain.com/en/latest/tools/langchain_community.tools.sql_database.tool.InfoSQLDatabaseTool.html)\n",
|
||||
"- [ListSQLDatabaseTool](https://api.python.langchain.com/en/latest/tools/langchain_community.tools.sql_database.tool.ListSQLDatabaseTool.html)\n",
|
||||
"- [QuerySQLCheckerTool](https://api.python.langchain.com/en/latest/tools/langchain_community.tools.sql_database.tool.QuerySQLCheckerTool.html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c067e0ed-dcca-4dcc-81b2-a0eeb4fc2a9f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use within an agent\n",
|
||||
"\n",
|
||||
"Following the [SQL Q&A Tutorial](/docs/tutorials/sql_qa/#agents), below we equip a simple question-answering agent with the tools in our toolkit. First we pull a relevant prompt and populate it with its required parameters:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "eda12f8b-be90-4697-ac84-2ece9e2d1708",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"['dialect', 'top_k']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain import hub\n",
|
||||
"\n",
|
||||
"prompt_template = hub.pull(\"langchain-ai/sql-agent-system-prompt\")\n",
|
||||
"\n",
|
||||
"assert len(prompt_template.messages) == 1\n",
|
||||
"print(prompt_template.input_variables)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "3470ae96-e5e5-4717-a6d6-d7d28c7b7347",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"system_message = prompt_template.format(dialect=\"SQLite\", top_k=5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "97930c07-36d1-4137-94ae-fe5ac83ecc44",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We then instantiate the agent:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "48bca92c-9b4b-4d5c-bcce-1b239c9e901c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langgraph.prebuilt import create_react_agent\n",
|
||||
"\n",
|
||||
"agent_executor = create_react_agent(\n",
|
||||
" llm, toolkit.get_tools(), state_modifier=system_message\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "09fb1845-1105-4f41-98b4-24756452a3e3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"And issue it a query:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "39e6d2bf-3194-4aba-854b-63faf919157b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"================================\u001b[1m Human Message \u001b[0m=================================\n",
|
||||
"\n",
|
||||
"Which country's customers spent the most?\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" sql_db_list_tables (call_eiheSxiL0s90KE50XyBnBtJY)\n",
|
||||
" Call ID: call_eiheSxiL0s90KE50XyBnBtJY\n",
|
||||
" Args:\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: sql_db_list_tables\n",
|
||||
"\n",
|
||||
"Album, Artist, Customer, Employee, Genre, Invoice, InvoiceLine, MediaType, Playlist, PlaylistTrack, Track\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" sql_db_schema (call_YKwGWt4UUVmxxY7vjjBDzFLJ)\n",
|
||||
" Call ID: call_YKwGWt4UUVmxxY7vjjBDzFLJ\n",
|
||||
" Args:\n",
|
||||
" table_names: Customer, Invoice, InvoiceLine\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: sql_db_schema\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"CREATE TABLE \"Customer\" (\n",
|
||||
"\t\"CustomerId\" INTEGER NOT NULL, \n",
|
||||
"\t\"FirstName\" NVARCHAR(40) NOT NULL, \n",
|
||||
"\t\"LastName\" NVARCHAR(20) NOT NULL, \n",
|
||||
"\t\"Company\" NVARCHAR(80), \n",
|
||||
"\t\"Address\" NVARCHAR(70), \n",
|
||||
"\t\"City\" NVARCHAR(40), \n",
|
||||
"\t\"State\" NVARCHAR(40), \n",
|
||||
"\t\"Country\" NVARCHAR(40), \n",
|
||||
"\t\"PostalCode\" NVARCHAR(10), \n",
|
||||
"\t\"Phone\" NVARCHAR(24), \n",
|
||||
"\t\"Fax\" NVARCHAR(24), \n",
|
||||
"\t\"Email\" NVARCHAR(60) NOT NULL, \n",
|
||||
"\t\"SupportRepId\" INTEGER, \n",
|
||||
"\tPRIMARY KEY (\"CustomerId\"), \n",
|
||||
"\tFOREIGN KEY(\"SupportRepId\") REFERENCES \"Employee\" (\"EmployeeId\")\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"/*\n",
|
||||
"3 rows from Customer table:\n",
|
||||
"CustomerId\tFirstName\tLastName\tCompany\tAddress\tCity\tState\tCountry\tPostalCode\tPhone\tFax\tEmail\tSupportRepId\n",
|
||||
"1\tLuís\tGonçalves\tEmbraer - Empresa Brasileira de Aeronáutica S.A.\tAv. Brigadeiro Faria Lima, 2170\tSão José dos Campos\tSP\tBrazil\t12227-000\t+55 (12) 3923-5555\t+55 (12) 3923-5566\tluisg@embraer.com.br\t3\n",
|
||||
"2\tLeonie\tKöhler\tNone\tTheodor-Heuss-Straße 34\tStuttgart\tNone\tGermany\t70174\t+49 0711 2842222\tNone\tleonekohler@surfeu.de\t5\n",
|
||||
"3\tFrançois\tTremblay\tNone\t1498 rue Bélanger\tMontréal\tQC\tCanada\tH2G 1A7\t+1 (514) 721-4711\tNone\tftremblay@gmail.com\t3\n",
|
||||
"*/\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"CREATE TABLE \"Invoice\" (\n",
|
||||
"\t\"InvoiceId\" INTEGER NOT NULL, \n",
|
||||
"\t\"CustomerId\" INTEGER NOT NULL, \n",
|
||||
"\t\"InvoiceDate\" DATETIME NOT NULL, \n",
|
||||
"\t\"BillingAddress\" NVARCHAR(70), \n",
|
||||
"\t\"BillingCity\" NVARCHAR(40), \n",
|
||||
"\t\"BillingState\" NVARCHAR(40), \n",
|
||||
"\t\"BillingCountry\" NVARCHAR(40), \n",
|
||||
"\t\"BillingPostalCode\" NVARCHAR(10), \n",
|
||||
"\t\"Total\" NUMERIC(10, 2) NOT NULL, \n",
|
||||
"\tPRIMARY KEY (\"InvoiceId\"), \n",
|
||||
"\tFOREIGN KEY(\"CustomerId\") REFERENCES \"Customer\" (\"CustomerId\")\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"/*\n",
|
||||
"3 rows from Invoice table:\n",
|
||||
"InvoiceId\tCustomerId\tInvoiceDate\tBillingAddress\tBillingCity\tBillingState\tBillingCountry\tBillingPostalCode\tTotal\n",
|
||||
"1\t2\t2021-01-01 00:00:00\tTheodor-Heuss-Straße 34\tStuttgart\tNone\tGermany\t70174\t1.98\n",
|
||||
"2\t4\t2021-01-02 00:00:00\tUllevålsveien 14\tOslo\tNone\tNorway\t0171\t3.96\n",
|
||||
"3\t8\t2021-01-03 00:00:00\tGrétrystraat 63\tBrussels\tNone\tBelgium\t1000\t5.94\n",
|
||||
"*/\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"CREATE TABLE \"InvoiceLine\" (\n",
|
||||
"\t\"InvoiceLineId\" INTEGER NOT NULL, \n",
|
||||
"\t\"InvoiceId\" INTEGER NOT NULL, \n",
|
||||
"\t\"TrackId\" INTEGER NOT NULL, \n",
|
||||
"\t\"UnitPrice\" NUMERIC(10, 2) NOT NULL, \n",
|
||||
"\t\"Quantity\" INTEGER NOT NULL, \n",
|
||||
"\tPRIMARY KEY (\"InvoiceLineId\"), \n",
|
||||
"\tFOREIGN KEY(\"TrackId\") REFERENCES \"Track\" (\"TrackId\"), \n",
|
||||
"\tFOREIGN KEY(\"InvoiceId\") REFERENCES \"Invoice\" (\"InvoiceId\")\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"/*\n",
|
||||
"3 rows from InvoiceLine table:\n",
|
||||
"InvoiceLineId\tInvoiceId\tTrackId\tUnitPrice\tQuantity\n",
|
||||
"1\t1\t2\t0.99\t1\n",
|
||||
"2\t1\t4\t0.99\t1\n",
|
||||
"3\t2\t6\t0.99\t1\n",
|
||||
"*/\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" sql_db_query (call_7WBDcMxl1h7MnI05njx1q8V9)\n",
|
||||
" Call ID: call_7WBDcMxl1h7MnI05njx1q8V9\n",
|
||||
" Args:\n",
|
||||
" query: SELECT c.Country, SUM(i.Total) AS TotalSpent FROM Customer c JOIN Invoice i ON c.CustomerId = i.CustomerId GROUP BY c.Country ORDER BY TotalSpent DESC LIMIT 1\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: sql_db_query\n",
|
||||
"\n",
|
||||
"[('USA', 523.0600000000003)]\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"\n",
|
||||
"Customers from the USA spent the most, with a total amount spent of $523.06.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"example_query = \"Which country's customers spent the most?\"\n",
|
||||
"\n",
|
||||
"events = agent_executor.stream(\n",
|
||||
" {\"messages\": [(\"user\", example_query)]},\n",
|
||||
" stream_mode=\"values\",\n",
|
||||
")\n",
|
||||
"for event in events:\n",
|
||||
" event[\"messages\"][-1].pretty_print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "adbf3d8d-7570-45a5-950f-ce84db5145ab",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can also observe the agent recover from an error:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "23c1235c-6d18-43e4-98ab-85b426b53d94",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"================================\u001b[1m Human Message \u001b[0m=================================\n",
|
||||
"\n",
|
||||
"Who are the top 3 best selling artists?\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" sql_db_query (call_9F6Bp2vwsDkeLW6FsJFqLiet)\n",
|
||||
" Call ID: call_9F6Bp2vwsDkeLW6FsJFqLiet\n",
|
||||
" Args:\n",
|
||||
" query: SELECT artist_name, SUM(quantity) AS total_sold FROM sales GROUP BY artist_name ORDER BY total_sold DESC LIMIT 3\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: sql_db_query\n",
|
||||
"\n",
|
||||
"Error: (sqlite3.OperationalError) no such table: sales\n",
|
||||
"[SQL: SELECT artist_name, SUM(quantity) AS total_sold FROM sales GROUP BY artist_name ORDER BY total_sold DESC LIMIT 3]\n",
|
||||
"(Background on this error at: https://sqlalche.me/e/20/e3q8)\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" sql_db_list_tables (call_Gx5adzWnrBDIIxzUDzsn83zO)\n",
|
||||
" Call ID: call_Gx5adzWnrBDIIxzUDzsn83zO\n",
|
||||
" Args:\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: sql_db_list_tables\n",
|
||||
"\n",
|
||||
"Album, Artist, Customer, Employee, Genre, Invoice, InvoiceLine, MediaType, Playlist, PlaylistTrack, Track\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" sql_db_schema (call_ftywrZgEgGWLrnk9dYC0xtZv)\n",
|
||||
" Call ID: call_ftywrZgEgGWLrnk9dYC0xtZv\n",
|
||||
" Args:\n",
|
||||
" table_names: Artist, Album, InvoiceLine\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: sql_db_schema\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"CREATE TABLE \"Album\" (\n",
|
||||
"\t\"AlbumId\" INTEGER NOT NULL, \n",
|
||||
"\t\"Title\" NVARCHAR(160) NOT NULL, \n",
|
||||
"\t\"ArtistId\" INTEGER NOT NULL, \n",
|
||||
"\tPRIMARY KEY (\"AlbumId\"), \n",
|
||||
"\tFOREIGN KEY(\"ArtistId\") REFERENCES \"Artist\" (\"ArtistId\")\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"/*\n",
|
||||
"3 rows from Album table:\n",
|
||||
"AlbumId\tTitle\tArtistId\n",
|
||||
"1\tFor Those About To Rock We Salute You\t1\n",
|
||||
"2\tBalls to the Wall\t2\n",
|
||||
"3\tRestless and Wild\t2\n",
|
||||
"*/\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"CREATE TABLE \"Artist\" (\n",
|
||||
"\t\"ArtistId\" INTEGER NOT NULL, \n",
|
||||
"\t\"Name\" NVARCHAR(120), \n",
|
||||
"\tPRIMARY KEY (\"ArtistId\")\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"/*\n",
|
||||
"3 rows from Artist table:\n",
|
||||
"ArtistId\tName\n",
|
||||
"1\tAC/DC\n",
|
||||
"2\tAccept\n",
|
||||
"3\tAerosmith\n",
|
||||
"*/\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"CREATE TABLE \"InvoiceLine\" (\n",
|
||||
"\t\"InvoiceLineId\" INTEGER NOT NULL, \n",
|
||||
"\t\"InvoiceId\" INTEGER NOT NULL, \n",
|
||||
"\t\"TrackId\" INTEGER NOT NULL, \n",
|
||||
"\t\"UnitPrice\" NUMERIC(10, 2) NOT NULL, \n",
|
||||
"\t\"Quantity\" INTEGER NOT NULL, \n",
|
||||
"\tPRIMARY KEY (\"InvoiceLineId\"), \n",
|
||||
"\tFOREIGN KEY(\"TrackId\") REFERENCES \"Track\" (\"TrackId\"), \n",
|
||||
"\tFOREIGN KEY(\"InvoiceId\") REFERENCES \"Invoice\" (\"InvoiceId\")\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"/*\n",
|
||||
"3 rows from InvoiceLine table:\n",
|
||||
"InvoiceLineId\tInvoiceId\tTrackId\tUnitPrice\tQuantity\n",
|
||||
"1\t1\t2\t0.99\t1\n",
|
||||
"2\t1\t4\t0.99\t1\n",
|
||||
"3\t2\t6\t0.99\t1\n",
|
||||
"*/\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" sql_db_query (call_i6n3lmS7E2ZivN758VOayTiy)\n",
|
||||
" Call ID: call_i6n3lmS7E2ZivN758VOayTiy\n",
|
||||
" Args:\n",
|
||||
" query: SELECT Artist.Name AS artist_name, SUM(InvoiceLine.Quantity) AS total_sold FROM Artist JOIN Album ON Artist.ArtistId = Album.ArtistId JOIN Track ON Album.AlbumId = Track.AlbumId JOIN InvoiceLine ON Track.TrackId = InvoiceLine.TrackId GROUP BY Artist.Name ORDER BY total_sold DESC LIMIT 3\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: sql_db_query\n",
|
||||
"\n",
|
||||
"[('Iron Maiden', 140), ('U2', 107), ('Metallica', 91)]\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"\n",
|
||||
"The top 3 best selling artists are:\n",
|
||||
"1. Iron Maiden - 140 units sold\n",
|
||||
"2. U2 - 107 units sold\n",
|
||||
"3. Metallica - 91 units sold\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"example_query = \"Who are the top 3 best selling artists?\"\n",
|
||||
"\n",
|
||||
"events = agent_executor.stream(\n",
|
||||
" {\"messages\": [(\"user\", example_query)]},\n",
|
||||
" stream_mode=\"values\",\n",
|
||||
")\n",
|
||||
"for event in events:\n",
|
||||
" event[\"messages\"][-1].pretty_print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "73521f1b-be03-44e6-8b27-a9a46ae8e962",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Specific functionality\n",
|
||||
"\n",
|
||||
"`SQLDatabaseToolkit` implements a [.get_context](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.sql.toolkit.SQLDatabaseToolkit.html#langchain_community.agent_toolkits.sql.toolkit.SQLDatabaseToolkit.get_context) method as a convenience for use in prompts or other contexts.\n",
|
||||
"\n",
|
||||
"**⚠️ Disclaimer ⚠️** : The agent may generate insert/update/delete queries. When this is not expected, use a custom prompt or create a SQL user without write permissions.\n",
|
||||
"\n",
|
||||
"An end user might overload your SQL database by asking a simple question such as \"run the biggest query possible\". The generated query might look like:\n",
|
||||
"\n",
|
||||
"```sql\n",
|
||||
"SELECT * FROM \"public\".\"users\"\n",
|
||||
" JOIN \"public\".\"user_permissions\" ON \"public\".\"users\".id = \"public\".\"user_permissions\".user_id\n",
|
||||
" JOIN \"public\".\"projects\" ON \"public\".\"users\".id = \"public\".\"projects\".user_id\n",
|
||||
" JOIN \"public\".\"events\" ON \"public\".\"projects\".id = \"public\".\"events\".project_id;\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"For a transactional SQL database, if one of the tables above contains millions of rows, the query might cause trouble for other applications using the same database.\n",
|
||||
"\n",
|
||||
"Most data-warehouse-oriented databases support user-level quotas for limiting resource usage."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1aa8a7e3-87ca-4963-a224-0cbdc9d88714",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all SQLDatabaseToolkit features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.sql.toolkit.SQLDatabaseToolkit.html)."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,752 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Xorbits"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This notebook shows how to use agents to interact with [Xorbits Pandas](https://doc.xorbits.io/en/latest/reference/pandas/index.html) dataframe and [Xorbits Numpy](https://doc.xorbits.io/en/latest/reference/numpy/index.html) ndarray. It is mostly optimized for question answering.\n",
|
||||
"\n",
|
||||
"**NOTE: this agent calls the `Python` agent under the hood, which executes LLM generated Python code - this can be bad if the LLM generated Python code is harmful. Use cautiously.**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Pandas examples"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-07-13T08:06:33.955439Z",
|
||||
"start_time": "2023-07-13T08:06:33.767539500Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import xorbits.pandas as pd\n",
|
||||
"from langchain_experimental.agents.agent_toolkits import create_xorbits_agent\n",
|
||||
"from langchain_openai import OpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-07-13T08:06:33.955439Z",
|
||||
"start_time": "2023-07-13T08:06:33.767539500Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "05b7c067b1114ce9a8aef4a58a5d5fef",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0.00/100 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"data = pd.read_csv(\"titanic.csv\")\n",
|
||||
"agent = create_xorbits_agent(OpenAI(temperature=0), data, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-07-13T08:11:06.622471100Z",
|
||||
"start_time": "2023-07-13T08:11:03.183042Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to count the number of rows and columns\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: data.shape\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m(891, 12)\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: There are 891 rows and 12 columns.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'There are 891 rows and 12 columns.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"How many rows and columns are there?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-07-13T08:11:23.189275300Z",
|
||||
"start_time": "2023-07-13T08:11:11.029030900Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "8c63d745a7eb41a484043a5dba357997",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0.00/100 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to count the number of people in pclass 1\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: data[data['Pclass'] == 1].shape[0]\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m216\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: There are 216 people in pclass 1.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'There are 216 people in pclass 1.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"How many people are in pclass 1?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to calculate the mean age\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: data['Age'].mean()\u001b[0m"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "29af2e29f2d64a3397c212812adf0e9b",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0.00/100 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m29.69911764705882\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: The mean age is 29.69911764705882.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The mean age is 29.69911764705882.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"whats the mean age?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to group the data by sex and then find the average age for each group\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: data.groupby('Sex')['Age'].mean()\u001b[0m"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "c3d28625c35946fd91ebc2a47f8d8c5b",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0.00/100 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mSex\n",
|
||||
"female 27.915709\n",
|
||||
"male 30.726645\n",
|
||||
"Name: Age, dtype: float64\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the average age for each group\n",
|
||||
"Final Answer: The average age for female passengers is 27.92 and the average age for male passengers is 30.73.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The average age for female passengers is 27.92 and the average age for male passengers is 30.73.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"Group the data by sex and find the average age for each group\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "c72aab63b20d47599f4f9806f6887a69",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0.00/100 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to filter the dataframe to get the desired result\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: data[(data['Age'] > 30) & (data['Fare'] > 30) & (data['Fare'] < 50) & ((data['Pclass'] == 1) | (data['Pclass'] == 2))].shape[0]\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m20\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: 20\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'20'"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\n",
|
||||
" \"Show the number of people whose age is greater than 30 and fare is between 30 and 50 , and pclass is either 1 or 2\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Numpy examples"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "fa8baf315a0c41c89392edc4a24b76f5",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0.00/100 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import xorbits.numpy as np\n",
|
||||
"from langchain_experimental.agents.agent_toolkits import create_xorbits_agent\n",
|
||||
"from langchain_openai import OpenAI\n",
|
||||
"\n",
|
||||
"arr = np.array([1, 2, 3, 4, 5, 6])\n",
|
||||
"agent = create_xorbits_agent(OpenAI(temperature=0), arr, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to find out the shape of the array\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: data.shape\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m(6,)\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: The shape of the array is (6,).\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The shape of the array is (6,).'"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"Give the shape of the array \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to access the 2nd element of the array\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: data[1]\u001b[0m"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "64efcc74f81f404eb0a7d3f0326cd8b3",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0.00/100 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m2\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: 2\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'2'"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"Give the 2nd element of the array \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to reshape the array and then transpose it\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: np.reshape(data, (2,3)).T\u001b[0m"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "fce51acf6fb347c0b400da67c6750534",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0.00/100 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m[[1 4]\n",
|
||||
" [2 5]\n",
|
||||
" [3 6]]\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: The reshaped and transposed array is [[1 4], [2 5], [3 6]].\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The reshaped and transposed array is [[1 4], [2 5], [3 6]].'"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\n",
|
||||
" \"Reshape the array into a 2-dimensional array with 2 rows and 3 columns, and then transpose it\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to reshape the array and then sum it\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: np.sum(np.reshape(data, (3,2)), axis=0)\u001b[0m"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "27fd4a0bbf694936bc41a6991064dec2",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0.00/100 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m[ 9 12]\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: The sum of the array along the first axis is [9, 12].\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The sum of the array along the first axis is [9, 12].'"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\n",
|
||||
" \"Reshape the array into a 2-dimensional array with 3 rows and 2 columns and sum the array along the first axis\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "a591b6d7913f45cba98d2f3b71a5120a",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0.00/100 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])\n",
|
||||
"agent = create_xorbits_agent(OpenAI(temperature=0), arr, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to use the numpy covariance function\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: np.cov(data)\u001b[0m"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "5fe40f83cfae48d0919c147627b5839f",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0.00/100 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m[[1. 1. 1.]\n",
|
||||
" [1. 1. 1.]\n",
|
||||
" [1. 1. 1.]]\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: The covariance matrix is [[1. 1. 1.], [1. 1. 1.], [1. 1. 1.]].\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The covariance matrix is [[1. 1. 1.], [1. 1. 1.], [1. 1. 1.]].'"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"calculate the covariance matrix\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to use the SVD function\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: U, S, V = np.linalg.svd(data)\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now have the U matrix\n",
|
||||
"Final Answer: U = [[-0.70710678 -0.70710678]\n",
|
||||
" [-0.70710678 0.70710678]]\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'U = [[-0.70710678 -0.70710678]\\n [-0.70710678 0.70710678]]'"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"compute the U of Singular Value Decomposition of the matrix\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -4,7 +4,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# AINetwork\n",
|
||||
"# AINetwork Toolkit\n",
|
||||
"\n",
|
||||
">[AI Network](https://www.ainetwork.ai/build-on-ain) is a layer 1 blockchain designed to accommodate large-scale AI models, utilizing a decentralized GPU network powered by the [$AIN token](https://www.ainetwork.ai/token), enriching AI-driven `NFTs` (`AINFTs`).\n",
|
||||
">\n",
|
||||
@@ -4,7 +4,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Amadeus\n",
|
||||
"# Amadeus Toolkit\n",
|
||||
"\n",
|
||||
"This notebook walks you through connecting LangChain to the `Amadeus` travel APIs.\n",
|
||||
"\n",
|
||||
@@ -1,170 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Apify\n",
|
||||
"\n",
|
||||
"This notebook shows how to use the [Apify integration](/docs/integrations/providers/apify) for LangChain.\n",
|
||||
"\n",
|
||||
"[Apify](https://apify.com) is a cloud platform for web scraping and data extraction,\n",
|
||||
"which provides an [ecosystem](https://apify.com/store) of more than a thousand\n",
|
||||
"ready-made apps called *Actors* for various web scraping, crawling, and data extraction use cases.\n",
|
||||
"For example, you can use it to extract Google Search results, Instagram and Facebook profiles, products from Amazon or Shopify, Google Maps reviews, etc. etc.\n",
|
||||
"\n",
|
||||
"In this example, we'll use the [Website Content Crawler](https://apify.com/apify/website-content-crawler) Actor,\n",
|
||||
"which can deeply crawl websites such as documentation, knowledge bases, help centers, or blogs,\n",
|
||||
"and extract text content from the web pages. Then we feed the documents into a vector index and answer questions from it.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet apify-client langchain-community langchain-openai langchain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First, import `ApifyWrapper` into your source code:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.indexes import VectorstoreIndexCreator\n",
|
||||
"from langchain_community.utilities import ApifyWrapper\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"from langchain_openai import OpenAI\n",
|
||||
"from langchain_openai.embeddings import OpenAIEmbeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Initialize it using your [Apify API token](https://docs.apify.com/platform/integrations/api#api-token) and for the purpose of this example, also with your OpenAI API key:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"Your OpenAI API key\"\n",
|
||||
"os.environ[\"APIFY_API_TOKEN\"] = \"Your Apify API token\"\n",
|
||||
"\n",
|
||||
"apify = ApifyWrapper()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Then run the Actor, wait for it to finish, and fetch its results from the Apify dataset into a LangChain document loader.\n",
|
||||
"\n",
|
||||
"Note that if you already have some results in an Apify dataset, you can load them directly using `ApifyDatasetLoader`, as shown in [this notebook](/docs/integrations/document_loaders/apify_dataset). In that notebook, you'll also find the explanation of the `dataset_mapping_function`, which is used to map fields from the Apify dataset records to LangChain `Document` fields."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = apify.call_actor(\n",
|
||||
" actor_id=\"apify/website-content-crawler\",\n",
|
||||
" run_input={\"startUrls\": [{\"url\": \"https://python.langchain.com\"}]},\n",
|
||||
" dataset_mapping_function=lambda item: Document(\n",
|
||||
" page_content=item[\"text\"] or \"\", metadata={\"source\": item[\"url\"]}\n",
|
||||
" ),\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Initialize the vector index from the crawled documents:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"index = VectorstoreIndexCreator(embedding=OpenAIEmbeddings()).from_loaders([loader])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"And finally, query the vector index:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What is LangChain?\"\n",
|
||||
"result = index.query_with_sources(query, llm=OpenAI())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" LangChain is a standard interface through which you can interact with a variety of large language models (LLMs). It provides modules that can be used to build language model applications, and it also provides chains and agents with memory capabilities.\n",
|
||||
"\n",
|
||||
"https://python.langchain.com/en/latest/modules/models/llms.html, https://python.langchain.com/en/latest/getting_started/getting_started.html\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(result[\"answer\"])\n",
|
||||
"print(result[\"sources\"])"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -4,7 +4,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Azure AI Services\n",
|
||||
"# Azure AI Services Toolkit\n",
|
||||
"\n",
|
||||
"This toolkit is used to interact with the `Azure AI Services API` to achieve some multimodal capabilities.\n",
|
||||
"\n",
|
||||
@@ -4,7 +4,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Azure Cognitive Services\n",
|
||||
"# Azure Cognitive Services Toolkit\n",
|
||||
"\n",
|
||||
"This toolkit is used to interact with the `Azure Cognitive Services API` to achieve some multimodal capabilities.\n",
|
||||
"\n",
|
||||
@@ -4,7 +4,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Cassandra Database\n",
|
||||
"# Cassandra Database Toolkit\n",
|
||||
"\n",
|
||||
`Apache Cassandra">
">`Apache Cassandra®` is a widely used database for storing transactional application data. The introduction of functions and tooling in Large Language Models has opened up some exciting use cases for existing data in Generative AI applications. \n",
|
||||
"\n",
|
||||
@@ -148,11 +148,6 @@
|
||||
" CassandraDatabaseToolkit,\n",
|
||||
")\n",
|
||||
"from langchain_community.tools.cassandra_database.prompt import QUERY_PATH_PROMPT\n",
|
||||
"from langchain_community.tools.cassandra_database.tool import (\n",
|
||||
" GetSchemaCassandraDatabaseTool,\n",
|
||||
" GetTableDataCassandraDatabaseTool,\n",
|
||||
" QueryCassandraDatabaseTool,\n",
|
||||
")\n",
|
||||
"from langchain_community.utilities.cassandra_database import CassandraDatabase\n",
|
||||
"from langchain_openai import ChatOpenAI"
|
||||
]
|
||||
@@ -263,12 +258,7 @@
|
||||
"source": [
|
||||
"# Create a CassandraDatabase instance\n",
|
||||
"# Uses the cassio session to connect to the database\n",
|
||||
"db = CassandraDatabase()\n",
|
||||
"\n",
|
||||
"# Create the Cassandra Database tools\n",
|
||||
"query_tool = QueryCassandraDatabaseTool(db=db)\n",
|
||||
"schema_tool = GetSchemaCassandraDatabaseTool(db=db)\n",
|
||||
"select_data_tool = GetTableDataCassandraDatabaseTool(db=db)"
|
||||
"db = CassandraDatabase()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -4,7 +4,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ClickUp\n",
|
||||
"# ClickUp Toolkit\n",
|
||||
"\n",
|
||||
">[ClickUp](https://clickup.com/) is an all-in-one productivity platform that provides small and large teams across industries with flexible and customizable work management solutions, tools, and functions. \n",
|
||||
"\n",
|
||||
@@ -5,44 +5,17 @@
|
||||
"id": "19062701",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Cogniswitch Tools\n",
|
||||
"# Cogniswitch Toolkit\n",
|
||||
"\n",
|
||||
"**Use CogniSwitch to build production ready applications that can consume, organize and retrieve knowledge flawlessly. Using the framework of your choice, in this case Langchain CogniSwitch helps alleviate the stress of decision making when it comes to, choosing the right storage and retrieval formats. It also eradicates reliability issues and hallucinations when it comes to responses that are generated. Get started by interacting with your knowledge in just two simple steps.**\n",
|
||||
"CogniSwitch is used to build production ready applications that can consume, organize and retrieve knowledge flawlessly. Using the framework of your choice, in this case Langchain, CogniSwitch helps alleviate the stress of decision making when it comes to, choosing the right storage and retrieval formats. It also eradicates reliability issues and hallucinations when it comes to responses that are generated. \n",
|
||||
"\n",
|
||||
"visit [https://www.cogniswitch.ai/developer to register](https://www.cogniswitch.ai/developer?utm_source=langchain&utm_medium=langchainbuild&utm_id=dev).\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"**Registration:** \n",
|
||||
"Visit [this page](https://www.cogniswitch.ai/developer?utm_source=langchain&utm_medium=langchainbuild&utm_id=dev) to register a Cogniswitch account.\n",
|
||||
"\n",
|
||||
"- Signup with your email and verify your registration \n",
|
||||
"\n",
|
||||
"- You will get a mail with a platform token and oauth token for using the services.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"**step 1: Instantiate the toolkit and get the tools:**\n",
|
||||
"\n",
|
||||
"- Instantiate the cogniswitch toolkit with the cogniswitch token, openAI API key and OAuth token and get the tools. \n",
|
||||
"\n",
|
||||
"**step 2: Instantiate the agent with the tools and llm:**\n",
|
||||
"- Instantiate the agent with the list of cogniswitch tools and the llm, into the agent executor.\n",
|
||||
"\n",
|
||||
"**step 3: CogniSwitch Store Tool:** \n",
|
||||
"\n",
|
||||
"***CogniSwitch knowledge source file tool***\n",
|
||||
"- Use the agent to upload a file by giving the file path.(formats that are currently supported are .pdf, .docx, .doc, .txt, .html) \n",
|
||||
"- The content from the file will be processed by the cogniswitch and stored in your knowledge store. \n",
|
||||
"\n",
|
||||
"***CogniSwitch knowledge source url tool***\n",
|
||||
"- Use the agent to upload a URL. \n",
|
||||
"- The content from the url will be processed by the cogniswitch and stored in your knowledge store. \n",
|
||||
"\n",
|
||||
"**step 4: CogniSwitch Status Tool:**\n",
|
||||
"- Use the agent to know the status of the document uploaded with a document name.\n",
|
||||
"- You can also check the status of document processing in cogniswitch console. \n",
|
||||
"\n",
|
||||
"**step 5: CogniSwitch Answer Tool:**\n",
|
||||
"- Use the agent to ask your question.\n",
|
||||
"- You will get the answer from your knowledge as the response. \n"
|
||||
"- You will get a mail with a platform token and oauth token for using the services.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -60,7 +33,7 @@
|
||||
"id": "1435b193",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Import necessary libraries"
|
||||
"## Import necessary libraries"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -86,7 +59,7 @@
|
||||
"id": "6e6acf0e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Cogniswitch platform token, OAuth token and OpenAI API key"
|
||||
"## Cogniswitch platform token, OAuth token and OpenAI API key"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -108,7 +81,7 @@
|
||||
"id": "320e02fc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Instantiate the cogniswitch toolkit with the credentials"
|
||||
"## Instantiate the cogniswitch toolkit with the credentials"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -146,7 +119,7 @@
|
||||
"id": "4aae43a3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Instantiate the llm"
|
||||
"## Instantiate the LLM"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -169,7 +142,9 @@
|
||||
"id": "04179282",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create a agent executor"
|
||||
"## Use the LLM with the Toolkit\n",
|
||||
"\n",
|
||||
"### Create an agent with the LLM and Toolkit"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -9,7 +9,7 @@
|
||||
"Using this tool, you can integrate individual Connery Action into your LangChain agent.\n",
|
||||
"\n",
|
||||
"If you want to use more than one Connery Action in your agent,\n",
|
||||
"check out the [Connery Toolkit](/docs/integrations/toolkits/connery) documentation.\n",
|
||||
"check out the [Connery Toolkit](/docs/integrations/tools/connery_toolkit) documentation.\n",
|
||||
"\n",
|
||||
"## What is Connery?\n",
|
||||
"\n",
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -4,9 +4,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# File System\n",
|
||||
"# File Management Toolkit\n",
|
||||
"\n",
|
||||
"LangChain provides tools for interacting with a local file system out of the box. This notebook walks through some of them.\n",
|
||||
"The `FileManagementToolkit` provides tools for interacting with a local file system out of the box. This notebook walks through some of them.\n",
|
||||
"\n",
|
||||
"**Note:** these tools are not recommended for use outside a sandboxed environment! "
|
||||
]
|
||||
|
||||
303
docs/docs/integrations/tools/financial_datasets.ipynb
Normal file
303
docs/docs/integrations/tools/financial_datasets.ipynb
Normal file
@@ -0,0 +1,303 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"# FinancialDatasets Toolkit\n",
|
||||
"\n",
|
||||
"The [financial datasets](https://financialdatasets.ai/) stock market API provides REST endpoints that let you get financial data for 16,000+ tickers spanning 30+ years.\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To use this toolkit, you need two API keys:\n",
|
||||
"\n",
|
||||
"`FINANCIAL_DATASETS_API_KEY`: Get it from [financialdatasets.ai](https://financialdatasets.ai/).\n",
|
||||
"`OPENAI_API_KEY`: Get it from [OpenAI](https://platform.openai.com/)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"FINANCIAL_DATASETS_API_KEY\"] = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"This toolkit lives in the `langchain-community` package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "652d6238-1f87-422a-b135-f5abbb8652fc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a38cde65-254d-4219-a441-068766c0d4b5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our toolkit:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cb09c344-1836-4e0c-acf8-11d13ac1dbae",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.agent_toolkits.financial_datasets.toolkit import (\n",
|
||||
" FinancialDatasetsToolkit,\n",
|
||||
")\n",
|
||||
"from langchain_community.utilities.financial_datasets import FinancialDatasetsAPIWrapper\n",
|
||||
"\n",
|
||||
"api_wrapper = FinancialDatasetsAPIWrapper(\n",
|
||||
" financial_datasets_api_key=os.environ[\"FINANCIAL_DATASETS_API_KEY\"]\n",
|
||||
")\n",
|
||||
"toolkit = FinancialDatasetsToolkit(api_wrapper=api_wrapper)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5c5f2839-4020-424e-9fc9-07777eede442",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tools\n",
|
||||
"\n",
|
||||
"View available tools:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "51a60dbe-9f2e-4e04-bb62-23968f17164a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tools = toolkit.get_tools()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"## Use within an agent\n",
|
||||
"\n",
|
||||
"Let's equip our agent with the FinancialDatasetsToolkit and ask financial questions."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"system_prompt = \"\"\"\n",
|
||||
"You are an advanced financial analysis AI assistant equipped with specialized tools\n",
|
||||
"to access and analyze financial data. Your primary function is to help users with\n",
|
||||
"financial analysis by retrieving and interpreting income statements, balance sheets,\n",
|
||||
"and cash flow statements for publicly traded companies.\n",
|
||||
"\n",
|
||||
"You have access to the following tools from the FinancialDatasetsToolkit:\n",
|
||||
"\n",
|
||||
"1. Balance Sheets: Retrieves balance sheet data for a given ticker symbol.\n",
|
||||
"2. Income Statements: Fetches income statement data for a specified company.\n",
|
||||
"3. Cash Flow Statements: Accesses cash flow statement information for a particular ticker.\n",
|
||||
"\n",
|
||||
"Your capabilities include:\n",
|
||||
"\n",
|
||||
"1. Retrieving financial statements for any publicly traded company using its ticker symbol.\n",
|
||||
"2. Analyzing financial ratios and metrics based on the data from these statements.\n",
|
||||
"3. Comparing financial performance across different time periods (e.g., year-over-year or quarter-over-quarter).\n",
|
||||
"4. Identifying trends in a company's financial health and performance.\n",
|
||||
"5. Providing insights on a company's liquidity, solvency, profitability, and efficiency.\n",
|
||||
"6. Explaining complex financial concepts in simple terms.\n",
|
||||
"\n",
|
||||
"When responding to queries:\n",
|
||||
"\n",
|
||||
"1. Always specify which financial statement(s) you're using for your analysis.\n",
|
||||
"2. Provide context for the numbers you're referencing (e.g., fiscal year, quarter).\n",
|
||||
"3. Explain your reasoning and calculations clearly.\n",
|
||||
"4. If you need more information to provide a complete answer, ask for clarification.\n",
|
||||
"5. When appropriate, suggest additional analyses that might be helpful.\n",
|
||||
"\n",
|
||||
"Remember, your goal is to provide accurate, insightful financial analysis to\n",
|
||||
"help users make informed decisions. Always maintain a professional and objective tone in your responses.\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"Instantiate the LLM."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "310bf18e-6c9a-4072-b86e-47bc1fcca29d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.tools import tool\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"model = ChatOpenAI(model=\"gpt-4o\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"Define a user query."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "23e11cc9-abd6-4855-a7eb-799f45ca01ae",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What was AAPL's revenue in 2023? What about it's total debt in Q1 2024?\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"Create the agent."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import AgentExecutor, create_tool_calling_agent\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\"system\", system_prompt),\n",
|
||||
" (\"human\", \"{input}\"),\n",
|
||||
" # Placeholders fill up a **list** of messages\n",
|
||||
" (\"placeholder\", \"{agent_scratchpad}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"agent = create_tool_calling_agent(model, tools, prompt)\n",
|
||||
"agent_executor = AgentExecutor(agent=agent, tools=tools)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"Query the agent."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor.invoke({\"input\": query})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `FinancialDatasetsToolkit` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.financial_datasets.toolkit.FinancialDatasetsToolkit.html)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
331
docs/docs/integrations/tools/github.ipynb
Normal file
331
docs/docs/integrations/tools/github.ipynb
Normal file
@@ -0,0 +1,331 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Github Toolkit\n",
|
||||
"\n",
|
||||
"The `Github` toolkit contains tools that enable an LLM agent to interact with a github repository. \n",
|
||||
"The tool is a wrapper for the [PyGitHub](https://github.com/PyGithub/PyGithub) library. \n",
|
||||
"\n",
|
||||
"For detailed documentation of all `GitHubToolkit` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.github.toolkit.GitHubToolkit.html).\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"At a high-level, we will:\n",
|
||||
"\n",
|
||||
"1. Install the pygithub library\n",
|
||||
"2. Create a Github app\n",
|
||||
"3. Set your environment variables\n",
|
||||
"4. Pass the tools to your agent with `toolkit.get_tools()`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated tracing from runs of individual tools, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"#### 1. Install dependencies\n",
|
||||
"\n",
|
||||
"This integration is implemented in `langchain-community`. We will also need the `pygithub` dependency:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet pygithub langchain-community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### 2. Create a Github App\n",
|
||||
"\n",
|
||||
"[Follow the instructions here](https://docs.github.com/en/apps/creating-github-apps/registering-a-github-app/registering-a-github-app) to create and register a Github app. Make sure your app has the following [repository permissions:](https://docs.github.com/en/rest/overview/permissions-required-for-github-apps?apiVersion=2022-11-28)\n",
|
||||
"\n",
|
||||
"* Commit statuses (read only)\n",
|
||||
"* Contents (read and write)\n",
|
||||
"* Issues (read and write)\n",
|
||||
"* Metadata (read only)\n",
|
||||
"* Pull requests (read and write)\n",
|
||||
"\n",
|
||||
"Once the app has been registered, you must give your app permission to access each of the repositories you wish it to act upon. Use the App settings on [github.com here](https://github.com/settings/installations).\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"#### 3. Set Environment Variables\n",
|
||||
"\n",
|
||||
"Before initializing your agent, the following environment variables need to be set:\n",
|
||||
"\n",
|
||||
"* **GITHUB_APP_ID**- A six-digit number found in your app's general settings\n",
|
||||
"* **GITHUB_APP_PRIVATE_KEY**- The location of your app's private key .pem file, or the full text of that file as a string.\n",
|
||||
"* **GITHUB_REPOSITORY**- The name of the Github repository you want your bot to act upon. Must follow the format {username}/{repo-name}. *Make sure the app has been added to this repository first!*\n",
|
||||
"* Optional: **GITHUB_BRANCH**- The branch where the bot will make its commits. Defaults to `repo.default_branch`.\n",
|
||||
"* Optional: **GITHUB_BASE_BRANCH**- The base branch of your repo that PRs will be based on. Defaults to `repo.default_branch`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"for env_var in [\n",
|
||||
" \"GITHUB_APP_ID\",\n",
|
||||
" \"GITHUB_APP_PRIVATE_KEY\",\n",
|
||||
" \"GITHUB_REPOSITORY\",\n",
|
||||
"]:\n",
|
||||
" if not os.getenv(env_var):\n",
|
||||
" os.environ[env_var] = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our toolkit:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.agent_toolkits.github.toolkit import GitHubToolkit\n",
|
||||
"from langchain_community.utilities.github import GitHubAPIWrapper\n",
|
||||
"\n",
|
||||
"github = GitHubAPIWrapper()\n",
|
||||
"toolkit = GitHubToolkit.from_github_api_wrapper(github)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tools\n",
|
||||
"\n",
|
||||
"View available tools:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Get Issues\n",
|
||||
"Get Issue\n",
|
||||
"Comment on Issue\n",
|
||||
"List open pull requests (PRs)\n",
|
||||
"Get Pull Request\n",
|
||||
"Overview of files included in PR\n",
|
||||
"Create Pull Request\n",
|
||||
"List Pull Requests' Files\n",
|
||||
"Create File\n",
|
||||
"Read File\n",
|
||||
"Update File\n",
|
||||
"Delete File\n",
|
||||
"Overview of existing files in Main branch\n",
|
||||
"Overview of files in current working branch\n",
|
||||
"List branches in this repository\n",
|
||||
"Set active branch\n",
|
||||
"Create a new branch\n",
|
||||
"Get files from a directory\n",
|
||||
"Search issues and pull requests\n",
|
||||
"Search code\n",
|
||||
"Create review request\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tools = toolkit.get_tools()\n",
|
||||
"\n",
|
||||
"for tool in tools:\n",
|
||||
" print(tool.name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The purpose of these tools is as follows:\n",
|
||||
"\n",
|
||||
"Each of these tools is described briefly below.\n",
|
||||
"\n",
|
||||
"1. **Get Issues**- fetches issues from the repository.\n",
|
||||
"\n",
|
||||
"2. **Get Issue**- fetches details about a specific issue.\n",
|
||||
"\n",
|
||||
"3. **Comment on Issue**- posts a comment on a specific issue.\n",
|
||||
"\n",
|
||||
"4. **Create Pull Request**- creates a pull request from the bot's working branch to the base branch.\n",
|
||||
"\n",
|
||||
"5. **Create File**- creates a new file in the repository.\n",
|
||||
"\n",
|
||||
"6. **Read File**- reads a file from the repository.\n",
|
||||
"\n",
|
||||
"7. **Update File**- updates a file in the repository.\n",
|
||||
"\n",
|
||||
"8. **Delete File**- deletes a file from the repository."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use within an agent\n",
|
||||
"\n",
|
||||
"We will need an LLM or chat model:\n",
|
||||
"\n",
|
||||
"```{=mdx}\n",
|
||||
"import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
|
||||
"\n",
|
||||
"<ChatModelTabs customVarName=\"llm\" />\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# | output: false\n",
|
||||
"# | echo: false\n",
|
||||
"\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Initialize the agent with a subset of tools:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langgraph.prebuilt import create_react_agent\n",
|
||||
"\n",
|
||||
"tools = [tool for tool in toolkit.get_tools() if tool.name == \"Get Issue\"]\n",
|
||||
"assert len(tools) == 1\n",
|
||||
"tools[0].name = \"get_issue\"\n",
|
||||
"\n",
|
||||
"agent_executor = create_react_agent(llm, tools)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"And issue it a query:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"================================\u001b[1m Human Message \u001b[0m=================================\n",
|
||||
"\n",
|
||||
"What is the title of issue 24888?\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" get_issue (call_iSYJVaM7uchfNHOMJoVPQsOi)\n",
|
||||
" Call ID: call_iSYJVaM7uchfNHOMJoVPQsOi\n",
|
||||
" Args:\n",
|
||||
" issue_number: 24888\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: get_issue\n",
|
||||
"\n",
|
||||
"{\"number\": 24888, \"title\": \"Standardize KV-Store Docs\", \"body\": \"To make our KV-store integrations as easy to use as possible we need to make sure the docs for them are thorough and standardized. There are two parts to this: updating the KV-store docstrings and updating the actual integration docs.\\r\\n\\r\\nThis needs to be done for each KV-store integration, ideally with one PR per KV-store.\\r\\n\\r\\nRelated to broader issues #21983 and #22005.\\r\\n\\r\\n## Docstrings\\r\\nEach KV-store class docstring should have the sections shown in the [Appendix](#appendix) below. The sections should have input and output code blocks when relevant.\\r\\n\\r\\nTo build a preview of the API docs for the package you're working on run (from root of repo):\\r\\n\\r\\n```shell\\r\\nmake api_docs_clean; make api_docs_quick_preview API_PKG=openai\\r\\n```\\r\\n\\r\\nwhere `API_PKG=` should be the parent directory that houses the edited package (e.g. community, openai, anthropic, huggingface, together, mistralai, groq, fireworks, etc.). 
This should be quite fast for all the partner packages.\\r\\n\\r\\n## Doc pages\\r\\nEach KV-store [docs page](https://python.langchain.com/v0.2/docs/integrations/stores/) should follow [this template](https://github.com/langchain-ai/langchain/blob/master/libs/cli/langchain_cli/integration_template/docs/kv_store.ipynb).\\r\\n\\r\\nHere is an example: https://python.langchain.com/v0.2/docs/integrations/stores/in_memory/\\r\\n\\r\\nYou can use the `langchain-cli` to quickly get started with a new chat model integration docs page (run from root of repo):\\r\\n\\r\\n```shell\\r\\npoetry run pip install -e libs/cli\\r\\npoetry run langchain-cli integration create-doc --name \\\"foo-bar\\\" --name-class FooBar --component-type kv_store --destination-dir ./docs/docs/integrations/stores/\\r\\n```\\r\\n\\r\\nwhere `--name` is the integration package name without the \\\"langchain-\\\" prefix and `--name-class` is the class name without the \\\"ByteStore\\\" suffix. This will create a template doc with some autopopulated fields at docs/docs/integrations/stores/foo_bar.ipynb.\\r\\n\\r\\nTo build a preview of the docs you can run (from root):\\r\\n\\r\\n```shell\\r\\nmake docs_clean\\r\\nmake docs_build\\r\\ncd docs/build/output-new\\r\\nyarn\\r\\nyarn start\\r\\n```\\r\\n\\r\\n## Appendix\\r\\nExpected sections for the KV-store class docstring.\\r\\n\\r\\n```python\\r\\n \\\"\\\"\\\"__ModuleName__ completion KV-store integration.\\r\\n\\r\\n # TODO: Replace with relevant packages, env vars.\\r\\n Setup:\\r\\n Install ``__package_name__`` and set environment variable ``__MODULE_NAME___API_KEY``.\\r\\n\\r\\n .. code-block:: bash\\r\\n\\r\\n pip install -U __package_name__\\r\\n export __MODULE_NAME___API_KEY=\\\"your-api-key\\\"\\r\\n\\r\\n # TODO: Populate with relevant params.\\r\\n Key init args \\u2014 client params:\\r\\n api_key: Optional[str]\\r\\n __ModuleName__ API key. 
If not passed in will be read from env var __MODULE_NAME___API_KEY.\\r\\n\\r\\n See full list of supported init args and their descriptions in the params section.\\r\\n\\r\\n # TODO: Replace with relevant init params.\\r\\n Instantiate:\\r\\n .. code-block:: python\\r\\n\\r\\n from __module_name__ import __ModuleName__ByteStore\\r\\n\\r\\n kv_store = __ModuleName__ByteStore(\\r\\n # api_key=\\\"...\\\",\\r\\n # other params...\\r\\n )\\r\\n\\r\\n Set keys:\\r\\n .. code-block:: python\\r\\n\\r\\n kv_pairs = [\\r\\n [\\\"key1\\\", \\\"value1\\\"],\\r\\n [\\\"key2\\\", \\\"value2\\\"],\\r\\n ]\\r\\n\\r\\n kv_store.mset(kv_pairs)\\r\\n\\r\\n .. code-block:: python\\r\\n\\r\\n Get keys:\\r\\n .. code-block:: python\\r\\n\\r\\n kv_store.mget([\\\"key1\\\", \\\"key2\\\"])\\r\\n\\r\\n .. code-block:: python\\r\\n\\r\\n # TODO: Example output.\\r\\n\\r\\n Delete keys:\\r\\n ..code-block:: python\\r\\n\\r\\n kv_store.mdelete([\\\"key1\\\", \\\"key2\\\"])\\r\\n\\r\\n ..code-block:: python\\r\\n \\\"\\\"\\\" # noqa: E501\\r\\n```\", \"comments\": \"[]\", \"opened_by\": \"jacoblee93\"}\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"\n",
|
||||
"The title of issue 24888 is \"Standardize KV-Store Docs\".\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"example_query = \"What is the title of issue 24888?\"\n",
|
||||
"\n",
|
||||
"events = agent_executor.stream(\n",
|
||||
" {\"messages\": [(\"user\", example_query)]},\n",
|
||||
" stream_mode=\"values\",\n",
|
||||
")\n",
|
||||
"for event in events:\n",
|
||||
" event[\"messages\"][-1].pretty_print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `GitHubToolkit` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.github.toolkit.GitHubToolkit.html)."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -4,7 +4,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Gitlab\n",
|
||||
"# Gitlab Toolkit\n",
|
||||
"\n",
|
||||
"The `Gitlab` toolkit contains tools that enable an LLM agent to interact with a gitlab repository. \n",
|
||||
"The tool is a wrapper for the [python-gitlab](https://github.com/python-gitlab/python-gitlab) library. \n",
|
||||
271
docs/docs/integrations/tools/gmail.ipynb
Normal file
271
docs/docs/integrations/tools/gmail.ipynb
Normal file
@@ -0,0 +1,271 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Gmail Toolkit\n",
|
||||
"\n",
|
||||
"This will help you get started with the Gmail [toolkit](/docs/concepts/#toolkits). This toolkit interacts with the Gmail API to read messages, draft and send messages, and more. For detailed documentation of all `GmailToolkit` features and configurations, head to the [API reference](https://api.python.langchain.com/en/latest/gmail/langchain_google_community.gmail.toolkit.GmailToolkit.html).\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To use this toolkit, you will need to set up your credentials as explained in the [Gmail API docs](https://developers.google.com/gmail/api/quickstart/python#authorize_credentials_for_a_desktop_application). Once you've downloaded the `credentials.json` file, you can start using the Gmail API."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"This toolkit lives in the `langchain-google-community` package. We'll need the `gmail` extra:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-google-community\\[gmail\\]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated tracing from runs of individual tools, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"By default the toolkit reads the local `credentials.json` file. You can also manually provide a `Credentials` object."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_community import GmailToolkit\n",
|
||||
"\n",
|
||||
"toolkit = GmailToolkit()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Customizing Authentication\n",
|
||||
"\n",
|
||||
"Behind the scenes, a `googleapi` resource is created using the following methods. \n",
|
||||
"You can manually build a `googleapi` resource for more auth control. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_community.gmail.utils import (\n",
|
||||
" build_resource_service,\n",
|
||||
" get_gmail_credentials,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Can review scopes here https://developers.google.com/gmail/api/auth/scopes\n",
|
||||
"# For instance, readonly scope is 'https://www.googleapis.com/auth/gmail.readonly'\n",
|
||||
"credentials = get_gmail_credentials(\n",
|
||||
" token_file=\"token.json\",\n",
|
||||
" scopes=[\"https://mail.google.com/\"],\n",
|
||||
" client_secrets_file=\"credentials.json\",\n",
|
||||
")\n",
|
||||
"api_resource = build_resource_service(credentials=credentials)\n",
|
||||
"toolkit = GmailToolkit(api_resource=api_resource)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tools\n",
|
||||
"\n",
|
||||
"View available tools:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[GmailCreateDraft(api_resource=<googleapiclient.discovery.Resource object at 0x1094509d0>),\n",
|
||||
" GmailSendMessage(api_resource=<googleapiclient.discovery.Resource object at 0x1094509d0>),\n",
|
||||
" GmailSearch(api_resource=<googleapiclient.discovery.Resource object at 0x1094509d0>),\n",
|
||||
" GmailGetMessage(api_resource=<googleapiclient.discovery.Resource object at 0x1094509d0>),\n",
|
||||
" GmailGetThread(api_resource=<googleapiclient.discovery.Resource object at 0x1094509d0>)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tools = toolkit.get_tools()\n",
|
||||
"tools"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"- [GmailCreateDraft](https://api.python.langchain.com/en/latest/gmail/langchain_google_community.gmail.create_draft.GmailCreateDraft.html)\n",
|
||||
"- [GmailSendMessage](https://api.python.langchain.com/en/latest/gmail/langchain_google_community.gmail.send_message.GmailSendMessage.html)\n",
|
||||
"- [GmailSearch](https://api.python.langchain.com/en/latest/gmail/langchain_google_community.gmail.search.GmailSearch.html)\n",
|
||||
"- [GmailGetMessage](https://api.python.langchain.com/en/latest/gmail/langchain_google_community.gmail.get_message.GmailGetMessage.html)\n",
|
||||
"- [GmailGetThread](https://api.python.langchain.com/en/latest/gmail/langchain_google_community.gmail.get_thread.GmailGetThread.html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use within an agent\n",
|
||||
"\n",
|
||||
"Below we show how to incorporate the toolkit into an [agent](/docs/tutorials/agents).\n",
|
||||
"\n",
|
||||
"We will need an LLM or chat model:\n",
|
||||
"\n",
|
||||
"```{=mdx}\n",
|
||||
"import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
|
||||
"\n",
|
||||
"<ChatModelTabs customVarName=\"llm\" />\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# | output: false\n",
|
||||
"# | echo: false\n",
|
||||
"\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langgraph.prebuilt import create_react_agent\n",
|
||||
"\n",
|
||||
"agent_executor = create_react_agent(llm, tools)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"================================\u001b[1m Human Message \u001b[0m=================================\n",
|
||||
"\n",
|
||||
"Draft an email to fake@fake.com thanking them for coffee.\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" create_gmail_draft (call_slGkYKZKA6h3Mf1CraUBzs6M)\n",
|
||||
" Call ID: call_slGkYKZKA6h3Mf1CraUBzs6M\n",
|
||||
" Args:\n",
|
||||
" message: Dear Fake,\n",
|
||||
"\n",
|
||||
"I wanted to take a moment to thank you for the coffee yesterday. It was a pleasure catching up with you. Let's do it again soon!\n",
|
||||
"\n",
|
||||
"Best regards,\n",
|
||||
"[Your Name]\n",
|
||||
" to: ['fake@fake.com']\n",
|
||||
" subject: Thank You for the Coffee\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: create_gmail_draft\n",
|
||||
"\n",
|
||||
"Draft created. Draft Id: r-7233782721440261513\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"\n",
|
||||
"I have drafted an email to fake@fake.com thanking them for the coffee. You can review and send it from your email draft with the subject \"Thank You for the Coffee\".\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"example_query = \"Draft an email to fake@fake.com thanking them for coffee.\"\n",
|
||||
"\n",
|
||||
"events = agent_executor.stream(\n",
|
||||
" {\"messages\": [(\"user\", example_query)]},\n",
|
||||
" stream_mode=\"values\",\n",
|
||||
")\n",
|
||||
"for event in events:\n",
|
||||
" event[\"messages\"][-1].pretty_print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `GmailToolkit` features and configurations, head to the [API reference](https://api.python.langchain.com/en/latest/gmail/langchain_google_community.gmail.toolkit.GmailToolkit.html)."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -1,106 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "487607cd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Google Places\n",
|
||||
"\n",
|
||||
"This notebook goes through how to use Google Places API"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "8690845f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet googlemaps langchain-community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "fae31ef4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"GPLACES_API_KEY\"] = \"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "abb502b3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.tools import GooglePlacesTool"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "a83a02ac",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"places = GooglePlacesTool()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "2b65a285",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"1. Delfina Restaurant\\nAddress: 3621 18th St, San Francisco, CA 94110, USA\\nPhone: (415) 552-4055\\nWebsite: https://www.delfinasf.com/\\n\\n\\n2. Piccolo Forno\\nAddress: 725 Columbus Ave, San Francisco, CA 94133, USA\\nPhone: (415) 757-0087\\nWebsite: https://piccolo-forno-sf.com/\\n\\n\\n3. L'Osteria del Forno\\nAddress: 519 Columbus Ave, San Francisco, CA 94133, USA\\nPhone: (415) 982-1124\\nWebsite: Unknown\\n\\n\\n4. Il Fornaio\\nAddress: 1265 Battery St, San Francisco, CA 94111, USA\\nPhone: (415) 986-0100\\nWebsite: https://www.ilfornaio.com/\\n\\n\""
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"places.run(\"al fornos\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "66d3da8a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -21,7 +21,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain-community"
|
||||
"%pip install --upgrade --quiet langchain_google_community"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -44,8 +44,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.utilities import GoogleSearchAPIWrapper\n",
|
||||
"from langchain_core.tools import Tool\n",
|
||||
"from langchain_google_community import GoogleSearchAPIWrapper\n",
|
||||
"\n",
|
||||
"search = GoogleSearchAPIWrapper()\n",
|
||||
"\n",
|
||||
|
||||
@@ -1,108 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Google Trends\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use the Google Trends Tool to fetch trends information.\n",
|
||||
"\n",
|
||||
"First, you need to sign up for a `SerpApi` key at: https://serpapi.com/users/sign_up.\n",
|
||||
"\n",
|
||||
"Then you must install `google-search-results` with the command:\n",
|
||||
"\n",
|
||||
"`pip install google-search-results`\n",
|
||||
"\n",
|
||||
"Then you will need to set the environment variable `SERPAPI_API_KEY` to your `SerpApi key`\n",
|
||||
"\n",
|
||||
"[Alternatively you can pass the key in as an argument to the wrapper `serp_api_key=\"your secret key\"`]\n",
|
||||
"\n",
|
||||
"## Use the Tool"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Requirement already satisfied: google-search-results in c:\\python311\\lib\\site-packages (2.4.2)\n",
|
||||
"Requirement already satisfied: requests in c:\\python311\\lib\\site-packages (from google-search-results) (2.31.0)\n",
|
||||
"Requirement already satisfied: charset-normalizer<4,>=2 in c:\\python311\\lib\\site-packages (from requests->google-search-results) (3.3.2)\n",
|
||||
"Requirement already satisfied: idna<4,>=2.5 in c:\\python311\\lib\\site-packages (from requests->google-search-results) (3.4)\n",
|
||||
"Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\python311\\lib\\site-packages (from requests->google-search-results) (2.1.0)\n",
|
||||
"Requirement already satisfied: certifi>=2017.4.17 in c:\\python311\\lib\\site-packages (from requests->google-search-results) (2023.7.22)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet google-search-results langchain_community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from langchain_community.tools.google_trends import GoogleTrendsQueryRun\n",
|
||||
"from langchain_community.utilities.google_trends import GoogleTrendsAPIWrapper\n",
|
||||
"\n",
|
||||
"os.environ[\"SERPAPI_API_KEY\"] = \"\"\n",
|
||||
"tool = GoogleTrendsQueryRun(api_wrapper=GoogleTrendsAPIWrapper())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Query: Water\\nDate From: Nov 20, 2022\\nDate To: Nov 11, 2023\\nMin Value: 72\\nMax Value: 100\\nAverage Value: 84.25490196078431\\nPrecent Change: 5.555555555555555%\\nTrend values: 72, 72, 74, 77, 86, 80, 82, 88, 79, 79, 85, 82, 81, 84, 83, 77, 80, 85, 82, 80, 88, 84, 82, 84, 83, 85, 92, 92, 100, 92, 100, 96, 94, 95, 94, 98, 96, 84, 86, 84, 85, 83, 83, 76, 81, 85, 78, 77, 81, 75, 76\\nRising Related Queries: avatar way of water, avatar the way of water, owala water bottle, air up water bottle, lake mead water level\\nTop Related Queries: water park, water bottle, water heater, water filter, water tank, water bill, water world, avatar way of water, avatar the way of water, coconut water, deep water, water cycle, water dispenser, water purifier, water pollution, distilled water, hot water heater, water cooler, sparkling water, american water, micellar water, density of water, tankless water heater, tonic water, water jug'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tool.run(\"Water\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.9.16 ('langchain')",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "15e58ce194949b77a891bd4339ce3d86a9bd138e905926019517993f97db9e6c"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -5,7 +5,7 @@
|
||||
"id": "245a954a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Jira\n",
|
||||
"# Jira Toolkit\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use the `Jira` toolkit.\n",
|
||||
"\n",
|
||||
@@ -5,7 +5,7 @@
|
||||
"id": "85fb2c03-ab88-4c8c-97e3-a7f2954555ab",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# JSON\n",
|
||||
"# JSON Toolkit\n",
|
||||
"\n",
|
||||
"This notebook showcases an agent interacting with large `JSON/dict` objects. \n",
|
||||
"This is useful when you want to answer questions about a JSON blob that's too large to fit in the context window of an LLM. The agent is able to iteratively explore the blob to find what it needs to answer the user's question.\n",
|
||||
@@ -4,7 +4,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# MultiOn\n",
|
||||
"# MultiOn Toolkit\n",
|
||||
" \n",
|
||||
"[MultiON](https://www.multion.ai/blog/multion-building-a-brighter-future-for-humanity-with-ai-agents) has built an AI Agent that can interact with a broad array of web services and applications. \n",
|
||||
"\n",
|
||||
@@ -5,7 +5,7 @@
|
||||
"id": "e6fd05db-21c2-4227-9900-0840bc62cb31",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# NASA\n",
|
||||
"# NASA Toolkit\n",
|
||||
"\n",
|
||||
"This notebook shows how to use agents to interact with the NASA toolkit. The toolkit provides access to the NASA Image and Video Library API, with potential to expand and include other accessible NASA APIs in future iterations.\n",
|
||||
"\n",
|
||||
@@ -4,7 +4,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Office365\n",
|
||||
"# Office365 Toolkit\n",
|
||||
"\n",
|
||||
">[Microsoft 365](https://www.office.com/) is a product family of productivity software, collaboration and cloud-based services owned by `Microsoft`.\n",
|
||||
">\n",
|
||||
@@ -5,7 +5,7 @@
|
||||
"id": "85fb2c03-ab88-4c8c-97e3-a7f2954555ab",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# OpenAPI\n",
|
||||
"# OpenAPI Toolkit\n",
|
||||
"\n",
|
||||
"We can construct agents to consume arbitrary APIs, here APIs conformant to the `OpenAPI`/`Swagger` specification."
|
||||
]
|
||||
@@ -5,7 +5,7 @@
|
||||
"id": "c7ad998d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Natural Language APIs\n",
|
||||
"# Natural Language API Toolkits\n",
|
||||
"\n",
|
||||
"`Natural Language API` Toolkits (`NLAToolkits`) permit LangChain Agents to efficiently plan and combine calls across endpoints. \n",
|
||||
"\n",
|
||||
@@ -1,200 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Oracle AI Vector Search: Generate Summary\n",
|
||||
"\n",
|
||||
"Oracle AI Vector Search is designed for Artificial Intelligence (AI) workloads and allows you to query data based on semantics, rather than keywords.\n",
|
||||
"One of the biggest benefits of Oracle AI Vector Search is that semantic search on unstructured data can be combined with relational search on business data in one single system.\n",
|
||||
"This is not only powerful but also significantly more effective because you don't need to add a specialized vector database, eliminating the pain of data fragmentation between multiple systems.\n",
|
||||
"\n",
|
||||
"In addition, your vectors can benefit from all of Oracle Database’s most powerful features, like the following:\n",
|
||||
"\n",
|
||||
" * [Partitioning Support](https://www.oracle.com/database/technologies/partitioning.html)\n",
|
||||
" * [Real Application Clusters scalability](https://www.oracle.com/database/real-application-clusters/)\n",
|
||||
" * [Exadata smart scans](https://www.oracle.com/database/technologies/exadata/software/smartscan/)\n",
|
||||
" * [Shard processing across geographically distributed databases](https://www.oracle.com/database/distributed-database/)\n",
|
||||
" * [Transactions](https://docs.oracle.com/en/database/oracle/oracle-database/23/cncpt/transactions.html)\n",
|
||||
" * [Parallel SQL](https://docs.oracle.com/en/database/oracle/oracle-database/21/vldbg/parallel-exec-intro.html#GUID-D28717E4-0F77-44F5-BB4E-234C31D4E4BA)\n",
|
||||
" * [Disaster recovery](https://www.oracle.com/database/data-guard/)\n",
|
||||
" * [Security](https://www.oracle.com/security/database-security/)\n",
|
||||
" * [Oracle Machine Learning](https://www.oracle.com/artificial-intelligence/database-machine-learning/)\n",
|
||||
" * [Oracle Graph Database](https://www.oracle.com/database/integrated-graph-database/)\n",
|
||||
" * [Oracle Spatial and Graph](https://www.oracle.com/database/spatial/)\n",
|
||||
" * [Oracle Blockchain](https://docs.oracle.com/en/database/oracle/oracle-database/23/arpls/dbms_blockchain_table.html#GUID-B469E277-978E-4378-A8C1-26D3FF96C9A6)\n",
|
||||
" * [JSON](https://docs.oracle.com/en/database/oracle/oracle-database/23/adjsn/json-in-oracle-database.html)\n",
|
||||
"\n",
|
||||
"This guide demonstrates how to use the summary capabilities within Oracle AI Vector Search to generate summaries of your documents using `OracleSummary`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you are just starting with Oracle Database, consider exploring the [free Oracle 23 AI](https://www.oracle.com/database/free/#resources) which provides a great introduction to setting up your database environment. While working with the database, it is often advisable to avoid using the system user by default; instead, you can create your own user for enhanced security and customization. For detailed steps on user creation, refer to our [end-to-end guide](https://github.com/langchain-ai/langchain/blob/master/cookbook/oracleai_demo.ipynb) which also shows how to set up a user in Oracle. Additionally, understanding user privileges is crucial for managing database security effectively. You can learn more about this topic in the official [Oracle guide](https://docs.oracle.com/en/database/oracle/oracle-database/19/admqs/administering-user-accounts-and-security.html#GUID-36B21D72-1BBB-46C9-A0C9-F0D2A8591B8D) on administering user accounts and security."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Prerequisites\n",
|
||||
"\n",
|
||||
"Please install the Oracle Python Client driver to use LangChain with Oracle AI Vector Search. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# pip install oracledb"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Connect to Oracle Database\n",
|
||||
"The following sample code will show how to connect to Oracle Database. By default, python-oracledb runs in a ‘Thin’ mode which connects directly to Oracle Database. This mode does not need Oracle Client libraries. However, some additional functionality is available when python-oracledb uses them. Python-oracledb is said to be in ‘Thick’ mode when Oracle Client libraries are used. Both modes have comprehensive functionality supporting the Python Database API v2.0 Specification. See the following [guide](https://python-oracledb.readthedocs.io/en/latest/user_guide/appendix_a.html#featuresummary) that talks about features supported in each mode. You might want to switch to thick-mode if you are unable to use thin-mode."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"\n",
|
||||
"import oracledb\n",
|
||||
"\n",
|
||||
"# please update with your username, password, hostname and service_name\n",
|
||||
"username = \"<username>\"\n",
|
||||
"password = \"<password>\"\n",
|
||||
"dsn = \"<hostname>/<service_name>\"\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" conn = oracledb.connect(user=username, password=password, dsn=dsn)\n",
|
||||
" print(\"Connection successful!\")\n",
|
||||
"except Exception as e:\n",
|
||||
" print(\"Connection failed!\")\n",
|
||||
" sys.exit(1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Generate Summary\n",
|
||||
"The Oracle AI Vector Search Langchain library offers a suite of APIs designed for document summarization. It supports multiple summarization providers such as Database, OCIGENAI, HuggingFace, among others, allowing users to select the provider that best meets their needs. To utilize these capabilities, users must configure the summary parameters as specified. For detailed information on these parameters, please consult the [Oracle AI Vector Search Guide book](https://docs.oracle.com/en/database/oracle/oracle-database/23/arpls/dbms_vector_chain1.html#GUID-EC9DDB58-6A15-4B36-BA66-ECBA20D2CE57)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"***Note:*** You may need to set a proxy if you want to use a third-party summary generation provider other than Oracle's in-house default provider, 'database'. If you don't have a proxy, remove the `proxy` parameter when you instantiate `OracleSummary`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# proxy to be used when we instantiate summary and embedder object\n",
|
||||
"proxy = \"<proxy>\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The following sample code will show how to generate summary:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.utilities.oracleai import OracleSummary\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"\"\"\"\n",
|
||||
"# using 'ocigenai' provider\n",
|
||||
"summary_params = {\n",
|
||||
" \"provider\": \"ocigenai\",\n",
|
||||
" \"credential_name\": \"OCI_CRED\",\n",
|
||||
" \"url\": \"https://inference.generativeai.us-chicago-1.oci.oraclecloud.com/20231130/actions/summarizeText\",\n",
|
||||
" \"model\": \"cohere.command\",\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"# using 'huggingface' provider\n",
|
||||
"summary_params = {\n",
|
||||
" \"provider\": \"huggingface\",\n",
|
||||
" \"credential_name\": \"HF_CRED\",\n",
|
||||
" \"url\": \"https://api-inference.huggingface.co/models/\",\n",
|
||||
" \"model\": \"facebook/bart-large-cnn\",\n",
|
||||
" \"wait_for_model\": \"true\"\n",
|
||||
"}\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"# using 'database' provider\n",
|
||||
"summary_params = {\n",
|
||||
" \"provider\": \"database\",\n",
|
||||
" \"glevel\": \"S\",\n",
|
||||
" \"numParagraphs\": 1,\n",
|
||||
" \"language\": \"english\",\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"# get the summary instance\n",
|
||||
"# Remove proxy if not required\n",
|
||||
"summ = OracleSummary(conn=conn, params=summary_params, proxy=proxy)\n",
|
||||
"summary = summ.get_summary(\n",
|
||||
" \"In the heart of the forest, \"\n",
|
||||
" + \"a lone fox ventured out at dusk, seeking a lost treasure. \"\n",
|
||||
" + \"With each step, memories flooded back, guiding its path. \"\n",
|
||||
" + \"As the moon rose high, illuminating the night, the fox unearthed \"\n",
|
||||
" + \"not gold, but a forgotten friendship, worth more than any riches.\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(f\"Summary generated by OracleSummary: {summary}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### End to End Demo\n",
|
||||
"Please refer to our complete demo guide [Oracle AI Vector Search End-to-End Demo Guide](https://github.com/langchain-ai/langchain/tree/master/cookbook/oracleai_demo.ipynb) to build an end to end RAG pipeline with the help of Oracle AI Vector Search.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
395
docs/docs/integrations/tools/playwright.ipynb
Normal file
395
docs/docs/integrations/tools/playwright.ipynb
Normal file
File diff suppressed because one or more lines are too long
@@ -7,7 +7,7 @@
|
||||
"id": "245a954a"
|
||||
},
|
||||
"source": [
|
||||
"# Polygon Stock Market API Tools\n",
|
||||
"# Polygon IO\n",
|
||||
"\n",
|
||||
">[Polygon](https://polygon.io/) The Polygon.io Stocks API provides REST endpoints that let you query the latest market data from all US stock exchanges.\n",
|
||||
"\n",
|
||||
@@ -25,7 +25,7 @@
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdin",
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ········\n"
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
"id": "9363398d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# PowerBI Dataset\n",
|
||||
"# PowerBI Toolkit\n",
|
||||
"\n",
|
||||
"This notebook showcases an agent interacting with a `Power BI Dataset`. The agent can answer more general questions about a dataset, as well as recover from errors.\n",
|
||||
"\n",
|
||||
File diff suppressed because one or more lines are too long
@@ -5,7 +5,7 @@
|
||||
"id": "e49f1e0d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Robocorp\n",
|
||||
"# Robocorp Toolkit\n",
|
||||
"\n",
|
||||
"This notebook covers how to get started with [Robocorp Action Server](https://github.com/robocorp/robocorp) action toolkit and LangChain.\n",
|
||||
"\n",
|
||||
@@ -1,441 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6510f51c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Search Tools\n",
|
||||
"\n",
|
||||
"This notebook shows off usage of various search tools."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "e6860c2d",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": true
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import AgentType, initialize_agent, load_tools\n",
|
||||
"from langchain_openai import OpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "dadbcfcd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ee251155",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Google Serper API Wrapper\n",
|
||||
"\n",
|
||||
"First, let's try to use the Google Serper API tool."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "0cdaa487",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tools = load_tools([\"google-serper\"], llm=llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "01b1ab4a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = initialize_agent(\n",
|
||||
" tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "5cf44ec0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m I should look up the current weather conditions.\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: \"weather in Pomfret\"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m37°F\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the current temperature in Pomfret.\n",
|
||||
"Final Answer: The current temperature in Pomfret is 37°F.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The current temperature in Pomfret is 37°F.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"What is the weather in Pomfret?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8786bdc8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## SearchApi\n",
|
||||
"\n",
|
||||
"Second, let's try SearchApi tool."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "5fd5ca32",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tools = load_tools([\"searchapi\"], llm=llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "547c9cf5-aa4d-48ed-b7a5-29ecc1491adf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = initialize_agent(\n",
|
||||
" tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "a7564c40-83ec-490b-ad36-385be5c20e58",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m I need to find out the current weather in Pomfret.\n",
|
||||
"Action: searchapi\n",
|
||||
"Action Input: \"weather in Pomfret\"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mThu 14 | Day ... Some clouds this morning will give way to generally sunny skies for the afternoon. High 73F. Winds NW at 5 to 10 mph.\n",
|
||||
"Hourly Weather-Pomfret, CT · 1 pm. 71°. 0%. Sunny. Feels Like71°. WindNW 9 mph · 2 pm. 72°. 0%. Sunny. Feels Like72°. WindNW 9 mph · 3 pm. 72°. 0%. Sunny. Feels ...\n",
|
||||
"10 Day Weather-Pomfret, VT. As of 4:28 am EDT. Today. 68°/48°. 4%. Thu 14 | Day. 68°. 4%. WNW 10 mph. Some clouds this morning will give way to generally ...\n",
|
||||
"Be prepared with the most accurate 10-day forecast for Pomfret, MD with highs, lows, chance of precipitation from The Weather Channel and Weather.com.\n",
|
||||
"Current Weather. 10:00 PM. 65°F. RealFeel® 67°. Mostly cloudy. LOCAL HURRICANE TRACKER. Category2. Lee. Late Friday Night - Saturday Afternoon.\n",
|
||||
"10 Day Weather-Pomfret, NY. As of 5:09 pm EDT. Tonight. --/55°. 10%. Wed 13 | Night. 55°. 10%. NW 11 mph. Some clouds. Low near 55F.\n",
|
||||
"Pomfret CT. Overnight. Overnight: Patchy fog before 3am, then patchy fog after 4am. Otherwise, mostly. Patchy Fog. Low: 58 °F. Thursday.\n",
|
||||
"Isolated showers. Mostly cloudy, with a high near 76. Calm wind. Chance of precipitation is 20%. Tonight. Mostly Cloudy. Mostly cloudy, with a ...\n",
|
||||
"Partly sunny, with a high near 67. Breezy, with a north wind 18 to 22 mph, with gusts as high as 34 mph. Chance of precipitation is 30%. ... A chance of showers ...\n",
|
||||
"Today's Weather - Pomfret, CT ... Patchy fog. Showers. Lows in the upper 50s. Northwest winds around 5 mph. Chance of rain near 100 percent. ... Sunny. Patchy fog ...\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: The current weather in Pomfret is mostly cloudy with a high near 67 and a chance of showers. Winds are from the north at 18 to 22 mph with gusts up to 34 mph.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The current weather in Pomfret is mostly cloudy with a high near 67 and a chance of showers. Winds are from the north at 18 to 22 mph with gusts up to 34 mph.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"What is the weather in Pomfret?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0e39fc46",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## SerpAPI\n",
|
||||
"\n",
|
||||
"Now, let's use the SerpAPI tool."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "e1c39a0f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tools = load_tools([\"serpapi\"], llm=llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "900dd6cb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = initialize_agent(\n",
|
||||
" tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "342ee8ec",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m I need to find out what the current weather is in Pomfret.\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: \"weather in Pomfret\"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m{'type': 'weather_result', 'temperature': '69', 'unit': 'Fahrenheit', 'precipitation': '2%', 'humidity': '90%', 'wind': '1 mph', 'location': 'Pomfret, CT', 'date': 'Sunday 9:00 PM', 'weather': 'Clear'}\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the current weather in Pomfret.\n",
|
||||
"Final Answer: The current weather in Pomfret is 69 degrees Fahrenheit, 2% precipitation, 90% humidity, and 1 mph wind. It is currently clear.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The current weather in Pomfret is 69 degrees Fahrenheit, 2% precipitation, 90% humidity, and 1 mph wind. It is currently clear.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"What is the weather in Pomfret?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "adc8bb68",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## GoogleSearchAPIWrapper\n",
|
||||
"\n",
|
||||
"Now, let's use the official Google Search API Wrapper."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "ef24f92d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tools = load_tools([\"google-search\"], llm=llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "909cd28b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = initialize_agent(\n",
|
||||
" tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "46515d2a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m I should look up the current weather conditions.\n",
|
||||
"Action: Google Search\n",
|
||||
"Action Input: \"weather in Pomfret\"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mShowers early becoming a steady light rain later in the day. Near record high temperatures. High around 60F. Winds SW at 10 to 15 mph. Chance of rain 60%. Pomfret, CT Weather Forecast, with current conditions, wind, air quality, and what to expect for the next 3 days. Hourly Weather-Pomfret, CT. As of 12:52 am EST. Special Weather Statement +2 ... Hazardous Weather Conditions. Special Weather Statement ... Pomfret CT. Tonight ... National Digital Forecast Database Maximum Temperature Forecast. Pomfret Center Weather Forecasts. Weather Underground provides local & long-range weather forecasts, weatherreports, maps & tropical weather conditions for ... Pomfret, CT 12 hour by hour weather forecast includes precipitation, temperatures, sky conditions, rain chance, dew-point, relative humidity, wind direction ... North Pomfret Weather Forecasts. Weather Underground provides local & long-range weather forecasts, weatherreports, maps & tropical weather conditions for ... Today's Weather - Pomfret, CT. Dec 31, 2022 4:00 PM. Putnam MS. --. Weather forecast icon. Feels like --. Hi --. Lo --. Pomfret, CT temperature trend for the next 14 Days. Find daytime highs and nighttime lows from TheWeatherNetwork.com. Pomfret, MD Weather Forecast Date: 332 PM EST Wed Dec 28 2022. The area/counties/county of: Charles, including the cites of: St. Charles and Waldorf.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the current weather conditions in Pomfret.\n",
|
||||
"Final Answer: Showers early becoming a steady light rain later in the day. Near record high temperatures. High around 60F. Winds SW at 10 to 15 mph. Chance of rain 60%.\u001b[0m\n",
|
||||
"\u001b[1m> Finished AgentExecutor chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Showers early becoming a steady light rain later in the day. Near record high temperatures. High around 60F. Winds SW at 10 to 15 mph. Chance of rain 60%.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"What is the weather in Pomfret?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "eabad3af",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## SearxNG Meta Search Engine\n",
|
||||
"\n",
|
||||
"Here we will be using a self hosted SearxNG meta search engine."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "b196c704",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tools = load_tools([\"searx-search\"], searx_host=\"http://localhost:8888\", llm=llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "9023eeaa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = initialize_agent(\n",
|
||||
" tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "3aad92c1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m I should look up the current weather\n",
|
||||
"Action: SearX Search\n",
|
||||
"Action Input: \"weather in Pomfret\"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mMainly cloudy with snow showers around in the morning. High around 40F. Winds NNW at 5 to 10 mph. Chance of snow 40%. Snow accumulations less than one inch.\n",
|
||||
"\n",
|
||||
"10 Day Weather - Pomfret, MD As of 1:37 pm EST Today 49°/ 41° 52% Mon 27 | Day 49° 52% SE 14 mph Cloudy with occasional rain showers. High 49F. Winds SE at 10 to 20 mph. Chance of rain 50%....\n",
|
||||
"\n",
|
||||
"10 Day Weather - Pomfret, VT As of 3:51 am EST Special Weather Statement Today 39°/ 32° 37% Wed 01 | Day 39° 37% NE 4 mph Cloudy with snow showers developing for the afternoon. High 39F....\n",
|
||||
"\n",
|
||||
"Pomfret, CT ; Current Weather. 1:06 AM. 35°F · RealFeel® 32° ; TODAY'S WEATHER FORECAST. 3/3. 44°Hi. RealFeel® 50° ; TONIGHT'S WEATHER FORECAST. 3/3. 32°Lo.\n",
|
||||
"\n",
|
||||
"Pomfret, MD Forecast Today Hourly Daily Morning 41° 1% Afternoon 43° 0% Evening 35° 3% Overnight 34° 2% Don't Miss Finally, Here’s Why We Get More Colds and Flu When It’s Cold Coast-To-Coast...\n",
|
||||
"\n",
|
||||
"Pomfret, MD Weather Forecast | AccuWeather Current Weather 5:35 PM 35° F RealFeel® 36° RealFeel Shade™ 36° Air Quality Excellent Wind E 3 mph Wind Gusts 5 mph Cloudy More Details WinterCast...\n",
|
||||
"\n",
|
||||
"Pomfret, VT Weather Forecast | AccuWeather Current Weather 11:21 AM 23° F RealFeel® 27° RealFeel Shade™ 25° Air Quality Fair Wind ESE 3 mph Wind Gusts 7 mph Cloudy More Details WinterCast...\n",
|
||||
"\n",
|
||||
"Pomfret Center, CT Weather Forecast | AccuWeather Daily Current Weather 6:50 PM 39° F RealFeel® 36° Air Quality Fair Wind NW 6 mph Wind Gusts 16 mph Mostly clear More Details WinterCast...\n",
|
||||
"\n",
|
||||
"12:00 pm · Feels Like36° · WindN 5 mph · Humidity43% · UV Index3 of 10 · Cloud Cover65% · Rain Amount0 in ...\n",
|
||||
"\n",
|
||||
"Pomfret Center, CT Weather Conditions | Weather Underground star Popular Cities San Francisco, CA 49 °F Clear Manhattan, NY 37 °F Fair Schiller Park, IL (60176) warning39 °F Mostly Cloudy...\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: The current weather in Pomfret is mainly cloudy with snow showers around in the morning. The temperature is around 40F with winds NNW at 5 to 10 mph. Chance of snow is 40%.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The current weather in Pomfret is mainly cloudy with snow showers around in the morning. The temperature is around 40F with winds NNW at 5 to 10 mph. Chance of snow is 40%.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"What is the weather in Pomfret\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "b1677b440931f40d89ef8be7bf03acb108ce003de0ac9b18e8d43753ea2e7103"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -4,16 +4,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Slack\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# SlackToolkit\n",
|
||||
"# Slack Toolkit\n",
|
||||
"\n",
|
||||
"This will help you getting started with the Slack [toolkit](/docs/concepts/#toolkits). For detailed documentation of all SlackToolkit features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.slack.toolkit.SlackToolkit.html).\n",
|
||||
"\n",
|
||||
@@ -123,10 +114,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[SlackGetChannel(client=<slack_sdk.web.client.WebClient object at 0x10ce3a4d0>),\n",
|
||||
" SlackGetMessage(client=<slack_sdk.web.client.WebClient object at 0x10ce3a0e0>),\n",
|
||||
" SlackScheduleMessage(client=<slack_sdk.web.client.WebClient object at 0x10ce3a050>),\n",
|
||||
" SlackSendMessage(client=<slack_sdk.web.client.WebClient object at 0x10ce3a020>)]"
|
||||
"[SlackGetChannel(client=<slack_sdk.web.client.WebClient object at 0x113caa8c0>),\n",
|
||||
" SlackGetMessage(client=<slack_sdk.web.client.WebClient object at 0x113caa4d0>),\n",
|
||||
" SlackScheduleMessage(client=<slack_sdk.web.client.WebClient object at 0x113caa440>),\n",
|
||||
" SlackSendMessage(client=<slack_sdk.web.client.WebClient object at 0x113caa410>)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
@@ -163,7 +154,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -177,7 +168,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -189,12 +180,12 @@
|
||||
"When was the #general channel created?\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" get_channelid_name_dict (call_mINmB55OWDIkXykGXZXaL5Ar)\n",
|
||||
" Call ID: call_mINmB55OWDIkXykGXZXaL5Ar\n",
|
||||
" get_channelid_name_dict (call_NXDkALjoOx97uF1v0CoZTqtJ)\n",
|
||||
" Call ID: call_NXDkALjoOx97uF1v0CoZTqtJ\n",
|
||||
" Args:\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"\n",
|
||||
"The #general channel was created on Unix timestamp 1671043305, which corresponds to \"Mon, 12 Dec 2022 18:41:45 GMT\" in human-readable format.\n"
|
||||
"The #general channel was created on timestamp 1671043305.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -211,53 +202,6 @@
|
||||
" event[\"messages\"][-1].pretty_print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example with AgentExecutor:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import hub\n",
|
||||
"from langchain.agents import AgentExecutor, create_openai_tools_agent\n",
|
||||
"from langchain_openai import ChatOpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = ChatOpenAI(temperature=0, model=\"gpt-4\")\n",
|
||||
"prompt = hub.pull(\"hwchase17/openai-tools-agent\")\n",
|
||||
"agent = create_openai_tools_agent(\n",
|
||||
" tools=toolkit.get_tools(),\n",
|
||||
" llm=llm,\n",
|
||||
" prompt=prompt,\n",
|
||||
")\n",
|
||||
"agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor.invoke(\n",
|
||||
" {\n",
|
||||
" \"input\": \"Send a greeting to my coworkers in the #general channel. Note use `channel` as key of channel id, and `message` as key of content to sent in the channel.\"\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
@@ -267,73 +211,33 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"================================\u001b[1m Human Message \u001b[0m=================================\n",
|
||||
"\n",
|
||||
"Send a friendly greeting to channel C072Q1LP4QM.\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" send_message (call_xQxpv4wFeAZNZgSBJRIuaizi)\n",
|
||||
" Call ID: call_xQxpv4wFeAZNZgSBJRIuaizi\n",
|
||||
" Args:\n",
|
||||
" message: Hello! Have a great day!\n",
|
||||
" channel: C072Q1LP4QM\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mI need to get the list of channels in the workspace.\n",
|
||||
"Action: get_channelid_name_dict\n",
|
||||
"Action Input: {}\u001b[0m\u001b[36;1m\u001b[1;3m[{\"id\": \"C052SCUP4UD\", \"name\": \"general\", \"created\": 1681297313, \"num_members\": 1}, {\"id\": \"C052VBBU4M8\", \"name\": \"test-bots\", \"created\": 1681297343, \"num_members\": 2}, {\"id\": \"C053805TNUR\", \"name\": \"random\", \"created\": 1681297313, \"num_members\": 2}]\u001b[0m\u001b[32;1m\u001b[1;3mI now have the list of channels and their names.\n",
|
||||
"Final Answer: There are 3 channels in the workspace. Their names are \"general\", \"test-bots\", and \"random\".\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
"I have sent a friendly greeting to the channel C072Q1LP4QM.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'How many channels are in the workspace? Please list out their names.',\n",
|
||||
" 'output': 'There are 3 channels in the workspace. Their names are \"general\", \"test-bots\", and \"random\".'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.invoke(\n",
|
||||
" {\"input\": \"How many channels are in the workspace? Please list out their names.\"}\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mFirst, I need to identify the channel ID for the #introductions channel.\n",
|
||||
"Action: get_channelid_name_dict\n",
|
||||
"Action Input: None\u001b[0m\u001b[36;1m\u001b[1;3m[{\"id\": \"C052SCUP4UD\", \"name\": \"general\", \"created\": 1681297313, \"num_members\": 1}, {\"id\": \"C052VBBU4M8\", \"name\": \"test-bots\", \"created\": 1681297343, \"num_members\": 2}, {\"id\": \"C053805TNUR\", \"name\": \"random\", \"created\": 1681297313, \"num_members\": 2}]\u001b[0m\u001b[32;1m\u001b[1;3mThe #introductions channel is not listed in the observed channels. I need to inform the user that the #introductions channel does not exist or is not accessible.\n",
|
||||
"Final Answer: The #introductions channel does not exist or is not accessible.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'Tell me the number of messages sent in the #introductions channel from the past month.',\n",
|
||||
" 'output': 'The #introductions channel does not exist or is not accessible.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.invoke(\n",
|
||||
" {\n",
|
||||
" \"input\": \"Tell me the number of messages sent in the #introductions channel from the past month.\"\n",
|
||||
" }\n",
|
||||
")"
|
||||
"example_query = \"Send a friendly greeting to channel C072Q1LP4QM.\"\n",
|
||||
"\n",
|
||||
"events = agent_executor.stream(\n",
|
||||
" {\"messages\": [(\"user\", example_query)]},\n",
|
||||
" stream_mode=\"values\",\n",
|
||||
")\n",
|
||||
"for event in events:\n",
|
||||
" message = event[\"messages\"][-1]\n",
|
||||
" if message.type != \"tool\": # mask sensitive information\n",
|
||||
" event[\"messages\"][-1].pretty_print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -342,7 +246,7 @@
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all __ModuleName__Toolkit features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.slack.toolkit.SlackToolkit.html)."
|
||||
"For detailed documentation of all `SlackToolkit` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.slack.toolkit.SlackToolkit.html)."
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -4,9 +4,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Spark SQL\n",
|
||||
"# Spark SQL Toolkit\n",
|
||||
"\n",
|
||||
"This notebook shows how to use agents to interact with `Spark SQL`. Similar to [SQL Database Agent](/docs/integrations/toolkits/sql_database), it is designed to address general inquiries about `Spark SQL` and facilitate error recovery.\n",
|
||||
"This notebook shows how to use agents to interact with `Spark SQL`. Similar to [SQL Database Agent](/docs/integrations/tools/sql_database), it is designed to address general inquiries about `Spark SQL` and facilitate error recovery.\n",
|
||||
"\n",
|
||||
"**NOTE: Note that, as this agent is in active development, all answers might not be correct. Additionally, it is not guaranteed that the agent won't perform DML statements on your Spark cluster given certain questions. Be careful running it on sensitive data!**"
|
||||
]
|
||||
@@ -2,390 +2,591 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"id": "0e499e90-7a6d-4fab-8aab-31a4df417601",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# SQL Database\n",
|
||||
"# SQLDatabase Toolkit\n",
|
||||
"\n",
|
||||
":::note\n",
|
||||
"The `SQLDatabase` adapter utility is a wrapper around a database connection.\n",
|
||||
"This will help you getting started with the SQL Database [toolkit](/docs/concepts/#toolkits). For detailed documentation of all `SQLDatabaseToolkit` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.sql.toolkit.SQLDatabaseToolkit.html).\n",
|
||||
"\n",
|
||||
"For talking to SQL databases, it uses the [SQLAlchemy] Core API .\n",
|
||||
":::\n",
|
||||
"Tools within the `SQLDatabaseToolkit` are designed to interact with a `SQL` database. \n",
|
||||
"\n",
|
||||
"A common application is to enable agents to answer questions using data in a relational database, potentially in an iterative fashion (e.g., recovering from errors).\n",
|
||||
"\n",
|
||||
"This notebook shows how to use the utility to access an SQLite database.\n",
|
||||
"It uses the example [Chinook Database], and demonstrates those features:\n",
|
||||
"**⚠️ Security note ⚠️**\n",
|
||||
"\n",
|
||||
"- Query using SQL\n",
|
||||
"- Query using SQLAlchemy selectable\n",
|
||||
"- Fetch modes `cursor`, `all`, and `one`\n",
|
||||
"- Bind query parameters\n",
|
||||
"Building Q&A systems of SQL databases requires executing model-generated SQL queries. There are inherent risks in doing this. Make sure that your database connection permissions are always scoped as narrowly as possible for your chain/agent's needs. This will mitigate though not eliminate the risks of building a model-driven system. For more on general security best practices, [see here](/docs/security).\n",
|
||||
"\n",
|
||||
"[Chinook Database]: https://github.com/lerocha/chinook-database\n",
|
||||
"[SQLAlchemy]: https://www.sqlalchemy.org/\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"You can use the `Tool` or `@tool` decorator to create a tool from this utility.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"::: {.callout-caution}\n",
|
||||
"If creating a tool from the SQLDatbase utility and combining it with an LLM or exposing it to an end user\n",
|
||||
"remember to follow good security practices.\n",
|
||||
"\n",
|
||||
"See security information: https://python.langchain.com/docs/security\n",
|
||||
":::"
|
||||
"If you want to get automated tracing from runs of individual tools, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"jupyter": {
|
||||
"outputs_hidden": true
|
||||
}
|
||||
},
|
||||
"id": "3de6e3be-1fd9-42a3-8564-8ca7dca11e1c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!wget 'https://github.com/lerocha/chinook-database/releases/download/v1.4.2/Chinook_Sqlite.sql'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"1|AC/DC\r\n",
|
||||
"2|Accept\r\n",
|
||||
"3|Aerosmith\r\n",
|
||||
"4|Alanis Morissette\r\n",
|
||||
"5|Alice In Chains\r\n",
|
||||
"6|Antônio Carlos Jobim\r\n",
|
||||
"7|Apocalyptica\r\n",
|
||||
"8|Audioslave\r\n",
|
||||
"9|BackBeat\r\n",
|
||||
"10|Billy Cobham\r\n",
|
||||
"11|Black Label Society\r\n",
|
||||
"12|Black Sabbath\r\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!sqlite3 -bail -cmd '.read Chinook_Sqlite.sql' -cmd 'SELECT * FROM Artist LIMIT 12;' -cmd '.quit'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!sqlite3 -bail -cmd '.read Chinook_Sqlite.sql' -cmd '.save Chinook.db' -cmd '.quit'"
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"id": "31896b61-68d2-4b4d-be9d-b829eda327d1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialize Database"
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"This toolkit lives in the `langchain-community` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c4933e04-9120-4ccc-9ef7-369987823b0e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain-community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6ad08dbe-1642-448c-b58d-153810024375",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For demonstration purposes, we will access a prompt in the LangChain [Hub](https://smith.langchain.com/hub). We will also require `langgraph` to demonstrate the use of the toolkit with an agent. This is not required to use the toolkit."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f3dead45-9908-497d-a5a3-bce30642e88f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchainhub langgraph"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "804533b1-2f16-497b-821b-c82d67fcf7b6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"The `SQLDatabaseToolkit` toolkit requires:\n",
|
||||
"\n",
|
||||
"- a [SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html) object;\n",
|
||||
"- a LLM or chat model (for instantiating the [QuerySQLCheckerTool](https://api.python.langchain.com/en/latest/tools/langchain_community.tools.sql_database.tool.QuerySQLCheckerTool.html) tool).\n",
|
||||
"\n",
|
||||
"Below, we instantiate the toolkit with these objects. Let's first create a database object.\n",
|
||||
"\n",
|
||||
"This guide uses the example `Chinook` database based on [these instructions](https://database.guide/2-sample-databases-sqlite/).\n",
|
||||
"\n",
|
||||
"Below we will use the `requests` library to pull the `.sql` file and create an in-memory SQLite database. Note that this approach is lightweight, but ephemeral and not thread-safe. If you'd prefer, you can follow the instructions to save the file locally as `Chinook.db` and instantiate the database via `db = SQLDatabase.from_uri(\"sqlite:///Chinook.db\")`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "40d05f9b-5a8f-4307-8f8b-4153db0fdfa9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sqlite3\n",
|
||||
"\n",
|
||||
"import requests\n",
|
||||
"from langchain_community.utilities.sql_database import SQLDatabase\n",
|
||||
"from sqlalchemy import create_engine\n",
|
||||
"from sqlalchemy.pool import StaticPool\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_engine_for_chinook_db():\n",
|
||||
" \"\"\"Pull sql file, populate in-memory database, and create engine.\"\"\"\n",
|
||||
" url = \"https://raw.githubusercontent.com/lerocha/chinook-database/master/ChinookDatabase/DataSources/Chinook_Sqlite.sql\"\n",
|
||||
" response = requests.get(url)\n",
|
||||
" sql_script = response.text\n",
|
||||
"\n",
|
||||
" connection = sqlite3.connect(\":memory:\", check_same_thread=False)\n",
|
||||
" connection.executescript(sql_script)\n",
|
||||
" return create_engine(\n",
|
||||
" \"sqlite://\",\n",
|
||||
" creator=lambda: connection,\n",
|
||||
" poolclass=StaticPool,\n",
|
||||
" connect_args={\"check_same_thread\": False},\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"engine = get_engine_for_chinook_db()\n",
|
||||
"\n",
|
||||
"db = SQLDatabase(engine)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2b9a6326-78fd-4c42-a1cb-4316619ac449",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We will also need a LLM or chat model:\n",
|
||||
"\n",
|
||||
"```{=mdx}\n",
|
||||
"import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
|
||||
"\n",
|
||||
"<ChatModelTabs customVarName=\"llm\" />\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"id": "cc6e6108-83d9-404f-8f31-474c2fbf5f6c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from pprint import pprint\n",
|
||||
"# | output: false\n",
|
||||
"# | echo: false\n",
|
||||
"\n",
|
||||
"import sqlalchemy as sa\n",
|
||||
"from langchain_community.utilities import SQLDatabase\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"db = SQLDatabase.from_uri(\"sqlite:///Chinook.db\")"
|
||||
"llm = ChatOpenAI(temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"id": "77925e72-4730-43c3-8726-d68cedf635f4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Query as cursor\n",
|
||||
"\n",
|
||||
"The fetch mode `cursor` returns results as SQLAlchemy's\n",
|
||||
"`CursorResult` instance."
|
||||
"We can now instantiate the toolkit:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'sqlalchemy.engine.cursor.CursorResult'>\n",
|
||||
"[{'ArtistId': 1, 'Name': 'AC/DC'},\n",
|
||||
" {'ArtistId': 2, 'Name': 'Accept'},\n",
|
||||
" {'ArtistId': 3, 'Name': 'Aerosmith'},\n",
|
||||
" {'ArtistId': 4, 'Name': 'Alanis Morissette'},\n",
|
||||
" {'ArtistId': 5, 'Name': 'Alice In Chains'},\n",
|
||||
" {'ArtistId': 6, 'Name': 'Antônio Carlos Jobim'},\n",
|
||||
" {'ArtistId': 7, 'Name': 'Apocalyptica'},\n",
|
||||
" {'ArtistId': 8, 'Name': 'Audioslave'},\n",
|
||||
" {'ArtistId': 9, 'Name': 'BackBeat'},\n",
|
||||
" {'ArtistId': 10, 'Name': 'Billy Cobham'},\n",
|
||||
" {'ArtistId': 11, 'Name': 'Black Label Society'},\n",
|
||||
" {'ArtistId': 12, 'Name': 'Black Sabbath'}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 3,
|
||||
"id": "42bd5a41-672a-4a53-b70a-2f0c0555758c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"result = db.run(\"SELECT * FROM Artist LIMIT 12;\", fetch=\"cursor\")\n",
|
||||
"print(type(result))\n",
|
||||
"pprint(list(result.mappings()))"
|
||||
"from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit\n",
|
||||
"\n",
|
||||
"toolkit = SQLDatabaseToolkit(db=db, llm=llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"id": "b2f882cf-4156-4a9f-a714-db97ec8ccc37",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Query as string payload\n",
|
||||
"## Tools\n",
|
||||
"\n",
|
||||
"The fetch modes `all` and `one` return results in string format."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'str'>\n",
|
||||
"[(1, 'AC/DC'), (2, 'Accept'), (3, 'Aerosmith'), (4, 'Alanis Morissette'), (5, 'Alice In Chains'), (6, 'Antônio Carlos Jobim'), (7, 'Apocalyptica'), (8, 'Audioslave'), (9, 'BackBeat'), (10, 'Billy Cobham'), (11, 'Black Label Society'), (12, 'Black Sabbath')]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = db.run(\"SELECT * FROM Artist LIMIT 12;\", fetch=\"all\")\n",
|
||||
"print(type(result))\n",
|
||||
"print(result)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'str'>\n",
|
||||
"[(1, 'AC/DC')]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = db.run(\"SELECT * FROM Artist LIMIT 12;\", fetch=\"one\")\n",
|
||||
"print(type(result))\n",
|
||||
"print(result)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## Query with parameters\n",
|
||||
"\n",
|
||||
"In order to bind query parameters, use the optional `parameters` argument."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 41,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[{'ArtistId': 35, 'Name': 'Pedro Luís & A Parede'},\n",
|
||||
" {'ArtistId': 115, 'Name': 'Page & Plant'},\n",
|
||||
" {'ArtistId': 116, 'Name': 'Passengers'},\n",
|
||||
" {'ArtistId': 117, 'Name': \"Paul D'Ianno\"},\n",
|
||||
" {'ArtistId': 118, 'Name': 'Pearl Jam'},\n",
|
||||
" {'ArtistId': 119, 'Name': 'Peter Tosh'},\n",
|
||||
" {'ArtistId': 120, 'Name': 'Pink Floyd'},\n",
|
||||
" {'ArtistId': 121, 'Name': 'Planet Hemp'},\n",
|
||||
" {'ArtistId': 186, 'Name': 'Pedro Luís E A Parede'},\n",
|
||||
" {'ArtistId': 256, 'Name': 'Philharmonia Orchestra & Sir Neville Marriner'},\n",
|
||||
" {'ArtistId': 275, 'Name': 'Philip Glass Ensemble'}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = db.run(\n",
|
||||
" \"SELECT * FROM Artist WHERE Name LIKE :search;\",\n",
|
||||
" parameters={\"search\": \"p%\"},\n",
|
||||
" fetch=\"cursor\",\n",
|
||||
")\n",
|
||||
"pprint(list(result.mappings()))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## Query with SQLAlchemy selectable\n",
|
||||
"\n",
|
||||
"Other than plain-text SQL statements, the adapter also accepts SQLAlchemy selectables."
|
||||
"View available tools:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
"id": "a18c3e69-bee0-4f5d-813e-eeb540f41b98",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[QuerySQLDataBaseTool(description=\"Input to this tool is a detailed and correct SQL query, output is a result from the database. If the query is not correct, an error message will be returned. If an error is returned, rewrite the query, check the query, and try again. If you encounter an issue with Unknown column 'xxxx' in 'field list', use sql_db_schema to query the correct table fields.\", db=<langchain_community.utilities.sql_database.SQLDatabase object at 0x105e02860>),\n",
|
||||
" InfoSQLDatabaseTool(description='Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables. Be sure that the tables actually exist by calling sql_db_list_tables first! Example Input: table1, table2, table3', db=<langchain_community.utilities.sql_database.SQLDatabase object at 0x105e02860>),\n",
|
||||
" ListSQLDatabaseTool(db=<langchain_community.utilities.sql_database.SQLDatabase object at 0x105e02860>),\n",
|
||||
" QuerySQLCheckerTool(description='Use this tool to double check if your query is correct before executing it. Always use this tool before executing a query with sql_db_query!', db=<langchain_community.utilities.sql_database.SQLDatabase object at 0x105e02860>, llm=ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x1148a97b0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x1148aaec0>, temperature=0.0, openai_api_key=SecretStr('**********'), openai_proxy=''), llm_chain=LLMChain(prompt=PromptTemplate(input_variables=['dialect', 'query'], template='\\n{query}\\nDouble check the {dialect} query above for common mistakes, including:\\n- Using NOT IN with NULL values\\n- Using UNION when UNION ALL should have been used\\n- Using BETWEEN for exclusive ranges\\n- Data type mismatch in predicates\\n- Properly quoting identifiers\\n- Using the correct number of arguments for functions\\n- Casting to the correct data type\\n- Using the proper columns for joins\\n\\nIf there are any of the above mistakes, rewrite the query. If there are no mistakes, just reproduce the original query.\\n\\nOutput the final SQL query only.\\n\\nSQL Query: '), llm=ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x1148a97b0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x1148aaec0>, temperature=0.0, openai_api_key=SecretStr('**********'), openai_proxy='')))]"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
},
|
||||
],
|
||||
"source": [
|
||||
"toolkit.get_tools()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5f5751e3-2e98-485f-8164-db8094039c25",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"API references:\n",
|
||||
"\n",
|
||||
"- [QuerySQLDataBaseTool](https://api.python.langchain.com/en/latest/tools/langchain_community.tools.sql_database.tool.QuerySQLDataBaseTool.html)\n",
|
||||
"- [InfoSQLDatabaseTool](https://api.python.langchain.com/en/latest/tools/langchain_community.tools.sql_database.tool.InfoSQLDatabaseTool.html)\n",
|
||||
"- [ListSQLDatabaseTool](https://api.python.langchain.com/en/latest/tools/langchain_community.tools.sql_database.tool.ListSQLDatabaseTool.html)\n",
|
||||
"- [QuerySQLCheckerTool](https://api.python.langchain.com/en/latest/tools/langchain_community.tools.sql_database.tool.QuerySQLCheckerTool.html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c067e0ed-dcca-4dcc-81b2-a0eeb4fc2a9f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use within an agent\n",
|
||||
"\n",
|
||||
"Following the [SQL Q&A Tutorial](/docs/tutorials/sql_qa/#agents), below we equip a simple question-answering agent with the tools in our toolkit. First we pull a relevant prompt and populate it with its required parameters:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "eda12f8b-be90-4697-ac84-2ece9e2d1708",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[{'ArtistId': 35, 'Name': 'Pedro Luís & A Parede'},\n",
|
||||
" {'ArtistId': 115, 'Name': 'Page & Plant'},\n",
|
||||
" {'ArtistId': 116, 'Name': 'Passengers'},\n",
|
||||
" {'ArtistId': 117, 'Name': \"Paul D'Ianno\"},\n",
|
||||
" {'ArtistId': 118, 'Name': 'Pearl Jam'},\n",
|
||||
" {'ArtistId': 119, 'Name': 'Peter Tosh'},\n",
|
||||
" {'ArtistId': 120, 'Name': 'Pink Floyd'},\n",
|
||||
" {'ArtistId': 121, 'Name': 'Planet Hemp'},\n",
|
||||
" {'ArtistId': 186, 'Name': 'Pedro Luís E A Parede'},\n",
|
||||
" {'ArtistId': 256, 'Name': 'Philharmonia Orchestra & Sir Neville Marriner'},\n",
|
||||
" {'ArtistId': 275, 'Name': 'Philip Glass Ensemble'}]\n"
|
||||
"['dialect', 'top_k']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# In order to build a selectable on SA's Core API, you need a table definition.\n",
|
||||
"metadata = sa.MetaData()\n",
|
||||
"artist = sa.Table(\n",
|
||||
" \"Artist\",\n",
|
||||
" metadata,\n",
|
||||
" sa.Column(\"ArtistId\", sa.INTEGER, primary_key=True),\n",
|
||||
" sa.Column(\"Name\", sa.TEXT),\n",
|
||||
")\n",
|
||||
"from langchain import hub\n",
|
||||
"\n",
|
||||
"# Build a selectable with the same semantics of the recent query.\n",
|
||||
"query = sa.select(artist).where(artist.c.Name.like(\"p%\"))\n",
|
||||
"result = db.run(query, fetch=\"cursor\")\n",
|
||||
"pprint(list(result.mappings()))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## Query with execution options\n",
|
||||
"prompt_template = hub.pull(\"langchain-ai/sql-agent-system-prompt\")\n",
|
||||
"\n",
|
||||
"It is possible to augment the statement invocation with custom execution options.\n",
|
||||
"For example, when applying a schema name translation, subsequent statements will\n",
|
||||
"fail, because they try to hit a non-existing table."
|
||||
"assert len(prompt_template.messages) == 1\n",
|
||||
"print(prompt_template.input_variables)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"execution_count": 6,
|
||||
"id": "3470ae96-e5e5-4717-a6d6-d7d28c7b7347",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = sa.select(artist).where(artist.c.Name.like(\"p%\"))\n",
|
||||
"db.run(query, fetch=\"cursor\", execution_options={\"schema_translate_map\": {None: \"bar\"}})"
|
||||
"system_message = prompt_template.format(dialect=\"SQLite\", top_k=5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "97930c07-36d1-4137-94ae-fe5ac83ecc44",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We then instantiate the agent:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "48bca92c-9b4b-4d5c-bcce-1b239c9e901c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langgraph.prebuilt import create_react_agent\n",
|
||||
"\n",
|
||||
"agent_executor = create_react_agent(\n",
|
||||
" llm, toolkit.get_tools(), state_modifier=system_message\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "09fb1845-1105-4f41-98b4-24756452a3e3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"And issue it a query:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "39e6d2bf-3194-4aba-854b-63faf919157b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"================================\u001b[1m Human Message \u001b[0m=================================\n",
|
||||
"\n",
|
||||
"Which country's customers spent the most?\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" sql_db_list_tables (call_eiheSxiL0s90KE50XyBnBtJY)\n",
|
||||
" Call ID: call_eiheSxiL0s90KE50XyBnBtJY\n",
|
||||
" Args:\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: sql_db_list_tables\n",
|
||||
"\n",
|
||||
"Album, Artist, Customer, Employee, Genre, Invoice, InvoiceLine, MediaType, Playlist, PlaylistTrack, Track\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" sql_db_schema (call_YKwGWt4UUVmxxY7vjjBDzFLJ)\n",
|
||||
" Call ID: call_YKwGWt4UUVmxxY7vjjBDzFLJ\n",
|
||||
" Args:\n",
|
||||
" table_names: Customer, Invoice, InvoiceLine\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: sql_db_schema\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"CREATE TABLE \"Customer\" (\n",
|
||||
"\t\"CustomerId\" INTEGER NOT NULL, \n",
|
||||
"\t\"FirstName\" NVARCHAR(40) NOT NULL, \n",
|
||||
"\t\"LastName\" NVARCHAR(20) NOT NULL, \n",
|
||||
"\t\"Company\" NVARCHAR(80), \n",
|
||||
"\t\"Address\" NVARCHAR(70), \n",
|
||||
"\t\"City\" NVARCHAR(40), \n",
|
||||
"\t\"State\" NVARCHAR(40), \n",
|
||||
"\t\"Country\" NVARCHAR(40), \n",
|
||||
"\t\"PostalCode\" NVARCHAR(10), \n",
|
||||
"\t\"Phone\" NVARCHAR(24), \n",
|
||||
"\t\"Fax\" NVARCHAR(24), \n",
|
||||
"\t\"Email\" NVARCHAR(60) NOT NULL, \n",
|
||||
"\t\"SupportRepId\" INTEGER, \n",
|
||||
"\tPRIMARY KEY (\"CustomerId\"), \n",
|
||||
"\tFOREIGN KEY(\"SupportRepId\") REFERENCES \"Employee\" (\"EmployeeId\")\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"/*\n",
|
||||
"3 rows from Customer table:\n",
|
||||
"CustomerId\tFirstName\tLastName\tCompany\tAddress\tCity\tState\tCountry\tPostalCode\tPhone\tFax\tEmail\tSupportRepId\n",
|
||||
"1\tLuís\tGonçalves\tEmbraer - Empresa Brasileira de Aeronáutica S.A.\tAv. Brigadeiro Faria Lima, 2170\tSão José dos Campos\tSP\tBrazil\t12227-000\t+55 (12) 3923-5555\t+55 (12) 3923-5566\tluisg@embraer.com.br\t3\n",
|
||||
"2\tLeonie\tKöhler\tNone\tTheodor-Heuss-Straße 34\tStuttgart\tNone\tGermany\t70174\t+49 0711 2842222\tNone\tleonekohler@surfeu.de\t5\n",
|
||||
"3\tFrançois\tTremblay\tNone\t1498 rue Bélanger\tMontréal\tQC\tCanada\tH2G 1A7\t+1 (514) 721-4711\tNone\tftremblay@gmail.com\t3\n",
|
||||
"*/\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"CREATE TABLE \"Invoice\" (\n",
|
||||
"\t\"InvoiceId\" INTEGER NOT NULL, \n",
|
||||
"\t\"CustomerId\" INTEGER NOT NULL, \n",
|
||||
"\t\"InvoiceDate\" DATETIME NOT NULL, \n",
|
||||
"\t\"BillingAddress\" NVARCHAR(70), \n",
|
||||
"\t\"BillingCity\" NVARCHAR(40), \n",
|
||||
"\t\"BillingState\" NVARCHAR(40), \n",
|
||||
"\t\"BillingCountry\" NVARCHAR(40), \n",
|
||||
"\t\"BillingPostalCode\" NVARCHAR(10), \n",
|
||||
"\t\"Total\" NUMERIC(10, 2) NOT NULL, \n",
|
||||
"\tPRIMARY KEY (\"InvoiceId\"), \n",
|
||||
"\tFOREIGN KEY(\"CustomerId\") REFERENCES \"Customer\" (\"CustomerId\")\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"/*\n",
|
||||
"3 rows from Invoice table:\n",
|
||||
"InvoiceId\tCustomerId\tInvoiceDate\tBillingAddress\tBillingCity\tBillingState\tBillingCountry\tBillingPostalCode\tTotal\n",
|
||||
"1\t2\t2021-01-01 00:00:00\tTheodor-Heuss-Straße 34\tStuttgart\tNone\tGermany\t70174\t1.98\n",
|
||||
"2\t4\t2021-01-02 00:00:00\tUllevålsveien 14\tOslo\tNone\tNorway\t0171\t3.96\n",
|
||||
"3\t8\t2021-01-03 00:00:00\tGrétrystraat 63\tBrussels\tNone\tBelgium\t1000\t5.94\n",
|
||||
"*/\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"CREATE TABLE \"InvoiceLine\" (\n",
|
||||
"\t\"InvoiceLineId\" INTEGER NOT NULL, \n",
|
||||
"\t\"InvoiceId\" INTEGER NOT NULL, \n",
|
||||
"\t\"TrackId\" INTEGER NOT NULL, \n",
|
||||
"\t\"UnitPrice\" NUMERIC(10, 2) NOT NULL, \n",
|
||||
"\t\"Quantity\" INTEGER NOT NULL, \n",
|
||||
"\tPRIMARY KEY (\"InvoiceLineId\"), \n",
|
||||
"\tFOREIGN KEY(\"TrackId\") REFERENCES \"Track\" (\"TrackId\"), \n",
|
||||
"\tFOREIGN KEY(\"InvoiceId\") REFERENCES \"Invoice\" (\"InvoiceId\")\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"/*\n",
|
||||
"3 rows from InvoiceLine table:\n",
|
||||
"InvoiceLineId\tInvoiceId\tTrackId\tUnitPrice\tQuantity\n",
|
||||
"1\t1\t2\t0.99\t1\n",
|
||||
"2\t1\t4\t0.99\t1\n",
|
||||
"3\t2\t6\t0.99\t1\n",
|
||||
"*/\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" sql_db_query (call_7WBDcMxl1h7MnI05njx1q8V9)\n",
|
||||
" Call ID: call_7WBDcMxl1h7MnI05njx1q8V9\n",
|
||||
" Args:\n",
|
||||
" query: SELECT c.Country, SUM(i.Total) AS TotalSpent FROM Customer c JOIN Invoice i ON c.CustomerId = i.CustomerId GROUP BY c.Country ORDER BY TotalSpent DESC LIMIT 1\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: sql_db_query\n",
|
||||
"\n",
|
||||
"[('USA', 523.0600000000003)]\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"\n",
|
||||
"Customers from the USA spent the most, with a total amount spent of $523.06.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"example_query = \"Which country's customers spent the most?\"\n",
|
||||
"\n",
|
||||
"events = agent_executor.stream(\n",
|
||||
" {\"messages\": [(\"user\", example_query)]},\n",
|
||||
" stream_mode=\"values\",\n",
|
||||
")\n",
|
||||
"for event in events:\n",
|
||||
" event[\"messages\"][-1].pretty_print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "adbf3d8d-7570-45a5-950f-ce84db5145ab",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can also observe the agent recover from an error:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "23c1235c-6d18-43e4-98ab-85b426b53d94",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"================================\u001b[1m Human Message \u001b[0m=================================\n",
|
||||
"\n",
|
||||
"Who are the top 3 best selling artists?\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" sql_db_query (call_9F6Bp2vwsDkeLW6FsJFqLiet)\n",
|
||||
" Call ID: call_9F6Bp2vwsDkeLW6FsJFqLiet\n",
|
||||
" Args:\n",
|
||||
" query: SELECT artist_name, SUM(quantity) AS total_sold FROM sales GROUP BY artist_name ORDER BY total_sold DESC LIMIT 3\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: sql_db_query\n",
|
||||
"\n",
|
||||
"Error: (sqlite3.OperationalError) no such table: sales\n",
|
||||
"[SQL: SELECT artist_name, SUM(quantity) AS total_sold FROM sales GROUP BY artist_name ORDER BY total_sold DESC LIMIT 3]\n",
|
||||
"(Background on this error at: https://sqlalche.me/e/20/e3q8)\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" sql_db_list_tables (call_Gx5adzWnrBDIIxzUDzsn83zO)\n",
|
||||
" Call ID: call_Gx5adzWnrBDIIxzUDzsn83zO\n",
|
||||
" Args:\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: sql_db_list_tables\n",
|
||||
"\n",
|
||||
"Album, Artist, Customer, Employee, Genre, Invoice, InvoiceLine, MediaType, Playlist, PlaylistTrack, Track\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" sql_db_schema (call_ftywrZgEgGWLrnk9dYC0xtZv)\n",
|
||||
" Call ID: call_ftywrZgEgGWLrnk9dYC0xtZv\n",
|
||||
" Args:\n",
|
||||
" table_names: Artist, Album, InvoiceLine\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: sql_db_schema\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"CREATE TABLE \"Album\" (\n",
|
||||
"\t\"AlbumId\" INTEGER NOT NULL, \n",
|
||||
"\t\"Title\" NVARCHAR(160) NOT NULL, \n",
|
||||
"\t\"ArtistId\" INTEGER NOT NULL, \n",
|
||||
"\tPRIMARY KEY (\"AlbumId\"), \n",
|
||||
"\tFOREIGN KEY(\"ArtistId\") REFERENCES \"Artist\" (\"ArtistId\")\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"/*\n",
|
||||
"3 rows from Album table:\n",
|
||||
"AlbumId\tTitle\tArtistId\n",
|
||||
"1\tFor Those About To Rock We Salute You\t1\n",
|
||||
"2\tBalls to the Wall\t2\n",
|
||||
"3\tRestless and Wild\t2\n",
|
||||
"*/\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"CREATE TABLE \"Artist\" (\n",
|
||||
"\t\"ArtistId\" INTEGER NOT NULL, \n",
|
||||
"\t\"Name\" NVARCHAR(120), \n",
|
||||
"\tPRIMARY KEY (\"ArtistId\")\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"/*\n",
|
||||
"3 rows from Artist table:\n",
|
||||
"ArtistId\tName\n",
|
||||
"1\tAC/DC\n",
|
||||
"2\tAccept\n",
|
||||
"3\tAerosmith\n",
|
||||
"*/\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"CREATE TABLE \"InvoiceLine\" (\n",
|
||||
"\t\"InvoiceLineId\" INTEGER NOT NULL, \n",
|
||||
"\t\"InvoiceId\" INTEGER NOT NULL, \n",
|
||||
"\t\"TrackId\" INTEGER NOT NULL, \n",
|
||||
"\t\"UnitPrice\" NUMERIC(10, 2) NOT NULL, \n",
|
||||
"\t\"Quantity\" INTEGER NOT NULL, \n",
|
||||
"\tPRIMARY KEY (\"InvoiceLineId\"), \n",
|
||||
"\tFOREIGN KEY(\"TrackId\") REFERENCES \"Track\" (\"TrackId\"), \n",
|
||||
"\tFOREIGN KEY(\"InvoiceId\") REFERENCES \"Invoice\" (\"InvoiceId\")\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"/*\n",
|
||||
"3 rows from InvoiceLine table:\n",
|
||||
"InvoiceLineId\tInvoiceId\tTrackId\tUnitPrice\tQuantity\n",
|
||||
"1\t1\t2\t0.99\t1\n",
|
||||
"2\t1\t4\t0.99\t1\n",
|
||||
"3\t2\t6\t0.99\t1\n",
|
||||
"*/\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" sql_db_query (call_i6n3lmS7E2ZivN758VOayTiy)\n",
|
||||
" Call ID: call_i6n3lmS7E2ZivN758VOayTiy\n",
|
||||
" Args:\n",
|
||||
" query: SELECT Artist.Name AS artist_name, SUM(InvoiceLine.Quantity) AS total_sold FROM Artist JOIN Album ON Artist.ArtistId = Album.ArtistId JOIN Track ON Album.AlbumId = Track.AlbumId JOIN InvoiceLine ON Track.TrackId = InvoiceLine.TrackId GROUP BY Artist.Name ORDER BY total_sold DESC LIMIT 3\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: sql_db_query\n",
|
||||
"\n",
|
||||
"[('Iron Maiden', 140), ('U2', 107), ('Metallica', 91)]\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"\n",
|
||||
"The top 3 best selling artists are:\n",
|
||||
"1. Iron Maiden - 140 units sold\n",
|
||||
"2. U2 - 107 units sold\n",
|
||||
"3. Metallica - 91 units sold\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"example_query = \"Who are the top 3 best selling artists?\"\n",
|
||||
"\n",
|
||||
"events = agent_executor.stream(\n",
|
||||
" {\"messages\": [(\"user\", example_query)]},\n",
|
||||
" stream_mode=\"values\",\n",
|
||||
")\n",
|
||||
"for event in events:\n",
|
||||
" event[\"messages\"][-1].pretty_print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "73521f1b-be03-44e6-8b27-a9a46ae8e962",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Specific functionality\n",
|
||||
"\n",
|
||||
"`SQLDatabaseToolkit` implements a [.get_context](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.sql.toolkit.SQLDatabaseToolkit.html#langchain_community.agent_toolkits.sql.toolkit.SQLDatabaseToolkit.get_context) method as a convenience for use in prompts or other contexts.\n",
|
||||
"\n",
|
||||
"**⚠️ Disclaimer ⚠️** : The agent may generate insert/update/delete queries. When this is not expected, use a custom prompt or create a SQL user without write permissions.\n",
|
||||
"\n",
|
||||
"The final user might overload your SQL database by asking a simple question such as \"run the biggest query possible\". The generated query might look like:\n",
|
||||
"\n",
|
||||
"```sql\n",
|
||||
"SELECT * FROM \"public\".\"users\"\n",
|
||||
" JOIN \"public\".\"user_permissions\" ON \"public\".\"users\".id = \"public\".\"user_permissions\".user_id\n",
|
||||
" JOIN \"public\".\"projects\" ON \"public\".\"users\".id = \"public\".\"projects\".user_id\n",
|
||||
" JOIN \"public\".\"events\" ON \"public\".\"projects\".id = \"public\".\"events\".project_id;\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"For a transactional SQL database, if one of the tables above contains millions of rows, the query might cause trouble for other applications using the same database.\n",
|
||||
"\n",
|
||||
"Most data-warehouse-oriented databases support user-level quotas for limiting resource usage."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1aa8a7e3-87ca-4963-a224-0cbdc9d88714",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all SQLDatabaseToolkit features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.sql.toolkit.SQLDatabaseToolkit.html)."
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -405,9 +606,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Steam Game Recommendation & Game Details\n",
|
||||
"# Steam Toolkit\n",
|
||||
"\n",
|
||||
">[Steam (Wikipedia)](https://en.wikipedia.org/wiki/Steam_(service)) is a video game digital distribution service and storefront developed by `Valve Corporation`. It provides game updates automatically for Valve's games, and expanded to distributing third-party titles. `Steam` offers various features, like game server matchmaking with Valve Anti-Cheat measures, social networking, and game streaming services.\n",
|
||||
"\n",
|
||||
@@ -15,20 +15,47 @@
|
||||
"source": [
|
||||
"[Tavily's Search API](https://tavily.com) is a search engine built specifically for AI agents (LLMs), delivering real-time, accurate, and factual results at speed.\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"### Integration details\n",
|
||||
"| Class | Package | Serializable | [JS support](https://js.langchain.com/v0.2/docs/integrations/tools/tavily_search) | Package latest |\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: |\n",
|
||||
"| [TavilySearchResults](https://api.python.langchain.com/en/latest/tools/langchain_community.tools.tavily_search.tool.TavilySearchResults.html) | [langchain-community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ❌ | ✅ |  |\n",
|
||||
"\n",
|
||||
"### Tool features\n",
|
||||
"| [Returns artifact](/docs/how_to/tool_artifacts/) | Native async | Return data | Pricing |\n",
|
||||
"| :---: | :---: | :---: | :---: |\n",
|
||||
"| ✅ | ✅ | Title, URL, content, answer | 1,000 free searches / month | \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"The integration lives in the `langchain-community` package. We also need to install the `tavily-python` package itself.\n",
|
||||
"\n",
|
||||
"```bash\n",
|
||||
"pip install -U langchain-community tavily-python\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"We also need to set our Tavily API key."
|
||||
"The integration lives in the `langchain-community` package. We also need to install the `tavily-python` package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f85b4089",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU \"langchain-community>=0.2.11\" tavily-python"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b15e9266",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"We also need to set our Tavily API key. You can get an API key by visiting [this site](https://app.tavily.com/sign-in) and creating an account."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "e0b178a2-8816-40ca-b57c-ccdd86dde9c9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -36,7 +63,8 @@
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"TAVILY_API_KEY\"] = getpass.getpass()"
|
||||
"if not os.environ.get(\"TAVILY_API_KEY\"):\n",
|
||||
" os.environ[\"TAVILY_API_KEY\"] = getpass.getpass(\"Tavily API key:\\n\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -44,12 +72,12 @@
|
||||
"id": "bc5ab717-fd27-4c59-b912-bdd099541478",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"It's also helpful (but not needed) to set up [LangSmith](https://smith.langchain.com/) for best-in-class observability"
|
||||
"It's also helpful (but not needed) to set up [LangSmith](https://smith.langchain.com/) for best-in-class observability:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 3,
|
||||
"id": "a6c2f136-6367-4f1f-825d-ae741e1bf281",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -63,166 +91,274 @@
|
||||
"id": "1c97218f-f366-479d-8bf7-fe9f2f6df73f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage\n",
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Here we show how to use the tool individually."
|
||||
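"Here we show how to instantiate the Tavily search tool, with optional parameters such as `max_results`, `search_depth`, `include_answer`, `include_raw_content`, and `include_images`:"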
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 1,
|
||||
"id": "8b3ddfe9-ca79-494c-a7ab-1f56d9407a64",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.tools.tavily_search import TavilySearchResults\n",
|
||||
"from langchain_community.tools import TavilySearchResults\n",
|
||||
"\n",
|
||||
"tool = TavilySearchResults()"
|
||||
"tool = TavilySearchResults(\n",
|
||||
" max_results=5,\n",
|
||||
" search_depth=\"advanced\",\n",
|
||||
" include_answer=True,\n",
|
||||
" include_raw_content=True,\n",
|
||||
" include_images=True,\n",
|
||||
" # include_domains=[...],\n",
|
||||
" # exclude_domains=[...],\n",
|
||||
" # name=\"...\", # overwrite default tool name\n",
|
||||
" # description=\"...\", # overwrite default tool description\n",
|
||||
" # args_schema=..., # overwrite default args_schema: BaseModel\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "74147a1a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Invocation\n",
|
||||
"\n",
|
||||
"### [Invoke directly with args](/docs/concepts/#invoke-with-just-the-arguments)\n",
|
||||
"\n",
|
||||
"The `TavilySearchResults` tool takes a single \"query\" argument, which should be a natural language query:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 2,
|
||||
"id": "65310a8b-eb0c-4d9e-a618-4f4abe2414fc",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'url': 'https://apnews.com/article/burning-man-flooding-nevada-stranded-3971a523f4b993f8f35e158fd1a17a1e',\n",
|
||||
" 'content': 'festival goers are helped off a truck from the Burning Man festival site in Black Rock, Nev., on Monday, Sept. 4, 2023. festival goers are helped off a truck from the Burning Man festival site in Black Rock, Nev., on Monday, Sept. 4, 2023. at the site of the Burning Man festival where thousands of attendees remained stranded as flooding from storms swept at the site of the Burning Man festival where thousands of attendees remained stranded as flooding from storms sweptRENO, Nev. (AP) — The traffic jam leaving the Burning Man festival eased up considerably Tuesday as the exodus from the mud-caked Nevada desert entered another day following massive rain that left tens of thousands of partygoers stranded for days.'},\n",
|
||||
" {'url': 'https://www.theguardian.com/culture/2023/sep/03/burning-man-nevada-festival-floods',\n",
|
||||
" 'content': 'Officials investigate death at Burning Man as thousands stranded by floods Burning Man festival-goers trapped in desert as rain turns site to mud the burning of a giant sculpture to cap off the event, if weather permits. The festival said the roads remain too wet Burning Man festivalgoers surrounded by mud in Nevada desert – videoMichael Sainato @msainat1 Sun 3 Sep 2023 14.31 EDT Over 70,000 attendees of the annual Burning Man festival in the Black Rock desert of Nevada are stranded as the festival comes to a close on...'},\n",
|
||||
" {'url': 'https://abcnews.go.com/US/burning-man-flooding-happened-stranded-festivalgoers/story?id=102908331',\n",
|
||||
" 'content': 'ABC News Video Live Shows Election 2024 538 Stream on Burning Man flooding: What happened to stranded festivalgoers? Tens of thousands of Burning Man attendees are now able to leave the festival after a downpour and massive flooding Burning Man has been hosted for over 30 years, according to a statement from the organizers. people last year, and just as many were expected this year. Burning Man began on Aug. 28 and was scheduled to runJulie Jammot/AFP via Getty Images Tens of thousands of Burning Man attendees are now able to leave the festival after a downpour and massive flooding left them stranded over the weekend. The festival, held in the Black Rock Desert in northwestern Nevada, was attended by more than 70,000 people last year, and just as many were expected this year.'},\n",
|
||||
" {'url': 'https://www.vox.com/culture/2023/9/6/23861675/burning-man-2023-mud-stranded-climate-change-playa-foot',\n",
|
||||
" 'content': 'This year’s rains opened the floodgates for Burning Man criticism Pray for him people #burningman #burningman2023 #titanicsound #mud #festival who went to Burning Man that large wooden Man won’t be the only one burning.Celebrity Culture The Burning Man flameout, explained Climate change — and schadenfreude — finally caught up to the survivalist cosplayers. By Aja Romano @ajaromano Sep 6, 2023, 3:00pm EDT Share'},\n",
|
||||
" {'url': 'https://www.cnn.com/2023/09/03/us/burning-man-storms-shelter-sunday/index.html',\n",
|
||||
" 'content': 'Editor’s Note: Find the latest Burning Man festival coverage here. CNN values your feedback More than 70,000 Burning Man festival attendees remain stuck in Nevada desert after rain Burning Man organizers said Sunday night. Thousands of people remain trapped at the Burning Man festival in the Nevada desert after heavy rains inundated the\"A mucky, muddy, environment.\" This is what Burning Man looks like See More Videos Editor\\'s Note: Find the latest Burning Man festival coverage here. CNN —'}]"
|
||||
"[{'url': 'https://www.theguardian.com/sport/live/2023/jul/16/wimbledon-mens-singles-final-2023-carlos-alcaraz-v-novak-djokovic-live?page=with:block-64b3ff568f08df28470056bf',\n",
|
||||
" 'content': 'Carlos Alcaraz recovered from a set down to topple Djokovic 1-6, 7-6(6), 6-1, 3-6, 6-4 and win his first Wimbledon title in a battle for the ages'},\n",
|
||||
" {'url': 'https://www.nytimes.com/athletic/live-blogs/wimbledon-2024-live-updates-alcaraz-djokovic-mens-final-result/kJJdTKhOgkZo/',\n",
|
||||
" 'content': \"It was Djokovic's first straight-sets defeat at Wimbledon since the 2013 final, when he lost to Andy Murray. Below, The Athletic 's writers, Charlie Eccleshare and Matt Futterman, analyze the ...\"},\n",
|
||||
" {'url': 'https://www.foxsports.com.au/tennis/wimbledon/fk-you-stars-explosion-stuns-wimbledon-as-massive-final-locked-in/news-story/41cf7d28a12845cdab6be4150a22a170',\n",
|
||||
" 'content': 'The last time Djokovic and Wimbledon met was at the French Open in June when the Serb claimed victory in a third round tie which ended at 3:07 in the morning. On Friday, however, Djokovic was ...'},\n",
|
||||
" {'url': 'https://www.cnn.com/2024/07/09/sport/novak-djokovic-wimbledon-crowd-quarterfinals-spt-intl/index.html',\n",
|
||||
" 'content': 'Novak Djokovic produced another impressive performance at Wimbledon on Monday to cruise into the quarterfinals, but the 24-time grand slam champion was far from happy after his win.'},\n",
|
||||
" {'url': 'https://www.cnn.com/2024/07/05/sport/andy-murray-wimbledon-farewell-ceremony-spt-intl/index.html',\n",
|
||||
" 'content': \"It was an emotional night for three-time grand slam champion Andy Murray on Thursday, as the 37-year-old's Wimbledon farewell began with doubles defeat.. Murray will retire from the sport this ...\"}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tool.invoke({\"query\": \"What happened in the latest burning man floods\"})"
|
||||
"tool.invoke({\"query\": \"What happened at the last wimbledon\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "21c5b56f-0da0-485f-b6f5-38950bae4fd0",
|
||||
"id": "d6e73897",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### [Invoke with ToolCall](/docs/concepts/#invoke-with-toolcall)\n",
|
||||
"\n",
|
||||
"We can also invoke the tool with a model-generated ToolCall, in which case a ToolMessage will be returned:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "f90e33a7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[{\"url\": \"https://www.radiotimes.com/tv/sport/football/euro-2024-location/\", \"content\": \"Euro 2024 host cities. Germany have 10 host cities for Euro 2024, topped by the country's capital Berlin. Berlin. Cologne. Dortmund. Dusseldorf. Frankfurt. Gelsenkirchen. Hamburg.\"}, {\"url\": \"https://www.sportingnews.com/ca/soccer/news/list-euros-host-nations-uefa-european-championship-countries/85f8069d69c9f4\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This is usually generated by a model, but we'll create a tool call directly for demo purposes.\n",
|
||||
"model_generated_tool_call = {\n",
|
||||
" \"args\": {\"query\": \"euro 2024 host nation\"},\n",
|
||||
" \"id\": \"1\",\n",
|
||||
" \"name\": \"tavily\",\n",
|
||||
" \"type\": \"tool_call\",\n",
|
||||
"}\n",
|
||||
"tool_msg = tool.invoke(model_generated_tool_call)\n",
|
||||
"\n",
|
||||
"# The content is a JSON string of results\n",
|
||||
"print(tool_msg.content[:400])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "d8e27be0-1098-4688-8d8c-6e257aae8d56",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'query': str,\n",
|
||||
" 'follow_up_questions': NoneType,\n",
|
||||
" 'answer': str,\n",
|
||||
" 'images': list,\n",
|
||||
" 'results': list,\n",
|
||||
" 'response_time': float}"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# The artifact is a dict with richer, raw results\n",
|
||||
"{k: type(v) for k, v in tool_msg.artifact.items()}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "237ca620-ac31-449a-826b-b4f2e265b194",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{\n",
|
||||
" \"query\": \"euro 2024 host nation\",\n",
|
||||
" \"follow_up_questions\": \"None\",\n",
|
||||
" \"answer\": \"Germany will be the host nation for Euro 2024, with the tournament scheduled to take place from June 14 to July 14. The matches will be held in 10 different cities across Germany, including Berlin, Co\",\n",
|
||||
" \"images\": \"['https://i.ytimg.com/vi/3hsX0vLatNw/maxresdefault.jpg', 'https://img.planetafobal.com/2021/10/sedes-uefa-euro-2024-alemania-fg.jpg', 'https://editorial.uefa.com/resources/0274-14fe4fafd0d4-413fc8a7b7\",\n",
|
||||
" \"results\": \"[{'title': 'Where is Euro 2024? Country, host cities and venues', 'url': 'https://www.radiotimes.com/tv/sport/football/euro-2024-location/', 'content': \\\"Euro 2024 host cities. Germany have 10 host cit\",\n",
|
||||
" \"response_time\": \"3.97\"\n",
|
||||
"}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"\n",
|
||||
"# Abbreviate the results for demo purposes\n",
|
||||
"print(json.dumps({k: str(v)[:200] for k, v in tool_msg.artifact.items()}, indent=2))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "659f9fbd-6fcf-445f-aa8c-72d8e60154bd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Chaining\n",
|
||||
"We show here how to use it as part of an [agent](/docs/tutorials/agents). We use the OpenAI Functions Agent, so we will need to setup and install the required dependencies for that. We will also use [LangSmith Hub](https://smith.langchain.com/hub) to pull the prompt from, so we will need to install that.\n",
|
||||
"\n",
|
||||
"```bash\n",
|
||||
"pip install -U langchain-openai langchainhub\n",
|
||||
"We can use our tool in a chain by first binding it to a [tool-calling model](/docs/how_to/tool_calling/) and then calling it:\n",
|
||||
"\n",
|
||||
"```{=mdx}\n",
|
||||
"import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
|
||||
"\n",
|
||||
"<ChatModelTabs customVarName=\"llm\" />\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a1c8ea19-7100-407d-8e8c-f037f9317255",
|
||||
"id": "af3123ad-7a02-40e5-b58e-7d56e23e5830",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"# | output: false\n",
|
||||
"# | echo: false\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass()"
|
||||
"# !pip install -qU langchain langchain-openai\n",
|
||||
"from langchain.chat_models import init_chat_model\n",
|
||||
"\n",
|
||||
"llm = init_chat_model(model=\"gpt-4o\", model_provider=\"openai\", temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "520767b8-9e61-4485-840a-d16f1da5eb3a",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-10-21T13:15:37.974229Z",
|
||||
"start_time": "2023-10-21T13:15:10.007898Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import hub\n",
|
||||
"from langchain.agents import AgentExecutor, create_openai_functions_agent\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"instructions = \"\"\"You are an assistant.\"\"\"\n",
|
||||
"base_prompt = hub.pull(\"langchain-ai/openai-functions-template\")\n",
|
||||
"prompt = base_prompt.partial(instructions=instructions)\n",
|
||||
"llm = ChatOpenAI(temperature=0)\n",
|
||||
"tavily_tool = TavilySearchResults()\n",
|
||||
"tools = [tavily_tool]\n",
|
||||
"agent = create_openai_functions_agent(llm, tools, prompt)\n",
|
||||
"agent_executor = AgentExecutor(\n",
|
||||
" agent=agent,\n",
|
||||
" tools=tools,\n",
|
||||
" verbose=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "e9303451-3853-47ce-93c9-1898436a6472",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-10-21T13:15:37.974229Z",
|
||||
"start_time": "2023-10-21T13:15:10.007898Z"
|
||||
}
|
||||
},
|
||||
"execution_count": 23,
|
||||
"id": "fdbf35b5-3aaf-4947-9ec6-48c21533fb95",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `tavily_search_results_json` with `{'query': 'latest burning man floods'}`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m[{'url': 'https://www.politifact.com/factchecks/2023/sep/06/instagram-posts/there-were-floods-there-was-mud-but-burning-man-sp/', 'content': 'The Associated Press, Burning Man flooding triggers false claims of Ebola outbreak, ‘national emergency,’ Sept. 5, 2023 BBC, Thousands queue for hours to leave Burning Man festival, Sept. 5, 2023 Newsweek, Is FEMA at Burning Man? Virus Outbreak Conspiracy Theory Spreads Online, Sept. 4, 2023 CBS News, Burning Man \"exodus operations\" begin as driving ban is lifted, organizers say, Sept. 4, 2023(AP) By Madison Czopek September 6, 2023 There were floods, there was mud, but Burning Man sparked no emergency declaration If Your Time is short Heavy rain began falling Sept. 1 in Nevada\\'s...'}, {'url': 'https://www.nbcnews.com/news/us-news/live-blog/live-updates-burning-man-flooding-keeps-thousands-stranded-nevada-site-rcna103193', 'content': 'As heavy rain turns Burning Man 2023 into a muddy mess, a deluge of unsympathetic jokes has swamped the internet Burning Man flooding keeps thousands stranded at Nevada site as authorities investigate 1 death Burning Man revelers unfazed by deluge and deep mud Reuters The flash flood watch is in effect until tomorrow morning. Burning Man is ‘absolutely soaked,’ festivalgoer saysThousands of Burning Man attendees partied hard on Sunday despite downpours that turned the Nevada desert where the annual arts and music festival takes place into a sea of sticky mud and led...'}, {'url': 'https://apnews.com/article/burning-man-flooding-nevada-stranded-3971a523f4b993f8f35e158fd1a17a1e', 'content': 'festival goers are helped off a truck from the Burning Man festival site in Black Rock, Nev., on Monday, Sept. 4, 2023. festival goers are helped off a truck from the Burning Man festival site in Black Rock, Nev., on Monday, Sept. 4, 2023. 
at the site of the Burning Man festival where thousands of attendees remained stranded as flooding from storms swept at the site of the Burning Man festival where thousands of attendees remained stranded as flooding from storms sweptRENO, Nev. (AP) — The traffic jam leaving the Burning Man festival eased up considerably Tuesday as the exodus from the mud-caked Nevada desert entered another day following massive rain that left tens of thousands of partygoers stranded for days.'}, {'url': 'https://apnews.com/article/burning-man-flooding-nevada-stranded-0726190c9f8378935e2a3cce7f154785', 'content': 'festival goers are helped off a truck from the Burning Man festival site in Black Rock, Nev., on Monday, Sept. 4, 2023. festival goers are helped off a truck from the Burning Man festival site in Black Rock, Nev., on Monday, Sept. 4, 2023. at the site of the Burning Man festival where thousands of attendees remained stranded as flooding from storms swept Wait times to exit Burning Man drop after flooding left tens of thousands stranded in Nevada desertFILE - In this satellite photo provided by Maxar Technologies, an overview of Burning Man festival in Black Rock, Nev on Monday, Aug. 28, 2023. Authorities in Nevada were investigating a death at the site of the Burning Man festival where thousands of attendees remained stranded as flooding from storms swept through the Nevada desert.'}, {'url': 'https://www.theguardian.com/culture/2023/sep/03/burning-man-nevada-festival-floods', 'content': 'Officials investigate death at Burning Man as thousands stranded by floods Burning Man festival-goers trapped in desert as rain turns site to mud the burning of a giant sculpture to cap off the event, if weather permits. 
The festival said the roads remain too wet Burning Man festivalgoers surrounded by mud in Nevada desert – videoMichael Sainato @msainat1 Sun 3 Sep 2023 14.31 EDT Over 70,000 attendees of the annual Burning Man festival in the Black Rock desert of Nevada are stranded as the festival comes to a close on...'}]\u001b[0m\u001b[32;1m\u001b[1;3mThe latest Burning Man festival experienced heavy rain, resulting in floods and muddy conditions. Thousands of attendees were stranded at the festival site in Nevada. There were false claims of an Ebola outbreak and a national emergency, but no emergency declaration was made. One death was reported at the festival, which is currently under investigation. Despite the challenging conditions, many festivalgoers remained unfazed and continued to enjoy the event. The exodus from the festival site began as the mud-caked desert started to dry up. Authorities issued a flash flood watch, and investigations are ongoing regarding the death at the festival.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'What happened in the latest burning man floods?',\n",
|
||||
" 'output': 'The latest Burning Man festival experienced heavy rain, resulting in floods and muddy conditions. Thousands of attendees were stranded at the festival site in Nevada. There were false claims of an Ebola outbreak and a national emergency, but no emergency declaration was made. One death was reported at the festival, which is currently under investigation. Despite the challenging conditions, many festivalgoers remained unfazed and continued to enjoy the event. The exodus from the festival site began as the mud-caked desert started to dry up. Authorities issued a flash flood watch, and investigations are ongoing regarding the death at the festival.'}"
|
||||
"AIMessage(content=\"The last women's singles champion at Wimbledon was Markéta Vondroušová, who won the title in 2023.\", response_metadata={'token_usage': {'completion_tokens': 26, 'prompt_tokens': 802, 'total_tokens': 828}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'stop', 'logprobs': None}, id='run-2bfeec6e-8f04-477e-bf51-9500f18bd514-0', usage_metadata={'input_tokens': 802, 'output_tokens': 26, 'total_tokens': 828})"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.invoke({\"input\": \"What happened in the latest burning man floods?\"})"
|
||||
"import datetime\n",
|
||||
"\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"from langchain_core.runnables import RunnableConfig, chain\n",
|
||||
"\n",
|
||||
"today = datetime.datetime.today().strftime(\"%D\")\n",
|
||||
"prompt = ChatPromptTemplate(\n",
|
||||
" [\n",
|
||||
" (\"system\", f\"You are a helpful assistant. The date today is {today}.\"),\n",
|
||||
" (\"human\", \"{user_input}\"),\n",
|
||||
" (\"placeholder\", \"{messages}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Bind the tool so the model can call it; passing tool_choice to bind_tools would force the model to call this tool.\n",
|
||||
"llm_with_tools = llm.bind_tools([tool])\n",
|
||||
"\n",
|
||||
"llm_chain = prompt | llm_with_tools\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"@chain\n",
|
||||
"def tool_chain(user_input: str, config: RunnableConfig):\n",
|
||||
" input_ = {\"user_input\": user_input}\n",
|
||||
" ai_msg = llm_chain.invoke(input_, config=config)\n",
|
||||
" tool_msgs = tool.batch(ai_msg.tool_calls, config=config)\n",
|
||||
" return llm_chain.invoke({**input_, \"messages\": [ai_msg, *tool_msgs]}, config=config)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"tool_chain.invoke(\"who won the last womens singles wimbledon\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "86cd0a02",
|
||||
"cell_type": "markdown",
|
||||
"id": "fb115693-e89e-40f2-a460-0d0d39a17963",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"source": [
|
||||
"Here's the [LangSmith trace](https://smith.langchain.com/public/b43232c1-b243-4a7f-afeb-5fba8c84ba56/r) for this run."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4ac8146c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all TavilySearchResults features and configurations head to the API reference: https://api.python.langchain.com/en/latest/tools/langchain_community.tools.tavily_search.tool.TavilySearchResults.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": "poetry-venv-311",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
"name": "poetry-venv-311"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
@@ -234,7 +370,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.1"
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
2
docs/docs/integrations/vectorstores/.gitignore
vendored
Normal file
2
docs/docs/integrations/vectorstores/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
# files generated by faiss.ipynb
|
||||
faiss_index
|
||||
@@ -5,33 +5,13 @@
|
||||
"id": "66d0270a-b74f-4110-901e-7960b00297af",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Astra DB\n",
|
||||
"# Astra DB Vector Store\n",
|
||||
"\n",
|
||||
"This page provides a quickstart for using [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) as a Vector Store."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ab8cd64f-3bb2-4f16-a0a9-12d7b1789bf6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"> DataStax [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless vector-capable database built on Apache Cassandra® and made conveniently available through an easy-to-use JSON API."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d2d6ca14-fb7e-4172-9aa0-a3119a064b96",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"_Note: in addition to access to the database, an OpenAI API Key is required to run the full example._"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bb9be7ce-8c70-4d46-9f11-71c42a36e928",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup and general dependencies"
|
||||
"This page provides a quickstart for using [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) as a Vector Store.\n",
|
||||
"\n",
|
||||
"> DataStax [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless vector-capable database built on Apache Cassandra® and made conveniently available through an easy-to-use JSON API.\n",
|
||||
"\n",
|
||||
"## Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -39,7 +19,7 @@
|
||||
"id": "dbe7c156-0413-47e3-9237-4769c4248869",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Use of the integration requires the corresponding Python package:"
|
||||
"Use of the integration requires the `langchain-astradb` partner package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -49,54 +29,61 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pip install -qU langchain-astradb"
|
||||
"pip install -qU \"langchain-astradb>=0.3.3\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2453d83a-bc8f-41e1-a692-befe4dd90156",
|
||||
"id": "319bf84b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"_Make sure you have installed the packages required to run all of this demo:_"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "56c1f86e-5921-4976-ac8f-1d62e5a512b0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pip install -qU langchain langchain-community langchain-openai datasets pypdf"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c2910035-e61f-48d9-a110-d68c401b62aa",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Import dependencies"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b06619af-fea2-4863-8149-7f239a8c9c82",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from getpass import getpass\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"from astrapy.info import CollectionVectorServiceOptions\n",
|
||||
"from datasets import load_dataset\n",
|
||||
"from langchain_community.document_loaders import PyPDFLoader\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"from langchain_core.output_parsers import StrOutputParser\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"from langchain_core.runnables import RunnablePassthrough\n",
|
||||
"from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
|
||||
"from langchain_text_splitters import RecursiveCharacterTextSplitter"
|
||||
"In order to use the AstraDB vector store, you must first head to the [AstraDB website](https://astra.datastax.com), create an account, and then create a new database - the initialization might take a few minutes. \n",
|
||||
"\n",
|
||||
"Once the database has been initialized, you should [create an application token](https://docs.datastax.com/en/astra-db-serverless/administration/manage-application-tokens.html#generate-application-token) and save it for later use. \n",
|
||||
"\n",
|
||||
"You will also want to copy the `API Endpoint` from the `Database Details` and store that in the `ASTRA_DB_API_ENDPOINT` variable.\n",
|
||||
"\n",
|
||||
"You may optionally provide a namespace, which you can manage from the `Data Explorer` tab of your database dashboard. If you don't wish to set a namespace, you can leave the `getpass` prompt for `ASTRA_DB_NAMESPACE` empty."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "b7843c22",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"ASTRA_DB_API_ENDPOINT = getpass.getpass(\"ASTRA_DB_API_ENDPOINT = \")\n",
|
||||
"ASTRA_DB_APPLICATION_TOKEN = getpass.getpass(\"ASTRA_DB_APPLICATION_TOKEN = \")\n",
|
||||
"\n",
|
||||
"desired_namespace = getpass.getpass(\"ASTRA_DB_NAMESPACE = \")\n",
|
||||
"if desired_namespace:\n",
|
||||
" ASTRA_DB_NAMESPACE = desired_namespace\n",
|
||||
"else:\n",
|
||||
" ASTRA_DB_NAMESPACE = None"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e1c5cd9e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get best in-class automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3cb739c0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -104,48 +91,59 @@
|
||||
"id": "22866f09-e10d-4f05-a24b-b9420129462e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Import the Vector Store"
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"There are two ways to create an Astra DB vector store, which differ in how the embeddings are computed.\n",
|
||||
"\n",
|
||||
"#### Method 1: Explicit embeddings\n",
|
||||
"\n",
|
||||
"You can separately instantiate a `langchain_core.embeddings.Embeddings` class and pass it to the `AstraDBVectorStore` constructor, just like with most other LangChain vector stores.\n",
|
||||
"\n",
|
||||
"#### Method 2: Integrated embedding computation\n",
|
||||
"\n",
|
||||
"Alternatively, you can use the [Vectorize](https://www.datastax.com/blog/simplifying-vector-embedding-generation-with-astra-vectorize) feature of Astra DB and simply specify the name of a supported embedding model when creating the store. The embedding computations are entirely handled within the database. (To proceed with this method, you must have enabled the desired embedding integration for your database, as described [in the docs](https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html).)\n",
|
||||
"\n",
|
||||
"### Explicit Embedding Initialization\n",
|
||||
"\n",
|
||||
"Below, we instantiate our vector store using the explicit embedding class:\n",
|
||||
"\n",
|
||||
"```{=mdx}\n",
|
||||
"import EmbeddingTabs from \"@theme/EmbeddingTabs\";\n",
|
||||
"\n",
|
||||
"<EmbeddingTabs/>\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 11,
|
||||
"id": "d71a1dcb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# | output: false\n",
|
||||
"# | echo: false\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "0b32730d-176e-414c-9d91-fd3644c54211",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_astradb import AstraDBVectorStore"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "68f61b01-3e09-47c1-9d67-5d6915c86626",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## DB Connection parameters\n",
|
||||
"from langchain_astradb import AstraDBVectorStore\n",
|
||||
"\n",
|
||||
"These are found on your Astra DB dashboard:\n",
|
||||
"\n",
|
||||
"- the API Endpoint looks like `https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com`\n",
|
||||
"- the Token looks like `AstraCS:6gBhNmsk135....`\n",
|
||||
"- you may optionally provide a _Namespace_ such as `my_namespace`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d78af8ed-cff9-4f14-aa5d-016f99ab547c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ASTRA_DB_API_ENDPOINT = input(\"ASTRA_DB_API_ENDPOINT = \")\n",
|
||||
"ASTRA_DB_APPLICATION_TOKEN = getpass(\"ASTRA_DB_APPLICATION_TOKEN = \")\n",
|
||||
"\n",
|
||||
"desired_namespace = input(\"(optional) Namespace = \")\n",
|
||||
"if desired_namespace:\n",
|
||||
" ASTRA_DB_KEYSPACE = desired_namespace\n",
|
||||
"else:\n",
|
||||
" ASTRA_DB_KEYSPACE = None"
|
||||
"vector_store = AstraDBVectorStore(\n",
|
||||
" collection_name=\"astra_vector_langchain\",\n",
|
||||
" embedding=embeddings,\n",
|
||||
" api_endpoint=ASTRA_DB_API_ENDPOINT,\n",
|
||||
" token=ASTRA_DB_APPLICATION_TOKEN,\n",
|
||||
" namespace=ASTRA_DB_NAMESPACE,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -153,85 +151,14 @@
|
||||
"id": "84a1fe85-a42c-4f15-92e1-f79f1dd43ea2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create the vector store\n",
|
||||
"\n",
|
||||
"There are two ways to create an Astra DB vector store, which differ in how the embeddings are computed.\n",
|
||||
"\n",
|
||||
"*Explicit embeddings*. You can separately instantiate a `langchain_core.embeddings.Embeddings` class and pass it to the `AstraDBVectorStore` constructor, just like with most other LangChain vector stores.\n",
|
||||
"\n",
|
||||
"*Integrated embedding computation*. Alternatively, you can use the [Vectorize](https://www.datastax.com/blog/simplifying-vector-embedding-generation-with-astra-vectorize) feature of Astra DB and simply specify the name of a supported embedding model when creating the store. The embedding computations are entirely handled within the database. (To proceed with this method, you must have enabled the desired embedding integration for your database, as described [in the docs](https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html).)\n",
|
||||
"\n",
|
||||
"**Please choose one method and run the corresponding cells only.**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8c435386-e8d5-41f4-a9e5-7b609ef781f9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Method 1: provide embeddings explicitly\n",
|
||||
"\n",
|
||||
"This demo will use an OpenAI embedding model:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dfa5c005-9738-4c53-b8a8-8540fcbb8bad",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass(\"OPENAI_API_KEY = \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3accae6f-73e2-483a-83f7-76eb33558a1f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"my_embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "465b1b16-5363-4c4f-9917-a49e02a86c14",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now you can create the vector store:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8b77553b-8bb5-4949-b87b-8c6abac56a26",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vstore = AstraDBVectorStore(\n",
|
||||
" embedding=my_embeddings,\n",
|
||||
" collection_name=\"astra_vector_demo\",\n",
|
||||
" api_endpoint=ASTRA_DB_API_ENDPOINT,\n",
|
||||
" token=ASTRA_DB_APPLICATION_TOKEN,\n",
|
||||
" namespace=ASTRA_DB_KEYSPACE,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5d5d2bfa-c071-4a5b-8b6e-3daa1b6de164",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Method 2: use Astra Vectorize (embeddings integrated in Astra DB)\n",
|
||||
"### Integrated Embedding Initialization\n",
|
||||
"\n",
|
||||
"Here it is assumed that you have\n",
|
||||
"\n",
|
||||
"- enabled the OpenAI integration in your Astra DB organization,\n",
|
||||
"- added an API Key named `\"MY_OPENAI_API_KEY\"` to the integration, and\n",
|
||||
"- scoped it to the database you are using.\n",
|
||||
"- Enabled the OpenAI integration in your Astra DB organization,\n",
|
||||
"- Added an API Key named `\"OPENAI_API_KEY\"` to the integration, and scoped it to the database you are using.\n",
|
||||
"\n",
|
||||
"For more details please consult the [documentation](https://docs.datastax.com/en/astra-db-serverless/integrations/embedding-providers/openai.html)."
|
||||
"For more details on how to do this, please consult the [documentation](https://docs.datastax.com/en/astra-db-serverless/integrations/embedding-providers/openai.html)."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -241,312 +168,300 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from astrapy.info import CollectionVectorServiceOptions\n",
|
||||
"\n",
|
||||
"openai_vectorize_options = CollectionVectorServiceOptions(\n",
|
||||
" provider=\"openai\",\n",
|
||||
" model_name=\"text-embedding-3-small\",\n",
|
||||
" authentication={\n",
|
||||
" \"providerKey\": \"MY_OPENAI_API_KEY\",\n",
|
||||
" \"providerKey\": \"OPENAI_API_KEY\",\n",
|
||||
" },\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"vstore = AstraDBVectorStore(\n",
|
||||
" collection_name=\"astra_vectorize_demo\",\n",
|
||||
"vector_store_integrated = AstraDBVectorStore(\n",
|
||||
" collection_name=\"astra_vector_langchain_integrated\",\n",
|
||||
" api_endpoint=ASTRA_DB_API_ENDPOINT,\n",
|
||||
" token=ASTRA_DB_APPLICATION_TOKEN,\n",
|
||||
" namespace=ASTRA_DB_KEYSPACE,\n",
|
||||
" namespace=ASTRA_DB_NAMESPACE,\n",
|
||||
" collection_vector_service_options=openai_vectorize_options,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9a348678-b2f6-46ca-9a0d-2eb4cc6b66b1",
|
||||
"id": "d3796b39",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load a dataset"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "552e56b0-301a-4b06-99c7-57ba6faa966f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Convert each entry in the source dataset into a `Document`, then write them into the vector store:"
|
||||
"## Manage vector store\n",
|
||||
"\n",
|
||||
"Once you have created your vector store, we can interact with it by adding and deleting different items.\n",
|
||||
"\n",
|
||||
"### Add items to vector store\n",
|
||||
"\n",
|
||||
"We can add items to our vector store by using the `add_documents` function."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3a1f532f-ad63-4256-9730-a183841bd8e9",
|
||||
"execution_count": 23,
|
||||
"id": "afb3e155",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[UUID('89a5cea1-5f3d-47c1-89dc-7e36e12cf4de'),\n",
|
||||
" UUID('d4e78c48-f954-4612-8a38-af22923ba23b'),\n",
|
||||
" UUID('058e4046-ded0-4fc1-b8ac-60e5a5f08ea0'),\n",
|
||||
" UUID('50ab2a9a-762c-4b78-b102-942a86d77288'),\n",
|
||||
" UUID('1da5a3c1-ba51-4f2f-aaaf-79a8f5011ce3'),\n",
|
||||
" UUID('f3055d9e-2eb1-4d25-838e-2c70548f91b5'),\n",
|
||||
" UUID('4bf0613d-08d0-4fbc-a43c-4955e4c9e616'),\n",
|
||||
" UUID('18008625-8fd4-45c2-a0d7-92a2cde23dbc'),\n",
|
||||
" UUID('c712e06f-790b-4fd4-9040-7ab3898965d0'),\n",
|
||||
" UUID('a9b84820-3445-4810-a46c-e77b76ab85bc')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"philo_dataset = load_dataset(\"datastax/philosopher-quotes\")[\"train\"]\n",
|
||||
"from uuid import uuid4\n",
|
||||
"\n",
|
||||
"docs = []\n",
|
||||
"for entry in philo_dataset:\n",
|
||||
" metadata = {\"author\": entry[\"author\"]}\n",
|
||||
" doc = Document(page_content=entry[\"quote\"], metadata=metadata)\n",
|
||||
" docs.append(doc)\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"inserted_ids = vstore.add_documents(docs)\n",
|
||||
"print(f\"\\nInserted {len(inserted_ids)} documents.\")"
|
||||
"document_1 = Document(\n",
|
||||
" page_content=\"I had chocalate chip pancakes and scrambled eggs for breakfast this morning.\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_2 = Document(\n",
|
||||
" page_content=\"The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_3 = Document(\n",
|
||||
" page_content=\"Building an exciting new project with LangChain - come check it out!\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_4 = Document(\n",
|
||||
" page_content=\"Robbers broke into the city bank and stole $1 million in cash.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_5 = Document(\n",
|
||||
" page_content=\"Wow! That was an amazing movie. I can't wait to see it again.\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_6 = Document(\n",
|
||||
" page_content=\"Is the new iPhone worth the price? Read this review to find out.\",\n",
|
||||
" metadata={\"source\": \"website\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_7 = Document(\n",
|
||||
" page_content=\"The top 10 soccer players in the world right now.\",\n",
|
||||
" metadata={\"source\": \"website\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_8 = Document(\n",
|
||||
" page_content=\"LangGraph is the best framework for building stateful, agentic applications!\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_9 = Document(\n",
|
||||
" page_content=\"The stock market is down 500 points today due to fears of a recession.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_10 = Document(\n",
|
||||
" page_content=\"I have a bad feeling I am going to get deleted :(\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"documents = [\n",
|
||||
" document_1,\n",
|
||||
" document_2,\n",
|
||||
" document_3,\n",
|
||||
" document_4,\n",
|
||||
" document_5,\n",
|
||||
" document_6,\n",
|
||||
" document_7,\n",
|
||||
" document_8,\n",
|
||||
" document_9,\n",
|
||||
" document_10,\n",
|
||||
"]\n",
|
||||
"uuids = [str(uuid4()) for _ in range(len(documents))]\n",
|
||||
"\n",
|
||||
"vector_store.add_documents(documents=documents, ids=uuids)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "79d4f436-ef04-4288-8f79-97c9abb983ed",
|
||||
"id": "dfce4edc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In the above, `metadata` dictionaries are created from the source data and are part of the `Document`.\n",
|
||||
"### Delete items from vector store\n",
|
||||
"\n",
|
||||
"_Note: check the [Astra DB API Docs](https://docs.datastax.com/en/astra-serverless/docs/develop/dev-with-json.html#_json_api_limits) for the valid metadata field names: some characters are reserved and cannot be used._"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "084d8802-ab39-4262-9a87-42eafb746f92",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Add some more entries, this time with `add_texts`:"
|
||||
"We can delete items from our vector store by ID by using the `delete` function."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b6b157f5-eb31-4907-a78e-2e2b06893936",
|
||||
"execution_count": 24,
|
||||
"id": "d3f69315",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"texts = [\"I think, therefore I am.\", \"To the things themselves!\"]\n",
|
||||
"metadatas = [{\"author\": \"descartes\"}, {\"author\": \"husserl\"}]\n",
|
||||
"ids = [\"desc_01\", \"huss_xy\"]\n",
|
||||
"vector_store.delete(ids=uuids[-1])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d12e1a07",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Query vector store\n",
|
||||
"\n",
|
||||
"inserted_ids_2 = vstore.add_texts(texts=texts, metadatas=metadatas, ids=ids)\n",
|
||||
"print(f\"\\nInserted {len(inserted_ids_2)} documents.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "63840eb3-8b29-4017-bc2f-301bf5001f28",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"_Note: you may want to speed up the execution of `add_texts` and `add_documents` by increasing the concurrency level for_\n",
|
||||
"_these bulk operations - check out the `*_concurrency` parameters in the class constructor and the `add_texts` docstrings_\n",
|
||||
"_for more details. Depending on the network and the client machine specifications, your best-performing choice of parameters may vary._"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c031760a-1fc5-4855-adf2-02ed52fe2181",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Run searches"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "02a77d8e-1aae-4054-8805-01c77947c49f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This section demonstrates metadata filtering and getting the similarity scores back:"
|
||||
"Once your vector store has been created and the relevant documents have been added you will most likely wish to query it during the running of your chain or agent. \n",
|
||||
"\n",
|
||||
"### Query directly\n",
|
||||
"\n",
|
||||
"#### Similarity search\n",
|
||||
"\n",
|
||||
"Performing a simple similarity search with filtering on metadata can be done as follows:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1761806a-1afd-4491-867c-25a80d92b9fe",
|
||||
"execution_count": 15,
|
||||
"id": "770b3467",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"* Building an exciting new project with LangChain - come check it out! [{'source': 'tweet'}]\n",
|
||||
"* LangGraph is the best framework for building stateful, agentic applications! [{'source': 'tweet'}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = vstore.similarity_search(\"Our life is what we make of it\", k=3)\n",
|
||||
"results = vector_store.similarity_search(\n",
|
||||
" \"LangChain provides abstractions to make working with LLMs easy\",\n",
|
||||
" k=2,\n",
|
||||
" filter={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"for res in results:\n",
|
||||
" print(f\"* {res.page_content} [{res.metadata}]\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "eebc4f7c-f61a-438e-b3c8-17e6888d8a0b",
|
||||
"cell_type": "markdown",
|
||||
"id": "ce112165",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"results_filtered = vstore.similarity_search(\n",
|
||||
" \"Our life is what we make of it\",\n",
|
||||
" k=3,\n",
|
||||
" filter={\"author\": \"plato\"},\n",
|
||||
")\n",
|
||||
"for res in results_filtered:\n",
|
||||
" print(f\"* {res.page_content} [{res.metadata}]\")"
|
||||
"#### Similarity search with score\n",
|
||||
"\n",
|
||||
"You can also search with score:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "11bbfe64-c0cd-40c6-866a-a5786538450e",
|
||||
"execution_count": 16,
|
||||
"id": "5924309a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"* [SIM=0.776585] The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees. [{'source': 'news'}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = vstore.similarity_search_with_score(\"Our life is what we make of it\", k=3)\n",
|
||||
"results = vector_store.similarity_search_with_score(\n",
|
||||
" \"Will it be hot tomorrow?\", k=1, filter={\"source\": \"news\"}\n",
|
||||
")\n",
|
||||
"for res, score in results:\n",
|
||||
" print(f\"* [SIM={score:3f}] {res.page_content} [{res.metadata}]\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b14ea558-bfbe-41ce-807e-d70670060ada",
|
||||
"id": "fead7af5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### MMR (Maximal-marginal-relevance) search\n",
|
||||
"#### Other search methods\n",
|
||||
"\n",
|
||||
"_Note: the MMR search method is not (yet) supported for vector stores built with Astra Vectorize._"
|
||||
"There are a variety of other search methods that are not covered in this notebook, such as MMR search or searching by vector. For a full list of the search abilities available for `AstraDBVectorStore` check out the [API reference](https://api.python.langchain.com/en/latest/vectorstores/langchain_astradb.vectorstores.AstraDBVectorStore.html)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7e40f714",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Query by turning into retriever\n",
|
||||
"\n",
|
||||
"You can also transform the vector store into a retriever for easier usage in your chains. \n",
|
||||
"\n",
|
||||
"Here is how to transform your vector store into a retriever and then invoke the retreiever with a simple query and filter."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "76381ce8-780a-4e3b-97b1-056d6782d7d5",
|
||||
"execution_count": 17,
|
||||
"id": "dcee50e6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = vstore.max_marginal_relevance_search(\n",
|
||||
" \"Our life is what we make of it\",\n",
|
||||
" k=3,\n",
|
||||
" filter={\"author\": \"aristotle\"},\n",
|
||||
"retriever = vector_store.as_retriever(\n",
|
||||
" search_type=\"similarity_score_threshold\",\n",
|
||||
" search_kwargs={\"k\": 1, \"score_threshold\": 0.5},\n",
|
||||
")\n",
|
||||
"for res in results:\n",
|
||||
" print(f\"* {res.page_content} [{res.metadata}]\")"
|
||||
"retriever.invoke(\"Stealing from the bank is a crime\", filter={\"source\": \"news\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "60fda5df-14e4-4fb0-bd17-65a393fab8a9",
|
||||
"id": "734e683a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Async\n",
|
||||
"## Usage for retrieval-augmented generation\n",
|
||||
"\n",
|
||||
"Note that the Astra DB vector store supports all fully async methods (`asimilarity_search`, `afrom_texts`, `adelete` and so on) natively, i.e. without thread wrapping involved."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1cc86edd-692b-4495-906c-ccfd13b03c23",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Deleting stored documents"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "38a70ec4-b522-4d32-9ead-c642864fca37",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"delete_1 = vstore.delete(inserted_ids[:3])\n",
|
||||
"print(f\"all_succeed={delete_1}\") # True, all documents deleted"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d4cf49ed-9d29-4ed9-bdab-51a308c41b8e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"delete_2 = vstore.delete(inserted_ids[2:5])\n",
|
||||
"print(f\"some_succeeds={delete_2}\") # True, though some IDs were gone already"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "847181ba-77d1-4a17-b7f9-9e2c3d8efd13",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## A minimal RAG chain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cd64b844-846f-43c5-a7dd-c26b9ed417d0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The next cells will implement a simple RAG pipeline:\n",
|
||||
"- download a sample PDF file and load it onto the store;\n",
|
||||
"- create a RAG chain with LCEL (LangChain Expression Language), with the vector store at its heart;\n",
|
||||
"- run the question-answering chain."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5cbc4dba-0d5e-4038-8fc5-de6cadd1c2a9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!curl -L \\\n",
|
||||
" \"https://github.com/awesome-astra/datasets/blob/main/demo-resources/what-is-philosophy/what-is-philosophy.pdf?raw=true\" \\\n",
|
||||
" -o \"what-is-philosophy.pdf\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "459385be-5e9c-47ff-ba53-2b7ae6166b09",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pdf_loader = PyPDFLoader(\"what-is-philosophy.pdf\")\n",
|
||||
"splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=64)\n",
|
||||
"docs_from_pdf = pdf_loader.load_and_split(text_splitter=splitter)\n",
|
||||
"For guides on how to use this vector store for retrieval-augmented generation (RAG), see the following sections:\n",
|
||||
"\n",
|
||||
"print(f\"Documents from PDF: {len(docs_from_pdf)}.\")\n",
|
||||
"inserted_ids_from_pdf = vstore.add_documents(docs_from_pdf)\n",
|
||||
"print(f\"Inserted {len(inserted_ids_from_pdf)} documents.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5010a66c-4298-4e32-82b5-2da0d36a5c70",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = vstore.as_retriever(search_kwargs={\"k\": 3})\n",
|
||||
"\n",
|
||||
"philo_template = \"\"\"\n",
|
||||
"You are a philosopher that draws inspiration from great thinkers of the past\n",
|
||||
"to craft well-thought answers to user questions. Use the provided context as the basis\n",
|
||||
"for your answers and do not make up new reasoning paths - just mix-and-match what you are given.\n",
|
||||
"Your answers must be concise and to the point, and refrain from answering about other topics than philosophy.\n",
|
||||
"\n",
|
||||
"CONTEXT:\n",
|
||||
"{context}\n",
|
||||
"\n",
|
||||
"QUESTION: {question}\n",
|
||||
"\n",
|
||||
"YOUR ANSWER:\"\"\"\n",
|
||||
"\n",
|
||||
"philo_prompt = ChatPromptTemplate.from_template(philo_template)\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI()\n",
|
||||
"\n",
|
||||
"chain = (\n",
|
||||
" {\"context\": retriever, \"question\": RunnablePassthrough()}\n",
|
||||
" | philo_prompt\n",
|
||||
" | llm\n",
|
||||
" | StrOutputParser()\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fcbc1296-6c7c-478b-b55b-533ba4e54ddb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain.invoke(\"How does Russel elaborate on Peirce's idea of the security blanket?\")"
|
||||
"- [Tutorials: working with external knowledge](https://python.langchain.com/v0.2/docs/tutorials/#working-with-external-knowledge)\n",
|
||||
"- [How-to: Question and answer with RAG](https://python.langchain.com/v0.2/docs/how_to/#qa-with-rag)\n",
|
||||
"- [Retrieval conceptual docs](https://python.langchain.com/v0.2/docs/concepts/#retrieval)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -562,7 +477,7 @@
|
||||
"id": "177610c7-50d0-4b7b-8634-b03338054c8e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Cleanup"
|
||||
"## Cleanup vector store"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -582,7 +497,17 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vstore.delete_collection()"
|
||||
"vector_store.delete_collection()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a14c34be",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `AstraDBVectorStore` features and configurations head to the API reference:https://api.python.langchain.com/en/latest/vectorstores/langchain_astradb.vectorstores.AstraDBVectorStore.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -602,7 +527,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -7,30 +7,23 @@
|
||||
"source": [
|
||||
"# Chroma\n",
|
||||
"\n",
|
||||
">[Chroma](https://docs.trychroma.com/getting-started) is a AI-native open-source vector database focused on developer productivity and happiness. Chroma is licensed under Apache 2.0.\n",
|
||||
"This notebook covers how to get started with the `Chroma` vector store.\n",
|
||||
"\n",
|
||||
[Chroma](https">
">[Chroma](https://docs.trychroma.com/getting-started) is an AI-native open-source vector database focused on developer productivity and happiness. Chroma is licensed under Apache 2.0. View the full docs of `Chroma` at [this page](https://docs.trychroma.com/reference/py-collection), and find the API reference for the LangChain integration at [this page](https://api.python.langchain.com/en/latest/vectorstores/langchain_chroma.vectorstores.Chroma.html).\n",
|
||||
"\n",
|
||||
"Install Chroma with:\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"```sh\n",
|
||||
"pip install langchain-chroma\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Chroma runs in various modes. See below for examples of each integrated with LangChain.\n",
|
||||
"- `in-memory` - in a python script or jupyter notebook\n",
|
||||
"- `in-memory with persistance` - in a script or notebook and save/load to disk\n",
|
||||
"- `in a docker container` - as a server running your local machine or in the cloud\n",
|
||||
"\n",
|
||||
"Like any other database, you can: \n",
|
||||
"- `.add` \n",
|
||||
"- `.get` \n",
|
||||
"- `.update`\n",
|
||||
"- `.upsert`\n",
|
||||
"- `.delete`\n",
|
||||
"- `.peek`\n",
|
||||
"- and `.query` runs the similarity search.\n",
|
||||
"\n",
|
||||
"View full docs at [docs](https://docs.trychroma.com/reference/py-collection). To access these methods directly, you can do `._collection.method()`\n"
|
||||
"To access `Chroma` vector stores you'll need to install the `langchain-chroma` integration package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "83a43688",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pip install -qU \"langchain-chroma>=0.1.2\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -38,149 +31,94 @@
|
||||
"id": "2b5ffbf8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Basic Example\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"In this basic example, we take the most recent State of the Union Address, split it into chunks, embed it using an open-source embedding model, load it into Chroma, and then query it."
|
||||
"You can use the `Chroma` vector store without any credentials, simply installing the package above is enough!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cd17cfed",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get best in-class automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dd7e1243",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f47f73f4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"### Basic Initialization \n",
|
||||
"\n",
|
||||
"Below is a basic initialization, including the use of a directory to save the data locally.\n",
|
||||
"\n",
|
||||
"```{=mdx}\n",
|
||||
"import EmbeddingTabs from \"@theme/EmbeddingTabs\";\n",
|
||||
"\n",
|
||||
"<EmbeddingTabs/>\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "ae9fcf3e",
|
||||
"id": "d3ed0a9a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# import\n",
|
||||
"from langchain_chroma import Chroma\n",
|
||||
"from langchain_community.document_loaders import TextLoader\n",
|
||||
"from langchain_community.embeddings.sentence_transformer import (\n",
|
||||
" SentenceTransformerEmbeddings,\n",
|
||||
")\n",
|
||||
"from langchain_text_splitters import CharacterTextSplitter\n",
|
||||
"# | output: false\n",
|
||||
"# | echo: false\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"# load the document and split it into chunks\n",
|
||||
"loader = TextLoader(\"../../how_to/state_of_the_union.txt\")\n",
|
||||
"documents = loader.load()\n",
|
||||
"\n",
|
||||
"# split it into chunks\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"# create the open-source embedding function\n",
|
||||
"embedding_function = SentenceTransformerEmbeddings(model_name=\"all-MiniLM-L6-v2\")\n",
|
||||
"\n",
|
||||
"# load it into Chroma\n",
|
||||
"db = Chroma.from_documents(docs, embedding_function)\n",
|
||||
"\n",
|
||||
"# query it\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = db.similarity_search(query)\n",
|
||||
"\n",
|
||||
"# print results\n",
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5c9a11cc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Basic Example (including saving to disk)\n",
|
||||
"\n",
|
||||
"Extending the previous example, if you want to save to disk, simply initialize the Chroma client and pass the directory where you want the data to be saved to. \n",
|
||||
"\n",
|
||||
"`Caution`: Chroma makes a best-effort to automatically save data to disk, however multiple in-memory clients can stop each other's work. As a best practice, only have one client per path running at any given time."
|
||||
"embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "49f9bd49",
|
||||
"execution_count": 16,
|
||||
"id": "3ea11a7b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# save to disk\n",
|
||||
"db2 = Chroma.from_documents(docs, embedding_function, persist_directory=\"./chroma_db\")\n",
|
||||
"docs = db2.similarity_search(query)\n",
|
||||
"from langchain_chroma import Chroma\n",
|
||||
"\n",
|
||||
"# load from disk\n",
|
||||
"db3 = Chroma(persist_directory=\"./chroma_db\", embedding_function=embedding_function)\n",
|
||||
"docs = db3.similarity_search(query)\n",
|
||||
"print(docs[0].page_content)"
|
||||
"vector_store = Chroma(\n",
|
||||
" collection_name=\"example_collection\",\n",
|
||||
" embedding_function=embeddings,\n",
|
||||
" persist_directory=\"./chroma_langchain_db\", # Where to save data locally, remove if not neccesary\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "63318cc9",
|
||||
"id": "ccb62a8c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Passing a Chroma Client into Langchain\n",
|
||||
"### Initialization from client\n",
|
||||
"\n",
|
||||
"You can also create a Chroma Client and pass it to LangChain. This is particularly useful if you want easier access to the underlying database.\n",
|
||||
"\n",
|
||||
"You can also specify the collection name that you want LangChain to use."
|
||||
"You can also initialize from a `Chroma` client, which is particularly useful if you want easier access to the underlying database."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "22f4a0ce",
|
||||
"id": "3fe4457f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Add of existing embedding ID: 1\n",
|
||||
"Add of existing embedding ID: 2\n",
|
||||
"Add of existing embedding ID: 3\n",
|
||||
"Add of existing embedding ID: 1\n",
|
||||
"Add of existing embedding ID: 2\n",
|
||||
"Add of existing embedding ID: 3\n",
|
||||
"Add of existing embedding ID: 1\n",
|
||||
"Insert of existing embedding ID: 1\n",
|
||||
"Add of existing embedding ID: 2\n",
|
||||
"Insert of existing embedding ID: 2\n",
|
||||
"Add of existing embedding ID: 3\n",
|
||||
"Insert of existing embedding ID: 3\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"There are 3 in the collection\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import chromadb\n",
|
||||
"\n",
|
||||
@@ -188,320 +126,320 @@
|
||||
"collection = persistent_client.get_or_create_collection(\"collection_name\")\n",
|
||||
"collection.add(ids=[\"1\", \"2\", \"3\"], documents=[\"a\", \"b\", \"c\"])\n",
|
||||
"\n",
|
||||
"langchain_chroma = Chroma(\n",
|
||||
"vector_store_from_client = Chroma(\n",
|
||||
" client=persistent_client,\n",
|
||||
" collection_name=\"collection_name\",\n",
|
||||
" embedding_function=embedding_function,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(\"There are\", langchain_chroma._collection.count(), \"in the collection\")"
|
||||
" embedding_function=embeddings,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e9cf6d70",
|
||||
"id": "9d037340",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Basic Example (using the Docker Container)\n",
|
||||
"## Manage vector store\n",
|
||||
"\n",
|
||||
"You can also run the Chroma Server in a Docker container separately, create a Client to connect to it, and then pass that to LangChain. \n",
|
||||
"Once you have created your vector store, we can interact with it by adding and deleting different items.\n",
|
||||
"\n",
|
||||
"Chroma has the ability to handle multiple `Collections` of documents, but the LangChain interface expects one, so we need to specify the collection name. The default collection name used by LangChain is \"langchain\".\n",
|
||||
"### Add items to vector store\n",
|
||||
"\n",
|
||||
"Here is how to clone, build, and run the Docker Image:\n",
|
||||
"```sh\n",
|
||||
"git clone git@github.com:chroma-core/chroma.git\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Edit the `docker-compose.yml` file and add `ALLOW_RESET=TRUE` under `environment`\n",
|
||||
"```yaml\n",
|
||||
" ...\n",
|
||||
" command: uvicorn chromadb.app:app --reload --workers 1 --host 0.0.0.0 --port 8000 --log-config log_config.yml\n",
|
||||
" environment:\n",
|
||||
" - IS_PERSISTENT=TRUE\n",
|
||||
" - ALLOW_RESET=TRUE\n",
|
||||
" ports:\n",
|
||||
" - 8000:8000\n",
|
||||
" ...\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Then run `docker-compose up -d --build`"
|
||||
"We can add items to our vector store by using the `add_documents` function."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "74aee70e",
|
||||
"execution_count": 17,
|
||||
"id": "da279339",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# create the chroma client\n",
|
||||
"import uuid\n",
|
||||
"\n",
|
||||
"import chromadb\n",
|
||||
"from chromadb.config import Settings\n",
|
||||
"\n",
|
||||
"client = chromadb.HttpClient(settings=Settings(allow_reset=True))\n",
|
||||
"client.reset() # resets the database\n",
|
||||
"collection = client.create_collection(\"my_collection\")\n",
|
||||
"for doc in docs:\n",
|
||||
" collection.add(\n",
|
||||
" ids=[str(uuid.uuid1())], metadatas=doc.metadata, documents=doc.page_content\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"# tell LangChain to use our client and collection name\n",
|
||||
"db4 = Chroma(\n",
|
||||
" client=client,\n",
|
||||
" collection_name=\"my_collection\",\n",
|
||||
" embedding_function=embedding_function,\n",
|
||||
")\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = db4.similarity_search(query)\n",
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9ed3ec50",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Update and Delete\n",
|
||||
"\n",
|
||||
"While building toward a real application, you want to go beyond adding data, and also update and delete data. \n",
|
||||
"\n",
|
||||
"Chroma has users provide `ids` to simplify the bookkeeping here. `ids` can be the name of the file, or a combined has like `filename_paragraphNumber`, etc.\n",
|
||||
"\n",
|
||||
"Chroma supports all these operations - though some of them are still being integrated all the way through the LangChain interface. Additional workflow improvements will be added soon.\n",
|
||||
"\n",
|
||||
"Here is a basic example showing how to do various operations:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "81a02810",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'source': '../../../state_of_the_union.txt'}\n",
|
||||
"{'ids': ['1'], 'embeddings': None, 'metadatas': [{'new_value': 'hello world', 'source': '../../../state_of_the_union.txt'}], 'documents': ['Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.']}\n",
|
||||
"count before 46\n",
|
||||
"count after 45\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# create simple ids\n",
|
||||
"ids = [str(i) for i in range(1, len(docs) + 1)]\n",
|
||||
"\n",
|
||||
"# add data\n",
|
||||
"example_db = Chroma.from_documents(docs, embedding_function, ids=ids)\n",
|
||||
"docs = example_db.similarity_search(query)\n",
|
||||
"print(docs[0].metadata)\n",
|
||||
"\n",
|
||||
"# update the metadata for a document\n",
|
||||
"docs[0].metadata = {\n",
|
||||
" \"source\": \"../../how_to/state_of_the_union.txt\",\n",
|
||||
" \"new_value\": \"hello world\",\n",
|
||||
"}\n",
|
||||
"example_db.update_document(ids[0], docs[0])\n",
|
||||
"print(example_db._collection.get(ids=[ids[0]]))\n",
|
||||
"\n",
|
||||
"# delete the last document\n",
|
||||
"print(\"count before\", example_db._collection.count())\n",
|
||||
"example_db._collection.delete(ids=[ids[-1]])\n",
|
||||
"print(\"count after\", example_db._collection.count())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ac6bc71a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use OpenAI Embeddings\n",
|
||||
"\n",
|
||||
"Many people like to use OpenAIEmbeddings, here is how to set that up."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "42080f37-8fd1-4cec-acd9-15d2b03b2f4d",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# get a token: https://platform.openai.com/account/api-keys\n",
|
||||
"\n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"OPENAI_API_KEY = getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "c7a94d6c-b4d4-4498-9bdd-eb50c92b85c5",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = OPENAI_API_KEY"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "5eabdb75",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"new_client = chromadb.EphemeralClient()\n",
|
||||
"openai_lc_client = Chroma.from_documents(\n",
|
||||
" docs, embeddings, client=new_client, collection_name=\"openai_collection\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = openai_lc_client.similarity_search(query)\n",
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6d9c28ad",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"***\n",
|
||||
"\n",
|
||||
"## Other Information"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "18152965",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Similarity search with score"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "346347d7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The returned distance score is cosine distance. Therefore, a lower score is better."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "72aaa9c8",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = db.similarity_search_with_score(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "d88e958e",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt'}),\n",
|
||||
" 1.1972057819366455)"
|
||||
"['f22ed484-6db3-4b76-adb1-18a777426cd6',\n",
|
||||
" 'e0d5bab4-6453-4511-9a37-023d9d288faa',\n",
|
||||
" '877d76b8-3580-4d9e-a13f-eed0fa3d134a',\n",
|
||||
" '26eaccab-81ce-4c0a-8e76-bf542647df18',\n",
|
||||
" 'bcaa8239-7986-4050-bf40-e14fb7dab997',\n",
|
||||
" 'cdc44b38-a83f-4e49-b249-7765b334e09d',\n",
|
||||
" 'a7a35354-2687-4bc2-8242-3849a4d18d34',\n",
|
||||
" '8780caf1-d946-4f27-a707-67d037e9e1d8',\n",
|
||||
" 'dec6af2a-7326-408f-893d-7d7d717dfda9',\n",
|
||||
" '3b18e210-bb59-47a0-8e17-c8e51176ea5e']"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs[0]"
|
||||
"from uuid import uuid4\n",
|
||||
"\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"document_1 = Document(\n",
|
||||
" page_content=\"I had chocalate chip pancakes and scrambled eggs for breakfast this morning.\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
" id=1,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_2 = Document(\n",
|
||||
" page_content=\"The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
" id=2,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_3 = Document(\n",
|
||||
" page_content=\"Building an exciting new project with LangChain - come check it out!\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
" id=3,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_4 = Document(\n",
|
||||
" page_content=\"Robbers broke into the city bank and stole $1 million in cash.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
" id=4,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_5 = Document(\n",
|
||||
" page_content=\"Wow! That was an amazing movie. I can't wait to see it again.\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
" id=5,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_6 = Document(\n",
|
||||
" page_content=\"Is the new iPhone worth the price? Read this review to find out.\",\n",
|
||||
" metadata={\"source\": \"website\"},\n",
|
||||
" id=6,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_7 = Document(\n",
|
||||
" page_content=\"The top 10 soccer players in the world right now.\",\n",
|
||||
" metadata={\"source\": \"website\"},\n",
|
||||
" id=7,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_8 = Document(\n",
|
||||
" page_content=\"LangGraph is the best framework for building stateful, agentic applications!\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
" id=8,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_9 = Document(\n",
|
||||
" page_content=\"The stock market is down 500 points today due to fears of a recession.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
" id=9,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_10 = Document(\n",
|
||||
" page_content=\"I have a bad feeling I am going to get deleted :(\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
" id=10,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"documents = [\n",
|
||||
" document_1,\n",
|
||||
" document_2,\n",
|
||||
" document_3,\n",
|
||||
" document_4,\n",
|
||||
" document_5,\n",
|
||||
" document_6,\n",
|
||||
" document_7,\n",
|
||||
" document_8,\n",
|
||||
" document_9,\n",
|
||||
" document_10,\n",
|
||||
"]\n",
|
||||
"uuids = [str(uuid4()) for _ in range(len(documents))]\n",
|
||||
"\n",
|
||||
"vector_store.add_documents(documents=documents, ids=uuids)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "794a7552",
|
||||
"id": "7add6366",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Retriever options\n",
|
||||
"### Update items in vector store\n",
|
||||
"\n",
|
||||
"This section goes over different options for how to use Chroma as a retriever.\n",
|
||||
"\n",
|
||||
"#### MMR\n",
|
||||
"\n",
|
||||
"In addition to using similarity search in the retriever object, you can also use `mmr`."
|
||||
"Now that we have added documents to our vector store, we can update existing documents by using the `update_documents` function. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "96ff911a",
|
||||
"execution_count": 5,
|
||||
"id": "ef5dbd1e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = db.as_retriever(search_type=\"mmr\")"
|
||||
"updated_document_1 = Document(\n",
|
||||
" page_content=\"I had chocalate chip pancakes and fried eggs for breakfast this morning.\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
" id=1,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"updated_document_2 = Document(\n",
|
||||
" page_content=\"The weather forecast for tomorrow is sunny and warm, with a high of 82 degrees.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
" id=2,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"vector_store.update_document(document_id=uuids[0], document=updated_document_1)\n",
|
||||
"# You can also update multiple documents at once\n",
|
||||
"vector_store.update_documents(\n",
|
||||
" ids=uuids[:2], documents=[updated_document_1, updated_document_1]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "74b9a13a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Delete items from vector store\n",
|
||||
"\n",
|
||||
"We can also delete items from our vector store as follows:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "56f17791",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vector_store.delete(ids=uuids[-1])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "213acf08",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Query vector store\n",
|
||||
"\n",
|
||||
"Once your vector store has been created and the relevant documents have been added you will most likely wish to query it during the running of your chain or agent. \n",
|
||||
"\n",
|
||||
"### Query directly\n",
|
||||
"\n",
|
||||
"#### Similarity search\n",
|
||||
"\n",
|
||||
"Performing a simple similarity search can be done as follows:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "e2b96fcf",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"* Building an exciting new project with LangChain - come check it out! [{'source': 'tweet'}]\n",
|
||||
"* LangGraph is the best framework for building stateful, agentic applications! [{'source': 'tweet'}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = vector_store.similarity_search(\n",
|
||||
" \"LangChain provides abstractions to make working with LLMs easy\",\n",
|
||||
" k=2,\n",
|
||||
" filter={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"for res in results:\n",
|
||||
" print(f\"* {res.page_content} [{res.metadata}]\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cdd117ea",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Similarity search with score\n",
|
||||
"\n",
|
||||
"If you want to execute a similarity search and receive the corresponding scores you can run:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "2768a331",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"* [SIM=1.726390] The stock market is down 500 points today due to fears of a recession. [{'source': 'news'}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = vector_store.similarity_search_with_score(\n",
|
||||
" \"Will it be hot tomorrow?\", k=1, filter={\"source\": \"news\"}\n",
|
||||
")\n",
|
||||
"for res, score in results:\n",
|
||||
" print(f\"* [SIM={score:3f}] {res.page_content} [{res.metadata}]\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "92b436c8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Search by vector\n",
|
||||
"\n",
|
||||
"You can also search by vector:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "8ea434a5",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"* I had chocalate chip pancakes and fried eggs for breakfast this morning. [{'source': 'tweet'}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = vector_store.similarity_search_by_vector(\n",
|
||||
" embedding=embeddings.embed_query(\"I love green eggs and ham!\"), k=1\n",
|
||||
")\n",
|
||||
"for doc in results:\n",
|
||||
" print(f\"* {doc.page_content} [{doc.metadata}]\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9c1c1e6f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Other search methods\n",
|
||||
"\n",
|
||||
"There are a variety of other search methods that are not covered in this notebook, such as MMR search or searching by vector. For a full list of the search abilities available for `AstraDBVectorStore` check out the [API reference](https://api.python.langchain.com/en/latest/vectorstores/langchain_astradb.vectorstores.AstraDBVectorStore.html).\n",
|
||||
"\n",
|
||||
"### Query by turning into retriever\n",
|
||||
"\n",
|
||||
"You can also transform the vector store into a retriever for easier usage in your chains. For more information on the different search types and kwargs you can pass, please visit the API reference [here](https://api.python.langchain.com/en/latest/vectorstores/langchain_chroma.vectorstores.Chroma.html#langchain_chroma.vectorstores.Chroma.as_retriever)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "f00be6d0",
|
||||
"id": "7b6f7867",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt'})"
|
||||
"[Document(metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
@@ -510,41 +448,34 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"retriever.invoke(query)[0]"
|
||||
"retriever = vector_store.as_retriever(\n",
|
||||
" search_type=\"mmr\", search_kwargs={\"k\": 1, \"fetch_k\": 5}\n",
|
||||
")\n",
|
||||
"retriever.invoke(\"Stealing from the bank is a crime\", filter={\"source\": \"news\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "275dbd0a",
|
||||
"id": "a2b7b73c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Filtering on metadata\n",
|
||||
"## Usage for retrieval-augmented generation\n",
|
||||
"\n",
|
||||
"It can be helpful to narrow down the collection before working with it.\n",
|
||||
"For guides on how to use this vector store for retrieval-augmented generation (RAG), see the following sections:\n",
|
||||
"\n",
|
||||
"For example, collections can be filtered on metadata using the get method."
|
||||
"- [Tutorials: working with external knowledge](https://python.langchain.com/v0.2/docs/tutorials/#working-with-external-knowledge)\n",
|
||||
"- [How-to: Question and answer with RAG](https://python.langchain.com/v0.2/docs/how_to/#qa-with-rag)\n",
|
||||
"- [Retrieval conceptual docs](https://python.langchain.com/v0.2/docs/concepts/#retrieval)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "81600dc1",
|
||||
"cell_type": "markdown",
|
||||
"id": "fed28359",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'ids': [], 'embeddings': None, 'metadatas': [], 'documents': []}"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# filter collection for updated source\n",
|
||||
"example_db.get(where={\"source\": \"some_other_source\"})"
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `Chroma` vector store features and configurations head to the API reference: https://api.python.langchain.com/en/latest/vectorstores/langchain_chroma.vectorstores.Chroma.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -564,7 +495,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.10"
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -9,37 +9,18 @@
|
||||
"\n",
|
||||
"> [ClickHouse](https://clickhouse.com/) is the fastest and most resource efficient open-source database for real-time apps and analytics with full SQL support and a wide range of functions to assist users in writing analytical queries. Lately added data structures and distance search functions (like `L2Distance`) as well as [approximate nearest neighbor search indexes](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/annindexes) enable ClickHouse to be used as a high performance and scalable vector database to store and search vectors with SQL.\n",
|
||||
"\n",
|
||||
"You'll need to install `langchain-community` with `pip install -qU langchain-community` to use this integration\n",
|
||||
"This notebook shows how to use functionality related to the `ClickHouse` vector store.\n",
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the `ClickHouse` vector search."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "43ead5d5-2c1f-4dce-a69a-cb00e4f9d6f0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setting up environments"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b2c434bc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Setting up local clickhouse server with docker (optional)"
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"First set up a local clickhouse server with docker:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "249a7751",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-06-03T08:43:43.035606Z",
|
||||
"start_time": "2023-06-03T08:43:42.618531Z"
|
||||
}
|
||||
},
|
||||
"id": "8c4d2e16",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"! docker run -d -p 8123:8123 -p9000:9000 --name langchain-clickhouse-server --ulimit nofile=262144:262144 clickhouse/clickhouse-server:23.4.2.11"
|
||||
@@ -47,52 +28,82 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7bd3c1c0",
|
||||
"id": "0acb2a8d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Setup up clickhouse client driver"
|
||||
"You'll need to install `langchain-community` and `clickhouse-connect` to use this integration"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9d614bf8",
|
||||
"id": "d454fb7c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet clickhouse-connect"
|
||||
"pip install -qU langchain-community clickhouse-connect"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "15a1d477-9cdb-4d82-b019-96951ecb2b72",
|
||||
"id": "3df5501b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We want to use OpenAIEmbeddings so we have to get the OpenAI API Key."
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"There are no credentials for this notebook, just make sure you have installed the packages as shown above."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "54d5276f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get best in-class automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "91003ea5-0c8c-436c-a5de-aaeaeef2f458",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-06-03T08:49:35.383673Z",
|
||||
"start_time": "2023-06-03T08:49:33.984547Z"
|
||||
}
|
||||
},
|
||||
"execution_count": null,
|
||||
"id": "f6fd5b03",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2b87fe34",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"if not os.environ[\"OPENAI_API_KEY\"]:\n",
|
||||
" os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
|
||||
"```{=mdx}\n",
|
||||
"import EmbeddingTabs from \"@theme/EmbeddingTabs\";\n",
|
||||
"\n",
|
||||
"<EmbeddingTabs/>\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": null,
|
||||
"id": "60276097",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# | output: false\n",
|
||||
"# | echo: false\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "aac9563e",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -104,176 +115,178 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.vectorstores import Clickhouse, ClickhouseSettings\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"from langchain_text_splitters import CharacterTextSplitter"
|
||||
"\n",
|
||||
"settings = ClickhouseSettings(table=\"clickhouse_example\")\n",
|
||||
"vector_store = Clickhouse(embeddings, config=settings)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "32dd3f67",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Manage vector store\n",
|
||||
"\n",
|
||||
"Once you have created your vector store, we can interact with it by adding and deleting different items.\n",
|
||||
"\n",
|
||||
"### Add items to vector store\n",
|
||||
"\n",
|
||||
"We can add items to our vector store by using the `add_documents` function."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "a3c3999a",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-06-03T08:33:32.527387Z",
|
||||
"start_time": "2023-06-03T08:33:32.501312Z"
|
||||
},
|
||||
"tags": []
|
||||
},
|
||||
"execution_count": null,
|
||||
"id": "944743ee",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import TextLoader\n",
|
||||
"from uuid import uuid4\n",
|
||||
"\n",
|
||||
"loader = TextLoader(\"../../how_to/state_of_the_union.txt\")\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "6e104aee",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-06-03T08:33:35.503823Z",
|
||||
"start_time": "2023-06-03T08:33:33.745832Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Inserting data...: 100%|██████████| 42/42 [00:00<00:00, 2801.49it/s]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for d in docs:\n",
|
||||
" d.metadata = {\"some\": \"metadata\"}\n",
|
||||
"settings = ClickhouseSettings(table=\"clickhouse_vector_search_example\")\n",
|
||||
"docsearch = Clickhouse.from_documents(docs, embeddings, config=settings)\n",
|
||||
"document_1 = Document(\n",
|
||||
" page_content=\"I had chocalate chip pancakes and scrambled eggs for breakfast this morning.\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = docsearch.similarity_search(query)"
|
||||
"document_2 = Document(\n",
|
||||
" page_content=\"The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_3 = Document(\n",
|
||||
" page_content=\"Building an exciting new project with LangChain - come check it out!\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_4 = Document(\n",
|
||||
" page_content=\"Robbers broke into the city bank and stole $1 million in cash.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_5 = Document(\n",
|
||||
" page_content=\"Wow! That was an amazing movie. I can't wait to see it again.\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_6 = Document(\n",
|
||||
" page_content=\"Is the new iPhone worth the price? Read this review to find out.\",\n",
|
||||
" metadata={\"source\": \"website\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_7 = Document(\n",
|
||||
" page_content=\"The top 10 soccer players in the world right now.\",\n",
|
||||
" metadata={\"source\": \"website\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_8 = Document(\n",
|
||||
" page_content=\"LangGraph is the best framework for building stateful, agentic applications!\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_9 = Document(\n",
|
||||
" page_content=\"The stock market is down 500 points today due to fears of a recession.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_10 = Document(\n",
|
||||
" page_content=\"I have a bad feeling I am going to get deleted :(\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"documents = [\n",
|
||||
" document_1,\n",
|
||||
" document_2,\n",
|
||||
" document_3,\n",
|
||||
" document_4,\n",
|
||||
" document_5,\n",
|
||||
" document_6,\n",
|
||||
" document_7,\n",
|
||||
" document_8,\n",
|
||||
" document_9,\n",
|
||||
" document_10,\n",
|
||||
"]\n",
|
||||
"uuids = [str(uuid4()) for _ in range(len(documents))]\n",
|
||||
"\n",
|
||||
"vector_store.add_documents(documents=documents, ids=uuids)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "18af81cc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Delete items from vector store\n",
|
||||
"\n",
|
||||
"We can delete items from our vector store by ID by using the `delete` function."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "9c608226",
|
||||
"execution_count": null,
|
||||
"id": "12b32762",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(docs[0].page_content)"
|
||||
"vector_store.delete(ids=uuids[-1])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e3a8b105",
|
||||
"id": "ada27577",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Get connection info and data schema"
|
||||
"## Query vector store\n",
|
||||
"\n",
|
||||
"Once your vector store has been created and the relevant documents have been added you will most likely wish to query it during the running of your chain or agent. \n",
|
||||
"\n",
|
||||
"### Query directly\n",
|
||||
"\n",
|
||||
"#### Similarity search\n",
|
||||
"\n",
|
||||
"Performing a simple similarity search can be done as follows:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "69996818",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-06-03T08:28:58.252991Z",
|
||||
"start_time": "2023-06-03T08:28:58.197560Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[92m\u001b[1mdefault.clickhouse_vector_search_example @ localhost:8123\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1musername: None\u001b[0m\n",
|
||||
"\n",
|
||||
"Table Schema:\n",
|
||||
"---------------------------------------------------\n",
|
||||
"|\u001b[94mid \u001b[0m|\u001b[96mNullable(String) \u001b[0m|\n",
|
||||
"|\u001b[94mdocument \u001b[0m|\u001b[96mNullable(String) \u001b[0m|\n",
|
||||
"|\u001b[94membedding \u001b[0m|\u001b[96mArray(Float32) \u001b[0m|\n",
|
||||
"|\u001b[94mmetadata \u001b[0m|\u001b[96mObject('json') \u001b[0m|\n",
|
||||
"|\u001b[94muuid \u001b[0m|\u001b[96mUUID \u001b[0m|\n",
|
||||
"---------------------------------------------------\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": null,
|
||||
"id": "015831a3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(str(docsearch))"
|
||||
"results = vector_store.similarity_search(\n",
|
||||
" \"LangChain provides abstractions to make working with LLMs easy\", k=2\n",
|
||||
")\n",
|
||||
"for res in results:\n",
|
||||
" print(f\"* {res.page_content} [{res.metadata}]\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "324ac147",
|
||||
"id": "623d3b9d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Clickhouse table schema"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b5bd7c5b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"> Clickhouse table will be automatically created if not exist by default. Advanced users could pre-create the table with optimized settings. For distributed Clickhouse cluster with sharding, table engine should be configured as `Distributed`."
|
||||
"#### Similarity search with score\n",
|
||||
"\n",
|
||||
"You can also search with score:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "54f4f561",
|
||||
"execution_count": null,
|
||||
"id": "e7d43430",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Clickhouse Table DDL:\n",
|
||||
"\n",
|
||||
"CREATE TABLE IF NOT EXISTS default.clickhouse_vector_search_example(\n",
|
||||
" id Nullable(String),\n",
|
||||
" document Nullable(String),\n",
|
||||
" embedding Array(Float32),\n",
|
||||
" metadata JSON,\n",
|
||||
" uuid UUID DEFAULT generateUUIDv4(),\n",
|
||||
" CONSTRAINT cons_vec_len CHECK length(embedding) = 1536,\n",
|
||||
" INDEX vec_idx embedding TYPE annoy(100,'L2Distance') GRANULARITY 1000\n",
|
||||
") ENGINE = MergeTree ORDER BY uuid SETTINGS index_granularity = 8192\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(f\"Clickhouse Table DDL:\\n\\n{docsearch.schema}\")"
|
||||
"results = vector_store.similarity_search_with_score(\"Will it be hot tomorrow?\", k=1)\n",
|
||||
"for res, score in results:\n",
|
||||
" print(f\"* [SIM={score:3f}] {res.page_content} [{res.metadata}]\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f59360c0",
|
||||
"id": "f5a90c12",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Filtering\n",
|
||||
@@ -287,94 +300,87 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "232055f6",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-06-03T08:29:36.680805Z",
|
||||
"start_time": "2023-06-03T08:29:34.963676Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Inserting data...: 100%|██████████| 42/42 [00:00<00:00, 6939.56it/s]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": null,
|
||||
"id": "169d01d1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import TextLoader\n",
|
||||
"from langchain_community.vectorstores import Clickhouse, ClickhouseSettings\n",
|
||||
"\n",
|
||||
"loader = TextLoader(\"../../how_to/state_of_the_union.txt\")\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"\n",
|
||||
"for i, d in enumerate(docs):\n",
|
||||
" d.metadata = {\"doc_id\": i}\n",
|
||||
"\n",
|
||||
"docsearch = Clickhouse.from_documents(docs, embeddings)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "ddbcee77",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-06-03T08:29:43.487436Z",
|
||||
"start_time": "2023-06-03T08:29:43.040831Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.6779101415357189 {'doc_id': 0} Madam Speaker, Madam...\n",
|
||||
"0.6997970363474885 {'doc_id': 8} And so many families...\n",
|
||||
"0.7044504914336727 {'doc_id': 1} Groups of citizens b...\n",
|
||||
"0.7053558702165094 {'doc_id': 6} And I’m taking robus...\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"meta = docsearch.metadata_column\n",
|
||||
"output = docsearch.similarity_search_with_relevance_scores(\n",
|
||||
" \"What did the president say about Ketanji Brown Jackson?\",\n",
|
||||
"meta = vector_store.metadata_column\n",
|
||||
"results = vector_store.similarity_search_with_relevance_scores(\n",
|
||||
" \"What did I eat for breakfast?\",\n",
|
||||
" k=4,\n",
|
||||
" where_str=f\"{meta}.doc_id<10\",\n",
|
||||
" where_str=f\"{meta}.source = 'tweet'\",\n",
|
||||
")\n",
|
||||
"for d, dist in output:\n",
|
||||
" print(dist, d.metadata, d.page_content[:20] + \"...\")"
|
||||
"for res in results:\n",
|
||||
" print(f\"* {res.page_content} [{res.metadata}]\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a359ed74",
|
||||
"id": "d86fa4bf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Deleting your data"
|
||||
"#### Other search methods\n",
|
||||
"\n",
|
||||
"There are a variety of other search methods that are not covered in this notebook, such as MMR search or searching by vector. For a full list of the search abilities available for `Clickhouse` vector store check out the [API reference](https://api.python.langchain.com/en/latest/vectorstores/langchain_community.vectorstores.clickhouse.Clickhouse.html)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "afacfd4e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Query by turning into retriever\n",
|
||||
"\n",
|
||||
"You can also transform the vector store into a retriever for easier usage in your chains. \n",
|
||||
"\n",
|
||||
"Here is how to transform your vector store into a retriever and then invoke the retreiever with a simple query and filter."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "fb6a9d36",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-06-03T08:30:24.822384Z",
|
||||
"start_time": "2023-06-03T08:30:24.798571Z"
|
||||
}
|
||||
},
|
||||
"execution_count": null,
|
||||
"id": "97187188",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docsearch.drop()"
|
||||
"retriever = vector_store.as_retriever(\n",
|
||||
" search_type=\"similarity_score_threshold\",\n",
|
||||
" search_kwargs={\"k\": 1, \"score_threshold\": 0.5},\n",
|
||||
")\n",
|
||||
"retriever.invoke(\"Stealing from the bank is a crime\", filter={\"source\": \"news\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "57fade30",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage for retrieval-augmented generation\n",
|
||||
"\n",
|
||||
"For guides on how to use this vector store for retrieval-augmented generation (RAG), see the following sections:\n",
|
||||
"\n",
|
||||
"- [Tutorials: working with external knowledge](https://python.langchain.com/v0.2/docs/tutorials/#working-with-external-knowledge)\n",
|
||||
"- [How-to: Question and answer with RAG](https://python.langchain.com/v0.2/docs/how_to/#qa-with-rag)\n",
|
||||
"- [Retrieval conceptual docs](https://python.langchain.com/v0.2/docs/concepts/#retrieval)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "db24787c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For more, check out a complete RAG template using Astra DB [here](https://github.com/langchain-ai/langchain/tree/master/templates/rag-astradb)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "02452d34",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `AstraDBVectorStore` features and configurations head to the API reference:https://api.python.langchain.com/en/latest/vectorstores/langchain_community.vectorstores.clickhouse.Clickhouse.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -394,7 +400,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
"\n",
|
||||
"Vector Search is a part of the [Full Text Search Service](https://docs.couchbase.com/server/current/learn/services-and-indexes/services/search-service.html) (Search Service) in Couchbase.\n",
|
||||
"\n",
|
||||
"This tutorial explains how to use Vector Search in Couchbase. You can work with both [Couchbase Capella](https://www.couchbase.com/products/capella/) and your self-managed Couchbase Server."
|
||||
"This tutorial explains how to use Vector Search in Couchbase. You can work with either [Couchbase Capella](https://www.couchbase.com/products/capella/) and your self-managed Couchbase Server."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -18,30 +18,64 @@
|
||||
"id": "43326be4-4433-4de2-ad42-6eb91a722bad",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Installation"
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access the `CouchbaseVectorStore` you first need to install the `langchain-couchbase` partner package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": null,
|
||||
"id": "bec8d532-fec7-4dc7-9be3-020aa7bdb01f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain langchain-openai langchain-couchbase"
|
||||
"pip install -qU langchain-couchbase"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "30d6861e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Head over to the Couchbase [website](https://cloud.couchbase.com) and create a new connection, making sure to save your database username and password:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "4a972cbc-bf59-46eb-9b50-e5dc3a69dcf0",
|
||||
"execution_count": null,
|
||||
"id": "d98e3baa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
|
||||
"COUCHBASE_CONNECTION_STRING = getpass.getpass(\n",
|
||||
" \"Enter the connection string for the Couchbase cluster: \"\n",
|
||||
")\n",
|
||||
"DB_USERNAME = getpass.getpass(\"Enter the username for the Couchbase cluster: \")\n",
|
||||
"DB_PASSWORD = getpass.getpass(\"Enter the password for the Couchbase cluster: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "23ac2c64",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get best in-class automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9c25ec38",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -49,18 +83,9 @@
|
||||
"id": "acf1b168-622f-465c-a9a5-d27a6d7e7a8f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Import the Vector Store and Embeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "23ce45ab-bfd2-42e1-b681-514a550f0232",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_couchbase.vectorstores import CouchbaseVectorStore\n",
|
||||
"from langchain_openai import OpenAIEmbeddings"
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"Before instantiating we need to create a connection."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -68,31 +93,18 @@
|
||||
"id": "3144ba02-1eaa-4449-853e-f034ca5706bf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Couchbase Connection Object\n",
|
||||
"### Create Couchbase Connection Object\n",
|
||||
"\n",
|
||||
"We create a connection to the Couchbase cluster initially and then pass the cluster object to the Vector Store. \n",
|
||||
"\n",
|
||||
"Here, we are connecting using the username and password. You can also connect using any other supported way to your cluster. \n",
|
||||
"Here, we are connecting using the username and password from above. You can also connect using any other supported way to your cluster. \n",
|
||||
"\n",
|
||||
"For more information on connecting to the Couchbase cluster, please check the [Python SDK documentation](https://docs.couchbase.com/python-sdk/current/hello-world/start-using-sdk.html#connect)."
|
||||
"For more information on connecting to the Couchbase cluster, please check the [documentation](https://docs.couchbase.com/python-sdk/current/hello-world/start-using-sdk.html#connect)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "52fe583a-12db-4dc2-9281-1174bf1d4e5c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"COUCHBASE_CONNECTION_STRING = (\n",
|
||||
" \"couchbase://localhost\" # or \"couchbases://localhost\" if using TLS\n",
|
||||
")\n",
|
||||
"DB_USERNAME = \"Administrator\"\n",
|
||||
"DB_PASSWORD = \"Password\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": null,
|
||||
"id": "9986c6b9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -123,145 +135,15 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": null,
|
||||
"id": "1b1d0a26-e9d4-4823-9800-9549d24d3d16",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"BUCKET_NAME = \"testing\"\n",
|
||||
"BUCKET_NAME = \"langchain_bucket\"\n",
|
||||
"SCOPE_NAME = \"_default\"\n",
|
||||
"COLLECTION_NAME = \"_default\"\n",
|
||||
"SEARCH_INDEX_NAME = \"vector-index\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "efbac6ff-c2ac-4443-9250-7cc88061346b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For this tutorial, we will use OpenAI embeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "87625579-86d7-4de4-8a4d-cee674a6b676",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3677b4b0-3711-419c-89ff-32ef4d3e3022",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create the Search Index\n",
|
||||
"Currently, the Search index needs to be created from the Couchbase Capella or Server UI or using the REST interface. \n",
|
||||
"\n",
|
||||
"Let us define a Search index with the name `vector-index` on the testing bucket\n",
|
||||
"\n",
|
||||
"For this example, let us use the Import Index feature on the Search Service on the UI. \n",
|
||||
"\n",
|
||||
"We are defining an index on the `testing` bucket's `_default` scope on the `_default` collection with the vector field set to `embedding` with 1536 dimensions and the text field set to `text`. We are also indexing and storing all the fields under `metadata` in the document as a dynamic mapping to account for varying document structures. The similarity metric is set to `dot_product`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "655117ae-9b1f-4139-b437-ca7685975a54",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### How to Import an Index to the Full Text Search service?\n",
|
||||
" - [Couchbase Server](https://docs.couchbase.com/server/current/search/import-search-index.html)\n",
|
||||
" - Click on Search -> Add Index -> Import\n",
|
||||
" - Copy the following Index definition in the Import screen\n",
|
||||
" - Click on Create Index to create the index.\n",
|
||||
" - [Couchbase Capella](https://docs.couchbase.com/cloud/search/import-search-index.html)\n",
|
||||
" - Copy the index definition to a new file `index.json`\n",
|
||||
" - Import the file in Capella using the instructions in the documentation.\n",
|
||||
" - Click on Create Index to create the index.\n",
|
||||
" \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f85bc468-d9b8-487d-999a-3b5d2fb78e41",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Index Definition\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"name\": \"vector-index\",\n",
|
||||
" \"type\": \"fulltext-index\",\n",
|
||||
" \"params\": {\n",
|
||||
" \"doc_config\": {\n",
|
||||
" \"docid_prefix_delim\": \"\",\n",
|
||||
" \"docid_regexp\": \"\",\n",
|
||||
" \"mode\": \"type_field\",\n",
|
||||
" \"type_field\": \"type\"\n",
|
||||
" },\n",
|
||||
" \"mapping\": {\n",
|
||||
" \"default_analyzer\": \"standard\",\n",
|
||||
" \"default_datetime_parser\": \"dateTimeOptional\",\n",
|
||||
" \"default_field\": \"_all\",\n",
|
||||
" \"default_mapping\": {\n",
|
||||
" \"dynamic\": true,\n",
|
||||
" \"enabled\": true,\n",
|
||||
" \"properties\": {\n",
|
||||
" \"metadata\": {\n",
|
||||
" \"dynamic\": true,\n",
|
||||
" \"enabled\": true\n",
|
||||
" },\n",
|
||||
" \"embedding\": {\n",
|
||||
" \"enabled\": true,\n",
|
||||
" \"dynamic\": false,\n",
|
||||
" \"fields\": [\n",
|
||||
" {\n",
|
||||
" \"dims\": 1536,\n",
|
||||
" \"index\": true,\n",
|
||||
" \"name\": \"embedding\",\n",
|
||||
" \"similarity\": \"dot_product\",\n",
|
||||
" \"type\": \"vector\",\n",
|
||||
" \"vector_index_optimized_for\": \"recall\"\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
" },\n",
|
||||
" \"text\": {\n",
|
||||
" \"enabled\": true,\n",
|
||||
" \"dynamic\": false,\n",
|
||||
" \"fields\": [\n",
|
||||
" {\n",
|
||||
" \"index\": true,\n",
|
||||
" \"name\": \"text\",\n",
|
||||
" \"store\": true,\n",
|
||||
" \"type\": \"text\"\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" \"default_type\": \"_default\",\n",
|
||||
" \"docvalues_dynamic\": false,\n",
|
||||
" \"index_dynamic\": true,\n",
|
||||
" \"store_dynamic\": true,\n",
|
||||
" \"type_field\": \"_type\"\n",
|
||||
" },\n",
|
||||
" \"store\": {\n",
|
||||
" \"indexType\": \"scorch\",\n",
|
||||
" \"segmentVersion\": 16\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" \"sourceType\": \"gocbcore\",\n",
|
||||
" \"sourceName\": \"testing\",\n",
|
||||
" \"sourceParams\": {},\n",
|
||||
" \"planParams\": {\n",
|
||||
" \"maxPartitionsPerPIndex\": 103,\n",
|
||||
" \"indexPartitions\": 10,\n",
|
||||
" \"numReplicas\": 0\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"```"
|
||||
"COLLECTION_NAME = \"default\"\n",
|
||||
"SEARCH_INDEX_NAME = \"langchain-test-index\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -269,7 +151,7 @@
|
||||
"id": "556dc68c-9089-4390-8dc9-b77051e7fc34",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For more details on how to create a Search index with support for Vector fields, please refer to the documentation.\n",
|
||||
"For details on how to create a Search index with support for Vector fields, please refer to the documentation.\n",
|
||||
"\n",
|
||||
"- [Couchbase Capella](https://docs.couchbase.com/cloud/vector-search/create-vector-search-index-ui.html)\n",
|
||||
" \n",
|
||||
@@ -281,17 +163,40 @@
|
||||
"id": "75f4037d-e509-4de7-a8d1-63a05de24e9d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Vector Store\n",
|
||||
"We create the vector store object with the cluster information and the search index name."
|
||||
"### Simple Instantiation\n",
|
||||
"\n",
|
||||
"Below, we create the vector store object with the cluster information and the search index name. \n",
|
||||
"\n",
|
||||
"```{=mdx}\n",
|
||||
"import EmbeddingTabs from \"@theme/EmbeddingTabs\";\n",
|
||||
"\n",
|
||||
"<EmbeddingTabs/>\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": null,
|
||||
"id": "6706efdd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# | output: false\n",
|
||||
"# | echo: false\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "33db4670-76c5-49ba-94d6-a8fa35583058",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_couchbase.vectorstores import CouchbaseVectorStore\n",
|
||||
"\n",
|
||||
"vector_store = CouchbaseVectorStore(\n",
|
||||
" cluster=cluster,\n",
|
||||
" bucket_name=BUCKET_NAME,\n",
|
||||
@@ -308,9 +213,18 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Specify the Text & Embeddings Field\n",
|
||||
"You can optionally specify the text & embeddings field for the document using the `text_key` and `embedding_key` fields.\n",
|
||||
"```\n",
|
||||
"vector_store = CouchbaseVectorStore(\n",
|
||||
"\n",
|
||||
"You can optionally specify the text & embeddings field for the document using the `text_key` and `embedding_key` fields."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "49c38634",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vector_store_specific = CouchbaseVectorStore(\n",
|
||||
" cluster=cluster,\n",
|
||||
" bucket_name=BUCKET_NAME,\n",
|
||||
" scope_name=SCOPE_NAME,\n",
|
||||
@@ -319,73 +233,148 @@
|
||||
" index_name=SEARCH_INDEX_NAME,\n",
|
||||
" text_key=\"text\",\n",
|
||||
" embedding_key=\"embedding\",\n",
|
||||
")\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "790dc1ac-0ab8-4cb5-989d-31ca7c241068",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Basic Vector Search Example\n",
|
||||
"For this example, we are going to load the \"state_of_the_union.txt\" file via the TextLoader, chunk the text into 500 character chunks with no overlaps and index all these chunks into Couchbase.\n",
|
||||
"\n",
|
||||
"After the data is indexed, we perform a simple query to find the top 4 chunks that are similar to the query \"What did president say about Ketanji Brown Jackson\".\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "440350df-cbc6-48f7-8009-2e783be18306",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import TextLoader\n",
|
||||
"from langchain_text_splitters import CharacterTextSplitter\n",
|
||||
"\n",
|
||||
"loader = TextLoader(\"../../how_to/state_of_the_union.txt\")\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "9d3b4c7c-abd6-4dfa-ad63-470f16661319",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vector_store = CouchbaseVectorStore.from_documents(\n",
|
||||
" documents=docs,\n",
|
||||
" embedding=embeddings,\n",
|
||||
" cluster=cluster,\n",
|
||||
" bucket_name=BUCKET_NAME,\n",
|
||||
" scope_name=SCOPE_NAME,\n",
|
||||
" collection_name=COLLECTION_NAME,\n",
|
||||
" index_name=SEARCH_INDEX_NAME,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "91fdce6c-8f7c-4060-865a-2fd742846664",
|
||||
"cell_type": "markdown",
|
||||
"id": "50e95fa6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_content='One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.' metadata={'source': '../../how_to/state_of_the_union.txt'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"What did president say about Ketanji Brown Jackson\"\n",
|
||||
"results = vector_store.similarity_search(query)\n",
|
||||
"print(results[0])"
|
||||
"## Manage vector store\n",
|
||||
"\n",
|
||||
"Once you have created your vector store, you can interact with it by adding and deleting different items.\n",
|
||||
"\n",
|
||||
"### Add items to vector store\n",
|
||||
"\n",
|
||||
"We can add items to our vector store by using the `add_documents` function."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "65a35f00",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from uuid import uuid4\n",
|
||||
"\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"document_1 = Document(\n",
|
||||
" page_content=\"I had chocalate chip pancakes and scrambled eggs for breakfast this morning.\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_2 = Document(\n",
|
||||
" page_content=\"The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_3 = Document(\n",
|
||||
" page_content=\"Building an exciting new project with LangChain - come check it out!\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_4 = Document(\n",
|
||||
" page_content=\"Robbers broke into the city bank and stole $1 million in cash.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_5 = Document(\n",
|
||||
" page_content=\"Wow! That was an amazing movie. I can't wait to see it again.\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_6 = Document(\n",
|
||||
" page_content=\"Is the new iPhone worth the price? Read this review to find out.\",\n",
|
||||
" metadata={\"source\": \"website\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_7 = Document(\n",
|
||||
" page_content=\"The top 10 soccer players in the world right now.\",\n",
|
||||
" metadata={\"source\": \"website\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_8 = Document(\n",
|
||||
" page_content=\"LangGraph is the best framework for building stateful, agentic applications!\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_9 = Document(\n",
|
||||
" page_content=\"The stock market is down 500 points today due to fears of a recession.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_10 = Document(\n",
|
||||
" page_content=\"I have a bad feeling I am going to get deleted :(\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"documents = [\n",
|
||||
" document_1,\n",
|
||||
" document_2,\n",
|
||||
" document_3,\n",
|
||||
" document_4,\n",
|
||||
" document_5,\n",
|
||||
" document_6,\n",
|
||||
" document_7,\n",
|
||||
" document_8,\n",
|
||||
" document_9,\n",
|
||||
" document_10,\n",
|
||||
"]\n",
|
||||
"uuids = [str(uuid4()) for _ in range(len(documents))]\n",
|
||||
"\n",
|
||||
"vector_store.add_documents(documents=documents, ids=uuids)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "dd33b030",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Delete items from vector store"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3a05f294",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vector_store.delete(ids=[uuids[-1]])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d2cc4126",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Query vector store\n",
|
||||
"\n",
|
||||
"Once your vector store has been created and the relevant documents have been added, you will most likely wish to query it during the running of your chain or agent.\n",
|
||||
"\n",
|
||||
"### Query directly\n",
|
||||
"\n",
|
||||
"#### Similarity search\n",
|
||||
"\n",
|
||||
"Performing a simple similarity search can be done as follows:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8e00bb23",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"results = vector_store.similarity_search(\n",
|
||||
" \"LangChain provides abstractions to make working with LLMs easy\",\n",
|
||||
" k=2,\n",
|
||||
")\n",
|
||||
"for res in results:\n",
|
||||
" print(f\"* {res.page_content} [{res.metadata}]\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -393,31 +382,21 @@
|
||||
"id": "d9b46c93-65f6-4e4f-87a2-5cebea3b7a6b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Similarity Search with Score\n",
|
||||
"You can fetch the scores for the results by calling the `similarity_search_with_score` method."
|
||||
"#### Similarity search with Score\n",
|
||||
"\n",
|
||||
"You can also fetch the scores for the results by calling the `similarity_search_with_score` method."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": null,
|
||||
"id": "24b146b2-55a2-4fe8-8659-3649032f5dc7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_content='One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.' metadata={'source': '../../how_to/state_of_the_union.txt'}\n",
|
||||
"Score: 0.8211871385574341\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did president say about Ketanji Brown Jackson\"\n",
|
||||
"results = vector_store.similarity_search_with_score(query)\n",
|
||||
"document, score = results[0]\n",
|
||||
"print(document)\n",
|
||||
"print(f\"Score: {score}\")"
|
||||
"results = vector_store.similarity_search_with_score(\"Will it be hot tomorrow?\", k=1)\n",
|
||||
"for res, score in results:\n",
|
||||
" print(f\"* [SIM={score:3f}] {res.page_content} [{res.metadata}]\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -425,7 +404,8 @@
|
||||
"id": "9983e83d-efd0-4b75-80db-150e0694e822",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Specifying Fields to Return\n",
|
||||
"### Specifying Fields to Return\n",
|
||||
"\n",
|
||||
"You can specify the fields to return from the document using the `fields` parameter in the searches. These fields are returned as part of the `metadata` object in the returned Document. You can fetch any field that is stored in the Search index. The `text_key` of the document is returned as part of the document's `page_content`.\n",
|
||||
"\n",
|
||||
"If you do not specify any fields to be fetched, all the fields stored in the index are returned.\n",
|
||||
@@ -437,20 +417,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": null,
|
||||
"id": "ffa743dc-4e89-405b-ad71-7390338889e6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_content='One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.' metadata={'source': '../../how_to/state_of_the_union.txt'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did president say about Ketanji Brown Jackson\"\n",
|
||||
"query = \"What did I eat for breakfast today?\"\n",
|
||||
"results = vector_store.similarity_search(query, fields=[\"metadata.source\"])\n",
|
||||
"print(results[0])"
|
||||
]
|
||||
@@ -460,7 +432,8 @@
|
||||
"id": "a5e45eb2-aa97-45df-bcc5-410e9626e506",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Hybrid Search\n",
|
||||
"### Hybrid Queries\n",
|
||||
"\n",
|
||||
"Couchbase allows you to do hybrid searches by combining Vector Search results with searches on non-vector fields of the document like the `metadata` object. \n",
|
||||
"\n",
|
||||
"The results will be based on the combination of the results from both Vector Search and the searches supported by Search Service. The scores of each of the component searches are added up to get the total score of the result.\n",
|
||||
@@ -474,26 +447,26 @@
|
||||
"id": "a5db3685-1918-4c63-8148-0bb3a71ea677",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create Diverse Metadata for Hybrid Search\n",
|
||||
"#### Create Diverse Metadata for Hybrid Search\n",
|
||||
"In order to simulate hybrid search, let us create some random metadata from the existing documents. \n",
|
||||
"We uniformly add three fields to the metadata, `date` between 2010 & 2020, `rating` between 1 & 5 and `author` set to either John Doe or Jane Doe. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": null,
|
||||
"id": "7d2e607d-6bbc-4cef-83e3-b6a28bb269ea",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'author': 'John Doe', 'date': '2016-01-01', 'rating': 2, 'source': '../../how_to/state_of_the_union.txt'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import TextLoader\n",
|
||||
"from langchain_text_splitters import CharacterTextSplitter\n",
|
||||
"\n",
|
||||
"loader = TextLoader(\"../../how_to/state_of_the_union.txt\")\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"# Adding metadata to documents\n",
|
||||
"for i, doc in enumerate(docs):\n",
|
||||
" doc.metadata[\"date\"] = f\"{range(2010, 2020)[i % 10]}-01-01\"\n",
|
||||
@@ -512,24 +485,16 @@
|
||||
"id": "6cad893b-3977-4556-ab1d-d12bce68b306",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Example: Search by Exact Value\n",
|
||||
"### Query by Exact Value\n",
|
||||
"We can search for exact matches on a textual field like the author in the `metadata` object."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": null,
|
||||
"id": "dc06ba4a-8a6b-4c55-bb69-95cd92db273f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_content='This is personal to me and Jill, to Kamala, and to so many of you. \\n\\nCancer is the #2 cause of death in America–second only to heart disease. \\n\\nLast month, I announced our plan to supercharge \\nthe Cancer Moonshot that President Obama asked me to lead six years ago. \\n\\nOur goal is to cut the cancer death rate by at least 50% over the next 25 years, turn more cancers from death sentences into treatable diseases. \\n\\nMore support for patients and families.' metadata={'author': 'John Doe'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"results = vector_store.similarity_search(\n",
|
||||
@@ -545,7 +510,7 @@
|
||||
"id": "9106b594-b41e-4329-b98c-9b9f8a34d6f7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Example: Search by Partial Match\n",
|
||||
"### Query by Partial Match\n",
|
||||
"We can search for partial matches by specifying a fuzziness for the search. This is useful when you want to search for slight variations or misspellings of a search query.\n",
|
||||
"\n",
|
||||
"Here, \"Jae\" is close (fuzziness of 1) to \"Jane\"."
|
||||
@@ -553,18 +518,10 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": null,
|
||||
"id": "fd4749e6-ef4f-4cb5-95ff-37c4fa8283d8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system.' metadata={'author': 'Jane Doe'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"results = vector_store.similarity_search(\n",
|
||||
@@ -582,24 +539,16 @@
|
||||
"id": "1bbf9449-6e30-4bd1-9eeb-f3b60952fcab",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Example: Search by Date Range Query\n",
|
||||
"### Query by Date Range Query\n",
|
||||
"We can search for documents that are within a date range query on a date field like `metadata.date`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": null,
|
||||
"id": "b7b47e7d-c32f-4999-bce9-3c3c3cebffd0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_content='He will never extinguish their love of freedom. He will never weaken the resolve of the free world. \\n\\nWe meet tonight in an America that has lived through two of the hardest years this nation has ever faced. \\n\\nThe pandemic has been punishing. \\n\\nAnd so many families are living paycheck to paycheck, struggling to keep up with the rising cost of food, gas, housing, and so much more. \\n\\nI understand.' metadata={'author': 'Jane Doe', 'date': '2017-01-01', 'rating': 3, 'source': '../../how_to/state_of_the_union.txt'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"Any mention about independence?\"\n",
|
||||
"results = vector_store.similarity_search(\n",
|
||||
@@ -622,24 +571,16 @@
|
||||
"id": "a18d4ea2-bfab-4f15-9839-674faf1c6f0d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Example: Search by Numeric Range Query\n",
|
||||
"### Query by Numeric Range Query\n",
|
||||
"We can search for documents that are within a range for a numeric field like `metadata.rating`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": null,
|
||||
"id": "7e8bf7c5-07d1-4c3f-86d7-1fa3a454dc7f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(Document(page_content='He will never extinguish their love of freedom. He will never weaken the resolve of the free world. \\n\\nWe meet tonight in an America that has lived through two of the hardest years this nation has ever faced. \\n\\nThe pandemic has been punishing. \\n\\nAnd so many families are living paycheck to paycheck, struggling to keep up with the rising cost of food, gas, housing, and so much more. \\n\\nI understand.', metadata={'author': 'Jane Doe', 'date': '2017-01-01', 'rating': 3, 'source': '../../how_to/state_of_the_union.txt'}), 0.9000703597577832)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"Any mention about independence?\"\n",
|
||||
"results = vector_store.similarity_search_with_score(\n",
|
||||
@@ -662,7 +603,7 @@
|
||||
"id": "0f16bf86-f01c-4a77-8406-275f7313f493",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Example: Combining Multiple Search Queries\n",
|
||||
"### Combining Multiple Search Queries\n",
|
||||
"Different search queries can be combined using AND (conjuncts) or OR (disjuncts) operators.\n",
|
||||
"\n",
|
||||
"In this example, we are checking for documents with a rating between 3 & 4 and dated between 2015 & 2018."
|
||||
@@ -670,18 +611,10 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"execution_count": null,
|
||||
"id": "dd0fe7f1-aa40-4c6f-889b-99ad5efcd88b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(Document(page_content='He will never extinguish their love of freedom. He will never weaken the resolve of the free world. \\n\\nWe meet tonight in an America that has lived through two of the hardest years this nation has ever faced. \\n\\nThe pandemic has been punishing. \\n\\nAnd so many families are living paycheck to paycheck, struggling to keep up with the rising cost of food, gas, housing, and so much more. \\n\\nI understand.', metadata={'author': 'Jane Doe', 'date': '2017-01-01', 'rating': 3, 'source': '../../how_to/state_of_the_union.txt'}), 1.3598770370389914)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"Any mention about independence?\"\n",
|
||||
"results = vector_store.similarity_search_with_score(\n",
|
||||
@@ -710,6 +643,46 @@
|
||||
"- [Couchbase Server](https://docs.couchbase.com/server/current/search/search-request-params.html#query-object)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "db0a1d74",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Query by turning into retriever\n",
|
||||
"\n",
|
||||
"You can also transform the vector store into a retriever for easier usage in your chains. \n",
|
||||
"\n",
|
||||
"Here is how to transform your vector store into a retriever and then invoke the retriever with a simple query and filter."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3666265a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = vector_store.as_retriever(\n",
|
||||
" search_type=\"similarity_score_threshold\",\n",
|
||||
" search_kwargs={\"k\": 1, \"score_threshold\": 0.5},\n",
|
||||
")\n",
|
||||
"retriever.invoke(\"Stealing from the bank is a crime\", filter={\"source\": \"news\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "28ab35ec",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage for retrieval-augmented generation\n",
|
||||
"\n",
|
||||
"For guides on how to use this vector store for retrieval-augmented generation (RAG), see the following sections:\n",
|
||||
"\n",
|
||||
"- [Tutorials: working with external knowledge](https://python.langchain.com/v0.2/docs/tutorials/#working-with-external-knowledge)\n",
|
||||
"- [How-to: Question and answer with RAG](https://python.langchain.com/v0.2/docs/how_to/#qa-with-rag)\n",
|
||||
"- [Retrieval conceptual docs](https://python.langchain.com/v0.2/docs/concepts/#retrieval)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "80958c2b-6a67-45e6-b7f0-fd2461d75e0f",
|
||||
@@ -761,6 +734,16 @@
|
||||
"* [Couchbase Capella](https://docs.couchbase.com/cloud/search/create-child-mapping.html)\n",
|
||||
"* [Couchbase Server](https://docs.couchbase.com/server/current/search/create-child-mapping.html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d876b769",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `CouchbaseVectorStore` features and configurations head to the API reference: https://api.python.langchain.com/en/latest/vectorstores/langchain_couchbase.vectorstores.CouchbaseVectorStore.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -779,7 +762,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.13"
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -7,11 +7,9 @@
|
||||
"source": [
|
||||
"# Faiss\n",
|
||||
"\n",
|
||||
">[Facebook AI Similarity Search (Faiss)](https://engineering.fb.com/2017/03/29/data-infrastructure/faiss-a-library-for-efficient-similarity-search/) is a library for efficient similarity search and clustering of dense vectors. It contains algorithms that search in sets of vectors of any size, up to ones that possibly do not fit in RAM. It also contains supporting code for evaluation and parameter tuning.\n",
|
||||
">[Facebook AI Similarity Search (FAISS)](https://engineering.fb.com/2017/03/29/data-infrastructure/faiss-a-library-for-efficient-similarity-search/) is a library for efficient similarity search and clustering of dense vectors. It contains algorithms that search in sets of vectors of any size, up to ones that possibly do not fit in RAM. It also contains supporting code for evaluation and parameter tuning.\n",
|
||||
"\n",
|
||||
"[Faiss documentation](https://faiss.ai/).\n",
|
||||
"\n",
|
||||
"You'll need to install `langchain-community` with `pip install -qU langchain-community` to use this integration\n",
|
||||
"You can find the FAISS documentation at [this page](https://faiss.ai/).\n",
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the `FAISS` vector database. It will show functionality specific to this integration. After going through, it may be useful to explore [relevant use-case pages](/docs/how_to#qa-with-rag) to learn how to use this vectorstore as part of a larger chain."
|
||||
]
|
||||
@@ -25,28 +23,19 @@
|
||||
"source": [
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"The integration lives in the `langchain-community` package. We also need to install the `faiss` package itself. We will also be using OpenAI for embeddings, so we need to install those requirements. We can install these with:\n",
|
||||
"The integration lives in the `langchain-community` package. We also need to install the `faiss` package itself. We can install these with:\n",
|
||||
"\n",
|
||||
"```bash\n",
|
||||
"pip install -U langchain-community faiss-cpu langchain-openai tiktoken\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Note that you can also install `faiss-gpu` if you want to use the GPU enabled version\n",
|
||||
"\n",
|
||||
"Since we are using OpenAI, you will need an OpenAI API Key."
|
||||
"Note that you can also install `faiss-gpu` if you want to use the GPU enabled version"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "23984e60-c29a-461a-be2b-219108ac37ee",
|
||||
"id": "08165d56",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass()"
|
||||
"pip install -qU langchain-community faiss-cpu"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -54,7 +43,7 @@
|
||||
"id": "408be78f-7b0e-44d4-8d48-56a6cb9b3fb9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"It's also helpful (but not needed) to set up [LangSmith](https://smith.langchain.com/) for best-in-class observability"
|
||||
"If you want to get best-in-class automated tracing of your model calls, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -73,200 +62,311 @@
|
||||
"id": "78dde98a-584f-4f2a-98d5-e776fd9558fa",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Ingestion\n",
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"Here, we ingest documents into the vectorstore"
|
||||
"```{=mdx}\n",
|
||||
"import EmbeddingTabs from \"@theme/EmbeddingTabs\";\n",
|
||||
"\n",
|
||||
"<EmbeddingTabs/>\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "dc37144c-208d-4ab3-9f3a-0407a69fe052",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"42"
|
||||
]
|
||||
},
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Uncomment the following line if you need to initialize FAISS with no AVX2 optimization\n",
|
||||
"# os.environ['FAISS_NO_AVX2'] = '1'\n",
|
||||
"\n",
|
||||
"from langchain_community.document_loaders import TextLoader\n",
|
||||
"from langchain_community.vectorstores import FAISS\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"from langchain_text_splitters import CharacterTextSplitter\n",
|
||||
"\n",
|
||||
"loader = TextLoader(\"../../how_to/state_of_the_union.txt\")\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"db = FAISS.from_documents(docs, embeddings)\n",
|
||||
"print(db.index.ntotal)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ecdd7a65-f310-4b36-bc1e-2a39dfd58d5f",
|
||||
"id": "5b394da3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## Querying\n",
|
||||
"# | output: false\n",
|
||||
"# | echo: false\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"Now, we can query the vectorstore. There are a few methods to do this. The most standard is to use `similarity_search`."
|
||||
"embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "5eabdb75",
|
||||
"id": "dc37144c-208d-4ab3-9f3a-0407a69fe052",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = db.similarity_search(query)"
|
||||
"import faiss\n",
|
||||
"from langchain_community.docstore.in_memory import InMemoryDocstore\n",
|
||||
"from langchain_community.vectorstores import FAISS\n",
|
||||
"\n",
|
||||
"index = faiss.IndexFlatL2(len(embeddings.embed_query(\"hello world\")))\n",
|
||||
"\n",
|
||||
"vector_store = FAISS(\n",
|
||||
" embedding_function=embeddings,\n",
|
||||
" index=index,\n",
|
||||
" docstore=InMemoryDocstore(),\n",
|
||||
" index_to_docstore_id={},\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d8761614",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Manage vector store\n",
|
||||
"\n",
|
||||
"### Add items to vector store"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "4b172de8",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6d9286c2-0802-4f02-8f9a-9f7fae7c79b0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## As a Retriever\n",
|
||||
"\n",
|
||||
"We can also convert the vectorstore into a [Retriever](/docs/how_to#retrievers) class. This allows us to easily use it in other LangChain methods, which largely work with retrievers"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "6e91b475-3878-44e0-8720-98d903754b46",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = db.as_retriever()\n",
|
||||
"docs = retriever.invoke(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "046739d2-91fe-4101-8b72-c0bcdd9e02b9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f13473b5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Similarity Search with score\n",
|
||||
"There are some FAISS specific methods. One of them is `similarity_search_with_score`, which allows you to return not only the documents but also the distance score of the query to them. The returned distance score is L2 distance. Therefore, a lower score is better."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "186ee1d8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs_and_scores = db.similarity_search_with_score(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "284e04b5",
|
||||
"id": "3867e154",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../how_to/state_of_the_union.txt'}),\n",
|
||||
" 0.36913747)"
|
||||
"['22f5ce99-cd6f-4e0c-8dab-664128307c72',\n",
|
||||
" 'dc3f061b-5f88-4fa1-a966-413550c51891',\n",
|
||||
" 'd33d890b-baad-47f7-b7c1-175f5f7b4e59',\n",
|
||||
" '6e6c01d2-6020-4a7b-95da-ef43d43f01b5',\n",
|
||||
" 'e677223d-ad75-4c1a-bef6-b5912bd1de03',\n",
|
||||
" '47e2a168-6462-4ed2-b1d9-d9edfd7391d6',\n",
|
||||
" '1e4d66d6-e155-4891-9212-f7be97f36c6a',\n",
|
||||
" 'c0663096-e1a5-4665-b245-1c2e6c4fb653',\n",
|
||||
" '8297474a-7f7c-4006-9865-398c1781b1bc',\n",
|
||||
" '44e4be03-0a8d-4316-b3c4-f35f4bb2b532']"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs_and_scores[0]"
|
||||
"from uuid import uuid4\n",
|
||||
"\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"document_1 = Document(\n",
|
||||
" page_content=\"I had chocalate chip pancakes and scrambled eggs for breakfast this morning.\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_2 = Document(\n",
|
||||
" page_content=\"The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_3 = Document(\n",
|
||||
" page_content=\"Building an exciting new project with LangChain - come check it out!\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_4 = Document(\n",
|
||||
" page_content=\"Robbers broke into the city bank and stole $1 million in cash.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_5 = Document(\n",
|
||||
" page_content=\"Wow! That was an amazing movie. I can't wait to see it again.\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_6 = Document(\n",
|
||||
" page_content=\"Is the new iPhone worth the price? Read this review to find out.\",\n",
|
||||
" metadata={\"source\": \"website\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_7 = Document(\n",
|
||||
" page_content=\"The top 10 soccer players in the world right now.\",\n",
|
||||
" metadata={\"source\": \"website\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_8 = Document(\n",
|
||||
" page_content=\"LangGraph is the best framework for building stateful, agentic applications!\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_9 = Document(\n",
|
||||
" page_content=\"The stock market is down 500 points today due to fears of a recession.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_10 = Document(\n",
|
||||
" page_content=\"I have a bad feeling I am going to get deleted :(\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"documents = [\n",
|
||||
" document_1,\n",
|
||||
" document_2,\n",
|
||||
" document_3,\n",
|
||||
" document_4,\n",
|
||||
" document_5,\n",
|
||||
" document_6,\n",
|
||||
" document_7,\n",
|
||||
" document_8,\n",
|
||||
" document_9,\n",
|
||||
" document_10,\n",
|
||||
"]\n",
|
||||
"uuids = [str(uuid4()) for _ in range(len(documents))]\n",
|
||||
"\n",
|
||||
"vector_store.add_documents(documents=documents, ids=uuids)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f34420cf",
|
||||
"id": "a410a2dc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"It is also possible to do a search for documents similar to a given embedding vector using `similarity_search_by_vector` which accepts an embedding vector as a parameter instead of a string."
|
||||
"### Delete items from vector store"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "b558ebb7",
|
||||
"execution_count": 4,
|
||||
"id": "c3db04bd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"embedding_vector = embeddings.embed_query(query)\n",
|
||||
"docs_and_scores = db.similarity_search_by_vector(embedding_vector)"
|
||||
"vector_store.delete(ids=[uuids[-1]])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "77de24ff",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Query vector store\n",
|
||||
"\n",
|
||||
"Once your vector store has been created and the relevant documents have been added you will most likely wish to query it during the running of your chain or agent. \n",
|
||||
"\n",
|
||||
"### Query directly\n",
|
||||
"\n",
|
||||
"#### Similarity search\n",
|
||||
"\n",
|
||||
"Performing a simple similarity search with filtering on metadata can be done as follows:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "53d95d3f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"* Building an exciting new project with LangChain - come check it out! [{'source': 'tweet'}]\n",
|
||||
"* LangGraph is the best framework for building stateful, agentic applications! [{'source': 'tweet'}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = vector_store.similarity_search(\n",
|
||||
" \"LangChain provides abstractions to make working with LLMs easy\",\n",
|
||||
" k=2,\n",
|
||||
" filter={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"for res in results:\n",
|
||||
" print(f\"* {res.page_content} [{res.metadata}]\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5ae35069",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Similarity search with score\n",
|
||||
"\n",
|
||||
"You can also search with score:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "a9078ce9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"* [SIM=0.893688] The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees. [{'source': 'news'}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = vector_store.similarity_search_with_score(\n",
|
||||
" \"Will it be hot tomorrow?\", k=1, filter={\"source\": \"news\"}\n",
|
||||
")\n",
|
||||
"for res, score in results:\n",
|
||||
" print(f\"* [SIM={score:3f}] {res.page_content} [{res.metadata}]\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e9091b1f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Other search methods\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"There are a variety of other ways to search a FAISS vector store. For a complete list of those methods, please refer to the [API Reference](https://api.python.langchain.com/en/latest/vectorstores/langchain_community.vectorstores.faiss.FAISS.html)\n",
|
||||
"\n",
|
||||
"### Query by turning into retriever\n",
|
||||
"\n",
|
||||
"You can also transform the vector store into a retriever for easier usage in your chains. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "10da64fa",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"retriever = vector_store.as_retriever(search_type=\"mmr\", search_kwargs={\"k\": 1})\n",
|
||||
"retriever.invoke(\"Stealing from the bank is a crime\", filter={\"source\": \"news\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5edd1909",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage for retrieval-augmented generation\n",
|
||||
"\n",
|
||||
"For guides on how to use this vector store for retrieval-augmented generation (RAG), see the following sections:\n",
|
||||
"\n",
|
||||
"- [Tutorials: working with external knowledge](https://python.langchain.com/v0.2/docs/tutorials/#working-with-external-knowledge)\n",
|
||||
"- [How-to: Question and answer with RAG](https://python.langchain.com/v0.2/docs/how_to/#qa-with-rag)\n",
|
||||
"- [Retrieval conceptual docs](https://python.langchain.com/v0.2/docs/concepts/#retrieval)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -280,31 +380,33 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 11,
|
||||
"id": "1b31fe27-e0b3-42c6-b17c-8270b517ee1f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db.save_local(\"faiss_index\")\n",
|
||||
"vector_store.save_local(\"faiss_index\")\n",
|
||||
"\n",
|
||||
"new_db = FAISS.load_local(\"faiss_index\", embeddings)\n",
|
||||
"new_vector_store = FAISS.load_local(\n",
|
||||
" \"faiss_index\", embeddings, allow_dangerous_deserialization=True\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"docs = new_db.similarity_search(query)"
|
||||
"docs = new_vector_store.similarity_search(\"qux\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 12,
|
||||
"id": "98378c4e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt'})"
|
||||
"Document(metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!')"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -313,33 +415,6 @@
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "30c8f57b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Serializing and De-Serializing to bytes\n",
|
||||
"\n",
|
||||
"You can pickle the FAISS index with these functions. If you use an embeddings model that is 90 MB in size (sentence-transformers/all-MiniLM-L6-v2 or any other model), the resulting pickle would be larger than 90 MB, because the size of the model is also included in the overall size. To overcome this, use the functions below. These functions serialize only the FAISS index, so the size is much smaller. This can be helpful if you wish to store the index in a database such as SQL."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d8faead5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_huggingface import HuggingFaceEmbeddings\n",
|
||||
"\n",
|
||||
"pkl = db.serialize_to_bytes() # serializes the faiss\n",
|
||||
"embeddings = HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\")\n",
|
||||
"\n",
|
||||
"db = FAISS.deserialize_from_bytes(\n",
|
||||
" embeddings=embeddings, serialized=pkl\n",
|
||||
") # Load the index"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "57da60d4",
|
||||
@@ -351,10 +426,21 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 13,
|
||||
"id": "9b8f5e31-3f40-4e94-8d97-5883125efba7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'b752e805-350e-4cf5-ba54-0883d46a3a44': Document(page_content='foo')}"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"db1 = FAISS.from_texts([\"foo\"], embeddings)\n",
|
||||
"db2 = FAISS.from_texts([\"bar\"], embeddings)\n",
|
||||
@@ -364,17 +450,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 14,
|
||||
"id": "83392605",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'807e0c63-13f6-4070-9774-5c6f0fbb9866': Document(page_content='bar', metadata={})}"
|
||||
"{'08192d92-746d-4cd1-b681-bdfba411f459': Document(page_content='bar')}"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -385,7 +471,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 15,
|
||||
"id": "a3fcc1c7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -395,18 +481,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 16,
|
||||
"id": "41c51f89",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'068c473b-d420-487a-806b-fb0ccea7f711': Document(page_content='foo', metadata={}),\n",
|
||||
" '807e0c63-13f6-4070-9774-5c6f0fbb9866': Document(page_content='bar', metadata={})}"
|
||||
"{'b752e805-350e-4cf5-ba54-0883d46a3a44': Document(page_content='foo'),\n",
|
||||
" '08192d92-746d-4cd1-b681-bdfba411f459': Document(page_content='bar')}"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -417,169 +503,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f4294b96",
|
||||
"id": "65654d80",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Similarity Search with filtering\n",
|
||||
"The FAISS vectorstore can also support filtering. Since FAISS does not natively support filtering, we have to do it manually. This is done by first fetching more results than `k` and then filtering them. This filter is either a callable that takes as input a metadata dict and returns a bool, or a metadata dict where each missing key is ignored and each present key must be in a list of values. You can also set the `fetch_k` parameter when calling any search method to set how many documents you want to fetch before filtering. Here is a small example:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "d5bf812c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Content: foo, Metadata: {'page': 1}, Score: 5.159960813797904e-15\n",
|
||||
"Content: foo, Metadata: {'page': 2}, Score: 5.159960813797904e-15\n",
|
||||
"Content: foo, Metadata: {'page': 3}, Score: 5.159960813797904e-15\n",
|
||||
"Content: foo, Metadata: {'page': 4}, Score: 5.159960813797904e-15\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_core.documents import Document\n",
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"list_of_documents = [\n",
|
||||
" Document(page_content=\"foo\", metadata=dict(page=1)),\n",
|
||||
" Document(page_content=\"bar\", metadata=dict(page=1)),\n",
|
||||
" Document(page_content=\"foo\", metadata=dict(page=2)),\n",
|
||||
" Document(page_content=\"barbar\", metadata=dict(page=2)),\n",
|
||||
" Document(page_content=\"foo\", metadata=dict(page=3)),\n",
|
||||
" Document(page_content=\"bar burr\", metadata=dict(page=3)),\n",
|
||||
" Document(page_content=\"foo\", metadata=dict(page=4)),\n",
|
||||
" Document(page_content=\"bar bruh\", metadata=dict(page=4)),\n",
|
||||
"]\n",
|
||||
"db = FAISS.from_documents(list_of_documents, embeddings)\n",
|
||||
"results_with_scores = db.similarity_search_with_score(\"foo\")\n",
|
||||
"for doc, score in results_with_scores:\n",
|
||||
" print(f\"Content: {doc.page_content}, Metadata: {doc.metadata}, Score: {score}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3d33c126",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now we make the same query call but we filter for only `page = 1` "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "83159330",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Content: foo, Metadata: {'page': 1}, Score: 5.159960813797904e-15\n",
|
||||
"Content: bar, Metadata: {'page': 1}, Score: 0.3131446838378906\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results_with_scores = db.similarity_search_with_score(\"foo\", filter=dict(page=1))\n",
|
||||
"# Or with a callable:\n",
|
||||
"# results_with_scores = db.similarity_search_with_score(\"foo\", filter=lambda d: d[\"page\"] == 1)\n",
|
||||
"for doc, score in results_with_scores:\n",
|
||||
" print(f\"Content: {doc.page_content}, Metadata: {doc.metadata}, Score: {score}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0be136e0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Same thing can be done with the `max_marginal_relevance_search` as well."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "432c6980",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Content: foo, Metadata: {'page': 1}\n",
|
||||
"Content: bar, Metadata: {'page': 1}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = db.max_marginal_relevance_search(\"foo\", filter=dict(page=1))\n",
|
||||
"for doc in results:\n",
|
||||
" print(f\"Content: {doc.page_content}, Metadata: {doc.metadata}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b4ecd86",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Here is an example of how to set `fetch_k` parameter when calling `similarity_search`. Usually you would want the `fetch_k` parameter >> `k` parameter. This is because the `fetch_k` parameter is the number of documents that will be fetched before filtering. If you set `fetch_k` to a low number, you might not get enough documents to filter from."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "1fd60fd1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Content: foo, Metadata: {'page': 1}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = db.similarity_search(\"foo\", filter=dict(page=1), k=1, fetch_k=4)\n",
|
||||
"for doc in results:\n",
|
||||
" print(f\"Content: {doc.page_content}, Metadata: {doc.metadata}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1becca53",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Delete\n",
|
||||
"\n",
|
||||
"You can also delete records from vectorstore. In the example below `db.index_to_docstore_id` represents a dictionary with elements of the FAISS index."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "1408b870",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"count before: 8\n",
|
||||
"count after: 7"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(\"count before:\", db.index.ntotal)\n",
|
||||
"db.delete([db.index_to_docstore_id[0]])\n",
|
||||
"print(\"count after:\", db.index.ntotal)"
|
||||
"For detailed documentation of all `FAISS` vector store features and configurations head to the API reference: https://api.python.langchain.com/en/latest/vectorstores/langchain_community.vectorstores.faiss.FAISS.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -599,7 +528,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.1"
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
Binary file not shown.
29
docs/docs/integrations/vectorstores/index.mdx
Normal file
29
docs/docs/integrations/vectorstores/index.mdx
Normal file
@@ -0,0 +1,29 @@
|
||||
---
|
||||
sidebar_position: 0
|
||||
sidebar_class_name: hidden
|
||||
keywords: [compatibility]
|
||||
custom_edit_url:
|
||||
---
|
||||
|
||||
# Vectorstores
|
||||
|
||||
## Features
|
||||
|
||||
The table below lists the features for some of our most popular vector stores.
|
||||
|
||||
Vectorstore|Delete by ID|Filtering|Search by Vector|Search with score|Async|Passes Standard Tests|Multi Tenancy|Local/Cloud|IDs in add Documents
|
||||
:-|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:
|
||||
AstraDBVectorStore|✅|✅|✅|✅|✅|❌|❌|❌|Local|✅
|
||||
Chroma|✅|✅|✅|✅|✅|❌|❌|❌|Local|✅
|
||||
Clickhouse|✅|✅|❌|✅|❌|❌|❌|❌|Local|✅
|
||||
CouchbaseVectorStore|✅|✅|❌|✅|✅|❌|❌|❌|Local|✅
|
||||
ElasticsearchStore|✅|✅|✅|❌|✅|❌|❌|❌|Local|✅
|
||||
FAISS|✅|✅|✅|✅|✅|❌|❌|❌|Local|✅
|
||||
InMemoryVectorStore|✅|✅|❌|✅|✅|❌|❌|❌|Local|✅
|
||||
Milvus|✅|✅|❌|✅|✅|❌|❌|❌|Local|✅
|
||||
MongoDBAtlasVectorSearch|✅|✅|❌|❌|✅|❌|❌|❌|Local|✅
|
||||
PGVector|✅|✅|✅|✅|✅|❌|❌|❌|Local|✅
|
||||
PineconeVectorStore|✅|✅|✅|❌|✅|❌|❌|❌|Local|✅
|
||||
QdrantVectorStore|✅|✅|✅|✅|✅|❌|❌|❌|Local|✅
|
||||
Redis|✅|✅|✅|✅|✅|❌|❌|❌|Local|✅
|
||||
|
||||
@@ -11,7 +11,9 @@
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the Milvus vector database.\n",
|
||||
"\n",
|
||||
"You'll need to install `langchain-milvus` with `pip install -qU langchain-milvus` to use this integration\n"
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"You'll need to install `langchain-milvus` with `pip install -qU langchain-milvus` to use this integration.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -23,7 +25,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain_milvus"
|
||||
"%pip install -qU langchain_milvus"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -31,119 +33,59 @@
|
||||
"id": "633addc3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The latest version of pymilvus comes with a local vector database Milvus Lite, good for prototyping. If you have large scale of data such as more than a million docs, we recommend setting up a more performant Milvus server on [docker or kubernetes](https://milvus.io/docs/install_standalone-docker.md#Start-Milvus)."
|
||||
"The latest version of pymilvus comes with a local vector database Milvus Lite, good for prototyping. If you have a large amount of data, such as more than a million docs, we recommend setting up a more performant Milvus server on [docker or kubernetes](https://milvus.io/docs/install_standalone-docker.md#Start-Milvus).\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"No credentials are needed to use the `Milvus` vector store.\n",
|
||||
"\n",
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"```{=mdx}\n",
|
||||
"import EmbeddingTabs from \"@theme/EmbeddingTabs\";\n",
|
||||
"\n",
|
||||
"<EmbeddingTabs/>\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7a0f9e02-8eb0-4aef-b11f-8861360472ee",
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"id": "a7dd253f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We want to use OpenAIEmbeddings so we have to get the OpenAI API Key."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "8b6ed9cd-81b9-46e5-9c20-5aafca2844d0",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "aac9563e",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import TextLoader\n",
|
||||
"from langchain_milvus.vectorstores import Milvus\n",
|
||||
"# | output: false\n",
|
||||
"# | echo: false\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"from langchain_text_splitters import CharacterTextSplitter"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "a3c3999a",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = TextLoader(\"../../how_to/state_of_the_union.txt\")\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
"embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 28,
|
||||
"id": "dcf88bdf",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_milvus import Milvus\n",
|
||||
"\n",
|
||||
"# The easiest way is to use Milvus Lite where everything is stored in a local file.\n",
|
||||
"# If you have a Milvus server you can use the server URI such as \"http://localhost:19530\".\n",
|
||||
"URI = \"./milvus_demo.db\"\n",
|
||||
"URI = \"./milvus_example.db\"\n",
|
||||
"\n",
|
||||
"vector_db = Milvus.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embeddings,\n",
|
||||
"vector_store = Milvus(\n",
|
||||
" embedding_function=embeddings,\n",
|
||||
" connection_args={\"uri\": URI},\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "a8c513ab",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = vector_db.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "fc516993",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e40d558b",
|
||||
"id": "cae1a7d5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Compartmentalize the data with Milvus Collections\n",
|
||||
@@ -153,7 +95,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "82c00f6e",
|
||||
"id": "c07cd24b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Here's how you can create a new collection"
|
||||
@@ -161,22 +103,24 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f7ff38ab",
|
||||
"execution_count": 29,
|
||||
"id": "c6f4973d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vector_db = Milvus.from_documents(\n",
|
||||
" docs,\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"vector_store_saved = Milvus.from_documents(\n",
|
||||
" [Document(page_content=\"foo!\")],\n",
|
||||
" embeddings,\n",
|
||||
" collection_name=\"collection_1\",\n",
|
||||
" collection_name=\"langchain_example\",\n",
|
||||
" connection_args={\"uri\": URI},\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "891cec1f",
|
||||
"id": "3b12df8c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"And here is how you retrieve that stored collection"
|
||||
@@ -184,24 +128,278 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e9e873e9",
|
||||
"execution_count": 30,
|
||||
"id": "12817d16",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vector_db = Milvus(\n",
|
||||
"vector_store_loaded = Milvus(\n",
|
||||
" embeddings,\n",
|
||||
" connection_args={\"uri\": URI},\n",
|
||||
" collection_name=\"collection_1\",\n",
|
||||
" collection_name=\"langchain_example\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9cc65535",
|
||||
"id": "f1fc3818",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"After retrieval you can go on querying it as usual."
|
||||
"## Manage vector store\n",
|
||||
"\n",
|
||||
"Once you have created your vector store, you can interact with it by adding and deleting different items.\n",
|
||||
"\n",
|
||||
"### Add items to vector store\n",
|
||||
"\n",
|
||||
"We can add items to our vector store by using the `add_documents` function."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"id": "3ced24f6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['b0248595-2a41-4f6b-9c25-3a24c1278bb3',\n",
|
||||
" 'fa642726-5329-4495-a072-187e948dd71f',\n",
|
||||
" '9905001c-a4a3-455e-ab94-72d0ed11b476',\n",
|
||||
" 'eacc7256-d7fa-4036-b1f7-83d7a4bee0c5',\n",
|
||||
" '7508f7ff-c0c9-49ea-8189-634f8a0244d8',\n",
|
||||
" '2e179609-3ff7-4c6a-9e05-08978903fe26',\n",
|
||||
" 'fab1f2ac-43e1-45f9-b81b-fc5d334c6508',\n",
|
||||
" '1206d237-ee3a-484f-baf2-b5ac38eeb314',\n",
|
||||
" 'd43cbf9a-a772-4c40-993b-9439065fec01',\n",
|
||||
" '25e667bb-6f09-4574-a368-661069301906']"
|
||||
]
|
||||
},
|
||||
"execution_count": 31,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from uuid import uuid4\n",
|
||||
"\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"document_1 = Document(\n",
|
||||
" page_content=\"I had chocalate chip pancakes and scrambled eggs for breakfast this morning.\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_2 = Document(\n",
|
||||
" page_content=\"The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_3 = Document(\n",
|
||||
" page_content=\"Building an exciting new project with LangChain - come check it out!\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_4 = Document(\n",
|
||||
" page_content=\"Robbers broke into the city bank and stole $1 million in cash.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_5 = Document(\n",
|
||||
" page_content=\"Wow! That was an amazing movie. I can't wait to see it again.\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_6 = Document(\n",
|
||||
" page_content=\"Is the new iPhone worth the price? Read this review to find out.\",\n",
|
||||
" metadata={\"source\": \"website\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_7 = Document(\n",
|
||||
" page_content=\"The top 10 soccer players in the world right now.\",\n",
|
||||
" metadata={\"source\": \"website\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_8 = Document(\n",
|
||||
" page_content=\"LangGraph is the best framework for building stateful, agentic applications!\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_9 = Document(\n",
|
||||
" page_content=\"The stock market is down 500 points today due to fears of a recession.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_10 = Document(\n",
|
||||
" page_content=\"I have a bad feeling I am going to get deleted :(\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"documents = [\n",
|
||||
" document_1,\n",
|
||||
" document_2,\n",
|
||||
" document_3,\n",
|
||||
" document_4,\n",
|
||||
" document_5,\n",
|
||||
" document_6,\n",
|
||||
" document_7,\n",
|
||||
" document_8,\n",
|
||||
" document_9,\n",
|
||||
" document_10,\n",
|
||||
"]\n",
|
||||
"uuids = [str(uuid4()) for _ in range(len(documents))]\n",
|
||||
"\n",
|
||||
"vector_store.add_documents(documents=documents, ids=uuids)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e23c22d8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Delete items from vector store"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"id": "1f387fa8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(insert count: 0, delete count: 1, upsert count: 0, timestamp: 0, success count: 0, err count: 0, cost: 0)"
|
||||
]
|
||||
},
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"vector_store.delete(ids=[uuids[-1]])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fb12fa75",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Query vector store\n",
|
||||
"\n",
|
||||
"Once your vector store has been created and the relevant documents have been added, you will most likely wish to query it while running your chain or agent.\n",
|
||||
"\n",
|
||||
"### Query directly\n",
|
||||
"\n",
|
||||
"#### Similarity search\n",
|
||||
"\n",
|
||||
"Performing a simple similarity search with filtering on metadata can be done as follows:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"id": "35801a55",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"* Building an exciting new project with LangChain - come check it out! [{'pk': '9905001c-a4a3-455e-ab94-72d0ed11b476', 'source': 'tweet'}]\n",
|
||||
"* LangGraph is the best framework for building stateful, agentic applications! [{'pk': '1206d237-ee3a-484f-baf2-b5ac38eeb314', 'source': 'tweet'}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = vector_store.similarity_search(\n",
|
||||
" \"LangChain provides abstractions to make working with LLMs easy\",\n",
|
||||
" k=2,\n",
|
||||
" filter={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"for res in results:\n",
|
||||
" print(f\"* {res.page_content} [{res.metadata}]\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "35574409",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Similarity search with score\n",
|
||||
"\n",
|
||||
"You can also search with score:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "c360af3d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"* [SIM=21192.628906] bar [{'pk': '2', 'source': 'https://example.com'}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = vector_store.similarity_search_with_score(\n",
|
||||
" \"Will it be hot tomorrow?\", k=1, filter={\"source\": \"news\"}\n",
|
||||
")\n",
|
||||
"for res, score in results:\n",
|
||||
" print(f\"* [SIM={score:3f}] {res.page_content} [{res.metadata}]\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "14db337f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For a full list of all the search options available when using the `Milvus` vector store, you can visit the [API reference](https://api.python.langchain.com/en/latest/vectorstores/langchain_milvus.vectorstores.milvus.Milvus.html).\n",
|
||||
"\n",
|
||||
"### Query by turning into retriever\n",
|
||||
"\n",
|
||||
"You can also transform the vector store into a retriever for easier usage in your chains. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 34,
|
||||
"id": "f6d9357c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(metadata={'pk': 'eacc7256-d7fa-4036-b1f7-83d7a4bee0c5', 'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 34,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"retriever = vector_store.as_retriever(search_type=\"mmr\", search_kwargs={\"k\": 1})\n",
|
||||
"retriever.invoke(\"Stealing from the bank is a crime\", filter={\"source\": \"news\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8ac953f1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage for retrieval-augmented generation\n",
|
||||
"\n",
|
||||
"For guides on how to use this vector store for retrieval-augmented generation (RAG), see the following sections:\n",
|
||||
"\n",
|
||||
"- [Tutorials: working with external knowledge](https://python.langchain.com/v0.2/docs/tutorials/#working-with-external-knowledge)\n",
|
||||
"- [How-to: Question and answer with RAG](https://python.langchain.com/v0.2/docs/how_to/#qa-with-rag)\n",
|
||||
"- [Retrieval conceptual docs](https://python.langchain.com/v0.2/docs/concepts/#retrieval)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -325,47 +523,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "89756e9e",
|
||||
"id": "f1a873c5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### To delete or upsert (update/insert) one or more entities"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "21c4edcf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.documents import Document\n",
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"# Insert data sample\n",
|
||||
"docs = [\n",
|
||||
" Document(page_content=\"foo\", metadata={\"id\": 1}),\n",
|
||||
" Document(page_content=\"bar\", metadata={\"id\": 2}),\n",
|
||||
" Document(page_content=\"baz\", metadata={\"id\": 3}),\n",
|
||||
"]\n",
|
||||
"vector_db = Milvus.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embeddings,\n",
|
||||
" connection_args={\"uri\": URI},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Search pks (primary keys) using expression\n",
|
||||
"expr = \"id in [1,2]\"\n",
|
||||
"pks = vector_db.get_pks(expr)\n",
|
||||
"\n",
|
||||
"# Delete entities by pks\n",
|
||||
"result = vector_db.delete(pks)\n",
|
||||
"\n",
|
||||
"# Upsert (Update/Insert)\n",
|
||||
"new_docs = [\n",
|
||||
" Document(page_content=\"new_foo\", metadata={\"id\": 1}),\n",
|
||||
" Document(page_content=\"new_bar\", metadata={\"id\": 2}),\n",
|
||||
" Document(page_content=\"upserted_bak\", metadata={\"id\": 3}),\n",
|
||||
"]\n",
|
||||
"upserted_pks = vector_db.upsert(pks, new_docs)"
|
||||
"For detailed documentation of all `Milvus` vector store features and configurations, head to the API reference: https://api.python.langchain.com/en/latest/vectorstores/langchain_milvus.vectorstores.milvus.Milvus.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -385,7 +548,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.18"
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -19,74 +19,40 @@
|
||||
"id": "359b8e9b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prerequisites\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
">*An Atlas cluster running MongoDB version 6.0.11, 7.0.2, or later (including RCs).\n",
|
||||
"\n",
|
||||
">*An OpenAI API Key. You must have a paid OpenAI account with credits available for API requests.\n",
|
||||
"To use MongoDB Atlas, you must first deploy a cluster. We have a Forever-Free tier of clusters available. To get started head over to Atlas here: [quick start](https://www.mongodb.com/docs/atlas/getting-started/).\n",
|
||||
"\n",
|
||||
"You'll need to install `langchain-mongodb` to use this integration"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d899e588",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setting up MongoDB Atlas Cluster\n",
|
||||
"To use MongoDB Atlas, you must first deploy a cluster. We have a Forever-Free tier of clusters available. To get started head over to Atlas here: [quick start](https://www.mongodb.com/docs/atlas/getting-started/)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b5ce18d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage\n",
|
||||
"In the notebook we will demonstrate how to perform `Retrieval Augmented Generation` (RAG) using MongoDB Atlas, OpenAI and Langchain. We will be performing Similarity Search, Similarity Search with Metadata Pre-Filtering, and Question Answering over the PDF document for [GPT 4 technical report](https://arxiv.org/pdf/2303.08774.pdf) that came out in March 2023 and hence is not part of the OpenAI's Large Language Model(LLM)'s parametric memory, which had a knowledge cutoff of September 2021."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "457ace44-1d95-4001-9dd5-78811ab208ad",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We want to use `OpenAIEmbeddings` so we need to set up our OpenAI API Key. "
|
||||
"You'll need to install `langchain-mongodb` and `pymongo` to use this integration."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2d8f240d",
|
||||
"id": "73cf7c9f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
|
||||
"pip install -qU langchain-mongodb pymongo"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "70482cd8",
|
||||
"id": "a61832ea",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now we will setup the environment variables for the MongoDB Atlas cluster"
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"For this notebook you will need to find your MongoDB cluster URI.\n",
|
||||
"\n",
|
||||
"For information on finding your cluster URI read through [this guide](https://www.mongodb.com/docs/manual/reference/connection-string/)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4d7788cf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain langchain-mongodb pypdf pymongo langchain-openai tiktoken"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 33,
|
||||
"id": "7ef41b37",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -96,76 +62,78 @@
|
||||
"MONGODB_ATLAS_CLUSTER_URI = getpass.getpass(\"MongoDB Atlas Cluster URI:\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1f23de23",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get best-in-class automated tracing of your model calls, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting the lines below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "908e7772",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a53673ae",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"```{=mdx}\n",
|
||||
"import EmbeddingTabs from \"@theme/EmbeddingTabs\";\n",
|
||||
"\n",
|
||||
"<EmbeddingTabs/>\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 54,
|
||||
"id": "f5fed614",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# | output: false\n",
|
||||
"# | echo: false\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 56,
|
||||
"id": "00d78318",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_mongodb.vectorstores import MongoDBAtlasVectorSearch\n",
|
||||
"from pymongo import MongoClient\n",
|
||||
"\n",
|
||||
"# initialize MongoDB python client\n",
|
||||
"client = MongoClient(MONGODB_ATLAS_CLUSTER_URI)\n",
|
||||
"\n",
|
||||
"DB_NAME = \"langchain_db\"\n",
|
||||
"COLLECTION_NAME = \"test\"\n",
|
||||
"ATLAS_VECTOR_SEARCH_INDEX_NAME = \"index_name\"\n",
|
||||
"DB_NAME = \"langchain_test_db\"\n",
|
||||
"COLLECTION_NAME = \"langchain_test_vectorstores\"\n",
|
||||
"ATLAS_VECTOR_SEARCH_INDEX_NAME = \"langchain-test-index-vectorstores\"\n",
|
||||
"\n",
|
||||
"MONGODB_COLLECTION = client[DB_NAME][COLLECTION_NAME]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "eb0cc10f-b84e-4e5e-b445-eb61f10bf085",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Vector Search Index"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1f3ecc42",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now, let's create a vector search index on your cluster. More detailed steps can be found at [Create Vector Search Index for LangChain](https://www.mongodb.com/docs/atlas/atlas-vector-search/ai-integrations/langchain/#create-the-atlas-vector-search-index) section.\n",
|
||||
"In the below example, `embedding` is the name of the field that contains the embedding vector. Please refer to the [documentation](https://www.mongodb.com/docs/atlas/atlas-vector-search/create-index/) to get more details on how to define an Atlas Vector Search index.\n",
|
||||
"You can name the index `{ATLAS_VECTOR_SEARCH_INDEX_NAME}` and create the index on the namespace `{DB_NAME}.{COLLECTION_NAME}`. Finally, write the following definition in the JSON editor on MongoDB Atlas:\n",
|
||||
"MONGODB_COLLECTION = client[DB_NAME][COLLECTION_NAME]\n",
|
||||
"\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"fields\":[\n",
|
||||
" {\n",
|
||||
" \"type\": \"vector\",\n",
|
||||
" \"path\": \"embedding\",\n",
|
||||
" \"numDimensions\": 1536,\n",
|
||||
" \"similarity\": \"cosine\"\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Additionally, if you are running a MongoDB M10 cluster with server version 6.0+, you can leverage the `MongoDBAtlasVectorSearch.create_index`. To add the above index its usage would look like this.\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"from langchain_community.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain_mongodb.vectorstores import MongoDBAtlasVectorSearch\n",
|
||||
"from pymongo import MongoClient\n",
|
||||
"\n",
|
||||
"mongo_client = MongoClient(\"<YOUR-CONNECTION-STRING>\")\n",
|
||||
"collection = mongo_client[\"<db_name>\"][\"<collection_name>\"]\n",
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"\n",
|
||||
"vectorstore = MongoDBAtlasVectorSearch(\n",
|
||||
" collection=collection,\n",
|
||||
" embedding=embeddings,\n",
|
||||
" index_name=\"<ATLAS_VECTOR_SEARCH_INDEX_NAME>\",\n",
|
||||
" relevance_score_fn=\"cosine\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Creates an index using the index_name provided and relevance_score_fn type\n",
|
||||
"vectorstore.create_index(dimensions=1536)\n",
|
||||
"```"
|
||||
"vector_store = MongoDBAtlasVectorSearch(\n",
|
||||
" collection=MONGODB_COLLECTION,\n",
|
||||
" embedding=embeddings,\n",
|
||||
" index_name=ATLAS_VECTOR_SEARCH_INDEX_NAME,\n",
|
||||
" relevance_score_fn=\"cosine\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -173,126 +141,224 @@
|
||||
"id": "42873e5a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Insert Data"
|
||||
"## Manage vector store\n",
|
||||
"\n",
|
||||
"Once you have created your vector store, we can interact with it by adding and deleting different items.\n",
|
||||
"\n",
|
||||
"### Add items to vector store\n",
|
||||
"\n",
|
||||
"We can add items to our vector store by using the `add_documents` function."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 57,
|
||||
"id": "aac9563e",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['03ad81e8-32a0-46f0-b7d8-f5b977a6b52a',\n",
|
||||
" '8396a68d-f4a3-4176-a581-a1a8c303eea4',\n",
|
||||
" 'e7d95150-67f6-499f-b611-84367c50fa60',\n",
|
||||
" '8c31b84e-2636-48b6-8b99-9fccb47f7051',\n",
|
||||
" 'aa02e8a2-a811-446a-9785-8cea0faba7a9',\n",
|
||||
" '19bd72ff-9766-4c3b-b1fd-195c732c562b',\n",
|
||||
" '642d6f2f-3e34-4efa-a1ed-c4ba4ef0da8d',\n",
|
||||
" '7614bb54-4eb5-4b3b-990c-00e35cb31f99',\n",
|
||||
" '69e18c67-bf1b-43e5-8a6e-64fb3f240e52',\n",
|
||||
" '30d599a7-4a1a-47a9-bbf8-6ed393e2e33c']"
|
||||
]
|
||||
},
|
||||
"execution_count": 57,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import PyPDFLoader\n",
|
||||
"from uuid import uuid4\n",
|
||||
"\n",
|
||||
"# Load the PDF\n",
|
||||
"loader = PyPDFLoader(\"https://arxiv.org/pdf/2303.08774.pdf\")\n",
|
||||
"data = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a5578113",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)\n",
|
||||
"docs = text_splitter.split_documents(data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d378168f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(docs[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6e104aee",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.vectorstores import MongoDBAtlasVectorSearch\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"document_1 = Document(\n",
|
||||
" page_content=\"I had chocalate chip pancakes and scrambled eggs for breakfast this morning.\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# insert the documents in MongoDB Atlas with their embedding\n",
|
||||
"vector_search = MongoDBAtlasVectorSearch.from_documents(\n",
|
||||
" documents=docs,\n",
|
||||
" embedding=OpenAIEmbeddings(disallowed_special=()),\n",
|
||||
" collection=MONGODB_COLLECTION,\n",
|
||||
" index_name=ATLAS_VECTOR_SEARCH_INDEX_NAME,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7bf6841e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Perform a similarity search between the embedding of the query and the embeddings of the documents\n",
|
||||
"query = \"What were the compute requirements for training GPT 4\"\n",
|
||||
"results = vector_search.similarity_search(query)\n",
|
||||
"document_2 = Document(\n",
|
||||
" page_content=\"The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(results[0].page_content)"
|
||||
"document_3 = Document(\n",
|
||||
" page_content=\"Building an exciting new project with LangChain - come check it out!\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_4 = Document(\n",
|
||||
" page_content=\"Robbers broke into the city bank and stole $1 million in cash.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_5 = Document(\n",
|
||||
" page_content=\"Wow! That was an amazing movie. I can't wait to see it again.\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_6 = Document(\n",
|
||||
" page_content=\"Is the new iPhone worth the price? Read this review to find out.\",\n",
|
||||
" metadata={\"source\": \"website\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_7 = Document(\n",
|
||||
" page_content=\"The top 10 soccer players in the world right now.\",\n",
|
||||
" metadata={\"source\": \"website\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_8 = Document(\n",
|
||||
" page_content=\"LangGraph is the best framework for building stateful, agentic applications!\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_9 = Document(\n",
|
||||
" page_content=\"The stock market is down 500 points today due to fears of a recession.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_10 = Document(\n",
|
||||
" page_content=\"I have a bad feeling I am going to get deleted :(\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"documents = [\n",
|
||||
" document_1,\n",
|
||||
" document_2,\n",
|
||||
" document_3,\n",
|
||||
" document_4,\n",
|
||||
" document_5,\n",
|
||||
" document_6,\n",
|
||||
" document_7,\n",
|
||||
" document_8,\n",
|
||||
" document_9,\n",
|
||||
" document_10,\n",
|
||||
"]\n",
|
||||
"uuids = [str(uuid4()) for _ in range(len(documents))]\n",
|
||||
"\n",
|
||||
"vector_store.add_documents(documents=documents, ids=uuids)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9e58c2d8",
|
||||
"id": "639f29da",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Querying data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "851a2ec9-9390-49a4-8412-3e132c9f789d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can also instantiate the vector store directly and execute a query as follows:"
|
||||
"### Delete items from vector store\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "985d28fe",
|
||||
"execution_count": 58,
|
||||
"id": "bbb5fd5c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 58,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_community.vectorstores import MongoDBAtlasVectorSearch\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"vector_store.delete(ids=[uuids[-1]])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d6111eb6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Query vector store\n",
|
||||
"\n",
|
||||
"vector_search = MongoDBAtlasVectorSearch.from_connection_string(\n",
|
||||
" MONGODB_ATLAS_CLUSTER_URI,\n",
|
||||
" DB_NAME + \".\" + COLLECTION_NAME,\n",
|
||||
" OpenAIEmbeddings(disallowed_special=()),\n",
|
||||
" index_name=ATLAS_VECTOR_SEARCH_INDEX_NAME,\n",
|
||||
")"
|
||||
"Once your vector store has been created and the relevant documents have been added you will most likely wish to query it during the running of your chain or agent. \n",
|
||||
"\n",
|
||||
"### Query directly\n",
|
||||
"\n",
|
||||
"#### Similarity search\n",
|
||||
"\n",
|
||||
"Performing a simple similarity search can be done as follows:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 62,
|
||||
"id": "19b60ac0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"* Building an exciting new project with LangChain - come check it out! [{'_id': 'e7d95150-67f6-499f-b611-84367c50fa60', 'source': 'tweet'}]\n",
|
||||
"* LangGraph is the best framework for building stateful, agentic applications! [{'_id': '7614bb54-4eb5-4b3b-990c-00e35cb31f99', 'source': 'tweet'}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = vector_store.similarity_search(\n",
|
||||
" \"LangChain provides abstractions to make working with LLMs easy\", k=2\n",
|
||||
")\n",
|
||||
"for res in results:\n",
|
||||
" print(f\"* {res.page_content} [{res.metadata}]\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "02aef29c-5da0-41b8-b4fc-98fd71b94abf",
|
||||
"id": "6c624606",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Pre-filtering with Similarity Search"
|
||||
"#### Similarity search with score\n",
|
||||
"\n",
|
||||
"You can also search with score:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 63,
|
||||
"id": "e919fa51",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"* [SIM=0.784560] The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees. [{'_id': '8396a68d-f4a3-4176-a581-a1a8c303eea4', 'source': 'news'}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = vector_store.similarity_search_with_score(\"Will it be hot tomorrow?\", k=1)\n",
|
||||
"for res, score in results:\n",
|
||||
" print(f\"* [SIM={score:3f}] {res.page_content} [{res.metadata}]\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f3b2d36d-d47a-482f-999d-85c23eb67eed",
|
||||
"id": "513a1416",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Pre-filtering with Similarity Search"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ac58c6c7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Atlas Vector Search supports pre-filtering using MQL Operators for filtering. Below is an example index and query on the same data loaded above that allows you do metadata filtering on the \"page\" field. You can update your existing index with the filter defined and do pre-filtering with vector search."
|
||||
@@ -300,7 +366,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2b385a46-1e54-471f-95b2-202813d90bb2",
|
||||
"id": "dacac7b8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"```json\n",
|
||||
@@ -314,7 +380,7 @@
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"type\": \"filter\",\n",
|
||||
" \"path\": \"page\"\n",
|
||||
" \"path\": \"source\"\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
"}\n",
|
||||
@@ -325,128 +391,79 @@
|
||||
"```python\n",
|
||||
"vectorstore.create_index(\n",
|
||||
" dimensions=1536,\n",
|
||||
" filters=[{\"type\":\"filter\", \"path\":\"page\"}],\n",
|
||||
" filters=[{\"type\":\"filter\", \"path\":\"source\"}],\n",
|
||||
" update=True\n",
|
||||
")\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"And then you can run a query with filter as follows:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"results = vector_store.similarity_search(query=\"foo\",k=1,pre_filter={\"source\": {\"$eq\": \"https://example.com\"}})\n",
|
||||
"for doc in results:\n",
|
||||
" print(f\"* {doc.page_content} [{doc.metadata}]\")\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dfc8487d-14ec-42c9-9670-80fe02816196",
|
||||
"cell_type": "markdown",
|
||||
"id": "32b13a9b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What were the compute requirements for training GPT 4\"\n",
|
||||
"#### Other search methods\n",
|
||||
"\n",
|
||||
"results = vector_search.similarity_search_with_score(\n",
|
||||
" query=query, k=5, pre_filter={\"page\": {\"$eq\": 1}}\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Display results\n",
|
||||
"for result in results:\n",
|
||||
" print(result)"
|
||||
"There are a variety of other search methods that are not covered in this notebook, such as MMR search or searching by vector. For a full list of the search abilities available for `AstraDBVectorStore` check out the [API reference](https://api.python.langchain.com/en/latest/vectorstores/langchain_astradb.vectorstores.AstraDBVectorStore.html)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6d9a2dbe",
|
||||
"id": "01316a42",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Similarity Search with Score"
|
||||
"### Query by turning into retriever\n",
|
||||
"\n",
|
||||
"You can also transform the vector store into a retriever for easier usage in your chains. \n",
|
||||
"\n",
|
||||
"Here is how to transform your vector store into a retriever and then invoke the retreiever with a simple query and filter."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "497baffa",
|
||||
"execution_count": 65,
|
||||
"id": "8f246301",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(metadata={'_id': '8c31b84e-2636-48b6-8b99-9fccb47f7051', 'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 65,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"What were the compute requirements for training GPT 4\"\n",
|
||||
"\n",
|
||||
"results = vector_search.similarity_search_with_score(\n",
|
||||
" query=query,\n",
|
||||
" k=5,\n",
|
||||
"retriever = vector_store.as_retriever(\n",
|
||||
" search_type=\"similarity_score_threshold\",\n",
|
||||
" search_kwargs={\"k\": 1, \"score_threshold\": 0.2},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Display results\n",
|
||||
"for result in results:\n",
|
||||
" print(result)"
|
||||
"retriever.invoke(\"Stealing from the bank is a crime\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cbade5f0",
|
||||
"id": "72312657",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Question Answering "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bc6475f9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"qa_retriever = vector_search.as_retriever(\n",
|
||||
" search_type=\"similarity\",\n",
|
||||
" search_kwargs={\"k\": 25},\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8e13e96c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.prompts import PromptTemplate\n",
|
||||
"## Usage for retrieval-augmented generation\n",
|
||||
"\n",
|
||||
"prompt_template = \"\"\"Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n",
|
||||
"For guides on how to use this vector store for retrieval-augmented generation (RAG), see the following sections:\n",
|
||||
"\n",
|
||||
"{context}\n",
|
||||
"\n",
|
||||
"Question: {question}\n",
|
||||
"\"\"\"\n",
|
||||
"PROMPT = PromptTemplate(\n",
|
||||
" template=prompt_template, input_variables=[\"context\", \"question\"]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ff0edb02",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import RetrievalQA\n",
|
||||
"from langchain_openai import OpenAI\n",
|
||||
"\n",
|
||||
"qa = RetrievalQA.from_chain_type(\n",
|
||||
" llm=OpenAI(),\n",
|
||||
" chain_type=\"stuff\",\n",
|
||||
" retriever=qa_retriever,\n",
|
||||
" return_source_documents=True,\n",
|
||||
" chain_type_kwargs={\"prompt\": PROMPT},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"docs = qa({\"query\": \"gpt-4 compute requirements\"})\n",
|
||||
"\n",
|
||||
"print(docs[\"result\"])\n",
|
||||
"print(docs[\"source_documents\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "61636bb2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"GPT-4 requires significantly more compute than earlier GPT models. On a dataset derived from OpenAI's internal codebase, GPT-4 requires 100p (petaflops) of compute to reach the lowest loss, while the smaller models require 1-10n (nanoflops)."
|
||||
"- [Tutorials: working with external knowledge](https://python.langchain.com/v0.2/docs/tutorials/#working-with-external-knowledge)\n",
|
||||
"- [How-to: Question and answer with RAG](https://python.langchain.com/v0.2/docs/how_to/#qa-with-rag)\n",
|
||||
"- [Retrieval conceptual docs](https://python.langchain.com/v0.2/docs/concepts/#retrieval)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -460,6 +477,16 @@
|
||||
">* The langchain version 0.0.305 ([release notes](https://github.com/langchain-ai/langchain/releases/tag/v0.0.305)) introduces the support for $vectorSearch MQL stage, which is available with MongoDB Atlas 6.0.11 and 7.0.2. Users utilizing earlier versions of MongoDB Atlas need to pin their LangChain version to <=0.0.304\n",
|
||||
"> "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "186ef502",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `MongoDBAtlasVectorSearch` features and configurations head to the API reference: https://api.python.langchain.com/en/latest/mongodb_api_reference.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -478,7 +505,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -11,12 +11,6 @@
|
||||
"\n",
|
||||
"The code lives in an integration package called: [langchain_postgres](https://github.com/langchain-ai/langchain-postgres/).\n",
|
||||
"\n",
|
||||
"You can run the following command to spin up a a postgres container with the `pgvector` extension:\n",
|
||||
"\n",
|
||||
"```shell\n",
|
||||
"docker run --name pgvector-container -e POSTGRES_USER=langchain -e POSTGRES_PASSWORD=langchain -e POSTGRES_DB=langchain -p 6024:5432 -d pgvector/pgvector:pg16\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"## Status\n",
|
||||
"\n",
|
||||
"This code has been ported over from `langchain_community` into a dedicated package called `langchain-postgres`. The following changes have been made:\n",
|
||||
@@ -27,30 +21,39 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"Currently, there is **no mechanism** that supports easy data migration on schema changes. So any schema changes in the vectorstore will require the user to recreate the tables and re-add the documents.\n",
|
||||
"If this is a concern, please use a different vectorstore. If not, this implementation should be fine for your use case."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "342cd5e9-f349-42b4-9713-12e63779835b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Install dependencies\n",
|
||||
"If this is a concern, please use a different vectorstore. If not, this implementation should be fine for your use case.\n",
|
||||
"\n",
|
||||
"Here, we're using `langchain_cohere` for embeddings, but you can use other embeddings providers."
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"First donwload the partner package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "42d42297-11b8-44e3-bf21-7c3d1bce8277",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"execution_count": null,
|
||||
"id": "92df32f0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install --quiet -U langchain_cohere\n",
|
||||
"!pip install --quiet -U langchain_postgres"
|
||||
"pip install -qU langchain_postgres"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0dd87fcc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can run the following command to spin up a a postgres container with the `pgvector` extension:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2acbaf9b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%docker run --name pgvector-container -e POSTGRES_USER=langchain -e POSTGRES_PASSWORD=langchain -e POSTGRES_DB=langchain -p 6024:5432 -d pgvector/pgvector:pg16"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -58,7 +61,56 @@
|
||||
"id": "eee31ce1-2c28-484d-82be-d22d9f9a31fd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialize the vectorstore"
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"There are no credentials needed to run this notebook, just make sure you downloaded the `langchain_postgres` package and correctly started the postgres container."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fa4026f7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get best in-class automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0f8e2f23",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ec44dfcc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"```{=mdx}\n",
|
||||
"import EmbeddingTabs from \"@theme/EmbeddingTabs\";\n",
|
||||
"\n",
|
||||
"<EmbeddingTabs/>\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "94f5c129",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# | output: false\n",
|
||||
"# | echo: false\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -70,7 +122,6 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_cohere import CohereEmbeddings\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"from langchain_postgres import PGVector\n",
|
||||
"from langchain_postgres.vectorstores import PGVector\n",
|
||||
@@ -78,9 +129,9 @@
|
||||
"# See docker command above to launch a postgres instance with pgvector enabled.\n",
|
||||
"connection = \"postgresql+psycopg://langchain:langchain@localhost:6024/langchain\" # Uses psycopg3!\n",
|
||||
"collection_name = \"my_docs\"\n",
|
||||
"embeddings = CohereEmbeddings(model=\"embed-english-v3.0\")\n",
|
||||
"\n",
|
||||
"vectorstore = PGVector(\n",
|
||||
"\n",
|
||||
"vector_store = PGVector(\n",
|
||||
" embeddings=embeddings,\n",
|
||||
" collection_name=collection_name,\n",
|
||||
" connection=connection,\n",
|
||||
@@ -88,95 +139,22 @@
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0fc32168-5a82-4629-a78d-158fe2615086",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Drop tables\n",
|
||||
"\n",
|
||||
"If you need to drop tables (e.g., updating the embedding to a different dimension or just updating the embedding provider): "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5de5ef98-7dbb-4892-853f-47c7dc87b70e",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"```python\n",
|
||||
"vectorstore.drop_tables()\n",
|
||||
"````"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "61a224a1-d70b-4daf-86ba-ab6e43c08b50",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Add documents\n",
|
||||
"## Manage vector store\n",
|
||||
"\n",
|
||||
"Add documents to the vectorstore"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "88a288cc-ffd4-4800-b011-750c72b9fd10",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = [\n",
|
||||
" Document(\n",
|
||||
" page_content=\"there are cats in the pond\",\n",
|
||||
" metadata={\"id\": 1, \"location\": \"pond\", \"topic\": \"animals\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"ducks are also found in the pond\",\n",
|
||||
" metadata={\"id\": 2, \"location\": \"pond\", \"topic\": \"animals\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"fresh apples are available at the market\",\n",
|
||||
" metadata={\"id\": 3, \"location\": \"market\", \"topic\": \"food\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"the market also sells fresh oranges\",\n",
|
||||
" metadata={\"id\": 4, \"location\": \"market\", \"topic\": \"food\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"the new art exhibit is fascinating\",\n",
|
||||
" metadata={\"id\": 5, \"location\": \"museum\", \"topic\": \"art\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"a sculpture exhibit is also at the museum\",\n",
|
||||
" metadata={\"id\": 6, \"location\": \"museum\", \"topic\": \"art\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"a new coffee shop opened on Main Street\",\n",
|
||||
" metadata={\"id\": 7, \"location\": \"Main Street\", \"topic\": \"food\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"the book club meets at the library\",\n",
|
||||
" metadata={\"id\": 8, \"location\": \"library\", \"topic\": \"reading\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"the library hosts a weekly story time for kids\",\n",
|
||||
" metadata={\"id\": 9, \"location\": \"library\", \"topic\": \"reading\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"a cooking class for beginners is offered at the community center\",\n",
|
||||
" metadata={\"id\": 10, \"location\": \"community center\", \"topic\": \"classes\"},\n",
|
||||
" ),\n",
|
||||
"]"
|
||||
"### Add items to vector store\n",
|
||||
"\n",
|
||||
"Note that adding documents by ID will over-write any existing documents that match that ID."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "73aa9124-9d49-4e10-8ed3-82255e7a4106",
|
||||
"id": "88a288cc-ffd4-4800-b011-750c72b9fd10",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
@@ -192,58 +170,6 @@
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"vectorstore.add_documents(docs, ids=[doc.metadata[\"id\"] for doc in docs])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "a5b2b71f-49eb-407d-b03a-dea4c0a517d6",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='there are cats in the pond', metadata={'id': 1, 'topic': 'animals', 'location': 'pond'}),\n",
|
||||
" Document(page_content='the book club meets at the library', metadata={'id': 8, 'topic': 'reading', 'location': 'library'}),\n",
|
||||
" Document(page_content='the library hosts a weekly story time for kids', metadata={'id': 9, 'topic': 'reading', 'location': 'library'}),\n",
|
||||
" Document(page_content='the new art exhibit is fascinating', metadata={'id': 5, 'topic': 'art', 'location': 'museum'}),\n",
|
||||
" Document(page_content='ducks are also found in the pond', metadata={'id': 2, 'topic': 'animals', 'location': 'pond'}),\n",
|
||||
" Document(page_content='the market also sells fresh oranges', metadata={'id': 4, 'topic': 'food', 'location': 'market'}),\n",
|
||||
" Document(page_content='a cooking class for beginners is offered at the community center', metadata={'id': 10, 'topic': 'classes', 'location': 'community center'}),\n",
|
||||
" Document(page_content='fresh apples are available at the market', metadata={'id': 3, 'topic': 'food', 'location': 'market'}),\n",
|
||||
" Document(page_content='a sculpture exhibit is also at the museum', metadata={'id': 6, 'topic': 'art', 'location': 'museum'}),\n",
|
||||
" Document(page_content='a new coffee shop opened on Main Street', metadata={'id': 7, 'topic': 'food', 'location': 'Main Street'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"vectorstore.similarity_search(\"kitty\", k=10)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1d87a413-015a-4b46-a64e-332f30806524",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Adding documents by ID will over-write any existing documents that match that ID."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "13c69357-aaee-4de0-bcc2-7ab4419c920e",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = [\n",
|
||||
" Document(\n",
|
||||
@@ -286,7 +212,29 @@
|
||||
" page_content=\"a cooking class for beginners is offered at the community center\",\n",
|
||||
" metadata={\"id\": 10, \"location\": \"community center\", \"topic\": \"classes\"},\n",
|
||||
" ),\n",
|
||||
"]"
|
||||
"]\n",
|
||||
"\n",
|
||||
"vector_store.add_documents(docs, ids=[doc.metadata[\"id\"] for doc in docs])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0c712fa3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Delete items from vector store"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "a5b2b71f-49eb-407d-b03a-dea4c0a517d6",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vector_store.delete(ids=[\"3\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -294,7 +242,11 @@
|
||||
"id": "59f82250-7903-4279-8300-062542c83416",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Filtering Support\n",
|
||||
"## Query vector store\n",
|
||||
"\n",
|
||||
"Once your vector store has been created and the relevant documents have been added you will most likely wish to query it during the running of your chain or agent. \n",
|
||||
"\n",
|
||||
"### Filtering Support\n",
|
||||
"\n",
|
||||
"The vectorstore supports a set of filters that can be applied against the metadata fields of the documents.\n",
|
||||
"\n",
|
||||
@@ -312,33 +264,38 @@
|
||||
"| \\$like | Text (like) |\n",
|
||||
"| \\$ilike | Text (case-insensitive like) |\n",
|
||||
"| \\$and | Logical (and) |\n",
|
||||
"| \\$or | Logical (or) |"
|
||||
"| \\$or | Logical (or) |\n",
|
||||
"\n",
|
||||
"### Query directly\n",
|
||||
"\n",
|
||||
"Performing a simple similarity search can be done as follows:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 15,
|
||||
"id": "f15a2359-6dc3-4099-8214-785f167a9ca4",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='there are cats in the pond', metadata={'id': 1, 'topic': 'animals', 'location': 'pond'}),\n",
|
||||
" Document(page_content='the library hosts a weekly story time for kids', metadata={'id': 9, 'topic': 'reading', 'location': 'library'}),\n",
|
||||
" Document(page_content='the new art exhibit is fascinating', metadata={'id': 5, 'topic': 'art', 'location': 'museum'}),\n",
|
||||
" Document(page_content='ducks are also found in the pond', metadata={'id': 2, 'topic': 'animals', 'location': 'pond'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"* there are cats in the pond [{'id': 1, 'topic': 'animals', 'location': 'pond'}]\n",
|
||||
"* the library hosts a weekly story time for kids [{'id': 9, 'topic': 'reading', 'location': 'library'}]\n",
|
||||
"* ducks are also found in the pond [{'id': 2, 'topic': 'animals', 'location': 'pond'}]\n",
|
||||
"* the new art exhibit is fascinating [{'id': 5, 'topic': 'art', 'location': 'museum'}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"vectorstore.similarity_search(\"kitty\", k=10, filter={\"id\": {\"$in\": [1, 5, 2, 9]}})"
|
||||
"results = vector_store.similarity_search(\n",
|
||||
" \"kitty\", k=10, filter={\"id\": {\"$in\": [1, 5, 2, 9]}}\n",
|
||||
")\n",
|
||||
"for doc in results:\n",
|
||||
" print(f\"* {doc.page_content} [{doc.metadata}]\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -351,7 +308,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 16,
|
||||
"id": "88f919e4-e4b0-4b5f-99b3-24c675c26d33",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -360,17 +317,17 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='ducks are also found in the pond', metadata={'id': 2, 'topic': 'animals', 'location': 'pond'}),\n",
|
||||
" Document(page_content='there are cats in the pond', metadata={'id': 1, 'topic': 'animals', 'location': 'pond'})]"
|
||||
"[Document(metadata={'id': 1, 'topic': 'animals', 'location': 'pond'}, page_content='there are cats in the pond'),\n",
|
||||
" Document(metadata={'id': 2, 'topic': 'animals', 'location': 'pond'}, page_content='ducks are also found in the pond')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"vectorstore.similarity_search(\n",
|
||||
"vector_store.similarity_search(\n",
|
||||
" \"ducks\",\n",
|
||||
" k=10,\n",
|
||||
" filter={\"id\": {\"$in\": [1, 5, 2, 9]}, \"location\": {\"$in\": [\"pond\", \"market\"]}},\n",
|
||||
@@ -379,7 +336,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 17,
|
||||
"id": "88f423a4-6575-4fb8-9be2-a3da01106591",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -388,17 +345,17 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='ducks are also found in the pond', metadata={'id': 2, 'topic': 'animals', 'location': 'pond'}),\n",
|
||||
" Document(page_content='there are cats in the pond', metadata={'id': 1, 'topic': 'animals', 'location': 'pond'})]"
|
||||
"[Document(metadata={'id': 1, 'topic': 'animals', 'location': 'pond'}, page_content='there are cats in the pond'),\n",
|
||||
" Document(metadata={'id': 2, 'topic': 'animals', 'location': 'pond'}, page_content='ducks are also found in the pond')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"vectorstore.similarity_search(\n",
|
||||
"vector_store.similarity_search(\n",
|
||||
" \"ducks\",\n",
|
||||
" k=10,\n",
|
||||
" filter={\n",
|
||||
@@ -410,34 +367,90 @@
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2e65adc1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to execute a similarity search and receive the corresponding scores you can run:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "65133340-2acd-4957-849e-029b6b5d60f0",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"execution_count": 18,
|
||||
"id": "7d92e7b3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"* [SIM=0.763449] there are cats in the pond [{'id': 1, 'topic': 'animals', 'location': 'pond'}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = vector_store.similarity_search_with_score(query=\"cats\", k=1)\n",
|
||||
"for doc, score in results:\n",
|
||||
" print(f\"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8d40db8c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For a full list of the different searches you can execute on a `PGVector` vector store, please refer to the [API reference](https://api.python.langchain.com/en/latest/vectorstores/langchain_postgres.vectorstores.PGVector.html).\n",
|
||||
"\n",
|
||||
"### Query by turning into retriever\n",
|
||||
"\n",
|
||||
"You can also transform the vector store into a retriever for easier usage in your chains. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "7cd1fb75",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='the book club meets at the library', metadata={'id': 8, 'topic': 'reading', 'location': 'library'}),\n",
|
||||
" Document(page_content='the new art exhibit is fascinating', metadata={'id': 5, 'topic': 'art', 'location': 'museum'}),\n",
|
||||
" Document(page_content='the library hosts a weekly story time for kids', metadata={'id': 9, 'topic': 'reading', 'location': 'library'}),\n",
|
||||
" Document(page_content='a sculpture exhibit is also at the museum', metadata={'id': 6, 'topic': 'art', 'location': 'museum'}),\n",
|
||||
" Document(page_content='the market also sells fresh oranges', metadata={'id': 4, 'topic': 'food', 'location': 'market'}),\n",
|
||||
" Document(page_content='a cooking class for beginners is offered at the community center', metadata={'id': 10, 'topic': 'classes', 'location': 'community center'}),\n",
|
||||
" Document(page_content='a new coffee shop opened on Main Street', metadata={'id': 7, 'topic': 'food', 'location': 'Main Street'}),\n",
|
||||
" Document(page_content='fresh apples are available at the market', metadata={'id': 3, 'topic': 'food', 'location': 'market'})]"
|
||||
"[Document(metadata={'id': 1, 'topic': 'animals', 'location': 'pond'}, page_content='there are cats in the pond')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"vectorstore.similarity_search(\"bird\", k=10, filter={\"location\": {\"$ne\": \"pond\"}})"
|
||||
"retriever = vector_store.as_retriever(search_type=\"mmr\", search_kwargs={\"k\": 1})\n",
|
||||
"retriever.invoke(\"kitty\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7ecd77a0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage for retrieval-augmented generation\n",
|
||||
"\n",
|
||||
"For guides on how to use this vector store for retrieval-augmented generation (RAG), see the following sections:\n",
|
||||
"\n",
|
||||
"- [Tutorials: working with external knowledge](https://python.langchain.com/v0.2/docs/tutorials/#working-with-external-knowledge)\n",
|
||||
"- [How-to: Question and answer with RAG](https://python.langchain.com/v0.2/docs/how_to/#qa-with-rag)\n",
|
||||
"- [Retrieval conceptual docs](https://python.langchain.com/v0.2/docs/concepts/#retrieval)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f451f361",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `PGVector` features and configurations, head to the API reference: https://api.python.langchain.com/en/latest/vectorstores/langchain_postgres.vectorstores.PGVector.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -457,7 +470,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -12,8 +12,9 @@
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the `Pinecone` vector database.\n",
|
||||
"\n",
|
||||
"Set the following environment variables to follow along in this doc:\n",
|
||||
"- `OPENAI_API_KEY`: Your OpenAI API key, for using `OpenAIEmbeddings`"
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To use the `PineconeVectorStore` you first need to install the partner package, as well as the other packages used throughout this notebook."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -25,12 +26,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet \\\n",
|
||||
" langchain-pinecone \\\n",
|
||||
" langchain-openai \\\n",
|
||||
" langchain \\\n",
|
||||
" langchain-community \\\n",
|
||||
" pinecone-notebooks"
|
||||
"%pip install -qU langchain-pinecone pinecone-notebooks"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -43,76 +39,52 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "42f2ea67",
|
||||
"id": "ef6dc4de",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First, let's split our state of the union document into chunked `docs`."
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Create a new Pinecone account, or sign into your existing one, and create an API key to use in this notebook."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "eb554814",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"from pinecone import Pinecone, ServerlessSpec\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"PINECONE_API_KEY\"):\n",
|
||||
" os.environ[\"PINECONE_API_KEY\"] = getpass.getpass(\"Enter your Pinecone API key: \")\n",
|
||||
"\n",
|
||||
"pinecone_api_key = os.environ.get(\"PINECONE_API_KEY\")\n",
|
||||
"\n",
|
||||
"pc = Pinecone(api_key=pinecone_api_key)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6ef1d828",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "a3c3999a",
|
||||
"id": "23b5ac5e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import TextLoader\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"from langchain_text_splitters import CharacterTextSplitter\n",
|
||||
"\n",
|
||||
"loader = TextLoader(\"../../how_to/state_of_the_union.txt\")\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ef6dc4de",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now let's create a new Pinecone account, or sign into your existing one, and create an API key to use in this notebook."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1fdc3c36",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from pinecone_notebooks.colab import Authenticate\n",
|
||||
"\n",
|
||||
"Authenticate()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "54da1a39",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The newly created API key has been stored in the `PINECONE_API_KEY` environment variable. We will use it to setup the Pinecone client."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "eb554814",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"pinecone_api_key = os.environ.get(\"PINECONE_API_KEY\")\n",
|
||||
"pinecone_api_key\n",
|
||||
"\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"from pinecone import Pinecone, ServerlessSpec\n",
|
||||
"\n",
|
||||
"pc = Pinecone(api_key=pinecone_api_key)"
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -120,26 +92,28 @@
|
||||
"id": "658706a3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Next, let's connect to your Pinecone index. If one named `index_name` doesn't exist, it will be created."
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"Before initializing our vector store, let's connect to a Pinecone index. If one named `index_name` doesn't exist, it will be created."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 12,
|
||||
"id": "276a06dd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import time\n",
|
||||
"\n",
|
||||
"index_name = \"langchain-index\" # change if desired\n",
|
||||
"index_name = \"langchain-test-index\" # change if desired\n",
|
||||
"\n",
|
||||
"existing_indexes = [index_info[\"name\"] for index_info in pc.list_indexes()]\n",
|
||||
"\n",
|
||||
"if index_name not in existing_indexes:\n",
|
||||
" pc.create_index(\n",
|
||||
" name=index_name,\n",
|
||||
" dimension=1536,\n",
|
||||
" dimension=3072,\n",
|
||||
" metric=\"cosine\",\n",
|
||||
" spec=ServerlessSpec(cloud=\"aws\", region=\"us-east-1\"),\n",
|
||||
" )\n",
|
||||
@@ -154,24 +128,188 @@
|
||||
"id": "3a4d377f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now that our Pinecone index is setup, we can upsert those chunked docs as contents with `PineconeVectorStore.from_documents`."
|
||||
"Now that our Pinecone index is set up, we can initialize our vector store. \n",
|
||||
"\n",
|
||||
"```{=mdx}\n",
|
||||
"import EmbeddingTabs from \"@theme/EmbeddingTabs\";\n",
|
||||
"\n",
|
||||
"<EmbeddingTabs/>\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 13,
|
||||
"id": "1485db56",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# | output: false\n",
|
||||
"# | echo: false\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "6e104aee",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_pinecone import PineconeVectorStore\n",
|
||||
"\n",
|
||||
"docsearch = PineconeVectorStore.from_documents(docs, embeddings, index_name=index_name)"
|
||||
"vector_store = PineconeVectorStore(index=index, embedding=embeddings)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "48721e29",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Manage vector store\n",
|
||||
"\n",
|
||||
"Once you have created your vector store, you can interact with it by adding and deleting different items.\n",
|
||||
"\n",
|
||||
"### Add items to vector store\n",
|
||||
"\n",
|
||||
"We can add items to our vector store by using the `add_documents` function."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 15,
|
||||
"id": "70e688f4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['167b8681-5974-467f-adcb-6e987a18df01',\n",
|
||||
" 'd16010fd-41f8-4d49-9c22-c66d5555a3fe',\n",
|
||||
" 'ffcacfb3-2bc2-44c3-a039-c2256a905c0e',\n",
|
||||
" 'cf3bfc9f-5dc7-4f5e-bb41-edb957394126',\n",
|
||||
" 'e99b07eb-fdff-4cb9-baa8-619fd8efeed3',\n",
|
||||
" '68c93033-a24f-40bd-8492-92fa26b631a4',\n",
|
||||
" 'b27a4ecb-b505-4c5d-89ff-526e3d103558',\n",
|
||||
" '4868a9e6-e6fb-4079-b400-4a1dfbf0d4c4',\n",
|
||||
" '921c0e9c-0550-4eb5-9a6c-ed44410788b2',\n",
|
||||
" 'c446fc23-64e8-47e7-8c19-ecf985e9411e']"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from uuid import uuid4\n",
|
||||
"\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"document_1 = Document(\n",
|
||||
" page_content=\"I had chocolate chip pancakes and scrambled eggs for breakfast this morning.\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_2 = Document(\n",
|
||||
" page_content=\"The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_3 = Document(\n",
|
||||
" page_content=\"Building an exciting new project with LangChain - come check it out!\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_4 = Document(\n",
|
||||
" page_content=\"Robbers broke into the city bank and stole $1 million in cash.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_5 = Document(\n",
|
||||
" page_content=\"Wow! That was an amazing movie. I can't wait to see it again.\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_6 = Document(\n",
|
||||
" page_content=\"Is the new iPhone worth the price? Read this review to find out.\",\n",
|
||||
" metadata={\"source\": \"website\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_7 = Document(\n",
|
||||
" page_content=\"The top 10 soccer players in the world right now.\",\n",
|
||||
" metadata={\"source\": \"website\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_8 = Document(\n",
|
||||
" page_content=\"LangGraph is the best framework for building stateful, agentic applications!\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_9 = Document(\n",
|
||||
" page_content=\"The stock market is down 500 points today due to fears of a recession.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_10 = Document(\n",
|
||||
" page_content=\"I have a bad feeling I am going to get deleted :(\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"documents = [\n",
|
||||
" document_1,\n",
|
||||
" document_2,\n",
|
||||
" document_3,\n",
|
||||
" document_4,\n",
|
||||
" document_5,\n",
|
||||
" document_6,\n",
|
||||
" document_7,\n",
|
||||
" document_8,\n",
|
||||
" document_9,\n",
|
||||
" document_10,\n",
|
||||
"]\n",
|
||||
"uuids = [str(uuid4()) for _ in range(len(documents))]\n",
|
||||
"\n",
|
||||
"vector_store.add_documents(documents=documents, ids=uuids)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "120922b3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Delete items from vector store"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "5b8437cd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vector_store.delete(ids=[uuids[-1]])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5ee21c89",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Query vector store\n",
|
||||
"\n",
|
||||
"Once your vector store has been created and the relevant documents have been added you will most likely wish to query it during the running of your chain or agent. \n",
|
||||
"\n",
|
||||
"### Query directly\n",
|
||||
"\n",
|
||||
"Performing a simple similarity search can be done as follows:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "ffbcb3fb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -179,214 +317,114 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n"
|
||||
"* Building an exciting new project with LangChain - come check it out! [{'source': 'tweet'}]\n",
|
||||
"* LangGraph is the best framework for building stateful, agentic applications! [{'source': 'tweet'}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = docsearch.similarity_search(query)\n",
|
||||
"print(docs[0].page_content)"
|
||||
"results = vector_store.similarity_search(\n",
|
||||
" \"LangChain provides abstractions to make working with LLMs easy\",\n",
|
||||
" k=2,\n",
|
||||
" filter={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"for res in results:\n",
|
||||
" print(f\"* {res.page_content} [{res.metadata}]\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "86a4b96b",
|
||||
"id": "79f3494d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Adding More Text to an Existing Index\n",
|
||||
"#### Similarity search with score\n",
|
||||
"\n",
|
||||
"More text can embedded and upserted to an existing Pinecone index using the `add_texts` function\n"
|
||||
"You can also search with score:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "38a7a60e",
|
||||
"execution_count": 18,
|
||||
"id": "5fb24583",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"* [SIM=0.553187] The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees. [{'source': 'news'}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = vector_store.similarity_search_with_score(\n",
|
||||
" \"Will it be hot tomorrow?\", k=1, filter={\"source\": \"news\"}\n",
|
||||
")\n",
|
||||
"for res, score in results:\n",
|
||||
" print(f\"* [SIM={score:3f}] {res.page_content} [{res.metadata}]\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1855941b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Other search methods\n",
|
||||
"\n",
|
||||
"There are more search methods (such as MMR) not listed in this notebook. To find all of them, be sure to read the [API reference](https://api.python.langchain.com/en/latest/vectorstores/langchain_pinecone.vectorstores.PineconeVectorStore.html).\n",
|
||||
"\n",
|
||||
"### Query by turning into retriever\n",
|
||||
"\n",
|
||||
"You can also transform the vector store into a retriever for easier usage in your chains."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "78140e87",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['24631802-4bad-44a7-a4ba-fd71f00cc160']"
|
||||
"[Document(metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings)\n",
|
||||
"\n",
|
||||
"vectorstore.add_texts([\"More text!\"])"
|
||||
"retriever = vector_store.as_retriever(\n",
|
||||
" search_type=\"similarity_score_threshold\",\n",
|
||||
" search_kwargs={\"k\": 1, \"score_threshold\": 0.5},\n",
|
||||
")\n",
|
||||
"retriever.invoke(\"Stealing from the bank is a crime\", filter={\"source\": \"news\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "d46d1452",
|
||||
"id": "72990cb5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Maximal Marginal Relevance Searches\n",
|
||||
"## Usage for retrieval-augmented generation\n",
|
||||
"\n",
|
||||
"In addition to using similarity search in the retriever object, you can also use `mmr` as retriever.\n"
|
||||
"For guides on how to use this vector store for retrieval-augmented generation (RAG), see the following sections:\n",
|
||||
"\n",
|
||||
"- [Tutorials: working with external knowledge](https://python.langchain.com/v0.2/docs/tutorials/#working-with-external-knowledge)\n",
|
||||
"- [How-to: Question and answer with RAG](https://python.langchain.com/v0.2/docs/how_to/#qa-with-rag)\n",
|
||||
"- [Retrieval conceptual docs](https://python.langchain.com/v0.2/docs/concepts/#retrieval)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "a359ed74",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"## Document 0\n",
|
||||
"\n",
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n",
|
||||
"\n",
|
||||
"## Document 1\n",
|
||||
"\n",
|
||||
"And I’m taking robust action to make sure the pain of our sanctions is targeted at Russia’s economy. And I will use every tool at our disposal to protect American businesses and consumers. \n",
|
||||
"\n",
|
||||
"Tonight, I can announce that the United States has worked with 30 other countries to release 60 Million barrels of oil from reserves around the world. \n",
|
||||
"\n",
|
||||
"America will lead that effort, releasing 30 Million barrels from our own Strategic Petroleum Reserve. And we stand ready to do more if necessary, unified with our allies. \n",
|
||||
"\n",
|
||||
"These steps will help blunt gas prices here at home. And I know the news about what’s happening can seem alarming. \n",
|
||||
"\n",
|
||||
"But I want you to know that we are going to be okay. \n",
|
||||
"\n",
|
||||
"When the history of this era is written Putin’s war on Ukraine will have left Russia weaker and the rest of the world stronger. \n",
|
||||
"\n",
|
||||
"While it shouldn’t have taken something so terrible for people around the world to see what’s at stake now everyone sees it clearly.\n",
|
||||
"\n",
|
||||
"## Document 2\n",
|
||||
"\n",
|
||||
"We can’t change how divided we’ve been. But we can change how we move forward—on COVID-19 and other issues we must face together. \n",
|
||||
"\n",
|
||||
"I recently visited the New York City Police Department days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera. \n",
|
||||
"\n",
|
||||
"They were responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \n",
|
||||
"\n",
|
||||
"Officer Mora was 27 years old. \n",
|
||||
"\n",
|
||||
"Officer Rivera was 22. \n",
|
||||
"\n",
|
||||
"Both Dominican Americans who’d grown up on the same streets they later chose to patrol as police officers. \n",
|
||||
"\n",
|
||||
"I spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. \n",
|
||||
"\n",
|
||||
"I’ve worked on these issues a long time. \n",
|
||||
"\n",
|
||||
"I know what works: Investing in crime prevention and community police officers who’ll walk the beat, who’ll know the neighborhood, and who can restore trust and safety.\n",
|
||||
"\n",
|
||||
"## Document 3\n",
|
||||
"\n",
|
||||
"One was stationed at bases and breathing in toxic smoke from “burn pits” that incinerated wastes of war—medical and hazard material, jet fuel, and more. \n",
|
||||
"\n",
|
||||
"When they came home, many of the world’s fittest and best trained warriors were never the same. \n",
|
||||
"\n",
|
||||
"Headaches. Numbness. Dizziness. \n",
|
||||
"\n",
|
||||
"A cancer that would put them in a flag-draped coffin. \n",
|
||||
"\n",
|
||||
"I know. \n",
|
||||
"\n",
|
||||
"One of those soldiers was my son Major Beau Biden. \n",
|
||||
"\n",
|
||||
"We don’t know for sure if a burn pit was the cause of his brain cancer, or the diseases of so many of our troops. \n",
|
||||
"\n",
|
||||
"But I’m committed to finding out everything we can. \n",
|
||||
"\n",
|
||||
"Committed to military families like Danielle Robinson from Ohio. \n",
|
||||
"\n",
|
||||
"The widow of Sergeant First Class Heath Robinson. \n",
|
||||
"\n",
|
||||
"He was born a soldier. Army National Guard. Combat medic in Kosovo and Iraq. \n",
|
||||
"\n",
|
||||
"Stationed near Baghdad, just yards from burn pits the size of football fields. \n",
|
||||
"\n",
|
||||
"Heath’s widow Danielle is here with us tonight. They loved going to Ohio State football games. He loved building Legos with their daughter.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"retriever = docsearch.as_retriever(search_type=\"mmr\")\n",
|
||||
"matched_docs = retriever.invoke(query)\n",
|
||||
"for i, d in enumerate(matched_docs):\n",
|
||||
" print(f\"\\n## Document {i}\\n\")\n",
|
||||
" print(d.page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "7c477287",
|
||||
"id": "0d5722bc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Or use `max_marginal_relevance_search` directly:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "9ca82740",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"1. Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence. \n",
|
||||
"\n",
|
||||
"2. We can’t change how divided we’ve been. But we can change how we move forward—on COVID-19 and other issues we must face together. \n",
|
||||
"\n",
|
||||
"I recently visited the New York City Police Department days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera. \n",
|
||||
"\n",
|
||||
"They were responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \n",
|
||||
"\n",
|
||||
"Officer Mora was 27 years old. \n",
|
||||
"\n",
|
||||
"Officer Rivera was 22. \n",
|
||||
"\n",
|
||||
"Both Dominican Americans who’d grown up on the same streets they later chose to patrol as police officers. \n",
|
||||
"\n",
|
||||
"I spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. \n",
|
||||
"\n",
|
||||
"I’ve worked on these issues a long time. \n",
|
||||
"\n",
|
||||
"I know what works: Investing in crime prevention and community police officers who’ll walk the beat, who’ll know the neighborhood, and who can restore trust and safety. \n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"found_docs = docsearch.max_marginal_relevance_search(query, k=2, fetch_k=10)\n",
|
||||
"for i, doc in enumerate(found_docs):\n",
|
||||
" print(f\"{i + 1}.\", doc.page_content, \"\\n\")"
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `PineconeVectorStore` features and configurations, head to the API reference: https://api.python.langchain.com/en/latest/vectorstores/langchain_pinecone.vectorstores.PineconeVectorStore.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -406,7 +444,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.6"
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -14,6 +14,9 @@
|
||||
"\n",
|
||||
"> This page documents the `QdrantVectorStore` class that supports multiple retrieval modes via Qdrant's new [Query API](https://qdrant.tech/blog/qdrant-1.10.x/). It requires you to run Qdrant v1.10.0 or above.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"There are various modes of how to run `Qdrant`, and depending on the chosen one, there will be some subtle differences. The options include:\n",
|
||||
"- Local mode, no server required\n",
|
||||
"- Docker deployments\n",
|
||||
@@ -31,56 +34,30 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install langchain-qdrant langchain-openai langchain"
|
||||
"%pip install -qU langchain-qdrant 'qdrant-client[fastembed]'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "7b2f111b-357a-4f42-9730-ef0603bdc1b5",
|
||||
"id": "7d387fea",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We will use `OpenAIEmbeddings` for demonstration."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "aac9563e",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-04-04T10:51:22.282884Z",
|
||||
"start_time": "2023-04-04T10:51:21.408077Z"
|
||||
},
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import TextLoader\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"from langchain_qdrant import QdrantVectorStore\n",
|
||||
"from langchain_text_splitters import CharacterTextSplitter"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "a3c3999a",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-04-04T10:51:22.520144Z",
|
||||
"start_time": "2023-04-04T10:51:22.285826Z"
|
||||
},
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = TextLoader(\"some-file.txt\")\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
"There are no credentials needed to run the code in this notebook.\n",
|
||||
"\n",
|
||||
"If you want to get best-in-class automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4912937d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -89,7 +66,7 @@
|
||||
"id": "eeead681",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Connecting to Qdrant from LangChain\n",
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"### Local mode\n",
|
||||
"\n",
|
||||
@@ -97,12 +74,33 @@
|
||||
"\n",
|
||||
"#### In-memory\n",
|
||||
"\n",
|
||||
"For some testing scenarios and quick experiments, you may prefer to keep all the data in memory only, so it gets lost when the client is destroyed - usually at the end of your script/notebook."
|
||||
"For some testing scenarios and quick experiments, you may prefer to keep all the data in memory only, so it gets lost when the client is destroyed - usually at the end of your script/notebook.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"```{=mdx}\n",
|
||||
"import EmbeddingTabs from \"@theme/EmbeddingTabs\";\n",
|
||||
"\n",
|
||||
"<EmbeddingTabs/>\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 1,
|
||||
"id": "1df86797",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# | output: false\n",
|
||||
"# | echo: false\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "8429667e",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -113,11 +111,21 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"qdrant = QdrantVectorStore.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embeddings,\n",
|
||||
" location=\":memory:\", # Local mode with in-memory storage only\n",
|
||||
" collection_name=\"my_documents\",\n",
|
||||
"from langchain_qdrant import QdrantVectorStore\n",
|
||||
"from qdrant_client import QdrantClient\n",
|
||||
"from qdrant_client.http.models import Distance, VectorParams\n",
|
||||
"\n",
|
||||
"client = QdrantClient(\":memory:\")\n",
|
||||
"\n",
|
||||
"client.create_collection(\n",
|
||||
" collection_name=\"demo_collection\",\n",
|
||||
" vectors_config=VectorParams(size=3072, distance=Distance.COSINE),\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"vector_store = QdrantVectorStore(\n",
|
||||
" client=client,\n",
|
||||
" collection_name=\"demo_collection\",\n",
|
||||
" embedding=embeddings,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -134,7 +142,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 7,
|
||||
"id": "24b370e2",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -145,11 +153,17 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"qdrant = QdrantVectorStore.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embeddings,\n",
|
||||
" path=\"/tmp/local_qdrant\",\n",
|
||||
" collection_name=\"my_documents\",\n",
|
||||
"client = QdrantClient(path=\"/tmp/langchain_qdrant\")\n",
|
||||
"\n",
|
||||
"client.create_collection(\n",
|
||||
" collection_name=\"demo_collection\",\n",
|
||||
" vectors_config=VectorParams(size=3072, distance=Distance.COSINE),\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"vector_store = QdrantVectorStore(\n",
|
||||
" client=client,\n",
|
||||
" collection_name=\"demo_collection\",\n",
|
||||
" embedding=embeddings,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -177,6 +191,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"url = \"<---qdrant url here --->\"\n",
|
||||
"docs = [] # put docs here\n",
|
||||
"qdrant = QdrantVectorStore.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embeddings,\n",
|
||||
@@ -245,44 +260,151 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"qdrant = QdrantVectorStore.from_existing_collection(\n",
|
||||
" embeddings=embeddings,\n",
|
||||
" embedding=embeddings,\n",
|
||||
" collection_name=\"my_documents\",\n",
|
||||
" url=\"http://localhost:6333\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "93540013",
|
||||
"id": "3cddef6e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Recreating the collection\n",
|
||||
"## Manage vector store\n",
|
||||
"\n",
|
||||
"The collection is reused if it already exists. Setting `force_recreate` to `True` allows to remove the old collection and start from scratch."
|
||||
"Once you have created your vector store, you can interact with it by adding and deleting different items.\n",
|
||||
"\n",
|
||||
"### Add items to vector store\n",
|
||||
"\n",
|
||||
"We can add items to our vector store by using the `add_documents` function."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "30a87570",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-04-04T10:51:24.854117Z",
|
||||
"start_time": "2023-04-04T10:51:24.845385Z"
|
||||
"id": "7697a362",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['c04134c3-273d-4766-949a-eee46052ad32',\n",
|
||||
" '9e6ba50c-794f-4b88-94e5-411f15052a02',\n",
|
||||
" 'd3202666-6f2b-4186-ac43-e35389de8166',\n",
|
||||
" '50d8d6ee-69bf-4173-a6a2-b254e9928965',\n",
|
||||
" 'bd2eae02-74b5-43ec-9fcf-09e9d9db6fd3',\n",
|
||||
" '6dae6b37-826d-4f14-8376-da4603b35de3',\n",
|
||||
" 'b0964ab5-5a14-47b4-a983-37fa5c5bd154',\n",
|
||||
" '91ed6c56-fe53-49e2-8199-c3bb3c33c3eb',\n",
|
||||
" '42a580cb-7469-4324-9927-0febab57ce92',\n",
|
||||
" 'ff774e5c-f158-4d12-94e2-0a0162b22f27']"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
],
|
||||
"source": [
|
||||
"url = \"<---qdrant url here --->\"\n",
|
||||
"qdrant = QdrantVectorStore.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embeddings,\n",
|
||||
" url=url,\n",
|
||||
" prefer_grpc=True,\n",
|
||||
" collection_name=\"my_documents\",\n",
|
||||
" force_recreate=True,\n",
|
||||
")"
|
||||
"from uuid import uuid4\n",
|
||||
"\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"document_1 = Document(\n",
|
||||
" page_content=\"I had chocalate chip pancakes and scrambled eggs for breakfast this morning.\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_2 = Document(\n",
|
||||
" page_content=\"The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_3 = Document(\n",
|
||||
" page_content=\"Building an exciting new project with LangChain - come check it out!\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_4 = Document(\n",
|
||||
" page_content=\"Robbers broke into the city bank and stole $1 million in cash.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_5 = Document(\n",
|
||||
" page_content=\"Wow! That was an amazing movie. I can't wait to see it again.\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_6 = Document(\n",
|
||||
" page_content=\"Is the new iPhone worth the price? Read this review to find out.\",\n",
|
||||
" metadata={\"source\": \"website\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_7 = Document(\n",
|
||||
" page_content=\"The top 10 soccer players in the world right now.\",\n",
|
||||
" metadata={\"source\": \"website\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_8 = Document(\n",
|
||||
" page_content=\"LangGraph is the best framework for building stateful, agentic applications!\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_9 = Document(\n",
|
||||
" page_content=\"The stock market is down 500 points today due to fears of a recession.\",\n",
|
||||
" metadata={\"source\": \"news\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_10 = Document(\n",
|
||||
" page_content=\"I have a bad feeling I am going to get deleted :(\",\n",
|
||||
" metadata={\"source\": \"tweet\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"documents = [\n",
|
||||
" document_1,\n",
|
||||
" document_2,\n",
|
||||
" document_3,\n",
|
||||
" document_4,\n",
|
||||
" document_5,\n",
|
||||
" document_6,\n",
|
||||
" document_7,\n",
|
||||
" document_8,\n",
|
||||
" document_9,\n",
|
||||
" document_10,\n",
|
||||
"]\n",
|
||||
"uuids = [str(uuid4()) for _ in range(len(documents))]\n",
|
||||
"\n",
|
||||
"vector_store.add_documents(documents=documents, ids=uuids)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5fd23102",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Delete items from vector store"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 37,
|
||||
"id": "999cafcc",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 37,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"vector_store.delete(ids=[uuids[-1]])"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -296,22 +418,55 @@
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## Similarity search\n",
|
||||
"## Query vector store\n",
|
||||
"\n",
|
||||
"The simplest scenario for using Qdrant vector store is to perform a similarity search. Under the hood, our query will be encoded into vector embeddings and used to find similar documents in Qdrant collection.\n",
|
||||
"Once your vector store has been created and the relevant documents have been added you will most likely wish to query it during the running of your chain or agent. \n",
|
||||
"\n",
|
||||
"`QdrantVectorStore` supports 3 modes for similarity searches. They can be configured using the `retrieval_mode` parameter when setting up the class.\n",
|
||||
"### Query directly\n",
|
||||
"\n",
|
||||
"- Dense Vector Search (Default)\n",
|
||||
"- Sparse Vector Search\n",
|
||||
"- Hybrid Search"
|
||||
"The simplest scenario for using Qdrant vector store is to perform a similarity search. Under the hood, our query will be encoded into vector embeddings and used to find similar documents in Qdrant collection."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "a8c513ab",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-04-04T10:51:25.204469Z",
|
||||
"start_time": "2023-04-04T10:51:24.855618Z"
|
||||
},
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"* Building an exciting new project with LangChain - come check it out! [{'source': 'tweet', '_id': 'd3202666-6f2b-4186-ac43-e35389de8166', '_collection_name': 'demo_collection'}]\n",
|
||||
"* LangGraph is the best framework for building stateful, agentic applications! [{'source': 'tweet', '_id': '91ed6c56-fe53-49e2-8199-c3bb3c33c3eb', '_collection_name': 'demo_collection'}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = vector_store.similarity_search(\n",
|
||||
" \"LangChain provides abstractions to make working with LLMs easy\", k=2\n",
|
||||
")\n",
|
||||
"for res in results:\n",
|
||||
" print(f\"* {res.page_content} [{res.metadata}]\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b3a78d46",
|
||||
"id": "79bcb0ce",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"`QdrantVectorStore` supports 3 modes for similarity searches. They can be configured using the `retrieval_mode` parameter when setting up the class.\n",
|
||||
"\n",
|
||||
"- Dense Vector Search (Default)\n",
|
||||
"- Sparse Vector Search\n",
|
||||
"- Hybrid Search\n",
|
||||
"\n",
|
||||
"### Dense Vector Search\n",
|
||||
"\n",
|
||||
"To search with only dense vectors,\n",
|
||||
@@ -323,14 +478,8 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a8c513ab",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-04-04T10:51:25.204469Z",
|
||||
"start_time": "2023-04-04T10:51:24.855618Z"
|
||||
},
|
||||
"tags": []
|
||||
},
|
||||
"id": "5e097299",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_qdrant import RetrievalMode\n",
|
||||
@@ -367,7 +516,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ceb493a3",
|
||||
"id": "8435c0f1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -377,7 +526,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "052e3412",
|
||||
"id": "7cf1e3ef",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -399,7 +548,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f4b6c456",
|
||||
"id": "26e20c61",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Hybrid Vector Search\n",
|
||||
@@ -416,7 +565,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ce56f6e9",
|
||||
"id": "f37c8519",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -443,15 +592,12 @@
|
||||
"id": "1bda9bf5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Similarity search with score\n",
|
||||
"\n",
|
||||
"Sometimes we might want to perform the search, but also obtain a relevancy score to know how good a particular result is. \n",
|
||||
"The returned distance score is cosine distance. Therefore, a lower score is better."
|
||||
"If you want to execute a similarity search and receive the corresponding scores you can run:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 12,
|
||||
"id": "8804a21d",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -459,27 +605,21 @@
|
||||
"start_time": "2023-04-04T10:51:25.227384Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"found_docs = qdrant.similarity_search_with_score(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "756a6887",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-04-04T10:51:25.642282Z",
|
||||
"start_time": "2023-04-04T10:51:25.635947Z"
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"* [SIM=0.531834] The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees. [{'source': 'news', '_id': '9e6ba50c-794f-4b88-94e5-411f15052a02', '_collection_name': 'demo_collection'}]\n"
|
||||
]
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
],
|
||||
"source": [
|
||||
"document, score = found_docs[0]\n",
|
||||
"print(document.page_content)\n",
|
||||
"print(f\"\\nScore: {score}\")"
|
||||
"results = vector_store.similarity_search_with_score(\n",
|
||||
" query=\"Will it be hot tomorrow\", k=1\n",
|
||||
")\n",
|
||||
"for doc, score in results:\n",
|
||||
" print(f\"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -488,73 +628,46 @@
|
||||
"id": "525e3582",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For a full list of all the search functions available for a `QdrantVectorStore`, read the [API reference](https://api.python.langchain.com/en/latest/vectorstores/langchain_qdrant.vectorstores.Qdrant.html)\n",
|
||||
"\n",
|
||||
"### Metadata filtering\n",
|
||||
"\n",
|
||||
"Qdrant has an [extensive filtering system](https://qdrant.tech/documentation/concepts/filtering/) with rich type support. It is also possible to use the filters in Langchain, by passing an additional param to both the `similarity_search_with_score` and `similarity_search` methods."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "1c2c58dc",
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "dc7cffc8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"* The top 10 soccer players in the world right now. [{'source': 'website', '_id': 'b0964ab5-5a14-47b4-a983-37fa5c5bd154', '_collection_name': 'demo_collection'}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"```python\n",
|
||||
"from qdrant_client.http import models\n",
|
||||
"\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"found_docs = qdrant.similarity_search_with_score(query, filter=models.Filter(...))\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "c58c30bf",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-04-04T10:39:53.032744Z",
|
||||
"start_time": "2023-04-04T10:39:53.028673Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## Maximum marginal relevance search (MMR)\n",
|
||||
"\n",
|
||||
"If you'd like to look up some similar documents, but you'd also like to receive diverse results, MMR is the method you should consider. Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents.\n",
|
||||
"\n",
|
||||
"Note that MMR search is only available if you've added documents with `DENSE` or `HYBRID` modes, since it requires dense vectors."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "76810fb6",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-04-04T10:51:26.010947Z",
|
||||
"start_time": "2023-04-04T10:51:25.647687Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"found_docs = qdrant.max_marginal_relevance_search(query, k=2, fetch_k=10)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "80c6db11",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-04-04T10:51:26.016979Z",
|
||||
"start_time": "2023-04-04T10:51:26.013329Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for i, doc in enumerate(found_docs):\n",
|
||||
" print(f\"{i + 1}.\", doc.page_content, \"\\n\")"
|
||||
"results = vector_store.similarity_search(\n",
|
||||
" query=\"Who are the best soccer players in the world?\",\n",
|
||||
" k=1,\n",
|
||||
" filter=models.Filter(\n",
|
||||
" should=[\n",
|
||||
" models.FieldCondition(\n",
|
||||
" key=\"page_content\",\n",
|
||||
" match=models.MatchValue(\n",
|
||||
" value=\"The top 10 soccer players in the world right now.\"\n",
|
||||
" ),\n",
|
||||
" ),\n",
|
||||
" ]\n",
|
||||
" ),\n",
|
||||
")\n",
|
||||
"for doc in results:\n",
|
||||
" print(f\"* {doc.page_content} [{doc.metadata}]\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -563,14 +676,14 @@
|
||||
"id": "691a82d6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Qdrant as a Retriever\n",
|
||||
"### Query by turning into retriever\n",
|
||||
"\n",
|
||||
"Qdrant, as all the other vector stores, is a LangChain Retriever. "
|
||||
"You can also transform the vector store into a retriever for easier usage in your chains. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 15,
|
||||
"id": "9427195f",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -578,49 +691,35 @@
|
||||
"start_time": "2023-04-04T10:51:26.018763Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(metadata={'source': 'news', '_id': '50d8d6ee-69bf-4173-a6a2-b254e9928965', '_collection_name': 'demo_collection'}, page_content='Robbers broke into the city bank and stole $1 million in cash.')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"retriever = qdrant.as_retriever()"
|
||||
"retriever = vector_store.as_retriever(search_type=\"mmr\", search_kwargs={\"k\": 1})\n",
|
||||
"retriever.invoke(\"Stealing from the bank is a crime\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "0c851b4f",
|
||||
"id": "6ac07288",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can also specify MMR as the search strategy, instead of similarity."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "64348f1b",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-04-04T10:51:26.043909Z",
|
||||
"start_time": "2023-04-04T10:51:26.034284Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = qdrant.as_retriever(search_type=\"mmr\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f3c70c31",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-04-04T10:51:26.495652Z",
|
||||
"start_time": "2023-04-04T10:51:26.046407Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"retriever.invoke(query)[0]"
|
||||
"## Usage for retrieval-augmented generation\n",
|
||||
"\n",
|
||||
"For guides on how to use this vector store for retrieval-augmented generation (RAG), see the following sections:\n",
|
||||
"\n",
|
||||
"- [Tutorials: working with external knowledge](https://python.langchain.com/v0.2/docs/tutorials/#working-with-external-knowledge)\n",
|
||||
"- [How-to: Question and answer with RAG](https://python.langchain.com/v0.2/docs/how_to/#qa-with-rag)\n",
|
||||
"- [Retrieval conceptual docs](https://python.langchain.com/v0.2/docs/concepts/#retrieval)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -647,6 +746,8 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_qdrant import RetrievalMode\n",
|
||||
"\n",
|
||||
"QdrantVectorStore.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embedding=embeddings,\n",
|
||||
@@ -707,12 +808,14 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"cell_type": "markdown",
|
||||
"id": "2300e785",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `QdrantVectorStore` features and configurations head to the API reference: https://api.python.langchain.com/en/latest/vectorstores/langchain_qdrant.vectorstores.Qdrant.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -731,7 +834,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.8"
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -601,7 +601,7 @@
|
||||
"relevant documents, constructs a prompt, passes that to a model, and\n",
|
||||
"parses the output.\n",
|
||||
"\n",
|
||||
"We’ll use the gpt-3.5-turbo OpenAI chat model, but any LangChain `LLM`\n",
|
||||
"We’ll use the gpt-4o-mini OpenAI chat model, but any LangChain `LLM`\n",
|
||||
"or `ChatModel` could be substituted in.\n",
|
||||
"\n",
|
||||
"```{=mdx}\n",
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
---
|
||||
sidebar_position: 0
|
||||
sidebar_label: Overview
|
||||
sidebar_label: Overview of v0.2
|
||||
---
|
||||
|
||||
# LangChain over time
|
||||
# Overview of LangChain v0.2
|
||||
|
||||
## What’s new in LangChain?
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user