mirror of
https://github.com/hwchase17/langchain.git
synced 2026-02-06 09:10:27 +00:00
Compare commits
182 Commits
erick/cli-
...
eugene/int
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0b5a99e020 | ||
|
|
02d6c1969e | ||
|
|
dadb6f1445 | ||
|
|
b6f0174bb9 | ||
|
|
c3ced4c6ce | ||
|
|
bd6c31617e | ||
|
|
6e57aa7c36 | ||
|
|
a2b4c33bd6 | ||
|
|
4825dc0d76 | ||
|
|
02300471be | ||
|
|
66b7206ab6 | ||
|
|
c81c77b465 | ||
|
|
3b7437d184 | ||
|
|
91ea4b7449 | ||
|
|
652b3fa4a4 | ||
|
|
7040013140 | ||
|
|
dc7423e88f | ||
|
|
25f2e25be1 | ||
|
|
786ef021a3 | ||
|
|
429a0ee7fd | ||
|
|
da9281feb2 | ||
|
|
c6ece6a96d | ||
|
|
86355640c3 | ||
|
|
b9f65e5038 | ||
|
|
30fb345342 | ||
|
|
98779797fe | ||
|
|
663638d6a8 | ||
|
|
2f209d84fa | ||
|
|
c72e522e96 | ||
|
|
bf5193bb99 | ||
|
|
11adc09e02 | ||
|
|
6b32810b68 | ||
|
|
3da2713172 | ||
|
|
425f6ffa5b | ||
|
|
15a36dd0a2 | ||
|
|
249945a572 | ||
|
|
59b8850909 | ||
|
|
4828c441a7 | ||
|
|
725e4912ae | ||
|
|
d895db11d6 | ||
|
|
d77c7c4236 | ||
|
|
7b1a132aff | ||
|
|
df99b832a7 | ||
|
|
803eba3163 | ||
|
|
6e9a8b188f | ||
|
|
f337408b0f | ||
|
|
0b4608f71e | ||
|
|
a4086119f8 | ||
|
|
b4c12346cc | ||
|
|
dff83cce66 | ||
|
|
130e80b60f | ||
|
|
09fbce13c5 | ||
|
|
0ba125c3cd | ||
|
|
28e0958ff4 | ||
|
|
a2e9910268 | ||
|
|
7e7fcf5b1f | ||
|
|
04dd8d3b0a | ||
|
|
63d84e93b9 | ||
|
|
4d28c70000 | ||
|
|
46a47710b0 | ||
|
|
35ebd2620c | ||
|
|
23c9aba575 | ||
|
|
1331e8589c | ||
|
|
7882d5c978 | ||
|
|
70677202c7 | ||
|
|
78403a3746 | ||
|
|
5d10139fc7 | ||
|
|
d283f452cc | ||
|
|
264ab96980 | ||
|
|
267855b3c1 | ||
|
|
71c0698ee4 | ||
|
|
a72fddbf8d | ||
|
|
2c798622cd | ||
|
|
3abf1b6905 | ||
|
|
1028af17e7 | ||
|
|
061ed250f6 | ||
|
|
293a4a78de | ||
|
|
ffa0c838d8 | ||
|
|
111c7df117 | ||
|
|
6eb42c657e | ||
|
|
88a9a6a758 | ||
|
|
8f33fce871 | ||
|
|
423d286546 | ||
|
|
e572521f2a | ||
|
|
63ddf0afb4 | ||
|
|
4bcd2aad6c | ||
|
|
427a04151c | ||
|
|
d166967003 | ||
|
|
a74e466507 | ||
|
|
a02a09c973 | ||
|
|
41dfad5104 | ||
|
|
e7b95e0802 | ||
|
|
16bd0697dc | ||
|
|
6890daa90c | ||
|
|
335894893b | ||
|
|
c5cb52a3c6 | ||
|
|
cda79dbb6c | ||
|
|
cea3f72485 | ||
|
|
02c35da445 | ||
|
|
208042e0f2 | ||
|
|
f5da0d6d87 | ||
|
|
2c3e3dc6b1 | ||
|
|
7de62abc91 | ||
|
|
f9a11a9197 | ||
|
|
1dcee68cb8 | ||
|
|
e81ddb32a6 | ||
|
|
57747892ce | ||
|
|
679843abb0 | ||
|
|
73570873ab | ||
|
|
2ae76cecde | ||
|
|
4305f78e40 | ||
|
|
64ccddf3cb | ||
|
|
dd8e4cd020 | ||
|
|
0de0cd2d31 | ||
|
|
8e2316b8c2 | ||
|
|
c2538e7834 | ||
|
|
acba38a18e | ||
|
|
22c1a4041b | ||
|
|
4797b806c2 | ||
|
|
7061869aec | ||
|
|
98c22e9082 | ||
|
|
c04d95b962 | ||
|
|
54e9ea433a | ||
|
|
fe1820cdaf | ||
|
|
71c0564c9f | ||
|
|
c65e48996c | ||
|
|
d7688a4328 | ||
|
|
7b08de8909 | ||
|
|
245cb5a252 | ||
|
|
199e9c5ae0 | ||
|
|
fba65ba04f | ||
|
|
4092876863 | ||
|
|
6e45dba471 | ||
|
|
ad16eed119 | ||
|
|
3ab09d87d6 | ||
|
|
9cb69a8746 | ||
|
|
db3ceb4d0a | ||
|
|
db42576b09 | ||
|
|
be5294e35d | ||
|
|
41ed23a050 | ||
|
|
ea505985c4 | ||
|
|
02db66d764 | ||
|
|
2204d8cb7d | ||
|
|
ff6274d32d | ||
|
|
c72f0d2f20 | ||
|
|
75776e4a54 | ||
|
|
1827bb4042 | ||
|
|
769c3bb838 | ||
|
|
bfc59c1d26 | ||
|
|
8eb0bdead3 | ||
|
|
b2dd9ffaaf | ||
|
|
f14121faaf | ||
|
|
b7abac9f92 | ||
|
|
42c686bc28 | ||
|
|
600fc233ef | ||
|
|
25b93cc4c0 | ||
|
|
492df75937 | ||
|
|
a24c445e02 | ||
|
|
5098f9dc79 | ||
|
|
c776471ac6 | ||
|
|
752a71b688 | ||
|
|
1213a59f87 | ||
|
|
17a06cb7a6 | ||
|
|
a7380dd531 | ||
|
|
e98e4be0f7 | ||
|
|
210623b409 | ||
|
|
7d1694040d | ||
|
|
add16111b9 | ||
|
|
a4a444f73d | ||
|
|
69c656aa5f | ||
|
|
5099a9c9b4 | ||
|
|
8461934c2b | ||
|
|
d24b82357f | ||
|
|
7720483432 | ||
|
|
2019e31bc5 | ||
|
|
30f18c7b02 | ||
|
|
4da3d4b18e | ||
|
|
40b4a3de6e | ||
|
|
b00c0fc558 | ||
|
|
a6add89bd4 | ||
|
|
096b66db4a | ||
|
|
99eb31ec41 |
6
.github/scripts/check_diff.py
vendored
6
.github/scripts/check_diff.py
vendored
@@ -1,7 +1,6 @@
|
||||
import glob
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import tomllib
|
||||
from collections import defaultdict
|
||||
@@ -86,6 +85,11 @@ def add_dependents(dirs_to_eval: Set[str], dependents: dict) -> List[str]:
|
||||
|
||||
|
||||
def _get_configs_for_single_dir(job: str, dir_: str) -> List[Dict[str, str]]:
|
||||
if dir_ == "libs/core":
|
||||
return [
|
||||
{"working-directory": dir_, "python-version": f"3.{v}"}
|
||||
for v in range(8, 13)
|
||||
]
|
||||
min_python = "3.8"
|
||||
max_python = "3.12"
|
||||
|
||||
|
||||
@@ -52,7 +52,7 @@ Now:
|
||||
|
||||
`from langchain_experimental.sql import SQLDatabaseChain`
|
||||
|
||||
Alternatively, if you are just interested in using the query generation part of the SQL chain, you can check out [`create_sql_query_chain`](https://github.com/langchain-ai/langchain/blob/master/docs/extras/use_cases/tabular/sql_query.ipynb)
|
||||
Alternatively, if you are just interested in using the query generation part of the SQL chain, you can check out this [`SQL question-answering tutorial`](https://python.langchain.com/v0.2/docs/tutorials/sql_qa/#convert-question-to-sql-query)
|
||||
|
||||
`from langchain.chains import create_sql_query_chain`
|
||||
|
||||
|
||||
@@ -7,7 +7,6 @@
|
||||
[](https://opensource.org/licenses/MIT)
|
||||
[](https://pypistats.org/packages/langchain-core)
|
||||
[](https://star-history.com/#langchain-ai/langchain)
|
||||
[](https://libraries.io/github/langchain-ai/langchain)
|
||||
[](https://github.com/langchain-ai/langchain/issues)
|
||||
[](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/langchain-ai/langchain)
|
||||
[](https://codespaces.new/langchain-ai/langchain)
|
||||
|
||||
@@ -166,7 +166,7 @@
|
||||
"source": [
|
||||
"### SQL Database Agent example\n",
|
||||
"\n",
|
||||
"This example demonstrates the use of the [SQL Database Agent](/docs/integrations/toolkits/sql_database.html) for answering questions over a Databricks database."
|
||||
"This example demonstrates the use of the [SQL Database Agent](/docs/integrations/tools/sql_database) for answering questions over a Databricks database."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -13,7 +13,12 @@ OUTPUT_NEW_DOCS_DIR = $(OUTPUT_NEW_DIR)/docs
|
||||
|
||||
PYTHON = .venv/bin/python
|
||||
|
||||
PARTNER_DEPS_LIST := $(shell find ../libs/partners -mindepth 1 -maxdepth 1 -type d -exec test -e "{}/pyproject.toml" \; -print | grep -vE "airbyte|ibm|couchbase" | tr '\n' ' ')
|
||||
PARTNER_DEPS_LIST := $(shell find ../libs/partners -mindepth 1 -maxdepth 1 -type d -exec sh -c ' \
|
||||
for dir; do \
|
||||
if find "$$dir" -maxdepth 1 -type f \( -name "pyproject.toml" -o -name "setup.py" \) | grep -q .; then \
|
||||
echo "$$dir"; \
|
||||
fi \
|
||||
done' sh {} + | grep -vE "airbyte|ibm|couchbase" | tr '\n' ' ')
|
||||
|
||||
PORT ?= 3001
|
||||
|
||||
@@ -42,6 +47,8 @@ generate-files:
|
||||
|
||||
$(PYTHON) scripts/document_loader_feat_table.py $(INTERMEDIATE_DIR)
|
||||
|
||||
$(PYTHON) scripts/kv_store_feat_table.py $(INTERMEDIATE_DIR)
|
||||
|
||||
$(PYTHON) scripts/partner_pkg_table.py $(INTERMEDIATE_DIR)
|
||||
|
||||
$(PYTHON) scripts/copy_templates.py $(INTERMEDIATE_DIR)
|
||||
@@ -67,10 +74,13 @@ render:
|
||||
md-sync:
|
||||
rsync -avm --include="*/" --include="*.mdx" --include="*.md" --include="*.png" --include="*/_category_.yml" --exclude="*" $(INTERMEDIATE_DIR)/ $(OUTPUT_NEW_DOCS_DIR)
|
||||
|
||||
append-related:
|
||||
$(PYTHON) scripts/append_related_links.py $(OUTPUT_NEW_DOCS_DIR)
|
||||
|
||||
generate-references:
|
||||
$(PYTHON) scripts/generate_api_reference_links.py --docs_dir $(OUTPUT_NEW_DOCS_DIR)
|
||||
|
||||
build: install-py-deps generate-files copy-infra render md-sync
|
||||
build: install-py-deps generate-files copy-infra render md-sync append-related
|
||||
|
||||
vercel-build: install-vercel-deps build generate-references
|
||||
rm -rf docs
|
||||
|
||||
@@ -15,6 +15,8 @@ from pathlib import Path
|
||||
|
||||
import toml
|
||||
from docutils import nodes
|
||||
from docutils.parsers.rst.directives.admonitions import BaseAdmonition
|
||||
from docutils.statemachine import StringList
|
||||
from sphinx.util.docutils import SphinxDirective
|
||||
|
||||
# If extensions (or modules to document with autodoc) are in another directory,
|
||||
@@ -66,8 +68,23 @@ class ExampleLinksDirective(SphinxDirective):
|
||||
return [list_node]
|
||||
|
||||
|
||||
class Beta(BaseAdmonition):
|
||||
required_arguments = 0
|
||||
node_class = nodes.admonition
|
||||
|
||||
def run(self):
|
||||
self.content = self.content or StringList(
|
||||
[
|
||||
"This feature is in beta. It is actively being worked on, so the API may change."
|
||||
]
|
||||
)
|
||||
self.arguments = self.arguments or ["Beta"]
|
||||
return super().run()
|
||||
|
||||
|
||||
def setup(app):
|
||||
app.add_directive("example_links", ExampleLinksDirective)
|
||||
app.add_directive("beta", Beta)
|
||||
|
||||
|
||||
# -- Project information -----------------------------------------------------
|
||||
|
||||
@@ -38,6 +38,8 @@ class ClassInfo(TypedDict):
|
||||
"""The kind of the class."""
|
||||
is_public: bool
|
||||
"""Whether the class is public or not."""
|
||||
is_deprecated: bool
|
||||
"""Whether the class is deprecated."""
|
||||
|
||||
|
||||
class FunctionInfo(TypedDict):
|
||||
@@ -49,6 +51,8 @@ class FunctionInfo(TypedDict):
|
||||
"""The fully qualified name of the function."""
|
||||
is_public: bool
|
||||
"""Whether the function is public or not."""
|
||||
is_deprecated: bool
|
||||
"""Whether the function is deprecated."""
|
||||
|
||||
|
||||
class ModuleMembers(TypedDict):
|
||||
@@ -121,6 +125,7 @@ def _load_module_members(module_path: str, namespace: str) -> ModuleMembers:
|
||||
qualified_name=f"{namespace}.{name}",
|
||||
kind=kind,
|
||||
is_public=not name.startswith("_"),
|
||||
is_deprecated=".. deprecated::" in (type_.__doc__ or ""),
|
||||
)
|
||||
)
|
||||
elif inspect.isfunction(type_):
|
||||
@@ -129,6 +134,7 @@ def _load_module_members(module_path: str, namespace: str) -> ModuleMembers:
|
||||
name=name,
|
||||
qualified_name=f"{namespace}.{name}",
|
||||
is_public=not name.startswith("_"),
|
||||
is_deprecated=".. deprecated::" in (type_.__doc__ or ""),
|
||||
)
|
||||
)
|
||||
else:
|
||||
@@ -255,8 +261,24 @@ def _construct_doc(
|
||||
|
||||
for module in namespaces:
|
||||
_members = members_by_namespace[module]
|
||||
classes = [el for el in _members["classes_"] if el["is_public"]]
|
||||
functions = [el for el in _members["functions"] if el["is_public"]]
|
||||
classes = [
|
||||
el
|
||||
for el in _members["classes_"]
|
||||
if el["is_public"] and not el["is_deprecated"]
|
||||
]
|
||||
functions = [
|
||||
el
|
||||
for el in _members["functions"]
|
||||
if el["is_public"] and not el["is_deprecated"]
|
||||
]
|
||||
deprecated_classes = [
|
||||
el for el in _members["classes_"] if el["is_public"] and el["is_deprecated"]
|
||||
]
|
||||
deprecated_functions = [
|
||||
el
|
||||
for el in _members["functions"]
|
||||
if el["is_public"] and el["is_deprecated"]
|
||||
]
|
||||
if not (classes or functions):
|
||||
continue
|
||||
section = f":mod:`{package_namespace}.{module}`"
|
||||
@@ -310,6 +332,54 @@ Functions
|
||||
--------------
|
||||
.. currentmodule:: {package_namespace}
|
||||
|
||||
.. autosummary::
|
||||
:toctree: {module}
|
||||
:template: function.rst
|
||||
|
||||
{fstring}
|
||||
|
||||
"""
|
||||
if deprecated_classes:
|
||||
full_doc += f"""\
|
||||
Deprecated classes
|
||||
--------------
|
||||
|
||||
.. currentmodule:: {package_namespace}
|
||||
|
||||
.. autosummary::
|
||||
:toctree: {module}
|
||||
"""
|
||||
|
||||
for class_ in sorted(deprecated_classes, key=lambda c: c["qualified_name"]):
|
||||
if class_["kind"] == "TypedDict":
|
||||
template = "typeddict.rst"
|
||||
elif class_["kind"] == "enum":
|
||||
template = "enum.rst"
|
||||
elif class_["kind"] == "Pydantic":
|
||||
template = "pydantic.rst"
|
||||
elif class_["kind"] == "RunnablePydantic":
|
||||
template = "runnable_pydantic.rst"
|
||||
elif class_["kind"] == "RunnableNonPydantic":
|
||||
template = "runnable_non_pydantic.rst"
|
||||
else:
|
||||
template = "class.rst"
|
||||
|
||||
full_doc += f"""\
|
||||
:template: {template}
|
||||
|
||||
{class_["qualified_name"]}
|
||||
|
||||
"""
|
||||
|
||||
if deprecated_functions:
|
||||
_functions = [f["qualified_name"] for f in deprecated_functions]
|
||||
fstring = "\n ".join(sorted(_functions))
|
||||
full_doc += f"""\
|
||||
Deprecated functions
|
||||
--------------
|
||||
|
||||
.. currentmodule:: {package_namespace}
|
||||
|
||||
.. autosummary::
|
||||
:toctree: {module}
|
||||
:template: function.rst
|
||||
|
||||
@@ -897,6 +897,13 @@ div.admonition {
|
||||
background-color: #eee;
|
||||
}
|
||||
|
||||
div.admonition-beta {
|
||||
color: #d35400; /* A darker rich orange color */
|
||||
background-color: #FDF2E9; /* A light orange-tinted background color */
|
||||
border-color: #E59866; /* A darker soft orange border color */
|
||||
}
|
||||
|
||||
|
||||
div.admonition p:last-child,
|
||||
div.admonition dl:last-child,
|
||||
div.admonition dd:last-child,
|
||||
@@ -912,6 +919,13 @@ div.deprecated {
|
||||
border-color: #eed3d7;
|
||||
}
|
||||
|
||||
|
||||
div.warning {
|
||||
color: #b94a48;
|
||||
background-color: #F3E5E5;
|
||||
border-color: #eed3d7;
|
||||
}
|
||||
|
||||
div.seealso {
|
||||
background-color: #FFFBE8;
|
||||
border-color: #fbeed5;
|
||||
|
||||
@@ -500,7 +500,8 @@ For specifics on how to use retrievers, see the [relevant how-to guides here](/d
|
||||
|
||||
### Key-value stores
|
||||
|
||||
For some techniques, such as [indexing and retrieval with multiple vectors per document](/docs/how_to/multi_vector/), having some sort of key-value (KV) storage is helpful.
|
||||
For some techniques, such as [indexing and retrieval with multiple vectors per document](/docs/how_to/multi_vector/) or
|
||||
[caching embeddings](/docs/how_to/caching_embeddings/), having a form of key-value (KV) storage is helpful.
|
||||
|
||||
LangChain includes a [`BaseStore`](https://api.python.langchain.com/en/latest/stores/langchain_core.stores.BaseStore.html) interface,
|
||||
which allows for storage of arbitrary data. However, LangChain components that require KV-storage accept a
|
||||
@@ -541,7 +542,8 @@ Typical usage may look like the following:
|
||||
```python
|
||||
tools = [...] # Define a list of tools
|
||||
llm_with_tools = llm.bind_tools(tools)
|
||||
ai_msg = llm_with_tools.invoke("do xyz...") # AIMessage(tool_calls=[ToolCall(...), ...], ...)
|
||||
ai_msg = llm_with_tools.invoke("do xyz...")
|
||||
# -> AIMessage(tool_calls=[ToolCall(...), ...], ...)
|
||||
```
|
||||
|
||||
The `AIMessage` returned from the model MAY have `tool_calls` associated with it.
|
||||
@@ -558,9 +560,14 @@ This generally looks like:
|
||||
|
||||
```python
|
||||
# You will want to previously check that the LLM returned tool calls
|
||||
tool_call = ai_msg.tool_calls[0] # ToolCall(args={...}, id=..., ...)
|
||||
tool_call = ai_msg.tool_calls[0]
|
||||
# ToolCall(args={...}, id=..., ...)
|
||||
tool_output = tool.invoke(tool_call["args"])
|
||||
tool_message = ToolMessage(content=tool_output, tool_call_id=tool_call["id"], name=tool_call["name"])
|
||||
tool_message = ToolMessage(
|
||||
content=tool_output,
|
||||
tool_call_id=tool_call["id"],
|
||||
name=tool_call["name"]
|
||||
)
|
||||
```
|
||||
|
||||
Note that the `content` field will generally be passed back to the model.
|
||||
@@ -570,7 +577,12 @@ you can transform the tool output but also pass it as an artifact (read more abo
|
||||
```python
|
||||
... # Same code as above
|
||||
response_for_llm = transform(response)
|
||||
tool_message = ToolMessage(content=response_for_llm, tool_call_id=tool_call["id"], name=tool_call["name"], artifact=tool_output)
|
||||
tool_message = ToolMessage(
|
||||
content=response_for_llm,
|
||||
tool_call_id=tool_call["id"],
|
||||
name=tool_call["name"],
|
||||
artifact=tool_output
|
||||
)
|
||||
```
|
||||
|
||||
#### Invoke with `ToolCall`
|
||||
@@ -581,9 +593,14 @@ The benefits of this are that you don't have to write the logic yourself to tran
|
||||
This generally looks like:
|
||||
|
||||
```python
|
||||
tool_call = ai_msg.tool_calls[0] # ToolCall(args={...}, id=..., ...)
|
||||
tool_call = ai_msg.tool_calls[0]
|
||||
# -> ToolCall(args={...}, id=..., ...)
|
||||
tool_message = tool.invoke(tool_call)
|
||||
# -> ToolMessage(content="tool result foobar...", tool_call_id=..., name="tool_name")
|
||||
# -> ToolMessage(
|
||||
content="tool result foobar...",
|
||||
tool_call_id=...,
|
||||
name="tool_name"
|
||||
)
|
||||
```
|
||||
|
||||
If you are invoking the tool this way and want to include an [artifact](/docs/concepts/#toolmessage) for the ToolMessage, you will need to have the tool return two things.
|
||||
|
||||
@@ -409,7 +409,7 @@
|
||||
" # When configuring the end runnable, we can then use this id to configure this field\n",
|
||||
" ConfigurableField(id=\"prompt\"),\n",
|
||||
" # This sets a default_key.\n",
|
||||
" # If we specify this key, the default LLM (ChatAnthropic initialized above) will be used\n",
|
||||
" # If we specify this key, the default prompt (asking for a joke, as initialized above) will be used\n",
|
||||
" default_key=\"joke\",\n",
|
||||
" # This adds a new option, with name `poem`\n",
|
||||
" poem=PromptTemplate.from_template(\"Write a short poem about {topic}\"),\n",
|
||||
@@ -494,7 +494,7 @@
|
||||
" # When configuring the end runnable, we can then use this id to configure this field\n",
|
||||
" ConfigurableField(id=\"prompt\"),\n",
|
||||
" # This sets a default_key.\n",
|
||||
" # If we specify this key, the default LLM (ChatAnthropic initialized above) will be used\n",
|
||||
" # If we specify this key, the default prompt (asking for a joke, as initialized above) will be used\n",
|
||||
" default_key=\"joke\",\n",
|
||||
" # This adds a new option, with name `poem`\n",
|
||||
" poem=PromptTemplate.from_template(\"Write a short poem about {topic}\"),\n",
|
||||
|
||||
@@ -88,6 +88,7 @@ These are the core building blocks you can use when building applications.
|
||||
- [How to: few shot prompt tool behavior](/docs/how_to/tools_few_shot)
|
||||
- [How to: bind model-specific formatted tools](/docs/how_to/tools_model_specific)
|
||||
- [How to: force a specific tool call](/docs/how_to/tool_choice)
|
||||
- [How to: work with local models](/docs/how_to/local_llms)
|
||||
- [How to: init any model in one line](/docs/how_to/chat_models_universal_init/)
|
||||
|
||||
### Messages
|
||||
@@ -106,7 +107,7 @@ What LangChain calls [LLMs](/docs/concepts/#llms) are older forms of language mo
|
||||
- [How to: create a custom LLM class](/docs/how_to/custom_llm)
|
||||
- [How to: stream a response back](/docs/how_to/streaming_llm)
|
||||
- [How to: track token usage](/docs/how_to/llm_token_usage_tracking)
|
||||
- [How to: work with local LLMs](/docs/how_to/local_llms)
|
||||
- [How to: work with local models](/docs/how_to/local_llms)
|
||||
|
||||
### Output parsers
|
||||
|
||||
|
||||
@@ -5,11 +5,11 @@
|
||||
"id": "b8982428",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Run LLMs locally\n",
|
||||
"# Run models locally\n",
|
||||
"\n",
|
||||
"## Use case\n",
|
||||
"\n",
|
||||
"The popularity of projects like [PrivateGPT](https://github.com/imartinez/privateGPT), [llama.cpp](https://github.com/ggerganov/llama.cpp), [Ollama](https://github.com/ollama/ollama), [GPT4All](https://github.com/nomic-ai/gpt4all), [llamafile](https://github.com/Mozilla-Ocho/llamafile), and others underscore the demand to run LLMs locally (on your own device).\n",
|
||||
"The popularity of projects like [llama.cpp](https://github.com/ggerganov/llama.cpp), [Ollama](https://github.com/ollama/ollama), [GPT4All](https://github.com/nomic-ai/gpt4all), [llamafile](https://github.com/Mozilla-Ocho/llamafile), and others underscore the demand to run LLMs locally (on your own device).\n",
|
||||
"\n",
|
||||
"This has at least two important benefits:\n",
|
||||
"\n",
|
||||
@@ -66,6 +66,12 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"### Formatting prompts\n",
|
||||
"\n",
|
||||
"Some providers have [chat model](/docs/concepts/#chat-models) wrappers that takes care of formatting your input prompt for the specific local model you're using. However, if you are prompting local models with a [text-in/text-out LLM](/docs/concepts/#llms) wrapper, you may need to use a prompt tailed for your specific model.\n",
|
||||
"\n",
|
||||
"This can [require the inclusion of special tokens](https://huggingface.co/blog/llama2#how-to-prompt-llama-2). [Here's an example for LLaMA 2](https://smith.langchain.com/hub/rlm/rag-prompt-llama).\n",
|
||||
"\n",
|
||||
"## Quickstart\n",
|
||||
"\n",
|
||||
"[`Ollama`](https://ollama.ai/) is one way to easily run inference on macOS.\n",
|
||||
@@ -73,10 +79,20 @@
|
||||
"The instructions [here](https://github.com/jmorganca/ollama?tab=readme-ov-file#ollama) provide details, which we summarize:\n",
|
||||
" \n",
|
||||
"* [Download and run](https://ollama.ai/download) the app\n",
|
||||
"* From command line, fetch a model from this [list of options](https://github.com/jmorganca/ollama): e.g., `ollama pull llama2`\n",
|
||||
"* From command line, fetch a model from this [list of options](https://github.com/jmorganca/ollama): e.g., `ollama pull llama3.1:8b`\n",
|
||||
"* When the app is running, all models are automatically served on `localhost:11434`\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "29450fc9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain_ollama"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
@@ -86,7 +102,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' The first man on the moon was Neil Armstrong, who landed on the moon on July 20, 1969 as part of the Apollo 11 mission. obviously.'"
|
||||
"'...Neil Armstrong!\\n\\nOn July 20, 1969, Neil Armstrong became the first person to set foot on the lunar surface, famously declaring \"That\\'s one small step for man, one giant leap for mankind\" as he stepped off the lunar module Eagle onto the Moon\\'s surface.\\n\\nWould you like to know more about the Apollo 11 mission or Neil Armstrong\\'s achievements?'"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
@@ -95,51 +111,78 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_community.llms import Ollama\n",
|
||||
"from langchain_ollama import OllamaLLM\n",
|
||||
"\n",
|
||||
"llm = OllamaLLM(model=\"llama3.1:8b\")\n",
|
||||
"\n",
|
||||
"llm = Ollama(model=\"llama2\")\n",
|
||||
"llm.invoke(\"The first man on the moon was ...\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "343ab645",
|
||||
"id": "674cc672",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Stream tokens as they are being generated."
|
||||
"Stream tokens as they are being generated:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"id": "9cd83603",
|
||||
"execution_count": 3,
|
||||
"id": "1386a852",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" The first man to walk on the moon was Neil Armstrong, an American astronaut who was part of the Apollo 11 mission in 1969. февруари 20, 1969, Armstrong stepped out of the lunar module Eagle and onto the moon's surface, famously declaring \"That's one small step for man, one giant leap for mankind\" as he took his first steps. He was followed by fellow astronaut Edwin \"Buzz\" Aldrin, who also walked on the moon during the mission."
|
||||
"...|"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Neil| Armstrong|,| an| American| astronaut|.| He| stepped| out| of| the| lunar| module| Eagle| and| onto| the| surface| of| the| Moon| on| July| |20|,| |196|9|,| famously| declaring|:| \"|That|'s| one| small| step| for| man|,| one| giant| leap| for| mankind|.\"||"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for chunk in llm.stream(\"The first man on the moon was ...\"):\n",
|
||||
" print(chunk, end=\"|\", flush=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e5731060",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Ollama also includes a chat model wrapper that handles formatting conversation turns:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "f14a778a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' The first man to walk on the moon was Neil Armstrong, an American astronaut who was part of the Apollo 11 mission in 1969. февруари 20, 1969, Armstrong stepped out of the lunar module Eagle and onto the moon\\'s surface, famously declaring \"That\\'s one small step for man, one giant leap for mankind\" as he took his first steps. He was followed by fellow astronaut Edwin \"Buzz\" Aldrin, who also walked on the moon during the mission.'"
|
||||
"AIMessage(content='The answer is a historic one!\\n\\nThe first man to walk on the Moon was Neil Armstrong, an American astronaut and commander of the Apollo 11 mission. On July 20, 1969, Armstrong stepped out of the lunar module Eagle onto the surface of the Moon, famously declaring:\\n\\n\"That\\'s one small step for man, one giant leap for mankind.\"\\n\\nArmstrong was followed by fellow astronaut Edwin \"Buzz\" Aldrin, who also walked on the Moon during the mission. Michael Collins remained in orbit around the Moon in the command module Columbia.\\n\\nNeil Armstrong passed away on August 25, 2012, but his legacy as a pioneering astronaut and engineer continues to inspire people around the world!', response_metadata={'model': 'llama3.1:8b', 'created_at': '2024-08-01T00:38:29.176717Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 10681861417, 'load_duration': 34270292, 'prompt_eval_count': 19, 'prompt_eval_duration': 6209448000, 'eval_count': 141, 'eval_duration': 4432022000}, id='run-7bed57c5-7f54-4092-912c-ae49073dcd48-0', usage_metadata={'input_tokens': 19, 'output_tokens': 141, 'total_tokens': 160})"
|
||||
]
|
||||
},
|
||||
"execution_count": 40,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler\n",
|
||||
"from langchain_ollama import ChatOllama\n",
|
||||
"\n",
|
||||
"llm = Ollama(\n",
|
||||
" model=\"llama2\", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()])\n",
|
||||
")\n",
|
||||
"llm.invoke(\"The first man on the moon was ...\")"
|
||||
"chat_model = ChatOllama(model=\"llama3.1:8b\")\n",
|
||||
"\n",
|
||||
"chat_model.invoke(\"Who was the first man on the moon?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -199,7 +242,7 @@
|
||||
"\n",
|
||||
"With [Ollama](https://github.com/jmorganca/ollama), fetch a model via `ollama pull <model family>:<tag>`:\n",
|
||||
"\n",
|
||||
"* E.g., for Llama-7b: `ollama pull llama2` will download the most basic version of the model (e.g., smallest # parameters and 4 bit quantization)\n",
|
||||
"* E.g., for Llama 2 7b: `ollama pull llama2` will download the most basic version of the model (e.g., smallest # parameters and 4 bit quantization)\n",
|
||||
"* We can also specify a particular version from the [model list](https://github.com/jmorganca/ollama?tab=readme-ov-file#model-library), e.g., `ollama pull llama2:13b`\n",
|
||||
"* See the full set of parameters on the [API reference page](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.ollama.Ollama.html)"
|
||||
]
|
||||
@@ -222,9 +265,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_community.llms import Ollama\n",
|
||||
"\n",
|
||||
"llm = Ollama(model=\"llama2:13b\")\n",
|
||||
"llm = OllamaLLM(model=\"llama2:13b\")\n",
|
||||
"llm.invoke(\"The first man on the moon was ... think step by step\")"
|
||||
]
|
||||
},
|
||||
@@ -268,11 +309,7 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5eba38dc",
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%env CMAKE_ARGS=\"-DLLAMA_METAL=on\"\n",
|
||||
@@ -542,7 +579,6 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chains import LLMChain\n",
|
||||
"from langchain.chains.prompt_selector import ConditionalPromptSelector\n",
|
||||
"from langchain_core.prompts import PromptTemplate\n",
|
||||
"\n",
|
||||
@@ -613,9 +649,9 @@
|
||||
],
|
||||
"source": [
|
||||
"# Chain\n",
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||
"chain = prompt | llm\n",
|
||||
"question = \"What NFL team won the Super Bowl in the year that Justin Bieber was born?\"\n",
|
||||
"llm_chain.run({\"question\": question})"
|
||||
"chain.invoke({\"question\": question})"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -666,7 +702,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
"version": "3.10.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1,811 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f331037f-be3f-4782-856f-d55dab952488",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# How to migrate chains to LCEL\n",
|
||||
"\n",
|
||||
":::info Prerequisites\n",
|
||||
"\n",
|
||||
"This guide assumes familiarity with the following concepts:\n",
|
||||
"- [LangChain Expression Language](/docs/concepts#langchain-expression-language-lcel)\n",
|
||||
"\n",
|
||||
":::\n",
|
||||
"\n",
|
||||
"LCEL is designed to streamline the process of building useful apps with LLMs and combining related components. It does this by providing:\n",
|
||||
"\n",
|
||||
"1. **A unified interface**: Every LCEL object implements the `Runnable` interface, which defines a common set of invocation methods (`invoke`, `batch`, `stream`, `ainvoke`, ...). This makes it possible to also automatically and consistently support useful operations like streaming of intermediate steps and batching, since every chain composed of LCEL objects is itself an LCEL object.\n",
|
||||
"2. **Composition primitives**: LCEL provides a number of primitives that make it easy to compose chains, parallelize components, add fallbacks, dynamically configure chain internals, and more.\n",
|
||||
"\n",
|
||||
"LangChain maintains a number of legacy abstractions. Many of these can be reimplemented via short combinations of LCEL primitives. Doing so confers some general advantages:\n",
|
||||
"\n",
|
||||
"- The resulting chains typically implement the full `Runnable` interface, including streaming and asynchronous support where appropriate;\n",
|
||||
"- The chains may be more easily extended or modified;\n",
|
||||
"- The parameters of the chain are typically surfaced for easier customization (e.g., prompts) over previous versions, which tended to be subclasses and had opaque parameters and internals.\n",
|
||||
"\n",
|
||||
"The LCEL implementations can be slightly more verbose, but there are significant benefits in transparency and customizability.\n",
|
||||
"\n",
|
||||
"In this guide we review LCEL implementations of common legacy abstractions. Where appropriate, we link out to separate guides with more detail."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b99b47ec",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain-community langchain langchain-openai faiss-cpu"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "717c8673",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e3621b62-a037-42b8-8faa-59575608bb8b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## `LLMChain`\n",
|
||||
"<span data-heading-keywords=\"llmchain\"></span>\n",
|
||||
"\n",
|
||||
"[`LLMChain`](https://api.python.langchain.com/en/latest/chains/langchain.chains.llm.LLMChain.html) combined a prompt template, LLM, and output parser into a class.\n",
|
||||
"\n",
|
||||
"Some advantages of switching to the LCEL implementation are:\n",
|
||||
"\n",
|
||||
"- Clarity around contents and parameters. The legacy `LLMChain` contains a default output parser and other options.\n",
|
||||
"- Easier streaming. `LLMChain` only supports streaming via callbacks.\n",
|
||||
"- Easier access to raw message outputs if desired. `LLMChain` only exposes these via a parameter or via callback.\n",
|
||||
"\n",
|
||||
"import { ColumnContainer, Column } from \"@theme/Columns\";\n",
|
||||
"\n",
|
||||
"<ColumnContainer>\n",
|
||||
"\n",
|
||||
"<Column>\n",
|
||||
"\n",
|
||||
"#### Legacy\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "e628905c-430e-4e4a-9d7c-c91d2f42052e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'adjective': 'funny',\n",
|
||||
" 'text': \"Why couldn't the bicycle find its way home?\\n\\nBecause it lost its bearings!\"}"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chains import LLMChain\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [(\"user\", \"Tell me a {adjective} joke\")],\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chain = LLMChain(llm=ChatOpenAI(), prompt=prompt)\n",
|
||||
"\n",
|
||||
"chain({\"adjective\": \"funny\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cdc3b527-c09e-4c77-9711-c3cc4506cd95",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
"</Column>\n",
|
||||
"\n",
|
||||
"<Column>\n",
|
||||
"\n",
|
||||
"#### LCEL\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "0d2a7cf8-1bc7-405c-bb0d-f2ab2ba3b6ab",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"Why couldn't the bicycle stand up by itself?\\n\\nBecause it was two tired!\""
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_core.output_parsers import StrOutputParser\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [(\"user\", \"Tell me a {adjective} joke\")],\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chain = prompt | ChatOpenAI() | StrOutputParser()\n",
|
||||
"\n",
|
||||
"chain.invoke({\"adjective\": \"funny\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3c0b0513-77b8-4371-a20e-3e487cec7e7f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
"</Column>\n",
|
||||
"</ColumnContainer>\n",
|
||||
"\n",
|
||||
"Note that `LLMChain` by default returns a `dict` containing both the input and the output. If this behavior is desired, we can replicate it using another LCEL primitive, [`RunnablePassthrough`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.passthrough.RunnablePassthrough.html):"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "529206c5-abbe-4213-9e6c-3b8586c8000d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'adjective': 'funny',\n",
|
||||
" 'text': \"Why couldn't the bicycle stand up by itself?\\n\\nBecause it was two tired!\"}"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_core.runnables import RunnablePassthrough\n",
|
||||
"\n",
|
||||
"outer_chain = RunnablePassthrough().assign(text=chain)\n",
|
||||
"\n",
|
||||
"outer_chain.invoke({\"adjective\": \"funny\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "29d2e26c-2854-4971-9c2b-613450993921",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"See [this tutorial](/docs/tutorials/llm_chain) for more detail on building with prompt templates, LLMs, and output parsers."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "00df631d-5121-4918-94aa-b88acce9b769",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## `ConversationChain`\n",
|
||||
"<span data-heading-keywords=\"conversationchain\"></span>\n",
|
||||
"\n",
|
||||
"[`ConversationChain`](https://api.python.langchain.com/en/latest/chains/langchain.chains.conversation.base.ConversationChain.html) incorporates a memory of previous messages to sustain a stateful conversation.\n",
|
||||
"\n",
|
||||
"Some advantages of switching to the LCEL implementation are:\n",
|
||||
"\n",
|
||||
"- Innate support for threads/separate sessions. To make this work with `ConversationChain`, you'd need to instantiate a separate memory class outside the chain.\n",
|
||||
"- More explicit parameters. `ConversationChain` contains a hidden default prompt, which can cause confusion.\n",
|
||||
"- Streaming support. `ConversationChain` only supports streaming via callbacks.\n",
|
||||
"\n",
|
||||
"`RunnableWithMessageHistory` implements sessions via configuration parameters. It should be instantiated with a callable that returns a [chat message history](https://api.python.langchain.com/en/latest/chat_history/langchain_core.chat_history.BaseChatMessageHistory.html). By default, it expects this function to take a single argument `session_id`.\n",
|
||||
"\n",
|
||||
"<ColumnContainer>\n",
|
||||
"<Column>\n",
|
||||
"\n",
|
||||
"#### Legacy\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "4f2cc6dc-d70a-4c13-9258-452f14290da6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'how are you?',\n",
|
||||
" 'history': '',\n",
|
||||
" 'response': \"Arrr, I be doin' well, me matey! Just sailin' the high seas in search of treasure and adventure. How can I assist ye today?\"}"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chains import ConversationChain\n",
|
||||
"from langchain.memory import ConversationBufferMemory\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"template = \"\"\"\n",
|
||||
"You are a pirate. Answer the following questions as best you can.\n",
|
||||
"Chat history: {history}\n",
|
||||
"Question: {input}\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_template(template)\n",
|
||||
"\n",
|
||||
"memory = ConversationBufferMemory()\n",
|
||||
"\n",
|
||||
"chain = ConversationChain(\n",
|
||||
" llm=ChatOpenAI(),\n",
|
||||
" memory=memory,\n",
|
||||
" prompt=prompt,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chain({\"input\": \"how are you?\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f8e36b0e-c7dc-4130-a51b-189d4b756c7f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</Column>\n",
|
||||
"\n",
|
||||
"<Column>\n",
|
||||
"\n",
|
||||
"#### LCEL\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "173e1a9c-2a18-4669-b0de-136f39197786",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"Arrr, I be doin' well, me heartie! Just sailin' the high seas in search of treasure and adventure. How be ye?\""
|
||||
]
|
||||
},
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_core.chat_history import InMemoryChatMessageHistory\n",
|
||||
"from langchain_core.output_parsers import StrOutputParser\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"from langchain_core.runnables.history import RunnableWithMessageHistory\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\"system\", \"You are a pirate. Answer the following questions as best you can.\"),\n",
|
||||
" (\"placeholder\", \"{chat_history}\"),\n",
|
||||
" (\"human\", \"{input}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"history = InMemoryChatMessageHistory()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_history():\n",
|
||||
" return history\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"chain = prompt | ChatOpenAI() | StrOutputParser()\n",
|
||||
"\n",
|
||||
"wrapped_chain = RunnableWithMessageHistory(\n",
|
||||
" chain,\n",
|
||||
" get_history,\n",
|
||||
" history_messages_key=\"chat_history\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"wrapped_chain.invoke({\"input\": \"how are you?\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6b386ce6-895e-442c-88f3-7bec0ab9f401",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
"</Column>\n",
|
||||
"</ColumnContainer>\n",
|
||||
"\n",
|
||||
"The above example uses the same `history` for all sessions. The example below shows how to use a different chat history for each session."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "4e05994f-1fbc-4699-bf2e-62cb0e4deeb8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Ahoy matey! What can this old pirate do for ye today?'"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_core.chat_history import BaseChatMessageHistory\n",
|
||||
"from langchain_core.runnables.history import RunnableWithMessageHistory\n",
|
||||
"\n",
|
||||
"store = {}\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_session_history(session_id: str) -> BaseChatMessageHistory:\n",
|
||||
" if session_id not in store:\n",
|
||||
" store[session_id] = InMemoryChatMessageHistory()\n",
|
||||
" return store[session_id]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"chain = prompt | ChatOpenAI() | StrOutputParser()\n",
|
||||
"\n",
|
||||
"wrapped_chain = RunnableWithMessageHistory(\n",
|
||||
" chain,\n",
|
||||
" get_session_history,\n",
|
||||
" history_messages_key=\"chat_history\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"wrapped_chain.invoke(\n",
|
||||
" {\"input\": \"Hello!\"},\n",
|
||||
" config={\"configurable\": {\"session_id\": \"abc123\"}},\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c36ebecb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"See [this tutorial](/docs/tutorials/chatbot) for a more end-to-end guide on building with [`RunnableWithMessageHistory`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.history.RunnableWithMessageHistory.html).\n",
|
||||
"\n",
|
||||
"## `RetrievalQA`\n",
|
||||
"<span data-heading-keywords=\"retrievalqa\"></span>\n",
|
||||
"\n",
|
||||
"The [`RetrievalQA`](https://api.python.langchain.com/en/latest/chains/langchain.chains.retrieval_qa.base.RetrievalQA.html) chain performed natural-language question answering over a data source using retrieval-augmented generation.\n",
|
||||
"\n",
|
||||
"Some advantages of switching to the LCEL implementation are:\n",
|
||||
"\n",
|
||||
"- Easier customizability. Details such as the prompt and how documents are formatted are only configurable via specific parameters in the `RetrievalQA` chain.\n",
|
||||
"- More easily return source documents.\n",
|
||||
"- Support for runnable methods like streaming and async operations.\n",
|
||||
"\n",
|
||||
"Now let's look at them side-by-side. We'll use the same ingestion code to load a [blog post by Lilian Weng](https://lilianweng.github.io/posts/2023-06-23-agent/) on autonomous agents into a local vector store:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "1efbe16e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load docs\n",
|
||||
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
|
||||
"from langchain_community.document_loaders import WebBaseLoader\n",
|
||||
"from langchain_community.vectorstores import FAISS\n",
|
||||
"from langchain_openai.chat_models import ChatOpenAI\n",
|
||||
"from langchain_openai.embeddings import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"loader = WebBaseLoader(\"https://lilianweng.github.io/posts/2023-06-23-agent/\")\n",
|
||||
"data = loader.load()\n",
|
||||
"\n",
|
||||
"# Split\n",
|
||||
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n",
|
||||
"all_splits = text_splitter.split_documents(data)\n",
|
||||
"\n",
|
||||
"# Store splits\n",
|
||||
"vectorstore = FAISS.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())\n",
|
||||
"\n",
|
||||
"# LLM\n",
|
||||
"llm = ChatOpenAI()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c7e16438",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<ColumnContainer>\n",
|
||||
"\n",
|
||||
"<Column>\n",
|
||||
"\n",
|
||||
"#### Legacy"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "43bf55a0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'query': 'What are autonomous agents?',\n",
|
||||
" 'result': 'Autonomous agents are LLM-empowered agents that handle autonomous design, planning, and performance of complex tasks, such as scientific experiments. These agents can browse the Internet, read documentation, execute code, call robotics experimentation APIs, and leverage other LLMs. They are capable of reasoning and planning ahead for complicated tasks by breaking them down into smaller steps.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain import hub\n",
|
||||
"from langchain.chains import RetrievalQA\n",
|
||||
"\n",
|
||||
"# See full prompt at https://smith.langchain.com/hub/rlm/rag-prompt\n",
|
||||
"prompt = hub.pull(\"rlm/rag-prompt\")\n",
|
||||
"\n",
|
||||
"qa_chain = RetrievalQA.from_llm(\n",
|
||||
" llm, retriever=vectorstore.as_retriever(), prompt=prompt\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"qa_chain(\"What are autonomous agents?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "081948e5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</Column>\n",
|
||||
"\n",
|
||||
"<Column>\n",
|
||||
"\n",
|
||||
"#### LCEL\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "9efcc931",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Autonomous agents are agents that can handle autonomous design, planning, and performance of complex tasks, such as scientific experiments. They can browse the Internet, read documentation, execute code, call robotics experimentation APIs, and leverage other language model models. These agents use reasoning steps to develop solutions to specific tasks, like creating a novel anticancer drug.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain import hub\n",
|
||||
"from langchain_core.output_parsers import StrOutputParser\n",
|
||||
"from langchain_core.runnables import RunnablePassthrough\n",
|
||||
"\n",
|
||||
"# See full prompt at https://smith.langchain.com/hub/rlm/rag-prompt\n",
|
||||
"prompt = hub.pull(\"rlm/rag-prompt\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def format_docs(docs):\n",
|
||||
" return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"qa_chain = (\n",
|
||||
" {\n",
|
||||
" \"context\": vectorstore.as_retriever() | format_docs,\n",
|
||||
" \"question\": RunnablePassthrough(),\n",
|
||||
" }\n",
|
||||
" | prompt\n",
|
||||
" | llm\n",
|
||||
" | StrOutputParser()\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"qa_chain.invoke(\"What are autonomous agents?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d6f44fe8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</Column>\n",
|
||||
"</ColumnContainer>\n",
|
||||
"\n",
|
||||
"The LCEL implementation exposes the internals of what's happening around retrieving, formatting documents, and passing them through a prompt to the LLM, but it is more verbose. You can customize and wrap this composition logic in a helper function, or use the higher-level [`create_retrieval_chain`](https://api.python.langchain.com/en/latest/chains/langchain.chains.retrieval.create_retrieval_chain.html) and [`create_stuff_documents_chain`](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.stuff.create_stuff_documents_chain.html) helper method:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "5fe42761",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'What are autonomous agents?',\n",
|
||||
" 'context': [Document(page_content='Boiko et al. (2023) also looked into LLM-empowered agents for scientific discovery, to handle autonomous design, planning, and performance of complex scientific experiments. This agent can use tools to browse the Internet, read documentation, execute code, call robotics experimentation APIs and leverage other LLMs.\\nFor example, when requested to \"develop a novel anticancer drug\", the model came up with the following reasoning steps:', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:', 'language': 'en'}),\n",
|
||||
" Document(page_content='Weng, Lilian. (Jun 2023). “LLM-powered Autonomous Agents”. Lil’Log. https://lilianweng.github.io/posts/2023-06-23-agent/.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:', 'language': 'en'}),\n",
|
||||
" Document(page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\\nComponent One: Planning#\\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\\nTask Decomposition#', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:', 'language': 'en'}),\n",
|
||||
" Document(page_content=\"LLM Powered Autonomous Agents | Lil'Log\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nLil'Log\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nPosts\\n\\n\\n\\n\\nArchive\\n\\n\\n\\n\\nSearch\\n\\n\\n\\n\\nTags\\n\\n\\n\\n\\nFAQ\\n\\n\\n\\n\\nemojisearch.app\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n LLM Powered Autonomous Agents\\n \\nDate: June 23, 2023 | Estimated Reading Time: 31 min | Author: Lilian Weng\\n\\n\\n \\n\\n\\nTable of Contents\\n\\n\\n\\nAgent System Overview\\n\\nComponent One: Planning\\n\\nTask Decomposition\\n\\nSelf-Reflection\\n\\n\\nComponent Two: Memory\\n\\nTypes of Memory\\n\\nMaximum Inner Product Search (MIPS)\", metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:', 'language': 'en'})],\n",
|
||||
" 'answer': 'Autonomous agents are entities that can operate independently, making decisions and taking actions without direct human intervention. These agents can perform tasks such as planning, executing complex experiments, and leveraging various tools and resources to achieve objectives. In the context provided, LLM-powered autonomous agents are specifically designed for scientific discovery, capable of handling tasks like designing novel anticancer drugs through reasoning steps.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain import hub\n",
|
||||
"from langchain.chains import create_retrieval_chain\n",
|
||||
"from langchain.chains.combine_documents import create_stuff_documents_chain\n",
|
||||
"\n",
|
||||
"# See full prompt at https://smith.langchain.com/hub/langchain-ai/retrieval-qa-chat\n",
|
||||
"retrieval_qa_chat_prompt = hub.pull(\"langchain-ai/retrieval-qa-chat\")\n",
|
||||
"\n",
|
||||
"combine_docs_chain = create_stuff_documents_chain(llm, retrieval_qa_chat_prompt)\n",
|
||||
"rag_chain = create_retrieval_chain(vectorstore.as_retriever(), combine_docs_chain)\n",
|
||||
"\n",
|
||||
"rag_chain.invoke({\"input\": \"What are autonomous agents?\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2772f4e9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## `ConversationalRetrievalChain`\n",
|
||||
"<span data-heading-keywords=\"conversationalretrievalchain\"></span>\n",
|
||||
"\n",
|
||||
"The [`ConversationalRetrievalChain`](https://api.python.langchain.com/en/latest/chains/langchain.chains.conversational_retrieval.base.ConversationalRetrievalChain.html) was an all-in one way that combined retrieval-augmented generation with chat history, allowing you to \"chat with\" your documents.\n",
|
||||
"\n",
|
||||
"Advantages of switching to the LCEL implementation are similar to the `RetrievalQA` section above:\n",
|
||||
"\n",
|
||||
"- Clearer internals. The `ConversationalRetrievalChain` chain hides an entire question rephrasing step which dereferences the initial query against the chat history.\n",
|
||||
" - This means the class contains two sets of configurable prompts, LLMs, etc.\n",
|
||||
"- More easily return source documents.\n",
|
||||
"- Support for runnable methods like streaming and async operations.\n",
|
||||
"\n",
|
||||
"Here are side-by-side implementations with custom prompts. We'll reuse the loaded documents and vector store from the previous section:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8bc06416",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<ColumnContainer>\n",
|
||||
"\n",
|
||||
"<Column>\n",
|
||||
"\n",
|
||||
"#### Legacy"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"id": "54eb9576",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'question': 'What are autonomous agents?',\n",
|
||||
" 'chat_history': '',\n",
|
||||
" 'answer': 'Autonomous agents are powered by Large Language Models (LLMs) to handle tasks like scientific discovery and complex experiments autonomously. These agents can browse the internet, read documentation, execute code, and leverage other LLMs to perform tasks. They can reason and plan ahead to decompose complicated tasks into manageable steps.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 31,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chains import ConversationalRetrievalChain\n",
|
||||
"\n",
|
||||
"condense_question_template = \"\"\"\n",
|
||||
"Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.\n",
|
||||
"\n",
|
||||
"Chat History:\n",
|
||||
"{chat_history}\n",
|
||||
"Follow Up Input: {question}\n",
|
||||
"Standalone question:\"\"\"\n",
|
||||
"\n",
|
||||
"condense_question_prompt = ChatPromptTemplate.from_template(condense_question_template)\n",
|
||||
"\n",
|
||||
"qa_template = \"\"\"\n",
|
||||
"You are an assistant for question-answering tasks.\n",
|
||||
"Use the following pieces of retrieved context to answer\n",
|
||||
"the question. If you don't know the answer, say that you\n",
|
||||
"don't know. Use three sentences maximum and keep the\n",
|
||||
"answer concise.\n",
|
||||
"\n",
|
||||
"Chat History:\n",
|
||||
"{chat_history}\n",
|
||||
"\n",
|
||||
"Other context:\n",
|
||||
"{context}\n",
|
||||
"\n",
|
||||
"Question: {question}\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"qa_prompt = ChatPromptTemplate.from_template(qa_template)\n",
|
||||
"\n",
|
||||
"convo_qa_chain = ConversationalRetrievalChain.from_llm(\n",
|
||||
" llm,\n",
|
||||
" vectorstore.as_retriever(),\n",
|
||||
" condense_question_prompt=condense_question_prompt,\n",
|
||||
" combine_docs_chain_kwargs={\n",
|
||||
" \"prompt\": qa_prompt,\n",
|
||||
" },\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"convo_qa_chain(\n",
|
||||
" {\n",
|
||||
" \"question\": \"What are autonomous agents?\",\n",
|
||||
" \"chat_history\": \"\",\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "43a8a23c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</Column>\n",
|
||||
"\n",
|
||||
"<Column>\n",
|
||||
"\n",
|
||||
"#### LCEL\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"id": "c884b138",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'What are autonomous agents?',\n",
|
||||
" 'chat_history': [],\n",
|
||||
" 'context': [Document(page_content='Boiko et al. (2023) also looked into LLM-empowered agents for scientific discovery, to handle autonomous design, planning, and performance of complex scientific experiments. This agent can use tools to browse the Internet, read documentation, execute code, call robotics experimentation APIs and leverage other LLMs.\\nFor example, when requested to \"develop a novel anticancer drug\", the model came up with the following reasoning steps:', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:', 'language': 'en'}),\n",
|
||||
" Document(page_content='Weng, Lilian. (Jun 2023). “LLM-powered Autonomous Agents”. Lil’Log. https://lilianweng.github.io/posts/2023-06-23-agent/.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:', 'language': 'en'}),\n",
|
||||
" Document(page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\\nComponent One: Planning#\\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\\nTask Decomposition#', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:', 'language': 'en'}),\n",
|
||||
" Document(page_content='Or\\n@article{weng2023agent,\\n title = \"LLM-powered Autonomous Agents\",\\n author = \"Weng, Lilian\",\\n journal = \"lilianweng.github.io\",\\n year = \"2023\",\\n month = \"Jun\",\\n url = \"https://lilianweng.github.io/posts/2023-06-23-agent/\"\\n}\\nReferences#\\n[1] Wei et al. “Chain of thought prompting elicits reasoning in large language models.” NeurIPS 2022\\n[2] Yao et al. “Tree of Thoughts: Dliberate Problem Solving with Large Language Models.” arXiv preprint arXiv:2305.10601 (2023).', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:', 'language': 'en'})],\n",
|
||||
" 'answer': 'Autonomous agents are entities capable of acting independently, making decisions, and performing tasks without direct human intervention. These agents can interact with their environment, perceive information, and take actions based on their goals or objectives. They often use artificial intelligence techniques to navigate and accomplish tasks in complex or dynamic environments.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chains import create_history_aware_retriever, create_retrieval_chain\n",
|
||||
"\n",
|
||||
"condense_question_system_template = (\n",
|
||||
" \"Given a chat history and the latest user question \"\n",
|
||||
" \"which might reference context in the chat history, \"\n",
|
||||
" \"formulate a standalone question which can be understood \"\n",
|
||||
" \"without the chat history. Do NOT answer the question, \"\n",
|
||||
" \"just reformulate it if needed and otherwise return it as is.\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"condense_question_prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\"system\", condense_question_system_template),\n",
|
||||
" (\"placeholder\", \"{chat_history}\"),\n",
|
||||
" (\"human\", \"{input}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"history_aware_retriever = create_history_aware_retriever(\n",
|
||||
" llm, vectorstore.as_retriever(), condense_question_prompt\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"system_prompt = (\n",
|
||||
" \"You are an assistant for question-answering tasks. \"\n",
|
||||
" \"Use the following pieces of retrieved context to answer \"\n",
|
||||
" \"the question. If you don't know the answer, say that you \"\n",
|
||||
" \"don't know. Use three sentences maximum and keep the \"\n",
|
||||
" \"answer concise.\"\n",
|
||||
" \"\\n\\n\"\n",
|
||||
" \"{context}\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"qa_prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\"system\", system_prompt),\n",
|
||||
" (\"placeholder\", \"{chat_history}\"),\n",
|
||||
" (\"human\", \"{input}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"qa_chain = create_stuff_documents_chain(llm, qa_prompt)\n",
|
||||
"\n",
|
||||
"convo_qa_chain = create_retrieval_chain(history_aware_retriever, qa_chain)\n",
|
||||
"\n",
|
||||
"convo_qa_chain.invoke(\n",
|
||||
" {\n",
|
||||
" \"input\": \"What are autonomous agents?\",\n",
|
||||
" \"chat_history\": [],\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b2717810",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</Column>\n",
|
||||
"\n",
|
||||
"</ColumnContainer>\n",
|
||||
"\n",
|
||||
"## Next steps\n",
|
||||
"\n",
|
||||
"You've now seen how to migrate existing usage of some legacy chains to LCEL.\n",
|
||||
"\n",
|
||||
"Next, check out the [LCEL conceptual docs](/docs/concepts/#langchain-expression-language-lcel) for more background information."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -13,9 +13,14 @@ the v1 namespace of Pydantic 2.
|
||||
Because Pydantic does not support mixing .v1 and .v2 objects, users should be aware of a number of issues
|
||||
when using LangChain with Pydantic.
|
||||
|
||||
:::caution
|
||||
While LangChain supports Pydantic V2 objects in some APIs (listed below), it's suggested that users keep using Pydantic V1 objects until LangChain 0.3 is released.
|
||||
:::
|
||||
|
||||
|
||||
## 1. Passing Pydantic objects to LangChain APIs
|
||||
|
||||
Most LangChain APIs that accept Pydantic objects have been updated to accept both Pydantic v1 and v2 objects.
|
||||
Most LangChain APIs for *tool usage* (see list below) have been updated to accept either Pydantic v1 or v2 objects.
|
||||
|
||||
* Pydantic v1 objects correspond to subclasses of `pydantic.BaseModel` if `pydantic 1` is installed or subclasses of `pydantic.v1.BaseModel` if `pydantic 2` is installed.
|
||||
* Pydantic v2 objects correspond to subclasses of `pydantic.BaseModel` if `pydantic 2` is installed.
|
||||
@@ -38,6 +43,7 @@ Partner packages that accept pydantic v2 objects via `bind_tools` or `with_struc
|
||||
| langchain-robocorp | Yes | >=0.0.10 |
|
||||
| langchain-openai | Yes | >=0.1.19 |
|
||||
| langchain-fireworks | Yes | >=0.1.5 |
|
||||
| langchain-aws | Yes | >=0.1.15 |
|
||||
|
||||
Additional partner packages will be updated to accept Pydantic v2 objects in the future.
|
||||
|
||||
@@ -169,4 +175,4 @@ If you need OpenAPI docs, your options are to either install Pydantic 1:
|
||||
or else to use the `APIHandler` object in LangChain to manually create the
|
||||
routes for your API.
|
||||
|
||||
See: https://python.langchain.com/v0.2/docs/langserve/#pydantic
|
||||
See: https://python.langchain.com/v0.2/docs/langserve/#pydantic
|
||||
|
||||
@@ -14,7 +14,9 @@
|
||||
"We will cover two approaches:\n",
|
||||
"\n",
|
||||
"1. Using the built-in [create_retrieval_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.retrieval.create_retrieval_chain.html), which returns sources by default;\n",
|
||||
"2. Using a simple [LCEL](/docs/concepts#langchain-expression-language-lcel) implementation, to show the operating principle."
|
||||
"2. Using a simple [LCEL](/docs/concepts#langchain-expression-language-lcel) implementation, to show the operating principle.\n",
|
||||
"\n",
|
||||
"We will also show how to structure sources into the model response, such that a model can report what specific sources it used in generating its answer."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -130,8 +132,8 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "820244ae-74b4-4593-b392-822979dd91b8",
|
||||
"execution_count": null,
|
||||
"id": "24a69b8c-024e-4e34-b827-9c9de46512a3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -211,11 +213,11 @@
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'What is Task Decomposition?',\n",
|
||||
" 'context': [Document(page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\\nComponent One: Planning#\\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\\nTask Decomposition#\\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n",
|
||||
" Document(page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\\nTask decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\", \"What are the subgoals for achieving XYZ?\", (2) by using task-specific instructions; e.g. \"Write a story outline.\" for writing a novel, or (3) with human inputs.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n",
|
||||
" Document(page_content='Resources:\\n1. Internet access for searches and information gathering.\\n2. Long Term memory management.\\n3. GPT-3.5 powered Agents for delegation of simple tasks.\\n4. File output.\\n\\nPerformance Evaluation:\\n1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities.\\n2. Constructively self-criticize your big-picture behavior constantly.\\n3. Reflect on past decisions and strategies to refine your approach.\\n4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n",
|
||||
" Document(page_content=\"(3) Task execution: Expert models execute on the specific tasks and log results.\\nInstruction:\\n\\nWith the input and the inference results, the AI assistant needs to describe the process and results. The previous stages can be formed as - User Input: {{ User Input }}, Task Planning: {{ Tasks }}, Model Selection: {{ Model Assignment }}, Task Execution: {{ Predictions }}. You must first answer the user's request in a straightforward manner. Then describe the task process and show your analysis and model inference results to the user in the first person. If inference results contain a file path, must tell the user the complete file path.\", metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'})],\n",
|
||||
" 'answer': 'Task decomposition involves breaking down a complex task into smaller and simpler steps. This process helps agents or models handle challenging tasks by dividing them into more manageable subtasks. Techniques like Chain of Thought and Tree of Thoughts are used to decompose tasks into multiple steps for better problem-solving.'}"
|
||||
" 'context': [Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\\nComponent One: Planning#\\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\\nTask Decomposition#\\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.'),\n",
|
||||
" Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\\nTask decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\", \"What are the subgoals for achieving XYZ?\", (2) by using task-specific instructions; e.g. \"Write a story outline.\" for writing a novel, or (3) with human inputs.'),\n",
|
||||
" Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Resources:\\n1. Internet access for searches and information gathering.\\n2. Long Term memory management.\\n3. GPT-3.5 powered Agents for delegation of simple tasks.\\n4. File output.\\n\\nPerformance Evaluation:\\n1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities.\\n2. Constructively self-criticize your big-picture behavior constantly.\\n3. Reflect on past decisions and strategies to refine your approach.\\n4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.'),\n",
|
||||
" Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content=\"(3) Task execution: Expert models execute on the specific tasks and log results.\\nInstruction:\\n\\nWith the input and the inference results, the AI assistant needs to describe the process and results. The previous stages can be formed as - User Input: {{ User Input }}, Task Planning: {{ Tasks }}, Model Selection: {{ Model Assignment }}, Task Execution: {{ Predictions }}. You must first answer the user's request in a straightforward manner. Then describe the task process and show your analysis and model inference results to the user in the first person. If inference results contain a file path, must tell the user the complete file path.\")],\n",
|
||||
" 'answer': 'Task decomposition involves breaking down a complex task into smaller and more manageable steps. This process helps agents or models tackle difficult tasks by dividing them into simpler subtasks or components. Task decomposition can be achieved through techniques like Chain of Thought or Tree of Thoughts, which guide the agent in breaking down tasks into sequential or branching steps.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
@@ -251,18 +253,18 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "22ea137c-1a7a-44dd-ac73-281213979957",
|
||||
"id": "1950953a-e6f1-439d-b7b9-c3bd456e388d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'What is Task Decomposition',\n",
|
||||
" 'context': [Document(page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\\nComponent One: Planning#\\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\\nTask Decomposition#\\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n",
|
||||
" Document(page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\\nTask decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\", \"What are the subgoals for achieving XYZ?\", (2) by using task-specific instructions; e.g. \"Write a story outline.\" for writing a novel, or (3) with human inputs.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n",
|
||||
" Document(page_content='The AI assistant can parse user input to several tasks: [{\"task\": task, \"id\", task_id, \"dep\": dependency_task_ids, \"args\": {\"text\": text, \"image\": URL, \"audio\": URL, \"video\": URL}}]. The \"dep\" field denotes the id of the previous task which generates a new resource that the current task relies on. A special tag \"-task_id\" refers to the generated text image, audio and video in the dependency task with id as task_id. The task MUST be selected from the following options: {{ Available Task List }}. There is a logical relationship between tasks, please note their order. If the user input can\\'t be parsed, you need to reply empty JSON. Here are several cases for your reference: {{ Demonstrations }}. The chat history is recorded as {{ Chat History }}. From this chat history, you can find the path of the user-mentioned resources for your task planning.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n",
|
||||
" Document(page_content='Fig. 11. Illustration of how HuggingGPT works. (Image source: Shen et al. 2023)\\nThe system comprises of 4 stages:\\n(1) Task planning: LLM works as the brain and parses the user requests into multiple tasks. There are four attributes associated with each task: task type, ID, dependencies, and arguments. They use few-shot examples to guide LLM to do task parsing and planning.\\nInstruction:', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'})],\n",
|
||||
" 'answer': 'Task decomposition involves breaking down complex tasks into smaller and simpler steps to make them more manageable for autonomous agents or models. This process can be achieved by techniques like Chain of Thought (CoT) or Tree of Thoughts, which guide the model to think step by step or explore multiple reasoning possibilities at each step. Task decomposition can be done through simple prompting with language models, task-specific instructions, or human inputs.'}"
|
||||
" 'context': [Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\\nComponent One: Planning#\\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\\nTask Decomposition#\\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.'),\n",
|
||||
" Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\\nTask decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\", \"What are the subgoals for achieving XYZ?\", (2) by using task-specific instructions; e.g. \"Write a story outline.\" for writing a novel, or (3) with human inputs.'),\n",
|
||||
" Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='The AI assistant can parse user input to several tasks: [{\"task\": task, \"id\", task_id, \"dep\": dependency_task_ids, \"args\": {\"text\": text, \"image\": URL, \"audio\": URL, \"video\": URL}}]. The \"dep\" field denotes the id of the previous task which generates a new resource that the current task relies on. A special tag \"-task_id\" refers to the generated text image, audio and video in the dependency task with id as task_id. The task MUST be selected from the following options: {{ Available Task List }}. There is a logical relationship between tasks, please note their order. If the user input can\\'t be parsed, you need to reply empty JSON. Here are several cases for your reference: {{ Demonstrations }}. The chat history is recorded as {{ Chat History }}. From this chat history, you can find the path of the user-mentioned resources for your task planning.'),\n",
|
||||
" Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Fig. 11. Illustration of how HuggingGPT works. (Image source: Shen et al. 2023)\\nThe system comprises of 4 stages:\\n(1) Task planning: LLM works as the brain and parses the user requests into multiple tasks. There are four attributes associated with each task: task type, ID, dependencies, and arguments. They use few-shot examples to guide LLM to do task parsing and planning.\\nInstruction:')],\n",
|
||||
" 'answer': 'Task decomposition is a technique used in artificial intelligence to break down complex tasks into smaller and more manageable subtasks. This approach helps agents or models to tackle difficult problems by dividing them into simpler steps, improving performance and interpretability. Different methods like Chain of Thought and Tree of Thoughts have been developed to enhance task decomposition in AI systems.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
@@ -279,15 +281,25 @@
|
||||
" return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# This Runnable takes a dict with keys 'input' and 'context',\n",
|
||||
"# formats them into a prompt, and generates a response.\n",
|
||||
"rag_chain_from_docs = (\n",
|
||||
" RunnablePassthrough.assign(context=(lambda x: format_docs(x[\"context\"])))\n",
|
||||
" | prompt\n",
|
||||
" | llm\n",
|
||||
" | StrOutputParser()\n",
|
||||
" {\n",
|
||||
" \"input\": lambda x: x[\"input\"], # input query\n",
|
||||
" \"context\": lambda x: format_docs(x[\"context\"]), # context\n",
|
||||
" }\n",
|
||||
" | prompt # format query and context into prompt\n",
|
||||
" | llm # generate response\n",
|
||||
" | StrOutputParser() # coerce to string\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Pass input query to retriever\n",
|
||||
"retrieve_docs = (lambda x: x[\"input\"]) | retriever\n",
|
||||
"\n",
|
||||
"# Below, we chain `.assign` calls. This takes a dict and successively\n",
|
||||
"# adds keys-- \"context\" and \"answer\"-- where the value for each key\n",
|
||||
"# is determined by a Runnable. The Runnable operates on all existing\n",
|
||||
"# keys in the dict.\n",
|
||||
"chain = RunnablePassthrough.assign(context=retrieve_docs).assign(\n",
|
||||
" answer=rag_chain_from_docs\n",
|
||||
")\n",
|
||||
@@ -302,7 +314,105 @@
|
||||
"source": [
|
||||
":::{.callout-tip}\n",
|
||||
"\n",
|
||||
"Check out the [LangSmith trace](https://smith.langchain.com/public/0cb42685-e29e-4280-a503-bef2014d7ba2/r)\n",
|
||||
"Check out the [LangSmith trace](https://smith.langchain.com/public/1c055a3b-0236-4670-a3fb-023d418ba796/r)\n",
|
||||
"\n",
|
||||
":::"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c1c17797-d965-4fd2-b8d4-d386f25dd352",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Structure sources in model response\n",
|
||||
"\n",
|
||||
"Up to this point, we've simply propagated the documents returned from the retrieval step through to the final response. But this may not illustrate what subset of information the model relied on when generating its answer. Below, we show how to structure sources into the model response, allowing the model to report what specific context it relied on for its answer.\n",
|
||||
"\n",
|
||||
"Because the above LCEL implementation is composed of [Runnable](/docs/concepts/#runnable-interface) primitives, it is straightforward to extend. Below, we make a simple change:\n",
|
||||
"\n",
|
||||
"- We use the model's tool-calling features to generate [structured output](/docs/how_to/structured_output/), consisting of an answer and list of sources. The schema for the response is represented in the `AnswerWithSources` TypedDict, below.\n",
|
||||
"- We remove the `StrOutputParser()`, as we expect `dict` output in this scenario."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "8f916b14-1b0a-4975-a62f-52f1353bde15",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from typing import List\n",
|
||||
"\n",
|
||||
"from langchain_core.runnables import RunnablePassthrough\n",
|
||||
"from typing_extensions import Annotated, TypedDict\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Desired schema for response\n",
|
||||
"class AnswerWithSources(TypedDict):\n",
|
||||
" \"\"\"An answer to the question, with sources.\"\"\"\n",
|
||||
"\n",
|
||||
" answer: str\n",
|
||||
" sources: Annotated[\n",
|
||||
" List[str],\n",
|
||||
" ...,\n",
|
||||
" \"List of sources (author + year) used to answer the question\",\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Our rag_chain_from_docs has the following changes:\n",
|
||||
"# - add `.with_structured_output` to the LLM;\n",
|
||||
"# - remove the output parser\n",
|
||||
"rag_chain_from_docs = (\n",
|
||||
" {\n",
|
||||
" \"input\": lambda x: x[\"input\"],\n",
|
||||
" \"context\": lambda x: format_docs(x[\"context\"]),\n",
|
||||
" }\n",
|
||||
" | prompt\n",
|
||||
" | llm.with_structured_output(AnswerWithSources)\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"retrieve_docs = (lambda x: x[\"input\"]) | retriever\n",
|
||||
"\n",
|
||||
"chain = RunnablePassthrough.assign(context=retrieve_docs).assign(\n",
|
||||
" answer=rag_chain_from_docs\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"response = chain.invoke({\"input\": \"What is Chain of Thought?\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "7a8fc0c5-afb3-4012-a467-3951996a6850",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{\n",
|
||||
" \"answer\": \"Chain of Thought (CoT) is a prompting technique that enhances model performance on complex tasks by instructing the model to \\\"think step by step\\\" to decompose hard tasks into smaller and simpler steps. It transforms big tasks into multiple manageable tasks and sheds light on the interpretation of the model's thinking process.\",\n",
|
||||
" \"sources\": [\n",
|
||||
" \"Wei et al. 2022\"\n",
|
||||
" ]\n",
|
||||
"}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"\n",
|
||||
"print(json.dumps(response[\"answer\"], indent=2))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7440f785-29c5-4c6b-9656-0d9d5efbac05",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
":::{.callout-tip}\n",
|
||||
"\n",
|
||||
"View [LangSmith trace](https://smith.langchain.com/public/0eeddf06-3a7b-4f27-974c-310ca8160f60/r)\n",
|
||||
"\n",
|
||||
":::"
|
||||
]
|
||||
|
||||
@@ -761,7 +761,7 @@
|
||||
"* [SQL tutorial](/docs/tutorials/sql_qa): Many of the challenges of working with SQL db's and CSV's are generic to any structured data type, so it's useful to read the SQL techniques even if you're using Pandas for CSV data analysis.\n",
|
||||
"* [Tool use](/docs/how_to/tool_calling): Guides on general best practices when working with chains and agents that invoke tools\n",
|
||||
"* [Agents](/docs/tutorials/agents): Understand the fundamentals of building LLM agents.\n",
|
||||
"* Integrations: Sandboxed envs like [E2B](/docs/integrations/tools/e2b_data_analysis) and [Bearly](/docs/integrations/tools/bearly), utilities like [SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase), related agents like [Spark DataFrame agent](/docs/integrations/toolkits/spark)."
|
||||
"* Integrations: Sandboxed envs like [E2B](/docs/integrations/tools/e2b_data_analysis) and [Bearly](/docs/integrations/tools/bearly), utilities like [SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase), related agents like [Spark DataFrame agent](/docs/integrations/tools/spark_sql)."
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -43,7 +43,7 @@
|
||||
"\n",
|
||||
"This is the easiest and most reliable way to get structured outputs. `with_structured_output()` is implemented for models that provide native APIs for structuring outputs, like tool/function calling or JSON mode, and makes use of these capabilities under the hood.\n",
|
||||
"\n",
|
||||
"This method takes a schema as input which specifies the names, types, and descriptions of the desired output attributes. The method returns a model-like Runnable, except that instead of outputting strings or Messages it outputs objects corresponding to the given schema. The schema can be specified as a [JSON Schema](https://json-schema.org/) or a Pydantic class. If JSON Schema is used then a dictionary will be returned by the Runnable, and if a Pydantic class is used then Pydantic objects will be returned.\n",
|
||||
"This method takes a schema as input which specifies the names, types, and descriptions of the desired output attributes. The method returns a model-like Runnable, except that instead of outputting strings or Messages it outputs objects corresponding to the given schema. The schema can be specified as a TypedDict class, [JSON Schema](https://json-schema.org/) or a Pydantic class. If TypedDict or JSON Schema are used then a dictionary will be returned by the Runnable, and if a Pydantic class is used then a Pydantic object will be returned.\n",
|
||||
"\n",
|
||||
"As an example, let's get a model to generate a joke and separate the setup from the punchline:\n",
|
||||
"\n",
|
||||
@@ -58,7 +58,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 3,
|
||||
"id": "6d55008f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -68,7 +68,7 @@
|
||||
"\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-4-0125-preview\", temperature=0)"
|
||||
"llm = ChatOpenAI(model=\"gpt-4o\", temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -76,22 +76,24 @@
|
||||
"id": "a808a401-be1f-49f9-ad13-58dd68f7db5f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If we want the model to return a Pydantic object, we just need to pass in the desired Pydantic class:"
|
||||
"### Pydantic class\n",
|
||||
"\n",
|
||||
"If we want the model to return a Pydantic object, we just need to pass in the desired Pydantic class. The key advantage of using Pydantic is that the model-generated output will be validated. Pydantic will raise an error if any required fields are missing or if any fields are of the wrong type."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"id": "070bf702",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Joke(setup='Why was the cat sitting on the computer?', punchline='Because it wanted to keep an eye on the mouse!', rating=8)"
|
||||
"Joke(setup='Why was the cat sitting on the computer?', punchline='Because it wanted to keep an eye on the mouse!', rating=7)"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -102,12 +104,15 @@
|
||||
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Pydantic\n",
|
||||
"class Joke(BaseModel):\n",
|
||||
" \"\"\"Joke to tell user.\"\"\"\n",
|
||||
"\n",
|
||||
" setup: str = Field(description=\"The setup of the joke\")\n",
|
||||
" punchline: str = Field(description=\"The punchline to the joke\")\n",
|
||||
" rating: Optional[int] = Field(description=\"How funny the joke is, from 1 to 10\")\n",
|
||||
" rating: Optional[int] = Field(\n",
|
||||
" default=None, description=\"How funny the joke is, from 1 to 10\"\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"structured_llm = llm.with_structured_output(Joke)\n",
|
||||
@@ -130,12 +135,73 @@
|
||||
"id": "deddb6d3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can also pass in a [JSON Schema](https://json-schema.org/) dict if you prefer not to use Pydantic. In this case, the response is also a dict:"
|
||||
"### TypedDict or JSON Schema\n",
|
||||
"\n",
|
||||
"If you don't want to use Pydantic, explicitly don't want validation of the arguments, or want to be able to stream the model outputs, you can define your schema using a TypedDict class. We can optionally use a special `Annotated` syntax supported by LangChain that allows you to specify the default value and description of a field. Note, the default value is *not* filled in automatically if the model doesn't generate it, it is only used in defining the schema that is passed to the model.\n",
|
||||
"\n",
|
||||
":::info Requirements\n",
|
||||
"\n",
|
||||
"- Core: `langchain-core>=0.2.26`\n",
|
||||
"- Typing extensions: It is highly recommended to import `Annotated` and `TypedDict` from `typing_extensions` instead of `typing` to ensure consistent behavior across Python versions.\n",
|
||||
"\n",
|
||||
":::"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "70d82891-42e8-424a-919e-07d83bcfec61",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'setup': 'Why was the cat sitting on the computer?',\n",
|
||||
" 'punchline': 'Because it wanted to keep an eye on the mouse!',\n",
|
||||
" 'rating': 7}"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from typing_extensions import Annotated, TypedDict\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# TypedDict\n",
|
||||
"class Joke(TypedDict):\n",
|
||||
" \"\"\"Joke to tell user.\"\"\"\n",
|
||||
"\n",
|
||||
" setup: Annotated[str, ..., \"The setup of the joke\"]\n",
|
||||
"\n",
|
||||
" # Alternatively, we could have specified setup as:\n",
|
||||
"\n",
|
||||
" # setup: str # no default, no description\n",
|
||||
" # setup: Annotated[str, ...] # no default, no description\n",
|
||||
" # setup: Annotated[str, \"foo\"] # default, no description\n",
|
||||
"\n",
|
||||
" punchline: Annotated[str, ..., \"The punchline of the joke\"]\n",
|
||||
" rating: Annotated[Optional[int], None, \"How funny the joke is, from 1 to 10\"]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"structured_llm = llm.with_structured_output(Joke)\n",
|
||||
"\n",
|
||||
"structured_llm.invoke(\"Tell me a joke about cats\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e4d7b4dc-f617-4ea8-aa58-847c228791b4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Equivalently, we can pass in a [JSON Schema](https://json-schema.org/) dict. This requires no imports or classes and makes it very clear exactly how each parameter is documented, at the cost of being a bit more verbose."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "6700994a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -144,10 +210,10 @@
|
||||
"text/plain": [
|
||||
"{'setup': 'Why was the cat sitting on the computer?',\n",
|
||||
" 'punchline': 'Because it wanted to keep an eye on the mouse!',\n",
|
||||
" 'rating': 8}"
|
||||
" 'rating': 7}"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -169,6 +235,7 @@
|
||||
" \"rating\": {\n",
|
||||
" \"type\": \"integer\",\n",
|
||||
" \"description\": \"How funny the joke is, from 1 to 10\",\n",
|
||||
" \"default\": None,\n",
|
||||
" },\n",
|
||||
" },\n",
|
||||
" \"required\": [\"setup\", \"punchline\"],\n",
|
||||
@@ -185,7 +252,7 @@
|
||||
"source": [
|
||||
"### Choosing between multiple schemas\n",
|
||||
"\n",
|
||||
"The simplest way to let the model choose from multiple schemas is to create a parent Pydantic class that has a Union-typed attribute:"
|
||||
"The simplest way to let the model choose from multiple schemas is to create a parent schema that has a Union-typed attribute:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -209,6 +276,17 @@
|
||||
"from typing import Union\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Pydantic\n",
|
||||
"class Joke(BaseModel):\n",
|
||||
" \"\"\"Joke to tell user.\"\"\"\n",
|
||||
"\n",
|
||||
" setup: str = Field(description=\"The setup of the joke\")\n",
|
||||
" punchline: str = Field(description=\"The punchline to the joke\")\n",
|
||||
" rating: Optional[int] = Field(\n",
|
||||
" default=None, description=\"How funny the joke is, from 1 to 10\"\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class ConversationalResponse(BaseModel):\n",
|
||||
" \"\"\"Respond in a conversational manner. Be kind and helpful.\"\"\"\n",
|
||||
"\n",
|
||||
@@ -260,7 +338,7 @@
|
||||
"source": [
|
||||
"### Streaming\n",
|
||||
"\n",
|
||||
"We can stream outputs from our structured model when the output type is a dict (i.e., when the schema is specified as a JSON Schema dict). \n",
|
||||
"We can stream outputs from our structured model when the output type is a dict (i.e., when the schema is specified as a TypedDict class or JSON Schema dict). \n",
|
||||
"\n",
|
||||
":::info\n",
|
||||
"\n",
|
||||
@@ -271,7 +349,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"execution_count": 9,
|
||||
"id": "aff89877-28a3-472f-a1aa-eff893fe7736",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -302,12 +380,24 @@
|
||||
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to keep an eye on the'}\n",
|
||||
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to keep an eye on the mouse'}\n",
|
||||
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to keep an eye on the mouse!'}\n",
|
||||
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to keep an eye on the mouse!', 'rating': 8}\n"
|
||||
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to keep an eye on the mouse!', 'rating': 7}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"structured_llm = llm.with_structured_output(json_schema)\n",
|
||||
"from typing_extensions import Annotated, TypedDict\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# TypedDict\n",
|
||||
"class Joke(TypedDict):\n",
|
||||
" \"\"\"Joke to tell user.\"\"\"\n",
|
||||
"\n",
|
||||
" setup: Annotated[str, ..., \"The setup of the joke\"]\n",
|
||||
" punchline: Annotated[str, ..., \"The punchline of the joke\"]\n",
|
||||
" rating: Annotated[Optional[int], None, \"How funny the joke is, from 1 to 10\"]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"structured_llm = llm.with_structured_output(Joke)\n",
|
||||
"\n",
|
||||
"for chunk in structured_llm.stream(\"Tell me a joke about cats\"):\n",
|
||||
" print(chunk)"
|
||||
@@ -327,7 +417,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 47,
|
||||
"execution_count": 11,
|
||||
"id": "283ba784-2072-47ee-9b2c-1119e3c69e8e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -335,11 +425,11 @@
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'setup': 'Woodpecker',\n",
|
||||
" 'punchline': \"Woodpecker goes 'knock knock', but don't worry, they never expect you to answer the door!\",\n",
|
||||
" 'rating': 8}"
|
||||
" 'punchline': \"Woodpecker who? Woodpecker who can't find a tree is just a bird with a headache!\",\n",
|
||||
" 'rating': 7}"
|
||||
]
|
||||
},
|
||||
"execution_count": 47,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -377,7 +467,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 46,
|
||||
"execution_count": 12,
|
||||
"id": "d7381cb0-b2c3-4302-a319-ed72d0b9e43f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -385,11 +475,11 @@
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'setup': 'Crocodile',\n",
|
||||
" 'punchline': \"Crocodile 'see you later', but in a while, it becomes an alligator!\",\n",
|
||||
" 'punchline': 'Crocodile be seeing you later, alligator!',\n",
|
||||
" 'rating': 7}"
|
||||
]
|
||||
},
|
||||
"execution_count": 46,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -491,23 +581,24 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 15,
|
||||
"id": "df0370e3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Joke(setup='Why was the cat sitting on the computer?', punchline='Because it wanted to keep an eye on the mouse!', rating=None)"
|
||||
"{'setup': 'Why was the cat sitting on the computer?',\n",
|
||||
" 'punchline': 'Because it wanted to keep an eye on the mouse!'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"structured_llm = llm.with_structured_output(Joke, method=\"json_mode\")\n",
|
||||
"structured_llm = llm.with_structured_output(None, method=\"json_mode\")\n",
|
||||
"\n",
|
||||
"structured_llm.invoke(\n",
|
||||
" \"Tell me a joke about cats, respond in JSON with `setup` and `punchline` keys\"\n",
|
||||
@@ -526,19 +617,21 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 17,
|
||||
"id": "10ed2842",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_ASK4EmZeZ69Fi3p554Mb4rWy', 'function': {'arguments': '{\"setup\":\"Why was the cat sitting on the computer?\",\"punchline\":\"Because it wanted to keep an eye on the mouse!\"}', 'name': 'Joke'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 36, 'prompt_tokens': 107, 'total_tokens': 143}, 'model_name': 'gpt-4-0125-preview', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-6491d35b-9164-4656-b75c-d7882cfb76cb-0', tool_calls=[{'name': 'Joke', 'args': {'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to keep an eye on the mouse!'}, 'id': 'call_ASK4EmZeZ69Fi3p554Mb4rWy'}], usage_metadata={'input_tokens': 107, 'output_tokens': 36, 'total_tokens': 143}),\n",
|
||||
" 'parsed': Joke(setup='Why was the cat sitting on the computer?', punchline='Because it wanted to keep an eye on the mouse!', rating=None),\n",
|
||||
"{'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_f25ZRmh8u5vHlOWfTUw8sJFZ', 'function': {'arguments': '{\"setup\":\"Why was the cat sitting on the computer?\",\"punchline\":\"Because it wanted to keep an eye on the mouse!\",\"rating\":7}', 'name': 'Joke'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 33, 'prompt_tokens': 93, 'total_tokens': 126}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'stop', 'logprobs': None}, id='run-d880d7e2-df08-4e9e-ad92-dfc29f2fd52f-0', tool_calls=[{'name': 'Joke', 'args': {'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to keep an eye on the mouse!', 'rating': 7}, 'id': 'call_f25ZRmh8u5vHlOWfTUw8sJFZ', 'type': 'tool_call'}], usage_metadata={'input_tokens': 93, 'output_tokens': 33, 'total_tokens': 126}),\n",
|
||||
" 'parsed': {'setup': 'Why was the cat sitting on the computer?',\n",
|
||||
" 'punchline': 'Because it wanted to keep an eye on the mouse!',\n",
|
||||
" 'rating': 7},\n",
|
||||
" 'parsing_error': None}"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -546,9 +639,7 @@
|
||||
"source": [
|
||||
"structured_llm = llm.with_structured_output(Joke, include_raw=True)\n",
|
||||
"\n",
|
||||
"structured_llm.invoke(\n",
|
||||
" \"Tell me a joke about cats, respond in JSON with `setup` and `punchline` keys\"\n",
|
||||
")"
|
||||
"structured_llm.invoke(\"Tell me a joke about cats\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -824,7 +915,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -838,7 +929,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.5"
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -24,10 +24,9 @@
|
||||
"This guide assumes familiarity with the following concepts:\n",
|
||||
"\n",
|
||||
"- [Chat models](/docs/concepts/#chat-models)\n",
|
||||
"- [LangChain Tools](/docs/concepts/#tools)\n",
|
||||
"- [Tool calling](/docs/concepts/#functiontool-calling)\n",
|
||||
"- [Tools](/docs/concepts/#tools)\n",
|
||||
"- [Output parsers](/docs/concepts/#output-parsers)\n",
|
||||
"\n",
|
||||
":::\n",
|
||||
"\n",
|
||||
"[Tool calling](/docs/concepts/#functiontool-calling) allows a chat model to respond to a given prompt by \"calling a tool\".\n",
|
||||
@@ -38,15 +37,11 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"If you want to see how to use the model-generated tool call to actually run a tool function [check out this guide](/docs/how_to/tool_results_pass_to_model/).\n",
|
||||
"If you want to see how to use the model-generated tool call to actually run a tool [check out this guide](/docs/how_to/tool_results_pass_to_model/).\n",
|
||||
"\n",
|
||||
":::note Supported models\n",
|
||||
"\n",
|
||||
"Tool calling is not universal, but is supported by many popular LLM providers, including [Anthropic](/docs/integrations/chat/anthropic/), \n",
|
||||
"[Cohere](/docs/integrations/chat/cohere/), [Google](/docs/integrations/chat/google_vertex_ai_palm/), \n",
|
||||
"[Mistral](/docs/integrations/chat/mistralai/), [OpenAI](/docs/integrations/chat/openai/), and even for locally-running models via [Ollama](/docs/integrations/chat/ollama/).\n",
|
||||
"\n",
|
||||
"You can find a [list of all models that support tool calling here](/docs/integrations/chat/).\n",
|
||||
"Tool calling is not universal, but is supported by many popular LLM providers. You can find a [list of all models that support tool calling here](/docs/integrations/chat/).\n",
|
||||
"\n",
|
||||
":::\n",
|
||||
"\n",
|
||||
@@ -58,14 +53,12 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Passing tools to chat models\n",
|
||||
"## Defining tool schemas\n",
|
||||
"\n",
|
||||
"Chat models that support tool calling features implement a `.bind_tools` method, which \n",
|
||||
"receives a list of functions, Pydantic models, or LangChain [tool objects](https://api.python.langchain.com/en/latest/tools/langchain_core.tools.BaseTool.html#langchain_core.tools.BaseTool) \n",
|
||||
"and binds them to the chat model in its expected format. Subsequent invocations of the \n",
|
||||
"chat model will include tool schemas in its calls to the LLM.\n",
|
||||
"For a model to be able to call tools, we need to pass in tool schemas that describe what the tool does and what it's arguments are. Chat models that support tool calling features implement a `.bind_tools()` method for passing tool schemas to the model. Tool schemas can be passed in as Python functions (with typehints and docstrings), Pydantic models, TypedDict classes, or LangChain [Tool objects](https://api.python.langchain.com/en/latest/tools/langchain_core.tools.BaseTool.html#langchain_core.tools.BaseTool). Subsequent invocations of the model will pass in these tool schemas along with the prompt.\n",
|
||||
"\n",
|
||||
"For example, below we implement simple tools for arithmetic:"
|
||||
"### Python functions\n",
|
||||
"Our tool schemas can be Python functions:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -74,26 +67,41 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# The function name, type hints, and docstring are all part of the tool\n",
|
||||
"# schema that's passed to the model. Defining good, descriptive schemas\n",
|
||||
"# is an extension of prompt engineering and is an important part of\n",
|
||||
"# getting models to perform well.\n",
|
||||
"def add(a: int, b: int) -> int:\n",
|
||||
" \"\"\"Adds a and b.\"\"\"\n",
|
||||
" \"\"\"Add two integers.\n",
|
||||
"\n",
|
||||
" Args:\n",
|
||||
" a: First integer\n",
|
||||
" b: Second integer\n",
|
||||
" \"\"\"\n",
|
||||
" return a + b\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def multiply(a: int, b: int) -> int:\n",
|
||||
" \"\"\"Multiplies a and b.\"\"\"\n",
|
||||
" return a * b\n",
|
||||
" \"\"\"Multiply two integers.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"tools = [add, multiply]"
|
||||
" Args:\n",
|
||||
" a: First integer\n",
|
||||
" b: Second integer\n",
|
||||
" \"\"\"\n",
|
||||
" return a * b"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### LangChain Tool\n",
|
||||
"\n",
|
||||
"LangChain also implements a `@tool` decorator that allows for further control of the tool schema, such as tool names and argument descriptions. See the how-to guide [here](/docs/how_to/custom_tools/#creating-tools-from-functions) for details.\n",
|
||||
"\n",
|
||||
"We can also define the schemas without the accompanying functions using [Pydantic](https://docs.pydantic.dev):"
|
||||
"### Pydantic class\n",
|
||||
"\n",
|
||||
"You can equivalently define the schemas without the accompanying functions using [Pydantic](https://docs.pydantic.dev):"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -105,23 +113,57 @@
|
||||
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Note that the docstrings here are crucial, as they will be passed along\n",
|
||||
"# to the model along with the class name.\n",
|
||||
"class Add(BaseModel):\n",
|
||||
" \"\"\"Add two integers together.\"\"\"\n",
|
||||
"class add(BaseModel):\n",
|
||||
" \"\"\"Add two integers.\"\"\"\n",
|
||||
"\n",
|
||||
" a: int = Field(..., description=\"First integer\")\n",
|
||||
" b: int = Field(..., description=\"Second integer\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class Multiply(BaseModel):\n",
|
||||
" \"\"\"Multiply two integers together.\"\"\"\n",
|
||||
"class multiply(BaseModel):\n",
|
||||
" \"\"\"Multiply two integers.\"\"\"\n",
|
||||
"\n",
|
||||
" a: int = Field(..., description=\"First integer\")\n",
|
||||
" b: int = Field(..., description=\"Second integer\")\n",
|
||||
" b: int = Field(..., description=\"Second integer\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### TypedDict class\n",
|
||||
"\n",
|
||||
":::info Requires `langchain-core>=0.2.25`\n",
|
||||
":::\n",
|
||||
"\n",
|
||||
"Or using TypedDicts and annotations:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from typing_extensions import Annotated, TypedDict\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"tools = [Add, Multiply]"
|
||||
"class add(TypedDict):\n",
|
||||
" \"\"\"Add two integers.\"\"\"\n",
|
||||
"\n",
|
||||
" # Annotations must have the type and can optionally include a default value and description (in that order).\n",
|
||||
" a: Annotated[int, ..., \"First integer\"]\n",
|
||||
" b: Annotated[int, ..., \"Second integer\"]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class multiply(BaseModel):\n",
|
||||
" \"\"\"Multiply two integers.\"\"\"\n",
|
||||
"\n",
|
||||
" a: Annotated[int, ..., \"First integer\"]\n",
|
||||
" b: Annotated[int, ..., \"Second integer\"]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"tools = [add, multiply]"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -129,7 +171,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To actually bind those schemas to a chat model, we'll use the `.bind_tools()` method. This handles converting\n",
|
||||
"the `Add` and `Multiply` schemas to the proper format for the model. The tool schema will then be passed it in each time the model is invoked.\n",
|
||||
"the `add` and `multiply` schemas to the proper format for the model. The tool schema will then be passed it in each time the model is invoked.\n",
|
||||
"\n",
|
||||
"```{=mdx}\n",
|
||||
"import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
|
||||
@@ -164,16 +206,16 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_wLTBasMppAwpdiA5CD92l9x7', 'function': {'arguments': '{\"a\":3,\"b\":12}', 'name': 'Multiply'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 18, 'prompt_tokens': 89, 'total_tokens': 107}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_0f03d4f0ee', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-d3f36cca-f225-416f-ac16-0217046f0b38-0', tool_calls=[{'name': 'Multiply', 'args': {'a': 3, 'b': 12}, 'id': 'call_wLTBasMppAwpdiA5CD92l9x7', 'type': 'tool_call'}], usage_metadata={'input_tokens': 89, 'output_tokens': 18, 'total_tokens': 107})"
|
||||
"AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_BwYJ4UgU5pRVCBOUmiu7NhF9', 'function': {'arguments': '{\"a\":3,\"b\":12}', 'name': 'multiply'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 80, 'total_tokens': 97}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_ba606877f9', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-7f05e19e-4561-40e2-a2d0-8f4e28e9a00f-0', tool_calls=[{'name': 'multiply', 'args': {'a': 3, 'b': 12}, 'id': 'call_BwYJ4UgU5pRVCBOUmiu7NhF9', 'type': 'tool_call'}], usage_metadata={'input_tokens': 80, 'output_tokens': 17, 'total_tokens': 97})"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -214,23 +256,23 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'name': 'Multiply',\n",
|
||||
"[{'name': 'multiply',\n",
|
||||
" 'args': {'a': 3, 'b': 12},\n",
|
||||
" 'id': 'call_uqJsNrDJ8ZZnFa1BHHYAllEv',\n",
|
||||
" 'id': 'call_rcdMie7E89Xx06lEKKxJyB5N',\n",
|
||||
" 'type': 'tool_call'},\n",
|
||||
" {'name': 'Add',\n",
|
||||
" {'name': 'add',\n",
|
||||
" 'args': {'a': 11, 'b': 49},\n",
|
||||
" 'id': 'call_ud1uHAaYsdpWuxugwoJ63BDs',\n",
|
||||
" 'id': 'call_nheGN8yfvSJsnIuGZaXihou3',\n",
|
||||
" 'type': 'tool_call'}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -252,31 +294,49 @@
|
||||
"are populated in the `.invalid_tool_calls` attribute. An `InvalidToolCall` can have \n",
|
||||
"a name, string arguments, identifier, and error message.\n",
|
||||
"\n",
|
||||
"If desired, [output parsers](/docs/how_to#output-parsers) can further \n",
|
||||
"process the output. For example, we can convert existing values populated on the `.tool_calls` attribute back to the original Pydantic class using the\n",
|
||||
"\n",
|
||||
"## Parsing\n",
|
||||
"\n",
|
||||
"If desired, [output parsers](/docs/how_to#output-parsers) can further process the output. For example, we can convert existing values populated on the `.tool_calls` to Pydantic objects using the\n",
|
||||
"[PydanticToolsParser](https://api.python.langchain.com/en/latest/output_parsers/langchain_core.output_parsers.openai_tools.PydanticToolsParser.html):"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Multiply(a=3, b=12), Add(a=11, b=49)]"
|
||||
"[multiply(a=3, b=12), add(a=11, b=49)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_core.output_parsers import PydanticToolsParser\n",
|
||||
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
|
||||
"\n",
|
||||
"chain = llm_with_tools | PydanticToolsParser(tools=[Multiply, Add])\n",
|
||||
"\n",
|
||||
"class add(BaseModel):\n",
|
||||
" \"\"\"Add two integers.\"\"\"\n",
|
||||
"\n",
|
||||
" a: int = Field(..., description=\"First integer\")\n",
|
||||
" b: int = Field(..., description=\"Second integer\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class multiply(BaseModel):\n",
|
||||
" \"\"\"Multiply two integers.\"\"\"\n",
|
||||
"\n",
|
||||
" a: int = Field(..., description=\"First integer\")\n",
|
||||
" b: int = Field(..., description=\"Second integer\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"chain = llm_with_tools | PydanticToolsParser(tools=[add, multiply])\n",
|
||||
"chain.invoke(query)"
|
||||
]
|
||||
},
|
||||
@@ -294,18 +354,18 @@
|
||||
"\n",
|
||||
"You can also check out some more specific uses of tool calling:\n",
|
||||
"\n",
|
||||
"- Getting [structured outputs](/docs/how_to/structured_output/) from models\n",
|
||||
"- Few shot prompting [with tools](/docs/how_to/tools_few_shot/)\n",
|
||||
"- Stream [tool calls](/docs/how_to/tool_streaming/)\n",
|
||||
"- Pass [runtime values to tools](/docs/how_to/tool_runtime)\n",
|
||||
"- Getting [structured outputs](/docs/how_to/structured_output/) from models"
|
||||
"- Pass [runtime values to tools](/docs/how_to/tool_runtime)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": "poetry-venv-311",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
"name": "poetry-venv-311"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
@@ -317,7 +377,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.5"
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -6,26 +6,20 @@
|
||||
"source": [
|
||||
"# How to pass run time values to tools\n",
|
||||
"\n",
|
||||
":::info Prerequisites\n",
|
||||
"import Prerequisites from \"@theme/Prerequisites\";\n",
|
||||
"import Compatibility from \"@theme/Compatibility\";\n",
|
||||
"\n",
|
||||
"This guide assumes familiarity with the following concepts:\n",
|
||||
"- [Chat models](/docs/concepts/#chat-models)\n",
|
||||
"- [LangChain Tools](/docs/concepts/#tools)\n",
|
||||
"- [How to create tools](/docs/how_to/custom_tools)\n",
|
||||
"- [How to use a model to call tools](/docs/how_to/tool_calling)\n",
|
||||
":::\n",
|
||||
"<Prerequisites titlesAndLinks={[\n",
|
||||
" [\"Chat models\", \"/docs/concepts/#chat-models\"],\n",
|
||||
" [\"LangChain Tools\", \"/docs/concepts/#tools\"],\n",
|
||||
" [\"How to create tools\", \"/docs/how_to/custom_tools\"],\n",
|
||||
" [\"How to use a model to call tools\", \"/docs/how_to/tool_calling\"],\n",
|
||||
"]} />\n",
|
||||
"\n",
|
||||
":::info Using with LangGraph\n",
|
||||
"\n",
|
||||
"If you're using LangGraph, please refer to [this how-to guide](https://langchain-ai.github.io/langgraph/how-tos/pass-run-time-values-to-tools/)\n",
|
||||
"which shows how to create an agent that keeps track of a given user's favorite pets.\n",
|
||||
":::\n",
|
||||
"\n",
|
||||
":::caution Added in `langchain-core==0.2.21`\n",
|
||||
"\n",
|
||||
"Must have `langchain-core>=0.2.21` to use this functionality.\n",
|
||||
"\n",
|
||||
":::\n",
|
||||
"<Compatibility packagesAndVersions={[\n",
|
||||
" [\"langchain-core\", \"0.2.21\"],\n",
|
||||
"]} />\n",
|
||||
"\n",
|
||||
"You may need to bind values to a tool that are only known at runtime. For example, the tool logic may require using the ID of the user who made the request.\n",
|
||||
"\n",
|
||||
@@ -33,7 +27,13 @@
|
||||
"\n",
|
||||
"Instead, the LLM should only control the parameters of the tool that are meant to be controlled by the LLM, while other parameters (such as user ID) should be fixed by the application logic.\n",
|
||||
"\n",
|
||||
"This how-to guide shows you how to prevent the model from generating certain tool arguments and injecting them in directly at runtime."
|
||||
"This how-to guide shows you how to prevent the model from generating certain tool arguments and injecting them in directly at runtime.\n",
|
||||
"\n",
|
||||
":::info Using with LangGraph\n",
|
||||
"\n",
|
||||
"If you're using LangGraph, please refer to [this how-to guide](https://langchain-ai.github.io/langgraph/how-tos/pass-run-time-values-to-tools/)\n",
|
||||
"which shows how to create an agent that keeps track of a given user's favorite pets.\n",
|
||||
":::"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -597,9 +597,9 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "poetry-venv-311",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "poetry-venv-311"
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
@@ -611,7 +611,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
"version": "3.10.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -5,7 +5,6 @@ sidebar_position: 3
|
||||
|
||||
|
||||
Toolkits are collections of tools that are designed to be used together for specific tasks. They have convenient loading methods.
|
||||
For a complete list of available ready-made toolkits, visit [Integrations](/docs/integrations/toolkits/).
|
||||
|
||||
All Toolkits expose a `get_tools` method which returns a list of tools.
|
||||
You can therefore do:
|
||||
|
||||
@@ -196,8 +196,6 @@
|
||||
"\n",
|
||||
"Toolkits are collections of tools that are designed to be used together for specific tasks. They have convenient loading methods.\n",
|
||||
"\n",
|
||||
"For a complete list of available ready-made toolkits, visit [Integrations](/docs/integrations/toolkits/).\n",
|
||||
"\n",
|
||||
"All Toolkits expose a `get_tools` method which returns a list of tools.\n",
|
||||
"\n",
|
||||
"You're usually meant to use them this way:\n",
|
||||
|
||||
@@ -17,26 +17,25 @@
|
||||
"source": [
|
||||
"# ChatAI21\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"This notebook covers how to get started with AI21 chat models.\n",
|
||||
"Note that different chat models support different parameters. See the ",
|
||||
"[AI21 documentation](https://docs.ai21.com/reference) to learn more about the parameters in your chosen model.\n",
|
||||
"Note that different chat models support different parameters. See the [AI21 documentation](https://docs.ai21.com/reference) to learn more about the parameters in your chosen model.\n",
|
||||
"[See all AI21's LangChain components.](https://pypi.org/project/langchain-ai21/) \n",
|
||||
"## Installation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4c3bef91",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-02-15T06:50:44.929635Z",
|
||||
"start_time": "2024-02-15T06:50:41.209704Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install -qU langchain-ai21"
|
||||
"\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/v0.2/docs/integrations/chat/__package_name_short_snake__) | Package downloads | Package latest |\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| [ChatAI21](https://api.python.langchain.com/en/latest/chat_models/langchain_ai21.chat_models.ChatAI21.html#langchain_ai21.chat_models.ChatAI21) | [langchain-ai21](https://api.python.langchain.com/en/latest/ai21_api_reference.html) | ❌ | beta | ✅ |  |  |\n",
|
||||
"\n",
|
||||
"### Model features\n",
|
||||
"| [Tool calling](/docs/how_to/tool_calling) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
|
||||
"| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -44,10 +43,9 @@
|
||||
"id": "2b4f3e15",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Environment Setup\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"We'll need to get an [AI21 API key](https://docs.ai21.com/) and set the ",
|
||||
"`AI21_API_KEY` environment variable:\n"
|
||||
"We'll need to get an [AI21 API key](https://docs.ai21.com/) and set the `AI21_API_KEY` environment variable:\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -67,48 +65,166 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4828829d3da430ce",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "f6844fff-3702-4489-ab74-732f69f3b9d7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage"
|
||||
"If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "39353473fce5dd2e",
|
||||
"execution_count": null,
|
||||
"id": "7c2e19d3-7c58-4470-9e1a-718b27a32056",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "98e22f31-8acc-42d6-916d-415d1263c56e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f9699cd9-58f2-450e-aa64-799e66906c0f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"!pip install -qU langchain-ai21"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4828829d3da430ce",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and generate chat completions:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "c40756fb-cbf8-4d44-a293-3989d707237e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_ai21 import ChatAI21\n",
|
||||
"\n",
|
||||
"llm = ChatAI21(model=\"jamba-instruct\", temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2bdc5d68-2a19-495e-8c04-d11adc86d3ae",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Invocation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "46b982dc-5d8a-46da-a711-81c03ccd6adc",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='Bonjour, comment vas-tu?')"
|
||||
"AIMessage(content=\"J'adore programmer.\", id='run-2e8d16d6-a06e-45cb-8d0c-1c8208645033-0')"
|
||||
]
|
||||
},
|
||||
"execution_count": 1,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"messages = [\n",
|
||||
" (\n",
|
||||
" \"system\",\n",
|
||||
" \"You are a helpful assistant that translates English to French. Translate the user sentence.\",\n",
|
||||
" ),\n",
|
||||
" (\"human\", \"I love programming.\"),\n",
|
||||
"]\n",
|
||||
"ai_msg = llm.invoke(messages)\n",
|
||||
"ai_msg"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "10a30f84-b531-4fd5-8b5b-91512fbdc75b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Chaining\n",
|
||||
"\n",
|
||||
"We can [chain](/docs/how_to/sequence/) our model with a prompt template like so:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "39353473fce5dd2e",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='Ich liebe das Programmieren.', id='run-e1bd82dc-1a7e-4b2e-bde9-ac995929ac0f-0')"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_ai21 import ChatAI21\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"chat = ChatAI21(model=\"jamba-instruct\")\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
"prompt = ChatPromptTemplate(\n",
|
||||
" [\n",
|
||||
" (\"system\", \"You are a helpful assistant that translates English to French.\"),\n",
|
||||
" (\"human\", \"Translate this sentence from English to French. {english_text}.\"),\n",
|
||||
" (\n",
|
||||
" \"system\",\n",
|
||||
" \"You are a helpful assistant that translates {input_language} to {output_language}.\",\n",
|
||||
" ),\n",
|
||||
" (\"human\", \"{input}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chain = prompt | chat\n",
|
||||
"chain.invoke({\"english_text\": \"Hello, how are you?\"})"
|
||||
"chain = prompt | llm\n",
|
||||
"chain.invoke(\n",
|
||||
" {\n",
|
||||
" \"input_language\": \"English\",\n",
|
||||
" \"output_language\": \"German\",\n",
|
||||
" \"input\": \"I love programming.\",\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e79de691-9dd6-4697-b57e-59a4a3cc073a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all ChatAI21 features and configurations head to the API reference: https://api.python.langchain.com/en/latest/chat_models/langchain_ai21.chat_models.ChatAI21.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -128,7 +244,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -115,7 +115,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 2,
|
||||
"id": "cb09c344-1836-4e0c-acf8-11d13ac1dbae",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -123,8 +123,8 @@
|
||||
"from langchain_openai import AzureChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = AzureChatOpenAI(\n",
|
||||
" azure_deployment=\"YOUR-DEPLOYMENT\",\n",
|
||||
" api_version=\"2024-05-01-preview\",\n",
|
||||
" azure_deployment=\"gpt-35-turbo\", # or your deployment\n",
|
||||
" api_version=\"2023-06-01-preview\", # or your api version\n",
|
||||
" temperature=0,\n",
|
||||
" max_tokens=None,\n",
|
||||
" timeout=None,\n",
|
||||
@@ -143,7 +143,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 3,
|
||||
"id": "62e0dbc3",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -152,10 +152,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"J'adore la programmation.\", response_metadata={'token_usage': {'completion_tokens': 8, 'prompt_tokens': 31, 'total_tokens': 39}, 'model_name': 'gpt-35-turbo', 'system_fingerprint': None, 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}], 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}, id='run-a6a732c2-cb02-4e50-9a9c-ab30eab034fc-0', usage_metadata={'input_tokens': 31, 'output_tokens': 8, 'total_tokens': 39})"
|
||||
"AIMessage(content=\"J'adore la programmation.\", response_metadata={'token_usage': {'completion_tokens': 8, 'prompt_tokens': 31, 'total_tokens': 39}, 'model_name': 'gpt-35-turbo', 'system_fingerprint': None, 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}], 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}, id='run-bea4b46c-e3e1-4495-9d3a-698370ad963d-0', usage_metadata={'input_tokens': 31, 'output_tokens': 8, 'total_tokens': 39})"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -174,7 +174,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 4,
|
||||
"id": "d86145b3-bfef-46e8-b227-4dda5c9c2705",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -202,17 +202,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 5,
|
||||
"id": "e197d1d7-a070-4c96-9f8a-a0e86d046e0b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='Ich liebe das Programmieren.', response_metadata={'token_usage': {'completion_tokens': 6, 'prompt_tokens': 26, 'total_tokens': 32}, 'model_name': 'gpt-35-turbo', 'system_fingerprint': None, 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}], 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}, id='run-084967d7-06f2-441f-b5c1-477e2a9e9d03-0', usage_metadata={'input_tokens': 26, 'output_tokens': 6, 'total_tokens': 32})"
|
||||
"AIMessage(content='Ich liebe das Programmieren.', response_metadata={'token_usage': {'completion_tokens': 6, 'prompt_tokens': 26, 'total_tokens': 32}, 'model_name': 'gpt-35-turbo', 'system_fingerprint': None, 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}], 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}, id='run-cbc44038-09d3-40d4-9da2-c5910ee636ca-0', usage_metadata={'input_tokens': 26, 'output_tokens': 6, 'total_tokens': 32})"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -264,8 +264,8 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "84c411b0-1790-4798-8bb7-47d8ece4c2dc",
|
||||
"execution_count": 6,
|
||||
"id": "2ca02d23-60d0-43eb-8d04-070f61f8fefd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -288,22 +288,22 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "21234693-d92b-4d69-8a7f-55aa062084bf",
|
||||
"execution_count": 7,
|
||||
"id": "e1b07ae2-3de7-44bd-bfdc-b76f4ba45a35",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Total Cost (USD): $0.000078\n"
|
||||
"Total Cost (USD): $0.000074\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm_0301 = AzureChatOpenAI(\n",
|
||||
" azure_deployment=\"YOUR-DEPLOYMENT\",\n",
|
||||
" api_version=\"2024-05-01-preview\",\n",
|
||||
" azure_deployment=\"gpt-35-turbo\", # or your deployment\n",
|
||||
" api_version=\"2023-06-01-preview\", # or your api version\n",
|
||||
" model_version=\"0301\",\n",
|
||||
")\n",
|
||||
"with get_openai_callback() as cb:\n",
|
||||
@@ -338,7 +338,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "afaf8039",
|
||||
"id": "53fbf15f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
@@ -12,129 +12,103 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e49f1e0d",
|
||||
"id": "bf733a38-db84-4363-89e2-de6735c37230",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ChatCohere\n",
|
||||
"# Cohere\n",
|
||||
"\n",
|
||||
"This doc will help you get started with Cohere [chat models](/docs/concepts/#chat-models). For detailed documentation of all ChatCohere features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/chat_models/langchain_cohere.chat_models.ChatCohere.html).\n",
|
||||
"\n",
|
||||
"For an overview of all Cohere models head to the [Cohere docs](https://docs.cohere.com/docs/models).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/v0.2/docs/integrations/chat/cohere) | Package downloads | Package latest |\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| [ChatCohere](https://api.python.langchain.com/en/latest/chat_models/langchain_cohere.chat_models.ChatCohere.html) | [langchain-cohere](https://api.python.langchain.com/en/latest/cohere_api_reference.html) | ❌ | beta | ✅ |  |  |\n",
|
||||
"\n",
|
||||
"### Model features\n",
|
||||
"| [Tool calling](/docs/how_to/tool_calling) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
|
||||
"| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | \n",
|
||||
"This notebook covers how to get started with [Cohere chat models](https://cohere.com/chat).\n",
|
||||
"\n",
|
||||
"Head to the [API reference](https://api.python.langchain.com/en/latest/chat_models/langchain_community.chat_models.cohere.ChatCohere.html) for detailed documentation of all attributes and methods."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3607d67e-e56c-4102-bbba-df2edc0e109e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access Cohere models you'll need to create a Cohere account, get an API key, and install the `langchain-cohere` integration package.\n",
|
||||
"The integration lives in the `langchain-cohere` package. We can install these with:\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"```bash\n",
|
||||
"pip install -U langchain-cohere\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Head to https://dashboard.cohere.com/welcome/login to sign up to Cohere and generate an API key. Once you've done this set the COHERE_API_KEY environment variable:"
|
||||
"We'll also need to get a [Cohere API key](https://cohere.com/) and set the `COHERE_API_KEY` environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "433e8d2b-9519-4b49-b2c4-7ab65b046c94",
|
||||
"execution_count": 11,
|
||||
"id": "2108b517-1e8d-473d-92fa-4f930e8072a7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"COHERE_API_KEY\"] = getpass.getpass(\"Enter your Cohere API key: \")"
|
||||
"os.environ[\"COHERE_API_KEY\"] = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "72ee0c4b-9764-423a-9dbf-95129e185210",
|
||||
"id": "cf690fbb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
"It's also helpful (but not needed) to set up [LangSmith](https://smith.langchain.com/) for best-in-class observability"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a15d341e-3e26-4ca3-830b-5aab30ed66de",
|
||||
"execution_count": 12,
|
||||
"id": "7f11de02",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0730d6a1-c893-4840-9817-5e5251676d5d",
|
||||
"id": "4c26754b-b3c9-4d93-8f36-43049bd943bf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"## Usage\n",
|
||||
"\n",
|
||||
"The LangChain Cohere integration lives in the `langchain-cohere` package:"
|
||||
"ChatCohere supports all [ChatModel](/docs/how_to#chat-models) functionality:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "652d6238-1f87-422a-b135-f5abbb8652fc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-cohere"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a38cde65-254d-4219-a441-068766c0d4b5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and generate chat completions:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "cb09c344-1836-4e0c-acf8-11d13ac1dbae",
|
||||
"metadata": {},
|
||||
"execution_count": 5,
|
||||
"id": "d4a7c55d-b235-4ca4-a579-c90cc9570da9",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_cohere import ChatCohere\n",
|
||||
"\n",
|
||||
"llm = ChatCohere(\n",
|
||||
" model=\"command-r-plus\",\n",
|
||||
" temperature=0,\n",
|
||||
" max_tokens=None,\n",
|
||||
" timeout=None,\n",
|
||||
" max_retries=2,\n",
|
||||
" # other params...\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2b4f3e15",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Invocation"
|
||||
"from langchain_core.messages import HumanMessage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "62e0dbc3",
|
||||
"execution_count": 13,
|
||||
"id": "70cf04e8-423a-4ff6-8b09-f11fb711c817",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chat = ChatCohere()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "8199ef8f-eb8b-4253-9ea0-6c24a013ca4c",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
@@ -142,110 +116,223 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"J'adore programmer.\", additional_kwargs={'documents': None, 'citations': None, 'search_results': None, 'search_queries': None, 'is_search_required': None, 'generation_id': 'd84f80f3-4611-46e6-aed0-9d8665a20a11', 'token_count': {'input_tokens': 89, 'output_tokens': 5}}, response_metadata={'documents': None, 'citations': None, 'search_results': None, 'search_queries': None, 'is_search_required': None, 'generation_id': 'd84f80f3-4611-46e6-aed0-9d8665a20a11', 'token_count': {'input_tokens': 89, 'output_tokens': 5}}, id='run-514ab516-ed7e-48ac-b132-2598fb80ebef-0')"
|
||||
"AIMessage(content='4 && 5 \\n6 || 7 \\n\\nWould you like to play a game of odds and evens?', additional_kwargs={'documents': None, 'citations': None, 'search_results': None, 'search_queries': None, 'is_search_required': None, 'generation_id': '2076b614-52b3-4082-a259-cc92cd3d9fea', 'token_count': {'prompt_tokens': 68, 'response_tokens': 23, 'total_tokens': 91, 'billed_tokens': 77}}, response_metadata={'documents': None, 'citations': None, 'search_results': None, 'search_queries': None, 'is_search_required': None, 'generation_id': '2076b614-52b3-4082-a259-cc92cd3d9fea', 'token_count': {'prompt_tokens': 68, 'response_tokens': 23, 'total_tokens': 91, 'billed_tokens': 77}}, id='run-3475e0c8-c89b-4937-9300-e07d652455e1-0')"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"messages = [\n",
|
||||
" (\n",
|
||||
" \"system\",\n",
|
||||
" \"You are a helpful assistant that translates English to French. Translate the user sentence.\",\n",
|
||||
" ),\n",
|
||||
" (\"human\", \"I love programming.\"),\n",
|
||||
"]\n",
|
||||
"ai_msg = llm.invoke(messages)\n",
|
||||
"ai_msg"
|
||||
"messages = [HumanMessage(content=\"1\"), HumanMessage(content=\"2 3\")]\n",
|
||||
"chat.invoke(messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "d86145b3-bfef-46e8-b227-4dda5c9c2705",
|
||||
"metadata": {},
|
||||
"execution_count": 16,
|
||||
"id": "c5fac0e9-05a4-4fc1-a3b3-e5bbb24b971b",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='4 && 5', additional_kwargs={'documents': None, 'citations': None, 'search_results': None, 'search_queries': None, 'is_search_required': None, 'generation_id': 'f0708a92-f874-46ee-9b93-334d616ad92e', 'token_count': {'prompt_tokens': 68, 'response_tokens': 3, 'total_tokens': 71, 'billed_tokens': 57}}, response_metadata={'documents': None, 'citations': None, 'search_results': None, 'search_queries': None, 'is_search_required': None, 'generation_id': 'f0708a92-f874-46ee-9b93-334d616ad92e', 'token_count': {'prompt_tokens': 68, 'response_tokens': 3, 'total_tokens': 71, 'billed_tokens': 57}}, id='run-1635e63e-2994-4e7f-986e-152ddfc95777-0')"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"await chat.ainvoke(messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "025be980-e50d-4a68-93dc-c9c7b500ce34",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"J'adore programmer.\n"
|
||||
"4 && 5"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(ai_msg.content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "18e2bfc0-7e78-4528-a73f-499ac150dca8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Chaining\n",
|
||||
"\n",
|
||||
"We can [chain](/docs/how_to/sequence/) our model with a prompt template like so:"
|
||||
"for chunk in chat.stream(messages):\n",
|
||||
" print(chunk.content, end=\"\", flush=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "e197d1d7-a070-4c96-9f8a-a0e86d046e0b",
|
||||
"execution_count": 18,
|
||||
"id": "064288e4-f184-4496-9427-bcf148fa055e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='Ich liebe Programmierung.', additional_kwargs={'documents': None, 'citations': None, 'search_results': None, 'search_queries': None, 'is_search_required': None, 'generation_id': '053bebde-4e1d-4d06-8ee6-3446e7afa25e', 'token_count': {'input_tokens': 84, 'output_tokens': 6}}, response_metadata={'documents': None, 'citations': None, 'search_results': None, 'search_queries': None, 'is_search_required': None, 'generation_id': '053bebde-4e1d-4d06-8ee6-3446e7afa25e', 'token_count': {'input_tokens': 84, 'output_tokens': 6}}, id='run-53700708-b7fb-417b-af36-1a6fcde38e7d-0')"
|
||||
"[AIMessage(content='4 && 5', additional_kwargs={'documents': None, 'citations': None, 'search_results': None, 'search_queries': None, 'is_search_required': None, 'generation_id': '6770ca86-f6c3-4ba3-a285-c4772160612f', 'token_count': {'prompt_tokens': 68, 'response_tokens': 3, 'total_tokens': 71, 'billed_tokens': 57}}, response_metadata={'documents': None, 'citations': None, 'search_results': None, 'search_queries': None, 'is_search_required': None, 'generation_id': '6770ca86-f6c3-4ba3-a285-c4772160612f', 'token_count': {'prompt_tokens': 68, 'response_tokens': 3, 'total_tokens': 71, 'billed_tokens': 57}}, id='run-8d6fade2-1b39-4e31-ab23-4be622dd0027-0')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\n",
|
||||
" \"system\",\n",
|
||||
" \"You are a helpful assistant that translates {input_language} to {output_language}.\",\n",
|
||||
" ),\n",
|
||||
" (\"human\", \"{input}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chain = prompt | llm\n",
|
||||
"chain.invoke(\n",
|
||||
" {\n",
|
||||
" \"input_language\": \"English\",\n",
|
||||
" \"output_language\": \"German\",\n",
|
||||
" \"input\": \"I love programming.\",\n",
|
||||
" }\n",
|
||||
")"
|
||||
"chat.batch([messages])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3",
|
||||
"id": "f1c56460",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"## Chaining\n",
|
||||
"\n",
|
||||
"For detailed documentation of all ChatCohere features and configurations head to the API reference: https://api.python.langchain.com/en/latest/chat_models/langchain_cohere.chat_models.ChatCohere.html"
|
||||
"You can also easily combine with a prompt template for easy structuring of user input. We can do this using [LCEL](/docs/concepts#langchain-expression-language-lcel)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "0851b103",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_template(\"Tell me a joke about {topic}\")\n",
|
||||
"chain = prompt | chat"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "ae950c0f-1691-47f1-b609-273033cae707",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='What color socks do bears wear?\\n\\nThey don’t wear socks, they have bear feet. \\n\\nHope you laughed! If not, maybe this will help: laughter is the best medicine, and a good sense of humor is infectious!', additional_kwargs={'documents': None, 'citations': None, 'search_results': None, 'search_queries': None, 'is_search_required': None, 'generation_id': '6edccf44-9bc8-4139-b30e-13b368f3563c', 'token_count': {'prompt_tokens': 68, 'response_tokens': 51, 'total_tokens': 119, 'billed_tokens': 108}}, response_metadata={'documents': None, 'citations': None, 'search_results': None, 'search_queries': None, 'is_search_required': None, 'generation_id': '6edccf44-9bc8-4139-b30e-13b368f3563c', 'token_count': {'prompt_tokens': 68, 'response_tokens': 51, 'total_tokens': 119, 'billed_tokens': 108}}, id='run-ef7f9789-0d4d-43bf-a4f7-f2a0e27a5320-0')"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.invoke({\"topic\": \"bears\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "12db8d69",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tool calling\n",
|
||||
"\n",
|
||||
"Cohere supports tool calling functionalities!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "337e24af",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.messages import (\n",
|
||||
" HumanMessage,\n",
|
||||
" ToolMessage,\n",
|
||||
")\n",
|
||||
"from langchain_core.tools import tool"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "74d292e7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"@tool\n",
|
||||
"def magic_function(number: int) -> int:\n",
|
||||
" \"\"\"Applies a magic operation to an integer\n",
|
||||
" Args:\n",
|
||||
" number: Number to have magic operation performed on\n",
|
||||
" \"\"\"\n",
|
||||
" return number + 10\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def invoke_tools(tool_calls, messages):\n",
|
||||
" for tool_call in tool_calls:\n",
|
||||
" selected_tool = {\"magic_function\": magic_function}[tool_call[\"name\"].lower()]\n",
|
||||
" tool_output = selected_tool.invoke(tool_call[\"args\"])\n",
|
||||
" messages.append(ToolMessage(tool_output, tool_call_id=tool_call[\"id\"]))\n",
|
||||
" return messages\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"tools = [magic_function]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "ecafcbc6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm_with_tools = chat.bind_tools(tools=tools)\n",
|
||||
"messages = [HumanMessage(content=\"What is the value of magic_function(2)?\")]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "aa34fc39",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='The value of magic_function(2) is 12.', additional_kwargs={'documents': [{'id': 'magic_function:0:2:0', 'output': '12', 'tool_name': 'magic_function'}], 'citations': [ChatCitation(start=34, end=36, text='12', document_ids=['magic_function:0:2:0'])], 'search_results': None, 'search_queries': None, 'is_search_required': None, 'generation_id': '96a55791-0c58-4e2e-bc2a-8550e137c46d', 'token_count': {'input_tokens': 998, 'output_tokens': 59}}, response_metadata={'documents': [{'id': 'magic_function:0:2:0', 'output': '12', 'tool_name': 'magic_function'}], 'citations': [ChatCitation(start=34, end=36, text='12', document_ids=['magic_function:0:2:0'])], 'search_results': None, 'search_queries': None, 'is_search_required': None, 'generation_id': '96a55791-0c58-4e2e-bc2a-8550e137c46d', 'token_count': {'input_tokens': 998, 'output_tokens': 59}}, id='run-f318a9cf-55c8-44f4-91d1-27cf46c6a465-0')"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"res = llm_with_tools.invoke(messages)\n",
|
||||
"while res.tool_calls:\n",
|
||||
" messages.append(res)\n",
|
||||
" messages = invoke_tools(res.tool_calls, messages)\n",
|
||||
" res = llm_with_tools.invoke(messages)\n",
|
||||
"\n",
|
||||
"res"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "poetry-venv-2",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "poetry-venv-2"
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
@@ -257,7 +344,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -36,7 +36,7 @@
|
||||
"### Model features\n",
|
||||
"| [Tool calling](/docs/how_to/tool_calling/) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
|
||||
"| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | \n",
|
||||
"| ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"### Supported Methods\n",
|
||||
"\n",
|
||||
@@ -395,6 +395,66 @@
|
||||
"chat_model_external.invoke(\"How to use Databricks?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Function calling on Databricks"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Databricks Function Calling is OpenAI-compatible and is only available during model serving as part of Foundation Model APIs.\n",
|
||||
"\n",
|
||||
"See [Databricks function calling introduction](https://docs.databricks.com/en/machine-learning/model-serving/function-calling.html#supported-models) for supported models."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.chat_models.databricks import ChatDatabricks\n",
|
||||
"\n",
|
||||
"llm = ChatDatabricks(endpoint=\"databricks-meta-llama-3-70b-instruct\")\n",
|
||||
"tools = [\n",
|
||||
" {\n",
|
||||
" \"type\": \"function\",\n",
|
||||
" \"function\": {\n",
|
||||
" \"name\": \"get_current_weather\",\n",
|
||||
" \"description\": \"Get the current weather in a given location\",\n",
|
||||
" \"parameters\": {\n",
|
||||
" \"type\": \"object\",\n",
|
||||
" \"properties\": {\n",
|
||||
" \"location\": {\n",
|
||||
" \"type\": \"string\",\n",
|
||||
" \"description\": \"The city and state, e.g. San Francisco, CA\",\n",
|
||||
" },\n",
|
||||
" \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"]},\n",
|
||||
" },\n",
|
||||
" },\n",
|
||||
" },\n",
|
||||
" }\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"# supported tool_choice values: \"auto\", \"required\", \"none\", function name in string format,\n",
|
||||
"# or a dictionary as {\"type\": \"function\", \"function\": {\"name\": <<tool_name>>}}\n",
|
||||
"model = llm.bind_tools(tools, tool_choice=\"auto\")\n",
|
||||
"\n",
|
||||
"messages = [{\"role\": \"user\", \"content\": \"What is the current temperature of Chicago?\"}]\n",
|
||||
"print(model.invoke(messages))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"See [Databricks Unity Catalog](docs/integrations/tools/databricks.ipynb) about how to use UC functions in chains."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -4,18 +4,68 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Hugging Face\n",
|
||||
"# ChatHuggingFace\n",
|
||||
"\n",
|
||||
"This notebook shows how to get started using `Hugging Face` LLM's as chat models.\n",
|
||||
"This will help you getting started with `langchain_huggingface` [chat models](/docs/concepts/#chat-models). For detailed documentation of all `ChatHuggingFace` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/chat_models/langchain_huggingface.chat_models.huggingface.ChatHuggingFace.html). For a list of models supported by Hugging Face check out [this page](https://huggingface.co/models).\n",
|
||||
"\n",
|
||||
"In particular, we will:\n",
|
||||
"1. Utilize the [HuggingFaceEndpoint](https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/llms/huggingface_endpoint.py) integrations to instantiate an `LLM`.\n",
|
||||
"2. Utilize the `ChatHuggingFace` class to enable any of these LLMs to interface with LangChain's [Chat Messages](/docs/concepts/#message-types) abstraction.\n",
|
||||
"3. Explore tool calling with the `ChatHuggingFace`.\n",
|
||||
"4. Demonstrate how to use an open-source LLM to power an `ChatAgent` pipeline\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"> Note: To get started, you'll need to have a [Hugging Face Access Token](https://huggingface.co/docs/hub/security-tokens) saved as an environment variable: `HUGGINGFACEHUB_API_TOKEN`."
|
||||
"| Class | Package | Local | Serializable | JS support | Package downloads | Package latest |\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| [ChatHuggingFace](https://api.python.langchain.com/en/latest/chat_models/langchain_huggingface.chat_models.huggingface.ChatHuggingFace.html) | [langchain-huggingface](https://api.python.langchain.com/en/latest/huggingface_api_reference.html) | ✅ | beta | ❌ |  |  |\n",
|
||||
"\n",
|
||||
"### Model features\n",
|
||||
"| [Tool calling](/docs/how_to/tool_calling) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
|
||||
"| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access Hugging Face models you'll need to create a Hugging Face account, get an API key, and install the `langchain-huggingface` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Generate a [Hugging Face Access Token](https://huggingface.co/docs/hub/security-tokens) and store it as an environment variable: `HUGGINGFACEHUB_API_TOKEN`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"HUGGINGFACEHUB_API_TOKEN\"):\n",
|
||||
" os.environ[\"HUGGINGFACEHUB_API_TOKEN\"] = getpass.getpass(\"Enter your token: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | JS support | Package downloads | Package latest |\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| [ChatHuggingFace](https://api.python.langchain.com/en/latest/chat_models/langchain_huggingface.chat_models.huggingface.ChatHuggingFace.html) | [langchain_huggingface](https://api.python.langchain.com/en/latest/huggingface_api_reference.html) | ✅ | ❌ | ❌ |  |  |\n",
|
||||
"\n",
|
||||
"### Model features\n",
|
||||
"| [Tool calling](/docs/how_to/tool_calling) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
|
||||
"| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access `langchain_huggingface` models you'll need to create a/an `Hugging Face` account, get an API key, and install the `langchain_huggingface` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"You'll need to have a [Hugging Face Access Token](https://huggingface.co/docs/hub/security-tokens) saved as an environment variable: `HUGGINGFACEHUB_API_TOKEN`."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -24,14 +74,41 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain-huggingface text-generation transformers google-search-results numexpr langchainhub sentencepiece jinja2"
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"HUGGINGFACEHUB_API_TOKEN\"] = getpass.getpass(\n",
|
||||
" \"Enter your Hugging Face API key: \"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.1.2\u001b[0m\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain-huggingface text-generation transformers google-search-results numexpr langchainhub sentencepiece jinja2 bitsandbytes accelerate"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 1. Instantiate an LLM"
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"You can instantiate a `ChatHuggingFace` model in two different ways, either from a `HuggingFaceEndpoint` or from a `HuggingFacePipeline`."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -43,19 +120,32 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\n",
|
||||
"Token is valid (permission: fineGrained).\n",
|
||||
"Your token has been saved to /Users/isaachershenson/.cache/huggingface/token\n",
|
||||
"Login successful\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_huggingface import HuggingFaceEndpoint\n",
|
||||
"from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint\n",
|
||||
"\n",
|
||||
"llm = HuggingFaceEndpoint(\n",
|
||||
" repo_id=\"meta-llama/Meta-Llama-3-70B-Instruct\",\n",
|
||||
" repo_id=\"HuggingFaceH4/zephyr-7b-beta\",\n",
|
||||
" task=\"text-generation\",\n",
|
||||
" max_new_tokens=512,\n",
|
||||
" do_sample=False,\n",
|
||||
" repetition_penalty=1.03,\n",
|
||||
")"
|
||||
")\n",
|
||||
"\n",
|
||||
"chat_model = ChatHuggingFace(llm=llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -67,11 +157,194 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "da32ae8ec8864ccfb480044fe2eec065",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"config.json: 0%| | 0.00/638 [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "ee1891b7e5f64fba88ba35f444e598fb",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"model.safetensors.index.json: 0%| | 0.00/23.9k [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "9ff1ec7f575b42adb608c15955de7888",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"Downloading shards: 0%| | 0/8 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "5214696698814b919f561647a684d1e4",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"model-00001-of-00008.safetensors: 0%| | 0.00/1.89G [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "9ac334c69a2048a0a77340cca44d8c80",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"model-00002-of-00008.safetensors: 0%| | 0.00/1.95G [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "465ad1a51d414e0daf1cd9308455be94",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"model-00003-of-00008.safetensors: 0%| | 0.00/1.98G [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "a329c43c3d574df0afd38c7457cc639c",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"model-00004-of-00008.safetensors: 0%| | 0.00/1.95G [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "a736a6c4023542af8c6ecc232b823d18",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"model-00005-of-00008.safetensors: 0%| | 0.00/1.98G [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "8bdee70b843d433e8236fff83ecda022",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"model-00006-of-00008.safetensors: 0%| | 0.00/1.95G [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "5ecb6103e0304ae188a14d598119a361",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"model-00007-of-00008.safetensors: 0%| | 0.00/1.98G [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "174e3cb487bd453c9c70d7614254a35e",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"model-00008-of-00008.safetensors: 0%| | 0.00/816M [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "28f8c233b04b45d7800e12c785a8c4bc",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"Loading checkpoint shards: 0%| | 0/8 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "449dfa023dc8430fbcde94544ba01c4f",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"generation_config.json: 0%| | 0.00/111 [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_huggingface import HuggingFacePipeline\n",
|
||||
"from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline\n",
|
||||
"\n",
|
||||
"llm = HuggingFacePipeline.from_model_id(\n",
|
||||
" model_id=\"HuggingFaceH4/zephyr-7b-beta\",\n",
|
||||
@@ -81,81 +354,7 @@
|
||||
" do_sample=False,\n",
|
||||
" repetition_penalty=1.03,\n",
|
||||
" ),\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To run a quantized version, you might specify a `bitsandbytes` quantization config as follows:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"from transformers import BitsAndBytesConfig\n",
|
||||
"\n",
|
||||
"quantization_config = BitsAndBytesConfig(\n",
|
||||
" load_in_4bit=True,\n",
|
||||
" bnb_4bit_quant_type=\"nf4\",\n",
|
||||
" bnb_4bit_compute_dtype=\"float16\",\n",
|
||||
" bnb_4bit_use_double_quant=True\n",
|
||||
")\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"and pass it to the `HuggingFacePipeline` as a part of its `model_kwargs`:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"pipeline = HuggingFacePipeline(\n",
|
||||
" ...\n",
|
||||
"\n",
|
||||
" model_kwargs={\"quantization_config\": quantization_config},\n",
|
||||
" \n",
|
||||
" ...\n",
|
||||
")\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Instantiate the `ChatHuggingFace` to apply chat templates"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Instantiate the chat model and some messages to pass. \n",
|
||||
"\n",
|
||||
"**Note**: you need to pass the `model_id` explicitly if you are using self-hosted `text-generation-inference`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_core.messages import (\n",
|
||||
" HumanMessage,\n",
|
||||
" SystemMessage,\n",
|
||||
")\n",
|
||||
"from langchain_huggingface import ChatHuggingFace\n",
|
||||
"\n",
|
||||
"messages = [\n",
|
||||
" SystemMessage(content=\"You're a helpful assistant\"),\n",
|
||||
" HumanMessage(\n",
|
||||
" content=\"What happens when an unstoppable force meets an immovable object?\"\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"chat_model = ChatHuggingFace(llm=llm)"
|
||||
]
|
||||
@@ -164,284 +363,24 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Check the `model_id`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'meta-llama/Meta-Llama-3-70B-Instruct'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chat_model.model_id"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Inspect how the chat messages are formatted for the LLM call."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\\n\\nYou're a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\\n\\nWhat happens when an unstoppable force meets an immovable object?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\\n\\n\""
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chat_model._to_chat_prompt(messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Call the model."
|
||||
"### Instatiating with Quantization\n",
|
||||
"\n",
|
||||
"To run a quantized version of your model, you can specify a `bitsandbytes` quantization config as follows:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"One of the classic thought experiments in physics!\n",
|
||||
"\n",
|
||||
"The concept of an unstoppable force meeting an immovable object is a paradox that has puzzled philosophers and physicists for centuries. It's a mind-bending scenario that challenges our understanding of the fundamental laws of physics.\n",
|
||||
"\n",
|
||||
"In essence, an unstoppable force is something that cannot be halted or slowed down, while an immovable object is something that cannot be moved or displaced. If we assume that both entities exist in the same universe, we run into a logical contradiction.\n",
|
||||
"\n",
|
||||
"Here\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"res = chat_model.invoke(messages)\n",
|
||||
"print(res.content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 3. Explore the tool calling with `ChatHuggingFace`\n",
|
||||
"\n",
|
||||
"`text-generation-inference` supports tool with open source LLMs starting from v2.0.1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Create a basic tool (`Calculator`):"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
|
||||
"from transformers import BitsAndBytesConfig\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class Calculator(BaseModel):\n",
|
||||
" \"\"\"Multiply two integers together.\"\"\"\n",
|
||||
"\n",
|
||||
" a: int = Field(..., description=\"First integer\")\n",
|
||||
" b: int = Field(..., description=\"Second integer\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Bind the tool to the `chat_model` and give it a try:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Calculator(a=3, b=12)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_core.output_parsers.openai_tools import PydanticToolsParser\n",
|
||||
"\n",
|
||||
"llm_with_multiply = chat_model.bind_tools([Calculator], tool_choice=\"auto\")\n",
|
||||
"parser = PydanticToolsParser(tools=[Calculator])\n",
|
||||
"tool_chain = llm_with_multiply | parser\n",
|
||||
"tool_chain.invoke(\"How much is 3 multiplied by 12?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 4. Take it for a spin as an agent!\n",
|
||||
"\n",
|
||||
"Here we'll test out `Zephyr-7B-beta` as a zero-shot `ReAct` Agent. \n",
|
||||
"\n",
|
||||
"The agent is based on the paper [ReAct: Synergizing Reasoning and Acting in Language Models](https://arxiv.org/abs/2210.03629)\n",
|
||||
"\n",
|
||||
"The example below is taken from [here](https://python.langchain.com/v0.1/docs/modules/agents/agent_types/react/#using-chat-models).\n",
|
||||
"\n",
|
||||
"> Note: To run this section, you'll need to have a [SerpAPI Token](https://serpapi.com/) saved as an environment variable: `SERPAPI_API_KEY`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import hub\n",
|
||||
"from langchain.agents import AgentExecutor, load_tools\n",
|
||||
"from langchain.agents.format_scratchpad import format_log_to_str\n",
|
||||
"from langchain.agents.output_parsers import (\n",
|
||||
" ReActJsonSingleInputOutputParser,\n",
|
||||
")\n",
|
||||
"from langchain.tools.render import render_text_description\n",
|
||||
"from langchain_community.utilities import SerpAPIWrapper"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Configure the agent with a `react-json` style prompt and access to a search engine and calculator."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# setup tools\n",
|
||||
"tools = load_tools([\"serpapi\", \"llm-math\"], llm=llm)\n",
|
||||
"\n",
|
||||
"# setup ReAct style prompt\n",
|
||||
"prompt = hub.pull(\"hwchase17/react-json\")\n",
|
||||
"prompt = prompt.partial(\n",
|
||||
" tools=render_text_description(tools),\n",
|
||||
" tool_names=\", \".join([t.name for t in tools]),\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# define the agent\n",
|
||||
"chat_model_with_stop = chat_model.bind(stop=[\"\\nObservation\"])\n",
|
||||
"agent = (\n",
|
||||
" {\n",
|
||||
" \"input\": lambda x: x[\"input\"],\n",
|
||||
" \"agent_scratchpad\": lambda x: format_log_to_str(x[\"intermediate_steps\"]),\n",
|
||||
" }\n",
|
||||
" | prompt\n",
|
||||
" | chat_model_with_stop\n",
|
||||
" | ReActJsonSingleInputOutputParser()\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# instantiate AgentExecutor\n",
|
||||
"agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mQuestion: Who is Leo DiCaprio's girlfriend? What is her current age raised to the 0.43 power?\n",
|
||||
"\n",
|
||||
"Thought: I need to use the Search tool to find out who Leo DiCaprio's current girlfriend is. Then, I can use the Calculator tool to raise her current age to the power of 0.43.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Search\",\n",
|
||||
" \"action_input\": \"leo dicaprio girlfriend\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3mLeonardo DiCaprio may have found The One in Vittoria Ceretti. “They are in love,” a source exclusively reveals in the latest issue of Us Weekly. “Leo was clearly very proud to be showing Vittoria off and letting everyone see how happy they are together.”\u001b[0m\u001b[32;1m\u001b[1;3mNow that we know Leo DiCaprio's current girlfriend is Vittoria Ceretti, let's find out her current age.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Search\",\n",
|
||||
" \"action_input\": \"vittoria ceretti age\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m25 years\u001b[0m\u001b[32;1m\u001b[1;3mNow that we know Vittoria Ceretti's current age is 25, let's use the Calculator tool to raise it to the power of 0.43.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Calculator\",\n",
|
||||
" \"action_input\": \"25^0.43\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\u001b[0m\u001b[33;1m\u001b[1;3mAnswer: 3.991298452658078\u001b[0m\u001b[32;1m\u001b[1;3mFinal Answer: Vittoria Ceretti, Leo DiCaprio's current girlfriend, when raised to the power of 0.43 is approximately 4.0 rounded to two decimal places. Her current age is 25 years old.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': \"Who is Leo DiCaprio's girlfriend? What is her current age raised to the 0.43 power?\",\n",
|
||||
" 'output': \"Vittoria Ceretti, Leo DiCaprio's current girlfriend, when raised to the power of 0.43 is approximately 4.0 rounded to two decimal places. Her current age is 25 years old.\"}"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.invoke(\n",
|
||||
" {\n",
|
||||
" \"input\": \"Who is Leo DiCaprio's girlfriend? What is her current age raised to the 0.43 power?\"\n",
|
||||
" }\n",
|
||||
"quantization_config = BitsAndBytesConfig(\n",
|
||||
" load_in_4bit=True,\n",
|
||||
" bnb_4bit_quant_type=\"nf4\",\n",
|
||||
" bnb_4bit_compute_dtype=\"float16\",\n",
|
||||
" bnb_4bit_use_double_quant=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -449,14 +388,92 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Wahoo! Our open-source 7b parameter Zephyr model was able to:\n",
|
||||
"and pass it to the `HuggingFacePipeline` as a part of its `model_kwargs`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = HuggingFacePipeline.from_model_id(\n",
|
||||
" model_id=\"HuggingFaceH4/zephyr-7b-beta\",\n",
|
||||
" task=\"text-generation\",\n",
|
||||
" pipeline_kwargs=dict(\n",
|
||||
" max_new_tokens=512,\n",
|
||||
" do_sample=False,\n",
|
||||
" repetition_penalty=1.03,\n",
|
||||
" ),\n",
|
||||
" model_kwargs={\"quantization_config\": quantization_config},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"1. Plan out a series of actions: `I need to use the Search tool to find out who Leo DiCaprio's current girlfriend is. Then, I can use the Calculator tool to raise her current age to the power of 0.43.`\n",
|
||||
"2. Then execute a search using the SerpAPI tool to find who Leo DiCaprio's current girlfriend is\n",
|
||||
"3. Execute another search to find her age\n",
|
||||
"4. And finally use a calculator tool to calculate her age raised to the power of 0.43\n",
|
||||
"chat_model = ChatHuggingFace(llm=llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Invocation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.messages import (\n",
|
||||
" HumanMessage,\n",
|
||||
" SystemMessage,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"It's exciting to see how far open-source LLM's can go as general purpose reasoning agents. Give it a try yourself!"
|
||||
"messages = [\n",
|
||||
" SystemMessage(content=\"You're a helpful assistant\"),\n",
|
||||
" HumanMessage(\n",
|
||||
" content=\"What happens when an unstoppable force meets an immovable object?\"\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"ai_msg = chat_model.invoke(messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"According to the popular phrase and hypothetical scenario, when an unstoppable force meets an immovable object, a paradoxical situation arises as both forces are seemingly contradictory. On one hand, an unstoppable force is an entity that cannot be stopped or prevented from moving forward, while on the other hand, an immovable object is something that cannot be moved or displaced from its position. \n",
|
||||
"\n",
|
||||
"In this scenario, it is un\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(ai_msg.content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `ChatHuggingFace` features and configurations head to the API reference: https://api.python.langchain.com/en/latest/chat_models/langchain_huggingface.chat_models.huggingface.ChatHuggingFace.html"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all ChatHuggingFace features and configurations head to the API reference: https://api.python.langchain.com/en/latest/chat_models/langchain_huggingface.chat_models.huggingface.ChatHuggingFace.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -476,7 +493,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Kinetica SqlAssist LLM Demo\n",
|
||||
"# Kinetica Language To SQL Chat Model\n",
|
||||
"\n",
|
||||
"This notebook demonstrates how to use Kinetica to transform natural language into SQL\n",
|
||||
"and simplify the process of data retrieval. This demo is intended to show the mechanics\n",
|
||||
|
||||
@@ -12,254 +12,228 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bf733a38-db84-4363-89e2-de6735c37230",
|
||||
"id": "d295c2a2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# MistralAI\n",
|
||||
"# ChatMistralAI\n",
|
||||
"\n",
|
||||
"This notebook covers how to get started with MistralAI chat models, via their [API](https://docs.mistral.ai/api/).\n",
|
||||
"This will help you getting started with Mistral [chat models](/docs/concepts/#chat-models). For detailed documentation of all `ChatMistralAI` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/chat_models/langchain_mistralai.chat_models.ChatMistralAI.html). The `ChatMistralAI` class is built on top of the [Mistral API](https://docs.mistral.ai/api/). For a list of all the models supported by Mistral, check out [this page](https://docs.mistral.ai/getting-started/models/).\n",
|
||||
"\n",
|
||||
"A valid [API key](https://console.mistral.ai/users/api-keys/) is needed to communicate with the API.\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"Head to the [API reference](https://api.python.langchain.com/en/latest/chat_models/langchain_mistralai.chat_models.ChatMistralAI.html) for detailed documentation of all attributes and methods."
|
||||
"| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/v0.2/docs/integrations/chat/mistral) | Package downloads | Package latest |\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| [ChatMistralAI](https://api.python.langchain.com/en/latest/chat_models/langchain_mistralai.chat_models.ChatMistralAI.html) | [langchain_mistralai](https://api.python.langchain.com/en/latest/mistralai_api_reference.html) | ❌ | beta | ✅ |  |  |\n",
|
||||
"\n",
|
||||
"### Model features\n",
|
||||
"| [Tool calling](/docs/how_to/tool_calling) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
|
||||
"| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"To access `ChatMistralAI` models you'll need to create a Mistral account, get an API key, and install the `langchain_mistralai` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"A valid [API key](https://console.mistral.ai/users/api-keys/) is needed to communicate with the API. Once you've done this set the MISTRAL_API_KEY environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2461605e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"MISTRAL_API_KEY\"] = getpass.getpass(\"Enter your Mistral API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cc686b8f",
|
||||
"id": "788f37ac",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup\n",
|
||||
"If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "007209d5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0f5c74f9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"You will need the `langchain-core` and `langchain-mistralai` package to use the API. You can install these with:\n",
|
||||
"The LangChain Mistral integration lives in the `langchain_mistralai` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1ab11a65",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain_mistralai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fb1a335e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"```bash\n",
|
||||
"pip install -U langchain-core langchain-mistralai\n",
|
||||
"Now we can instantiate our model object and generate chat completions:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "e6c38580",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_mistralai import ChatMistralAI\n",
|
||||
"\n",
|
||||
"We'll also need to get a [Mistral API key](https://console.mistral.ai/users/api-keys/)"
|
||||
"llm = ChatMistralAI(\n",
|
||||
" model=\"mistral-large-latest\",\n",
|
||||
" temperature=0,\n",
|
||||
" max_retries=2,\n",
|
||||
" # other params...\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "aec79099",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Invocation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "8838c3cc",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='Sure, I\\'d be happy to help you translate that sentence into French! The English sentence \"I love programming\" translates to \"J\\'aime programmer\" in French. Let me know if you have any other questions or need further assistance!', response_metadata={'token_usage': {'prompt_tokens': 32, 'total_tokens': 84, 'completion_tokens': 52}, 'model': 'mistral-small', 'finish_reason': 'stop'}, id='run-64bac156-7160-4b68-b67e-4161f63e021f-0', usage_metadata={'input_tokens': 32, 'output_tokens': 52, 'total_tokens': 84})"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"messages = [\n",
|
||||
" (\n",
|
||||
" \"system\",\n",
|
||||
" \"You are a helpful assistant that translates English to French. Translate the user sentence.\",\n",
|
||||
" ),\n",
|
||||
" (\"human\", \"I love programming.\"),\n",
|
||||
"]\n",
|
||||
"ai_msg = llm.invoke(messages)\n",
|
||||
"ai_msg"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "c3fd4184",
|
||||
"id": "bbf6a048",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"api_key = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "502127fd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "d4a7c55d-b235-4ca4-a579-c90cc9570da9",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.messages import HumanMessage\n",
|
||||
"from langchain_mistralai.chat_models import ChatMistralAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "70cf04e8-423a-4ff6-8b09-f11fb711c817",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# If api_key is not passed, default behavior is to use the `MISTRAL_API_KEY` environment variable.\n",
|
||||
"chat = ChatMistralAI(api_key=api_key)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "8199ef8f-eb8b-4253-9ea0-6c24a013ca4c",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"Who's there? I was just about to ask the same thing! How can I assist you today?\")"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"messages = [HumanMessage(content=\"knock knock\")]\n",
|
||||
"chat.invoke(messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c361ab1e-8c0c-4206-9e3c-9d1424a12b9c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Async"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "c5fac0e9-05a4-4fc1-a3b3-e5bbb24b971b",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='Who\\'s there?\\n\\n(You can then continue the \"knock knock\" joke by saying the name of the person or character who should be responding. For example, if I say \"Banana,\" you could respond with \"Banana who?\" and I would say \"Banana bunch! Get it? Because a group of bananas is called a \\'bunch\\'!\" and then we would both laugh and have a great time. But really, you can put anything you want in the spot where I put \"Banana\" and it will still technically be a \"knock knock\" joke. The possibilities are endless!)')"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"await chat.ainvoke(messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "86ccef97",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Streaming\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "025be980-e50d-4a68-93dc-c9c7b500ce34",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Who's there?\n",
|
||||
"\n",
|
||||
"(After this, the conversation can continue as a call and response \"who's there\" joke. Here is an example of how it could go:\n",
|
||||
"\n",
|
||||
"You say: Orange.\n",
|
||||
"I say: Orange who?\n",
|
||||
"You say: Orange you glad I didn't say banana!?)\n",
|
||||
"\n",
|
||||
"But since you asked for a knock knock joke specifically, here's one for you:\n",
|
||||
"\n",
|
||||
"Knock knock.\n",
|
||||
"\n",
|
||||
"Me: Who's there?\n",
|
||||
"\n",
|
||||
"You: Lettuce.\n",
|
||||
"\n",
|
||||
"Me: Lettuce who?\n",
|
||||
"\n",
|
||||
"You: Lettuce in, it's too cold out here!\n",
|
||||
"\n",
|
||||
"I hope this brings a smile to your face! Do you have a favorite knock knock joke you'd like to share? I'd love to hear it."
|
||||
"Sure, I'd be happy to help you translate that sentence into French! The English sentence \"I love programming\" translates to \"J'aime programmer\" in French. Let me know if you have any other questions or need further assistance!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for chunk in chat.stream(messages):\n",
|
||||
" print(chunk.content, end=\"\")"
|
||||
"print(ai_msg.content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f6189577",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Batch"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "e63aebcb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[AIMessage(content=\"Who's there? I was just about to ask the same thing! Go ahead and tell me who's there. I love a good knock-knock joke.\")]"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chat.batch([messages])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "38e39e71",
|
||||
"id": "32b87f87",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Chaining\n",
|
||||
"\n",
|
||||
"You can also easily combine with a prompt template for easy structuring of user input. We can do this using [LCEL](/docs/concepts#langchain-expression-language-lcel)"
|
||||
"We can [chain](/docs/how_to/sequence/) our model with a prompt template like so:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "ee43a1ae",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_template(\"Tell me a joke about {topic}\")\n",
|
||||
"chain = prompt | chat"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "0dc49212",
|
||||
"execution_count": 8,
|
||||
"id": "24e2c51c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='Why do bears hate shoes so much? They like to run around in their bear feet.')"
|
||||
"AIMessage(content='Ich liebe Programmierung. (German translation)', response_metadata={'token_usage': {'prompt_tokens': 26, 'total_tokens': 38, 'completion_tokens': 12}, 'model': 'mistral-small', 'finish_reason': 'stop'}, id='run-dfd4094f-e347-47b0-9056-8ebd7ea35fe7-0', usage_metadata={'input_tokens': 26, 'output_tokens': 12, 'total_tokens': 38})"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.invoke({\"topic\": \"bears\"})"
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\n",
|
||||
" \"system\",\n",
|
||||
" \"You are a helpful assistant that translates {input_language} to {output_language}.\",\n",
|
||||
" ),\n",
|
||||
" (\"human\", \"{input}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chain = prompt | llm\n",
|
||||
"chain.invoke(\n",
|
||||
" {\n",
|
||||
" \"input_language\": \"English\",\n",
|
||||
" \"output_language\": \"German\",\n",
|
||||
" \"input\": \"I love programming.\",\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cb9b5834",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"Head to the [API reference](https://api.python.langchain.com/en/latest/chat_models/langchain_mistralai.chat_models.ChatMistralAI.html) for detailed documentation of all attributes and methods."
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -279,7 +253,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -2,13 +2,24 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cc6caafa",
|
||||
"metadata": {
|
||||
"id": "cc6caafa"
|
||||
},
|
||||
"id": "1f666798-8635-4bc0-a515-04d318588d67",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# NVIDIA NIMs\n",
|
||||
"---\n",
|
||||
"sidebar_label: NVIDIA AI Endpoints\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fa8eb20e-4db8-45e3-9e79-c595f4f274da",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ChatNVIDIA\n",
|
||||
"\n",
|
||||
"This will help you getting started with NVIDIA [chat models](/docs/concepts/#chat-models). For detailed documentation of all `ChatNVIDIA` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/chat_models/langchain_nvidia_ai_endpoints.chat_models.ChatNVIDIA.html).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"The `langchain-nvidia-ai-endpoints` package contains LangChain integrations building applications with models on \n",
|
||||
"NVIDIA NIM inference microservice. NIM supports models across domains like chat, embedding, and re-ranking models \n",
|
||||
"from the community as well as NVIDIA. These models are optimized by NVIDIA to deliver the best performance on NVIDIA \n",
|
||||
@@ -24,7 +35,66 @@
|
||||
"\n",
|
||||
"This example goes over how to use LangChain to interact with NVIDIA supported via the `ChatNVIDIA` class.\n",
|
||||
"\n",
|
||||
"For more information on accessing the chat models through this api, check out the [ChatNVIDIA](https://python.langchain.com/docs/integrations/chat/nvidia_ai_endpoints/) documentation."
|
||||
"For more information on accessing the chat models through this api, check out the [ChatNVIDIA](https://python.langchain.com/docs/integrations/chat/nvidia_ai_endpoints/) documentation.\n",
|
||||
"\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | JS support | Package downloads | Package latest |\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| [ChatNVIDIA](https://api.python.langchain.com/en/latest/chat_models/langchain_nvidia_ai_endpoints.chat_models.ChatNVIDIA.html) | [langchain_nvidia_ai_endpoints](https://api.python.langchain.com/en/latest/nvidia_ai_endpoints_api_reference.html) | ✅ | beta | ❌ |  |  |\n",
|
||||
"\n",
|
||||
"### Model features\n",
|
||||
"| [Tool calling](/docs/how_to/tool_calling) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
|
||||
"| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"**To get started:**\n",
|
||||
"\n",
|
||||
"1. Create a free account with [NVIDIA](https://build.nvidia.com/), which hosts NVIDIA AI Foundation models.\n",
|
||||
"\n",
|
||||
"2. Click on your model of choice.\n",
|
||||
"\n",
|
||||
"3. Under `Input` select the `Python` tab, and click `Get API Key`. Then click `Generate Key`.\n",
|
||||
"\n",
|
||||
"4. Copy and save the generated key as `NVIDIA_API_KEY`. From there, you should have access to the endpoints.\n",
|
||||
"\n",
|
||||
"### Credentials\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "208b72da-1535-4249-bbd3-2500028e25e9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"NVIDIA_API_KEY\"):\n",
|
||||
" # Note: the API key should start with \"nvapi-\"\n",
|
||||
" os.environ[\"NVIDIA_API_KEY\"] = getpass.getpass(\"Enter your NVIDIA API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "52dc8dcb-0a48-4a4e-9947-764116d2ffd4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2cd9cb12-6ca5-432a-9e42-8a57da073c7e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -32,7 +102,9 @@
|
||||
"id": "f2be90a9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Installation"
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain NVIDIA AI Endpoints integration lives in the `langchain_nvidia_ai_endpoints` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -45,51 +117,14 @@
|
||||
"%pip install --upgrade --quiet langchain-nvidia-ai-endpoints"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ccff689e",
|
||||
"metadata": {
|
||||
"id": "ccff689e"
|
||||
},
|
||||
"source": [
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"**To get started:**\n",
|
||||
"\n",
|
||||
"1. Create a free account with [NVIDIA](https://build.nvidia.com/), which hosts NVIDIA AI Foundation models.\n",
|
||||
"\n",
|
||||
"2. Click on your model of choice.\n",
|
||||
"\n",
|
||||
"3. Under `Input` select the `Python` tab, and click `Get API Key`. Then click `Generate Key`.\n",
|
||||
"\n",
|
||||
"4. Copy and save the generated key as `NVIDIA_API_KEY`. From there, you should have access to the endpoints."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "686c4d2f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"# del os.environ['NVIDIA_API_KEY'] ## delete key and reset\n",
|
||||
"if os.environ.get(\"NVIDIA_API_KEY\", \"\").startswith(\"nvapi-\"):\n",
|
||||
" print(\"Valid NVIDIA_API_KEY already in environment. Delete to reset\")\n",
|
||||
"else:\n",
|
||||
" nvapi_key = getpass.getpass(\"NVAPI Key (starts with nvapi-): \")\n",
|
||||
" assert nvapi_key.startswith(\"nvapi-\"), f\"{nvapi_key[:5]}... is not a valid key\"\n",
|
||||
" os.environ[\"NVIDIA_API_KEY\"] = nvapi_key"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "af0ce26b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Working with NVIDIA API Catalog"
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can access models in the NVIDIA API Catalog:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -108,7 +143,24 @@
|
||||
"## Core LC Chat Interface\n",
|
||||
"from langchain_nvidia_ai_endpoints import ChatNVIDIA\n",
|
||||
"\n",
|
||||
"llm = ChatNVIDIA(model=\"mistralai/mixtral-8x7b-instruct-v0.1\")\n",
|
||||
"llm = ChatNVIDIA(model=\"mistralai/mixtral-8x7b-instruct-v0.1\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "469c8c7f-de62-457f-a30f-674763a8b717",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Invocation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9512c81b-1f3a-4eca-9470-f52cedff5c74",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"result = llm.invoke(\"Write a ballad about LangChain.\")\n",
|
||||
"print(result.content)"
|
||||
]
|
||||
@@ -630,6 +682,55 @@
|
||||
"source": [
|
||||
"See [How to use chat models to call tools](https://python.langchain.com/v0.2/docs/how_to/tool_calling/) for additional examples."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a9a3c438-121d-46eb-8fb5-b8d5a13cd4a4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Chaining\n",
|
||||
"\n",
|
||||
"We can [chain](/docs/how_to/sequence/) our model with a prompt template like so:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "af585c6b-fe0a-4833-9860-a4209a71b3c6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate(\n",
|
||||
" [\n",
|
||||
" (\n",
|
||||
" \"system\",\n",
|
||||
" \"You are a helpful assistant that translates {input_language} to {output_language}.\",\n",
|
||||
" ),\n",
|
||||
" (\"human\", \"{input}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chain = prompt | llm\n",
|
||||
"chain.invoke(\n",
|
||||
" {\n",
|
||||
" \"input_language\": \"English\",\n",
|
||||
" \"output_language\": \"German\",\n",
|
||||
" \"input\": \"I love programming.\",\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f2f25dd3-0b4a-465f-a53e-95521cdc253c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `ChatNVIDIA` features and configurations head to the API reference: https://api.python.langchain.com/en/latest/chat_models/langchain_nvidia_ai_endpoints.chat_models.ChatNVIDIA.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -651,7 +752,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.13"
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -56,23 +56,16 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 1,
|
||||
"id": "e817fe2e-4f1d-4533-b19e-2400b1cf6ce8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Enter your OpenAI API key: ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"Enter your OpenAI API key: \")"
|
||||
"if not os.environ.get(\"OPENAI_API_KEY\"):\n",
|
||||
" os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"Enter your OpenAI API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -126,7 +119,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 2,
|
||||
"id": "522686de",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -281,12 +274,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 4,
|
||||
"id": "b7ea7690-ec7a-4337-b392-e87d1f39a6ec",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
|
||||
"from pydantic import BaseModel, Field\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class GetWeather(BaseModel):\n",
|
||||
@@ -322,6 +315,47 @@
|
||||
"ai_msg"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "67b0f63d-15e6-45e0-9e86-2852ddcff54f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### ``strict=True``\n",
|
||||
"\n",
|
||||
":::info Requires ``langchain-openai>=0.1.21rc1``\n",
|
||||
"\n",
|
||||
":::\n",
|
||||
"\n",
|
||||
"As of Aug 6, 2024, OpenAI supports a `strict` argument when calling tools that will enforce that the tool argument schema is respected by the model. See more here: https://platform.openai.com/docs/guides/function-calling\n",
|
||||
"\n",
|
||||
"**Note**: If ``strict=True`` the tool definition will also be validated, and a subset of JSON schema are accepted. Crucially, schema cannot have optional args (those with default values). Read the full docs on what types of schema are supported here: https://platform.openai.com/docs/guides/structured-outputs/supported-schemas. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "dc8ac4f1-4039-4392-90c1-2d8331cd6910",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_VYEfpPDh3npMQ95J9EWmWvSn', 'function': {'arguments': '{\"location\":\"San Francisco, CA\"}', 'name': 'GetWeather'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 68, 'total_tokens': 85}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-a4c6749b-adbb-45c7-8b17-8d6835d5c443-0', tool_calls=[{'name': 'GetWeather', 'args': {'location': 'San Francisco, CA'}, 'id': 'call_VYEfpPDh3npMQ95J9EWmWvSn', 'type': 'tool_call'}], usage_metadata={'input_tokens': 68, 'output_tokens': 17, 'total_tokens': 85})"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm_with_tools = llm.bind_tools([GetWeather], strict=True)\n",
|
||||
"ai_msg = llm_with_tools.invoke(\n",
|
||||
" \"what is the weather like in San Francisco\",\n",
|
||||
")\n",
|
||||
"ai_msg"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "768d1ae4-4b1a-48eb-a329-c8d5051067a3",
|
||||
@@ -412,9 +446,9 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "poetry-venv-2",
|
||||
"display_name": "poetry-venv-311",
|
||||
"language": "python",
|
||||
"name": "poetry-venv-2"
|
||||
"name": "poetry-venv-311"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
||||
@@ -12,14 +12,83 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "eb7e5679-aa06-47e4-a1a3-b6b70e604017",
|
||||
"id": "8f82e243-f4ee-44e2-b417-099b6401ae3e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# vLLM Chat\n",
|
||||
"\n",
|
||||
"vLLM can be deployed as a server that mimics the OpenAI API protocol. This allows vLLM to be used as a drop-in replacement for applications using OpenAI API. This server can be queried in the same format as OpenAI API.\n",
|
||||
"\n",
|
||||
"This notebook covers how to get started with vLLM chat models using langchain's `ChatOpenAI` **as it is**."
|
||||
"## Overview\n",
|
||||
"This will help you getting started with vLLM [chat models](/docs/concepts/#chat-models), which leverage the `langchain-openai` package. For detailed documentation of all `ChatOpenAI` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html).\n",
|
||||
"\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | JS support | Package downloads | Package latest |\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| [ChatOpenAI](https://api.python.langchain.com/en/latest/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html) | [langchain_openai](https://api.python.langchain.com/en/latest/langchain_openai.html) | ✅ | beta | ❌ |  |  |\n",
|
||||
"\n",
|
||||
"### Model features\n",
|
||||
"Specific model features-- such as tool calling, support for multi-modal inputs, support for token-level streaming, etc.-- will depend on the hosted model.\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"See the vLLM docs [here](https://docs.vllm.ai/en/latest/).\n",
|
||||
"\n",
|
||||
"To access vLLM models through LangChain, you'll need to install the `langchain-openai` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Authentication will depend on specifics of the inference server."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c3b1707a-cf2c-4367-94e3-436c43402503",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1e40bd5e-cbaa-41ef-aaf9-0858eb207184",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0739b647-609b-46d3-bdd3-e86fe4463288",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain vLLM integration can be accessed via the `langchain-openai` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7afcfbdc-56aa-4529-825a-8acbe7aa5241",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-openai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2cf576d6-7b67-4937-bf99-39071e85720c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and generate chat completions:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -51,7 +120,7 @@
|
||||
"source": [
|
||||
"inference_server_url = \"http://localhost:8000/v1\"\n",
|
||||
"\n",
|
||||
"chat = ChatOpenAI(\n",
|
||||
"llm = ChatOpenAI(\n",
|
||||
" model=\"mosaicml/mpt-7b\",\n",
|
||||
" openai_api_key=\"EMPTY\",\n",
|
||||
" openai_api_base=inference_server_url,\n",
|
||||
@@ -60,6 +129,14 @@
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "34b18328-5e8b-4ff2-9b89-6fbb76b5c7f0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Invocation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
@@ -88,82 +165,66 @@
|
||||
" content=\"Translate the following sentence from English to Italian: I love programming.\"\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"chat(messages)"
|
||||
"llm.invoke(messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "55fc7046-a6dc-4720-8c0c-24a6db76a4f4",
|
||||
"id": "a580a1e4-11a3-4277-bfba-bfb414ac7201",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can make use of templating by using a `MessagePromptTemplate`. You can build a `ChatPromptTemplate` from one or more `MessagePromptTemplates`. You can use ChatPromptTemplate's format_prompt -- this returns a `PromptValue`, which you can convert to a string or `Message` object, depending on whether you want to use the formatted value as input to an llm or chat model.\n",
|
||||
"## Chaining\n",
|
||||
"\n",
|
||||
"For convenience, there is a `from_template` method exposed on the template. If you were to use this template, this is what it would look like:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "123980e9-0dee-4ce5-bde6-d964dd90129c",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"template = (\n",
|
||||
" \"You are a helpful assistant that translates {input_language} to {output_language}.\"\n",
|
||||
")\n",
|
||||
"system_message_prompt = SystemMessagePromptTemplate.from_template(template)\n",
|
||||
"human_template = \"{text}\"\n",
|
||||
"human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "b2fb8c59-8892-4270-85a2-4f8ab276b75d",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=' I love programming too.', additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chat_prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [system_message_prompt, human_message_prompt]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# get a chat completion from the formatted messages\n",
|
||||
"chat(\n",
|
||||
" chat_prompt.format_prompt(\n",
|
||||
" input_language=\"English\", output_language=\"Italian\", text=\"I love programming.\"\n",
|
||||
" ).to_messages()\n",
|
||||
")"
|
||||
"We can [chain](/docs/how_to/sequence/) our model with a prompt template like so:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0bbd9861-2b94-4920-8708-b690004f4c4d",
|
||||
"id": "dd0f4043-48bd-4245-8bdb-e7669666a277",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"source": [
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate(\n",
|
||||
" [\n",
|
||||
" (\n",
|
||||
" \"system\",\n",
|
||||
" \"You are a helpful assistant that translates {input_language} to {output_language}.\",\n",
|
||||
" ),\n",
|
||||
" (\"human\", \"{input}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chain = prompt | llm\n",
|
||||
"chain.invoke(\n",
|
||||
" {\n",
|
||||
" \"input_language\": \"English\",\n",
|
||||
" \"output_language\": \"German\",\n",
|
||||
" \"input\": \"I love programming.\",\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "265f5d51-0a76-4808-8d13-ef598ee6e366",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all features and configurations exposed via `langchain-openai`, head to the API reference: https://api.python.langchain.com/en/latest/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html\n",
|
||||
"\n",
|
||||
"Refer to the vLLM [documentation](https://docs.vllm.ai/en/latest/) as well."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "conda_pytorch_p310",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "conda_pytorch_p310"
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
@@ -175,7 +236,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
"\n",
|
||||
"## Prerequisites\n",
|
||||
"\n",
|
||||
"You need to have an existing dataset on the Apify platform. If you don't have one, please first check out [this notebook](/docs/integrations/tools/apify) on how to use Apify to extract content from documentation, knowledge bases, help centers, or blogs. This example shows how to load a dataset produced by the [Website Content Crawler](https://apify.com/apify/website-content-crawler)."
|
||||
"You need to have an existing dataset on the Apify platform. This example shows how to load a dataset produced by the [Website Content Crawler](https://apify.com/apify/website-content-crawler)."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
File diff suppressed because one or more lines are too long
182
docs/docs/integrations/document_loaders/pypdfloader.ipynb
Normal file
182
docs/docs/integrations/document_loaders/pypdfloader.ipynb
Normal file
@@ -0,0 +1,182 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# PyPDFLoader\n",
|
||||
"\n",
|
||||
"This notebook provides a quick overview for getting started with `PyPDF` [document loader](https://python.langchain.com/v0.2/docs/concepts/#document-loaders). For detailed documentation of all __ModuleName__Loader features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFLoader.html).\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | JS support|\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: |\n",
|
||||
"| [PyPDFLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ❌ | \n",
|
||||
"### Loader features\n",
|
||||
"| Source | Document Lazy Loading | Native Async Support\n",
|
||||
"| :---: | :---: | :---: | \n",
|
||||
"| PyPDFLoader | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"No credentials are required to use `PyPDFLoader`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"To use `PyPDFLoader` you need to have the `langchain-community` python package downloaded:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain_community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and load documents:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import PyPDFLoader\n",
|
||||
"\n",
|
||||
"loader = PyPDFLoader(\n",
|
||||
" \"./example_data/layout-parser-paper.pdf\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'page': 0}, page_content='LayoutParser : A Unified Toolkit for Deep\\nLearning Based Document Image Analysis\\nZejiang Shen1( \\x00), Ruochen Zhang2, Melissa Dell3, Benjamin Charles Germain\\nLee4, Jacob Carlson3, and Weining Li5\\n1Allen Institute for AI\\nshannons@allenai.org\\n2Brown University\\nruochen zhang@brown.edu\\n3Harvard University\\n{melissadell,jacob carlson }@fas.harvard.edu\\n4University of Washington\\nbcgl@cs.washington.edu\\n5University of Waterloo\\nw422li@uwaterloo.ca\\nAbstract. Recent advances in document image analysis (DIA) have been\\nprimarily driven by the application of neural networks. Ideally, research\\noutcomes could be easily deployed in production and extended for further\\ninvestigation. However, various factors like loosely organized codebases\\nand sophisticated model configurations complicate the easy reuse of im-\\nportant innovations by a wide audience. Though there have been on-going\\nefforts to improve reusability and simplify deep learning (DL) model\\ndevelopment in disciplines like natural language processing and computer\\nvision, none of them are optimized for challenges in the domain of DIA.\\nThis represents a major gap in the existing toolkit, as DIA is central to\\nacademic research across a wide range of disciplines in the social sciences\\nand humanities. This paper introduces LayoutParser , an open-source\\nlibrary for streamlining the usage of DL in DIA research and applica-\\ntions. The core LayoutParser library comes with a set of simple and\\nintuitive interfaces for applying and customizing DL models for layout de-\\ntection, character recognition, and many other document processing tasks.\\nTo promote extensibility, LayoutParser also incorporates a community\\nplatform for sharing both pre-trained models and full document digiti-\\nzation pipelines. We demonstrate that LayoutParser is helpful for both\\nlightweight and large-scale digitization pipelines in real-word use cases.\\nThe library is publicly available at https://layout-parser.github.io .\\nKeywords: Document Image Analysis ·Deep Learning ·Layout Analysis\\n·Character Recognition ·Open Source library ·Toolkit.\\n1 Introduction\\nDeep Learning(DL)-based approaches are the state-of-the-art for a wide range of\\ndocument image analysis (DIA) tasks including document image classification [ 11,arXiv:2103.15348v2 [cs.CV] 21 Jun 2021')"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'source': './example_data/layout-parser-paper.pdf', 'page': 0}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Lazy Load\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"6"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"page = []\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" page.append(doc)\n",
|
||||
" if len(page) >= 10:\n",
|
||||
" # do some paged operation, e.g.\n",
|
||||
" # index.upsert(page)\n",
|
||||
"\n",
|
||||
" page = []\n",
|
||||
"len(page)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `PyPDFLoader` features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFLoader.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -7,7 +7,18 @@
|
||||
"source": [
|
||||
"# Recursive URL\n",
|
||||
"\n",
|
||||
"The `RecursiveUrlLoader` lets you recursively scrape all child links from a root URL and parse them into Documents."
|
||||
"The `RecursiveUrlLoader` lets you recursively scrape all child links from a root URL and parse them into Documents.\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/v0.2/docs/integrations/document_loaders/web_loaders/recursive_url_loader/)|\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: |\n",
|
||||
"| [RecursiveUrlLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.recursive_url_loader.RecursiveUrlLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ✅ | \n",
|
||||
"### Loader features\n",
|
||||
"| Source | Document Lazy Loading | Native Async Support\n",
|
||||
"| :---: | :---: | :---: | \n",
|
||||
"| RecursiveUrlLoader | ✅ | ❌ | \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -17,6 +28,12 @@
|
||||
"source": [
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"No credentials are required to use the `RecursiveUrlLoader`.\n",
|
||||
"\n",
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The `RecursiveUrlLoader` lives in the `langchain-community` package. There's no other required packages, though you will get richer default Document metadata if you have ``beautifulsoup4` installed as well."
|
||||
]
|
||||
},
|
||||
@@ -186,6 +203,50 @@
|
||||
"That certainly looks like HTML that comes from the url https://docs.python.org/3.9/, which is what we expected. Let's now look at some variations we can make to our basic example that can be helpful in different situations. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b17b7202",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Lazy loading\n",
|
||||
"\n",
|
||||
"If we're loading a large number of Documents and our downstream operations can be done over subsets of all loaded Documents, we can lazily load our Documents one at a time to minimize our memory footprint:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4b13e4d1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/var/folders/4j/2rz3865x6qg07tx43146py8h0000gn/T/ipykernel_73962/2110507528.py:6: XMLParsedAsHTMLWarning: It looks like you're parsing an XML document using an HTML parser. If this really is an HTML document (maybe it's XHTML?), you can ignore or filter this warning. If it's XML, you should know that using an XML parser will be more reliable. To parse this document as XML, make sure you have the lxml package installed, and pass the keyword argument `features=\"xml\"` into the BeautifulSoup constructor.\n",
|
||||
" soup = BeautifulSoup(html, \"lxml\")\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"page = []\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" page.append(doc)\n",
|
||||
" if len(page) >= 10:\n",
|
||||
" # do some paged operation, e.g.\n",
|
||||
" # index.upsert(page)\n",
|
||||
"\n",
|
||||
" page = []"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fb039682",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In this example we never have more than 10 Documents loaded into memory at a time."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8f41cc89",
|
||||
@@ -256,50 +317,6 @@
|
||||
"You can similarly pass in a `metadata_extractor` to customize how Document metadata is extracted from the HTTP response. See the [API reference](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.recursive_url_loader.RecursiveUrlLoader.html) for more on this."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1dddbc94",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Lazy loading\n",
|
||||
"\n",
|
||||
"If we're loading a large number of Documents and our downstream operations can be done over subsets of all loaded Documents, we can lazily load our Documents one at a time to minimize our memory footprint:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "7d0114fc",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/var/folders/4j/2rz3865x6qg07tx43146py8h0000gn/T/ipykernel_73962/2110507528.py:6: XMLParsedAsHTMLWarning: It looks like you're parsing an XML document using an HTML parser. If this really is an HTML document (maybe it's XHTML?), you can ignore or filter this warning. If it's XML, you should know that using an XML parser will be more reliable. To parse this document as XML, make sure you have the lxml package installed, and pass the keyword argument `features=\"xml\"` into the BeautifulSoup constructor.\n",
|
||||
" soup = BeautifulSoup(html, \"lxml\")\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"page = []\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" page.append(doc)\n",
|
||||
" if len(page) >= 10:\n",
|
||||
" # do some paged operation, e.g.\n",
|
||||
" # index.upsert(page)\n",
|
||||
"\n",
|
||||
" page = []"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f88a7c2f-35df-4c3a-b238-f91be2674b96",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In this example we never have more than 10 Documents loaded into memory at a time."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3e4d1c8f",
|
||||
|
||||
@@ -7,20 +7,41 @@
|
||||
"source": [
|
||||
"# Unstructured\n",
|
||||
"\n",
|
||||
"This notebook covers how to use `Unstructured` package to load files of many types. `Unstructured` currently supports loading of text files, powerpoints, html, pdfs, images, and more.\n",
|
||||
"This notebook covers how to use `Unstructured` [document loader](https://python.langchain.com/v0.2/docs/concepts/#document-loaders) to load files of many types. `Unstructured` currently supports loading of text files, powerpoints, html, pdfs, images, and more.\n",
|
||||
"\n",
|
||||
"Please see [this guide](/docs/integrations/providers/unstructured/) for more instructions on setting up Unstructured locally, including setting up required system dependencies."
|
||||
"Please see [this guide](../../integrations/providers/unstructured.mdx) for more instructions on setting up Unstructured locally, including setting up required system dependencies.\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/v0.2/docs/integrations/document_loaders/file_loaders/unstructured/)|\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: |\n",
|
||||
"| [UnstructuredLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_unstructured.document_loaders.UnstructuredLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/unstructured_api_reference.html) | ✅ | ❌ | ✅ | \n",
|
||||
"### Loader features\n",
|
||||
"| Source | Document Lazy Loading | Native Async Support\n",
|
||||
"| :---: | :---: | :---: | \n",
|
||||
"| UnstructuredLoader | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"By default, `langchain-unstructured` installs a smaller footprint that requires offloading of the partitioning logic to the Unstructured API, which requires an API key. If you use the local installation, you do not need an API key. To get your API key, head over to [this site](https://unstructured.io) and get an API key, and then set it in the cell below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"id": "2886982e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Install package, compatible with API partitioning\n",
|
||||
"%pip install --upgrade --quiet \"langchain-unstructured\""
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"UNSTRUCTURED_API_KEY\"] = getpass.getpass(\n",
|
||||
" \"Enter your Unstructured API key: \"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -28,14 +49,32 @@
|
||||
"id": "e75e2a6d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Local Partitioning (Optional)\n",
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"By default, `langchain-unstructured` installs a smaller footprint that requires\n",
|
||||
"offloading of the partitioning logic to the Unstructured API.\n",
|
||||
"#### Normal Installation\n",
|
||||
"\n",
|
||||
"If you would like to run the partitioning logic locally, you will need to install\n",
|
||||
"a combination of system dependencies, as outlined in the \n",
|
||||
"[Unstructured documentation here](https://docs.unstructured.io/open-source/installation/full-installation).\n",
|
||||
"The following packages are required to run the rest of this notebook."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d9de83b3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Install package, compatible with API partitioning\n",
|
||||
"%pip install --upgrade --quiet langchain-unstructured unstructured-client unstructured \"unstructured[pdf]\" python-magic"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "637eda35",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Installation for Local\n",
|
||||
"\n",
|
||||
"If you would like to run the partitioning logic locally, you will need to install a combination of system dependencies, as outlined in the [Unstructured documentation here](https://docs.unstructured.io/open-source/installation/full-installation).\n",
|
||||
"\n",
|
||||
"For example, on Macs you can install the required dependencies with:\n",
|
||||
"\n",
|
||||
@@ -47,7 +86,7 @@
|
||||
"brew install libxml2 libxslt\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"You can install the required `pip` dependencies with:\n",
|
||||
"You can install the required `pip` dependencies needed for local with:\n",
|
||||
"\n",
|
||||
"```bash\n",
|
||||
"pip install \"langchain-unstructured[local]\"\n",
|
||||
@@ -59,120 +98,117 @@
|
||||
"id": "a9c1c775",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Quickstart\n",
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"To simply load a file as a document, you can use the LangChain `DocumentLoader.load` \n",
|
||||
"interface:"
|
||||
"The `UnstructuredLoader` allows loading from a variety of different file types. To read all about the `unstructured` package please refer to their [documentation](https://docs.unstructured.io/open-source/introduction/overview)/. In this example, we show loading from both a text file and a PDF file."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"id": "79d3e549",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_unstructured import UnstructuredLoader\n",
|
||||
"\n",
|
||||
"loader = UnstructuredLoader(\"./example_data/state_of_the_union.txt\")\n",
|
||||
"file_paths = [\n",
|
||||
" \"./example_data/layout-parser-paper.pdf\",\n",
|
||||
" \"./example_data/state_of_the_union.txt\",\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"docs = loader.load()"
|
||||
"\n",
|
||||
"loader = UnstructuredLoader(file_paths)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b4ab0a79",
|
||||
"id": "8b68dcab",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load list of files"
|
||||
"## Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "092d9a0b",
|
||||
"execution_count": 2,
|
||||
"id": "8da59ef8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"INFO: NumExpr defaulting to 12 threads.\n",
|
||||
"INFO: pikepdf C++ to Python logger bridge initialized\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((16.34, 213.36), (16.34, 253.36), (36.34, 253.36), (36.34, 213.36)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-07-25T21:28:58', 'page_number': 1, 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': 'd3ce55f220dfb75891b4394a18bcb973'}, page_content='1 2 0 2')"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"\n",
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "97f7aa1f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"whatsapp_chat.txt : 1/22/23, 6:30 PM - User 1: Hi! Im interested in your bag. Im offering $50. Let me know if you are in\n",
|
||||
"state_of_the_union.txt : May God bless you all. May God protect our troops.\n"
|
||||
"{'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((16.34, 213.36), (16.34, 253.36), (36.34, 253.36), (36.34, 213.36)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-07-25T21:28:58', 'page_number': 1, 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': 'd3ce55f220dfb75891b4394a18bcb973'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"file_paths = [\n",
|
||||
" \"./example_data/whatsapp_chat.txt\",\n",
|
||||
" \"./example_data/state_of_the_union.txt\",\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"loader = UnstructuredLoader(file_paths)\n",
|
||||
"\n",
|
||||
"docs = loader.load()\n",
|
||||
"\n",
|
||||
"print(docs[0].metadata.get(\"filename\"), \": \", docs[0].page_content[:100])\n",
|
||||
"print(docs[-1].metadata.get(\"filename\"), \": \", docs[-1].page_content[:100])"
|
||||
"print(docs[0].metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8de9ef16",
|
||||
"id": "0d7f991b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## PDF Example\n",
|
||||
"\n",
|
||||
"Processing PDF documents works exactly the same way. Unstructured detects the file type and extracts the same types of elements."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "672733fd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Define a Partitioning Strategy\n",
|
||||
"\n",
|
||||
"Unstructured document loader allow users to pass in a `strategy` parameter that lets Unstructured\n",
|
||||
"know how to partition pdf and other OCR'd documents. Currently supported strategies are `\"auto\"`,\n",
|
||||
"`\"hi_res\"`, `\"ocr_only\"`, and `\"fast\"`. Learn more about the different strategies\n",
|
||||
"[here](https://docs.unstructured.io/open-source/core-functionality/partitioning#partition-pdf). \n",
|
||||
"\n",
|
||||
"Not all document types have separate hi res and fast partitioning strategies. For those document types, the `strategy` kwarg is\n",
|
||||
"ignored. In some cases, the high res strategy will fallback to fast if there is a dependency missing\n",
|
||||
"(i.e. a model for document partitioning). You can see how to apply a strategy to an\n",
|
||||
"`UnstructuredLoader` below."
|
||||
"## Lazy Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "60685353",
|
||||
"execution_count": 4,
|
||||
"id": "b05604d2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((16.34, 393.9), (16.34, 560.0), (36.34, 560.0), (36.34, 393.9)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-02-27T15:49:27', 'page_number': 1, 'parent_id': '89565df026a24279aaea20dc08cedbec', 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': 'e9fa370aef7ee5c05744eb7bb7d9981b'}, page_content='2 v 8 4 3 5 1 . 3 0 1 2 : v i X r a'),\n",
|
||||
" Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((157.62199999999999, 114.23496279999995), (157.62199999999999, 146.5141628), (457.7358962799999, 146.5141628), (457.7358962799999, 114.23496279999995)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-02-27T15:49:27', 'page_number': 1, 'filetype': 'application/pdf', 'category': 'Title', 'element_id': 'bde0b230a1aa488e3ce837d33015181b'}, page_content='LayoutParser: A Unified Toolkit for Deep Learning Based Document Image Analysis'),\n",
|
||||
" Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((134.809, 168.64029940800003), (134.809, 192.2517444), (480.5464199080001, 192.2517444), (480.5464199080001, 168.64029940800003)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-02-27T15:49:27', 'page_number': 1, 'parent_id': 'bde0b230a1aa488e3ce837d33015181b', 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': '54700f902899f0c8c90488fa8d825bce'}, page_content='Zejiang Shen1 ((cid:0)), Ruochen Zhang2, Melissa Dell3, Benjamin Charles Germain Lee4, Jacob Carlson3, and Weining Li5'),\n",
|
||||
" Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((207.23000000000002, 202.57205439999996), (207.23000000000002, 311.8195408), (408.12676, 311.8195408), (408.12676, 202.57205439999996)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-02-27T15:49:27', 'page_number': 1, 'parent_id': 'bde0b230a1aa488e3ce837d33015181b', 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': 'b650f5867bad9bb4e30384282c79bcfe'}, page_content='1 Allen Institute for AI shannons@allenai.org 2 Brown University ruochen zhang@brown.edu 3 Harvard University {melissadell,jacob carlson}@fas.harvard.edu 4 University of Washington bcgl@cs.washington.edu 5 University of Waterloo w422li@uwaterloo.ca'),\n",
|
||||
" Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((162.779, 338.45008160000003), (162.779, 566.8455408), (454.0372021523199, 566.8455408), (454.0372021523199, 338.45008160000003)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-02-27T15:49:27', 'links': [{'text': ':// layout - parser . github . io', 'url': 'https://layout-parser.github.io', 'start_index': 1477}], 'page_number': 1, 'parent_id': 'bde0b230a1aa488e3ce837d33015181b', 'filetype': 'application/pdf', 'category': 'NarrativeText', 'element_id': 'cfc957c94fe63c8fd7c7f4bcb56e75a7'}, page_content='Abstract. Recent advances in document image analysis (DIA) have been primarily driven by the application of neural networks. Ideally, research outcomes could be easily deployed in production and extended for further investigation. However, various factors like loosely organized codebases and sophisticated model configurations complicate the easy reuse of im- portant innovations by a wide audience. Though there have been on-going efforts to improve reusability and simplify deep learning (DL) model development in disciplines like natural language processing and computer vision, none of them are optimized for challenges in the domain of DIA. This represents a major gap in the existing toolkit, as DIA is central to academic research across a wide range of disciplines in the social sciences and humanities. This paper introduces LayoutParser, an open-source library for streamlining the usage of DL in DIA research and applica- tions. The core LayoutParser library comes with a set of simple and intuitive interfaces for applying and customizing DL models for layout de- tection, character recognition, and many other document processing tasks. To promote extensibility, LayoutParser also incorporates a community platform for sharing both pre-trained models and full document digiti- zation pipelines. We demonstrate that LayoutParser is helpful for both lightweight and large-scale digitization pipelines in real-word use cases. The library is publicly available at https://layout-parser.github.io.')]"
|
||||
"Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((16.34, 213.36), (16.34, 253.36), (36.34, 253.36), (36.34, 213.36)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-07-25T21:28:58', 'page_number': 1, 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': 'd3ce55f220dfb75891b4394a18bcb973'}, page_content='1 2 0 2')"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_unstructured import UnstructuredLoader\n",
|
||||
"pages = []\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" pages.append(doc)\n",
|
||||
"\n",
|
||||
"loader = UnstructuredLoader(\"./example_data/layout-parser-paper.pdf\", strategy=\"fast\")\n",
|
||||
"\n",
|
||||
"docs = loader.load()\n",
|
||||
"\n",
|
||||
"docs[5:10]"
|
||||
"pages[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -241,23 +277,6 @@
|
||||
"if you’d like to self-host the Unstructured API or run it locally."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6e5fde16",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Install package\n",
|
||||
"%pip install \"langchain-unstructured\"\n",
|
||||
"%pip install \"unstructured-client\"\n",
|
||||
"\n",
|
||||
"# Set API key\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"UNSTRUCTURED_API_KEY\"] = \"FAKE_API_KEY\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
@@ -358,8 +377,9 @@
|
||||
"Partitioning with the Unstructured API relies on the [Unstructured SDK\n",
|
||||
"Client](https://docs.unstructured.io/api-reference/api-services/sdk).\n",
|
||||
"\n",
|
||||
"Below is an example showing how you can customize some features of the client and use your own\n",
|
||||
"`requests.Session()`, pass in an alternative `server_url`, or customize the `RetryConfig` object for more control over how failed requests are handled."
|
||||
"Below is an example showing how you can customize some features of the client and use your own `requests.Session()`, pass in an alternative `server_url`, or customize the `RetryConfig` object for more control over how failed requests are handled.\n",
|
||||
"\n",
|
||||
"Note that the example below may not use the latest version of the UnstructuredClient and there could be breaking changes in future releases. For the latest examples, refer to the [Unstructured Python SDK](https://docs.unstructured.io/api-reference/api-services/sdk-python) docs."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -494,6 +514,16 @@
|
||||
"print(\"Number of LangChain documents:\", len(docs))\n",
|
||||
"print(\"Length of text in the document:\", len(docs[0].page_content))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ce01aa40",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `UnstructuredLoader` features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_unstructured.document_loaders.UnstructuredLoader.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -512,7 +542,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.13"
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -334,6 +334,121 @@
|
||||
"llm.invoke(\"Tell me a joke\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b29dd776",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Semantic Cache\n",
|
||||
"Use [Upstash Vector](https://upstash.com/docs/vector/overall/whatisvector) to do a semantic similarity search and cache the most similar response in the database. The vectorization is automatically done by the selected embedding model while creating Upstash Vector database. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b37fb3c9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install upstash-semantic-cache"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "8470eedc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.globals import set_llm_cache\n",
|
||||
"from upstash_semantic_cache import SemanticCache"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "16b9fb03",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"UPSTASH_VECTOR_REST_URL = \"<UPSTASH_VECTOR_REST_URL>\"\n",
|
||||
"UPSTASH_VECTOR_REST_TOKEN = \"<UPSTASH_VECTOR_REST_TOKEN>\"\n",
|
||||
"\n",
|
||||
"cache = SemanticCache(\n",
|
||||
" url=UPSTASH_VECTOR_REST_URL, token=UPSTASH_VECTOR_REST_TOKEN, min_proximity=0.7\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "8d37104b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"set_llm_cache(cache)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "926a08b3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 28.4 ms, sys: 3.93 ms, total: 32.3 ms\n",
|
||||
"Wall time: 1.89 s\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nNew York City is the most crowded city in the USA.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"llm.invoke(\"Which city is the most crowded city in the USA?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "0ce37d57",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 3.22 ms, sys: 940 μs, total: 4.16 ms\n",
|
||||
"Wall time: 97.7 ms\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nNew York City is the most crowded city in the USA.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"llm.invoke(\"Which city has the highest population in the USA?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "278ad7ae",
|
||||
@@ -2684,7 +2799,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.5"
|
||||
"version": "3.12.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -108,7 +108,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = Cohere(model=\"command\", max_tokens=256, temperature=0.75)"
|
||||
"model = Cohere(max_tokens=256, temperature=0.75)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -787,7 +787,7 @@ We need to install `langchain-google-community` with required dependencies:
|
||||
pip install langchain-google-community[gmail]
|
||||
```
|
||||
|
||||
See a [usage example and authorization instructions](/docs/integrations/toolkits/gmail).
|
||||
See a [usage example and authorization instructions](/docs/integrations/tools/gmail).
|
||||
|
||||
```python
|
||||
from langchain_google_community import GmailToolkit
|
||||
|
||||
@@ -370,7 +370,7 @@ We need to install several python packages.
|
||||
pip install azure-ai-formrecognizer azure-cognitiveservices-speech azure-ai-vision-imageanalysis
|
||||
```
|
||||
|
||||
See a [usage example](/docs/integrations/toolkits/azure_ai_services).
|
||||
See a [usage example](/docs/integrations/tools/azure_ai_services).
|
||||
|
||||
```python
|
||||
from langchain_community.agent_toolkits import azure_ai_services
|
||||
@@ -385,7 +385,7 @@ pip install O365
|
||||
```
|
||||
|
||||
|
||||
See a [usage example](/docs/integrations/toolkits/office365).
|
||||
See a [usage example](/docs/integrations/tools/office365).
|
||||
|
||||
```python
|
||||
from langchain_community.agent_toolkits import O365Toolkit
|
||||
@@ -399,7 +399,7 @@ We need to install `azure-identity` python package.
|
||||
pip install azure-identity
|
||||
```
|
||||
|
||||
See a [usage example](/docs/integrations/toolkits/powerbi).
|
||||
See a [usage example](/docs/integrations/tools/powerbi).
|
||||
|
||||
```python
|
||||
from langchain_community.agent_toolkits import PowerBIToolkit
|
||||
|
||||
@@ -15,7 +15,7 @@ pip install ain-py
|
||||
You need to set the `AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY` environmental variable to your AIN Blockchain Account Private Key.
|
||||
## Toolkit
|
||||
|
||||
See a [usage example](/docs/integrations/toolkits/ainetwork).
|
||||
See a [usage example](/docs/integrations/tools/ainetwork).
|
||||
|
||||
```python
|
||||
from langchain_community.agent_toolkits.ainetwork.toolkit import AINetworkToolkit
|
||||
|
||||
@@ -27,7 +27,7 @@ You can use the `ApifyWrapper` to run Actors on the Apify platform.
|
||||
from langchain_community.utilities import ApifyWrapper
|
||||
```
|
||||
|
||||
For a more detailed walkthrough of this wrapper, see [this notebook](/docs/integrations/tools/apify).
|
||||
For more information on this wrapper, see [the API reference](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.apify.ApifyWrapper.html).
|
||||
|
||||
|
||||
## Document loader
|
||||
|
||||
@@ -80,6 +80,6 @@ from langchain_community.agent_toolkits.cassandra_database.toolkit import (
|
||||
)
|
||||
```
|
||||
|
||||
Learn more in the [example notebook](/docs/integrations/toolkits/cassandra_database).
|
||||
Learn more in the [example notebook](/docs/integrations/tools/cassandra_database).
|
||||
|
||||
|
||||
|
||||
@@ -46,6 +46,55 @@ print(llm.invoke("Come up with a pet name"))
|
||||
```
|
||||
|
||||
Usage of the Cohere (legacy) [LLM model](/docs/integrations/llms/cohere)
|
||||
|
||||
### Tool calling
|
||||
```python
|
||||
from langchain_cohere import ChatCohere
|
||||
from langchain_core.messages import (
|
||||
HumanMessage,
|
||||
ToolMessage,
|
||||
)
|
||||
from langchain_core.tools import tool
|
||||
|
||||
@tool
|
||||
def magic_function(number: int) -> int:
|
||||
"""Applies a magic operation to an integer
|
||||
|
||||
Args:
|
||||
number: Number to have magic operation performed on
|
||||
"""
|
||||
return number + 10
|
||||
|
||||
def invoke_tools(tool_calls, messages):
|
||||
for tool_call in tool_calls:
|
||||
selected_tool = {"magic_function":magic_function}[
|
||||
tool_call["name"].lower()
|
||||
]
|
||||
tool_output = selected_tool.invoke(tool_call["args"])
|
||||
messages.append(ToolMessage(tool_output, tool_call_id=tool_call["id"]))
|
||||
return messages
|
||||
|
||||
tools = [magic_function]
|
||||
|
||||
llm = ChatCohere()
|
||||
llm_with_tools = llm.bind_tools(tools=tools)
|
||||
messages = [
|
||||
HumanMessage(
|
||||
content="What is the value of magic_function(2)?"
|
||||
)
|
||||
]
|
||||
|
||||
res = llm_with_tools.invoke(messages)
|
||||
while res.tool_calls:
|
||||
messages.append(res)
|
||||
messages = invoke_tools(res.tool_calls, messages)
|
||||
res = llm_with_tools.invoke(messages)
|
||||
|
||||
print(res.content)
|
||||
```
|
||||
Tool calling with Cohere LLM can be done by binding the necessary tools to the llm as seen above.
|
||||
An alternative, is to support multi hop tool calling with the ReAct agent as seen below.
|
||||
|
||||
### ReAct Agent
|
||||
|
||||
The agent is based on the paper
|
||||
@@ -77,6 +126,7 @@ agent_executor.invoke({
|
||||
"input": "In what year was the company that was founded as Sound of Music added to the S&P 500?",
|
||||
})
|
||||
```
|
||||
The ReAct agent can be used to call multiple tools in sequence.
|
||||
|
||||
### RAG Retriever
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ The Kinetica LLM wrapper uses the [Kinetica SqlAssist
|
||||
LLM](https://docs.kinetica.com/7.2/sql-gpt/concepts/) to transform natural language into
|
||||
SQL to simplify the process of data retrieval.
|
||||
|
||||
See [Kinetica SqlAssist LLM Demo](/docs/integrations/chat/kinetica) for usage.
|
||||
See [Kinetica Language To SQL Chat Model](/docs/integrations/chat/kinetica) for usage.
|
||||
|
||||
```python
|
||||
from langchain_community.chat_models.kinetica import ChatKinetica
|
||||
|
||||
@@ -30,7 +30,7 @@ from langchain_robocorp.toolkits import ActionServerRequestTool
|
||||
|
||||
## Toolkit
|
||||
|
||||
See a [usage example](/docs/integrations/toolkits/robocorp).
|
||||
See a [usage example](/docs/integrations/tools/robocorp).
|
||||
|
||||
```python
|
||||
from langchain_robocorp import ActionServerToolkit
|
||||
|
||||
@@ -17,7 +17,7 @@ from langchain_community.document_loaders import SlackDirectoryLoader
|
||||
|
||||
## Toolkit
|
||||
|
||||
See a [usage example](/docs/integrations/toolkits/slack).
|
||||
See a [usage example](/docs/integrations/tools/slack).
|
||||
|
||||
```python
|
||||
from langchain_community.agent_toolkits import SlackToolkit
|
||||
|
||||
@@ -2,14 +2,49 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9fc6205b",
|
||||
"id": "00a924a0-57e2-43fa-95dc-3ea48a56d3a5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Arxiv\n",
|
||||
"---\n",
|
||||
"sidebar_label: Arxiv\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0f1b8ddb-8b06-4e7e-b0bb-8786dea15e2b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ArxivRetriever\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
">[arXiv](https://arxiv.org/) is an open-access archive for 2 million scholarly articles in the fields of physics, mathematics, computer science, quantitative biology, quantitative finance, statistics, electrical engineering and systems science, and economics.\n",
|
||||
"\n",
|
||||
"This notebook shows how to retrieve scientific articles from `Arxiv.org` into the Document format that is used downstream."
|
||||
"This notebook shows how to retrieve scientific articles from Arxiv.org into the [Document](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html) format that is used downstream.\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `ArxivRetriever` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/retrievers/langchain_community.retrievers.arxiv.ArxivRetriever.html).\n",
|
||||
"\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Retriever | Source | Package |\n",
|
||||
"| :--- | :--- | :---: |\n",
|
||||
"[ArxivRetriever](https://api.python.langchain.com/en/latest/retrievers/langchain_community.retrievers.arxiv.ArxivRetriever.html) | Scholarly articles on [arxiv.org](https://arxiv.org/) | langchain_community |\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"If you want to get automated tracing from individual queries, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "75d179b4-abc3-48db-9f8b-1cdb46d3aa77",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -17,15 +52,9 @@
|
||||
"id": "51489529-5dcd-4b86-bda6-de0a39d8ffd1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Installation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1435c804-069d-4ade-9a7b-006b97b767c1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First, you need to install `arxiv` python package."
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"This retriever lives in the `langchain-community` package. We will also need the [arxiv](https://pypi.org/project/arxiv/) dependency:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -37,7 +66,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet arxiv"
|
||||
"%pip install -qU langchain-community arxiv"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -45,54 +74,44 @@
|
||||
"id": "6c15470b-a16b-4e0d-bc6a-6998bafbb5a4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"`ArxivRetriever` has these arguments:\n",
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"`ArxivRetriever` parameters include:\n",
|
||||
"- optional `load_max_docs`: default=100. Use it to limit number of downloaded documents. It takes time to download all 100 documents, so use a small number for experiments. There is a hard limit of 300 for now.\n",
|
||||
"- optional `load_all_available_meta`: default=False. By default only the most important fields downloaded: `Published` (date when document was published/last updated), `Title`, `Authors`, `Summary`. If True, other fields also downloaded.\n",
|
||||
"- `get_full_documents`: boolean, default False. Determines whether to fetch full text of documents.\n",
|
||||
"\n",
|
||||
"`get_relevant_documents()` has one argument, `query`: free text which used to find documents in `Arxiv.org`"
|
||||
"See [API reference](https://api.python.langchain.com/en/latest/retrievers/langchain_community.retrievers.arxiv.ArxivRetriever.html) for more detail."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "a13f9e92-24b3-4cea-8541-2584c1cdb2d1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.retrievers import ArxivRetriever\n",
|
||||
"\n",
|
||||
"retriever = ArxivRetriever(\n",
|
||||
" load_max_docs=2,\n",
|
||||
" get_ful_documents=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ae3c3d16",
|
||||
"id": "30c27047-16cf-46b5-bb29-754f1696f2bb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Examples"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6fafb73b-d6ec-4822-b161-edf0aaf5224a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Running retriever"
|
||||
"## Usage\n",
|
||||
"\n",
|
||||
"`ArxivRetriever` supports retrieval by article identifier:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d0e6f506",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.retrievers import ArxivRetriever"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "f381f642",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = ArxivRetriever(load_max_docs=2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 2,
|
||||
"id": "20ae1a74",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -102,20 +121,20 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 3,
|
||||
"id": "1d5a5088",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'Published': '2016-05-26',\n",
|
||||
"{'Entry ID': 'http://arxiv.org/abs/1605.08386v1',\n",
|
||||
" 'Published': datetime.date(2016, 5, 26),\n",
|
||||
" 'Title': 'Heat-bath random walks with Markov bases',\n",
|
||||
" 'Authors': 'Caprice Stanley, Tobias Windisch',\n",
|
||||
" 'Summary': 'Graphs on lattice points are studied whose edges come from a finite set of\\nallowed moves of arbitrary length. We show that the diameter of these graphs on\\nfibers of a fixed integer matrix can be bounded from above by a constant. We\\nthen study the mixing behaviour of heat-bath random walks on these graphs. We\\nalso state explicit conditions on the set of moves so that the heat-bath random\\nwalk, a generalization of the Glauber dynamics, is an expander in fixed\\ndimension.'}"
|
||||
" 'Authors': 'Caprice Stanley, Tobias Windisch'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -126,17 +145,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 4,
|
||||
"id": "c0ccd0c7-f6a6-43e7-b842-5f57afb94224",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'arXiv:1605.08386v1 [math.CO] 26 May 2016\\nHEAT-BATH RANDOM WALKS WITH MARKOV BASES\\nCAPRICE STANLEY AND TOBIAS WINDISCH\\nAbstract. Graphs on lattice points are studied whose edges come from a finite set of\\nallowed moves of arbitrary length. We show that the diameter of these graphs on fibers of a\\nfixed integer matrix can be bounded from above by a constant. We then study the mixing\\nbehaviour of heat-b'"
|
||||
"'Graphs on lattice points are studied whose edges come from a finite set of\\nallowed moves of arbitrary length. We show that the diameter of these graphs on\\nfibers of a fixed integer matrix can be bounded from above by a constant. We\\nthen study the mixing behaviour of heat-bath random walks on these graphs. We\\nalso state explicit conditions on the set of moves so that the heat-bath random\\nwalk, a ge'"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -147,159 +166,143 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2670363b-3806-4c7e-b14d-90a4d5d2a200",
|
||||
"id": "c525c5c2-0961-4f4c-a208-dd6ceed76ea1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Question Answering on facts"
|
||||
"`ArxivRetriever` also supports retrieval based on natural language text:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 5,
|
||||
"id": "4cd3d079-4496-4ab8-adff-b86e6418bc74",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = retriever.invoke(\"What is the ImageBind model?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "9318c790-d388-45da-8d5c-57256619e2a1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'Entry ID': 'http://arxiv.org/abs/2305.05665v2',\n",
|
||||
" 'Published': datetime.date(2023, 5, 31),\n",
|
||||
" 'Title': 'ImageBind: One Embedding Space To Bind Them All',\n",
|
||||
" 'Authors': 'Rohit Girdhar, Alaaeldin El-Nouby, Zhuang Liu, Mannat Singh, Kalyan Vasudev Alwala, Armand Joulin, Ishan Misra'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs[0].metadata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2670363b-3806-4c7e-b14d-90a4d5d2a200",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use within a chain\n",
|
||||
"\n",
|
||||
"Like other retrievers, `ArxivRetriever` can be incorporated into LLM applications via [chains](/docs/how_to/sequence/).\n",
|
||||
"\n",
|
||||
"We will need a LLM or chat model:\n",
|
||||
"\n",
|
||||
"```{=mdx}\n",
|
||||
"import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
|
||||
"\n",
|
||||
"<ChatModelTabs customVarName=\"llm\" />\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "bcbeeaf5-79d1-4e29-8589-11dfb26761af",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# | output: false\n",
|
||||
"# | echo: false\n",
|
||||
"\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "bb3601df-53ea-4826-bdbe-554387bc3ad4",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# get a token: https://platform.openai.com/account/api-keys\n",
|
||||
"\n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"OPENAI_API_KEY = getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "e9c1a114-0410-4804-be30-05f34a9760f9",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from langchain_core.output_parsers import StrOutputParser\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"from langchain_core.runnables import RunnablePassthrough\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = OPENAI_API_KEY"
|
||||
"prompt = ChatPromptTemplate.from_template(\n",
|
||||
" \"\"\"Answer the question based only on the context provided.\n",
|
||||
"\n",
|
||||
"Context: {context}\n",
|
||||
"\n",
|
||||
"Question: {question}\"\"\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def format_docs(docs):\n",
|
||||
" return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"chain = (\n",
|
||||
" {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n",
|
||||
" | prompt\n",
|
||||
" | llm\n",
|
||||
" | StrOutputParser()\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "51a33cc9-ec42-4afc-8a2d-3bfff476aa59",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import ConversationalRetrievalChain\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"model = ChatOpenAI(model=\"gpt-3.5-turbo\") # switch to 'gpt-4'\n",
|
||||
"qa = ConversationalRetrievalChain.from_llm(model, retriever=retriever)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "ea537767-a8bf-4adf-ae03-b353c9145d58",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"-> **Question**: What are Heat-bath random walks with Markov base? \n",
|
||||
"\n",
|
||||
"**Answer**: I'm not sure, as I don't have enough context to provide a definitive answer. The term \"Heat-bath random walks with Markov base\" is not mentioned in the given text. Could you provide more information or context about where you encountered this term? \n",
|
||||
"\n",
|
||||
"-> **Question**: What is the ImageBind model? \n",
|
||||
"\n",
|
||||
"**Answer**: ImageBind is an approach developed by Facebook AI Research to learn a joint embedding across six different modalities, including images, text, audio, depth, thermal, and IMU data. The approach uses the binding property of images to align each modality's embedding to image embeddings and achieve an emergent alignment across all modalities. This enables novel multimodal capabilities, including cross-modal retrieval, embedding-space arithmetic, and audio-to-image generation, among others. The approach sets a new state-of-the-art on emergent zero-shot recognition tasks across modalities, outperforming specialist supervised models. Additionally, it shows strong few-shot recognition results and serves as a new way to evaluate vision models for visual and non-visual tasks. \n",
|
||||
"\n",
|
||||
"-> **Question**: How does Compositional Reasoning with Large Language Models works? \n",
|
||||
"\n",
|
||||
"**Answer**: Compositional reasoning with large language models refers to the ability of these models to correctly identify and represent complex concepts by breaking them down into smaller, more basic parts and combining them in a structured way. This involves understanding the syntax and semantics of language and using that understanding to build up more complex meanings from simpler ones. \n",
|
||||
"\n",
|
||||
"In the context of the paper \"Does CLIP Bind Concepts? Probing Compositionality in Large Image Models\", the authors focus specifically on the ability of a large pretrained vision and language model (CLIP) to encode compositional concepts and to bind variables in a structure-sensitive way. They examine CLIP's ability to compose concepts in a single-object setting, as well as in situations where concept binding is needed. \n",
|
||||
"\n",
|
||||
"The authors situate their work within the tradition of research on compositional distributional semantics models (CDSMs), which seek to bridge the gap between distributional models and formal semantics by building architectures which operate over vectors yet still obey traditional theories of linguistic composition. They compare the performance of CLIP with several architectures from research on CDSMs to evaluate its ability to encode and reason about compositional concepts. \n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"questions = [\n",
|
||||
" \"What are Heat-bath random walks with Markov base?\",\n",
|
||||
" \"What is the ImageBind model?\",\n",
|
||||
" \"How does Compositional Reasoning with Large Language Models works?\",\n",
|
||||
"]\n",
|
||||
"chat_history = []\n",
|
||||
"\n",
|
||||
"for question in questions:\n",
|
||||
" result = qa({\"question\": question, \"chat_history\": chat_history})\n",
|
||||
" chat_history.append((question, result[\"answer\"]))\n",
|
||||
" print(f\"-> **Question**: {question} \\n\")\n",
|
||||
" print(f\"**Answer**: {result['answer']} \\n\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "8e0c3fc6-ae62-4036-a885-dc60176a7745",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"-> **Question**: What are Heat-bath random walks with Markov base? Include references to answer. \n",
|
||||
"\n",
|
||||
"**Answer**: Heat-bath random walks with Markov base (HB-MB) is a class of stochastic processes that have been studied in the field of statistical mechanics and condensed matter physics. In these processes, a particle moves in a lattice by making a transition to a neighboring site, which is chosen according to a probability distribution that depends on the energy of the particle and the energy of its surroundings.\n",
|
||||
"\n",
|
||||
"The HB-MB process was introduced by Bortz, Kalos, and Lebowitz in 1975 as a way to simulate the dynamics of interacting particles in a lattice at thermal equilibrium. The method has been used to study a variety of physical phenomena, including phase transitions, critical behavior, and transport properties.\n",
|
||||
"\n",
|
||||
"References:\n",
|
||||
"\n",
|
||||
"Bortz, A. B., Kalos, M. H., & Lebowitz, J. L. (1975). A new algorithm for Monte Carlo simulation of Ising spin systems. Journal of Computational Physics, 17(1), 10-18.\n",
|
||||
"\n",
|
||||
"Binder, K., & Heermann, D. W. (2010). Monte Carlo simulation in statistical physics: an introduction. Springer Science & Business Media. \n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"questions = [\n",
|
||||
" \"What are Heat-bath random walks with Markov base? Include references to answer.\",\n",
|
||||
"]\n",
|
||||
"chat_history = []\n",
|
||||
"\n",
|
||||
"for question in questions:\n",
|
||||
" result = qa({\"question\": question, \"chat_history\": chat_history})\n",
|
||||
" chat_history.append((question, result[\"answer\"]))\n",
|
||||
" print(f\"-> **Question**: {question} \\n\")\n",
|
||||
" print(f\"**Answer**: {result['answer']} \\n\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "09794ab5-759c-4b56-95d4-2454d4d86da1",
|
||||
"execution_count": 9,
|
||||
"id": "62889c3c-8a49-4c76-9141-d777311af1f4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The ImageBind model is an approach to learn a joint embedding across six different modalities - images, text, audio, depth, thermal, and IMU data. It shows that only image-paired data is sufficient to bind the modalities together and can leverage large scale vision-language models for zero-shot capabilities and emergent applications such as cross-modal retrieval, composing modalities with arithmetic, cross-modal detection and generation.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.invoke(\"What is the ImageBind model?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e419acb8-d7ac-42a1-916f-c796f23dce9b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `ArxivRetriever` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/retrievers/langchain_community.retrievers.arxiv.ArxivRetriever.html)."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -318,7 +321,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -2,15 +2,39 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1edb9e6b",
|
||||
"id": "f9a62e19-b00b-4f6c-a700-1e500e4c290a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Azure AI Search\n",
|
||||
"---\n",
|
||||
"sidebar_label: Azure AI Search\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "76f74245-7220-4446-ae8d-4e5a9e998f1f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# AzureAISearchRetriever\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"[Azure AI Search](https://learn.microsoft.com/azure/search/search-what-is-azure-search) (formerly known as `Azure Cognitive Search`) is a Microsoft cloud search service that gives developers infrastructure, APIs, and tools for information retrieval of vector, keyword, and hybrid queries at scale.\n",
|
||||
"\n",
|
||||
"`AzureAISearchRetriever` is an integration module that returns documents from an unstructured query. It's based on the BaseRetriever class and it targets the 2023-11-01 stable REST API version of Azure AI Search, which means it supports vector indexing and queries.\n",
|
||||
"\n",
|
||||
"This guide will help you getting started with the Azure AI Search [retriever](/docs/concepts/#retrievers). For detailed documentation of all `AzureAISearchRetriever` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/retrievers/langchain_community.retrievers.azure_ai_search.AzureAISearchRetriever.html).\n",
|
||||
"\n",
|
||||
"`AzureAISearchRetriever` replaces `AzureCognitiveSearchRetriever`, which will soon be deprecated. We recommend switching to the newer version that's based on the most recent stable version of the search APIs.\n",
|
||||
"\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Retriever | Self-host | Cloud offering | Package |\n",
|
||||
"| :--- | :--- | :---: | :---: |\n",
|
||||
"[AzureAISearchRetriever](https://api.python.langchain.com/en/latest/retrievers/langchain_community.retrievers.azure_ai_search.AzureAISearchRetriever.html) | ❌ | ✅ | langchain_community |\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To use this module, you need:\n",
|
||||
"\n",
|
||||
"+ An Azure AI Search service. You can [create one](https://learn.microsoft.com/azure/search/search-create-service-portal) for free if you sign up for the Azure trial. A free service has lower quotas, but it's sufficient for running the code in this notebook.\n",
|
||||
@@ -19,7 +43,40 @@
|
||||
"\n",
|
||||
"+ An API key. API keys are generated when you create the search service. If you're just querying an index, you can use the query API key, otherwise use an admin API key. See [Find your API keys](https://learn.microsoft.com/azure/search/search-security-api-keys?tabs=rest-use%2Cportal-find%2Cportal-query#find-existing-keys) for details.\n",
|
||||
"\n",
|
||||
"`AzureAISearchRetriever` replaces `AzureCognitiveSearchRetriever`, which will soon be deprecated. We recommend switching to the newer version that's based on the most recent stable version of the search APIs."
|
||||
"We can then set the search service name, index name, and API key as environment variables (alternatively, you can pass them as arguments to `AzureAISearchRetriever`). The search index provides the searchable content."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6a56e83b-8563-4479-ab61-090fc79f5b00",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"AZURE_AI_SEARCH_SERVICE_NAME\"] = \"<YOUR_SEARCH_SERVICE_NAME>\"\n",
|
||||
"os.environ[\"AZURE_AI_SEARCH_INDEX_NAME\"] = \"<YOUR_SEARCH_INDEX_NAME>\"\n",
|
||||
"os.environ[\"AZURE_AI_SEARCH_API_KEY\"] = \"<YOUR_API_KEY>\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3e635218-8634-4f39-abc5-39e319eeb136",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated tracing from individual queries, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "88751b84-7cb7-4dd2-af35-c1e9b369d012",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -27,9 +84,9 @@
|
||||
"id": "f99d4456",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Install packages\n",
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"Use azure-documents-search package 11.4 or later."
|
||||
"This retriever lives in the `langchain-community` package. We will need some additional dependencies as well:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -39,9 +96,9 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain\n",
|
||||
"%pip install --upgrade --quiet langchain-community\n",
|
||||
"%pip install --upgrade --quiet langchain-openai\n",
|
||||
"%pip install --upgrade --quiet azure-search-documents\n",
|
||||
"%pip install --upgrade --quiet azure-search-documents>=11.4\n",
|
||||
"%pip install --upgrade --quiet azure-identity"
|
||||
]
|
||||
},
|
||||
@@ -50,7 +107,9 @@
|
||||
"id": "0474661d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Import required libraries"
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"For `AzureAISearchRetriever`, provide an `index_name`, `content_key`, and `top_k` set to the number of number of results you'd like to retrieve. Setting `top_k` to zero (the default) returns all results."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -60,52 +119,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from langchain_community.retrievers import AzureAISearchRetriever\n",
|
||||
"\n",
|
||||
"from langchain_community.retrievers import (\n",
|
||||
" AzureAISearchRetriever,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b7243e6d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Configure search settings\n",
|
||||
"\n",
|
||||
"Set the search service name, index name, and API key as environment variables (alternatively, you can pass them as arguments to `AzureAISearchRetriever`). The search index provides the searchable content. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "33fd23d1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"os.environ[\"AZURE_AI_SEARCH_SERVICE_NAME\"] = \"<YOUR_SEARCH_SERVICE_NAME>\"\n",
|
||||
"os.environ[\"AZURE_AI_SEARCH_INDEX_NAME\"] = \"<YOUR_SEARCH_INDEX_NAME>\"\n",
|
||||
"os.environ[\"AZURE_AI_SEARCH_API_KEY\"] = \"<YOUR_API_KEY>\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "057deaad",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create the retriever\n",
|
||||
"\n",
|
||||
"For `AzureAISearchRetriever`, provide an `index_name`, `content_key`, and `top_k` set to the number of number of results you'd like to retrieve. Setting `top_k` to zero (the default) returns all results."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"id": "c18d0c4c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = AzureAISearchRetriever(\n",
|
||||
" content_key=\"content\", top_k=1, index_name=\"langchain-vector-demo\"\n",
|
||||
")"
|
||||
@@ -116,6 +131,8 @@
|
||||
"id": "e94ea104",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage\n",
|
||||
"\n",
|
||||
"Now you can use it to retrieve documents from Azure AI Search. \n",
|
||||
"This is the method you would call to do so. It will return all documents relevant to the query. "
|
||||
]
|
||||
@@ -259,6 +276,69 @@
|
||||
"source": [
|
||||
"retriever.invoke(\"does the president have a plan for covid-19?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "dd6c9ba9-978f-4e2c-9cc7-ccd1be58eafb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use within a chain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cbcd8ac6-12ea-4c22-8a98-c24825d598d7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.output_parsers import StrOutputParser\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"from langchain_core.runnables import RunnablePassthrough\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_template(\n",
|
||||
" \"\"\"Answer the question based only on the context provided.\n",
|
||||
"\n",
|
||||
"Context: {context}\n",
|
||||
"\n",
|
||||
"Question: {question}\"\"\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def format_docs(docs):\n",
|
||||
" return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"chain = (\n",
|
||||
" {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n",
|
||||
" | prompt\n",
|
||||
" | llm\n",
|
||||
" | StrOutputParser()\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "db80f3c7-83e1-4965-8ff2-a3dd66a07f0e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain.invoke(\"does the president have a plan for covid-19?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a3d6140e-c2a0-40b2-a141-cab61ab39185",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `AzureAISearchRetriever` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/retrievers/langchain_community.retrievers.azure_ai_search.AzureAISearchRetriever.html)."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -277,7 +357,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1,19 +1,86 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b0872249-1af5-4d54-b816-1babad7a8c9e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Bedrock (Knowledge Bases)\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b6636c27-35da-4ba7-8313-eca21660cab3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Bedrock (Knowledge Bases)\n",
|
||||
"# Bedrock (Knowledge Bases) Retriever\n",
|
||||
"\n",
|
||||
"> [Knowledge bases for Amazon Bedrock](https://aws.amazon.com/bedrock/knowledge-bases/) is an Amazon Web Services (AWS) offering which lets you quickly build RAG applications by using your private data to customize FM response.\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"> Implementing `RAG` requires organizations to perform several cumbersome steps to convert data into embeddings (vectors), store the embeddings in a specialized vector database, and build custom integrations into the database to search and retrieve text relevant to the user’s query. This can be time-consuming and inefficient.\n",
|
||||
"This guide will help you getting started with the AWS Knowledge Bases [retriever](/docs/concepts/#retrievers).\n",
|
||||
"\n",
|
||||
"> With `Knowledge Bases for Amazon Bedrock`, simply point to the location of your data in `Amazon S3`, and `Knowledge Bases for Amazon Bedrock` takes care of the entire ingestion workflow into your vector database. If you do not have an existing vector database, Amazon Bedrock creates an Amazon OpenSearch Serverless vector store for you. For retrievals, use the Langchain - Amazon Bedrock integration via the Retrieve API to retrieve relevant results for a user query from knowledge bases.\n",
|
||||
"[Knowledge Bases for Amazon Bedrock](https://aws.amazon.com/bedrock/knowledge-bases/) is an Amazon Web Services (AWS) offering which lets you quickly build RAG applications by using your private data to customize FM response.\n",
|
||||
"\n",
|
||||
"> Knowledge base can be configured through [AWS Console](https://aws.amazon.com/console/) or by using [AWS SDKs](https://aws.amazon.com/developer/tools/)."
|
||||
"Implementing `RAG` requires organizations to perform several cumbersome steps to convert data into embeddings (vectors), store the embeddings in a specialized vector database, and build custom integrations into the database to search and retrieve text relevant to the user’s query. This can be time-consuming and inefficient.\n",
|
||||
"\n",
|
||||
"With `Knowledge Bases for Amazon Bedrock`, simply point to the location of your data in `Amazon S3`, and `Knowledge Bases for Amazon Bedrock` takes care of the entire ingestion workflow into your vector database. If you do not have an existing vector database, Amazon Bedrock creates an Amazon OpenSearch Serverless vector store for you. For retrievals, use the Langchain - Amazon Bedrock integration via the Retrieve API to retrieve relevant results for a user query from knowledge bases.\n",
|
||||
"\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Retriever | Self-host | Cloud offering | Package |\n",
|
||||
"| :--- | :--- | :---: | :---: |\n",
|
||||
"[AmazonKnowledgeBasesRetriever](https://api.python.langchain.com/en/latest/retrievers/langchain_aws.retrievers.bedrock.AmazonKnowledgeBasesRetriever.html) | ❌ | ✅ | langchain_aws |\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cd092536-61bd-4b3f-9050-076daccc9e72",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"Knowledge Bases can be configured through [AWS Console](https://aws.amazon.com/console/) or by using [AWS SDKs](https://aws.amazon.com/developer/tools/). We will need the `knowledge_base_id` to instantiate the retriever."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "238c0ceb-d4b6-409e-bed9-d30143d2f2c9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated tracing from individual queries, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e4426098-820c-48dc-9826-056a91bebe9e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4ede6277-ea56-45f6-8ef4-fe14734ee279",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"This retriever lives in the `langchain-aws` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4db1af24-0969-43bd-8438-af5e3024b0d0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-aws"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -21,17 +88,9 @@
|
||||
"id": "b34c8cbe-c6e5-4398-adf1-4925204bcaed",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using the Knowledge Bases Retriever"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "26c97d36-911c-4fe0-a478-546192728f30",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet boto3"
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our retriever:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -41,7 +100,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.retrievers import AmazonKnowledgeBasesRetriever\n",
|
||||
"from langchain_aws.retrievers import AmazonKnowledgeBasesRetriever\n",
|
||||
"\n",
|
||||
"retriever = AmazonKnowledgeBasesRetriever(\n",
|
||||
" knowledge_base_id=\"PUIJP4EQUA\",\n",
|
||||
@@ -49,6 +108,14 @@
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9dff39f8-b6ba-41bf-b95b-d345928ed07d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -66,7 +133,7 @@
|
||||
"id": "7de9b61b-597b-4aba-95fb-49d11e84510e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Using in a QA Chain"
|
||||
"## Use within a chain"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -78,7 +145,7 @@
|
||||
"source": [
|
||||
"from botocore.client import Config\n",
|
||||
"from langchain.chains import RetrievalQA\n",
|
||||
"from langchain_community.llms import Bedrock\n",
|
||||
"from langchain_aws import Bedrock\n",
|
||||
"\n",
|
||||
"model_kwargs_claude = {\"temperature\": 0, \"top_k\": 10, \"max_tokens_to_sample\": 3000}\n",
|
||||
"\n",
|
||||
@@ -90,6 +157,16 @@
|
||||
"\n",
|
||||
"qa(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "22e2538a-e042-4997-bb81-b68ecb27d665",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `AmazonKnowledgeBasesRetriever` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/retrievers/langchain_aws.retrievers.bedrock.AmazonKnowledgeBasesRetriever.html)."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -108,7 +185,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -34,8 +34,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_cohere import ChatCohere\n",
|
||||
"from langchain_community.retrievers import CohereRagRetriever\n",
|
||||
"from langchain_cohere import ChatCohere, CohereRagRetriever\n",
|
||||
"from langchain_core.documents import Document"
|
||||
]
|
||||
},
|
||||
@@ -200,7 +199,7 @@
|
||||
"source": [
|
||||
"docs = rag.invoke(\n",
|
||||
" \"Does langchain support cohere RAG?\",\n",
|
||||
" source_documents=[\n",
|
||||
" documents=[\n",
|
||||
" Document(page_content=\"Langchain supports cohere RAG!\"),\n",
|
||||
" Document(page_content=\"The sky is blue!\"),\n",
|
||||
" ],\n",
|
||||
@@ -208,6 +207,14 @@
|
||||
"_pretty_print(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "45a9470f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Please note that connectors and documents cannot be used simultaneously. If you choose to provide documents in the `invoke` method, they will take precedence, and connectors will not be utilized for that particular request, as shown in the snippet above!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
|
||||
@@ -2,14 +2,72 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ab66dd43",
|
||||
"id": "41ccce84-f6d9-4ba0-8281-22cbf29f20d3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Elasticsearch\n",
|
||||
"---\n",
|
||||
"sidebar_label: Elasticsearch\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "54c4d916-05db-4e01-9893-c711904205b3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ElasticsearchRetriever\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
">[Elasticsearch](https://www.elastic.co/elasticsearch/) is a distributed, RESTful search and analytics engine. It provides a distributed, multitenant-capable full-text search engine with an HTTP web interface and schema-free JSON documents. It supports keyword search, vector search, hybrid search and complex filtering.\n",
|
||||
"\n",
|
||||
"The `ElasticsearchRetriever` is a generic wrapper to enable flexible access to all `Elasticsearch` features through the [Query DSL](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html). For most use cases the other classes (`ElasticsearchStore`, `ElasticsearchEmbeddings`, etc.) should suffice, but if they don't you can use `ElasticsearchRetriever`."
|
||||
"The `ElasticsearchRetriever` is a generic wrapper to enable flexible access to all `Elasticsearch` features through the [Query DSL](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html). For most use cases the other classes (`ElasticsearchStore`, `ElasticsearchEmbeddings`, etc.) should suffice, but if they don't you can use `ElasticsearchRetriever`.\n",
|
||||
"\n",
|
||||
"This guide will help you getting started with the Elasticsearch [retriever](/docs/concepts/#retrievers). For detailed documentation of all `ElasticsearchRetriever` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/retrievers/langchain_elasticsearch.retrievers.ElasticsearchRetriever.html).\n",
|
||||
"\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Retriever | Self-host | Cloud offering | Package |\n",
|
||||
"| :--- | :--- | :---: | :---: |\n",
|
||||
"[ElasticsearchRetriever](https://api.python.langchain.com/en/latest/retrievers/langchain_elasticsearch.retrievers.ElasticsearchRetriever.html) | ✅ | ✅ | langchain_elasticsearch |\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"There are two main ways to set up an Elasticsearch instance:\n",
|
||||
"\n",
|
||||
"- Elastic Cloud: [Elastic Cloud](https://cloud.elastic.co/) is a managed Elasticsearch service. Sign up for a [free trial](https://www.elastic.co/cloud/cloud-trial-overview).\n",
|
||||
"To connect to an Elasticsearch instance that does not require login credentials (starting the docker instance with security enabled), pass the Elasticsearch URL and index name along with the embedding object to the constructor.\n",
|
||||
"\n",
|
||||
"- Local Install Elasticsearch: Get started with Elasticsearch by running it locally. The easiest way is to use the official Elasticsearch Docker image. See the [Elasticsearch Docker documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html) for more information."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e13a7b58-3a56-4ce6-a4d5-81a8dd2080df",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated tracing from individual queries, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "492b81d0-c85b-4693-ae4f-3f33da571ddd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "78335745-f14d-411d-9c06-64ff83eb9358",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"This retriever lives in the `langchain-elasticsearch` package. For demonstration purposes, we will also install `langchain-community` to generate text [embeddings](/docs/concepts/#embedding-models)."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -21,7 +79,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet elasticsearch langchain-elasticsearch"
|
||||
"%pip install -qU langchain-community langchain-elasticsearch"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -48,7 +106,7 @@
|
||||
"id": "24c0d140",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Configure\n",
|
||||
"### Configure\n",
|
||||
"\n",
|
||||
"Here we define the conncection to Elasticsearch. In this example we use a locally running instance. Alternatively, you can make an account in [Elastic Cloud](https://cloud.elastic.co/) and start a [free trial](https://www.elastic.co/cloud/cloud-trial-overview)."
|
||||
]
|
||||
@@ -70,7 +128,7 @@
|
||||
"id": "60aa7c20",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For vector search, we are going to use random embeddings just for illustration. For real use cases, pick one of the available LangChain `Embeddings` classes."
|
||||
"For vector search, we are going to use random embeddings just for illustration. For real use cases, pick one of the available LangChain [Embeddings](/docs/integrations/text_embedding) classes."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -88,7 +146,7 @@
|
||||
"id": "b4eea654",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Define example data"
|
||||
"#### Define example data"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -118,7 +176,7 @@
|
||||
"id": "1c518c42",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Index data\n",
|
||||
"#### Index data\n",
|
||||
"\n",
|
||||
"Typically, users make use of `ElasticsearchRetriever` when they already have data in an Elasticsearch index. Here we index some example text documents. If you created an index for example using `ElasticsearchStore.from_documents` that's also fine."
|
||||
]
|
||||
@@ -209,14 +267,8 @@
|
||||
"id": "08437fa2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage examples"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "469aa295",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"### Vector search\n",
|
||||
"\n",
|
||||
"Dense vector retrival using fake embeddings in this example."
|
||||
@@ -543,6 +595,91 @@
|
||||
"\n",
|
||||
"custom_mapped_retriever.invoke(\"foo\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1663feff-4527-4fb0-9395-b28af5c9ec99",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage\n",
|
||||
"\n",
|
||||
"Following the above examples, we use `.invoke` to issue a single query. Because retrievers are Runnables, we can use any method in the [Runnable interface](/docs/concepts/#runnable-interface), such as `.batch`, as well."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f4f946ed-ff3a-43d7-9e0d-7983ff13c868",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use within a chain\n",
|
||||
"\n",
|
||||
"We can also incorporate retrievers into [chains](/docs/how_to/sequence/) to build larger applications, such as a simple [RAG](/docs/tutorials/rag/) application. For demonstration purposes, we instantiate an OpenAI chat model as well."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "19302ef1-dd49-4f9c-8d87-4ea23b8296e2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-openai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "832857a7-3b16-4a85-acc7-28efe6ebdae8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.output_parsers import StrOutputParser\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"from langchain_core.runnables import RunnablePassthrough\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_template(\n",
|
||||
" \"\"\"Answer the question based only on the context provided.\n",
|
||||
"\n",
|
||||
"Context: {context}\n",
|
||||
"\n",
|
||||
"Question: {question}\"\"\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def format_docs(docs):\n",
|
||||
" return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"chain = (\n",
|
||||
" {\"context\": vector_retriever | format_docs, \"question\": RunnablePassthrough()}\n",
|
||||
" | prompt\n",
|
||||
" | llm\n",
|
||||
" | StrOutputParser()\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7317942b-7c9a-477d-ba11-3421da804a22",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain.invoke(\"what is foo?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "eeb49714-ba5a-4b10-8e58-67d061a486d1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `ElasticsearchRetriever` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/retrievers/langchain_elasticsearch.retrievers.ElasticsearchRetriever.html)."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -561,7 +698,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1,27 +1,44 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Google Vertex AI Search\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Google Vertex AI Search\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
">[Google Vertex AI Search](https://cloud.google.com/enterprise-search) (formerly known as `Enterprise Search` on `Generative AI App Builder`) is a part of the [Vertex AI](https://cloud.google.com/vertex-ai) machine learning platform offered by `Google Cloud`.\n",
|
||||
">\n",
|
||||
">`Vertex AI Search` lets organizations quickly build generative AI-powered search engines for customers and employees. It's underpinned by a variety of `Google Search` technologies, including semantic search, which helps deliver more relevant results than traditional keyword-based search techniques by using natural language processing and machine learning techniques to infer relationships within the content and intent from the user’s query input. Vertex AI Search also benefits from Google’s expertise in understanding how users search and factors in content relevance to order displayed results.\n",
|
||||
"\n",
|
||||
">`Vertex AI Search` is available in the `Google Cloud Console` and via an API for enterprise workflow integration.\n",
|
||||
"\n",
|
||||
"This notebook demonstrates how to configure `Vertex AI Search` and use the Vertex AI Search retriever. The Vertex AI Search retriever encapsulates the [Python client library](https://cloud.google.com/generative-ai-app-builder/docs/libraries#client-libraries-install-python) and uses it to access the [Search Service API](https://cloud.google.com/python/docs/reference/discoveryengine/latest/google.cloud.discoveryengine_v1beta.services.search_service).\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Install pre-requisites\n",
|
||||
"This notebook demonstrates how to configure `Vertex AI Search` and use the Vertex AI Search [retriever](/docs/concepts/#retrievers). The Vertex AI Search retriever encapsulates the [Python client library](https://cloud.google.com/generative-ai-app-builder/docs/libraries#client-libraries-install-python) and uses it to access the [Search Service API](https://cloud.google.com/python/docs/reference/discoveryengine/latest/google.cloud.discoveryengine_v1beta.services.search_service).\n",
|
||||
"\n",
|
||||
"You need to install the `google-cloud-discoveryengine` package to use the Vertex AI Search retriever.\n"
|
||||
"For detailed documentation of all `VertexAISearchRetriever` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/vertex_ai_search/langchain_google_community.vertex_ai_search.VertexAISearchRetriever.html).\n",
|
||||
"\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Retriever | Self-host | Cloud offering | Package |\n",
|
||||
"| :--- | :--- | :---: | :---: |\n",
|
||||
"[VertexAISearchRetriever](https://api.python.langchain.com/en/latest/vertex_ai_search/langchain_google_community.vertex_ai_search.VertexAISearchRetriever.html) | ❌ | ✅ | langchain_google_community |\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"You need to install the `langchain-google-community` and `google-cloud-discoveryengine` packages to use the Vertex AI Search retriever."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -30,14 +47,14 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet google-cloud-discoveryengine"
|
||||
"%pip install -qU langchain-google-community google-cloud-discoveryengine"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Configure access to Google Cloud and Vertex AI Search\n",
|
||||
"### Configure access to Google Cloud and Vertex AI Search\n",
|
||||
"\n",
|
||||
"Vertex AI Search is generally available without allowlist as of August 2023.\n",
|
||||
"\n",
|
||||
@@ -48,7 +65,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create a search engine and populate an unstructured data store\n",
|
||||
"#### Create a search engine and populate an unstructured data store\n",
|
||||
"\n",
|
||||
"- Follow the instructions in the [Vertex AI Search Getting Started guide](https://cloud.google.com/generative-ai-app-builder/docs/try-enterprise-search) to set up a Google Cloud project and Vertex AI Search.\n",
|
||||
"- [Use the Google Cloud Console to create an unstructured data store](https://cloud.google.com/generative-ai-app-builder/docs/create-engine-es#unstructured-data)\n",
|
||||
@@ -60,7 +77,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Set credentials to access Vertex AI Search API\n",
|
||||
"#### Set credentials to access Vertex AI Search API\n",
|
||||
"\n",
|
||||
"The [Vertex AI Search client libraries](https://cloud.google.com/generative-ai-app-builder/docs/libraries) used by the Vertex AI Search retriever provide high-level language support for authenticating to Google Cloud programmatically.\n",
|
||||
"Client libraries support [Application Default Credentials (ADC)](https://cloud.google.com/docs/authentication/application-default-credentials); the libraries look for credentials in a set of defined locations and use those credentials to authenticate requests to the API.\n",
|
||||
@@ -87,16 +104,16 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Configure and use the Vertex AI Search retriever\n",
|
||||
"### Configure and use the Vertex AI Search retriever\n",
|
||||
"\n",
|
||||
"The Vertex AI Search retriever is implemented in the `langchain.retriever.GoogleVertexAISearchRetriever` class. The `get_relevant_documents` method returns a list of `langchain.schema.Document` documents where the `page_content` field of each document is populated the document content.\n",
|
||||
"The Vertex AI Search retriever is implemented in the `langchain_google_community.VertexAISearchRetriever` class. The `get_relevant_documents` method returns a list of `langchain.schema.Document` documents where the `page_content` field of each document is populated the document content.\n",
|
||||
"Depending on the data type used in Vertex AI Search (website, structured or unstructured) the `page_content` field is populated as follows:\n",
|
||||
"\n",
|
||||
"- Website with advanced indexing: an `extractive answer` that matches a query. The `metadata` field is populated with metadata (if any) of the document from which the segments or answers were extracted.\n",
|
||||
"- Unstructured data source: either an `extractive segment` or an `extractive answer` that matches a query. The `metadata` field is populated with metadata (if any) of the document from which the segments or answers were extracted.\n",
|
||||
"- Structured data source: a string json containing all the fields returned from the structured data source. The `metadata` field is populated with metadata (if any) of the document\n",
|
||||
"\n",
|
||||
"### Extractive answers & extractive segments\n",
|
||||
"#### Extractive answers & extractive segments\n",
|
||||
"\n",
|
||||
"An extractive answer is verbatim text that is returned with each search result. It is extracted directly from the original document. Extractive answers are typically displayed near the top of web pages to provide an end user with a brief answer that is contextually relevant to their query. Extractive answers are available for website and unstructured search.\n",
|
||||
"\n",
|
||||
@@ -108,7 +125,7 @@
|
||||
"\n",
|
||||
"When creating an instance of the retriever you can specify a number of parameters that control which data store to access and how a natural language query is processed, including configurations for extractive answers and segments.\n",
|
||||
"\n",
|
||||
"### The mandatory parameters are:\n",
|
||||
"#### The mandatory parameters are:\n",
|
||||
"\n",
|
||||
"- `project_id` - Your Google Cloud Project ID.\n",
|
||||
"- `location_id` - The location of the data store.\n",
|
||||
@@ -148,15 +165,15 @@
|
||||
"\n",
|
||||
"To update to the new retriever, make the following changes:\n",
|
||||
"\n",
|
||||
"- Change the import from: `from langchain.retrievers import GoogleCloudEnterpriseSearchRetriever` -> `from langchain.retrievers import GoogleVertexAISearchRetriever`.\n",
|
||||
"- Change all class references from `GoogleCloudEnterpriseSearchRetriever` -> `GoogleVertexAISearchRetriever`.\n"
|
||||
"- Change the import from: `from langchain.retrievers import GoogleCloudEnterpriseSearchRetriever` -> `from langchain_google_community import VertexAISearchRetriever`.\n",
|
||||
"- Change all class references from `GoogleCloudEnterpriseSearchRetriever` -> `VertexAISearchRetriever`.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Configure and use the retriever for **unstructured** data with extractive segments\n"
|
||||
"Note: When using the retriever, if you want to get automated tracing from individual queries, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -165,9 +182,28 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.retrievers import (\n",
|
||||
" GoogleVertexAIMultiTurnSearchRetriever,\n",
|
||||
" GoogleVertexAISearchRetriever,\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"### Configure and use the retriever for **unstructured** data with extractive segments"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_community import (\n",
|
||||
" VertexAIMultiTurnSearchRetriever,\n",
|
||||
" VertexAISearchRetriever,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"PROJECT_ID = \"<YOUR PROJECT ID>\" # Set to your Project ID\n",
|
||||
@@ -182,7 +218,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = GoogleVertexAISearchRetriever(\n",
|
||||
"retriever = VertexAISearchRetriever(\n",
|
||||
" project_id=PROJECT_ID,\n",
|
||||
" location_id=LOCATION_ID,\n",
|
||||
" data_store_id=DATA_STORE_ID,\n",
|
||||
@@ -216,7 +252,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = GoogleVertexAISearchRetriever(\n",
|
||||
"retriever = VertexAISearchRetriever(\n",
|
||||
" project_id=PROJECT_ID,\n",
|
||||
" location_id=LOCATION_ID,\n",
|
||||
" data_store_id=DATA_STORE_ID,\n",
|
||||
@@ -243,7 +279,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = GoogleVertexAISearchRetriever(\n",
|
||||
"retriever = VertexAISearchRetriever(\n",
|
||||
" project_id=PROJECT_ID,\n",
|
||||
" location_id=LOCATION_ID,\n",
|
||||
" data_store_id=DATA_STORE_ID,\n",
|
||||
@@ -269,7 +305,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = GoogleVertexAISearchRetriever(\n",
|
||||
"retriever = VertexAISearchRetriever(\n",
|
||||
" project_id=PROJECT_ID,\n",
|
||||
" location_id=LOCATION_ID,\n",
|
||||
" data_store_id=DATA_STORE_ID,\n",
|
||||
@@ -297,7 +333,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = GoogleVertexAISearchRetriever(\n",
|
||||
"retriever = VertexAISearchRetriever(\n",
|
||||
" project_id=PROJECT_ID,\n",
|
||||
" location_id=LOCATION_ID,\n",
|
||||
" search_engine_id=SEARCH_ENGINE_ID,\n",
|
||||
@@ -325,7 +361,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = GoogleVertexAIMultiTurnSearchRetriever(\n",
|
||||
"retriever = VertexAIMultiTurnSearchRetriever(\n",
|
||||
" project_id=PROJECT_ID, location_id=LOCATION_ID, data_store_id=DATA_STORE_ID\n",
|
||||
")\n",
|
||||
"\n",
|
||||
@@ -333,6 +369,85 @@
|
||||
"for doc in result:\n",
|
||||
" print(doc)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage\n",
|
||||
"\n",
|
||||
"Following the above examples, we use `.invoke` to issue a single query. Because retrievers are Runnables, we can use any method in the [Runnable interface](/docs/concepts/#runnable-interface), such as `.batch`, as well."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use within a chain\n",
|
||||
"\n",
|
||||
"We can also incorporate retrievers into [chains](/docs/how_to/sequence/) to build larger applications, such as a simple [RAG](/docs/tutorials/rag/) application. For demonstration purposes, we instantiate a VertexAI chat model as well. See the corresponding Vertex [integration docs](/docs/integrations/chat/google_vertex_ai_palm/) for setup instructions."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-google-vertexai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.output_parsers import StrOutputParser\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"from langchain_core.runnables import RunnablePassthrough\n",
|
||||
"from langchain_google_vertexai import ChatVertexAI\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_template(\n",
|
||||
" \"\"\"Answer the question based only on the context provided.\n",
|
||||
"\n",
|
||||
"Context: {context}\n",
|
||||
"\n",
|
||||
"Question: {question}\"\"\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"llm = ChatVertexAI(model_name=\"chat-bison\", temperature=0)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def format_docs(docs):\n",
|
||||
" return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"chain = (\n",
|
||||
" {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n",
|
||||
" | prompt\n",
|
||||
" | llm\n",
|
||||
" | StrOutputParser()\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain.invoke(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `VertexAISearchRetriever` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/vertex_ai_search/langchain_google_community.vertex_ai_search.VertexAISearchRetriever.html)."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -351,7 +466,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
41
docs/docs/integrations/retrievers/index.mdx
Normal file
41
docs/docs/integrations/retrievers/index.mdx
Normal file
@@ -0,0 +1,41 @@
|
||||
---
|
||||
sidebar_position: 0
|
||||
sidebar_class_name: hidden
|
||||
---
|
||||
|
||||
# Retrievers
|
||||
|
||||
A [retriever](/docs/concepts/#retrievers) is an interface that returns documents given an unstructured query.
|
||||
It is more general than a vector store.
|
||||
A retriever does not need to be able to store documents, only to return (or retrieve) them.
|
||||
Retrievers can be created from vector stores, but are also broad enough to include [Wikipedia search](/docs/integrations/retrievers/wikipedia/) and [Amazon Kendra](/docs/integrations/retrievers/amazon_kendra_retriever/).
|
||||
|
||||
Retrievers accept a string query as input and return a list of [Documents](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html) as output.
|
||||
|
||||
For specifics on how to use retrievers, see the [relevant how-to guides here](/docs/how_to/#retrievers).
|
||||
|
||||
Note that all [vector stores](/docs/concepts/#vector-stores) can be [cast to retrievers](/docs/how_to/vectorstore_retriever/).
|
||||
Refer to the vector store [integration docs](/docs/integrations/vectorstores/) for available vector stores.
|
||||
This page lists custom retrievers, implemented via subclassing [BaseRetriever](/docs/how_to/custom_retriever/).
|
||||
|
||||
## Bring-your-own documents
|
||||
|
||||
The below retrievers allow you to index and search a custom corpus of documents.
|
||||
|
||||
| Retriever | Self-host | Cloud offering | Package |
|
||||
|-----------|-----------|----------------|---------|
|
||||
| [AmazonKnowledgeBasesRetriever](/docs/integrations/retrievers/bedrock) | ❌ | ✅ | [langchain_aws](https://api.python.langchain.com/en/latest/retrievers/langchain_aws.retrievers.bedrock.AmazonKnowledgeBasesRetriever.html) |
|
||||
| [AzureAISearchRetriever](/docs/integrations/retrievers/azure_ai_search) | ❌ | ✅ | [langchain_community](https://api.python.langchain.com/en/latest/retrievers/langchain_community.retrievers.azure_ai_search.AzureAISearchRetriever.html) |
|
||||
| [ElasticsearchRetriever](/docs/integrations/retrievers/elasticsearch_retriever) | ✅ | ✅ | [langchain_elasticsearch](https://api.python.langchain.com/en/latest/retrievers/langchain_elasticsearch.retrievers.ElasticsearchRetriever.html) |
|
||||
| [MilvusCollectionHybridSearchRetriever](/docs/integrations/retrievers/milvus_hybrid_search) | ✅ | ❌ | [langchain_milvus](https://api.python.langchain.com/en/latest/retrievers/langchain_milvus.retrievers.milvus_hybrid_search.MilvusCollectionHybridSearchRetriever.html) |
|
||||
| [VertexAISearchRetriever](/docs/integrations/retrievers/google_vertex_ai_search) | ❌ | ✅ | [langchain_google_community](https://api.python.langchain.com/en/latest/vertex_ai_search/langchain_google_community.vertex_ai_search.VertexAISearchRetriever.html) |
|
||||
|
||||
## External index
|
||||
|
||||
The below retrievers will search over an external index (e.g., constructed from Internet data or similar).
|
||||
|
||||
| Retriever | Source | Package |
|
||||
|-----------|--------|---------|
|
||||
| [ArxivRetriever](/docs/integrations/retrievers/arxiv) | Scholarly articles on [arxiv.org](https://arxiv.org/) | [langchain_community](https://api.python.langchain.com/en/latest/retrievers/langchain_community.retrievers.arxiv.ArxivRetriever.html) |
|
||||
| [TavilySearchAPIRetriever](/docs/integrations/retrievers/tavily) | Internet search | [langchain_community](https://api.python.langchain.com/en/latest/retrievers/langchain_community.retrievers.tavily_search_api.TavilySearchAPIRetriever.html) |
|
||||
| [WikipediaRetriever](/docs/integrations/retrievers/wikipedia) | [Wikipedia](https://www.wikipedia.org/) articles | [langchain_community](https://api.python.langchain.com/en/latest/retrievers/langchain_community.retrievers.wikipedia.WikipediaRetriever.html) |
|
||||
@@ -2,21 +2,48 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Milvus Hybrid Search\n",
|
||||
"---\n",
|
||||
"sidebar_label: Milvus Hybrid Search\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Milvus Hybrid Search Retriever\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"> [Milvus](https://milvus.io/docs) is an open-source vector database built to power embedding similarity search and AI applications. Milvus makes unstructured data search more accessible, and provides a consistent user experience regardless of the deployment environment.\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use the Milvus Hybrid Search retriever, which combines the strengths of both dense and sparse vector search.\n",
|
||||
"This will help you getting started with the Milvus Hybrid Search [retriever](/docs/concepts/#retrievers), which combines the strengths of both dense and sparse vector search. For detailed documentation of all `MilvusCollectionHybridSearchRetriever` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/retrievers/langchain_milvus.retrievers.milvus_hybrid_search.MilvusCollectionHybridSearchRetriever.html).\n",
|
||||
"\n",
|
||||
"For more reference please go to [Milvus Multi-Vector Search](https://milvus.io/docs/multi-vector-search.md)\n",
|
||||
"\n"
|
||||
"See also the Milvus Multi-Vector Search [docs](https://milvus.io/docs/multi-vector-search.md).\n",
|
||||
"\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Retriever | Self-host | Cloud offering | Package |\n",
|
||||
"| :--- | :--- | :---: | :---: |\n",
|
||||
"[MilvusCollectionHybridSearchRetriever](https://api.python.langchain.com/en/latest/retrievers/langchain_milvus.retrievers.milvus_hybrid_search.MilvusCollectionHybridSearchRetriever.html) | ✅ | ❌ | langchain_milvus |\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"If you want to get automated tracing from individual queries, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -28,9 +55,9 @@
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## Prerequisites\n",
|
||||
"### Install dependencies\n",
|
||||
"You need to prepare to install the following dependencies\n"
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"This retriever lives in the `langchain-milvus` package. This guide requires the following dependencies:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -50,32 +77,18 @@
|
||||
"%pip install --upgrade --quiet pymilvus[model] langchain-milvus langchain-openai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"Import necessary modules and classes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
},
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.output_parsers import StrOutputParser\n",
|
||||
"from langchain_core.prompts import PromptTemplate\n",
|
||||
"from langchain_core.runnables import RunnablePassthrough\n",
|
||||
"from langchain_milvus.retrievers import MilvusCollectionHybridSearchRetriever\n",
|
||||
"from langchain_milvus.utils.sparse import BM25SparseEmbedding\n",
|
||||
"from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
|
||||
"from pymilvus import (\n",
|
||||
" Collection,\n",
|
||||
" CollectionSchema,\n",
|
||||
@@ -86,34 +99,15 @@
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.output_parsers import StrOutputParser\n",
|
||||
"from langchain_core.prompts import PromptTemplate\n",
|
||||
"from langchain_core.runnables import RunnablePassthrough\n",
|
||||
"from langchain_milvus.retrievers import MilvusCollectionHybridSearchRetriever\n",
|
||||
"from langchain_milvus.utils.sparse import BM25SparseEmbedding\n",
|
||||
"from langchain_openai import ChatOpenAI, OpenAIEmbeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Start the Milvus service\n",
|
||||
"\n",
|
||||
"Please refer to the [Milvus documentation](https://milvus.io/docs/install_standalone-docker.md) to start the Milvus service.\n",
|
||||
"\n",
|
||||
"After starting milvus, you need to specify your milvus connection URI.\n"
|
||||
"After starting milvus, you need to specify your milvus connection URI."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -155,11 +149,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
"## Prepare data and Load\n",
|
||||
"### Prepare dense and sparse embedding functions\n",
|
||||
"\n",
|
||||
" Let us fictionalize 10 fake descriptions of novels. In actual production, it may be a large amount of text data."
|
||||
"Let us fictionalize 10 fake descriptions of novels. In actual production, it may be a large amount of text data."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -379,15 +371,14 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Build RAG chain with Retriever\n",
|
||||
"### Create the Retriever\n",
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Define search parameters for sparse and dense fields, and create a retriever"
|
||||
"Now we can instantiate our retriever, defining search parameters for sparse and dense fields:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -416,6 +407,13 @@
|
||||
"In the input parameters of this Retriever, we use a dense embedding and a sparse embedding to perform hybrid search on the two fields of this Collection, and use WeightedRanker for reranking. Finally, 3 top-K Documents will be returned."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
@@ -442,7 +440,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Build the RAG chain\n",
|
||||
"## Use within a chain\n",
|
||||
"\n",
|
||||
"Initialize ChatOpenAI and define a prompt template"
|
||||
]
|
||||
@@ -610,6 +608,15 @@
|
||||
"source": [
|
||||
"collection.drop()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `MilvusCollectionHybridSearchRetriever` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/retrievers/langchain_milvus.retrievers.milvus_hybrid_search.MilvusCollectionHybridSearchRetriever.html)."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -628,9 +635,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.6"
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,20 +4,70 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Tavily Search API\n",
|
||||
"---\n",
|
||||
"sidebar_label: TavilySearchAPI\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# TavilySearchAPIRetriever\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
">[Tavily's Search API](https://tavily.com) is a search engine built specifically for AI agents (LLMs), delivering real-time, accurate, and factual results at speed.\n",
|
||||
"\n",
|
||||
"We can use this as a [retriever](/docs/how_to#retrievers). It will show functionality specific to this integration. After going through, it may be useful to explore [relevant use-case pages](/docs/how_to#qa-with-rag) to learn how to use this vectorstore as part of a larger chain.\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"The integration lives in the `langchain-community` package. We also need to install the `tavily-python` package itself.\n",
|
||||
"| Retriever | Source | Package |\n",
|
||||
"| :--- | :--- | :---: |\n",
|
||||
"[TavilySearchAPIRetriever](https://api.python.langchain.com/en/latest/retrievers/langchain_community.retrievers.tavily_search_api.TavilySearchAPIRetriever.html) | Internet search | langchain_community |\n",
|
||||
"\n",
|
||||
"```bash\n",
|
||||
"pip install -U langchain-community tavily-python\n",
|
||||
"```\n",
|
||||
"## Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated tracing from individual queries, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The integration lives in the `langchain-community` package. We also need to install the `tavily-python` package itself."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-community tavily-python"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We also need to set our Tavily API key."
|
||||
]
|
||||
},
|
||||
@@ -37,17 +87,20 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"It's also helpful (but not needed) to set up [LangSmith](https://smith.langchain.com/) for best-in-class observability"
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our retriever:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
|
||||
"from langchain_community.retrievers import TavilySearchAPIRetriever\n",
|
||||
"\n",
|
||||
"retriever = TavilySearchAPIRetriever(k=3)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -59,42 +112,40 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='Trending topics\\nTrending topics\\nThe Legend of Zelda™: Breath of the Wild\\nSelect a product\\nThe Legend of Zelda™: Breath of the Wild\\nThe Legend of Zelda™: Breath of the Wild\\nThe Legend of Zelda™: Breath of the Wild and The Legend of Zelda™: Breath of the Wild Expansion Pass Bundle\\nThis item will be sent to your system automatically after purchase or Nintendo Switch Game Voucher redemption. The Legend of Zelda: Breath of the Wild Expansion Pass\\nMore like this\\nSuper Mario Odyssey™\\nThe Legend of Zelda™: Tears of the Kingdom\\nMario + Rabbids® Kingdom Battle\\nThe Legend of Zelda™: Link’s Awakening\\nHollow Knight\\nThe Legend of Zelda™: Skyward Sword HD\\nStarlink: Battle for Atlas™ Digital Edition\\nDRAGON QUEST BUILDERS™ 2\\nDragon Quest Builders™\\nWARNING: If you have epilepsy or have had seizures or other unusual reactions to flashing lights or patterns, consult a doctor before playing video games. Saddle up with a herd of horse-filled games!\\nESRB rating\\nSupported play modes\\nTV\\nTabletop\\nHandheld\\nProduct information\\nRelease date\\nNo. of players\\nGenre\\nPublisher\\nESRB rating\\nSupported play modes\\nGame file size\\nSupported languages\\nPlay online, access classic NES™ and Super NES™ games, and more with a Nintendo Switch Online membership.\\n Two Game Boy games are now available for Nintendo Switch Online members\\n02/01/23\\nNintendo Switch Online member exclusive: Save on two digital games\\n09/13/22\\nOut of the Shadows … the Legend of Zelda: About Nintendo\\nShop\\nMy Nintendo Store orders\\nSupport\\nParents\\nCommunity\\nPrivacy\\n© Nintendo.', metadata={'title': 'The Legend of Zelda™: Breath of the Wild - Nintendo', 'source': 'https://www.nintendo.com/us/store/products/the-legend-of-zelda-breath-of-the-wild-switch/', 'score': 0.97451, 'images': None}),\n",
|
||||
" Document(page_content='The Legend of Zelda: Breath of the Wild is a masterpiece of open-world design and exploration, released on March 3, 2017 for Nintendo Switch. Find out the latest news, reviews, guides, videos, and more for this award-winning game on IGN.', metadata={'title': 'The Legend of Zelda: Breath of the Wild - IGN', 'source': 'https://www.ign.com/games/the-legend-of-zelda-breath-of-the-wild', 'score': 0.94496, 'images': None}),\n",
|
||||
" Document(page_content='Reviewers also commented on the unexpected permutations of interactions between Link, villagers, pets, and enemies,[129][130][131] many of which were shared widely on social media.[132] A tribute to former Nintendo president Satoru Iwata, who died during development, also attracted praise.[129][134]\\nJim Sterling was more critical than most, giving Breath of the Wild a 7/10 score, criticizing the difficulty, weapon durability, and level design, but praising the open world and variety of content.[135] Other criticism focused on the unstable frame rate and the low resolution of 900p;[136] updates addressed some of these problems.[137][138]\\nSales\\nBreath of the Wild broke sales records for a Nintendo launch game in multiple regions.[139][140] In Japan, the Switch and Wii U versions sold a combined 230,000 copies in the first week of release, with the Switch version becoming the top-selling game released that week.[141] Nintendo reported that Breath of the Wild sold more than one million copies in the US that month—925,000 of which were for Switch, outselling the Switch itself.[145][146][147][148] Nintendo president Tatsumi Kimishima said that the attach rate on the Switch was \"unprecedented\".[149] Breath of the Wild had sold 31.15 million copies on the Switch by September 2023 and 1.70 million copies on the Wii U by December 2020.[150][151]\\nAwards\\nFollowing its demonstration at E3 2016, Breath of the Wild received several accolades from the Game Critics Awards[152] and from publications such as IGN and Destructoid.[153][154] It was listed among the best games at E3 by Eurogamer,[81] The game, he continued, would challenge the series\\' conventions, such as the requirement that players complete dungeons in a set order.[2][73] The next year, Nintendo introduced the game\\'s high-definition, cel-shaded visual style with in-game footage at its E3 press event.[74][75] Once planned for release in 2015, the game was delayed early in the year and did not show at that year\\'s E3.[76][77] Zelda series creator Shigeru Miyamoto reaffirmed that the game would still release for the Wii U despite the development of Nintendo\\'s next console, the Nintendo Switch.[78] The Switch version also has higher-quality environmental sounds.[53][54] Certain ideas that were planned for the game, like flying and underground dungeons were not implemented due to the Wii U’s limitations; they would eventually resurface in the game\\'s sequel.[55] Aonuma stated that the art design was inspired by gouache and en plein air art to help identify the vast world.[56] Takizawa has also cited the Jōmon period as an inspiration for the ancient Sheikah technology and architecture that is found in the game, due to the mystery surrounding the period.[57] Journalists commented on unexpected interactions between game elements,[129][130][131] with serendipitous moments proving popular on social media.[132] Chris Plante of The Verge predicted that whereas prior open-world games tended to feature prescribed challenges, Zelda would influence a new generation of games with open-ended problem-solving.[132] Digital Trends wrote that the game\\'s level of experimentation allowed players to interact with and exploit the environment in creative ways, resulting in various \"tricks\" still discovered years after release.[127]\\nReviewers lauded the sense of detail and immersion.[133][129] Kotaku recommended turning off UI elements in praise of the indirect cues that contextually indicate the same information, such as Link shivering in the cold or waypoints appearing when using the scope.[133]', metadata={'title': 'The Legend of Zelda: Breath of the Wild - Wikipedia', 'source': 'https://en.wikipedia.org/wiki/The_Legend_of_Zelda:_Breath_of_the_Wild', 'score': 0.93348, 'images': None})]"
|
||||
"[Document(metadata={'title': 'The Legend of Zelda: Breath of the Wild - Nintendo Switch Wiki', 'source': 'https://nintendo-switch.fandom.com/wiki/The_Legend_of_Zelda:_Breath_of_the_Wild', 'score': 0.9961155, 'images': []}, page_content='The Legend of Zelda: Breath of the Wild is an open world action-adventure game published by Nintendo for the Wii U and as a launch title for the Nintendo Switch, and was released worldwide on March 3, 2017. It is the nineteenth installment of the The Legend of Zelda series and the first to be developed with a HD resolution. The game features a gigantic open world, with the player being able to ...'),\n",
|
||||
" Document(metadata={'title': 'The Legend of Zelda: Breath of the Wild - Zelda Wiki', 'source': 'https://zelda.fandom.com/wiki/The_Legend_of_Zelda:_Breath_of_the_Wild', 'score': 0.9804313, 'images': []}, page_content='[]\\nReferences\\nThe Legend of Zelda \\xa0·\\nThe Adventure of Link \\xa0·\\nA Link to the Past (& Four Swords) \\xa0·\\nLink\\'s Awakening (DX; Nintendo Switch) \\xa0·\\nOcarina of Time (Master Quest; 3D) \\xa0·\\nMajora\\'s Mask (3D) \\xa0·\\nOracle of Ages \\xa0·\\nOracle of Seasons \\xa0·\\nFour Swords (Anniversary Edition) \\xa0·\\nThe Wind Waker (HD) \\xa0·\\nFour Swords Adventures \\xa0·\\nThe Minish Cap \\xa0·\\nTwilight Princess (HD) \\xa0·\\nPhantom Hourglass \\xa0·\\nSpirit Tracks \\xa0·\\nSkyward Sword (HD) \\xa0·\\nA Link Between Worlds \\xa0·\\nTri Force Heroes \\xa0·\\nBreath of the Wild \\xa0·\\nTears of the Kingdom\\nZelda (Game & Watch) \\xa0·\\nThe Legend of Zelda Game Watch \\xa0·\\nLink\\'s Crossbow Training \\xa0·\\nMy Nintendo Picross: Twilight Princess \\xa0·\\nCadence of Hyrule \\xa0·\\nGame & Watch: The Legend of Zelda\\nCD-i Games\\n Listings[]\\nCharacters[]\\nBosses[]\\nEnemies[]\\nDungeons[]\\nLocations[]\\nItems[]\\nTranslations[]\\nCredits[]\\nReception[]\\nSales[]\\nEiji Aonuma and Hidemaro Fujibayashi accepting the \"Game of the Year\" award for Breath of the Wild at The Game Awards 2017\\nBreath of the Wild was estimated to have sold approximately 1.3 million copies in its first three weeks and around 89% of Switch owners were estimated to have also purchased the game.[52] Sales of the game have remained strong and as of June 30, 2022, the Switch version has sold 27.14 million copies worldwide while the Wii U version has sold 1.69 million copies worldwide as of December 31, 2019,[53][54] giving Breath of the Wild a cumulative total of 28.83 million copies sold.\\n It also earned a Metacritic score of 97 from more than 100 critics, placing it among the highest-rated games of all time.[59][60] Notably, the game received the most perfect review scores for any game listed on Metacritic up to that point.[61]\\nIn 2022, Breath of the Wild was chosen as the best Legend of Zelda game of all time in their \"Top 10 Best Zelda Games\" list countdown; but was then placed as the \"second\" best Zelda game in their new revamped version of their \"Top 10 Best Zelda Games\" list in 2023, right behind it\\'s successor Tears of Video Game Canon ranks Breath of the Wild as one of the best video games of all time.[74] Metacritic ranked Breath of the Wild as the single best game of the 2010s.[75]\\nFan Reception[]\\nWatchMojo placed Breath of the Wild at the #2 spot in their \"Top 10 Legend of Zelda Games of All Time\" list countdown, right behind Ocarina of Time.[76] The Faces of Evil \\xa0·\\nThe Wand of Gamelon \\xa0·\\nZelda\\'s Adventure\\nHyrule Warriors Series\\nHyrule Warriors (Legends; Definitive Edition) \\xa0·\\nHyrule Warriors: Age of Calamity\\nSatellaview Games\\nBS The Legend of Zelda \\xa0·\\nAncient Stone Tablets\\nTingle Series\\nFreshly-Picked Tingle\\'s Rosy Rupeeland \\xa0·\\nTingle\\'s Balloon Fight DS \\xa0·\\n'),\n",
|
||||
" Document(metadata={'title': 'The Legend of Zelda: Breath of the Wild - Zelda Wiki', 'source': 'https://zeldawiki.wiki/wiki/The_Legend_of_Zelda:_Breath_of_the_Wild', 'score': 0.9627432, 'images': []}, page_content='The Legend of Zelda\\xa0•\\nThe Adventure of Link\\xa0•\\nA Link to the Past (& Four Swords)\\xa0•\\nLink\\'s Awakening (DX; Nintendo Switch)\\xa0•\\nOcarina of Time (Master Quest; 3D)\\xa0•\\nMajora\\'s Mask (3D)\\xa0•\\nOracle of Ages\\xa0•\\nOracle of Seasons\\xa0•\\nFour Swords (Anniversary Edition)\\xa0•\\nThe Wind Waker (HD)\\xa0•\\nFour Swords Adventures\\xa0•\\nThe Minish Cap\\xa0•\\nTwilight Princess (HD)\\xa0•\\nPhantom Hourglass\\xa0•\\nSpirit Tracks\\xa0•\\nSkyward Sword (HD)\\xa0•\\nA Link Between Worlds\\xa0•\\nTri Force Heroes\\xa0•\\nBreath of the Wild\\xa0•\\nTears of the Kingdom\\nZelda (Game & Watch)\\xa0•\\nThe Legend of Zelda Game Watch\\xa0•\\nHeroes of Hyrule\\xa0•\\nLink\\'s Crossbow Training\\xa0•\\nMy Nintendo Picross: Twilight Princess\\xa0•\\nCadence of Hyrule\\xa0•\\nVermin\\nThe Faces of Evil\\xa0•\\nThe Wand of Gamelon\\xa0•\\nZelda\\'s Adventure\\nHyrule Warriors (Legends; Definitive Edition)\\xa0•\\nHyrule Warriors: Age of Calamity\\nBS The Legend of Zelda\\xa0•\\nAncient Stone Tablets\\nFreshly-Picked Tingle\\'s Rosy Rupeeland\\xa0•\\nTingle\\'s Balloon Fight DS\\xa0•\\nToo Much Tingle Pack\\xa0•\\nRipened Tingle\\'s Balloon Trip of Love\\nSoulcalibur II\\xa0•\\nWarioWare Series\\xa0•\\nCaptain Rainbow\\xa0•\\nNintendo Land\\xa0•\\nScribblenauts Unlimited\\xa0•\\nMario Kart 8\\xa0•\\nSplatoon 3\\nSuper Smash Bros (Series)\\nSuper Smash Bros.\\xa0•\\nSuper Smash Bros. Melee\\xa0•\\nSuper Smash Bros. Brawl\\xa0•\\nSuper Smash Bros. for Nintendo 3DS / Wii U\\xa0•\\n It also earned a Metacritic score of 97 from more than 100 critics, placing it among the highest-rated games of all time.[60][61] Notably, the game received the most perfect review scores for any game listed on Metacritic up to that point.[62]\\nAwards\\nThroughout 2016, Breath of the Wild won several awards as a highly anticipated game, including IGN\\'s and Destructoid\\'s Best of E3,[63][64] at the Game Critic Awards 2016,[65] and at The Game Awards 2016.[66] Following its release, Breath of the Wild received the title of \"Game of the Year\" from the Japan Game Awards 2017,[67] the Golden Joystick Awards 2017,<ref\"Our final award is for the Ultimate Game of the Year. Official website(s)\\nOfficial website(s)\\nCanonicity\\nCanonicity\\nCanon[citation needed]\\nPredecessor\\nPredecessor\\nTri Force Heroes\\nSuccessor\\nSuccessor\\nTears of the Kingdom\\nThe Legend of Zelda: Breath of the Wild guide at StrategyWiki\\nBreath of the Wild Guide at Zelda Universe\\nThe Legend of Zelda: Breath of the Wild is the nineteenth main installment of The Legend of Zelda series. Listings\\nCharacters\\nBosses\\nEnemies\\nDungeons\\nLocations\\nItems\\nTranslations\\nCredits\\nReception\\nSales\\nBreath of the Wild was estimated to have sold approximately 1.3 million copies in its first three weeks and around 89% of Switch owners were estimated to have also purchased the game.[53] Sales of the game have remained strong and as of September 30, 2023, the Switch version has sold 31.15 million copies worldwide while the Wii U version has sold 1.7 million copies worldwide as of December 31, 2021,[54][55] giving Breath of the Wild a cumulative total of 32.85 million copies sold.\\n The Legend of Zelda: Breath of the Wild\\nThe Legend of Zelda: Breath of the Wild\\nThe Legend of Zelda: Breath of the Wild\\nDeveloper(s)\\nDeveloper(s)\\nPublisher(s)\\nPublisher(s)\\nNintendo\\nDesigner(s)\\nDesigner(s)\\n')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_community.retrievers import TavilySearchAPIRetriever\n",
|
||||
"query = \"what year was breath of the wild released?\"\n",
|
||||
"\n",
|
||||
"retriever = TavilySearchAPIRetriever(k=3)\n",
|
||||
"\n",
|
||||
"retriever.invoke(\"what year was breath of the wild released?\")"
|
||||
"retriever.invoke(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Chaining\n",
|
||||
"## Use within a chain\n",
|
||||
"\n",
|
||||
"We can easily combine this retriever in to a chain."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -110,40 +161,50 @@
|
||||
"\n",
|
||||
"Question: {question}\"\"\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def format_docs(docs):\n",
|
||||
" return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"chain = (\n",
|
||||
" RunnablePassthrough.assign(context=(lambda x: x[\"question\"]) | retriever)\n",
|
||||
" {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n",
|
||||
" | prompt\n",
|
||||
" | ChatOpenAI(model=\"gpt-4-1106-preview\")\n",
|
||||
" | llm\n",
|
||||
" | StrOutputParser()\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'As of the end of 2020, \"The Legend of Zelda: Breath of the Wild\" sold over 21.45 million copies worldwide.'"
|
||||
"'As of August 2020, The Legend of Zelda: Breath of the Wild had sold over 20.1 million copies worldwide on Nintendo Switch and Wii U.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.invoke({\"question\": \"how many units did bretch of the wild sell in 2020\"})"
|
||||
"chain.invoke(\"how many units did bretch of the wild sell in 2020\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `TavilySearchAPIRetriever` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/retrievers/langchain_community.retrievers.tavily_search_api.TavilySearchAPIRetriever.html)."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -162,7 +223,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -2,14 +2,51 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9fc6205b",
|
||||
"id": "62727aaa-bcff-4087-891c-e539f824ee1f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Wikipedia\n",
|
||||
"---\n",
|
||||
"sidebar_label: Wikipedia\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d62a16c1-10de-4f99-b392-c4ad2e6123a1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# WikipediaRetriever\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
">[Wikipedia](https://wikipedia.org/) is a multilingual free online encyclopedia written and maintained by a community of volunteers, known as Wikipedians, through open collaboration and using a wiki-based editing system called MediaWiki. `Wikipedia` is the largest and most-read reference work in history.\n",
|
||||
"\n",
|
||||
"This notebook shows how to retrieve wiki pages from `wikipedia.org` into the Document format that is used downstream."
|
||||
"This notebook shows how to retrieve wiki pages from `wikipedia.org` into the [Document](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html) format that is used downstream.\n",
|
||||
"\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Retriever | Source | Package |\n",
|
||||
"| :--- | :--- | :---: |\n",
|
||||
"[WikipediaRetriever](https://api.python.langchain.com/en/latest/retrievers/langchain_community.retrievers.wikipedia.WikipediaRetriever.html) | [Wikipedia](https://www.wikipedia.org/) articles | langchain_community |"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "eb7d377c-168b-40e8-bd61-af6a4fb1b44f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup\n",
|
||||
"If you want to get automated tracing from runs of individual tools, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1bbc6013-2617-4f7e-9d8b-7453d09315c0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -17,15 +54,9 @@
|
||||
"id": "51489529-5dcd-4b86-bda6-de0a39d8ffd1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Installation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1435c804-069d-4ade-9a7b-006b97b767c1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First, you need to install `wikipedia` python package."
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The integration lives in the `langchain-community` package. We also need to install the `wikipedia` python package itself."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -37,7 +68,15 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet wikipedia"
|
||||
"%pip install -qU langchain_community wikipedia"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ae622ac6-d18a-4754-a4bd-d30a078c19b5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -45,7 +84,9 @@
|
||||
"id": "6c15470b-a16b-4e0d-bc6a-6998bafbb5a4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"`WikipediaRetriever` has these arguments:\n",
|
||||
"Now we can instantiate our retriever:\n",
|
||||
"\n",
|
||||
"`WikipediaRetriever` parameters include:\n",
|
||||
"- optional `lang`: default=\"en\". Use it to search in a specific language part of Wikipedia\n",
|
||||
"- optional `load_max_docs`: default=100. Use it to limit number of downloaded documents. It takes time to download all 100 documents, so use a small number for experiments. There is a hard limit of 300 for now.\n",
|
||||
"- optional `load_all_available_meta`: default=False. By default only the most important fields downloaded: `Published` (date when document was published/last updated), `title`, `Summary`. If True, other fields also downloaded.\n",
|
||||
@@ -53,200 +94,149 @@
|
||||
"`get_relevant_documents()` has one argument, `query`: free text which used to find documents in Wikipedia"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ae3c3d16",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Examples"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6fafb73b-d6ec-4822-b161-edf0aaf5224a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Running retriever"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"id": "d0e6f506",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.retrievers import WikipediaRetriever"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"id": "f381f642",
|
||||
"execution_count": 1,
|
||||
"id": "b78f0cd0-ffea-4fe3-9d1d-54639c4ef1ff",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.retrievers import WikipediaRetriever\n",
|
||||
"\n",
|
||||
"retriever = WikipediaRetriever()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "12aead36-7b97-4d9c-82e7-ec644a3127f9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"id": "20ae1a74",
|
||||
"execution_count": 2,
|
||||
"id": "54a76605-6b1e-44bf-b8a2-7d48119290c4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = retriever.invoke(\"HUNTER X HUNTER\")"
|
||||
"docs = retriever.invoke(\"TOKYO GHOUL\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"id": "1d5a5088",
|
||||
"execution_count": 3,
|
||||
"id": "65ada2b7-3507-4dcb-9982-5f8f4e97a2e1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'title': 'Hunter × Hunter',\n",
|
||||
" 'summary': 'Hunter × Hunter (stylized as HUNTER×HUNTER and pronounced \"hunter hunter\") is a Japanese manga series written and illustrated by Yoshihiro Togashi. It has been serialized in Shueisha\\'s shōnen manga magazine Weekly Shōnen Jump since March 1998, although the manga has frequently gone on extended hiatuses since 2006. Its chapters have been collected in 37 tankōbon volumes as of November 2022. The story focuses on a young boy named Gon Freecss who discovers that his father, who left him at a young age, is actually a world-renowned Hunter, a licensed professional who specializes in fantastical pursuits such as locating rare or unidentified animal species, treasure hunting, surveying unexplored enclaves, or hunting down lawless individuals. Gon departs on a journey to become a Hunter and eventually find his father. Along the way, Gon meets various other Hunters and encounters the paranormal.\\nHunter × Hunter was adapted into a 62-episode anime television series produced by Nippon Animation and directed by Kazuhiro Furuhashi, which ran on Fuji Television from October 1999 to March 2001. Three separate original video animations (OVAs) totaling 30 episodes were subsequently produced by Nippon Animation and released in Japan from 2002 to 2004. A second anime television series by Madhouse aired on Nippon Television from October 2011 to September 2014, totaling 148 episodes, with two animated theatrical films released in 2013. There are also numerous audio albums, video games, musicals, and other media based on Hunter × Hunter.\\nThe manga has been translated into English and released in North America by Viz Media since April 2005. Both television series have been also licensed by Viz Media, with the first series having aired on the Funimation Channel in 2009 and the second series broadcast on Adult Swim\\'s Toonami programming block from April 2016 to June 2019.\\nHunter × Hunter has been a huge critical and financial success and has become one of the best-selling manga series of all time, having over 84 million copies in circulation by July 2022.\\n\\n'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 31,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tokyo Ghoul (Japanese: 東京喰種(トーキョーグール), Hepburn: Tōkyō Gūru) is a Japanese dark fantasy manga series written and illustrated by Sui Ishida. It was serialized in Shueisha's seinen manga magazine Weekly Young Jump from September 2011 to September 2014, with its chapters collected in 14 tankōbon volumes. The story is set in an alternate version of Tokyo where humans coexist with ghouls, beings who loo\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs[0].metadata # meta-information of the Document"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"id": "c0ccd0c7-f6a6-43e7-b842-5f57afb94224",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Hunter × Hunter (stylized as HUNTER×HUNTER and pronounced \"hunter hunter\") is a Japanese manga series written and illustrated by Yoshihiro Togashi. It has been serialized in Shueisha\\'s shōnen manga magazine Weekly Shōnen Jump since March 1998, although the manga has frequently gone on extended hiatuses since 2006. Its chapters have been collected in 37 tankōbon volumes as of November 2022. The sto'"
|
||||
]
|
||||
},
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs[0].page_content[:400] # a content of the Document"
|
||||
"print(docs[0].page_content[:400])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2670363b-3806-4c7e-b14d-90a4d5d2a200",
|
||||
"id": "ae3c3d16",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Question Answering on facts"
|
||||
"## Use within a chain\n",
|
||||
"Like other retrievers, `WikipediaRetriever` can be incorporated into LLM applications via [chains](/docs/how_to/sequence/).\n",
|
||||
"\n",
|
||||
"We will need a LLM or chat model:\n",
|
||||
"\n",
|
||||
"```{=mdx}\n",
|
||||
"import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
|
||||
"\n",
|
||||
"<ChatModelTabs customVarName=\"llm\" />\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "bb3601df-53ea-4826-bdbe-554387bc3ad4",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# get a token: https://platform.openai.com/account/api-keys\n",
|
||||
"\n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"OPENAI_API_KEY = getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "e9c1a114-0410-4804-be30-05f34a9760f9",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"execution_count": 4,
|
||||
"id": "4bd3d268-eb8c-46e9-930a-18f5e2a50008",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"# | output: false\n",
|
||||
"# | echo: false\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = OPENAI_API_KEY"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"id": "51a33cc9-ec42-4afc-8a2d-3bfff476aa59",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import ConversationalRetrievalChain\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"model = ChatOpenAI(model=\"gpt-3.5-turbo\") # switch to 'gpt-4'\n",
|
||||
"qa = ConversationalRetrievalChain.from_llm(model, retriever=retriever)"
|
||||
"llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"id": "ea537767-a8bf-4adf-ae03-b353c9145d58",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"execution_count": 5,
|
||||
"id": "9b52bc65-1b2e-4c30-ab43-41eaa5bf79c3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.output_parsers import StrOutputParser\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"from langchain_core.runnables import RunnablePassthrough\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_template(\n",
|
||||
" \"\"\"\n",
|
||||
" Answer the question based only on the context provided.\n",
|
||||
" Context: {context}\n",
|
||||
" Question: {question}\n",
|
||||
" \"\"\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def format_docs(docs):\n",
|
||||
" return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"chain = (\n",
|
||||
" {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n",
|
||||
" | prompt\n",
|
||||
" | llm\n",
|
||||
" | StrOutputParser()\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "0d268905-3b19-4338-ac10-223c0fe4d5e4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"-> **Question**: What is Apify? \n",
|
||||
"\n",
|
||||
"**Answer**: Apify is a platform that allows you to easily automate web scraping, data extraction and web automation. It provides a cloud-based infrastructure for running web crawlers and other automation tasks, as well as a web-based tool for building and managing your crawlers. Additionally, Apify offers a marketplace for buying and selling pre-built crawlers and related services. \n",
|
||||
"\n",
|
||||
"-> **Question**: When the Monument to the Martyrs of the 1830 Revolution was created? \n",
|
||||
"\n",
|
||||
"**Answer**: Apify is a web scraping and automation platform that enables you to extract data from websites, turn unstructured data into structured data, and automate repetitive tasks. It provides a user-friendly interface for creating web scraping scripts without any coding knowledge. Apify can be used for various web scraping tasks such as data extraction, web monitoring, content aggregation, and much more. Additionally, it offers various features such as proxy support, scheduling, and integration with other tools to make web scraping and automation tasks easier and more efficient. \n",
|
||||
"\n",
|
||||
"-> **Question**: What is the Abhayagiri Vihāra? \n",
|
||||
"\n",
|
||||
"**Answer**: Abhayagiri Vihāra was a major monastery site of Theravada Buddhism that was located in Anuradhapura, Sri Lanka. It was founded in the 2nd century BCE and is considered to be one of the most important monastic complexes in Sri Lanka. \n",
|
||||
"\n"
|
||||
]
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The main character in Tokyo Ghoul is Ken Kaneki, who transforms into a ghoul after receiving an organ transplant from a ghoul named Rize.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"questions = [\n",
|
||||
" \"What is Apify?\",\n",
|
||||
" \"When the Monument to the Martyrs of the 1830 Revolution was created?\",\n",
|
||||
" \"What is the Abhayagiri Vihāra?\",\n",
|
||||
" # \"How big is Wikipédia en français?\",\n",
|
||||
"]\n",
|
||||
"chat_history = []\n",
|
||||
"chain.invoke(\n",
|
||||
" \"Who is the main character in `Tokyo Ghoul` and does he transform into a ghoul?\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "236bbafb-ebd4-4165-9b8f-d47605f6eef3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"for question in questions:\n",
|
||||
" result = qa({\"question\": question, \"chat_history\": chat_history})\n",
|
||||
" chat_history.append((question, result[\"answer\"]))\n",
|
||||
" print(f\"-> **Question**: {question} \\n\")\n",
|
||||
" print(f\"**Answer**: {result['answer']} \\n\")"
|
||||
"For detailed documentation of all `WikipediaRetriever` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/retrievers/langchain_community.retrievers.wikipedia.WikipediaRetriever.html#langchain-community-retrievers-wikipedia-wikipediaretriever)."
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -266,7 +256,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -2,10 +2,14 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "raw"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Astra DB\n",
|
||||
"sidebar_label: AstraDB\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
@@ -13,130 +17,121 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Astra DB\n",
|
||||
"# AstraDBByteStore\n",
|
||||
"\n",
|
||||
"This will help you get started with Astra DB [key-value stores](/docs/concepts/#key-value-stores). For detailed documentation of all `AstraDBByteStore` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/storage/langchain_astradb.storage.AstraDBByteStore.html).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"DataStax [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless vector-capable database built on Cassandra and made conveniently available through an easy-to-use JSON API.\n",
|
||||
"\n",
|
||||
"`AstraDBStore` and `AstraDBByteStore` need the `astrapy` package to be installed:"
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | JS support | Package downloads | Package latest |\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: | :---: |\n",
|
||||
"| [AstraDBByteStore](https://api.python.langchain.com/en/latest/storage/langchain_astradb.storage.AstraDBByteStore.html) | [langchain_astradb](https://api.python.langchain.com/en/latest/astradb_api_reference.html) | ❌ | ❌ |  |  |\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To create an `AstraDBByteStore` byte store, you'll need to [create a DataStax account](https://www.datastax.com/products/datastax-astra).\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"After signing up, set the following credentials:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet astrapy"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The Store takes the following parameters:\n",
|
||||
"\n",
|
||||
"* `api_endpoint`: Astra DB API endpoint. Looks like `https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com`\n",
|
||||
"* `token`: Astra DB token. Looks like `AstraCS:6gBhNmsk135....`\n",
|
||||
"* `collection_name` : Astra DB collection name\n",
|
||||
"* `namespace`: (Optional) Astra DB namespace"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## AstraDBStore\n",
|
||||
"\n",
|
||||
"The `AstraDBStore` is an implementation of `BaseStore` that stores everything in your DataStax Astra DB instance.\n",
|
||||
"The store keys must be strings and will be mapped to the `_id` field of the Astra DB document.\n",
|
||||
"The store values can be any object that can be serialized by `json.dumps`.\n",
|
||||
"In the database, entries will have the form:\n",
|
||||
"\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"_id\": \"<key>\",\n",
|
||||
" \"value\": <value>\n",
|
||||
"}\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.storage import AstraDBStore"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"ASTRA_DB_API_ENDPOINT = input(\"ASTRA_DB_API_ENDPOINT = \")\n",
|
||||
"ASTRA_DB_API_ENDPOINT = getpass(\"ASTRA_DB_API_ENDPOINT = \")\n",
|
||||
"ASTRA_DB_APPLICATION_TOKEN = getpass(\"ASTRA_DB_APPLICATION_TOKEN = \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain AstraDB integration lives in the `langchain_astradb` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"store = AstraDBStore(\n",
|
||||
"%pip install -qU langchain_astradb"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our byte store:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_astradb import AstraDBByteStore\n",
|
||||
"\n",
|
||||
"kv_store = AstraDBByteStore(\n",
|
||||
" api_endpoint=ASTRA_DB_API_ENDPOINT,\n",
|
||||
" token=ASTRA_DB_APPLICATION_TOKEN,\n",
|
||||
" collection_name=\"my_store\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"['v1', [0.1, 0.2, 0.3]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"store.mset([(\"k1\", \"v1\"), (\"k2\", [0.1, 0.2, 0.3])])\n",
|
||||
"print(store.mget([\"k1\", \"k2\"]))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Usage with CacheBackedEmbeddings\n",
|
||||
"## Usage\n",
|
||||
"\n",
|
||||
"You may use the `AstraDBStore` in conjunction with a [`CacheBackedEmbeddings`](/docs/how_to/caching_embeddings) to cache the result of embeddings computations.\n",
|
||||
"Note that `AstraDBStore` stores the embeddings as a list of floats without converting them first to bytes so we don't use `fromByteStore` there."
|
||||
"You can set data under keys like this using the `mset` method:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[b'value1', b'value2']"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.embeddings import CacheBackedEmbeddings\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"kv_store.mset(\n",
|
||||
" [\n",
|
||||
" [\"key1\", b\"value1\"],\n",
|
||||
" [\"key2\", b\"value2\"],\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"embeddings = CacheBackedEmbeddings(\n",
|
||||
" underlying_embeddings=OpenAIEmbeddings(), document_embedding_store=store\n",
|
||||
"kv_store.mget(\n",
|
||||
" [\n",
|
||||
" \"key1\",\n",
|
||||
" \"key2\",\n",
|
||||
" ]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -144,96 +139,67 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## AstraDBByteStore\n",
|
||||
"\n",
|
||||
"The `AstraDBByteStore` is an implementation of `ByteStore` that stores everything in your DataStax Astra DB instance.\n",
|
||||
"The store keys must be strings and will be mapped to the `_id` field of the Astra DB document.\n",
|
||||
"The store `bytes` values are converted to base64 strings for storage into Astra DB.\n",
|
||||
"In the database, entries will have the form:\n",
|
||||
"\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"_id\": \"<key>\",\n",
|
||||
" \"value\": \"bytes encoded in base 64\"\n",
|
||||
"}\n",
|
||||
"```"
|
||||
"And you can delete data using the `mdelete` method:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.storage import AstraDBByteStore"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"ASTRA_DB_API_ENDPOINT = input(\"ASTRA_DB_API_ENDPOINT = \")\n",
|
||||
"ASTRA_DB_APPLICATION_TOKEN = getpass(\"ASTRA_DB_APPLICATION_TOKEN = \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"store = AstraDBByteStore(\n",
|
||||
" api_endpoint=ASTRA_DB_API_ENDPOINT,\n",
|
||||
" token=ASTRA_DB_APPLICATION_TOKEN,\n",
|
||||
" collection_name=\"my_store\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[b'v1', b'v2']\n"
|
||||
]
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[None, None]"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"store.mset([(\"k1\", b\"v1\"), (\"k2\", b\"v2\")])\n",
|
||||
"print(store.mget([\"k1\", \"k2\"]))"
|
||||
"kv_store.mdelete(\n",
|
||||
" [\n",
|
||||
" \"key1\",\n",
|
||||
" \"key2\",\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"kv_store.mget(\n",
|
||||
" [\n",
|
||||
" \"key1\",\n",
|
||||
" \"key2\",\n",
|
||||
" ]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
"source": [
|
||||
"You can use an `AstraDBByteStore` anywhere you'd use other ByteStores, including as a [cache for embeddings](/docs/how_to/caching_embeddings)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `AstraDBByteStore` features and configurations, head to the API reference: https://api.python.langchain.com/en/latest/storage/langchain_astradb.storage.AstraDBByteStore.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
"version": "3.10.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -2,7 +2,11 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "raw"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Cassandra\n",
|
||||
@@ -13,68 +17,62 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Cassandra\n",
|
||||
"# CassandraByteStore\n",
|
||||
"\n",
|
||||
"This will help you get started with Cassandra [key-value stores](/docs/concepts/#key-value-stores). For detailed documentation of all `CassandraByteStore` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/storage/langchain_community.storage.cassandra.CassandraByteStore.html).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"[Cassandra](https://cassandra.apache.org/) is a NoSQL, row-oriented, highly scalable and highly available database.\n",
|
||||
"\n",
|
||||
"`CassandraByteStore` needs the `cassio` package to be installed:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet cassio"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The Store takes the following parameters:\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"* table: The table where to store the data.\n",
|
||||
"* session: (Optional) The cassandra driver session. If not provided, the cassio resolved session will be used.\n",
|
||||
"* keyspace: (Optional) The keyspace of the table. If not provided, the cassio resolved keyspace will be used.\n",
|
||||
"* setup_mode: (Optional) The mode used to create the Cassandra table (SYNC, ASYNC or OFF). Defaults to SYNC."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## CassandraByteStore\n",
|
||||
"| Class | Package | Local | [JS support](https://js.langchain.com/v0.2/docs/integrations/stores/cassandra_storage) | Package downloads | Package latest |\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: | :---: |\n",
|
||||
"| [CassandraByteStore](https://api.python.langchain.com/en/latest/storage/langchain_community.storage.cassandra.CassandraByteStore.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ✅ |  |  |\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"The `CassandraByteStore` is an implementation of `ByteStore` that stores the data in your Cassandra instance.\n",
|
||||
"The store keys must be strings and will be mapped to the `row_id` column of the Cassandra table.\n",
|
||||
"The store `bytes` values are mapped to the `body_blob` column of the Cassandra table."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain `CassandraByteStore` integration lives in the `langchain_community` package. You'll also need to install the `cassio` package or the `cassandra-driver` package as a peer dependency depending on which initialization method you're using:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.storage import CassandraByteStore"
|
||||
"%pip install -qU langchain_community\n",
|
||||
"%pip install -qU cassandra-driver\n",
|
||||
"%pip install -qU cassio"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Init from a cassandra driver Session\n",
|
||||
"You'll also need to create a `cassandra.cluster.Session` object, as described in the [Cassandra driver documentation](https://docs.datastax.com/en/developer/python-driver/latest/api/cassandra/cluster/#module-cassandra.cluster). The details vary (e.g. with network settings and authentication), but this might be something like:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"You need to create a `cassandra.cluster.Session` object, as described in the [Cassandra driver documentation](https://docs.datastax.com/en/developer/python-driver/latest/api/cassandra/cluster/#module-cassandra.cluster). The details vary (e.g. with network settings and authentication), but this might be something like:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
"You'll first need to create a `cassandra.cluster.Session` object, as described in the [Cassandra driver documentation](https://docs.datastax.com/en/developer/python-driver/latest/api/cassandra/cluster/#module-cassandra.cluster). The details vary (e.g. with network settings and authentication), but this might be something like:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
@@ -90,12 +88,10 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You need to provide the name of an existing keyspace of the Cassandra instance:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
"Then you can create your store! You'll also need to provide the name of an existing keyspace of the Cassandra instance:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
@@ -103,61 +99,91 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"CASSANDRA_KEYSPACE = input(\"CASSANDRA_KEYSPACE = \")"
|
||||
"from langchain_community.storage import CassandraByteStore\n",
|
||||
"\n",
|
||||
"kv_store = CassandraByteStore(\n",
|
||||
" table=\"my_store\",\n",
|
||||
" session=session,\n",
|
||||
" keyspace=\"<YOUR KEYSPACE>\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Creating the store:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
"## Usage\n",
|
||||
"\n",
|
||||
"You can set data under keys like this using the `mset` method:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[b'v1', b'v2']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"store = CassandraByteStore(\n",
|
||||
" table=\"my_store\",\n",
|
||||
" session=session,\n",
|
||||
" keyspace=CASSANDRA_KEYSPACE,\n",
|
||||
"kv_store.mset(\n",
|
||||
" [\n",
|
||||
" [\"key1\", b\"value1\"],\n",
|
||||
" [\"key2\", b\"value2\"],\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"store.mset([(\"k1\", b\"v1\"), (\"k2\", b\"v2\")])\n",
|
||||
"print(store.mget([\"k1\", \"k2\"]))"
|
||||
"kv_store.mget(\n",
|
||||
" [\n",
|
||||
" \"key1\",\n",
|
||||
" \"key2\",\n",
|
||||
" ]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Init from cassio\n",
|
||||
"\n",
|
||||
"It's also possible to use cassio to configure the session and keyspace."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
"And you can delete data using the `mdelete` method:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"kv_store.mdelete(\n",
|
||||
" [\n",
|
||||
" \"key1\",\n",
|
||||
" \"key2\",\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"kv_store.mget(\n",
|
||||
" [\n",
|
||||
" \"key1\",\n",
|
||||
" \"key2\",\n",
|
||||
" ]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Init using `cassio`\n",
|
||||
"\n",
|
||||
"It's also possible to use cassio to configure the session and keyspace."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import cassio\n",
|
||||
"\n",
|
||||
"cassio.init(contact_points=\"127.0.0.1\", keyspace=CASSANDRA_KEYSPACE)\n",
|
||||
"cassio.init(contact_points=\"127.0.0.1\", keyspace=\"<YOUR KEYSPACE>\")\n",
|
||||
"\n",
|
||||
"store = CassandraByteStore(\n",
|
||||
" table=\"my_store\",\n",
|
||||
@@ -165,62 +191,27 @@
|
||||
"\n",
|
||||
"store.mset([(\"k1\", b\"v1\"), (\"k2\", b\"v2\")])\n",
|
||||
"print(store.mget([\"k1\", \"k2\"]))"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Usage with CacheBackedEmbeddings\n",
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"You may use the `CassandraByteStore` in conjunction with a [`CacheBackedEmbeddings`](/docs/how_to/caching_embeddings) to cache the result of embeddings computations.\n"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings import CacheBackedEmbeddings\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"cassio.init(contact_points=\"127.0.0.1\", keyspace=CASSANDRA_KEYSPACE)\n",
|
||||
"\n",
|
||||
"store = CassandraByteStore(\n",
|
||||
" table=\"my_store\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"embeddings = CacheBackedEmbeddings.from_bytes_store(\n",
|
||||
" underlying_embeddings=OpenAIEmbeddings(), document_embedding_cache=store\n",
|
||||
")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
"For detailed documentation of all `CassandraByteStore` features and configurations, head to the API reference: https://api.python.langchain.com/en/latest/storage/langchain_community.storage.cassandra.CassandraByteStore.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
"version": "3.10.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -2,10 +2,14 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "raw"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Elasticsearch \n",
|
||||
"sidebar_label: Elasticsearch\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
@@ -15,25 +19,31 @@
|
||||
"source": [
|
||||
"# ElasticsearchEmbeddingsCache\n",
|
||||
"\n",
|
||||
"This will help you get started with Elasticsearch [key-value stores](/docs/concepts/#key-value-stores). For detailed documentation of all `ElasticsearchEmbeddingsCache` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/cache/langchain_elasticsearch.cache.ElasticsearchEmbeddingsCache.html).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"The `ElasticsearchEmbeddingsCache` is a `ByteStore` implementation that uses your Elasticsearch instance for efficient storage and retrieval of embeddings.\n",
|
||||
"\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"First install the LangChain integration with Elasticsearch."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -U langchain-elasticsearch"
|
||||
"| Class | Package | Local | JS support | Package downloads | Package latest |\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: | :---: |\n",
|
||||
"| [ElasticsearchEmbeddingsCache](https://api.python.langchain.com/en/latest/cache/langchain_elasticsearch.cache.ElasticsearchEmbeddingsCache.html) | [langchain_elasticsearch](https://api.python.langchain.com/en/latest/elasticsearch_api_reference.html) | ✅ | ❌ |  |  |\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To create a `ElasticsearchEmbeddingsCache` byte store, you'll need an Elasticsearch cluster. You can [set one up locally](https://www.elastic.co/downloads/elasticsearch) or create an [Elastic account](https://www.elastic.co/elasticsearch)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": "it can be instantiated using `CacheBackedEmbeddings.from_bytes_store` method."
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain `ElasticsearchEmbeddingsCache` integration lives in the `__package_name__` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
@@ -41,23 +51,37 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings import CacheBackedEmbeddings\n",
|
||||
"%pip install -qU langchain_elasticsearch"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our byte store:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_elasticsearch import ElasticsearchEmbeddingsCache\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"underlying_embeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")\n",
|
||||
"\n",
|
||||
"store = ElasticsearchEmbeddingsCache(\n",
|
||||
" es_url=\"http://localhost:9200\",\n",
|
||||
"# Example config for a locally running Elasticsearch instance\n",
|
||||
"kv_store = ElasticsearchEmbeddingsCache(\n",
|
||||
" es_url=\"https://localhost:9200\",\n",
|
||||
" index_name=\"llm-chat-cache\",\n",
|
||||
" metadata={\"project\": \"my_chatgpt_project\"},\n",
|
||||
" namespace=\"my_chatgpt_project\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"embeddings = CacheBackedEmbeddings.from_bytes_store(\n",
|
||||
" underlying_embeddings=OpenAIEmbeddings(),\n",
|
||||
" document_embedding_cache=store,\n",
|
||||
" query_embedding_cache=store,\n",
|
||||
" es_user=\"elastic\",\n",
|
||||
" es_password=\"<GENERATED PASSWORD>\",\n",
|
||||
" es_params={\n",
|
||||
" \"ca_certs\": \"~/http_ca.crt\",\n",
|
||||
" },\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -65,19 +89,93 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The index_name parameter can also accept aliases. This allows to use the ILM: Manage the index lifecycle that we suggest to consider for managing retention and controlling cache growth.\n",
|
||||
"## Usage\n",
|
||||
"\n",
|
||||
"Look at the class docstring for all parameters."
|
||||
"You can set data under keys like this using the `mset` method:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[b'value1', b'value2']"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"kv_store.mset(\n",
|
||||
" [\n",
|
||||
" [\"key1\", b\"value1\"],\n",
|
||||
" [\"key2\", b\"value2\"],\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"kv_store.mget(\n",
|
||||
" [\n",
|
||||
" \"key1\",\n",
|
||||
" \"key2\",\n",
|
||||
" ]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Index the generated vectors\n",
|
||||
"The cached vectors won't be searchable by default. The developer can customize the building of the Elasticsearch document in order to add indexed vector field.\n",
|
||||
"And you can delete data using the `mdelete` method:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[None, None]"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"kv_store.mdelete(\n",
|
||||
" [\n",
|
||||
" \"key1\",\n",
|
||||
" \"key2\",\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"This can be done by subclassing end overriding methods. "
|
||||
"kv_store.mget(\n",
|
||||
" [\n",
|
||||
" \"key1\",\n",
|
||||
" \"key2\",\n",
|
||||
" ]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use as an embeddings cache\n",
|
||||
"\n",
|
||||
"Like other `ByteStores`, you can use an `ElasticsearchEmbeddingsCache` instance for [persistent caching in document ingestion](/docs/how_to/caching_embeddings/) for RAG.\n",
|
||||
"\n",
|
||||
"However, cached vectors won't be searchable by default. The developer can customize the building of the Elasticsearch document in order to add indexed vector field.\n",
|
||||
"\n",
|
||||
"This can be done by subclassing and overriding methods:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -88,8 +186,6 @@
|
||||
"source": [
|
||||
"from typing import Any, Dict, List\n",
|
||||
"\n",
|
||||
"from langchain_elasticsearch import ElasticsearchEmbeddingsCache\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class SearchableElasticsearchStore(ElasticsearchEmbeddingsCache):\n",
|
||||
" @property\n",
|
||||
@@ -112,26 +208,29 @@
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": "When overriding the mapping and the document building, please only make additive modifications, keeping the base mapping intact."
|
||||
"source": [
|
||||
"When overriding the mapping and the document building, please only make additive modifications, keeping the base mapping intact."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `ElasticsearchEmbeddingsCache` features and configurations, head to the API reference: https://api.python.langchain.com/en/latest/cache/langchain_elasticsearch.cache.ElasticsearchEmbeddingsCache.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
"version": "3.10.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -2,11 +2,14 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "raw"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Local Filesystem\n",
|
||||
"sidebar_position: 3\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
@@ -16,51 +19,119 @@
|
||||
"source": [
|
||||
"# LocalFileStore\n",
|
||||
"\n",
|
||||
"The `LocalFileStore` is a persistent implementation of `ByteStore` that stores everything in a folder of your choosing."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[b'v1', b'v2']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from pathlib import Path\n",
|
||||
"This will help you get started with local filesystem [key-value stores](/docs/concepts/#key-value-stores). For detailed documentation of all LocalFileStore features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/storage/langchain.storage.file_system.LocalFileStore.html).\n",
|
||||
"\n",
|
||||
"from langchain.storage import LocalFileStore\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"root_path = Path.cwd() / \"data\" # can also be a path set by a string\n",
|
||||
"store = LocalFileStore(root_path)\n",
|
||||
"The `LocalFileStore` is a persistent implementation of `ByteStore` that stores everything in a folder of your choosing. It's useful if you're using a single machine and are tolerant of files being added or deleted.\n",
|
||||
"\n",
|
||||
"store.mset([(\"k1\", b\"v1\"), (\"k2\", b\"v2\")])\n",
|
||||
"print(store.mget([\"k1\", \"k2\"]))"
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | [JS support](https://js.langchain.com/v0.2/docs/integrations/stores/file_system) | Package downloads | Package latest |\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: | :---: |\n",
|
||||
"| [LocalFileStore](https://api.python.langchain.com/en/latest/storage/langchain.storage.file_system.LocalFileStore.html) | [langchain](https://api.python.langchain.com/en/latest/langchain_api_reference.html) | ✅ | ✅ |  |  |"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now let's see which files exist in our `data` folder:"
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain `LocalFileStore` integration lives in the `langchain` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our byte store:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"from langchain.storage import LocalFileStore\n",
|
||||
"\n",
|
||||
"root_path = Path.cwd() / \"data\" # can also be a path set by a string\n",
|
||||
"\n",
|
||||
"kv_store = LocalFileStore(root_path)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage\n",
|
||||
"\n",
|
||||
"You can set data under keys like this using the `mset` method:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[b'value1', b'value2']"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"kv_store.mset(\n",
|
||||
" [\n",
|
||||
" [\"key1\", b\"value1\"],\n",
|
||||
" [\"key2\", b\"value2\"],\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"kv_store.mget(\n",
|
||||
" [\n",
|
||||
" \"key1\",\n",
|
||||
" \"key2\",\n",
|
||||
" ]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can see the created files in your `data` folder:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"k1 k2\n"
|
||||
"key1 key2\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -69,16 +140,57 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"source": [
|
||||
"And you can delete data using the `mdelete` method:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[None, None]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"kv_store.mdelete(\n",
|
||||
" [\n",
|
||||
" \"key1\",\n",
|
||||
" \"key2\",\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"kv_store.mget(\n",
|
||||
" [\n",
|
||||
" \"key1\",\n",
|
||||
" \"key2\",\n",
|
||||
" ]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `LocalFileStore` features and configurations, head to the API reference: https://api.python.langchain.com/en/latest/storage/langchain.storage.file_system.LocalFileStore.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -92,7 +204,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
"version": "3.10.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: InMemoryByteStore\n",
|
||||
"sidebar_label: In-memory\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
@@ -28,7 +28,7 @@
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | [JS support](https://js.langchain.com/v0.2/docs/integrations/stores/in_memory/) | Package downloads | Package latest |\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: | :---: |\n",
|
||||
"| [InMemoryByteStore](https://api.python.langchain.com/en/latest/stores/langchain_core.stores.InMemoryByteStore.html) | [langchain_core](https://api.python.langchain.com/en/latest/core_api_reference.html) | ✅ | ✅ |  |  |"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_class_name: hidden
|
||||
---
|
||||
|
||||
# Key-value stores
|
||||
|
||||
[Key-value stores](/docs/concepts/#key-value-stores) are used by other LangChain components to store and retrieve data.
|
||||
|
||||
import DocCardList from "@theme/DocCardList";
|
||||
|
||||
<DocCardList />
|
||||
@@ -2,7 +2,11 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "raw"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Redis\n",
|
||||
@@ -15,9 +19,30 @@
|
||||
"source": [
|
||||
"# RedisStore\n",
|
||||
"\n",
|
||||
"This will help you get started with Redis [key-value stores](/docs/concepts/#key-value-stores). For detailed documentation of all `RedisStore` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/storage/langchain_community.storage.redis.RedisStore.html).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"The `RedisStore` is an implementation of `ByteStore` that stores everything in your Redis instance.\n",
|
||||
"\n",
|
||||
"To configure Redis, follow our [Redis guide](/docs/integrations/providers/redis)."
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | [JS support](https://js.langchain.com/v0.2/docs/integrations/stores/ioredis_storage) | Package downloads | Package latest |\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: | :---: |\n",
|
||||
"| [RedisStore](https://api.python.langchain.com/en/latest/storage/langchain_community.storage.redis.RedisStore.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ✅ |  |  |\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To create a Redis byte store, you'll need to set up a Redis instance. You can do this locally or via a provider - see our [Redis guide](/docs/integrations/providers/redis) for an overview of options."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain `RedisStore` integration lives in the `langchain_community` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -26,56 +51,128 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet redis"
|
||||
"%pip install -qU langchain_community redis"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our byte store:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[b'v1', b'v2']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.storage import RedisStore\n",
|
||||
"\n",
|
||||
"store = RedisStore(redis_url=\"redis://localhost:6379\")\n",
|
||||
"kv_store = RedisStore(redis_url=\"redis://localhost:6379\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage\n",
|
||||
"\n",
|
||||
"store.mset([(\"k1\", b\"v1\"), (\"k2\", b\"v2\")])\n",
|
||||
"print(store.mget([\"k1\", \"k2\"]))"
|
||||
"You can set data under keys like this using the `mset` method:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[b'value1', b'value2']"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"kv_store.mset(\n",
|
||||
" [\n",
|
||||
" [\"key1\", b\"value1\"],\n",
|
||||
" [\"key2\", b\"value2\"],\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"kv_store.mget(\n",
|
||||
" [\n",
|
||||
" \"key1\",\n",
|
||||
" \"key2\",\n",
|
||||
" ]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"And you can delete data using the `mdelete` method:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[None, None]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"kv_store.mdelete(\n",
|
||||
" [\n",
|
||||
" \"key1\",\n",
|
||||
" \"key2\",\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"kv_store.mget(\n",
|
||||
" [\n",
|
||||
" \"key1\",\n",
|
||||
" \"key2\",\n",
|
||||
" ]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `RedisStore` features and configurations, head to the API reference: https://api.python.langchain.com/en/latest/storage/langchain_community.storage.redis.RedisStore.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
"version": "3.10.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -2,7 +2,11 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "raw"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Upstash Redis\n",
|
||||
@@ -15,11 +19,48 @@
|
||||
"source": [
|
||||
"# UpstashRedisByteStore\n",
|
||||
"\n",
|
||||
"The `UpstashRedisStore` is an implementation of `ByteStore` that stores everything in your Upstash-hosted Redis instance.\n",
|
||||
"This will help you get started with Upstash redis [key-value stores](/docs/concepts/#key-value-stores). For detailed documentation of all `UpstashRedisByteStore` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/storage/langchain_community.storage.upstash_redis.UpstashRedisByteStore.html).\n",
|
||||
"\n",
|
||||
"To use the base `RedisStore` instead, see [this guide](/docs/integrations/stores/redis/)\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"To configure Upstash Redis, follow our [Upstash guide](/docs/integrations/providers/upstash)."
|
||||
"The `UpstashRedisStore` is an implementation of `ByteStore` that stores everything in your [Upstash](https://upstash.com/)-hosted Redis instance.\n",
|
||||
"\n",
|
||||
"To use the base `RedisStore` instead, see [this guide](/docs/integrations/stores/redis/).\n",
|
||||
"\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | [JS support](https://js.langchain.com/v0.2/docs/integrations/stores/upstash_redis_storage) | Package downloads | Package latest |\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: | :---: |\n",
|
||||
"| [UpstashRedisByteStore](https://api.python.langchain.com/en/latest/storage/langchain_community.storage.upstash_redis.UpstashRedisByteStore.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ❌ | ✅ |  |  |\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"You'll first need to [sign up for an Upstash account](https://upstash.com/docs/redis/overall/getstarted). Next, you'll need to create a Redis database to connect to.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Once you've created your database, get your database URL (don't forget the `https://`!) and token:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"URL = getpass(\"Enter your Upstash URL\")\n",
|
||||
"TOKEN = getpass(\"Enter your Upstash REST token\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain Upstash integration lives in the `langchain_community` package. You'll also need to install the `upstash-redis` package as a peer dependency:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -28,61 +69,130 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet upstash-redis"
|
||||
"%pip install -qU langchain_community upstash-redis"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our byte store:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[b'v1', b'v2']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.storage import UpstashRedisByteStore\n",
|
||||
"from upstash_redis import Redis\n",
|
||||
"\n",
|
||||
"URL = \"<UPSTASH_REDIS_REST_URL>\"\n",
|
||||
"TOKEN = \"<UPSTASH_REDIS_REST_TOKEN>\"\n",
|
||||
"\n",
|
||||
"redis_client = Redis(url=URL, token=TOKEN)\n",
|
||||
"store = UpstashRedisByteStore(client=redis_client, ttl=None, namespace=\"test-ns\")\n",
|
||||
"kv_store = UpstashRedisByteStore(client=redis_client, ttl=None, namespace=\"test-ns\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage\n",
|
||||
"\n",
|
||||
"store.mset([(\"k1\", b\"v1\"), (\"k2\", b\"v2\")])\n",
|
||||
"print(store.mget([\"k1\", \"k2\"]))"
|
||||
"You can set data under keys like this using the `mset` method:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[b'value1', b'value2']"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"kv_store.mset(\n",
|
||||
" [\n",
|
||||
" [\"key1\", b\"value1\"],\n",
|
||||
" [\"key2\", b\"value2\"],\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"kv_store.mget(\n",
|
||||
" [\n",
|
||||
" \"key1\",\n",
|
||||
" \"key2\",\n",
|
||||
" ]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"And you can delete data using the `mdelete` method:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[None, None]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"kv_store.mdelete(\n",
|
||||
" [\n",
|
||||
" \"key1\",\n",
|
||||
" \"key2\",\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"kv_store.mget(\n",
|
||||
" [\n",
|
||||
" \"key1\",\n",
|
||||
" \"key2\",\n",
|
||||
" ]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `UpstashRedisByteStore` features and configurations, head to the API reference: https://api.python.langchain.com/en/latest/storage/langchain_community.storage.upstash_redis.UpstashRedisByteStore.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
"version": "3.10.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -1,129 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Airbyte Question Answering\n",
|
||||
"This notebook shows how to do question answering over structured data, in this case using the `AirbyteStripeLoader`.\n",
|
||||
"\n",
|
||||
"Vectorstores often have a hard time answering questions that requires computing, grouping and filtering structured data so the high level idea is to use a `pandas` dataframe to help with these types of questions. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"1. Load data from Stripe using Airbyte. user the `record_handler` paramater to return a JSON from the data loader."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"import pandas as pd\n",
|
||||
"from langchain.agents import AgentType\n",
|
||||
"from langchain_community.document_loaders.airbyte import AirbyteStripeLoader\n",
|
||||
"from langchain_experimental.agents import create_pandas_dataframe_agent\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"stream_name = \"customers\"\n",
|
||||
"config = {\n",
|
||||
" \"client_secret\": os.getenv(\"STRIPE_CLIENT_SECRET\"),\n",
|
||||
" \"account_id\": os.getenv(\"STRIPE_ACCOUNT_D\"),\n",
|
||||
" \"start_date\": \"2023-01-20T00:00:00Z\",\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def handle_record(record: dict, _id: str):\n",
|
||||
" return record.data\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"loader = AirbyteStripeLoader(\n",
|
||||
" config=config,\n",
|
||||
" record_handler=handle_record,\n",
|
||||
" stream_name=stream_name,\n",
|
||||
")\n",
|
||||
"data = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"2. Pass the data to `pandas` dataframe."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df = pd.DataFrame(data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"3. Pass the dataframe `df` to the `create_pandas_dataframe_agent` and invoke\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = create_pandas_dataframe_agent(\n",
|
||||
" ChatOpenAI(temperature=0, model=\"gpt-4\"),\n",
|
||||
" df,\n",
|
||||
" verbose=True,\n",
|
||||
" agent_type=AgentType.OPENAI_FUNCTIONS,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"4. Run the agent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"output = agent.run(\"How many rows are there?\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,301 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7094e328",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# CSV\n",
|
||||
"\n",
|
||||
"This notebook shows how to use agents to interact with data in `CSV` format. It is mostly optimized for question answering.\n",
|
||||
"\n",
|
||||
"**NOTE: this agent calls the Pandas DataFrame agent under the hood, which in turn calls the Python agent, which executes LLM generated Python code - this can be bad if the LLM generated Python code is harmful. Use cautiously.**\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "caae0bec",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents.agent_types import AgentType\n",
|
||||
"from langchain_experimental.agents.agent_toolkits import create_csv_agent\n",
|
||||
"from langchain_openai import ChatOpenAI, OpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bd806175",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using `ZERO_SHOT_REACT_DESCRIPTION`\n",
|
||||
"\n",
|
||||
"This shows how to initialize the agent using the `ZERO_SHOT_REACT_DESCRIPTION` agent type."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "a1717204",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = create_csv_agent(\n",
|
||||
" OpenAI(temperature=0),\n",
|
||||
" \"titanic.csv\",\n",
|
||||
" verbose=True,\n",
|
||||
" agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c31bb8a6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using OpenAI Functions\n",
|
||||
"\n",
|
||||
"This shows how to initialize the agent using the OPENAI_FUNCTIONS agent type. Note that this is an alternative to the above."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "16c4dc59",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = create_csv_agent(\n",
|
||||
" ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\"),\n",
|
||||
" \"titanic.csv\",\n",
|
||||
" verbose=True,\n",
|
||||
" agent_type=AgentType.OPENAI_FUNCTIONS,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "46b9489d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error in on_chain_start callback: 'name'\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `python_repl_ast` with `df.shape[0]`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m891\u001b[0m\u001b[32;1m\u001b[1;3mThere are 891 rows in the dataframe.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'There are 891 rows in the dataframe.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"how many rows are there?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "a96309be",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error in on_chain_start callback: 'name'\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `python_repl_ast` with `df[df['SibSp'] > 3]['PassengerId'].count()`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m30\u001b[0m\u001b[32;1m\u001b[1;3mThere are 30 people in the dataframe who have more than 3 siblings.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'There are 30 people in the dataframe who have more than 3 siblings.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"how many people have more than 3 siblings\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "964a09f7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error in on_chain_start callback: 'name'\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `python_repl_ast` with `import pandas as pd\n",
|
||||
"import math\n",
|
||||
"\n",
|
||||
"# Create a dataframe\n",
|
||||
"data = {'Age': [22, 38, 26, 35, 35]}\n",
|
||||
"df = pd.DataFrame(data)\n",
|
||||
"\n",
|
||||
"# Calculate the average age\n",
|
||||
"average_age = df['Age'].mean()\n",
|
||||
"\n",
|
||||
"# Calculate the square root of the average age\n",
|
||||
"square_root = math.sqrt(average_age)\n",
|
||||
"\n",
|
||||
"square_root`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m5.585696017507576\u001b[0m\u001b[32;1m\u001b[1;3mThe square root of the average age is approximately 5.59.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The square root of the average age is approximately 5.59.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"whats the square root of the average age?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "09539c18",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Multi CSV Example\n",
|
||||
"\n",
|
||||
"This next part shows how the agent can interact with multiple csv files passed in as a list."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "15f11fbd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error in on_chain_start callback: 'name'\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `python_repl_ast` with `df1['Age'].nunique() - df2['Age'].nunique()`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m-1\u001b[0m\u001b[32;1m\u001b[1;3mThere is 1 row in the age column that is different between the two dataframes.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'There is 1 row in the age column that is different between the two dataframes.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent = create_csv_agent(\n",
|
||||
" ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\"),\n",
|
||||
" [\"titanic.csv\", \"titanic_age_fillna.csv\"],\n",
|
||||
" verbose=True,\n",
|
||||
" agent_type=AgentType.OPENAI_FUNCTIONS,\n",
|
||||
")\n",
|
||||
"agent.run(\"how many rows in the age column are different between the two dfs?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f2909808",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,850 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Github\n",
|
||||
"\n",
|
||||
"The `Github` toolkit contains tools that enable an LLM agent to interact with a github repository. \n",
|
||||
"The tool is a wrapper for the [PyGitHub](https://github.com/PyGithub/PyGithub) library. \n",
|
||||
"\n",
|
||||
"## Quickstart\n",
|
||||
"\n",
|
||||
"1. Install the pygithub library\n",
|
||||
"2. Create a Github app\n",
|
||||
"3. Set your environmental variables\n",
|
||||
"4. Pass the tools to your agent with `toolkit.get_tools()`\n",
|
||||
"\n",
|
||||
"Each of these steps will be explained in great detail below.\n",
|
||||
"\n",
|
||||
"1. **Get Issues**- fetches issues from the repository.\n",
|
||||
"\n",
|
||||
"2. **Get Issue**- fetches details about a specific issue.\n",
|
||||
"\n",
|
||||
"3. **Comment on Issue**- posts a comment on a specific issue.\n",
|
||||
"\n",
|
||||
"4. **Create Pull Request**- creates a pull request from the bot's working branch to the base branch.\n",
|
||||
"\n",
|
||||
"5. **Create File**- creates a new file in the repository.\n",
|
||||
"\n",
|
||||
"6. **Read File**- reads a file from the repository.\n",
|
||||
"\n",
|
||||
"7. **Update File**- updates a file in the repository.\n",
|
||||
"\n",
|
||||
"8. **Delete File**- deletes a file from the repository.\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 1. Install the `pygithub` library "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "shellscript"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet pygithub langchain-community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 2. Create a Github App\n",
|
||||
"\n",
|
||||
"[Follow the instructions here](https://docs.github.com/en/apps/creating-github-apps/registering-a-github-app/registering-a-github-app) to create and register a Github app. Make sure your app has the following [repository permissions:](https://docs.github.com/en/rest/overview/permissions-required-for-github-apps?apiVersion=2022-11-28)\n",
|
||||
"\n",
|
||||
"* Commit statuses (read only)\n",
|
||||
"* Contents (read and write)\n",
|
||||
"* Issues (read and write)\n",
|
||||
"* Metadata (read only)\n",
|
||||
"* Pull requests (read and write)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Once the app has been registered, you must give your app permission to access each of the repositories you whish it to act upon. Use the App settings on [github.com here](https://github.com/settings/installations).\n",
|
||||
"\n",
|
||||
"### 3. Set Environmental Variables\n",
|
||||
"\n",
|
||||
"Before initializing your agent, the following environmental variables need to be set:\n",
|
||||
"\n",
|
||||
"* **GITHUB_APP_ID**- A six digit number found in your app's general settings\n",
|
||||
"* **GITHUB_APP_PRIVATE_KEY**- The location of your app's private key .pem file, or the full text of that file as a string.\n",
|
||||
"* **GITHUB_REPOSITORY**- The name of the Github repository you want your bot to act upon. Must follow the format {username}/{repo-name}. *Make sure the app has been added to this repository first!*\n",
|
||||
"* Optional: **GITHUB_BRANCH**- The branch where the bot will make its commits. Defaults to `repo.default_branch`.\n",
|
||||
"* Optional: **GITHUB_BASE_BRANCH**- The base branch of your repo upon which PRs will based from. Defaults to `repo.default_branch`.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example: Simple Agent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from langchain.agents import AgentType, initialize_agent\n",
|
||||
"from langchain_community.agent_toolkits.github.toolkit import GitHubToolkit\n",
|
||||
"from langchain_community.utilities.github import GitHubAPIWrapper\n",
|
||||
"from langchain_openai import ChatOpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Set your environment variables using os.environ\n",
|
||||
"os.environ[\"GITHUB_APP_ID\"] = \"123456\"\n",
|
||||
"os.environ[\"GITHUB_APP_PRIVATE_KEY\"] = \"path/to/your/private-key.pem\"\n",
|
||||
"os.environ[\"GITHUB_REPOSITORY\"] = \"username/repo-name\"\n",
|
||||
"os.environ[\"GITHUB_BRANCH\"] = \"bot-branch-name\"\n",
|
||||
"os.environ[\"GITHUB_BASE_BRANCH\"] = \"main\"\n",
|
||||
"\n",
|
||||
"# This example also requires an OpenAI API key\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Available tools:\n",
|
||||
"\tGet Issues\n",
|
||||
"\tGet Issue\n",
|
||||
"\tComment on Issue\n",
|
||||
"\tList open pull requests (PRs)\n",
|
||||
"\tGet Pull Request\n",
|
||||
"\tOverview of files included in PR\n",
|
||||
"\tCreate Pull Request\n",
|
||||
"\tList Pull Requests' Files\n",
|
||||
"\tCreate File\n",
|
||||
"\tRead File\n",
|
||||
"\tUpdate File\n",
|
||||
"\tDelete File\n",
|
||||
"\tOverview of existing files in Main branch\n",
|
||||
"\tOverview of files in current working branch\n",
|
||||
"\tList branches in this repository\n",
|
||||
"\tSet active branch\n",
|
||||
"\tCreate a new branch\n",
|
||||
"\tGet files from a directory\n",
|
||||
"\tSearch issues and pull requests\n",
|
||||
"\tSearch code\n",
|
||||
"\tCreate review request\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm = ChatOpenAI(temperature=0, model=\"gpt-4-1106-preview\")\n",
|
||||
"github = GitHubAPIWrapper()\n",
|
||||
"toolkit = GitHubToolkit.from_github_api_wrapper(github)\n",
|
||||
"tools = toolkit.get_tools()\n",
|
||||
"\n",
|
||||
"# STRUCTURED_CHAT includes args_schema for each tool, helps tool args parsing errors.\n",
|
||||
"agent = initialize_agent(\n",
|
||||
" tools,\n",
|
||||
" llm,\n",
|
||||
" agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,\n",
|
||||
" verbose=True,\n",
|
||||
")\n",
|
||||
"print(\"Available tools:\")\n",
|
||||
"for tool in tools:\n",
|
||||
" print(\"\\t\" + tool.name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 55,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m I need to figure out what issues need to be completed.\n",
|
||||
"Action: Get Issues\n",
|
||||
"Action Input: N/A\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mFound 1 issues:\n",
|
||||
"[{'title': 'Update README file', 'number': 9}]\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to get more information about this issue.\n",
|
||||
"Action: Get Issue\n",
|
||||
"Action Input: 9\u001b[0m\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3m{\"title\": \"Update README file\", \"body\": \"Find what the most popular frontend framework is right now and add a short blurb to the readme.md file about how this website will take advantage of it.\", \"comments\": \"[]\"}\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to update the README file.\n",
|
||||
"Action: Create File\n",
|
||||
"Action Input: README.md\u001b[0m\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mFile already exists at README.md. Use update_file instead\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to update the existing README file.\n",
|
||||
"Action: Update File\n",
|
||||
"Action Input: README.md\n",
|
||||
"OLD <<<<\n",
|
||||
"This is a sample website\n",
|
||||
">>>> OLD\n",
|
||||
"NEW <<<<\n",
|
||||
"This is a sample website that uses the most popular frontend framework.\n",
|
||||
">>>> NEW\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mFile content was not updated because old content was not found.It may be helpful to use the read_file action to get the current file contents.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to get the current file contents.\n",
|
||||
"Action: Read File\n",
|
||||
"Action Input: README.md\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3mThis is my awesome website!\n",
|
||||
"\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to update the README file with the new content.\n",
|
||||
"Action: Update File\n",
|
||||
"Action Input: README.md\n",
|
||||
"OLD <<<<\n",
|
||||
"This is my awesome website!\n",
|
||||
">>>> OLD\n",
|
||||
"NEW <<<<\n",
|
||||
"This is my awesome website that uses the most popular frontend framework.\n",
|
||||
">>>> NEW\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mUpdated file README.md\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer.\n",
|
||||
"Final Answer: The README.md file has been updated with the new content.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The README.md file has been updated with the new content.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 55,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\n",
|
||||
" \"You have the software engineering capabilities of a Google Principle engineer. You are tasked with completing issues on a github repository. Please look at the existing issues and complete them.\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example: Read an issue, open a pull request\n",
|
||||
"\n",
|
||||
"Workflow: \n",
|
||||
"1. Read issues, either a specific one or just ask it to look at recent ones. \n",
|
||||
"2. Write code, commit it to a new branch.\n",
|
||||
"3. Open a PR\n",
|
||||
"4. \"Request review\" on the PR from the original author of the issue.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"### Input data and LangSmith Trace\n",
|
||||
"* LangSmith trace for this run: https://smith.langchain.com/public/fee6643c-b214-42d0-967b-d24dcdd690fe/r\n",
|
||||
"* Input issue: https://github.com/KastanDay/ML4Bio/issues/33\n",
|
||||
"* Final PR created by bot: https://github.com/KastanDay/ML4Bio/pull/40"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Please implement these changes by creating or editing the necessary files. \n",
|
||||
"\n",
|
||||
"1. First use read_file to read any files in the repo that seem relevant. \n",
|
||||
"2. Then, when you're ready, start implementing changes by creating and updating files. Implement any and all remaining code to make the project work as the commenter intended. \n",
|
||||
"2. The last step is to create a PR with a clear and concise title and description, list any concerns or final changes necessary in the PR body.\n",
|
||||
"3. After opening the PR, comment on the original issue and mention the new PR your just opened, you must comment \"I opened a PR for you to review here #<PR_NUMBER>\" (it'll be something like #30). That hashtag syntax will automatically link to the PR, as necessary. Thanks.\n",
|
||||
"4. If you feel the PR is satisfactory for completing your assignment, create a review request for the original user that opened the issue. Use their username to tag them.\n",
|
||||
"\n",
|
||||
"Feel free to ask for help or leave a comment on the Issue or PR if you're stuck.\n",
|
||||
"\n",
|
||||
"Here's your latest assignment: {issue_description}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain import hub\n",
|
||||
"\n",
|
||||
"gh_issue_prompt_template = hub.pull(\"kastanday/new-github-issue\")\n",
|
||||
"print(gh_issue_prompt_template.template)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Please implement these changes by creating or editing the necessary files. \n",
|
||||
"\n",
|
||||
"1. First use read_file to read any files in the repo that seem relevant. \n",
|
||||
"2. Then, when you're ready, start implementing changes by creating and updating files. Implement any and all remaining code to make the project work as the commenter intended. \n",
|
||||
"2. The last step is to create a PR with a clear and concise title and description, list any concerns or final changes necessary in the PR body.\n",
|
||||
"3. After opening the PR, comment on the original issue and mention the new PR your just opened, you must comment \"I opened a PR for you to review here #<PR_NUMBER>\" (it'll be something like #30). That hashtag syntax will automatically link to the PR, as necessary. Thanks.\n",
|
||||
"4. If you feel the PR is satisfactory for completing your assignment, create a review request for the original user that opened the issue. Use their username to tag them.\n",
|
||||
"\n",
|
||||
"Feel free to ask for help or leave a comment on the Issue or PR if you're stuck.\n",
|
||||
"\n",
|
||||
"Here's your latest assignment: Title: Create a full command line executable workflow for RNA-Seq on PBMC Samples. Open a new pull request (on a separate branch) and comment the PR number here when you're done..\n",
|
||||
"Opened by user: KastanDay\n",
|
||||
"Body: Experiment Type:\n",
|
||||
"RNA-Seq\n",
|
||||
"Sequencing of total cellular RNA\n",
|
||||
"\n",
|
||||
"Workflow Management:\n",
|
||||
"Bash/SLURM\n",
|
||||
"Scripting and job scheduling\n",
|
||||
"\n",
|
||||
"Software Stack:\n",
|
||||
"FastQC\n",
|
||||
"MultiQC\n",
|
||||
"STAR\n",
|
||||
"RSEM\n",
|
||||
"samtools\n",
|
||||
"DESeq2\n",
|
||||
"\n",
|
||||
"What else to know about the pipeline?\n",
|
||||
"I am working PBMC samples collected from patients that are undergoing immunotherapy.\n",
|
||||
"\n",
|
||||
"Use the data files existing in [Report_WholeBrain](https://github.com/KastanDay/ML4Bio/tree/main/Report_WholeBrain) as input for this workflow.\n",
|
||||
"\n",
|
||||
"You should write a series of bash scripts and R scripts that can accomplish this task. Open a PR with those scripts when you're done.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def format_issue(issue):\n",
|
||||
" title = f\"Title: {issue.get('title')}.\"\n",
|
||||
" opened_by = f\"Opened by user: {issue.get('opened_by')}\"\n",
|
||||
" body = f\"Body: {issue.get('body')}\"\n",
|
||||
" comments = issue.get(\"comments\") # often too long\n",
|
||||
" return \"\\n\".join([title, opened_by, body])\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"issue = github.get_issue(33) # task to implement a RNA-seq pipeline (bioinformatics)\n",
|
||||
"final_gh_issue_prompt = gh_issue_prompt_template.format(\n",
|
||||
" issue_description=format_issue(issue)\n",
|
||||
")\n",
|
||||
"print(final_gh_issue_prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.memory.summary_buffer import ConversationSummaryBufferMemory\n",
|
||||
"from langchain_core.prompts.chat import MessagesPlaceholder\n",
|
||||
"\n",
|
||||
"summarizer_llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo\") # type: ignore\n",
|
||||
"chat_history = MessagesPlaceholder(variable_name=\"chat_history\")\n",
|
||||
"memory = ConversationSummaryBufferMemory(\n",
|
||||
" memory_key=\"chat_history\",\n",
|
||||
" return_messages=True,\n",
|
||||
" llm=summarizer_llm,\n",
|
||||
" max_token_limit=2_000,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"agent = initialize_agent(\n",
|
||||
" tools,\n",
|
||||
" llm,\n",
|
||||
" agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,\n",
|
||||
" verbose=True,\n",
|
||||
" handle_parsing_errors=True, # or pass a function that accepts the error and returns a string\n",
|
||||
" max_iterations=30,\n",
|
||||
" max_execution_time=None,\n",
|
||||
" early_stopping_method=\"generate\",\n",
|
||||
" memory=memory,\n",
|
||||
" # trim_intermediate_steps=fancier_trim_intermediate_steps,\n",
|
||||
" agent_kwargs={\n",
|
||||
" \"memory_prompts\": [chat_history],\n",
|
||||
" \"input_variables\": [\"input\", \"agent_scratchpad\", \"chat_history\"],\n",
|
||||
" \"prefix\": final_gh_issue_prompt,\n",
|
||||
" },\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Get files from a directory\",\n",
|
||||
" \"action_input\": \"ML4Bio/tree/main/Report_WholeBrain\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3mError: status code 404, None\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mThe previous action to get files from a directory failed because the path provided does not exist or is not accessible. I need to correct the path to access the files in the `Report_WholeBrain` directory. Let's try to fetch the list of files from the correct directory path. \n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Get files from a directory\",\n",
|
||||
" \"action_input\": \"Report_WholeBrain\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3m['Report_WholeBrain/MDSclustering_WholeBrain.html', 'Report_WholeBrain/MDSclustering_WholeBrain_RUVremoved.html', 'Report_WholeBrain/Report_Antonson_WholeBrain_2022Mar.Rmd', 'Report_WholeBrain/Report_WholeBrain_files/figure-html/Figure 1-1.png', 'Report_WholeBrain/Report_WholeBrain_files/figure-html/Figure 2-1.png', 'Report_WholeBrain/Report_WholeBrain_files/figure-html/Figure 3-1.png', 'Report_WholeBrain/Report_WholeBrain_files/figure-html/Figure 4-1.png', 'Report_WholeBrain/Report_WholeBrain_files/figure-html/Figure 6-1.png', 'Report_WholeBrain/Report_WholeBrain_files/figure-html/Figure 7-1.png', 'Report_WholeBrain/Report_WholeBrain_files/figure-html/Figure 8-1.png', 'Report_WholeBrain/Report_WholeBrain_files/figure-html/Figure 9-1.png', 'Report_WholeBrain/SalmonSummarizedOutput.RData', 'Report_WholeBrain/SampleInfo_RUVvariables_WholeBrain_2022-05-12.csv', 'Report_WholeBrain/Targets_Final.txt', 'Report_WholeBrain/WholeBrain_GeneResults_2022-05-12.xlsx', 'Report_WholeBrain/WholeBrain_GeneResults_RUV_2022-05-12.xlsx', 'Report_WholeBrain/WholeBrain_Gene_level_counts_2022-05-12.xlsx', 'Report_WholeBrain/WholeBrain_RUV_FDR0.1.html', 'Report_WholeBrain/WholeBrain_logCPMValues_RUVcorrected_2022-05-12.xlsx', 'Report_WholeBrain/WholeBrain_logCPMvalues_2022-05-12.xlsx', 'Report_WholeBrain/WholeBrain_rawP05.html', 'Report_WholeBrain/getGO.R', 'Report_WholeBrain/getPath.R', 'Report_WholeBrain/interactive_plots/css/glimma.min.css', 'Report_WholeBrain/interactive_plots/css/src/images/animated-overlay.gif', 'Report_WholeBrain/interactive_plots/css/src/images/favicon.ico', 'Report_WholeBrain/interactive_plots/css/src/images/sort_asc.png', 'Report_WholeBrain/interactive_plots/css/src/images/sort_asc_disabled.png', 'Report_WholeBrain/interactive_plots/css/src/images/sort_both.png', 'Report_WholeBrain/interactive_plots/css/src/images/sort_desc.png', 'Report_WholeBrain/interactive_plots/css/src/images/sort_desc_disabled.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-bg_flat_0_aaaaaa_40x100.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-bg_flat_75_ffffff_40x100.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-bg_glass_55_fbf9ee_1x400.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-bg_glass_65_ffffff_1x400.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-bg_glass_75_dadada_1x400.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-bg_glass_75_e6e6e6_1x400.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-bg_glass_95_fef1ec_1x400.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-bg_highlight-soft_75_cccccc_1x100.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-icons_222222_256x240.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-icons_2e83ff_256x240.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-icons_454545_256x240.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-icons_888888_256x240.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-icons_cd0a0a_256x240.png', 'Report_WholeBrain/interactive_plots/js/glimma.min.js', 'Report_WholeBrain/interactive_plots/js/old_MDSclustering_Microglia.js', 'Report_WholeBrain/interactive_plots/js/old_MDSclustering_Microglia_RUV.js', 'Report_WholeBrain/interactive_plots/js/old_MDSclustering_WholeBrain.js', 'Report_WholeBrain/interactive_plots/js/old_MDSclustering_WholeBrain_RUV.js', 'Report_WholeBrain/interactive_plots/js/old_MDSclustering_WholeBrain_noOUT.js', 'Report_WholeBrain/interactive_plots/js/old_Microglia_rawP05.js', 'Report_WholeBrain/interactive_plots/js/old_WholeBrain_RUV_FDR0.1.js', 'Report_WholeBrain/interactive_plots/js/old_WholeBrain_rawP05.js', 'Report_WholeBrain/interactive_plots/old_MDSclustering_Microglia.html', 'Report_WholeBrain/interactive_plots/old_MDSclustering_Microglia_RUV.html', 'Report_WholeBrain/interactive_plots/old_MDSclustering_WholeBrain.html', 'Report_WholeBrain/interactive_plots/old_MDSclustering_WholeBrain_RUV.html', 'Report_WholeBrain/interactive_plots/old_MDSclustering_WholeBrain_noOUT.html', 'Report_WholeBrain/interactive_plots/old_Microglia_rawP05.html', 'Report_WholeBrain/interactive_plots/old_WholeBrain_RUV_FDR0.1.html', 'Report_WholeBrain/interactive_plots/old_WholeBrain_rawP05.html', 'Report_WholeBrain/trx_EGids_combined.txt', 'Report_WholeBrain/⭐️____Report_Antonson_WholeBrain_2022Mar.html']\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mThe files in the `Report_WholeBrain` directory seem to be mostly reports, images, and some R scripts. However, none of these files appear to be raw RNA-Seq data files or scripts directly related to the RNA-Seq workflow requested by the user. To proceed with creating the workflow, I will need to create new bash and R scripts that will handle the RNA-Seq analysis using the software stack mentioned (FastQC, MultiQC, STAR, RSEM, samtools, DESeq2).\n",
|
||||
"\n",
|
||||
"Before creating the scripts, I should create a new branch to work on this feature.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Create a new branch\",\n",
|
||||
" \"action_input\": \"rna-seq-workflow\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mBranch 'rna-seq-workflow_v1' created successfully, and set as current active branch.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mNow that the new branch `rna-seq-workflow_v1` is created and active, I can start creating the necessary bash and R scripts for the RNA-Seq workflow. I will begin by creating a bash script for quality control using FastQC and MultiQC.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Create File\",\n",
|
||||
" \"action_input\": \"rna-seq-workflow/01_quality_control.sh\\n#!/bin/bash\\n#PBS -N quality_control\\n#PBS -l walltime=04:00:00\\n#PBS -l nodes=1:ppn=8\\n#PBS -q batch\\n\\n# Load modules for FastQC and MultiQC\\nmodule load FastQC/0.11.8\\nmodule load MultiQC/1.9\\n\\n# Define input and output directories\\nINPUT_DIR='/path/to/raw_data'\\nOUTPUT_DIR='/path/to/output/quality_control'\\n\\n# Create output directory if it doesn't exist\\nmkdir -p $OUTPUT_DIR\\n\\n# Run FastQC on all files in the input directory\\nfastqc -o $OUTPUT_DIR -t 8 $INPUT_DIR/*.fastq.gz\\n\\n# Run MultiQC to aggregate results\\nmultiqc -o $OUTPUT_DIR $OUTPUT_DIR\\n\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3mCreated file rna-seq-workflow/01_quality_control.sh\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mThe quality control bash script has been created successfully. Next, I will create a bash script for the alignment step using STAR.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Create File\",\n",
|
||||
" \"action_input\": \"rna-seq-workflow/02_alignment.sh\\n#!/bin/bash\\n#PBS -N alignment\\n#PBS -l walltime=08:00:00\\n#PBS -l nodes=1:ppn=16\\n#PBS -q batch\\n\\n# Load module for STAR\\nmodule load STAR/2.7.3a\\n\\n# Define input and output directories\\nINPUT_DIR='/path/to/quality_control'\\nOUTPUT_DIR='/path/to/output/alignment'\\nGENOME_DIR='/path/to/genome_indices'\\n\\n# Create output directory if it doesn't exist\\nmkdir -p $OUTPUT_DIR\\n\\n# Run STAR for each sample\\nfor sample in $INPUT_DIR/*.fastq.gz\\ndo\\n # Define output file names\\n prefix=$(basename $sample .fastq.gz)\\n output_prefix=$OUTPUT_DIR/$prefix\\n\\n # Run STAR\\n STAR --runThreadN 16 --genomeDir $GENOME_DIR --readFilesIn $sample --readFilesCommand zcat --outFileNamePrefix $output_prefix --outSAMtype BAM SortedByCoordinate\\n\\ndone\\n\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3mCreated file rna-seq-workflow/02_alignment.sh\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mThe alignment bash script has been created successfully. Next, I will create a bash script for transcript quantification using RSEM.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Create File\",\n",
|
||||
" \"action_input\": \"rna-seq-workflow/03_transcript_quantification.sh\\n#!/bin/bash\\n#PBS -N transcript_quantification\\n#PBS -l walltime=12:00:00\\n#PBS -l nodes=1:ppn=8\\n#PBS -q batch\\n\\n# Load module for RSEM\\nmodule load RSEM/1.3.3\\n\\n# Define input and output directories\\nINPUT_DIR='/path/to/alignment'\\nOUTPUT_DIR='/path/to/output/transcript_quantification'\\nREFERENCE_DIR='/path/to/reference'\\n\\n# Create output directory if it doesn't exist\\nmkdir -p $OUTPUT_DIR\\n\\n# Run RSEM for each sample\\nfor bam in $INPUT_DIR/*.bam\\ndo\\n # Define output file names\\n prefix=$(basename $bam .bam)\\n output_prefix=$OUTPUT_DIR/$prefix\\n\\n # Run RSEM\\n rsem-calculate-expression --bam --no-bam-output --estimate-rspd --append-names --output-genome-bam -p 8 $bam $REFERENCE_DIR $output_prefix\\n\\ndone\\n\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3mCreated file rna-seq-workflow/03_transcript_quantification.sh\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mThe transcript quantification bash script has been created successfully. Now, I will create a bash script for sorting and indexing BAM files using samtools, which is a common step before differential expression analysis.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Create File\",\n",
|
||||
" \"action_input\": \"rna-seq-workflow/04_sort_index_bam.sh\\n#!/bin/bash\\n#PBS -N sort_index_bam\\n#PBS -l walltime=02:00:00\\n#PBS -l nodes=1:ppn=4\\n#PBS -q batch\\n\\n# Load module for samtools\\nmodule load samtools/1.10\\n\\n# Define input and output directories\\nINPUT_DIR='/path/to/alignment'\\nOUTPUT_DIR='/path/to/output/sorted_bams'\\n\\n# Create output directory if it doesn't exist\\nmkdir -p $OUTPUT_DIR\\n\\n# Sort and index BAM files\\nfor bam in $INPUT_DIR/*.bam\\ndo\\n # Define output file names\\n sorted_bam=$OUTPUT_DIR/$(basename $bam .bam).sorted.bam\\n\\n # Sort BAM file\\n samtools sort -@ 4 -o $sorted_bam $bam\\n\\n # Index sorted BAM file\\n samtools index $sorted_bam\\ndone\\n\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3mCreated file rna-seq-workflow/04_sort_index_bam.sh\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mThe sorting and indexing of BAM files script has been created successfully. The next step is to create an R script for differential expression analysis using DESeq2. This script will be executed after the previous steps in the workflow have been completed.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Create File\",\n",
|
||||
" \"action_input\": \"rna-seq-workflow/05_differential_expression.R\\n# Load DESeq2 library\\nlibrary(DESeq2)\\n\\n# Set input directory\\ninput_dir <- '/path/to/transcript_quantification'\\n\\n# Read in the count matrix and the sample information\\n# Assuming countData is a matrix with count data and colData is a DataFrame with sample information\\ncountData <- as.matrix(read.csv(file.path(input_dir, 'count_matrix.csv'), row.names=1))\\ncolData <- read.csv(file.path(input_dir, 'colData.csv'))\\n\\n# Create a DESeqDataSet object\\ndds <- DESeqDataSetFromMatrix(countData = countData,\\n colData = colData,\\n design = ~ condition)\\n\\n# Run the differential expression analysis\\ndds <- DESeq(dds)\\n\\n# Get the results\\nres <- results(dds)\\n\\n# Order the results by the adjusted p-value\\nresOrdered <- res[order(res$padj),]\\n\\n# Write the results to a CSV file\\nwrite.csv(as.data.frame(resOrdered), file.path(input_dir, 'differential_expression_results.csv'))\\n\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3mCreated file rna-seq-workflow/05_differential_expression.R\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mThe R script for differential expression analysis using DESeq2 has been created successfully. Now that all the necessary scripts for the RNA-Seq workflow have been created, I should create a master bash script that will call each of these scripts in sequence to run the entire workflow.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Create File\",\n",
|
||||
" \"action_input\": \"rna-seq-workflow/run_workflow.sh\\n#!/bin/bash\\n# Master script to run the RNA-Seq workflow\\n\\n# Run quality control\\nbash 01_quality_control.sh\\n\\n# Run alignment\\nbash 02_alignment.sh\\n\\n# Run transcript quantification\\nbash 03_transcript_quantification.sh\\n\\n# Sort and index BAM files\\nbash 04_sort_index_bam.sh\\n\\n# Run differential expression analysis\\nRscript 05_differential_expression.R\\n\\n# Workflow completed\\necho \\\"RNA-Seq workflow completed successfully.\\\"\\n\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3mCreated file rna-seq-workflow/run_workflow.sh\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mThe master bash script to run the entire RNA-Seq workflow has been created successfully. The next step is to create a pull request with all the scripts that have been created in the `rna-seq-workflow_v1` branch.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Create Pull Request\",\n",
|
||||
" \"action_input\": \"Create a full command line executable workflow for RNA-Seq on PBMC Samples\\n\\nThis pull request includes a series of bash and R scripts to perform RNA-Seq analysis on PBMC samples. The workflow includes steps for quality control, alignment, transcript quantification, sorting and indexing BAM files, and differential expression analysis using the following software stack: FastQC, MultiQC, STAR, RSEM, samtools, and DESeq2.\\n\\nPlease review the scripts and provide feedback or request any additional changes as necessary.\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mSuccessfully created PR number 40\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mThe pull request has been successfully created. Now, I need to comment on the original issue to inform the user about the new PR.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Comment on Issue\",\n",
|
||||
" \"action_input\": \"2\\n\\nI opened a PR for you to review here #40\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3mCommented on issue 2\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mThe comment has been posted on the original issue. Now, I will create a review request for the user KastanDay who opened the issue.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Create review request\",\n",
|
||||
" \"action_input\": \"KastanDay\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3mReview request created for user KastanDay on PR #40\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mAction:\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Final Answer\",\n",
|
||||
" \"action_input\": \"The tasks have been completed: a new branch was created, the necessary scripts for the RNA-Seq workflow were written, a pull request was opened, the original issue was commented on with the PR number, and a review request was sent to the user KastanDay.\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_core.tracers.context import tracing_v2_enabled\n",
|
||||
"\n",
|
||||
"# To use langsmith (recommended for these long tasks):\n",
|
||||
"os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"os.environ[\"LANGCHAIN_ENDPOINT\"] = \"https://api.smith.langchain.com\"\n",
|
||||
"os.environ[\"LANGCHAIN_API_KEY\"] = \"ls__......\"\n",
|
||||
"os.environ[\"LANGCHAIN_PROJECT\"] = \"Github_Demo_PR\"\n",
|
||||
"os.environ[\"LANGCHAIN_WANDB_TRACING\"] = \"false\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"with tracing_v2_enabled(project_name=\"Github_Demo_PR\", tags=[\"PR_bot\"]) as cb:\n",
|
||||
" agent.run(final_gh_issue_prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Full text of tools\n",
|
||||
"\n",
|
||||
"When using or building tools, it's always helpful to inspect what the model sees.\n",
|
||||
"\n",
|
||||
"On OpenAI models, tool descriptions are part of the `SystemPrompt`.\n",
|
||||
"\n",
|
||||
"The `args` are added to the prompt in structured chats, e.g. `AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION`, but not in `AgentType.ZERO_SHOT_REACT_DESCRIPTION`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Get Issues: \n",
|
||||
"This tool will fetch a list of the repository's issues. It will return the title, and issue number of 5 issues. It takes no input., args: {'no_input': {'title': 'No Input', 'description': 'No input required, e.g. `` (empty string).', 'default': '', 'type': 'string'}}\n",
|
||||
"Get Issue: \n",
|
||||
"This tool will fetch the title, body, and comment thread of a specific issue. **VERY IMPORTANT**: You must specify the issue number as an integer., args: {'issue_number': {'title': 'Issue Number', 'description': 'Issue number as an integer, e.g. `42`', 'default': 0, 'type': 'integer'}}\n",
|
||||
"Comment on Issue: \n",
|
||||
"This tool is useful when you need to comment on a GitHub issue. Simply pass in the issue number and the comment you would like to make. Please use this sparingly as we don't want to clutter the comment threads. **VERY IMPORTANT**: Your input to this tool MUST strictly follow these rules:\n",
|
||||
"\n",
|
||||
"- First you must specify the issue number as an integer\n",
|
||||
"- Then you must place two newlines\n",
|
||||
"- Then you must specify your comment, args: {'input': {'title': 'Input', 'description': 'Follow the required formatting.', 'type': 'string'}}\n",
|
||||
"List open pull requests (PRs): \n",
|
||||
"This tool will fetch a list of the repository's Pull Requests (PRs). It will return the title, and PR number of 5 PRs. It takes no input., args: {'no_input': {'title': 'No Input', 'description': 'No input required, e.g. `` (empty string).', 'default': '', 'type': 'string'}}\n",
|
||||
"Get Pull Request: \n",
|
||||
"This tool will fetch the title, body, comment thread and commit history of a specific Pull Request (by PR number). **VERY IMPORTANT**: You must specify the PR number as an integer., args: {'pr_number': {'title': 'Pr Number', 'description': 'The PR number as an integer, e.g. `12`', 'default': 0, 'type': 'integer'}}\n",
|
||||
"Overview of files included in PR: \n",
|
||||
"This tool will fetch the full text of all files in a pull request (PR) given the PR number as an input. This is useful for understanding the code changes in a PR or contributing to it. **VERY IMPORTANT**: You must specify the PR number as an integer input parameter., args: {'pr_number': {'title': 'Pr Number', 'description': 'The PR number as an integer, e.g. `12`', 'default': 0, 'type': 'integer'}}\n",
|
||||
"Create Pull Request: \n",
|
||||
"This tool is useful when you need to create a new pull request in a GitHub repository. **VERY IMPORTANT**: Your input to this tool MUST strictly follow these rules:\n",
|
||||
"\n",
|
||||
"- First you must specify the title of the pull request\n",
|
||||
"- Then you must place two newlines\n",
|
||||
"- Then you must write the body or description of the pull request\n",
|
||||
"\n",
|
||||
"When appropriate, always reference relevant issues in the body by using the syntax `closes #<issue_number` like `closes #3, closes #6`.\n",
|
||||
"For example, if you would like to create a pull request called \"README updates\" with contents \"added contributors' names, closes #3\", you would pass in the following string:\n",
|
||||
"\n",
|
||||
"README updates\n",
|
||||
"\n",
|
||||
"added contributors' names, closes #3, args: {'formatted_pr': {'title': 'Formatted Pr', 'description': 'Follow the required formatting.', 'type': 'string'}}\n",
|
||||
"List Pull Requests' Files: \n",
|
||||
"This tool will fetch the full text of all files in a pull request (PR) given the PR number as an input. This is useful for understanding the code changes in a PR or contributing to it. **VERY IMPORTANT**: You must specify the PR number as an integer input parameter., args: {'pr_number': {'title': 'Pr Number', 'description': 'The PR number as an integer, e.g. `12`', 'default': 0, 'type': 'integer'}}\n",
|
||||
"Create File: \n",
|
||||
"This tool is a wrapper for the GitHub API, useful when you need to create a file in a GitHub repository. **VERY IMPORTANT**: Your input to this tool MUST strictly follow these rules:\n",
|
||||
"\n",
|
||||
"- First you must specify which file to create by passing a full file path (**IMPORTANT**: the path must not start with a slash)\n",
|
||||
"- Then you must specify the contents of the file\n",
|
||||
"\n",
|
||||
"For example, if you would like to create a file called /test/test.txt with contents \"test contents\", you would pass in the following string:\n",
|
||||
"\n",
|
||||
"test/test.txt\n",
|
||||
"\n",
|
||||
"test contents, args: {'formatted_file': {'title': 'Formatted File', 'description': 'Follow the required formatting.', 'type': 'string'}}\n",
|
||||
"Read File: \n",
|
||||
"This tool is a wrapper for the GitHub API, useful when you need to read the contents of a file. Simply pass in the full file path of the file you would like to read. **IMPORTANT**: the path must not start with a slash, args: {'formatted_filepath': {'title': 'Formatted Filepath', 'description': 'The full file path of the file you would like to read where the path must NOT start with a slash, e.g. `some_dir/my_file.py`.', 'type': 'string'}}\n",
|
||||
"Update File: \n",
|
||||
"This tool is a wrapper for the GitHub API, useful when you need to update the contents of a file in a GitHub repository. **VERY IMPORTANT**: Your input to this tool MUST strictly follow these rules:\n",
|
||||
"\n",
|
||||
"- First you must specify which file to modify by passing a full file path (**IMPORTANT**: the path must not start with a slash)\n",
|
||||
"- Then you must specify the old contents which you would like to replace wrapped in OLD <<<< and >>>> OLD\n",
|
||||
"- Then you must specify the new contents which you would like to replace the old contents with wrapped in NEW <<<< and >>>> NEW\n",
|
||||
"\n",
|
||||
"For example, if you would like to replace the contents of the file /test/test.txt from \"old contents\" to \"new contents\", you would pass in the following string:\n",
|
||||
"\n",
|
||||
"test/test.txt\n",
|
||||
"\n",
|
||||
"This is text that will not be changed\n",
|
||||
"OLD <<<<\n",
|
||||
"old contents\n",
|
||||
">>>> OLD\n",
|
||||
"NEW <<<<\n",
|
||||
"new contents\n",
|
||||
">>>> NEW, args: {'formatted_file_update': {'title': 'Formatted File Update', 'description': 'Strictly follow the provided rules.', 'type': 'string'}}\n",
|
||||
"Delete File: \n",
|
||||
"This tool is a wrapper for the GitHub API, useful when you need to delete a file in a GitHub repository. Simply pass in the full file path of the file you would like to delete. **IMPORTANT**: the path must not start with a slash, args: {'formatted_filepath': {'title': 'Formatted Filepath', 'description': 'The full file path of the file you would like to delete where the path must NOT start with a slash, e.g. `some_dir/my_file.py`. Only input a string, not the param name.', 'type': 'string'}}\n",
|
||||
"Overview of existing files in Main branch: \n",
|
||||
"This tool will provide an overview of all existing files in the main branch of the repository. It will list the file names, their respective paths, and a brief summary of their contents. This can be useful for understanding the structure and content of the repository, especially when navigating through large codebases. No input parameters are required., args: {'no_input': {'title': 'No Input', 'description': 'No input required, e.g. `` (empty string).', 'default': '', 'type': 'string'}}\n",
|
||||
"Overview of files in current working branch: \n",
|
||||
"This tool will provide an overview of all files in your current working branch where you should implement changes. This is great for getting a high level overview of the structure of your code. No input parameters are required., args: {'no_input': {'title': 'No Input', 'description': 'No input required, e.g. `` (empty string).', 'default': '', 'type': 'string'}}\n",
|
||||
"List branches in this repository: \n",
|
||||
"This tool will fetch a list of all branches in the repository. It will return the name of each branch. No input parameters are required., args: {'no_input': {'title': 'No Input', 'description': 'No input required, e.g. `` (empty string).', 'default': '', 'type': 'string'}}\n",
|
||||
"Set active branch: \n",
|
||||
"This tool will set the active branch in the repository, similar to `git checkout <branch_name>` and `git switch -c <branch_name>`. **VERY IMPORTANT**: You must specify the name of the branch as a string input parameter., args: {'branch_name': {'title': 'Branch Name', 'description': 'The name of the branch, e.g. `my_branch`.', 'type': 'string'}}\n",
|
||||
"Create a new branch: \n",
|
||||
"This tool will create a new branch in the repository. **VERY IMPORTANT**: You must specify the name of the new branch as a string input parameter., args: {'branch_name': {'title': 'Branch Name', 'description': 'The name of the branch, e.g. `my_branch`.', 'type': 'string'}}\n",
|
||||
"Get files from a directory: \n",
|
||||
"This tool will fetch a list of all files in a specified directory. **VERY IMPORTANT**: You must specify the path of the directory as a string input parameter., args: {'input': {'title': 'Input', 'description': 'The path of the directory, e.g. `some_dir/inner_dir`. Only input a string, do not include the parameter name.', 'default': '', 'type': 'string'}}\n",
|
||||
"Search issues and pull requests: \n",
|
||||
"This tool will search for issues and pull requests in the repository. **VERY IMPORTANT**: You must specify the search query as a string input parameter., args: {'search_query': {'title': 'Search Query', 'description': 'Natural language search query, e.g. `My issue title or topic`.', 'type': 'string'}}\n",
|
||||
"Search code: \n",
|
||||
"This tool will search for code in the repository. **VERY IMPORTANT**: You must specify the search query as a string input parameter., args: {'search_query': {'title': 'Search Query', 'description': 'A keyword-focused natural language search query for code, e.g. `MyFunctionName()`.', 'type': 'string'}}\n",
|
||||
"Create review request: \n",
|
||||
"This tool will create a review request on the open pull request that matches the current active branch. **VERY IMPORTANT**: You must specify the username of the person who is being requested as a string input parameter., args: {'username': {'title': 'Username', 'description': 'GitHub username of the user being requested, e.g. `my_username`.', 'type': 'string'}}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.tools.render import render_text_description_and_args\n",
|
||||
"\n",
|
||||
"print(render_text_description_and_args(tools))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example: Agent with Search\n",
|
||||
"\n",
|
||||
"If your agent does not need to use all 8 tools, you can build tools individually to use. For this example, we'll make an agent that does not use the create_file, delete_file or create_pull_request tools, but can also use duckduckgo-search."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet duckduckgo-search"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 72,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.tools import DuckDuckGoSearchRun\n",
|
||||
"from langchain_core.tools import Tool\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"tools = []\n",
|
||||
"unwanted_tools = [\"Get Issue\", \"Delete File\", \"Create File\", \"Create Pull Request\"]\n",
|
||||
"\n",
|
||||
"for tool in toolkit.get_tools():\n",
|
||||
" if tool.name not in unwanted_tools:\n",
|
||||
" tools.append(tool)\n",
|
||||
"tools += [\n",
|
||||
" Tool(\n",
|
||||
" name=\"Search\",\n",
|
||||
" func=DuckDuckGoSearchRun().run,\n",
|
||||
" description=\"useful for when you need to search the web\",\n",
|
||||
" )\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"agent = initialize_agent(\n",
|
||||
" tools=tools,\n",
|
||||
" llm=ChatOpenAI(temperature=0.1),\n",
|
||||
" agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,\n",
|
||||
" verbose=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Finally let's build a prompt and test it out!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 73,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mTo complete this issue, I need to find the most popular frontend framework and add a blurb about how this website will utilize it to the readme.md file. I should start by researching the most popular frontend frameworks and then update the readme file accordingly. I will use the \"Search\" tool to research the most popular frontend framework.\n",
|
||||
"\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: \"most popular frontend framework\"\u001b[0m\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mAlex Ivanovs February 25, 2023 Table of Contents What are the current Front-end trends? Top Front-end Frameworks for 2023 #1 - React #2 - Angular #3 - Vue #4 - Svelte #5 - Preact #6 - Ember #7 - Solid #8 - Lit #9 - Alpine #10 - Stencil #11 - Qwik Front-end Frameworks: A Summary Top 6 Frontend Frameworks To Use in 2022 by Nwose Lotanna Victor August 26, 2022 Web 0 Comments This post reveals the top six frontend libraries to use in 2022. The list is fresh and very different from the previous years. State of JS Though React is the most popular framework for frontend development, it also has some shortcomings. Due to its limitations, the idea was to design a small-size framework that will offer the same features as React. This is how a tiny version of React — Preact — appeared. Top 10 Popular Frontend Frameworks to Use in 2023 Sep 26, 2022 10 min Сontents 1. What is a framework? 2. Front-end frameworks vs backend frameworks 3. The best front-end frameworks in 2023 React Vue.js Angular Svelte JQuery Ember Backbone Semantic UI 4. Final words Technostacks Jan 11 2023 Top Frontend Frameworks of 2023 for Web Development Developing what the users see on their screens is the role of a front-end web developer. Unarguably, front-end developers worldwide are trying to use the best front-end frameworks to provide the best user experience.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mBased on my research, the most popular frontend framework right now is React. I will now update the readme.md file to include a blurb about how this website will take advantage of React.\n",
|
||||
"\n",
|
||||
"Action: Update File\n",
|
||||
"Action Input:\n",
|
||||
"README.md\n",
|
||||
"OLD <<<<\n",
|
||||
"This is the readme file for the website.\n",
|
||||
">>>> OLD\n",
|
||||
"NEW <<<<\n",
|
||||
"This is the readme file for the website.\n",
|
||||
"\n",
|
||||
"This website takes advantage of the React framework, which allows for efficient and reusable UI components. With React, we can easily manage the state of our application and create interactive user interfaces. It provides a smooth and seamless user experience, making this website highly responsive and dynamic.\n",
|
||||
">>>> NEW\n",
|
||||
"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mFile content was not updated because old content was not found.It may be helpful to use the read_file action to get the current file contents.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mI need to first read the contents of the README.md file to get the current content. Then I can update the file with the new content.\n",
|
||||
"\n",
|
||||
"Action: Read File\n",
|
||||
"Action Input: README.md\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3mThis is my awesome website that uses the most popular frontend framework.\n",
|
||||
"\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mThe current content of the README.md file is \"This is my awesome website that uses the most popular frontend framework.\" I can now update the file with the new content.\n",
|
||||
"\n",
|
||||
"Action: Update File\n",
|
||||
"Action Input:\n",
|
||||
"README.md\n",
|
||||
"OLD <<<<\n",
|
||||
"This is my awesome website that uses the most popular frontend framework.\n",
|
||||
">>>> OLD\n",
|
||||
"NEW <<<<\n",
|
||||
"This is my awesome website that uses the most popular frontend framework.\n",
|
||||
"\n",
|
||||
"This website takes advantage of the React framework, which allows for efficient and reusable UI components. With React, we can easily manage the state of our application and create interactive user interfaces. It provides a smooth and seamless user experience, making this website highly responsive and dynamic.\n",
|
||||
">>>> NEW\n",
|
||||
"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mUpdated file README.md\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mI have successfully updated the README.md file with the blurb about how this website will take advantage of the React framework.\n",
|
||||
"\n",
|
||||
"Final Answer: The most popular frontend framework right now is React. This website takes advantage of React to create efficient and reusable UI components, manage application state, and provide a smooth and seamless user experience.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The most popular frontend framework right now is React. This website takes advantage of React to create efficient and reusable UI components, manage application state, and provide a smooth and seamless user experience.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 73,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# The GitHubAPIWrapper can be used outside of an agent, too\n",
|
||||
"# This gets the info about issue number 9, since we want to\n",
|
||||
"# force the agent to address this specific issue.\n",
|
||||
"\n",
|
||||
"issue = github.get_issue(9)\n",
|
||||
"\n",
|
||||
"prompt = f\"\"\"\n",
|
||||
"You are a senior frontend developer who is experienced in HTML, CSS, and JS- especially React.\n",
|
||||
"You have been assigned the below issue. Complete it to the best of your ability.\n",
|
||||
"Remember to first make a plan and pay attention to details like file names and commonsense.\n",
|
||||
"Then execute the plan and use tools appropriately.\n",
|
||||
"Finally, make a pull request to merge your changes.\n",
|
||||
"Issue: {issue[\"title\"]}\n",
|
||||
"Issue Description: {issue['body']}\n",
|
||||
"Comments: {issue['comments']}\"\"\"\n",
|
||||
"\n",
|
||||
"agent.run(prompt)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -1,311 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Gmail\n",
|
||||
"\n",
|
||||
"This notebook walks through connecting a LangChain email to the `Gmail API`.\n",
|
||||
"\n",
|
||||
"To use this toolkit, you will need to set up your credentials explained in the [Gmail API docs](https://developers.google.com/gmail/api/quickstart/python#authorize_credentials_for_a_desktop_application). Once you've downloaded the `credentials.json` file, you can start using the Gmail API. Once this is done, we'll install the required libraries."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet google-api-python-client > /dev/null\n",
|
||||
"%pip install --upgrade --quiet google-auth-oauthlib > /dev/null\n",
|
||||
"%pip install --upgrade --quiet google-auth-httplib2 > /dev/null\n",
|
||||
"%pip install --upgrade --quiet beautifulsoup4 > /dev/null # This is optional but is useful for parsing HTML messages"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You also need to install the `langchain-community` package where the integration lives:\n",
|
||||
"\n",
|
||||
"```bash\n",
|
||||
"pip install -U langchain-community\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"It's also helpful (but not needed) to set up [LangSmith](https://smith.langchain.com/) for best-in-class observability"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create the Toolkit\n",
|
||||
"\n",
|
||||
"By default the toolkit reads the local `credentials.json` file. You can also manually provide a `Credentials` object."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.agent_toolkits import GmailToolkit\n",
|
||||
"\n",
|
||||
"toolkit = GmailToolkit()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Customizing Authentication\n",
|
||||
"\n",
|
||||
"Behind the scenes, a `googleapi` resource is created using the following methods. \n",
|
||||
"you can manually build a `googleapi` resource for more auth control. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.tools.gmail.utils import (\n",
|
||||
" build_resource_service,\n",
|
||||
" get_gmail_credentials,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Can review scopes here https://developers.google.com/gmail/api/auth/scopes\n",
|
||||
"# For instance, readonly scope is 'https://www.googleapis.com/auth/gmail.readonly'\n",
|
||||
"credentials = get_gmail_credentials(\n",
|
||||
" token_file=\"token.json\",\n",
|
||||
" scopes=[\"https://mail.google.com/\"],\n",
|
||||
" client_secrets_file=\"credentials.json\",\n",
|
||||
")\n",
|
||||
"api_resource = build_resource_service(credentials=credentials)\n",
|
||||
"toolkit = GmailToolkit(api_resource=api_resource)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[GmailCreateDraft(name='create_gmail_draft', description='Use this tool to create a draft email with the provided message fields.', args_schema=<class 'langchain_community.tools.gmail.create_draft.CreateDraftSchema'>, return_direct=False, verbose=False, callbacks=None, callback_manager=None, api_resource=<googleapiclient.discovery.Resource object at 0x10e5c6d10>),\n",
|
||||
" GmailSendMessage(name='send_gmail_message', description='Use this tool to send email messages. The input is the message, recipents', args_schema=None, return_direct=False, verbose=False, callbacks=None, callback_manager=None, api_resource=<googleapiclient.discovery.Resource object at 0x10e5c6d10>),\n",
|
||||
" GmailSearch(name='search_gmail', description=('Use this tool to search for email messages or threads. The input must be a valid Gmail query. The output is a JSON list of the requested resource.',), args_schema=<class 'langchain_community.tools.gmail.search.SearchArgsSchema'>, return_direct=False, verbose=False, callbacks=None, callback_manager=None, api_resource=<googleapiclient.discovery.Resource object at 0x10e5c6d10>),\n",
|
||||
" GmailGetMessage(name='get_gmail_message', description='Use this tool to fetch an email by message ID. Returns the thread ID, snipet, body, subject, and sender.', args_schema=<class 'langchain_community.tools.gmail.get_message.SearchArgsSchema'>, return_direct=False, verbose=False, callbacks=None, callback_manager=None, api_resource=<googleapiclient.discovery.Resource object at 0x10e5c6d10>),\n",
|
||||
" GmailGetThread(name='get_gmail_thread', description=('Use this tool to search for email messages. The input must be a valid Gmail query. The output is a JSON list of messages.',), args_schema=<class 'langchain_community.tools.gmail.get_thread.GetThreadSchema'>, return_direct=False, verbose=False, callbacks=None, callback_manager=None, api_resource=<googleapiclient.discovery.Resource object at 0x10e5c6d10>)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tools = toolkit.get_tools()\n",
|
||||
"tools"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage\n",
|
||||
"\n",
|
||||
"We show here how to use it as part of an [agent](/docs/tutorials/agents). We use the OpenAI Functions Agent, so we will need to setup and install the required dependencies for that. We will also use [LangSmith Hub](https://smith.langchain.com/hub) to pull the prompt from, so we will need to install that.\n",
|
||||
"\n",
|
||||
"```bash\n",
|
||||
"pip install -U langchain-openai langchainhub\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import hub\n",
|
||||
"from langchain.agents import AgentExecutor, create_openai_functions_agent\n",
|
||||
"from langchain_openai import ChatOpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"instructions = \"\"\"You are an assistant.\"\"\"\n",
|
||||
"base_prompt = hub.pull(\"langchain-ai/openai-functions-template\")\n",
|
||||
"prompt = base_prompt.partial(instructions=instructions)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = ChatOpenAI(temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = create_openai_functions_agent(llm, toolkit.get_tools(), prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor = AgentExecutor(\n",
|
||||
" agent=agent,\n",
|
||||
" tools=toolkit.get_tools(),\n",
|
||||
" # This is set to False to prevent information about my email showing up on the screen\n",
|
||||
" # Normally, it is helpful to have it set to True however.\n",
|
||||
" verbose=False,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'Create a gmail draft for me to edit of a letter from the perspective of a sentient parrot who is looking to collaborate on some research with her estranged friend, a cat. Under no circumstances may you send the message, however.',\n",
|
||||
" 'output': 'I have created a draft email for you to edit. Please find the draft in your Gmail drafts folder. Remember, under no circumstances should you send the message.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.invoke(\n",
|
||||
" {\n",
|
||||
" \"input\": \"Create a gmail draft for me to edit of a letter from the perspective of a sentient parrot\"\n",
|
||||
" \" who is looking to collaborate on some research with her\"\n",
|
||||
" \" estranged friend, a cat. Under no circumstances may you send the message, however.\"\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'Could you search in my drafts for the latest email? what is the title?',\n",
|
||||
" 'output': 'The latest email in your drafts is titled \"Collaborative Research Proposal\".'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.invoke(\n",
|
||||
" {\"input\": \"Could you search in my drafts for the latest email? what is the title?\"}\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
---
|
||||
sidebar_position: 0
|
||||
sidebar_class_name: hidden
|
||||
---
|
||||
|
||||
# Toolkits
|
||||
|
||||
**Toolkits** are collections of tools that are designed to be used together for specific tasks. They include conveniences for loading tools
|
||||
that share common authentication, services, or other objects. They can be implemented by subclassing the
|
||||
[BaseToolkit](https://api.python.langchain.com/en/latest/tools/langchain_core.tools.BaseToolkit.html#langchain_core.tools.BaseToolkit) class.
|
||||
|
||||
This table lists common toolkits.
|
||||
|
||||
|
||||
| Namespace 🔻 | Class |
|
||||
|------------|---------|
|
||||
| langchain_community.agent_toolkits.github | [GitHubToolkit](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.github.toolkit.GitHubToolkit.html) |
|
||||
| langchain_community.agent_toolkits.gmail | [GmailToolkit](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.gmail.toolkit.GmailToolkit.html) |
|
||||
| langchain_community.agent_toolkits.openapi | [RequestsToolkit](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.openapi.toolkit.RequestsToolkit.html) |
|
||||
| langchain_community.agent_toolkits.slack | [SlackToolkit](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.slack.toolkit.SlackToolkit.html) |
|
||||
| langchain_community.agent_toolkits.sql | [SQLDatabaseToolkit](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.sql.toolkit.SQLDatabaseToolkit.html) |
|
||||
File diff suppressed because one or more lines are too long
@@ -1,382 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "be75cb7e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"keywords: [PythonREPLTool]\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "82a4c2cc-20ea-4b20-a565-63e905dee8ff",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Python\n",
|
||||
"\n",
|
||||
"This notebook showcases an agent designed to write and execute `Python` code to answer a question."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "f98e9c90-5c37-4fb9-af3e-d09693af8543",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import hub\n",
|
||||
"from langchain.agents import AgentExecutor\n",
|
||||
"from langchain_experimental.tools import PythonREPLTool"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ba9adf51",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create the tool(s)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "003bce04",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tools = [PythonREPLTool()]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4aceaeaf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using OpenAI Functions Agent\n",
|
||||
"\n",
|
||||
"This is probably the most reliable type of agent, but is only compatible with function calling"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "3a054d1d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import create_openai_functions_agent\n",
|
||||
"from langchain_openai import ChatOpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "3454514b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"instructions = \"\"\"You are an agent designed to write and execute python code to answer questions.\n",
|
||||
"You have access to a python REPL, which you can use to execute python code.\n",
|
||||
"If you get an error, debug your code and try again.\n",
|
||||
"Only use the output of your code to answer the question. \n",
|
||||
"You might know the answer without running any code, but you should still run the code to get the answer.\n",
|
||||
"If it does not seem like you can write code to answer the question, just return \"I don't know\" as the answer.\n",
|
||||
"\"\"\"\n",
|
||||
"base_prompt = hub.pull(\"langchain-ai/openai-functions-template\")\n",
|
||||
"prompt = base_prompt.partial(instructions=instructions)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "2a573e95",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = create_openai_functions_agent(ChatOpenAI(temperature=0), tools, prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "cae41550",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ca30d64c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using ReAct Agent\n",
|
||||
"\n",
|
||||
"This is a less reliable type, but is compatible with most models"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "bcaa0b18",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import create_react_agent\n",
|
||||
"from langchain_anthropic import ChatAnthropic"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"id": "d2470880",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"instructions = \"\"\"You are an agent designed to write and execute python code to answer questions.\n",
|
||||
"You have access to a python REPL, which you can use to execute python code.\n",
|
||||
"If you get an error, debug your code and try again.\n",
|
||||
"Only use the output of your code to answer the question. \n",
|
||||
"You might know the answer without running any code, but you should still run the code to get the answer.\n",
|
||||
"If it does not seem like you can write code to answer the question, just return \"I don't know\" as the answer.\n",
|
||||
"\"\"\"\n",
|
||||
"base_prompt = hub.pull(\"langchain-ai/react-agent-template\")\n",
|
||||
"prompt = base_prompt.partial(instructions=instructions)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"id": "cc422f53-c51c-4694-a834-72ecd1e68363",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = create_react_agent(ChatAnthropic(temperature=0), tools, prompt)\n",
|
||||
"agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c16161de",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Fibonacci Example\n",
|
||||
"This example was created by [John Wiseman](https://twitter.com/lemonodor/status/1628270074074398720?s=20)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"id": "25cd4f92-ea9b-4fe6-9838-a4f85f81eebe",
|
||||
"metadata": {
|
||||
"scrolled": false,
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m Sure, I can write some Python code to get the 10th Fibonacci number.\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"Thought: Do I need to use a tool? Yes\n",
|
||||
"Action: Python_REPL \n",
|
||||
"Action Input: \n",
|
||||
"def fib(n):\n",
|
||||
" a, b = 0, 1\n",
|
||||
" for i in range(n):\n",
|
||||
" a, b = b, a + b\n",
|
||||
" return a\n",
|
||||
"\n",
|
||||
"print(fib(10))\n",
|
||||
"```\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m55\n",
|
||||
"\u001b[0m\u001b[32;1m\u001b[1;3m Let me break this down step-by-step:\n",
|
||||
"\n",
|
||||
"1. I defined a fibonacci function called `fib` that takes in a number `n`. \n",
|
||||
"2. Inside the function, I initialized two variables `a` and `b` to 0 and 1, which are the first two Fibonacci numbers.\n",
|
||||
"3. Then I used a for loop to iterate up to `n`, updating `a` and `b` each iteration to the next Fibonacci numbers.\n",
|
||||
"4. Finally, I return `a`, which after `n` iterations, contains the `n`th Fibonacci number.\n",
|
||||
"\n",
|
||||
"5. I called `fib(10)` to get the 10th Fibonacci number and printed the result.\n",
|
||||
"\n",
|
||||
"The key parts are defining the fibonacci calculation in the function, and then calling it with the desired input index to print the output.\n",
|
||||
"\n",
|
||||
"The observation shows the 10th Fibonacci number is 55, so that is the final answer.\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"Thought: Do I need to use a tool? No\n",
|
||||
"Final Answer: 55\n",
|
||||
"```\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'What is the 10th fibonacci number?', 'output': '55\\n```'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 31,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.invoke({\"input\": \"What is the 10th fibonacci number?\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7caa30de",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Training neural net\n",
|
||||
"This example was created by [Samee Ur Rehman](https://twitter.com/sameeurehman/status/1630130518133207046?s=20)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "4b9f60e7-eb6a-4f14-8604-498d863d4482",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mCould not parse tool input: {'name': 'python', 'arguments': 'import torch\\nimport torch.nn as nn\\nimport torch.optim as optim\\n\\n# Define the neural network\\nclass SingleNeuron(nn.Module):\\n def __init__(self):\\n super(SingleNeuron, self).__init__()\\n self.linear = nn.Linear(1, 1)\\n \\n def forward(self, x):\\n return self.linear(x)\\n\\n# Create the synthetic data\\nx_train = torch.tensor([[1.0], [2.0], [3.0], [4.0]], dtype=torch.float32)\\ny_train = torch.tensor([[2.0], [4.0], [6.0], [8.0]], dtype=torch.float32)\\n\\n# Create the neural network\\nmodel = SingleNeuron()\\n\\n# Define the loss function and optimizer\\ncriterion = nn.MSELoss()\\noptimizer = optim.SGD(model.parameters(), lr=0.01)\\n\\n# Train the neural network\\nfor epoch in range(1, 1001):\\n # Forward pass\\n y_pred = model(x_train)\\n \\n # Compute loss\\n loss = criterion(y_pred, y_train)\\n \\n # Backward pass and optimization\\n optimizer.zero_grad()\\n loss.backward()\\n optimizer.step()\\n \\n # Print the loss every 100 epochs\\n if epoch % 100 == 0:\\n print(f\"Epoch {epoch}: Loss = {loss.item()}\")\\n\\n# Make a prediction for x = 5\\nx_test = torch.tensor([[5.0]], dtype=torch.float32)\\ny_pred = model(x_test)\\ny_pred.item()'} because the `arguments` is not valid JSON.\u001b[0mInvalid or incomplete response\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `Python_REPL` with `import torch\n",
|
||||
"import torch.nn as nn\n",
|
||||
"import torch.optim as optim\n",
|
||||
"\n",
|
||||
"# Define the neural network\n",
|
||||
"class SingleNeuron(nn.Module):\n",
|
||||
" def __init__(self):\n",
|
||||
" super(SingleNeuron, self).__init__()\n",
|
||||
" self.linear = nn.Linear(1, 1)\n",
|
||||
" \n",
|
||||
" def forward(self, x):\n",
|
||||
" return self.linear(x)\n",
|
||||
"\n",
|
||||
"# Create the synthetic data\n",
|
||||
"x_train = torch.tensor([[1.0], [2.0], [3.0], [4.0]], dtype=torch.float32)\n",
|
||||
"y_train = torch.tensor([[2.0], [4.0], [6.0], [8.0]], dtype=torch.float32)\n",
|
||||
"\n",
|
||||
"# Create the neural network\n",
|
||||
"model = SingleNeuron()\n",
|
||||
"\n",
|
||||
"# Define the loss function and optimizer\n",
|
||||
"criterion = nn.MSELoss()\n",
|
||||
"optimizer = optim.SGD(model.parameters(), lr=0.01)\n",
|
||||
"\n",
|
||||
"# Train the neural network\n",
|
||||
"for epoch in range(1, 1001):\n",
|
||||
" # Forward pass\n",
|
||||
" y_pred = model(x_train)\n",
|
||||
" \n",
|
||||
" # Compute loss\n",
|
||||
" loss = criterion(y_pred, y_train)\n",
|
||||
" \n",
|
||||
" # Backward pass and optimization\n",
|
||||
" optimizer.zero_grad()\n",
|
||||
" loss.backward()\n",
|
||||
" optimizer.step()\n",
|
||||
" \n",
|
||||
" # Print the loss every 100 epochs\n",
|
||||
" if epoch % 100 == 0:\n",
|
||||
" print(f\"Epoch {epoch}: Loss = {loss.item()}\")\n",
|
||||
"\n",
|
||||
"# Make a prediction for x = 5\n",
|
||||
"x_test = torch.tensor([[5.0]], dtype=torch.float32)\n",
|
||||
"y_pred = model(x_test)\n",
|
||||
"y_pred.item()`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3mEpoch 100: Loss = 0.03825576975941658\n",
|
||||
"Epoch 200: Loss = 0.02100197970867157\n",
|
||||
"Epoch 300: Loss = 0.01152981910854578\n",
|
||||
"Epoch 400: Loss = 0.006329738534986973\n",
|
||||
"Epoch 500: Loss = 0.0034749575424939394\n",
|
||||
"Epoch 600: Loss = 0.0019077073084190488\n",
|
||||
"Epoch 700: Loss = 0.001047312980517745\n",
|
||||
"Epoch 800: Loss = 0.0005749554838985205\n",
|
||||
"Epoch 900: Loss = 0.0003156439634039998\n",
|
||||
"Epoch 1000: Loss = 0.00017328384274151176\n",
|
||||
"\u001b[0m\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `Python_REPL` with `x_test.item()`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m\u001b[0m\u001b[32;1m\u001b[1;3mThe prediction for x = 5 is 10.000173568725586.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The prediction for x = 5 is 10.000173568725586.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.invoke(\n",
|
||||
" {\n",
|
||||
" \"input\": \"\"\"Understand, write a single neuron neural network in PyTorch.\n",
|
||||
"Take synthetic data for y=2x. Train for 1000 epochs and print every 100 epochs.\n",
|
||||
"Return prediction for x = 5\"\"\"\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "eb654671",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,267 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Slack\n",
|
||||
"\n",
|
||||
"This notebook walks through connecting LangChain to your `Slack` account.\n",
|
||||
"\n",
|
||||
"To use this toolkit, you will need to get a token explained in the [Slack API docs](https://api.slack.com/tutorials/tracks/getting-a-token). Once you've received a SLACK_USER_TOKEN, you can input it as an environmental variable below."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.2\u001b[0m\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
|
||||
"Note: you may need to restart the kernel to use updated packages.\n",
|
||||
"\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.2\u001b[0m\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
|
||||
"Note: you may need to restart the kernel to use updated packages.\n",
|
||||
"\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.2\u001b[0m\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet slack_sdk > /dev/null\n",
|
||||
"%pip install --upgrade --quiet beautifulsoup4 > /dev/null # This is optional but is useful for parsing HTML messages\n",
|
||||
"%pip install --upgrade --quiet python-dotenv > /dev/null # This is for loading environmental variables from a .env file"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set Environmental Variables\n",
|
||||
"\n",
|
||||
"The toolkit will read the SLACK_USER_TOKEN environmental variable to authenticate the user so you need to set them here. You will also need to set your OPENAI_API_KEY to use the agent later."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Set environmental variables here\n",
|
||||
"# In this example, you set environmental variables by loading a .env file.\n",
|
||||
"import dotenv\n",
|
||||
"\n",
|
||||
"dotenv.load_dotenv()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create the Toolkit and Get Tools\n",
|
||||
"\n",
|
||||
"To start, you need to create the toolkit, so you can access its tools later."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[SlackGetChannel(client=<slack_sdk.web.client.WebClient object at 0x11eba6a00>),\n",
|
||||
" SlackGetMessage(client=<slack_sdk.web.client.WebClient object at 0x11eba69d0>),\n",
|
||||
" SlackScheduleMessage(client=<slack_sdk.web.client.WebClient object at 0x11eba65b0>),\n",
|
||||
" SlackSendMessage(client=<slack_sdk.web.client.WebClient object at 0x11eba6790>)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_community.agent_toolkits import SlackToolkit\n",
|
||||
"\n",
|
||||
"toolkit = SlackToolkit()\n",
|
||||
"tools = toolkit.get_tools()\n",
|
||||
"tools"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use within an ReAct Agent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import hub\n",
|
||||
"from langchain.agents import AgentExecutor, create_openai_tools_agent\n",
|
||||
"from langchain_openai import ChatOpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = ChatOpenAI(temperature=0, model=\"gpt-4\")\n",
|
||||
"prompt = hub.pull(\"hwchase17/openai-tools-agent\")\n",
|
||||
"agent = create_openai_tools_agent(\n",
|
||||
" tools=toolkit.get_tools(),\n",
|
||||
" llm=llm,\n",
|
||||
" prompt=prompt,\n",
|
||||
")\n",
|
||||
"agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor.invoke(\n",
|
||||
" {\n",
|
||||
" \"input\": \"Send a greeting to my coworkers in the #general channel. Note use `channel` as key of channel id, and `message` as key of content to sent in the channel.\"\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mI need to get the list of channels in the workspace.\n",
|
||||
"Action: get_channelid_name_dict\n",
|
||||
"Action Input: {}\u001b[0m\u001b[36;1m\u001b[1;3m[{\"id\": \"C052SCUP4UD\", \"name\": \"general\", \"created\": 1681297313, \"num_members\": 1}, {\"id\": \"C052VBBU4M8\", \"name\": \"test-bots\", \"created\": 1681297343, \"num_members\": 2}, {\"id\": \"C053805TNUR\", \"name\": \"random\", \"created\": 1681297313, \"num_members\": 2}]\u001b[0m\u001b[32;1m\u001b[1;3mI now have the list of channels and their names.\n",
|
||||
"Final Answer: There are 3 channels in the workspace. Their names are \"general\", \"test-bots\", and \"random\".\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'How many channels are in the workspace? Please list out their names.',\n",
|
||||
" 'output': 'There are 3 channels in the workspace. Their names are \"general\", \"test-bots\", and \"random\".'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.invoke(\n",
|
||||
" {\"input\": \"How many channels are in the workspace? Please list out their names.\"}\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mFirst, I need to identify the channel ID for the #introductions channel.\n",
|
||||
"Action: get_channelid_name_dict\n",
|
||||
"Action Input: None\u001b[0m\u001b[36;1m\u001b[1;3m[{\"id\": \"C052SCUP4UD\", \"name\": \"general\", \"created\": 1681297313, \"num_members\": 1}, {\"id\": \"C052VBBU4M8\", \"name\": \"test-bots\", \"created\": 1681297343, \"num_members\": 2}, {\"id\": \"C053805TNUR\", \"name\": \"random\", \"created\": 1681297313, \"num_members\": 2}]\u001b[0m\u001b[32;1m\u001b[1;3mThe #introductions channel is not listed in the observed channels. I need to inform the user that the #introductions channel does not exist or is not accessible.\n",
|
||||
"Final Answer: The #introductions channel does not exist or is not accessible.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'Tell me the number of messages sent in the #introductions channel from the past month.',\n",
|
||||
" 'output': 'The #introductions channel does not exist or is not accessible.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.invoke(\n",
|
||||
" {\n",
|
||||
" \"input\": \"Tell me the number of messages sent in the #introductions channel from the past month.\"\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -1,419 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Spark Dataframe\n",
|
||||
"\n",
|
||||
"This notebook shows how to use agents to interact with a `Spark DataFrame` and `Spark Connect`. It is mostly optimized for question answering.\n",
|
||||
"\n",
|
||||
"**NOTE: this agent calls the Python agent under the hood, which executes LLM generated Python code - this can be bad if the LLM generated Python code is harmful. Use cautiously.**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"...input your openai api key here...\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## `Spark DataFrame` example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"23/05/15 20:33:10 WARN Utils: Your hostname, Mikes-Mac-mini.local resolves to a loopback address: 127.0.0.1; using 192.168.68.115 instead (on interface en1)\n",
|
||||
"23/05/15 20:33:10 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address\n",
|
||||
"Setting default log level to \"WARN\".\n",
|
||||
"To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n",
|
||||
"23/05/15 20:33:10 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"+-----------+--------+------+--------------------+------+----+-----+-----+----------------+-------+-----+--------+\n",
|
||||
"|PassengerId|Survived|Pclass| Name| Sex| Age|SibSp|Parch| Ticket| Fare|Cabin|Embarked|\n",
|
||||
"+-----------+--------+------+--------------------+------+----+-----+-----+----------------+-------+-----+--------+\n",
|
||||
"| 1| 0| 3|Braund, Mr. Owen ...| male|22.0| 1| 0| A/5 21171| 7.25| null| S|\n",
|
||||
"| 2| 1| 1|Cumings, Mrs. Joh...|female|38.0| 1| 0| PC 17599|71.2833| C85| C|\n",
|
||||
"| 3| 1| 3|Heikkinen, Miss. ...|female|26.0| 0| 0|STON/O2. 3101282| 7.925| null| S|\n",
|
||||
"| 4| 1| 1|Futrelle, Mrs. Ja...|female|35.0| 1| 0| 113803| 53.1| C123| S|\n",
|
||||
"| 5| 0| 3|Allen, Mr. Willia...| male|35.0| 0| 0| 373450| 8.05| null| S|\n",
|
||||
"| 6| 0| 3| Moran, Mr. James| male|null| 0| 0| 330877| 8.4583| null| Q|\n",
|
||||
"| 7| 0| 1|McCarthy, Mr. Tim...| male|54.0| 0| 0| 17463|51.8625| E46| S|\n",
|
||||
"| 8| 0| 3|Palsson, Master. ...| male| 2.0| 3| 1| 349909| 21.075| null| S|\n",
|
||||
"| 9| 1| 3|Johnson, Mrs. Osc...|female|27.0| 0| 2| 347742|11.1333| null| S|\n",
|
||||
"| 10| 1| 2|Nasser, Mrs. Nich...|female|14.0| 1| 0| 237736|30.0708| null| C|\n",
|
||||
"| 11| 1| 3|Sandstrom, Miss. ...|female| 4.0| 1| 1| PP 9549| 16.7| G6| S|\n",
|
||||
"| 12| 1| 1|Bonnell, Miss. El...|female|58.0| 0| 0| 113783| 26.55| C103| S|\n",
|
||||
"| 13| 0| 3|Saundercock, Mr. ...| male|20.0| 0| 0| A/5. 2151| 8.05| null| S|\n",
|
||||
"| 14| 0| 3|Andersson, Mr. An...| male|39.0| 1| 5| 347082| 31.275| null| S|\n",
|
||||
"| 15| 0| 3|Vestrom, Miss. Hu...|female|14.0| 0| 0| 350406| 7.8542| null| S|\n",
|
||||
"| 16| 1| 2|Hewlett, Mrs. (Ma...|female|55.0| 0| 0| 248706| 16.0| null| S|\n",
|
||||
"| 17| 0| 3|Rice, Master. Eugene| male| 2.0| 4| 1| 382652| 29.125| null| Q|\n",
|
||||
"| 18| 1| 2|Williams, Mr. Cha...| male|null| 0| 0| 244373| 13.0| null| S|\n",
|
||||
"| 19| 0| 3|Vander Planke, Mr...|female|31.0| 1| 0| 345763| 18.0| null| S|\n",
|
||||
"| 20| 1| 3|Masselmani, Mrs. ...|female|null| 0| 0| 2649| 7.225| null| C|\n",
|
||||
"+-----------+--------+------+--------------------+------+----+-----+-----+----------------+-------+-----+--------+\n",
|
||||
"only showing top 20 rows\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_experimental.agents.agent_toolkits import create_spark_dataframe_agent\n",
|
||||
"from langchain_openai import OpenAI\n",
|
||||
"from pyspark.sql import SparkSession\n",
|
||||
"\n",
|
||||
"spark = SparkSession.builder.getOrCreate()\n",
|
||||
"csv_file_path = \"titanic.csv\"\n",
|
||||
"df = spark.read.csv(csv_file_path, header=True, inferSchema=True)\n",
|
||||
"df.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = create_spark_dataframe_agent(llm=OpenAI(temperature=0), df=df, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to find out how many rows are in the dataframe\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: df.count()\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m891\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: There are 891 rows in the dataframe.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'There are 891 rows in the dataframe.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"how many rows are there?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to find out how many people have more than 3 siblings\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: df.filter(df.SibSp > 3).count()\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m30\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: 30 people have more than 3 siblings.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'30 people have more than 3 siblings.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"how many people have more than 3 siblings\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to get the average age first\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: df.agg({\"Age\": \"mean\"}).collect()[0][0]\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m29.69911764705882\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now have the average age, I need to get the square root\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: math.sqrt(29.69911764705882)\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mname 'math' is not defined\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to import math first\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: import math\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now have the math library imported, I can get the square root\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: math.sqrt(29.69911764705882)\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m5.449689683556195\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: 5.449689683556195\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'5.449689683556195'"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"whats the square root of the average age?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"spark.stop()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## `Spark Connect` example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# in apache-spark root directory. (tested here with \"spark-3.4.0-bin-hadoop3 and later\")\n",
|
||||
"# To launch Spark with support for Spark Connect sessions, run the start-connect-server.sh script.\n",
|
||||
"!./sbin/start-connect-server.sh --packages org.apache.spark:spark-connect_2.12:3.4.0"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"23/05/08 10:06:09 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from pyspark.sql import SparkSession\n",
|
||||
"\n",
|
||||
"# Now that the Spark server is running, we can connect to it remotely using Spark Connect. We do this by\n",
|
||||
"# creating a remote Spark session on the client where our application runs. Before we can do that, we need\n",
|
||||
"# to make sure to stop the existing regular Spark session because it cannot coexist with the remote\n",
|
||||
"# Spark Connect session we are about to create.\n",
|
||||
"SparkSession.builder.master(\"local[*]\").getOrCreate().stop()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# The command we used above to launch the server configured Spark to run as localhost:15002.\n",
|
||||
"# So now we can create a remote Spark session on the client using the following command.\n",
|
||||
"spark = SparkSession.builder.remote(\"sc://localhost:15002\").getOrCreate()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"+-----------+--------+------+--------------------+------+----+-----+-----+----------------+-------+-----+--------+\n",
|
||||
"|PassengerId|Survived|Pclass| Name| Sex| Age|SibSp|Parch| Ticket| Fare|Cabin|Embarked|\n",
|
||||
"+-----------+--------+------+--------------------+------+----+-----+-----+----------------+-------+-----+--------+\n",
|
||||
"| 1| 0| 3|Braund, Mr. Owen ...| male|22.0| 1| 0| A/5 21171| 7.25| null| S|\n",
|
||||
"| 2| 1| 1|Cumings, Mrs. Joh...|female|38.0| 1| 0| PC 17599|71.2833| C85| C|\n",
|
||||
"| 3| 1| 3|Heikkinen, Miss. ...|female|26.0| 0| 0|STON/O2. 3101282| 7.925| null| S|\n",
|
||||
"| 4| 1| 1|Futrelle, Mrs. Ja...|female|35.0| 1| 0| 113803| 53.1| C123| S|\n",
|
||||
"| 5| 0| 3|Allen, Mr. Willia...| male|35.0| 0| 0| 373450| 8.05| null| S|\n",
|
||||
"| 6| 0| 3| Moran, Mr. James| male|null| 0| 0| 330877| 8.4583| null| Q|\n",
|
||||
"| 7| 0| 1|McCarthy, Mr. Tim...| male|54.0| 0| 0| 17463|51.8625| E46| S|\n",
|
||||
"| 8| 0| 3|Palsson, Master. ...| male| 2.0| 3| 1| 349909| 21.075| null| S|\n",
|
||||
"| 9| 1| 3|Johnson, Mrs. Osc...|female|27.0| 0| 2| 347742|11.1333| null| S|\n",
|
||||
"| 10| 1| 2|Nasser, Mrs. Nich...|female|14.0| 1| 0| 237736|30.0708| null| C|\n",
|
||||
"| 11| 1| 3|Sandstrom, Miss. ...|female| 4.0| 1| 1| PP 9549| 16.7| G6| S|\n",
|
||||
"| 12| 1| 1|Bonnell, Miss. El...|female|58.0| 0| 0| 113783| 26.55| C103| S|\n",
|
||||
"| 13| 0| 3|Saundercock, Mr. ...| male|20.0| 0| 0| A/5. 2151| 8.05| null| S|\n",
|
||||
"| 14| 0| 3|Andersson, Mr. An...| male|39.0| 1| 5| 347082| 31.275| null| S|\n",
|
||||
"| 15| 0| 3|Vestrom, Miss. Hu...|female|14.0| 0| 0| 350406| 7.8542| null| S|\n",
|
||||
"| 16| 1| 2|Hewlett, Mrs. (Ma...|female|55.0| 0| 0| 248706| 16.0| null| S|\n",
|
||||
"| 17| 0| 3|Rice, Master. Eugene| male| 2.0| 4| 1| 382652| 29.125| null| Q|\n",
|
||||
"| 18| 1| 2|Williams, Mr. Cha...| male|null| 0| 0| 244373| 13.0| null| S|\n",
|
||||
"| 19| 0| 3|Vander Planke, Mr...|female|31.0| 1| 0| 345763| 18.0| null| S|\n",
|
||||
"| 20| 1| 3|Masselmani, Mrs. ...|female|null| 0| 0| 2649| 7.225| null| C|\n",
|
||||
"+-----------+--------+------+--------------------+------+----+-----+-----+----------------+-------+-----+--------+\n",
|
||||
"only showing top 20 rows\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"csv_file_path = \"titanic.csv\"\n",
|
||||
"df = spark.read.csv(csv_file_path, header=True, inferSchema=True)\n",
|
||||
"df.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from langchain_experimental.agents import create_spark_dataframe_agent\n",
|
||||
"from langchain_openai import OpenAI\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"...input your openai api key here...\"\n",
|
||||
"\n",
|
||||
"agent = create_spark_dataframe_agent(llm=OpenAI(temperature=0), df=df, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Thought: I need to find the row with the highest fare\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: df.sort(df.Fare.desc()).first()\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mRow(PassengerId=259, Survived=1, Pclass=1, Name='Ward, Miss. Anna', Sex='female', Age=35.0, SibSp=0, Parch=0, Ticket='PC 17755', Fare=512.3292, Cabin=None, Embarked='C')\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the name of the person who bought the most expensive ticket\n",
|
||||
"Final Answer: Miss. Anna Ward\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Miss. Anna Ward'"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\n",
|
||||
" \"\"\"\n",
|
||||
"who bought the most expensive ticket?\n",
|
||||
"You can find all supported function types in https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/dataframe\n",
|
||||
"\"\"\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"spark.stop()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -1,583 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "757e7780-b89a-4a87-b10c-cfa42337a8e0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: SQLDatabaseToolkit\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0e499e90-7a6d-4fab-8aab-31a4df417601",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# SQLDatabaseToolkit\n",
|
||||
"\n",
|
||||
"This will help you getting started with the SQL Database [toolkit](/docs/concepts/#toolkits). For detailed documentation of all `SQLDatabaseToolkit` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.sql.toolkit.SQLDatabaseToolkit.html).\n",
|
||||
"\n",
|
||||
"Tools within the `SQLDatabaseToolkit` are designed to interact with a `SQL` database. \n",
|
||||
"\n",
|
||||
"A common application is to enable agents to answer questions using data in a relational database, potentially in an iterative fashion (e.g., recovering from errors).\n",
|
||||
"\n",
|
||||
"**⚠️ Security note ⚠️**\n",
|
||||
"\n",
|
||||
"Building Q&A systems of SQL databases requires executing model-generated SQL queries. There are inherent risks in doing this. Make sure that your database connection permissions are always scoped as narrowly as possible for your chain/agent's needs. This will mitigate though not eliminate the risks of building a model-driven system. For more on general security best practices, [see here](/docs/security).\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"This uses the example `Chinook` database. \n",
|
||||
"\n",
|
||||
"To set it up follow [these instructions](https://database.guide/2-sample-databases-sqlite/). This notebook reads from the resulting .db file.\n",
|
||||
"\n",
|
||||
"If you want to get automated tracing from runs of individual tools, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3de6e3be-1fd9-42a3-8564-8ca7dca11e1c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "31896b61-68d2-4b4d-be9d-b829eda327d1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"This toolkit lives in the `langchain-community` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c4933e04-9120-4ccc-9ef7-369987823b0e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain-community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6ad08dbe-1642-448c-b58d-153810024375",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For demonstration purposes, we will access a prompt in the LangChain [Hub](https://smith.langchain.com/hub). We will also require `langgraph` to demonstrate the use of the toolkit with an agent. This is not required to use the toolkit."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f3dead45-9908-497d-a5a3-bce30642e88f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchainhub langgraph"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "79e86f98-3436-474d-ac67-529c93726b95",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We will also need a LLM or chat model:\n",
|
||||
"\n",
|
||||
"```{=mdx}\n",
|
||||
"import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
|
||||
"\n",
|
||||
"<ChatModelTabs customVarName=\"llm\" />\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "a5076e3d-3a04-4be9-ae82-41d7685e2197",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# | output: false\n",
|
||||
"# | echo: false\n",
|
||||
"\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "804533b1-2f16-497b-821b-c82d67fcf7b6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"The `SQLDatabaseToolkit` toolkit requires:\n",
|
||||
"\n",
|
||||
"- a [SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html) object;\n",
|
||||
"- a LLM or chat model (for instantiating the [QuerySQLCheckerTool](https://api.python.langchain.com/en/latest/tools/langchain_community.tools.sql_database.tool.QuerySQLCheckerTool.html) tool).\n",
|
||||
"\n",
|
||||
"Below, we instantiate the toolkit with these objects:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "42bd5a41-672a-4a53-b70a-2f0c0555758c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit\n",
|
||||
"from langchain_community.utilities.sql_database import SQLDatabase\n",
|
||||
"\n",
|
||||
"db = SQLDatabase.from_uri(\"sqlite:///Chinook.db\")\n",
|
||||
"\n",
|
||||
"toolkit = SQLDatabaseToolkit(db=db, llm=llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b2f882cf-4156-4a9f-a714-db97ec8ccc37",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tools\n",
|
||||
"\n",
|
||||
"View available tools:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "a18c3e69-bee0-4f5d-813e-eeb540f41b98",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[QuerySQLDataBaseTool(description=\"Input to this tool is a detailed and correct SQL query, output is a result from the database. If the query is not correct, an error message will be returned. If an error is returned, rewrite the query, check the query, and try again. If you encounter an issue with Unknown column 'xxxx' in 'field list', use sql_db_schema to query the correct table fields.\", db=<langchain_community.utilities.sql_database.SQLDatabase object at 0x10e4c14b0>),\n",
|
||||
" InfoSQLDatabaseTool(description='Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables. Be sure that the tables actually exist by calling sql_db_list_tables first! Example Input: table1, table2, table3', db=<langchain_community.utilities.sql_database.SQLDatabase object at 0x10e4c14b0>),\n",
|
||||
" ListSQLDatabaseTool(db=<langchain_community.utilities.sql_database.SQLDatabase object at 0x10e4c14b0>),\n",
|
||||
" QuerySQLCheckerTool(description='Use this tool to double check if your query is correct before executing it. Always use this tool before executing a query with sql_db_query!', db=<langchain_community.utilities.sql_database.SQLDatabase object at 0x10e4c14b0>, llm=ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x10e4a3190>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x10e4c08e0>, temperature=0.0, openai_api_key=SecretStr('**********'), openai_proxy=''), llm_chain=LLMChain(prompt=PromptTemplate(input_variables=['dialect', 'query'], template='\\n{query}\\nDouble check the {dialect} query above for common mistakes, including:\\n- Using NOT IN with NULL values\\n- Using UNION when UNION ALL should have been used\\n- Using BETWEEN for exclusive ranges\\n- Data type mismatch in predicates\\n- Properly quoting identifiers\\n- Using the correct number of arguments for functions\\n- Casting to the correct data type\\n- Using the proper columns for joins\\n\\nIf there are any of the above mistakes, rewrite the query. If there are no mistakes, just reproduce the original query.\\n\\nOutput the final SQL query only.\\n\\nSQL Query: '), llm=ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x10e4a3190>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x10e4c08e0>, temperature=0.0, openai_api_key=SecretStr('**********'), openai_proxy='')))]"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"toolkit.get_tools()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5f5751e3-2e98-485f-8164-db8094039c25",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"API references:\n",
|
||||
"\n",
|
||||
"- [QuerySQLDataBaseTool](https://api.python.langchain.com/en/latest/tools/langchain_community.tools.sql_database.tool.QuerySQLDataBaseTool.html)\n",
|
||||
"- [InfoSQLDatabaseTool](https://api.python.langchain.com/en/latest/tools/langchain_community.tools.sql_database.tool.InfoSQLDatabaseTool.html)\n",
|
||||
"- [ListSQLDatabaseTool](https://api.python.langchain.com/en/latest/tools/langchain_community.tools.sql_database.tool.ListSQLDatabaseTool.html)\n",
|
||||
"- [QuerySQLCheckerTool](https://api.python.langchain.com/en/latest/tools/langchain_community.tools.sql_database.tool.QuerySQLCheckerTool.html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c067e0ed-dcca-4dcc-81b2-a0eeb4fc2a9f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use within an agent\n",
|
||||
"\n",
|
||||
"Following the [SQL Q&A Tutorial](/docs/tutorials/sql_qa/#agents), below we equip a simple question-answering agent with the tools in our toolkit. First we pull a relevant prompt and populate it with its required parameters:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "eda12f8b-be90-4697-ac84-2ece9e2d1708",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"['dialect', 'top_k']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain import hub\n",
|
||||
"\n",
|
||||
"prompt_template = hub.pull(\"langchain-ai/sql-agent-system-prompt\")\n",
|
||||
"\n",
|
||||
"assert len(prompt_template.messages) == 1\n",
|
||||
"print(prompt_template.input_variables)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "3470ae96-e5e5-4717-a6d6-d7d28c7b7347",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"system_message = prompt_template.format(dialect=\"SQLite\", top_k=5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "97930c07-36d1-4137-94ae-fe5ac83ecc44",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We then instantiate the agent:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "48bca92c-9b4b-4d5c-bcce-1b239c9e901c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langgraph.prebuilt import create_react_agent\n",
|
||||
"\n",
|
||||
"agent_executor = create_react_agent(\n",
|
||||
" llm, toolkit.get_tools(), state_modifier=system_message\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "09fb1845-1105-4f41-98b4-24756452a3e3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"And issue it a query:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "39e6d2bf-3194-4aba-854b-63faf919157b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"================================\u001b[1m Human Message \u001b[0m=================================\n",
|
||||
"\n",
|
||||
"Which country's customers spent the most?\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" sql_db_list_tables (call_xK4hUKXF8wb1tPM1s5e6gZVb)\n",
|
||||
" Call ID: call_xK4hUKXF8wb1tPM1s5e6gZVb\n",
|
||||
" Args:\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: sql_db_list_tables\n",
|
||||
"\n",
|
||||
"Album, Artist, Customer, Employee, Genre, Invoice, InvoiceLine, MediaType, Playlist, PlaylistTrack, Track\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" sql_db_schema (call_XnagYKuUNXo4FgK0a0bUSlIM)\n",
|
||||
" Call ID: call_XnagYKuUNXo4FgK0a0bUSlIM\n",
|
||||
" Args:\n",
|
||||
" table_names: Customer, Invoice, InvoiceLine\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: sql_db_schema\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"CREATE TABLE \"Customer\" (\n",
|
||||
"\t\"CustomerId\" INTEGER NOT NULL, \n",
|
||||
"\t\"FirstName\" NVARCHAR(40) NOT NULL, \n",
|
||||
"\t\"LastName\" NVARCHAR(20) NOT NULL, \n",
|
||||
"\t\"Company\" NVARCHAR(80), \n",
|
||||
"\t\"Address\" NVARCHAR(70), \n",
|
||||
"\t\"City\" NVARCHAR(40), \n",
|
||||
"\t\"State\" NVARCHAR(40), \n",
|
||||
"\t\"Country\" NVARCHAR(40), \n",
|
||||
"\t\"PostalCode\" NVARCHAR(10), \n",
|
||||
"\t\"Phone\" NVARCHAR(24), \n",
|
||||
"\t\"Fax\" NVARCHAR(24), \n",
|
||||
"\t\"Email\" NVARCHAR(60) NOT NULL, \n",
|
||||
"\t\"SupportRepId\" INTEGER, \n",
|
||||
"\tPRIMARY KEY (\"CustomerId\"), \n",
|
||||
"\tFOREIGN KEY(\"SupportRepId\") REFERENCES \"Employee\" (\"EmployeeId\")\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"/*\n",
|
||||
"3 rows from Customer table:\n",
|
||||
"CustomerId\tFirstName\tLastName\tCompany\tAddress\tCity\tState\tCountry\tPostalCode\tPhone\tFax\tEmail\tSupportRepId\n",
|
||||
"1\tLuís\tGonçalves\tEmbraer - Empresa Brasileira de Aeronáutica S.A.\tAv. Brigadeiro Faria Lima, 2170\tSão José dos Campos\tSP\tBrazil\t12227-000\t+55 (12) 3923-5555\t+55 (12) 3923-5566\tluisg@embraer.com.br\t3\n",
|
||||
"2\tLeonie\tKöhler\tNone\tTheodor-Heuss-Straße 34\tStuttgart\tNone\tGermany\t70174\t+49 0711 2842222\tNone\tleonekohler@surfeu.de\t5\n",
|
||||
"3\tFrançois\tTremblay\tNone\t1498 rue Bélanger\tMontréal\tQC\tCanada\tH2G 1A7\t+1 (514) 721-4711\tNone\tftremblay@gmail.com\t3\n",
|
||||
"*/\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"CREATE TABLE \"Invoice\" (\n",
|
||||
"\t\"InvoiceId\" INTEGER NOT NULL, \n",
|
||||
"\t\"CustomerId\" INTEGER NOT NULL, \n",
|
||||
"\t\"InvoiceDate\" DATETIME NOT NULL, \n",
|
||||
"\t\"BillingAddress\" NVARCHAR(70), \n",
|
||||
"\t\"BillingCity\" NVARCHAR(40), \n",
|
||||
"\t\"BillingState\" NVARCHAR(40), \n",
|
||||
"\t\"BillingCountry\" NVARCHAR(40), \n",
|
||||
"\t\"BillingPostalCode\" NVARCHAR(10), \n",
|
||||
"\t\"Total\" NUMERIC(10, 2) NOT NULL, \n",
|
||||
"\tPRIMARY KEY (\"InvoiceId\"), \n",
|
||||
"\tFOREIGN KEY(\"CustomerId\") REFERENCES \"Customer\" (\"CustomerId\")\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"/*\n",
|
||||
"3 rows from Invoice table:\n",
|
||||
"InvoiceId\tCustomerId\tInvoiceDate\tBillingAddress\tBillingCity\tBillingState\tBillingCountry\tBillingPostalCode\tTotal\n",
|
||||
"1\t2\t2021-01-01 00:00:00\tTheodor-Heuss-Straße 34\tStuttgart\tNone\tGermany\t70174\t1.98\n",
|
||||
"2\t4\t2021-01-02 00:00:00\tUllevålsveien 14\tOslo\tNone\tNorway\t0171\t3.96\n",
|
||||
"3\t8\t2021-01-03 00:00:00\tGrétrystraat 63\tBrussels\tNone\tBelgium\t1000\t5.94\n",
|
||||
"*/\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"CREATE TABLE \"InvoiceLine\" (\n",
|
||||
"\t\"InvoiceLineId\" INTEGER NOT NULL, \n",
|
||||
"\t\"InvoiceId\" INTEGER NOT NULL, \n",
|
||||
"\t\"TrackId\" INTEGER NOT NULL, \n",
|
||||
"\t\"UnitPrice\" NUMERIC(10, 2) NOT NULL, \n",
|
||||
"\t\"Quantity\" INTEGER NOT NULL, \n",
|
||||
"\tPRIMARY KEY (\"InvoiceLineId\"), \n",
|
||||
"\tFOREIGN KEY(\"TrackId\") REFERENCES \"Track\" (\"TrackId\"), \n",
|
||||
"\tFOREIGN KEY(\"InvoiceId\") REFERENCES \"Invoice\" (\"InvoiceId\")\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"/*\n",
|
||||
"3 rows from InvoiceLine table:\n",
|
||||
"InvoiceLineId\tInvoiceId\tTrackId\tUnitPrice\tQuantity\n",
|
||||
"1\t1\t2\t0.99\t1\n",
|
||||
"2\t1\t4\t0.99\t1\n",
|
||||
"3\t2\t6\t0.99\t1\n",
|
||||
"*/\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" sql_db_query (call_tnibWEiAbTD0Al4u4lFRCcO0)\n",
|
||||
" Call ID: call_tnibWEiAbTD0Al4u4lFRCcO0\n",
|
||||
" Args:\n",
|
||||
" query: SELECT c.Country, SUM(i.Total) AS TotalSpent FROM Customer c JOIN Invoice i ON c.CustomerId = i.CustomerId GROUP BY c.Country ORDER BY TotalSpent DESC LIMIT 1\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: sql_db_query\n",
|
||||
"\n",
|
||||
"[('USA', 523.0600000000003)]\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"\n",
|
||||
"Customers from the USA spent the most, with a total amount spent of $523.06.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"example_query = \"Which country's customers spent the most?\"\n",
|
||||
"\n",
|
||||
"events = agent_executor.stream(\n",
|
||||
" {\"messages\": [(\"user\", example_query)]},\n",
|
||||
" stream_mode=\"values\",\n",
|
||||
")\n",
|
||||
"for event in events:\n",
|
||||
" event[\"messages\"][-1].pretty_print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "adbf3d8d-7570-45a5-950f-ce84db5145ab",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can also observe the agent recover from an error:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "23c1235c-6d18-43e4-98ab-85b426b53d94",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"================================\u001b[1m Human Message \u001b[0m=================================\n",
|
||||
"\n",
|
||||
"Who are the top 3 best selling artists?\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" sql_db_query (call_EBmGkOb4ceEc6VNCszE9s9N7)\n",
|
||||
" Call ID: call_EBmGkOb4ceEc6VNCszE9s9N7\n",
|
||||
" Args:\n",
|
||||
" query: SELECT artist_name, SUM(quantity) AS total_sold FROM sales GROUP BY artist_name ORDER BY total_sold DESC LIMIT 3\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: sql_db_query\n",
|
||||
"\n",
|
||||
"Error: (sqlite3.OperationalError) no such table: sales\n",
|
||||
"[SQL: SELECT artist_name, SUM(quantity) AS total_sold FROM sales GROUP BY artist_name ORDER BY total_sold DESC LIMIT 3]\n",
|
||||
"(Background on this error at: https://sqlalche.me/e/20/e3q8)\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" sql_db_list_tables (call_mEBlNVGQmf6IiikdqlFSoBzN)\n",
|
||||
" Call ID: call_mEBlNVGQmf6IiikdqlFSoBzN\n",
|
||||
" Args:\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: sql_db_list_tables\n",
|
||||
"\n",
|
||||
"Album, Artist, Customer, Employee, Genre, Invoice, InvoiceLine, MediaType, Playlist, PlaylistTrack, Track\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" sql_db_schema (call_ZEnt0V29DVZf2RDpyVDqCjyN)\n",
|
||||
" Call ID: call_ZEnt0V29DVZf2RDpyVDqCjyN\n",
|
||||
" Args:\n",
|
||||
" table_names: Artist, Album, InvoiceLine\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: sql_db_schema\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"CREATE TABLE \"Album\" (\n",
|
||||
"\t\"AlbumId\" INTEGER NOT NULL, \n",
|
||||
"\t\"Title\" NVARCHAR(160) NOT NULL, \n",
|
||||
"\t\"ArtistId\" INTEGER NOT NULL, \n",
|
||||
"\tPRIMARY KEY (\"AlbumId\"), \n",
|
||||
"\tFOREIGN KEY(\"ArtistId\") REFERENCES \"Artist\" (\"ArtistId\")\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"/*\n",
|
||||
"3 rows from Album table:\n",
|
||||
"AlbumId\tTitle\tArtistId\n",
|
||||
"1\tFor Those About To Rock We Salute You\t1\n",
|
||||
"2\tBalls to the Wall\t2\n",
|
||||
"3\tRestless and Wild\t2\n",
|
||||
"*/\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"CREATE TABLE \"Artist\" (\n",
|
||||
"\t\"ArtistId\" INTEGER NOT NULL, \n",
|
||||
"\t\"Name\" NVARCHAR(120), \n",
|
||||
"\tPRIMARY KEY (\"ArtistId\")\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"/*\n",
|
||||
"3 rows from Artist table:\n",
|
||||
"ArtistId\tName\n",
|
||||
"1\tAC/DC\n",
|
||||
"2\tAccept\n",
|
||||
"3\tAerosmith\n",
|
||||
"*/\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"CREATE TABLE \"InvoiceLine\" (\n",
|
||||
"\t\"InvoiceLineId\" INTEGER NOT NULL, \n",
|
||||
"\t\"InvoiceId\" INTEGER NOT NULL, \n",
|
||||
"\t\"TrackId\" INTEGER NOT NULL, \n",
|
||||
"\t\"UnitPrice\" NUMERIC(10, 2) NOT NULL, \n",
|
||||
"\t\"Quantity\" INTEGER NOT NULL, \n",
|
||||
"\tPRIMARY KEY (\"InvoiceLineId\"), \n",
|
||||
"\tFOREIGN KEY(\"TrackId\") REFERENCES \"Track\" (\"TrackId\"), \n",
|
||||
"\tFOREIGN KEY(\"InvoiceId\") REFERENCES \"Invoice\" (\"InvoiceId\")\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"/*\n",
|
||||
"3 rows from InvoiceLine table:\n",
|
||||
"InvoiceLineId\tInvoiceId\tTrackId\tUnitPrice\tQuantity\n",
|
||||
"1\t1\t2\t0.99\t1\n",
|
||||
"2\t1\t4\t0.99\t1\n",
|
||||
"3\t2\t6\t0.99\t1\n",
|
||||
"*/\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" sql_db_query (call_6tHsI79n3dYWphezh3fp9EKp)\n",
|
||||
" Call ID: call_6tHsI79n3dYWphezh3fp9EKp\n",
|
||||
" Args:\n",
|
||||
" query: SELECT Artist.Name AS artist_name, SUM(InvoiceLine.Quantity) AS total_sold FROM Artist JOIN Album ON Artist.ArtistId = Album.ArtistId JOIN Track ON Album.AlbumId = Track.AlbumId JOIN InvoiceLine ON Track.TrackId = InvoiceLine.TrackId GROUP BY Artist.Name ORDER BY total_sold DESC LIMIT 3\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: sql_db_query\n",
|
||||
"\n",
|
||||
"[('Iron Maiden', 140), ('U2', 107), ('Metallica', 91)]\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"\n",
|
||||
"The top 3 best selling artists are:\n",
|
||||
"1. Iron Maiden - 140 units sold\n",
|
||||
"2. U2 - 107 units sold\n",
|
||||
"3. Metallica - 91 units sold\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"example_query = \"Who are the top 3 best selling artists?\"\n",
|
||||
"\n",
|
||||
"events = agent_executor.stream(\n",
|
||||
" {\"messages\": [(\"user\", example_query)]},\n",
|
||||
" stream_mode=\"values\",\n",
|
||||
")\n",
|
||||
"for event in events:\n",
|
||||
" event[\"messages\"][-1].pretty_print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "73521f1b-be03-44e6-8b27-a9a46ae8e962",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Specific functionality\n",
|
||||
"\n",
|
||||
"`SQLDatabaseToolkit` implements a [.get_context](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.sql.toolkit.SQLDatabaseToolkit.html#langchain_community.agent_toolkits.sql.toolkit.SQLDatabaseToolkit.get_context) method as a convenience for use in prompts or other contexts.\n",
|
||||
"\n",
|
||||
"**⚠️ Disclaimer ⚠️** : The agent may generate insert/update/delete queries. When this is not expected, use a custom prompt or create a SQL users without write permissions.\n",
|
||||
"\n",
|
||||
"The final user might overload your SQL database by asking a simple question such as \"run the biggest query possible\". The generated query might look like:\n",
|
||||
"\n",
|
||||
"```sql\n",
|
||||
"SELECT * FROM \"public\".\"users\"\n",
|
||||
" JOIN \"public\".\"user_permissions\" ON \"public\".\"users\".id = \"public\".\"user_permissions\".user_id\n",
|
||||
" JOIN \"public\".\"projects\" ON \"public\".\"users\".id = \"public\".\"projects\".user_id\n",
|
||||
" JOIN \"public\".\"events\" ON \"public\".\"projects\".id = \"public\".\"events\".project_id;\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"For a transactional SQL database, if one of the table above contains millions of rows, the query might cause trouble to other applications using the same database.\n",
|
||||
"\n",
|
||||
"Most datawarehouse oriented databases support user-level quota, for limiting resource usage."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1aa8a7e3-87ca-4963-a224-0cbdc9d88714",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all SQLDatabaseToolkit features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.sql.toolkit.SQLDatabaseToolkit.html)."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,752 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Xorbits"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This notebook shows how to use agents to interact with [Xorbits Pandas](https://doc.xorbits.io/en/latest/reference/pandas/index.html) dataframe and [Xorbits Numpy](https://doc.xorbits.io/en/latest/reference/numpy/index.html) ndarray. It is mostly optimized for question answering.\n",
|
||||
"\n",
|
||||
"**NOTE: this agent calls the `Python` agent under the hood, which executes LLM generated Python code - this can be bad if the LLM generated Python code is harmful. Use cautiously.**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Pandas examples"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-07-13T08:06:33.955439Z",
|
||||
"start_time": "2023-07-13T08:06:33.767539500Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import xorbits.pandas as pd\n",
|
||||
"from langchain_experimental.agents.agent_toolkits import create_xorbits_agent\n",
|
||||
"from langchain_openai import OpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-07-13T08:06:33.955439Z",
|
||||
"start_time": "2023-07-13T08:06:33.767539500Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "05b7c067b1114ce9a8aef4a58a5d5fef",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0.00/100 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"data = pd.read_csv(\"titanic.csv\")\n",
|
||||
"agent = create_xorbits_agent(OpenAI(temperature=0), data, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-07-13T08:11:06.622471100Z",
|
||||
"start_time": "2023-07-13T08:11:03.183042Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to count the number of rows and columns\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: data.shape\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m(891, 12)\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: There are 891 rows and 12 columns.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'There are 891 rows and 12 columns.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"How many rows and columns are there?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-07-13T08:11:23.189275300Z",
|
||||
"start_time": "2023-07-13T08:11:11.029030900Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "8c63d745a7eb41a484043a5dba357997",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0.00/100 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to count the number of people in pclass 1\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: data[data['Pclass'] == 1].shape[0]\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m216\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: There are 216 people in pclass 1.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'There are 216 people in pclass 1.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"How many people are in pclass 1?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to calculate the mean age\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: data['Age'].mean()\u001b[0m"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "29af2e29f2d64a3397c212812adf0e9b",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0.00/100 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m29.69911764705882\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: The mean age is 29.69911764705882.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The mean age is 29.69911764705882.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"whats the mean age?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to group the data by sex and then find the average age for each group\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: data.groupby('Sex')['Age'].mean()\u001b[0m"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "c3d28625c35946fd91ebc2a47f8d8c5b",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0.00/100 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mSex\n",
|
||||
"female 27.915709\n",
|
||||
"male 30.726645\n",
|
||||
"Name: Age, dtype: float64\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the average age for each group\n",
|
||||
"Final Answer: The average age for female passengers is 27.92 and the average age for male passengers is 30.73.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The average age for female passengers is 27.92 and the average age for male passengers is 30.73.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"Group the data by sex and find the average age for each group\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "c72aab63b20d47599f4f9806f6887a69",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0.00/100 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to filter the dataframe to get the desired result\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: data[(data['Age'] > 30) & (data['Fare'] > 30) & (data['Fare'] < 50) & ((data['Pclass'] == 1) | (data['Pclass'] == 2))].shape[0]\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m20\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: 20\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'20'"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\n",
|
||||
" \"Show the number of people whose age is greater than 30 and fare is between 30 and 50 , and pclass is either 1 or 2\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Numpy examples"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "fa8baf315a0c41c89392edc4a24b76f5",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0.00/100 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import xorbits.numpy as np\n",
|
||||
"from langchain_experimental.agents.agent_toolkits import create_xorbits_agent\n",
|
||||
"from langchain_openai import OpenAI\n",
|
||||
"\n",
|
||||
"arr = np.array([1, 2, 3, 4, 5, 6])\n",
|
||||
"agent = create_xorbits_agent(OpenAI(temperature=0), arr, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to find out the shape of the array\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: data.shape\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m(6,)\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: The shape of the array is (6,).\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The shape of the array is (6,).'"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"Give the shape of the array \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to access the 2nd element of the array\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: data[1]\u001b[0m"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "64efcc74f81f404eb0a7d3f0326cd8b3",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0.00/100 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m2\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: 2\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'2'"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"Give the 2nd element of the array \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to reshape the array and then transpose it\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: np.reshape(data, (2,3)).T\u001b[0m"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "fce51acf6fb347c0b400da67c6750534",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0.00/100 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m[[1 4]\n",
|
||||
" [2 5]\n",
|
||||
" [3 6]]\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: The reshaped and transposed array is [[1 4], [2 5], [3 6]].\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The reshaped and transposed array is [[1 4], [2 5], [3 6]].'"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\n",
|
||||
" \"Reshape the array into a 2-dimensional array with 2 rows and 3 columns, and then transpose it\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to reshape the array and then sum it\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: np.sum(np.reshape(data, (3,2)), axis=0)\u001b[0m"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "27fd4a0bbf694936bc41a6991064dec2",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0.00/100 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m[ 9 12]\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: The sum of the array along the first axis is [9, 12].\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The sum of the array along the first axis is [9, 12].'"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\n",
|
||||
" \"Reshape the array into a 2-dimensional array with 3 rows and 2 columns and sum the array along the first axis\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "a591b6d7913f45cba98d2f3b71a5120a",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0.00/100 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])\n",
|
||||
"agent = create_xorbits_agent(OpenAI(temperature=0), arr, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to use the numpy covariance function\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: np.cov(data)\u001b[0m"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "5fe40f83cfae48d0919c147627b5839f",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0.00/100 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m[[1. 1. 1.]\n",
|
||||
" [1. 1. 1.]\n",
|
||||
" [1. 1. 1.]]\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: The covariance matrix is [[1. 1. 1.], [1. 1. 1.], [1. 1. 1.]].\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The covariance matrix is [[1. 1. 1.], [1. 1. 1.], [1. 1. 1.]].'"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"calculate the covariance matrix\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to use the SVD function\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: U, S, V = np.linalg.svd(data)\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now have the U matrix\n",
|
||||
"Final Answer: U = [[-0.70710678 -0.70710678]\n",
|
||||
" [-0.70710678 0.70710678]]\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'U = [[-0.70710678 -0.70710678]\\n [-0.70710678 0.70710678]]'"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"compute the U of Singular Value Decomposition of the matrix\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -4,7 +4,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# AINetwork\n",
|
||||
"# AINetwork Toolkit\n",
|
||||
"\n",
|
||||
">[AI Network](https://www.ainetwork.ai/build-on-ain) is a layer 1 blockchain designed to accommodate large-scale AI models, utilizing a decentralized GPU network powered by the [$AIN token](https://www.ainetwork.ai/token), enriching AI-driven `NFTs` (`AINFTs`).\n",
|
||||
">\n",
|
||||
@@ -4,7 +4,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Amadeus\n",
|
||||
"# Amadeus Toolkit\n",
|
||||
"\n",
|
||||
"This notebook walks you through connecting LangChain to the `Amadeus` travel APIs.\n",
|
||||
"\n",
|
||||
@@ -1,170 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Apify\n",
|
||||
"\n",
|
||||
"This notebook shows how to use the [Apify integration](/docs/integrations/providers/apify) for LangChain.\n",
|
||||
"\n",
|
||||
"[Apify](https://apify.com) is a cloud platform for web scraping and data extraction,\n",
|
||||
"which provides an [ecosystem](https://apify.com/store) of more than a thousand\n",
|
||||
"ready-made apps called *Actors* for various web scraping, crawling, and data extraction use cases.\n",
|
||||
"For example, you can use it to extract Google Search results, Instagram and Facebook profiles, products from Amazon or Shopify, Google Maps reviews, etc. etc.\n",
|
||||
"\n",
|
||||
"In this example, we'll use the [Website Content Crawler](https://apify.com/apify/website-content-crawler) Actor,\n",
|
||||
"which can deeply crawl websites such as documentation, knowledge bases, help centers, or blogs,\n",
|
||||
"and extract text content from the web pages. Then we feed the documents into a vector index and answer questions from it.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet apify-client langchain-community langchain-openai langchain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First, import `ApifyWrapper` into your source code:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.indexes import VectorstoreIndexCreator\n",
|
||||
"from langchain_community.utilities import ApifyWrapper\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"from langchain_openai import OpenAI\n",
|
||||
"from langchain_openai.embeddings import OpenAIEmbeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Initialize it using your [Apify API token](https://docs.apify.com/platform/integrations/api#api-token) and for the purpose of this example, also with your OpenAI API key:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"Your OpenAI API key\"\n",
|
||||
"os.environ[\"APIFY_API_TOKEN\"] = \"Your Apify API token\"\n",
|
||||
"\n",
|
||||
"apify = ApifyWrapper()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Then run the Actor, wait for it to finish, and fetch its results from the Apify dataset into a LangChain document loader.\n",
|
||||
"\n",
|
||||
"Note that if you already have some results in an Apify dataset, you can load them directly using `ApifyDatasetLoader`, as shown in [this notebook](/docs/integrations/document_loaders/apify_dataset). In that notebook, you'll also find the explanation of the `dataset_mapping_function`, which is used to map fields from the Apify dataset records to LangChain `Document` fields."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = apify.call_actor(\n",
|
||||
" actor_id=\"apify/website-content-crawler\",\n",
|
||||
" run_input={\"startUrls\": [{\"url\": \"https://python.langchain.com\"}]},\n",
|
||||
" dataset_mapping_function=lambda item: Document(\n",
|
||||
" page_content=item[\"text\"] or \"\", metadata={\"source\": item[\"url\"]}\n",
|
||||
" ),\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Initialize the vector index from the crawled documents:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"index = VectorstoreIndexCreator(embedding=OpenAIEmbeddings()).from_loaders([loader])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"And finally, query the vector index:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What is LangChain?\"\n",
|
||||
"result = index.query_with_sources(query, llm=OpenAI())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" LangChain is a standard interface through which you can interact with a variety of large language models (LLMs). It provides modules that can be used to build language model applications, and it also provides chains and agents with memory capabilities.\n",
|
||||
"\n",
|
||||
"https://python.langchain.com/en/latest/modules/models/llms.html, https://python.langchain.com/en/latest/getting_started/getting_started.html\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(result[\"answer\"])\n",
|
||||
"print(result[\"sources\"])"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -4,7 +4,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Azure AI Services\n",
|
||||
"# Azure AI Services Toolkit\n",
|
||||
"\n",
|
||||
"This toolkit is used to interact with the `Azure AI Services API` to achieve some multimodal capabilities.\n",
|
||||
"\n",
|
||||
@@ -4,7 +4,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Azure Cognitive Services\n",
|
||||
"# Azure Cognitive Services Toolkit\n",
|
||||
"\n",
|
||||
"This toolkit is used to interact with the `Azure Cognitive Services API` to achieve some multimodal capabilities.\n",
|
||||
"\n",
|
||||
@@ -4,7 +4,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Cassandra Database\n",
|
||||
"# Cassandra Database Toolkit\n",
|
||||
"\n",
|
||||
">`Apache Cassandra®` is a widely used database for storing transactional application data. The introduction of functions and >tooling in Large Language Models has opened up some exciting use cases for existing data in Generative AI applications. \n",
|
||||
"\n",
|
||||
@@ -148,11 +148,6 @@
|
||||
" CassandraDatabaseToolkit,\n",
|
||||
")\n",
|
||||
"from langchain_community.tools.cassandra_database.prompt import QUERY_PATH_PROMPT\n",
|
||||
"from langchain_community.tools.cassandra_database.tool import (\n",
|
||||
" GetSchemaCassandraDatabaseTool,\n",
|
||||
" GetTableDataCassandraDatabaseTool,\n",
|
||||
" QueryCassandraDatabaseTool,\n",
|
||||
")\n",
|
||||
"from langchain_community.utilities.cassandra_database import CassandraDatabase\n",
|
||||
"from langchain_openai import ChatOpenAI"
|
||||
]
|
||||
@@ -263,12 +258,7 @@
|
||||
"source": [
|
||||
"# Create a CassandraDatabase instance\n",
|
||||
"# Uses the cassio session to connect to the database\n",
|
||||
"db = CassandraDatabase()\n",
|
||||
"\n",
|
||||
"# Create the Cassandra Database tools\n",
|
||||
"query_tool = QueryCassandraDatabaseTool(db=db)\n",
|
||||
"schema_tool = GetSchemaCassandraDatabaseTool(db=db)\n",
|
||||
"select_data_tool = GetTableDataCassandraDatabaseTool(db=db)"
|
||||
"db = CassandraDatabase()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -4,7 +4,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ClickUp\n",
|
||||
"# ClickUp Toolkit\n",
|
||||
"\n",
|
||||
">[ClickUp](https://clickup.com/) is an all-in-one productivity platform that provides small and large teams across industries with flexible and customizable work management solutions, tools, and functions. \n",
|
||||
"\n",
|
||||
@@ -5,44 +5,17 @@
|
||||
"id": "19062701",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Cogniswitch Tools\n",
|
||||
"# Cogniswitch Toolkit\n",
|
||||
"\n",
|
||||
"**Use CogniSwitch to build production ready applications that can consume, organize and retrieve knowledge flawlessly. Using the framework of your choice, in this case Langchain CogniSwitch helps alleviate the stress of decision making when it comes to, choosing the right storage and retrieval formats. It also eradicates reliability issues and hallucinations when it comes to responses that are generated. Get started by interacting with your knowledge in just two simple steps.**\n",
|
||||
"CogniSwitch is used to build production ready applications that can consume, organize and retrieve knowledge flawlessly. Using the framework of your choice, in this case Langchain, CogniSwitch helps alleviate the stress of decision making when it comes to, choosing the right storage and retrieval formats. It also eradicates reliability issues and hallucinations when it comes to responses that are generated. \n",
|
||||
"\n",
|
||||
"visit [https://www.cogniswitch.ai/developer to register](https://www.cogniswitch.ai/developer?utm_source=langchain&utm_medium=langchainbuild&utm_id=dev).\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"**Registration:** \n",
|
||||
"Visit [this page](https://www.cogniswitch.ai/developer?utm_source=langchain&utm_medium=langchainbuild&utm_id=dev) to register a Cogniswitch account.\n",
|
||||
"\n",
|
||||
"- Signup with your email and verify your registration \n",
|
||||
"\n",
|
||||
"- You will get a mail with a platform token and oauth token for using the services.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"**step 1: Instantiate the toolkit and get the tools:**\n",
|
||||
"\n",
|
||||
"- Instantiate the cogniswitch toolkit with the cogniswitch token, openAI API key and OAuth token and get the tools. \n",
|
||||
"\n",
|
||||
"**step 2: Instantiate the agent with the tools and llm:**\n",
|
||||
"- Instantiate the agent with the list of cogniswitch tools and the llm, into the agent executor.\n",
|
||||
"\n",
|
||||
"**step 3: CogniSwitch Store Tool:** \n",
|
||||
"\n",
|
||||
"***CogniSwitch knowledge source file tool***\n",
|
||||
"- Use the agent to upload a file by giving the file path.(formats that are currently supported are .pdf, .docx, .doc, .txt, .html) \n",
|
||||
"- The content from the file will be processed by the cogniswitch and stored in your knowledge store. \n",
|
||||
"\n",
|
||||
"***CogniSwitch knowledge source url tool***\n",
|
||||
"- Use the agent to upload a URL. \n",
|
||||
"- The content from the url will be processed by the cogniswitch and stored in your knowledge store. \n",
|
||||
"\n",
|
||||
"**step 4: CogniSwitch Status Tool:**\n",
|
||||
"- Use the agent to know the status of the document uploaded with a document name.\n",
|
||||
"- You can also check the status of document processing in cogniswitch console. \n",
|
||||
"\n",
|
||||
"**step 5: CogniSwitch Answer Tool:**\n",
|
||||
"- Use the agent to ask your question.\n",
|
||||
"- You will get the answer from your knowledge as the response. \n"
|
||||
"- You will get a mail with a platform token and oauth token for using the services.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -60,7 +33,7 @@
|
||||
"id": "1435b193",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Import necessary libraries"
|
||||
"## Import necessary libraries"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -86,7 +59,7 @@
|
||||
"id": "6e6acf0e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Cogniswitch platform token, OAuth token and OpenAI API key"
|
||||
"## Cogniswitch platform token, OAuth token and OpenAI API key"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -108,7 +81,7 @@
|
||||
"id": "320e02fc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Instantiate the cogniswitch toolkit with the credentials"
|
||||
"## Instantiate the cogniswitch toolkit with the credentials"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -146,7 +119,7 @@
|
||||
"id": "4aae43a3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Instantiate the llm"
|
||||
"## Instantiate the LLM"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -169,7 +142,9 @@
|
||||
"id": "04179282",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create a agent executor"
|
||||
"## Use the LLM with the Toolkit\n",
|
||||
"\n",
|
||||
"### Create an agent with the LLM and Toolkit"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -9,7 +9,7 @@
|
||||
"Using this tool, you can integrate individual Connery Action into your LangChain agent.\n",
|
||||
"\n",
|
||||
"If you want to use more than one Connery Action in your agent,\n",
|
||||
"check out the [Connery Toolkit](/docs/integrations/toolkits/connery) documentation.\n",
|
||||
"check out the [Connery Toolkit](/docs/integrations/tools/connery_toolkit) documentation.\n",
|
||||
"\n",
|
||||
"## What is Connery?\n",
|
||||
"\n",
|
||||
|
||||
@@ -38,7 +38,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet databricks-sdk langchain-community langchain-openai"
|
||||
"%pip install --upgrade --quiet databricks-sdk langchain-community mlflow"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -47,9 +47,9 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"from langchain_community.chat_models.databricks import ChatDatabricks\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-3.5-turbo\")"
|
||||
"llm = ChatDatabricks(endpoint=\"databricks-meta-llama-3-70b-instruct\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
File diff suppressed because one or more lines are too long
303
docs/docs/integrations/tools/financial_datasets.ipynb
Normal file
303
docs/docs/integrations/tools/financial_datasets.ipynb
Normal file
@@ -0,0 +1,303 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"# FinancialDatasets Toolkit\n",
|
||||
"\n",
|
||||
"The [financial datasets](https://financialdatasets.ai/) stock market API provides REST endpoints that let you get financial data for 16,000+ tickers spanning 30+ years.\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To use this toolkit, you need two API keys:\n",
|
||||
"\n",
|
||||
"`FINANCIAL_DATASETS_API_KEY`: Get it from [financialdatasets.ai](https://financialdatasets.ai/).\n",
|
||||
"`OPENAI_API_KEY`: Get it from [OpenAI](https://platform.openai.com/)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"FINANCIAL_DATASETS_API_KEY\"] = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"This toolkit lives in the `langchain-community` package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "652d6238-1f87-422a-b135-f5abbb8652fc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a38cde65-254d-4219-a441-068766c0d4b5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our toolkit:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cb09c344-1836-4e0c-acf8-11d13ac1dbae",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.agent_toolkits.financial_datasets.toolkit import (\n",
|
||||
" FinancialDatasetsToolkit,\n",
|
||||
")\n",
|
||||
"from langchain_community.utilities.financial_datasets import FinancialDatasetsAPIWrapper\n",
|
||||
"\n",
|
||||
"api_wrapper = FinancialDatasetsAPIWrapper(\n",
|
||||
" financial_datasets_api_key=os.environ[\"FINANCIAL_DATASETS_API_KEY\"]\n",
|
||||
")\n",
|
||||
"toolkit = FinancialDatasetsToolkit(api_wrapper=api_wrapper)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5c5f2839-4020-424e-9fc9-07777eede442",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tools\n",
|
||||
"\n",
|
||||
"View available tools:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "51a60dbe-9f2e-4e04-bb62-23968f17164a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tools = toolkit.get_tools()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"## Use within an agent\n",
|
||||
"\n",
|
||||
"Let's equip our agent with the FinancialDatasetsToolkit and ask financial questions."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"system_prompt = \"\"\"\n",
|
||||
"You are an advanced financial analysis AI assistant equipped with specialized tools\n",
|
||||
"to access and analyze financial data. Your primary function is to help users with\n",
|
||||
"financial analysis by retrieving and interpreting income statements, balance sheets,\n",
|
||||
"and cash flow statements for publicly traded companies.\n",
|
||||
"\n",
|
||||
"You have access to the following tools from the FinancialDatasetsToolkit:\n",
|
||||
"\n",
|
||||
"1. Balance Sheets: Retrieves balance sheet data for a given ticker symbol.\n",
|
||||
"2. Income Statements: Fetches income statement data for a specified company.\n",
|
||||
"3. Cash Flow Statements: Accesses cash flow statement information for a particular ticker.\n",
|
||||
"\n",
|
||||
"Your capabilities include:\n",
|
||||
"\n",
|
||||
"1. Retrieving financial statements for any publicly traded company using its ticker symbol.\n",
|
||||
"2. Analyzing financial ratios and metrics based on the data from these statements.\n",
|
||||
"3. Comparing financial performance across different time periods (e.g., year-over-year or quarter-over-quarter).\n",
|
||||
"4. Identifying trends in a company's financial health and performance.\n",
|
||||
"5. Providing insights on a company's liquidity, solvency, profitability, and efficiency.\n",
|
||||
"6. Explaining complex financial concepts in simple terms.\n",
|
||||
"\n",
|
||||
"When responding to queries:\n",
|
||||
"\n",
|
||||
"1. Always specify which financial statement(s) you're using for your analysis.\n",
|
||||
"2. Provide context for the numbers you're referencing (e.g., fiscal year, quarter).\n",
|
||||
"3. Explain your reasoning and calculations clearly.\n",
|
||||
"4. If you need more information to provide a complete answer, ask for clarification.\n",
|
||||
"5. When appropriate, suggest additional analyses that might be helpful.\n",
|
||||
"\n",
|
||||
"Remember, your goal is to provide accurate, insightful financial analysis to\n",
|
||||
"help users make informed decisions. Always maintain a professional and objective tone in your responses.\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"Instantiate the LLM."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "310bf18e-6c9a-4072-b86e-47bc1fcca29d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.tools import tool\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"model = ChatOpenAI(model=\"gpt-4o\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"Define a user query."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "23e11cc9-abd6-4855-a7eb-799f45ca01ae",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What was AAPL's revenue in 2023? What about it's total debt in Q1 2024?\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"Create the agent."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import AgentExecutor, create_tool_calling_agent\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\"system\", system_prompt),\n",
|
||||
" (\"human\", \"{input}\"),\n",
|
||||
" # Placeholders fill up a **list** of messages\n",
|
||||
" (\"placeholder\", \"{agent_scratchpad}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"agent = create_tool_calling_agent(model, tools, prompt)\n",
|
||||
"agent_executor = AgentExecutor(agent=agent, tools=tools)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"Query the agent."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor.invoke({\"input\": query})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `FinancialDatasetsToolkit` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.financial_datasets.toolkit.FinancialDatasetsToolkit.html)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
331
docs/docs/integrations/tools/github.ipynb
Normal file
331
docs/docs/integrations/tools/github.ipynb
Normal file
@@ -0,0 +1,331 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Github Toolkit\n",
|
||||
"\n",
|
||||
"The `Github` toolkit contains tools that enable an LLM agent to interact with a github repository. \n",
|
||||
"The tool is a wrapper for the [PyGitHub](https://github.com/PyGithub/PyGithub) library. \n",
|
||||
"\n",
|
||||
"For detailed documentation of all GithubToolkit features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.github.toolkit.GitHubToolkit.html).\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"At a high-level, we will:\n",
|
||||
"\n",
|
||||
"1. Install the pygithub library\n",
|
||||
"2. Create a Github app\n",
|
||||
"3. Set your environmental variables\n",
|
||||
"4. Pass the tools to your agent with `toolkit.get_tools()`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated tracing from runs of individual tools, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"#### 1. Install dependencies\n",
|
||||
"\n",
|
||||
"This integration is implemented in `langchain-community`. We will also need the `pygithub` dependency:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet pygithub langchain-community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### 2. Create a Github App\n",
|
||||
"\n",
|
||||
"[Follow the instructions here](https://docs.github.com/en/apps/creating-github-apps/registering-a-github-app/registering-a-github-app) to create and register a Github app. Make sure your app has the following [repository permissions:](https://docs.github.com/en/rest/overview/permissions-required-for-github-apps?apiVersion=2022-11-28)\n",
|
||||
"\n",
|
||||
"* Commit statuses (read only)\n",
|
||||
"* Contents (read and write)\n",
|
||||
"* Issues (read and write)\n",
|
||||
"* Metadata (read only)\n",
|
||||
"* Pull requests (read and write)\n",
|
||||
"\n",
|
||||
"Once the app has been registered, you must give your app permission to access each of the repositories you whish it to act upon. Use the App settings on [github.com here](https://github.com/settings/installations).\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"#### 3. Set Environment Variables\n",
|
||||
"\n",
|
||||
"Before initializing your agent, the following environment variables need to be set:\n",
|
||||
"\n",
|
||||
"* **GITHUB_APP_ID**- A six digit number found in your app's general settings\n",
|
||||
"* **GITHUB_APP_PRIVATE_KEY**- The location of your app's private key .pem file, or the full text of that file as a string.\n",
|
||||
"* **GITHUB_REPOSITORY**- The name of the Github repository you want your bot to act upon. Must follow the format {username}/{repo-name}. *Make sure the app has been added to this repository first!*\n",
|
||||
"* Optional: **GITHUB_BRANCH**- The branch where the bot will make its commits. Defaults to `repo.default_branch`.\n",
|
||||
"* Optional: **GITHUB_BASE_BRANCH**- The base branch of your repo upon which PRs will based from. Defaults to `repo.default_branch`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"for env_var in [\n",
|
||||
" \"GITHUB_APP_ID\",\n",
|
||||
" \"GITHUB_APP_PRIVATE_KEY\",\n",
|
||||
" \"GITHUB_REPOSITORY\",\n",
|
||||
"]:\n",
|
||||
" if not os.getenv(env_var):\n",
|
||||
" os.environ[env_var] = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our toolkit:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.agent_toolkits.github.toolkit import GitHubToolkit\n",
|
||||
"from langchain_community.utilities.github import GitHubAPIWrapper\n",
|
||||
"\n",
|
||||
"github = GitHubAPIWrapper()\n",
|
||||
"toolkit = GitHubToolkit.from_github_api_wrapper(github)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tools\n",
|
||||
"\n",
|
||||
"View available tools:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Get Issues\n",
|
||||
"Get Issue\n",
|
||||
"Comment on Issue\n",
|
||||
"List open pull requests (PRs)\n",
|
||||
"Get Pull Request\n",
|
||||
"Overview of files included in PR\n",
|
||||
"Create Pull Request\n",
|
||||
"List Pull Requests' Files\n",
|
||||
"Create File\n",
|
||||
"Read File\n",
|
||||
"Update File\n",
|
||||
"Delete File\n",
|
||||
"Overview of existing files in Main branch\n",
|
||||
"Overview of files in current working branch\n",
|
||||
"List branches in this repository\n",
|
||||
"Set active branch\n",
|
||||
"Create a new branch\n",
|
||||
"Get files from a directory\n",
|
||||
"Search issues and pull requests\n",
|
||||
"Search code\n",
|
||||
"Create review request\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tools = toolkit.get_tools()\n",
|
||||
"\n",
|
||||
"for tool in tools:\n",
|
||||
" print(tool.name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The purpose of these tools is as follows:\n",
|
||||
"\n",
|
||||
"Each of these steps will be explained in great detail below.\n",
|
||||
"\n",
|
||||
"1. **Get Issues**- fetches issues from the repository.\n",
|
||||
"\n",
|
||||
"2. **Get Issue**- fetches details about a specific issue.\n",
|
||||
"\n",
|
||||
"3. **Comment on Issue**- posts a comment on a specific issue.\n",
|
||||
"\n",
|
||||
"4. **Create Pull Request**- creates a pull request from the bot's working branch to the base branch.\n",
|
||||
"\n",
|
||||
"5. **Create File**- creates a new file in the repository.\n",
|
||||
"\n",
|
||||
"6. **Read File**- reads a file from the repository.\n",
|
||||
"\n",
|
||||
"7. **Update File**- updates a file in the repository.\n",
|
||||
"\n",
|
||||
"8. **Delete File**- deletes a file from the repository."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use within an agent\n",
|
||||
"\n",
|
||||
"We will need a LLM or chat model:\n",
|
||||
"\n",
|
||||
"```{=mdx}\n",
|
||||
"import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
|
||||
"\n",
|
||||
"<ChatModelTabs customVarName=\"llm\" />\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# | output: false\n",
|
||||
"# | echo: false\n",
|
||||
"\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Initialize the agent with a subset of tools:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langgraph.prebuilt import create_react_agent\n",
|
||||
"\n",
|
||||
"tools = [tool for tool in toolkit.get_tools() if tool.name == \"Get Issue\"]\n",
|
||||
"assert len(tools) == 1\n",
|
||||
"tools[0].name = \"get_issue\"\n",
|
||||
"\n",
|
||||
"agent_executor = create_react_agent(llm, tools)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"And issue it a query:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"================================\u001b[1m Human Message \u001b[0m=================================\n",
|
||||
"\n",
|
||||
"What is the title of issue 24888?\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" get_issue (call_iSYJVaM7uchfNHOMJoVPQsOi)\n",
|
||||
" Call ID: call_iSYJVaM7uchfNHOMJoVPQsOi\n",
|
||||
" Args:\n",
|
||||
" issue_number: 24888\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: get_issue\n",
|
||||
"\n",
|
||||
"{\"number\": 24888, \"title\": \"Standardize KV-Store Docs\", \"body\": \"To make our KV-store integrations as easy to use as possible we need to make sure the docs for them are thorough and standardized. There are two parts to this: updating the KV-store docstrings and updating the actual integration docs.\\r\\n\\r\\nThis needs to be done for each KV-store integration, ideally with one PR per KV-store.\\r\\n\\r\\nRelated to broader issues #21983 and #22005.\\r\\n\\r\\n## Docstrings\\r\\nEach KV-store class docstring should have the sections shown in the [Appendix](#appendix) below. The sections should have input and output code blocks when relevant.\\r\\n\\r\\nTo build a preview of the API docs for the package you're working on run (from root of repo):\\r\\n\\r\\n```shell\\r\\nmake api_docs_clean; make api_docs_quick_preview API_PKG=openai\\r\\n```\\r\\n\\r\\nwhere `API_PKG=` should be the parent directory that houses the edited package (e.g. community, openai, anthropic, huggingface, together, mistralai, groq, fireworks, etc.). This should be quite fast for all the partner packages.\\r\\n\\r\\n## Doc pages\\r\\nEach KV-store [docs page](https://python.langchain.com/v0.2/docs/integrations/stores/) should follow [this template](https://github.com/langchain-ai/langchain/blob/master/libs/cli/langchain_cli/integration_template/docs/kv_store.ipynb).\\r\\n\\r\\nHere is an example: https://python.langchain.com/v0.2/docs/integrations/stores/in_memory/\\r\\n\\r\\nYou can use the `langchain-cli` to quickly get started with a new chat model integration docs page (run from root of repo):\\r\\n\\r\\n```shell\\r\\npoetry run pip install -e libs/cli\\r\\npoetry run langchain-cli integration create-doc --name \\\"foo-bar\\\" --name-class FooBar --component-type kv_store --destination-dir ./docs/docs/integrations/stores/\\r\\n```\\r\\n\\r\\nwhere `--name` is the integration package name without the \\\"langchain-\\\" prefix and `--name-class` is the class name without the \\\"ByteStore\\\" suffix. This will create a template doc with some autopopulated fields at docs/docs/integrations/stores/foo_bar.ipynb.\\r\\n\\r\\nTo build a preview of the docs you can run (from root):\\r\\n\\r\\n```shell\\r\\nmake docs_clean\\r\\nmake docs_build\\r\\ncd docs/build/output-new\\r\\nyarn\\r\\nyarn start\\r\\n```\\r\\n\\r\\n## Appendix\\r\\nExpected sections for the KV-store class docstring.\\r\\n\\r\\n```python\\r\\n \\\"\\\"\\\"__ModuleName__ completion KV-store integration.\\r\\n\\r\\n # TODO: Replace with relevant packages, env vars.\\r\\n Setup:\\r\\n Install ``__package_name__`` and set environment variable ``__MODULE_NAME___API_KEY``.\\r\\n\\r\\n .. code-block:: bash\\r\\n\\r\\n pip install -U __package_name__\\r\\n export __MODULE_NAME___API_KEY=\\\"your-api-key\\\"\\r\\n\\r\\n # TODO: Populate with relevant params.\\r\\n Key init args \\u2014 client params:\\r\\n api_key: Optional[str]\\r\\n __ModuleName__ API key. If not passed in will be read from env var __MODULE_NAME___API_KEY.\\r\\n\\r\\n See full list of supported init args and their descriptions in the params section.\\r\\n\\r\\n # TODO: Replace with relevant init params.\\r\\n Instantiate:\\r\\n .. code-block:: python\\r\\n\\r\\n from __module_name__ import __ModuleName__ByteStore\\r\\n\\r\\n kv_store = __ModuleName__ByteStore(\\r\\n # api_key=\\\"...\\\",\\r\\n # other params...\\r\\n )\\r\\n\\r\\n Set keys:\\r\\n .. code-block:: python\\r\\n\\r\\n kv_pairs = [\\r\\n [\\\"key1\\\", \\\"value1\\\"],\\r\\n [\\\"key2\\\", \\\"value2\\\"],\\r\\n ]\\r\\n\\r\\n kv_store.mset(kv_pairs)\\r\\n\\r\\n .. code-block:: python\\r\\n\\r\\n Get keys:\\r\\n .. code-block:: python\\r\\n\\r\\n kv_store.mget([\\\"key1\\\", \\\"key2\\\"])\\r\\n\\r\\n .. code-block:: python\\r\\n\\r\\n # TODO: Example output.\\r\\n\\r\\n Delete keys:\\r\\n ..code-block:: python\\r\\n\\r\\n kv_store.mdelete([\\\"key1\\\", \\\"key2\\\"])\\r\\n\\r\\n ..code-block:: python\\r\\n \\\"\\\"\\\" # noqa: E501\\r\\n```\", \"comments\": \"[]\", \"opened_by\": \"jacoblee93\"}\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"\n",
|
||||
"The title of issue 24888 is \"Standardize KV-Store Docs\".\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"example_query = \"What is the title of issue 24888?\"\n",
|
||||
"\n",
|
||||
"events = agent_executor.stream(\n",
|
||||
" {\"messages\": [(\"user\", example_query)]},\n",
|
||||
" stream_mode=\"values\",\n",
|
||||
")\n",
|
||||
"for event in events:\n",
|
||||
" event[\"messages\"][-1].pretty_print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `GithubToolkit` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.github.toolkit.GitHubToolkit.html)."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -4,7 +4,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Gitlab\n",
|
||||
"# Gitlab Toolkit\n",
|
||||
"\n",
|
||||
"The `Gitlab` toolkit contains tools that enable an LLM agent to interact with a gitlab repository. \n",
|
||||
"The tool is a wrapper for the [python-gitlab](https://github.com/python-gitlab/python-gitlab) library. \n",
|
||||
271
docs/docs/integrations/tools/gmail.ipynb
Normal file
271
docs/docs/integrations/tools/gmail.ipynb
Normal file
@@ -0,0 +1,271 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Gmail Toolkit\n",
|
||||
"\n",
|
||||
"This will help you getting started with the GMail [toolkit](/docs/concepts/#toolkits). This toolkit interacts with the GMail API to read messages, draft and send messages, and more. For detailed documentation of all GmailToolkit features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/gmail/langchain_google_community.gmail.toolkit.GmailToolkit.html).\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To use this toolkit, you will need to set up your credentials explained in the [Gmail API docs](https://developers.google.com/gmail/api/quickstart/python#authorize_credentials_for_a_desktop_application). Once you've downloaded the `credentials.json` file, you can start using the Gmail API."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"This toolkit lives in the `langchain-google-community` package. We'll need the `gmail` extra:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-google-community\\[gmail\\]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated tracing from runs of individual tools, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"By default the toolkit reads the local `credentials.json` file. You can also manually provide a `Credentials` object."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_community import GmailToolkit\n",
|
||||
"\n",
|
||||
"toolkit = GmailToolkit()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Customizing Authentication\n",
|
||||
"\n",
|
||||
"Behind the scenes, a `googleapi` resource is created using the following methods. \n",
|
||||
"you can manually build a `googleapi` resource for more auth control. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_community.gmail.utils import (\n",
|
||||
" build_resource_service,\n",
|
||||
" get_gmail_credentials,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Can review scopes here https://developers.google.com/gmail/api/auth/scopes\n",
|
||||
"# For instance, readonly scope is 'https://www.googleapis.com/auth/gmail.readonly'\n",
|
||||
"credentials = get_gmail_credentials(\n",
|
||||
" token_file=\"token.json\",\n",
|
||||
" scopes=[\"https://mail.google.com/\"],\n",
|
||||
" client_secrets_file=\"credentials.json\",\n",
|
||||
")\n",
|
||||
"api_resource = build_resource_service(credentials=credentials)\n",
|
||||
"toolkit = GmailToolkit(api_resource=api_resource)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tools\n",
|
||||
"\n",
|
||||
"View available tools:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[GmailCreateDraft(api_resource=<googleapiclient.discovery.Resource object at 0x1094509d0>),\n",
|
||||
" GmailSendMessage(api_resource=<googleapiclient.discovery.Resource object at 0x1094509d0>),\n",
|
||||
" GmailSearch(api_resource=<googleapiclient.discovery.Resource object at 0x1094509d0>),\n",
|
||||
" GmailGetMessage(api_resource=<googleapiclient.discovery.Resource object at 0x1094509d0>),\n",
|
||||
" GmailGetThread(api_resource=<googleapiclient.discovery.Resource object at 0x1094509d0>)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tools = toolkit.get_tools()\n",
|
||||
"tools"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"- [GmailCreateDraft](https://api.python.langchain.com/en/latest/gmail/langchain_google_community.gmail.create_draft.GmailCreateDraft.html)\n",
|
||||
"- [GmailSendMessage](https://api.python.langchain.com/en/latest/gmail/langchain_google_community.gmail.send_message.GmailSendMessage.html)\n",
|
||||
"- [GmailSearch](https://api.python.langchain.com/en/latest/gmail/langchain_google_community.gmail.search.GmailSearch.html)\n",
|
||||
"- [GmailGetMessage](https://api.python.langchain.com/en/latest/gmail/langchain_google_community.gmail.get_message.GmailGetMessage.html)\n",
|
||||
"- [GmailGetThread](https://api.python.langchain.com/en/latest/gmail/langchain_google_community.gmail.get_thread.GmailGetThread.html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use within an agent\n",
|
||||
"\n",
|
||||
"Below we show how to incorporate the toolkit into an [agent](/docs/tutorials/agents).\n",
|
||||
"\n",
|
||||
"We will need a LLM or chat model:\n",
|
||||
"\n",
|
||||
"```{=mdx}\n",
|
||||
"import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
|
||||
"\n",
|
||||
"<ChatModelTabs customVarName=\"llm\" />\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# | output: false\n",
|
||||
"# | echo: false\n",
|
||||
"\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langgraph.prebuilt import create_react_agent\n",
|
||||
"\n",
|
||||
"agent_executor = create_react_agent(llm, tools)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"================================\u001b[1m Human Message \u001b[0m=================================\n",
|
||||
"\n",
|
||||
"Draft an email to fake@fake.com thanking them for coffee.\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" create_gmail_draft (call_slGkYKZKA6h3Mf1CraUBzs6M)\n",
|
||||
" Call ID: call_slGkYKZKA6h3Mf1CraUBzs6M\n",
|
||||
" Args:\n",
|
||||
" message: Dear Fake,\n",
|
||||
"\n",
|
||||
"I wanted to take a moment to thank you for the coffee yesterday. It was a pleasure catching up with you. Let's do it again soon!\n",
|
||||
"\n",
|
||||
"Best regards,\n",
|
||||
"[Your Name]\n",
|
||||
" to: ['fake@fake.com']\n",
|
||||
" subject: Thank You for the Coffee\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: create_gmail_draft\n",
|
||||
"\n",
|
||||
"Draft created. Draft Id: r-7233782721440261513\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"\n",
|
||||
"I have drafted an email to fake@fake.com thanking them for the coffee. You can review and send it from your email draft with the subject \"Thank You for the Coffee\".\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"example_query = \"Draft an email to fake@fake.com thanking them for coffee.\"\n",
|
||||
"\n",
|
||||
"events = agent_executor.stream(\n",
|
||||
" {\"messages\": [(\"user\", example_query)]},\n",
|
||||
" stream_mode=\"values\",\n",
|
||||
")\n",
|
||||
"for event in events:\n",
|
||||
" event[\"messages\"][-1].pretty_print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `GmailToolkit` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.slack.toolkit.SlackToolkit.html)."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -21,7 +21,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain-community"
|
||||
"%pip install --upgrade --quiet langchain_google_community"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -44,8 +44,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.utilities import GoogleSearchAPIWrapper\n",
|
||||
"from langchain_core.tools import Tool\n",
|
||||
"from langchain_google_community import GoogleSearchAPIWrapper\n",
|
||||
"\n",
|
||||
"search = GoogleSearchAPIWrapper()\n",
|
||||
"\n",
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
"id": "245a954a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Jira\n",
|
||||
"# Jira Toolkit\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use the `Jira` toolkit.\n",
|
||||
"\n",
|
||||
@@ -5,7 +5,7 @@
|
||||
"id": "85fb2c03-ab88-4c8c-97e3-a7f2954555ab",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# JSON\n",
|
||||
"# JSON Toolkit\n",
|
||||
"\n",
|
||||
"This notebook showcases an agent interacting with large `JSON/dict` objects. \n",
|
||||
"This is useful when you want to answer questions about a JSON blob that's too large to fit in the context window of an LLM. The agent is able to iteratively explore the blob to find what it needs to answer the user's question.\n",
|
||||
@@ -4,7 +4,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# MultiOn\n",
|
||||
"# MultiOn Toolkit\n",
|
||||
" \n",
|
||||
"[MultiON](https://www.multion.ai/blog/multion-building-a-brighter-future-for-humanity-with-ai-agents) has built an AI Agent that can interact with a broad array of web services and applications. \n",
|
||||
"\n",
|
||||
@@ -5,7 +5,7 @@
|
||||
"id": "e6fd05db-21c2-4227-9900-0840bc62cb31",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# NASA\n",
|
||||
"# NASA Toolkit\n",
|
||||
"\n",
|
||||
"This notebook shows how to use agents to interact with the NASA toolkit. The toolkit provides access to the NASA Image and Video Library API, with potential to expand and include other accessible NASA APIs in future iterations.\n",
|
||||
"\n",
|
||||
@@ -4,7 +4,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Office365\n",
|
||||
"# Office365 Toolkit\n",
|
||||
"\n",
|
||||
">[Microsoft 365](https://www.office.com/) is a product family of productivity software, collaboration and cloud-based services owned by `Microsoft`.\n",
|
||||
">\n",
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user