mirror of
https://github.com/hwchase17/langchain.git
synced 2026-02-10 19:20:24 +00:00
Compare commits
280 Commits
wfh/memory
...
eugene/aut
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
15d5c49076 | ||
|
|
65660535bc | ||
|
|
6c41dd82f0 | ||
|
|
78d788c28c | ||
|
|
e9deeab37f | ||
|
|
4d595eec5b | ||
|
|
047b001336 | ||
|
|
840e936c7c | ||
|
|
6cc6b490be | ||
|
|
f45d1ed4f5 | ||
|
|
4ffc417858 | ||
|
|
9e74a70859 | ||
|
|
0997f2c0f1 | ||
|
|
437b545426 | ||
|
|
9b9d07572b | ||
|
|
6a90c6c2c8 | ||
|
|
8371187689 | ||
|
|
4309c17ffa | ||
|
|
6b007e2829 | ||
|
|
183a9d4e66 | ||
|
|
be638ad77d | ||
|
|
115a77142a | ||
|
|
f0b0c72d98 | ||
|
|
6aee589eec | ||
|
|
5b7ff215e8 | ||
|
|
0f0ccfe7f6 | ||
|
|
2759e2d857 | ||
|
|
0f68054401 | ||
|
|
0ead8ea708 | ||
|
|
c7ea6e9ff8 | ||
|
|
812419d946 | ||
|
|
873a80e496 | ||
|
|
d1b95db874 | ||
|
|
6c3573e7f6 | ||
|
|
179a39954d | ||
|
|
6f0bccfeb5 | ||
|
|
e68a1d73d0 | ||
|
|
29f51055e8 | ||
|
|
5d765408ce | ||
|
|
404d103c41 | ||
|
|
47eea32f6a | ||
|
|
b786335dd1 | ||
|
|
f81e613086 | ||
|
|
8ef7e14a85 | ||
|
|
53e4148a1b | ||
|
|
4e8f11b36a | ||
|
|
2928a1a3c9 | ||
|
|
814faa9de5 | ||
|
|
c1b444e1e7 | ||
|
|
8a8917e0d9 | ||
|
|
b2b71b0d35 | ||
|
|
8022293124 | ||
|
|
5f117384c0 | ||
|
|
1f54ec899b | ||
|
|
a137492b53 | ||
|
|
e283dc8d50 | ||
|
|
81e0cbf2d5 | ||
|
|
37aade19da | ||
|
|
43dffe39fb | ||
|
|
71f98db2fe | ||
|
|
f68f3b23d7 | ||
|
|
206901fa01 | ||
|
|
7ea2b08d1f | ||
|
|
368aa4ede7 | ||
|
|
5018af8839 | ||
|
|
96b0ff182e | ||
|
|
59194c2214 | ||
|
|
ee1d13678e | ||
|
|
1335f2b9f8 | ||
|
|
16551536e3 | ||
|
|
c5fb3b6069 | ||
|
|
1ec0b18379 | ||
|
|
f31047a394 | ||
|
|
5c516945d0 | ||
|
|
d2adec3818 | ||
|
|
7c5c0557cb | ||
|
|
68113348cc | ||
|
|
b574507c51 | ||
|
|
31820a31e4 | ||
|
|
13ccf202de | ||
|
|
6705928b9d | ||
|
|
8961c720b8 | ||
|
|
73072d3db8 | ||
|
|
2de028834f | ||
|
|
a7000ee89e | ||
|
|
9c2b29a1cb | ||
|
|
f190bc3e83 | ||
|
|
7df2dfc4c2 | ||
|
|
ed9a0f8185 | ||
|
|
465faab935 | ||
|
|
0ec020698f | ||
|
|
bd2e298468 | ||
|
|
66226d1d4d | ||
|
|
e83250cc5f | ||
|
|
2a26cc6d2b | ||
|
|
3fbb737bb3 | ||
|
|
53f3793504 | ||
|
|
ec40ead980 | ||
|
|
ff5024634e | ||
|
|
1e8fca5518 | ||
|
|
8061994c61 | ||
|
|
8d2344db43 | ||
|
|
b4a126ae71 | ||
|
|
7e70cd2a28 | ||
|
|
de61ebd9e0 | ||
|
|
c19a0b9c10 | ||
|
|
5a490a79f4 | ||
|
|
64d0a0fcc0 | ||
|
|
bca0749a11 | ||
|
|
07d6d1ca38 | ||
|
|
144b4c0c78 | ||
|
|
844eca98d5 | ||
|
|
1ab773c742 | ||
|
|
15de57b848 | ||
|
|
4780156955 | ||
|
|
5e3b968078 | ||
|
|
913a156cff | ||
|
|
893f3014af | ||
|
|
a8be207ea3 | ||
|
|
6556a8fcfd | ||
|
|
a795c3d860 | ||
|
|
9975ba4124 | ||
|
|
7f9c6c3baa | ||
|
|
b2f8a5bae9 | ||
|
|
e98e2b2b81 | ||
|
|
529cb2e30c | ||
|
|
04ebdbe98f | ||
|
|
08f5e6b801 | ||
|
|
4923cf029a | ||
|
|
549720ae51 | ||
|
|
18a2452121 | ||
|
|
4d526c49ed | ||
|
|
8f14ddefdf | ||
|
|
490ad93b3c | ||
|
|
b65a9414bb | ||
|
|
ae4638aa35 | ||
|
|
872abb4198 | ||
|
|
412fa4e1db | ||
|
|
b7c0eb9ecb | ||
|
|
13b4f465e2 | ||
|
|
7d79178827 | ||
|
|
d935573362 | ||
|
|
3314f54383 | ||
|
|
f63240649c | ||
|
|
17953ab61f | ||
|
|
2448043b84 | ||
|
|
3892cefac6 | ||
|
|
8ee56b9a5b | ||
|
|
b7d6e1909c | ||
|
|
62b8b459c6 | ||
|
|
2311d57df4 | ||
|
|
7124377524 | ||
|
|
abe4c361f9 | ||
|
|
e0de62f6da | ||
|
|
2db2987b1b | ||
|
|
fab24457bc | ||
|
|
3a78450883 | ||
|
|
af7e70d4af | ||
|
|
06bdbe06fe | ||
|
|
e62a1686e2 | ||
|
|
760c278fe0 | ||
|
|
61dd92f821 | ||
|
|
394b67ab92 | ||
|
|
d5884017a9 | ||
|
|
0ad2d5f27a | ||
|
|
82df923f37 | ||
|
|
1b0bfa54cf | ||
|
|
c7ff5f19a8 | ||
|
|
a221a9ced0 | ||
|
|
a1a650c743 | ||
|
|
1efb9bae5f | ||
|
|
877d384bc9 | ||
|
|
e66759cc9d | ||
|
|
ecd4aae818 | ||
|
|
6dd18eee26 | ||
|
|
a003a0baf6 | ||
|
|
e758e9e7f5 | ||
|
|
caa6caeb8a | ||
|
|
25b8cc7e3d | ||
|
|
d7e6770de8 | ||
|
|
594f195e54 | ||
|
|
ba4e82bb47 | ||
|
|
dc3ca44e05 | ||
|
|
b2e4b9dca4 | ||
|
|
cddd8ae83d | ||
|
|
f499e6ea6a | ||
|
|
539574670c | ||
|
|
2ab13ab743 | ||
|
|
01217b2247 | ||
|
|
55beab326c | ||
|
|
41524304bf | ||
|
|
f5bf893035 | ||
|
|
0e9e5b5202 | ||
|
|
68763bd25f | ||
|
|
ff98fad2d9 | ||
|
|
94a693e2ee | ||
|
|
0eca3e7d90 | ||
|
|
cf608f876b | ||
|
|
1bbadde77b | ||
|
|
944321c6ab | ||
|
|
ef6332ead6 | ||
|
|
41bb3a6f9b | ||
|
|
934ea80780 | ||
|
|
93260a9922 | ||
|
|
ae78ef7fe6 | ||
|
|
e7e5cb9d08 | ||
|
|
412e29d436 | ||
|
|
9eb7e6e27f | ||
|
|
db9d5b213a | ||
|
|
ddc353a768 | ||
|
|
ed24de8467 | ||
|
|
c5988c1d4b | ||
|
|
bf1357f584 | ||
|
|
a3ac9b23eb | ||
|
|
ee6ff96e28 | ||
|
|
ceab0a7c1f | ||
|
|
e5dba8978a | ||
|
|
01a9b06400 | ||
|
|
a612800ef0 | ||
|
|
04a4d3e312 | ||
|
|
b9db3dd09b | ||
|
|
862e9aed66 | ||
|
|
2c2fd9ff13 | ||
|
|
77c0582243 | ||
|
|
6b88fbd9bb | ||
|
|
f3d2fdd54c | ||
|
|
f27176930a | ||
|
|
70604e590f | ||
|
|
8ce661d5a1 | ||
|
|
61347bd322 | ||
|
|
6384c1ec8f | ||
|
|
ad38eb2d50 | ||
|
|
83a53e2126 | ||
|
|
457a4730b2 | ||
|
|
4da43f77e5 | ||
|
|
5c6dcb1960 | ||
|
|
adf019724f | ||
|
|
9cbefcc56c | ||
|
|
7a00f17033 | ||
|
|
d1d691caa4 | ||
|
|
479cc086ba | ||
|
|
68a906bb31 | ||
|
|
7734a2b5ab | ||
|
|
c14571ab37 | ||
|
|
dd87275dde | ||
|
|
1f40d3e094 | ||
|
|
ec069381fb | ||
|
|
30c2d3cd06 | ||
|
|
0af48b06d0 | ||
|
|
c1ea8da9bc | ||
|
|
af788b7cf0 | ||
|
|
bed8eb978e | ||
|
|
afc55a4fee | ||
|
|
0a16b3d84b | ||
|
|
a7efa95775 | ||
|
|
e58b1d7073 | ||
|
|
125ae6d9de | ||
|
|
04e45f9cde | ||
|
|
59a7c5877a | ||
|
|
00de334f81 | ||
|
|
3662aca7d4 | ||
|
|
8f158b72fc | ||
|
|
f7ad14acfa | ||
|
|
73d5cba308 | ||
|
|
483f6c2fe3 | ||
|
|
24f889f2bc | ||
|
|
1f055775f8 | ||
|
|
76102971c0 | ||
|
|
3e7d2a1b64 | ||
|
|
c580c81cca | ||
|
|
3eb4112a1f | ||
|
|
1cc7d4c9eb | ||
|
|
7cbe28ba9b | ||
|
|
72eb4fa4e8 | ||
|
|
1a7d8667c8 | ||
|
|
ae28568e2a | ||
|
|
d983046f90 | ||
|
|
82b8d8596c | ||
|
|
848454d1e7 | ||
|
|
4928f7a9f5 |
@@ -15,7 +15,11 @@ You may use the button above, or follow these steps to open this repo in a Codes
|
||||
For more info, check out the [GitHub documentation](https://docs.github.com/en/free-pro-team@latest/github/developing-online-with-codespaces/creating-a-codespace#creating-a-codespace).
|
||||
|
||||
## VS Code Dev Containers
|
||||
[](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/hwchase17/langchain)
|
||||
[](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/langchain-ai/langchain)
|
||||
|
||||
Note: If you click this link you will open the main repo and not your local cloned repo, you can use this link and replace with your username and cloned repo name:
|
||||
https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/<yourusername>/<yourclonedreponame>
|
||||
|
||||
|
||||
If you already have VS Code and Docker installed, you can use the button above to get started. This will cause VS Code to automatically install the Dev Containers extension if needed, clone the source code into a container volume, and spin up a dev container for use.
|
||||
|
||||
@@ -25,7 +29,7 @@ You can also follow these steps to open this repo in a container using the VS Co
|
||||
|
||||
2. Open a locally cloned copy of the code:
|
||||
|
||||
- Clone this repository to your local filesystem.
|
||||
- Fork and Clone this repository to your local filesystem.
|
||||
- Press <kbd>F1</kbd> and select the **Dev Containers: Open Folder in Container...** command.
|
||||
- Select the cloned copy of this folder, wait for the container to start, and try things out!
|
||||
|
||||
|
||||
1
.github/workflows/_release.yml
vendored
1
.github/workflows/_release.yml
vendored
@@ -37,6 +37,7 @@ jobs:
|
||||
echo version=$(poetry version --short) >> $GITHUB_OUTPUT
|
||||
- name: Create Release
|
||||
uses: ncipollo/release-action@v1
|
||||
if: ${{ inputs.working-directory == 'libs/langchain' }}
|
||||
with:
|
||||
artifacts: "dist/*"
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
2
.github/workflows/codespell.yml
vendored
2
.github/workflows/codespell.yml
vendored
@@ -20,3 +20,5 @@ jobs:
|
||||
uses: actions/checkout@v3
|
||||
- name: Codespell
|
||||
uses: codespell-project/actions-codespell@v2
|
||||
with:
|
||||
skip: guide_imports.json
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -162,6 +162,7 @@ docs/.docusaurus/
|
||||
docs/.cache-loader/
|
||||
docs/_dist
|
||||
docs/api_reference/api_reference.rst
|
||||
docs/api_reference/experimental_api_reference.rst
|
||||
docs/api_reference/_build
|
||||
docs/api_reference/*/
|
||||
!docs/api_reference/_static/
|
||||
|
||||
@@ -43,6 +43,10 @@ Now:
|
||||
|
||||
`from langchain_experimental.sql import SQLDatabaseChain`
|
||||
|
||||
Alternatively, if you are just interested in using the query generation part of the SQL chain, you can check out [`create_sql_query_chain`](https://github.com/langchain-ai/langchain/blob/master/docs/extras/use_cases/tabular/sql_query.ipynb)
|
||||
|
||||
`from langchain.chains import create_sql_query_chain`
|
||||
|
||||
## `load_prompt` for Python files
|
||||
|
||||
Note: this only applies if you want to load Python files as prompts.
|
||||
|
||||
@@ -12,14 +12,14 @@
|
||||
[](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/hwchase17/langchain)
|
||||
[](https://codespaces.new/hwchase17/langchain)
|
||||
[](https://star-history.com/#hwchase17/langchain)
|
||||
[](https://libraries.io/github/hwchase17/langchain)
|
||||
[](https://libraries.io/github/langchain-ai/langchain)
|
||||
[](https://github.com/hwchase17/langchain/issues)
|
||||
|
||||
|
||||
Looking for the JS/TS version? Check out [LangChain.js](https://github.com/hwchase17/langchainjs).
|
||||
|
||||
**Production Support:** As you move your LangChains into production, we'd love to offer more comprehensive support.
|
||||
Please fill out [this form](https://forms.gle/57d8AmXBYp8PP8tZA) and we'll set up a dedicated support Slack channel.
|
||||
**Production Support:** As you move your LangChains into production, we'd love to offer more hands-on support.
|
||||
Fill out [this form](https://airtable.com/appwQzlErAS2qiP0L/shrGtGaVBVAz7NcV2) to share more about what you're building, and our team will get in touch.
|
||||
|
||||
## 🚨Breaking Changes for select chains (SQLDatabase) on 7/28
|
||||
|
||||
|
||||
@@ -13,5 +13,6 @@ cp -r {docs_skeleton,snippets} _dist
|
||||
cp -r extras/* _dist/docs_skeleton/docs
|
||||
cd _dist/docs_skeleton
|
||||
poetry run nbdoc_build
|
||||
poetry run python generate_api_reference_links.py
|
||||
yarn install
|
||||
yarn start
|
||||
|
||||
@@ -7,20 +7,67 @@
|
||||
|
||||
# -- Path setup --------------------------------------------------------------
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import toml
|
||||
from docutils import nodes
|
||||
from sphinx.util.docutils import SphinxDirective
|
||||
|
||||
# If extensions (or modules to document with autodoc) are in another directory,
|
||||
# add these directories to sys.path here. If the directory is relative to the
|
||||
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
||||
#
|
||||
import os
|
||||
import sys
|
||||
|
||||
import toml
|
||||
|
||||
_DIR = Path(__file__).parent.absolute()
|
||||
sys.path.insert(0, os.path.abspath("."))
|
||||
sys.path.insert(0, os.path.abspath("../../libs/langchain"))
|
||||
sys.path.insert(0, os.path.abspath("../../libs/experimental"))
|
||||
|
||||
with open("../../libs/langchain/pyproject.toml") as f:
|
||||
with (_DIR.parents[1] / "libs" / "langchain" / "pyproject.toml").open("r") as f:
|
||||
data = toml.load(f)
|
||||
with (_DIR / "guide_imports.json").open("r") as f:
|
||||
imported_classes = json.load(f)
|
||||
|
||||
|
||||
class ExampleLinksDirective(SphinxDirective):
|
||||
"""Directive to generate a list of links to examples.
|
||||
|
||||
We have a script that extracts links to API reference docs
|
||||
from our notebook examples. This directive uses that information
|
||||
to backlink to the examples from the API reference docs."""
|
||||
|
||||
has_content = False
|
||||
required_arguments = 1
|
||||
|
||||
def run(self):
|
||||
"""Run the directive.
|
||||
|
||||
Called any time :example_links:`ClassName` is used
|
||||
in the template *.rst files."""
|
||||
class_or_func_name = self.arguments[0]
|
||||
links = imported_classes.get(class_or_func_name, {})
|
||||
list_node = nodes.bullet_list()
|
||||
for doc_name, link in links.items():
|
||||
item_node = nodes.list_item()
|
||||
para_node = nodes.paragraph()
|
||||
link_node = nodes.reference()
|
||||
link_node["refuri"] = link
|
||||
link_node.append(nodes.Text(doc_name))
|
||||
para_node.append(link_node)
|
||||
item_node.append(para_node)
|
||||
list_node.append(item_node)
|
||||
if list_node.children:
|
||||
title_node = nodes.title()
|
||||
title_node.append(nodes.Text(f"Examples using {class_or_func_name}"))
|
||||
return [title_node, list_node]
|
||||
return [list_node]
|
||||
|
||||
|
||||
def setup(app):
|
||||
app.add_directive("example_links", ExampleLinksDirective)
|
||||
|
||||
|
||||
# -- Project information -----------------------------------------------------
|
||||
|
||||
|
||||
@@ -5,13 +5,15 @@ from pathlib import Path
|
||||
|
||||
ROOT_DIR = Path(__file__).parents[2].absolute()
|
||||
PKG_DIR = ROOT_DIR / "libs" / "langchain" / "langchain"
|
||||
EXP_DIR = ROOT_DIR / "libs" / "experimental" / "langchain_experimental"
|
||||
WRITE_FILE = Path(__file__).parent / "api_reference.rst"
|
||||
EXP_WRITE_FILE = Path(__file__).parent / "experimental_api_reference.rst"
|
||||
|
||||
|
||||
def load_members() -> dict:
|
||||
def load_members(dir: Path) -> dict:
|
||||
members: dict = {}
|
||||
for py in glob.glob(str(PKG_DIR) + "/**/*.py", recursive=True):
|
||||
module = py[len(str(PKG_DIR)) + 1 :].replace(".py", "").replace("/", ".")
|
||||
for py in glob.glob(str(dir) + "/**/*.py", recursive=True):
|
||||
module = py[len(str(dir)) + 1 :].replace(".py", "").replace("/", ".")
|
||||
top_level = module.split(".")[0]
|
||||
if top_level not in members:
|
||||
members[top_level] = {"classes": [], "functions": []}
|
||||
@@ -26,12 +28,10 @@ def load_members() -> dict:
|
||||
return members
|
||||
|
||||
|
||||
def construct_doc(members: dict) -> str:
|
||||
full_doc = """\
|
||||
.. _api_reference:
|
||||
|
||||
def construct_doc(pkg: str, members: dict) -> str:
|
||||
full_doc = f"""\
|
||||
=============
|
||||
API Reference
|
||||
``{pkg}`` API Reference
|
||||
=============
|
||||
|
||||
"""
|
||||
@@ -40,16 +40,12 @@ API Reference
|
||||
functions = _members["functions"]
|
||||
if not (classes or functions):
|
||||
continue
|
||||
|
||||
module_title = module.replace("_", " ").title()
|
||||
if module_title == "Llms":
|
||||
module_title = "LLMs"
|
||||
section = f":mod:`langchain.{module}`: {module_title}"
|
||||
section = f":mod:`{pkg}.{module}`"
|
||||
full_doc += f"""\
|
||||
{section}
|
||||
{'=' * (len(section) + 1)}
|
||||
|
||||
.. automodule:: langchain.{module}
|
||||
.. automodule:: {pkg}.{module}
|
||||
:no-members:
|
||||
:no-inherited-members:
|
||||
|
||||
@@ -60,7 +56,7 @@ API Reference
|
||||
full_doc += f"""\
|
||||
Classes
|
||||
--------------
|
||||
.. currentmodule:: langchain
|
||||
.. currentmodule:: {pkg}
|
||||
|
||||
.. autosummary::
|
||||
:toctree: {module}
|
||||
@@ -74,10 +70,11 @@ Classes
|
||||
full_doc += f"""\
|
||||
Functions
|
||||
--------------
|
||||
.. currentmodule:: langchain
|
||||
.. currentmodule:: {pkg}
|
||||
|
||||
.. autosummary::
|
||||
:toctree: {module}
|
||||
:template: function.rst
|
||||
|
||||
{fstring}
|
||||
|
||||
@@ -86,10 +83,14 @@ Functions
|
||||
|
||||
|
||||
def main() -> None:
|
||||
members = load_members()
|
||||
full_doc = construct_doc(members)
|
||||
lc_members = load_members(PKG_DIR)
|
||||
lc_doc = ".. _api_reference:\n\n" + construct_doc("langchain", lc_members)
|
||||
with open(WRITE_FILE, "w") as f:
|
||||
f.write(full_doc)
|
||||
f.write(lc_doc)
|
||||
exp_members = load_members(EXP_DIR)
|
||||
exp_doc = ".. _experimental_api_reference:\n\n" + construct_doc("langchain_experimental", exp_members)
|
||||
with open(EXP_WRITE_FILE, "w") as f:
|
||||
f.write(exp_doc)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
1
docs/api_reference/guide_imports.json
Normal file
1
docs/api_reference/guide_imports.json
Normal file
File diff suppressed because one or more lines are too long
@@ -1,9 +0,0 @@
|
||||
Evaluation
|
||||
=======================
|
||||
|
||||
LangChain has a number of convenient evaluation chains you can use off the shelf to grade your models' oupputs.
|
||||
|
||||
.. automodule:: langchain.evaluation
|
||||
:members:
|
||||
:undoc-members:
|
||||
:inherited-members:
|
||||
@@ -26,3 +26,5 @@
|
||||
{%- endfor %}
|
||||
{% endif %}
|
||||
{% endblock %}
|
||||
|
||||
.. example_links:: {{ objname }}
|
||||
8
docs/api_reference/templates/function.rst
Normal file
8
docs/api_reference/templates/function.rst
Normal file
@@ -0,0 +1,8 @@
|
||||
:mod:`{{module}}`.{{objname}}
|
||||
{{ underline }}==============
|
||||
|
||||
.. currentmodule:: {{ module }}
|
||||
|
||||
.. autofunction:: {{ objname }}
|
||||
|
||||
.. example_links:: {{ objname }}
|
||||
@@ -45,6 +45,9 @@
|
||||
<li class="nav-item">
|
||||
<a class="sk-nav-link nav-link" href="{{ pathto('api_reference') }}">API</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="sk-nav-link nav-link" href="{{ pathto('experimental_api_reference') }}">Experimental</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="sk-nav-link nav-link" target="_blank" rel="noopener noreferrer" href="https://python.langchain.com/">Python Docs</a>
|
||||
</li>
|
||||
|
||||
@@ -745,6 +745,11 @@ span.descname {
|
||||
background-color: transparent;
|
||||
padding: 0;
|
||||
font-family: monospace;
|
||||
font-size: 1.2rem;
|
||||
}
|
||||
|
||||
em.property {
|
||||
font-weight: normal;
|
||||
}
|
||||
|
||||
span.descclassname {
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
---
|
||||
sidebar_position: 3
|
||||
---
|
||||
# Comparison Evaluators
|
||||
|
||||
Comparison evaluators in LangChain help measure two different chain or LLM outputs. These evaluators are helpful for comparative analyses, such as A/B testing between two language models, or comparing different versions of the same model. They can also be useful for things like generating preference scores for ai-assisted reinforcement learning.
|
||||
|
||||
These evaluators inherit from the `PairwiseStringEvaluator` class, providing a comparison interface for two strings - typically, the outputs from two different prompts or models, or two versions of the same model. In essence, a comparison evaluator performs an evaluation on a pair of strings and returns a dictionary containing the evaluation score and other relevant details.
|
||||
|
||||
To create a custom comparison evaluator, inherit from the `PairwiseStringEvaluator` class and overwrite the `_evaluate_string_pairs` method. If you require asynchronous evaluation, also overwrite the `_aevaluate_string_pairs` method.
|
||||
|
||||
Here's a summary of the key methods and properties of a comparison evaluator:
|
||||
|
||||
- `evaluate_string_pairs`: Evaluate the output string pairs. This function should be overwritten when creating custom evaluators.
|
||||
- `aevaluate_string_pairs`: Asynchronously evaluate the output string pairs. This function should be overwritten for asynchronous evaluation.
|
||||
- `requires_input`: This property indicates whether this evaluator requires an input string.
|
||||
- `requires_reference`: This property specifies whether this evaluator requires a reference label.
|
||||
|
||||
Detailed information about creating custom evaluators and the available built-in comparison evaluators are provided in the following sections.
|
||||
|
||||
import DocCardList from "@theme/DocCardList";
|
||||
|
||||
<DocCardList />
|
||||
|
||||
31
docs/docs_skeleton/docs/guides/evaluation/index.mdx
Normal file
31
docs/docs_skeleton/docs/guides/evaluation/index.mdx
Normal file
@@ -0,0 +1,31 @@
|
||||
---
|
||||
sidebar_position: 6
|
||||
---
|
||||
|
||||
import DocCardList from "@theme/DocCardList";
|
||||
|
||||
# Evaluation
|
||||
|
||||
Building applications with language models involves many moving parts. One of the most critical components is ensuring that the outcomes produced by your models are reliable and useful across a broad array of inputs, and that they work well with your application's other software components. Ensuring reliability usually boils down to some combination of application design, testing & evaluation, and runtime checks.
|
||||
|
||||
The guides in this section review the APIs and functionality LangChain provides to help yous better evaluate your applications. Evaluation and testing are both critical when thinking about deploying LLM applications, since production environments require repeatable and useful outcomes.
|
||||
|
||||
LangChain offers various types of evaluators to help you measure performance and integrity on diverse data, and we hope to encourage the the community to create and share other useful evaluators so everyone can improve. These docs will introduce the evaluator types, how to use them, and provide some examples of their use in real-world scenarios.
|
||||
|
||||
Each evaluator type in LangChain comes with ready-to-use implementations and an extensible API that allows for customization according to your unique requirements. Here are some of the types of evaluators we offer:
|
||||
|
||||
- [String Evaluators](/docs/guides/evaluation/string/): These evaluators assess the predicted string for a given input, usually comparing it against a reference string.
|
||||
- [Trajectory Evaluators](/docs/guides/evaluation/trajectory/): These are used to evaluate the entire trajectory of agent actions.
|
||||
- [Comparison Evaluators](/docs/guides/evaluation/comparison/): These evaluators are designed to compare predictions from two runs on a common input.
|
||||
|
||||
These evaluators can be used across various scenarios and can be applied to different chain and LLM implementations in the LangChain library.
|
||||
|
||||
We also are working to share guides and cookbooks that demonstrate how to use these evaluators in real-world scenarios, such as:
|
||||
|
||||
- [Chain Comparisons](/docs/guides/evaluation/examples/comparisons): This example uses a comparison evaluator to predict the preferred output. It reviews ways to measure confidence intervals to select statistically significant differences in aggregate preference scores across different models or prompts.
|
||||
|
||||
## Reference Docs
|
||||
|
||||
For detailed information on the available evaluators, including how to instantiate, configure, and customize them, check out the [reference documentation](https://api.python.langchain.com/en/latest/api_reference.html#module-langchain.evaluation) directly.
|
||||
|
||||
<DocCardList />
|
||||
27
docs/docs_skeleton/docs/guides/evaluation/string/index.mdx
Normal file
27
docs/docs_skeleton/docs/guides/evaluation/string/index.mdx
Normal file
@@ -0,0 +1,27 @@
|
||||
---
|
||||
sidebar_position: 2
|
||||
---
|
||||
# String Evaluators
|
||||
|
||||
A string evaluator is a component within LangChain designed to assess the performance of a language model by comparing its generated outputs (predictions) to a reference string or an input. This comparison is a crucial step in the evaluation of language models, providing a measure of the accuracy or quality of the generated text.
|
||||
|
||||
In practice, string evaluators are typically used to evaluate a predicted string against a given input, such as a question or a prompt. Often, a reference label or context string is provided to define what a correct or ideal response would look like. These evaluators can be customized to tailor the evaluation process to fit your application's specific requirements.
|
||||
|
||||
To create a custom string evaluator, inherit from the `StringEvaluator` class and implement the `_evaluate_strings` method. If you require asynchronous support, also implement the `_aevaluate_strings` method.
|
||||
|
||||
Here's a summary of the key attributes and methods associated with a string evaluator:
|
||||
|
||||
- `evaluation_name`: Specifies the name of the evaluation.
|
||||
- `requires_input`: Boolean attribute that indicates whether the evaluator requires an input string. If True, the evaluator will raise an error when the input isn't provided. If False, a warning will be logged if an input _is_ provided, indicating that it will not be considered in the evaluation.
|
||||
- `requires_reference`: Boolean attribute specifying whether the evaluator requires a reference label. If True, the evaluator will raise an error when the reference isn't provided. If False, a warning will be logged if a reference _is_ provided, indicating that it will not be considered in the evaluation.
|
||||
|
||||
String evaluators also implement the following methods:
|
||||
|
||||
- `aevaluate_strings`: Asynchronously evaluates the output of the Chain or Language Model, with support for optional input and label.
|
||||
- `evaluate_strings`: Synchronously evaluates the output of the Chain or Language Model, with support for optional input and label.
|
||||
|
||||
The following sections provide detailed information on available string evaluator implementations as well as how to create a custom string evaluator.
|
||||
|
||||
import DocCardList from "@theme/DocCardList";
|
||||
|
||||
<DocCardList />
|
||||
@@ -0,0 +1,28 @@
|
||||
---
|
||||
sidebar_position: 4
|
||||
---
|
||||
# Trajectory Evaluators
|
||||
|
||||
Trajectory Evaluators in LangChain provide a more holistic approach to evaluating an agent. These evaluators assess the full sequence of actions taken by an agent and their corresponding responses, which we refer to as the "trajectory". This allows you to better measure an agent's effectiveness and capabilities.
|
||||
|
||||
A Trajectory Evaluator implements the `AgentTrajectoryEvaluator` interface, which requires two main methods:
|
||||
|
||||
- `evaluate_agent_trajectory`: This method synchronously evaluates an agent's trajectory.
|
||||
- `aevaluate_agent_trajectory`: This asynchronous counterpart allows evaluations to be run in parallel for efficiency.
|
||||
|
||||
Both methods accept three main parameters:
|
||||
|
||||
- `input`: The initial input given to the agent.
|
||||
- `prediction`: The final predicted response from the agent.
|
||||
- `agent_trajectory`: The intermediate steps taken by the agent, given as a list of tuples.
|
||||
|
||||
These methods return a dictionary. It is recommended that custom implementations return a `score` (a float indicating the effectiveness of the agent) and `reasoning` (a string explaining the reasoning behind the score).
|
||||
|
||||
You can capture an agent's trajectory by initializing the agent with the `return_intermediate_steps=True` parameter. This lets you collect all intermediate steps without relying on special callbacks.
|
||||
|
||||
For a deeper dive into the implementation and use of Trajectory Evaluators, refer to the sections below.
|
||||
|
||||
import DocCardList from "@theme/DocCardList";
|
||||
|
||||
<DocCardList />
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
# LangChain Expression Language
|
||||
|
||||
import DocCardList from "@theme/DocCardList";
|
||||
|
||||
LangChain Expression Language is a declarative way to easily compose chains together.
|
||||
Any chain constructed this way will automatically have full sync, async, and streaming support.
|
||||
See guides below for how to interact with chains constructed this way as well as cookbook examples.
|
||||
|
||||
<DocCardList />
|
||||
6
docs/docs_skeleton/docs/guides/safety/index.mdx
Normal file
6
docs/docs_skeleton/docs/guides/safety/index.mdx
Normal file
@@ -0,0 +1,6 @@
|
||||
# Preventing harmful outputs
|
||||
|
||||
One of the key concerns with using LLMs is that they may generate harmful or unethical text. This is an area of active research in the field. Here we present some built-in chains inspired by this research, which are intended to make the outputs of LLMs safer.
|
||||
|
||||
- [Moderation chain](/docs/use_cases/safety/moderation): Explicitly check if any output text is harmful and flag it.
|
||||
- [Constitutional chain](/docs/use_cases/safety/constitutional_chain): Prompt the model with a set of principles which should guide it's behavior.
|
||||
@@ -28,7 +28,7 @@ navigating around a browser.
|
||||
### [OpenAI Functions](/docs/modules/agents/agent_types/openai_functions_agent.html)
|
||||
|
||||
Certain OpenAI models (like gpt-3.5-turbo-0613 and gpt-4-0613) have been explicitly fine-tuned to detect when a
|
||||
function should to be called and respond with the inputs that should be passed to the function.
|
||||
function should be called and respond with the inputs that should be passed to the function.
|
||||
The OpenAI Functions Agent is designed to work with these models.
|
||||
|
||||
### [Conversational](/docs/modules/agents/agent_types/chat_conversation_agent.html)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# OpenAI functions
|
||||
|
||||
Certain OpenAI models (like gpt-3.5-turbo-0613 and gpt-4-0613) have been fine-tuned to detect when a function should to be called and respond with the inputs that should be passed to the function.
|
||||
Certain OpenAI models (like gpt-3.5-turbo-0613 and gpt-4-0613) have been fine-tuned to detect when a function should be called and respond with the inputs that should be passed to the function.
|
||||
In an API call, you can describe functions and have the model intelligently choose to output a JSON object containing arguments to call those functions.
|
||||
The goal of the OpenAI Function APIs is to more reliably return valid and useful function calls than a generic text completion or chat API.
|
||||
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
---
|
||||
sidebar_position: 4
|
||||
---
|
||||
# Additional
|
||||
|
||||
import DocCardList from "@theme/DocCardList";
|
||||
|
||||
<DocCardList />
|
||||
@@ -1,7 +0,0 @@
|
||||
# Dynamically selecting from multiple prompts
|
||||
|
||||
This notebook demonstrates how to use the `RouterChain` paradigm to create a chain that dynamically selects the prompt to use for a given input. Specifically we show how to use the `MultiPromptChain` to create a question-answering chain that selects the prompt which is most relevant for a given question, and then answers the question using that prompt.
|
||||
|
||||
import Example from "@snippets/modules/chains/additional/multi_prompt_router.mdx"
|
||||
|
||||
<Example/>
|
||||
@@ -1,6 +1,6 @@
|
||||
# Sequential
|
||||
|
||||
<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! Instead, edit the notebook w/the location & name as this file. -->
|
||||
|
||||
|
||||
The next step after calling a language model is make a series of calls to a language model. This is particularly useful when you want to take the output from one call and use it as the input to another.
|
||||
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
---
|
||||
sidebar_position: 3
|
||||
---
|
||||
# Popular
|
||||
|
||||
import DocCardList from "@theme/DocCardList";
|
||||
|
||||
<DocCardList />
|
||||
@@ -1,8 +0,0 @@
|
||||
# Summarization
|
||||
|
||||
A summarization chain can be used to summarize multiple documents. One way is to input multiple smaller documents, after they have been divided into chunks, and operate over them with a MapReduceDocumentsChain. You can also choose instead for the chain that does summarization to be a StuffDocumentsChain, or a RefineDocumentsChain.
|
||||
|
||||
import Example from "@snippets/modules/chains/popular/summarize.mdx"
|
||||
|
||||
<Example/>
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
label: 'Integrations'
|
||||
@@ -1,8 +0,0 @@
|
||||
---
|
||||
sidebar_position: 3
|
||||
---
|
||||
# Comparison Evaluators
|
||||
|
||||
import DocCardList from "@theme/DocCardList";
|
||||
|
||||
<DocCardList />
|
||||
@@ -1,28 +0,0 @@
|
||||
---
|
||||
sidebar_position: 6
|
||||
---
|
||||
|
||||
import DocCardList from "@theme/DocCardList";
|
||||
|
||||
# Evaluation
|
||||
|
||||
Language models can be unpredictable. This makes it challenging to ship reliable applications to production, where repeatable, useful outcomes across diverse inputs are a minimum requirement. Tests help demonstrate each component in an LLM application can produce the required or expected functionality. These tests also safeguard against regressions while you improve interconnected pieces of an integrated system. However, measuring the quality of generated text can be challenging. It can be hard to agree on the right set of metrics for your application, and it can be difficult to translate those into better performance. Furthermore, it's common to lack sufficient evaluation data to adequately test the range of inputs and expected outputs for each component when you're just getting started. The LangChain community is building open source tools and guides to help address these challenges.
|
||||
|
||||
LangChain exposes different types of evaluators for common types of evaluation. Each type has off-the-shelf implementations you can use to get started, as well as an
|
||||
extensible API so you can create your own or contribute improvements for everyone to use. The following sections have example notebooks for you to get started.
|
||||
|
||||
- [String Evaluators](/docs/modules/evaluation/string/): Evaluate the predicted string for a given input, usually against a reference string
|
||||
- [Trajectory Evaluators](/docs/modules/evaluation/trajectory/): Evaluate the whole trajectory of agent actions
|
||||
- [Comparison Evaluators](/docs/modules/evaluation/comparison/): Compare predictions from two runs on a common input
|
||||
|
||||
|
||||
This section also provides some additional examples of how you could use these evaluators for different scenarios or apply to different chain implementations in the LangChain library. Some examples include:
|
||||
|
||||
- [Preference Scoring Chain Outputs](/docs/modules/evaluation/examples/comparisons): An example using a comparison evaluator on different models or prompts to select statistically significant differences in aggregate preference scores
|
||||
|
||||
|
||||
## Reference Docs
|
||||
|
||||
For detailed information of the available evaluators, including how to instantiate, configure, and customize them. Check out the [reference documentation](https://api.python.langchain.com/en/latest/api_reference.html#module-langchain.evaluation) directly.
|
||||
|
||||
<DocCardList />
|
||||
@@ -1,8 +0,0 @@
|
||||
---
|
||||
sidebar_position: 2
|
||||
---
|
||||
# String Evaluators
|
||||
|
||||
import DocCardList from "@theme/DocCardList";
|
||||
|
||||
<DocCardList />
|
||||
@@ -1,8 +0,0 @@
|
||||
---
|
||||
sidebar_position: 4
|
||||
---
|
||||
# Trajectory Evaluators
|
||||
|
||||
import DocCardList from "@theme/DocCardList";
|
||||
|
||||
<DocCardList />
|
||||
@@ -0,0 +1,17 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
---
|
||||
# Chat Messages
|
||||
|
||||
:::info
|
||||
Head to [Integrations](/docs/integrations/memory/) for documentation on built-in memory integrations with 3rd-party databases and tools.
|
||||
:::
|
||||
|
||||
One of the core utility classes underpinning most (if not all) memory modules is the `ChatMessageHistory` class.
|
||||
This is a super lightweight wrapper which exposes convenience methods for saving Human messages, AI messages, and then fetching them all.
|
||||
|
||||
You may want to use this class directly if you are managing memory outside of a chain.
|
||||
|
||||
import GetStarted from "@snippets/modules/memory/chat_messages/get_started.mdx"
|
||||
|
||||
<GetStarted/>
|
||||
@@ -1,34 +1,62 @@
|
||||
---
|
||||
sidebar_position: 3
|
||||
---
|
||||
|
||||
# Memory
|
||||
|
||||
🚧 _Docs under construction_ 🚧
|
||||
Most LLM applications have a conversational interface. An essential component of a conversation is being able to refer to information introduced earlier in the conversation.
|
||||
At bare minimum, a conversational system should be able to access some window of past messages directly.
|
||||
A more complex system will need to have a world model that it is constantly updating, which allows it to do things like maintain information about entities and their relationships.
|
||||
|
||||
:::info
|
||||
Head to [Integrations](/docs/integrations/memory/) for documentation on built-in memory integrations with 3rd-party tools.
|
||||
:::
|
||||
We call this ability to store information about past interactions "memory".
|
||||
LangChain provides a lot of utilities for adding memory to a system.
|
||||
These utilities can be used by themselves or incorporated seamlessly into a chain.
|
||||
|
||||
By default, Chains and Agents are stateless,
|
||||
meaning that they treat each incoming query independently (like the underlying LLMs and chat models themselves).
|
||||
In some applications, like chatbots, it is essential
|
||||
to remember previous interactions, both in the short and long-term.
|
||||
The **Memory** class does exactly that.
|
||||
A memory system needs to support two basic actions: reading and writing.
|
||||
Recall that every chain defines some core execution logic that expects certain inputs.
|
||||
Some of these inputs come directly from the user, but some of these inputs can come from memory.
|
||||
A chain will interact with its memory system twice in a given run.
|
||||
1. AFTER receiving the initial user inputs but BEFORE executing the core logic, a chain will READ from its memory system and augment the user inputs.
|
||||
2. AFTER executing the core logic but BEFORE returning the answer, a chain will WRITE the inputs and outputs of the current run to memory, so that they can be referred to in future runs.
|
||||
|
||||
LangChain provides memory components in two forms.
|
||||
First, LangChain provides helper utilities for managing and manipulating previous chat messages.
|
||||
These are designed to be modular and useful regardless of how they are used.
|
||||
Secondly, LangChain provides easy ways to incorporate these utilities into chains.
|
||||

|
||||
|
||||
|
||||
## Building memory into a system
|
||||
The two core design decisions in any memory system are:
|
||||
- How state is stored
|
||||
- How state is queried
|
||||
|
||||
### Storing: List of chat messages
|
||||
Underlying any memory is a history of all chat interactions.
|
||||
Even if these are not all used directly, they need to be stored in some form.
|
||||
One of the key parts of the LangChain memory module is a series of integrations for storing these chat messages,
|
||||
from in-memory lists to persistent databases.
|
||||
|
||||
- [Chat message storage](/docs/modules/memory/chat_messages/): How to work with Chat Messages, and the various integrations offered
|
||||
|
||||
### Querying: Data structures and algorithms on top of chat messages
|
||||
Keeping a list of chat messages is fairly straight-forward.
|
||||
What is less straight-forward are the data structures and algorithms built on top of chat messages that serve a view of those messages that is most useful.
|
||||
|
||||
A very simply memory system might just return the most recent messages each run. A slightly more complex memory system might return a succinct summary of the past K messages.
|
||||
An even more sophisticated system might extract entities from stored messages and only return information about entities referenced in the current run.
|
||||
|
||||
Each application can have different requirements for how memory is queried. The memory module should make it easy to both get started with simple memory systems and write your own custom systems if needed.
|
||||
|
||||
- [Memory types](/docs/modules/memory/types/): The various data structures and algorithms that make up the memory types LangChain supports
|
||||
|
||||
## Get started
|
||||
|
||||
Memory involves keeping a concept of state around throughout a user's interactions with an language model. A user's interactions with a language model are captured in the concept of ChatMessages, so this boils down to ingesting, capturing, transforming and extracting knowledge from a sequence of chat messages. There are many different ways to do this, each of which exists as its own memory type.
|
||||
|
||||
In general, for each type of memory there are two ways to understanding using memory. These are the standalone functions which extract information from a sequence of messages, and then there is the way you can use this type of memory in a chain.
|
||||
|
||||
Memory can return multiple pieces of information (for example, the most recent N messages and a summary of all previous messages). The returned information can either be a string or a list of messages.
|
||||
Let's take a look at what Memory actually looks like in LangChain.
|
||||
Here we'll cover the basics of interacting with an arbitrary memory class.
|
||||
|
||||
import GetStarted from "@snippets/modules/memory/get_started.mdx"
|
||||
|
||||
<GetStarted/>
|
||||
|
||||
## Next steps
|
||||
|
||||
And that's it for getting started!
|
||||
Please see the other sections for walkthroughs of more advanced topics,
|
||||
like custom memory, multiple memories, and more.
|
||||
|
||||
|
||||
@@ -4,6 +4,6 @@ This notebook shows how to use `ConversationBufferMemory`. This memory allows fo
|
||||
|
||||
We can first extract it as a string.
|
||||
|
||||
import Example from "@snippets/modules/memory/how_to/buffer.mdx"
|
||||
import Example from "@snippets/modules/memory/types/buffer.mdx"
|
||||
|
||||
<Example/>
|
||||
@@ -4,6 +4,6 @@
|
||||
|
||||
Let's first explore the basic functionality of this type of memory.
|
||||
|
||||
import Example from "@snippets/modules/memory/how_to/buffer_window.mdx"
|
||||
import Example from "@snippets/modules/memory/types/buffer_window.mdx"
|
||||
|
||||
<Example/>
|
||||
@@ -4,6 +4,6 @@ Entity Memory remembers given facts about specific entities in a conversation. I
|
||||
|
||||
Let's first walk through using this functionality.
|
||||
|
||||
import Example from "@snippets/modules/memory/how_to/entity_summary_memory.mdx"
|
||||
import Example from "@snippets/modules/memory/types/entity_summary_memory.mdx"
|
||||
|
||||
<Example/>
|
||||
8
docs/docs_skeleton/docs/modules/memory/types/index.mdx
Normal file
8
docs/docs_skeleton/docs/modules/memory/types/index.mdx
Normal file
@@ -0,0 +1,8 @@
|
||||
---
|
||||
sidebar_position: 2
|
||||
---
|
||||
# Memory Types
|
||||
|
||||
There are many different types of memory.
|
||||
Each have their own parameters, their own return types, and are useful in different scenarios.
|
||||
Please see their individual page for more detail on each one.
|
||||
@@ -4,6 +4,6 @@ Conversation summary memory summarizes the conversation as it happens and stores
|
||||
|
||||
Let's first explore the basic functionality of this type of memory.
|
||||
|
||||
import Example from "@snippets/modules/memory/how_to/summary.mdx"
|
||||
import Example from "@snippets/modules/memory/types/summary.mdx"
|
||||
|
||||
<Example/>
|
||||
@@ -6,6 +6,6 @@ This differs from most of the other Memory classes in that it doesn't explicitly
|
||||
|
||||
In this case, the "docs" are previous conversation snippets. This can be useful to refer to relevant pieces of information that the AI was told earlier in the conversation.
|
||||
|
||||
import Example from "@snippets/modules/memory/how_to/vectorstore_retriever_memory.mdx"
|
||||
import Example from "@snippets/modules/memory/types/vectorstore_retriever_memory.mdx"
|
||||
|
||||
<Example/>
|
||||
@@ -8,7 +8,7 @@ Head to [Integrations](/docs/integrations/llms/) for documentation on built-in i
|
||||
:::
|
||||
|
||||
Large Language Models (LLMs) are a core component of LangChain.
|
||||
LangChain does not serve it's own LLMs, but rather provides a standard interface for interacting with many different LLMs.
|
||||
LangChain does not serve its own LLMs, but rather provides a standard interface for interacting with many different LLMs.
|
||||
|
||||
## Get started
|
||||
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
label: 'How to'
|
||||
@@ -2,7 +2,7 @@
|
||||
sidebar_position: 2
|
||||
---
|
||||
|
||||
# Conversational Retrieval QA
|
||||
# Store and reference chat history
|
||||
The ConversationalRetrievalQA chain builds on RetrievalQAChain to provide a chat history component.
|
||||
|
||||
It first combines the chat history (either explicitly passed in or retrieved from the provided memory) and the question into a standalone question, then looks up relevant documents from the retriever, and finally passes those documents and the question to a question answering chain to return a response.
|
||||
@@ -1,4 +1,4 @@
|
||||
# Dynamically selecting from multiple retrievers
|
||||
# Dynamically select from multiple retrievers
|
||||
|
||||
This notebook demonstrates how to use the `RouterChain` paradigm to create a chain that dynamically selects which Retrieval system to use. Specifically we show how to use the `MultiRetrievalQAChain` to create a question-answering chain that selects the retrieval QA chain which is most relevant for a given question, and then answers the question using it.
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Document QA
|
||||
# QA over in-memory documents
|
||||
|
||||
Here we walk through how to use LangChain for question answering over a list of documents. Under the hood we'll be using our [Document chains](/docs/modules/chains/document/).
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
---
|
||||
# Retrieval QA
|
||||
# QA using a Retriever
|
||||
|
||||
This example showcases question answering over an index.
|
||||
|
||||
183
docs/docs_skeleton/generate_api_reference_links.py
Normal file
183
docs/docs_skeleton/generate_api_reference_links.py
Normal file
@@ -0,0 +1,183 @@
|
||||
import importlib
|
||||
import inspect
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
import argparse
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
# Base URL for all class documentation
|
||||
_BASE_URL = "https://api.python.langchain.com/en/latest/"
|
||||
|
||||
# Regular expression to match Python code blocks
|
||||
code_block_re = re.compile(r"^(```python\n)(.*?)(```\n)", re.DOTALL | re.MULTILINE)
|
||||
# Regular expression to match langchain import lines
|
||||
_IMPORT_RE = re.compile(
|
||||
r"from\s+(langchain\.\w+(\.\w+)*?)\s+import\s+"
|
||||
r"((?:\w+(?:,\s*)?)*" # Match zero or more words separated by a comma+optional ws
|
||||
r"(?:\s*\(.*?\))?)", # Match optional parentheses block
|
||||
re.DOTALL, # Match newlines as well
|
||||
)
|
||||
|
||||
_CURRENT_PATH = Path(__file__).parent.absolute()
|
||||
# Directory where generated markdown files are stored
|
||||
_DOCS_DIR = _CURRENT_PATH / "docs"
|
||||
_JSON_PATH = _CURRENT_PATH.parent / "api_reference" / "guide_imports.json"
|
||||
|
||||
|
||||
def find_files(path):
|
||||
"""Find all MDX files in the given path"""
|
||||
# Check if is file first
|
||||
if os.path.isfile(path):
|
||||
yield path
|
||||
return
|
||||
for root, _, files in os.walk(path):
|
||||
for file in files:
|
||||
if file.endswith(".mdx") or file.endswith(".md"):
|
||||
yield os.path.join(root, file)
|
||||
|
||||
|
||||
def get_full_module_name(module_path, class_name):
|
||||
"""Get full module name using inspect"""
|
||||
module = importlib.import_module(module_path)
|
||||
class_ = getattr(module, class_name)
|
||||
return inspect.getmodule(class_).__name__
|
||||
|
||||
|
||||
def get_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
"--docs_dir",
|
||||
type=str,
|
||||
default=_DOCS_DIR,
|
||||
help="Directory where generated markdown files are stored",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def main():
|
||||
"""Main function"""
|
||||
args = get_args()
|
||||
global_imports = {}
|
||||
|
||||
for file in find_files(args.docs_dir):
|
||||
print(f"Adding links for imports in {file}")
|
||||
file_imports = replace_imports(file)
|
||||
|
||||
if file_imports:
|
||||
# Use relative file path as key
|
||||
relative_path = (
|
||||
os.path.relpath(file, _DOCS_DIR).replace(".mdx", "").replace(".md", "")
|
||||
)
|
||||
|
||||
doc_url = f"https://python.langchain.com/docs/{relative_path}"
|
||||
for import_info in file_imports:
|
||||
doc_title = import_info["title"]
|
||||
class_name = import_info["imported"]
|
||||
if class_name not in global_imports:
|
||||
global_imports[class_name] = {}
|
||||
global_imports[class_name][doc_title] = doc_url
|
||||
|
||||
# Write the global imports information to a JSON file
|
||||
_JSON_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||
with _JSON_PATH.open("w") as f:
|
||||
json.dump(global_imports, f)
|
||||
|
||||
|
||||
def _get_doc_title(data: str, file_name: str) -> str:
|
||||
try:
|
||||
return re.findall(r"^#\s+(.*)", data, re.MULTILINE)[0]
|
||||
except IndexError:
|
||||
pass
|
||||
# Parse the rst-style titles
|
||||
try:
|
||||
return re.findall(r"^(.*)\n=+\n", data, re.MULTILINE)[0]
|
||||
except IndexError:
|
||||
return file_name
|
||||
|
||||
|
||||
def replace_imports(file):
|
||||
"""Replace imports in each Python code block with links to their
|
||||
documentation and append the import info in a comment"""
|
||||
all_imports = []
|
||||
with open(file, "r") as f:
|
||||
data = f.read()
|
||||
|
||||
file_name = os.path.basename(file)
|
||||
_DOC_TITLE = _get_doc_title(data, file_name)
|
||||
|
||||
def replacer(match):
|
||||
# Extract the code block content
|
||||
code = match.group(2)
|
||||
# Replace if any import comment exists
|
||||
# TODO: Use our own custom <code> component rather than this
|
||||
# injection method
|
||||
existing_comment_re = re.compile(r"^<!--IMPORTS:.*?-->\n", re.MULTILINE)
|
||||
code = existing_comment_re.sub("", code)
|
||||
|
||||
# Process imports in the code block
|
||||
imports = []
|
||||
for import_match in _IMPORT_RE.finditer(code):
|
||||
module = import_match.group(1)
|
||||
imports_str = (
|
||||
import_match.group(3).replace("(\n", "").replace("\n)", "")
|
||||
) # Handle newlines within parentheses
|
||||
# remove any newline and spaces, then split by comma
|
||||
imported_classes = [
|
||||
imp.strip()
|
||||
for imp in re.split(r",\s*", imports_str.replace("\n", ""))
|
||||
if imp.strip()
|
||||
]
|
||||
for class_name in imported_classes:
|
||||
try:
|
||||
module_path = get_full_module_name(module, class_name)
|
||||
except AttributeError as e:
|
||||
logger.warning(f"Could not find module for {class_name}, {e}")
|
||||
continue
|
||||
except ImportError as e:
|
||||
logger.warning(f"Failed to load for class {class_name}, {e}")
|
||||
continue
|
||||
|
||||
url = (
|
||||
_BASE_URL
|
||||
+ module_path.split(".")[1]
|
||||
+ "/"
|
||||
+ module_path
|
||||
+ "."
|
||||
+ class_name
|
||||
+ ".html"
|
||||
)
|
||||
|
||||
# Add the import information to our list
|
||||
imports.append(
|
||||
{
|
||||
"imported": class_name,
|
||||
"source": module,
|
||||
"docs": url,
|
||||
"title": _DOC_TITLE,
|
||||
}
|
||||
)
|
||||
|
||||
if imports:
|
||||
all_imports.extend(imports)
|
||||
# Create a unique comment containing the import information
|
||||
import_comment = f"<!--IMPORTS:{json.dumps(imports)}-->"
|
||||
# Inject the import comment at the start of the code block
|
||||
return match.group(1) + import_comment + "\n" + code + match.group(3)
|
||||
else:
|
||||
# If there are no imports, return the original match
|
||||
return match.group(0)
|
||||
|
||||
# Use re.sub to replace each Python code block
|
||||
data = code_block_re.sub(replacer, data)
|
||||
|
||||
with open(file, "w") as f:
|
||||
f.write(data)
|
||||
return all_imports
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -21,7 +21,7 @@ function Imports({ imports }) {
|
||||
</h4>
|
||||
<ul style={{ paddingBottom: "1rem" }}>
|
||||
{imports.map(({ imported, source, docs }) => (
|
||||
<li>
|
||||
<li key={imported}>
|
||||
<a href={docs}>
|
||||
<span>{imported}</span>
|
||||
</a>{" "}
|
||||
@@ -34,14 +34,25 @@ function Imports({ imports }) {
|
||||
}
|
||||
|
||||
export default function CodeBlockWrapper({ children, ...props }) {
|
||||
// Initialize imports as an empty array
|
||||
let imports = [];
|
||||
|
||||
// Check if children is a string
|
||||
if (typeof children === "string") {
|
||||
return <CodeBlock {...props}>{children}</CodeBlock>;
|
||||
// Search for an IMPORTS comment in the code
|
||||
const match = /<!--IMPORTS:(.*?)-->\n/.exec(children);
|
||||
if (match) {
|
||||
imports = JSON.parse(match[1]);
|
||||
children = children.replace(match[0], "");
|
||||
}
|
||||
} else if (children.imports) {
|
||||
imports = children.imports;
|
||||
}
|
||||
|
||||
return (
|
||||
<>
|
||||
<CodeBlock {...props}>{children.content}</CodeBlock>
|
||||
<Imports imports={children.imports} />
|
||||
<CodeBlock {...props}>{children}</CodeBlock>
|
||||
{imports.length > 0 && <Imports imports={imports} />}
|
||||
</>
|
||||
);
|
||||
}
|
||||
}
|
||||
BIN
docs/docs_skeleton/static/img/chat_use_case.png
Normal file
BIN
docs/docs_skeleton/static/img/chat_use_case.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 93 KiB |
BIN
docs/docs_skeleton/static/img/chat_use_case_2.png
Normal file
BIN
docs/docs_skeleton/static/img/chat_use_case_2.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 102 KiB |
BIN
docs/docs_skeleton/static/img/memory_diagram.png
Normal file
BIN
docs/docs_skeleton/static/img/memory_diagram.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 111 KiB |
BIN
docs/docs_skeleton/static/img/summarization_use_case_1.png
Normal file
BIN
docs/docs_skeleton/static/img/summarization_use_case_1.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 196 KiB |
BIN
docs/docs_skeleton/static/img/summarization_use_case_2.png
Normal file
BIN
docs/docs_skeleton/static/img/summarization_use_case_2.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 90 KiB |
BIN
docs/docs_skeleton/static/img/summarization_use_case_3.png
Normal file
BIN
docs/docs_skeleton/static/img/summarization_use_case_3.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 174 KiB |
@@ -1078,7 +1078,7 @@
|
||||
},
|
||||
{
|
||||
"source": "/docs/ecosystem/integrations/",
|
||||
"destination": "/docs/integrations/providers/"
|
||||
"destination": "/docs/integrations/"
|
||||
},
|
||||
{
|
||||
"source": "/docs/ecosystem/integrations/:path*",
|
||||
@@ -1610,59 +1610,59 @@
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/chains/examples/flare.html",
|
||||
"destination": "/docs/modules/chains/additional/flare"
|
||||
"destination": "/docs/use_cases/question_answering/how_to/flare"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/chains/examples/graph_cypher_qa.html",
|
||||
"destination": "/docs/modules/chains/additional/graph_cypher_qa"
|
||||
"destination": "/docs/use_cases/graph/graph_cypher_qa"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/chains/examples/graph_nebula_qa.html",
|
||||
"destination": "/docs/modules/chains/additional/graph_nebula_qa"
|
||||
"destination": "/docs/use_cases/graph/graph_nebula_qa"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/chains/index_examples/graph_qa.html",
|
||||
"destination": "/docs/modules/chains/additional/graph_qa"
|
||||
"destination": "/docs/use_cases/graph/graph_qa"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/chains/index_examples/hyde.html",
|
||||
"destination": "/docs/modules/chains/additional/hyde"
|
||||
"destination": "/docs/use_cases/question_answering/how_to/hyde"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/chains/examples/llm_bash.html",
|
||||
"destination": "/docs/modules/chains/additional/llm_bash"
|
||||
"destination": "/docs/use_cases/code_writing/llm_bash"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/chains/examples/llm_checker.html",
|
||||
"destination": "/docs/modules/chains/additional/llm_checker"
|
||||
"destination": "/docs/use_cases/self_check/llm_checker"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/chains/examples/llm_math.html",
|
||||
"destination": "/docs/modules/chains/additional/llm_math"
|
||||
"destination": "/docs/use_cases/code_writing/llm_math"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/chains/examples/llm_requests.html",
|
||||
"destination": "/docs/modules/chains/additional/llm_requests"
|
||||
"destination": "/docs/use_cases/apis/llm_requests"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/chains/examples/llm_summarization_checker.html",
|
||||
"destination": "/docs/modules/chains/additional/llm_summarization_checker"
|
||||
"destination": "/docs/use_cases/self_check/llm_summarization_checker"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/chains/examples/openapi.html",
|
||||
"destination": "/docs/modules/chains/additional/openapi"
|
||||
"destination": "/docs/use_cases/apis/openapi"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/chains/examples/pal.html",
|
||||
"destination": "/docs/modules/chains/additional/pal"
|
||||
"destination": "/docs/use_cases/code_writing/pal"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/chains/examples/tagging.html",
|
||||
"destination": "/docs/modules/chains/additional/tagging"
|
||||
"destination": "/docs/use_cases/tagging"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/chains/index_examples/vector_db_text_generation.html",
|
||||
"destination": "/docs/modules/chains/additional/vector_db_text_generation"
|
||||
"destination": "/docs/use_cases/question_answering/how_to/vector_db_text_generation"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/chains/generic/router.html",
|
||||
@@ -3448,6 +3448,10 @@
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/writer",
|
||||
"destination": "/docs/integrations/llms/writer"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/prompts.html",
|
||||
"destination": "/docs/modules/model_io/prompts"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/prompts/output_parsers.html",
|
||||
"destination": "/docs/modules/model_io/output_parsers/"
|
||||
@@ -3472,6 +3476,10 @@
|
||||
"source": "/en/latest/modules/prompts/output_parsers/examples/retry.html",
|
||||
"destination": "/docs/modules/model_io/output_parsers/retry"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/prompts/example_selectors.html",
|
||||
"destination": "/docs/modules/model_io/prompts/example_selectors"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/prompts/example_selectors/examples/custom_example_selector.html",
|
||||
"destination": "/docs/modules/model_io/prompts/example_selectors/custom_example_selector"
|
||||
@@ -3484,6 +3492,10 @@
|
||||
"source": "/en/latest/modules/prompts/example_selectors/examples/ngram_overlap.html",
|
||||
"destination": "/docs/modules/model_io/prompts/example_selectors/ngram_overlap"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/prompts/prompt_templates.html",
|
||||
"destination": "/docs/modules/model_io/prompts/prompt_templates"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/prompts/prompt_templates/examples/connecting_to_a_feature_store.html",
|
||||
"destination": "/docs/modules/model_io/prompts/prompt_templates/connecting_to_a_feature_store"
|
||||
@@ -3728,6 +3740,18 @@
|
||||
"source": "/docs/modules/model_io/models/chat/integrations/:path*",
|
||||
"destination": "/docs/integrations/chat/:path*"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/evaluation(/?)",
|
||||
"destination": "/docs/guides/evaluation"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/evaluation/:path*(/?)",
|
||||
"destination": "/docs/guides/evaluation/:path*"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/indexes.html",
|
||||
"destination": "/docs/modules/data_connection"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/indexes/:path*",
|
||||
"destination": "/docs/modules/data_connection/:path*"
|
||||
@@ -3763,6 +3787,174 @@
|
||||
{
|
||||
"source": "/en/latest/:path*",
|
||||
"destination": "/docs/:path*"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/constitutional_chain",
|
||||
"destination": "/docs/guides/safety/constitutional_chain"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/moderation",
|
||||
"destination": "/docs/guides/safety/moderation"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/popular/api",
|
||||
"destination": "/docs/use_cases/apis/api"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/analyze_document",
|
||||
"destination": "/docs/use_cases/question_answering/how_to/analyze_document"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/popular/chat_vector_db",
|
||||
"destination": "/docs/use_cases/question_answering/how_to/chat_vector_db"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/multi_retrieval_qa_router",
|
||||
"destination": "/docs/use_cases/question_answering/how_to/multi_retrieval_qa_router"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/question_answering",
|
||||
"destination": "/docs/use_cases/question_answering/how_to/question_answering"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/popular/vector_db_qa",
|
||||
"destination": "/docs/use_cases/question_answering/how_to/vector_db_qa"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/popular/summarize",
|
||||
"destination": "/docs/use_cases/summarization/summarize"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/popular/sqlite",
|
||||
"destination": "/docs/use_cases/tabular/sqlite"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/popular/openai_functions",
|
||||
"destination": "/docs/modules/chains/how_to/openai_functions"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/llm_requests",
|
||||
"destination": "/docs/use_cases/apis/llm_requests"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/openai_openapi",
|
||||
"destination": "/docs/use_cases/apis/openai_openapi"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/openapi",
|
||||
"destination": "/docs/use_cases/apis/openapi"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/openapi_openai",
|
||||
"destination": "/docs/use_cases/apis/openapi_openai"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/cpal",
|
||||
"destination": "/docs/use_cases/code_writing/cpal"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/llm_bash",
|
||||
"destination": "/docs/use_cases/code_writing/llm_bash"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/llm_math",
|
||||
"destination": "/docs/use_cases/code_writing/llm_math"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/llm_symbolic_math",
|
||||
"destination": "/docs/use_cases/code_writing/llm_symbolic_math"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/pal",
|
||||
"destination": "/docs/use_cases/code_writing/pal"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/graph_arangodb_qa",
|
||||
"destination": "/docs/use_cases/graph/graph_arangodb_qa"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/graph_cypher_qa",
|
||||
"destination": "/docs/use_cases/graph/graph_cypher_qa"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/graph_hugegraph_qa",
|
||||
"destination": "/docs/use_cases/graph/graph_hugegraph_qa"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/graph_kuzu_qa",
|
||||
"destination": "/docs/use_cases/graph/graph_kuzu_qa"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/graph_nebula_qa",
|
||||
"destination": "/docs/use_cases/graph/graph_nebula_qa"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/graph_qa",
|
||||
"destination": "/docs/use_cases/graph/graph_qa"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/graph_sparql_qa",
|
||||
"destination": "/docs/use_cases/graph/graph_sparql_qa"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/neptune_cypher_qa",
|
||||
"destination": "/docs/use_cases/graph/neptune_cypher_qa"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/tot",
|
||||
"destination": "/docs/use_cases/graph/tot"
|
||||
},
|
||||
{
|
||||
"source": "/docs/use_cases/question_answering//document-context-aware-QA",
|
||||
"destination": "/docs/use_cases/question_answering/how_to/document-context-aware-QA"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/flare",
|
||||
"destination": "/docs/use_cases/question_answering/how_to/flare"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/hyde",
|
||||
"destination": "/docs/use_cases/question_answering/how_to/hyde"
|
||||
},
|
||||
{
|
||||
"source": "/docs/use_cases/question_answering//local_retrieval_qa",
|
||||
"destination": "/docs/use_cases/question_answering/how_to/local_retrieval_qa"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/qa_citations",
|
||||
"destination": "/docs/use_cases/question_answering/how_to/qa_citations"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/vector_db_text_generation",
|
||||
"destination": "/docs/use_cases/question_answering/how_to/vector_db_text_generation"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/openai_functions_retrieval_qa",
|
||||
"destination": "/docs/use_cases/question_answering/integrations/openai_functions_retrieval_qa"
|
||||
},
|
||||
{
|
||||
"source": "/docs/use_cases/question_answering//semantic-search-over-chat",
|
||||
"destination": "/docs/use_cases/question_answering/integrations/semantic-search-over-chat"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/llm_checker",
|
||||
"destination": "/docs/use_cases/self_check/llm_checker"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/llm_summarization_checker",
|
||||
"destination": "/docs/use_cases/self_check/llm_summarization_checker"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/elasticsearch_database",
|
||||
"destination": "/docs/use_cases/tabular/elasticsearch_database"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/tagging",
|
||||
"destination": "/docs/use_cases/tagging"
|
||||
},
|
||||
{
|
||||
"source": "docs/integrations/providers/agent_with_wandb_tracing",
|
||||
"destination": "docs/integrations/providers/wandb_tracing"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -1,10 +1,53 @@
|
||||
#!/bin/bash
|
||||
|
||||
version_compare() {
|
||||
local v1=(${1//./ })
|
||||
local v2=(${2//./ })
|
||||
for i in {0..2}; do
|
||||
if (( ${v1[i]} < ${v2[i]} )); then
|
||||
return 1
|
||||
fi
|
||||
done
|
||||
return 0
|
||||
}
|
||||
|
||||
openssl_version=$(openssl version | awk '{print $2}')
|
||||
required_openssl_version="1.1.1"
|
||||
|
||||
python_version=$(python3 --version 2>&1 | awk '{print $2}')
|
||||
required_python_version="3.10"
|
||||
|
||||
echo "OpenSSL Version"
|
||||
echo $openssl_version
|
||||
echo "Python Version"
|
||||
echo $python_version
|
||||
# If openssl version is less than 1.1.1 AND python version is less than 3.10
|
||||
if ! version_compare $openssl_version $required_openssl_version && ! version_compare $python_version $required_python_version; then
|
||||
### See: https://github.com/urllib3/urllib3/issues/2168
|
||||
# Requests lib breaks for old SSL versions,
|
||||
# which are defaults on Amazon Linux 2 (which Vercel uses for builds)
|
||||
yum -y update
|
||||
yum remove openssl-devel -y
|
||||
yum install gcc bzip2-devel libffi-devel zlib-devel wget tar -y
|
||||
yum install openssl11 -y
|
||||
yum install openssl11-devel -y
|
||||
|
||||
wget https://www.python.org/ftp/python/3.11.4/Python-3.11.4.tgz
|
||||
tar xzf Python-3.11.4.tgz
|
||||
cd Python-3.11.4
|
||||
./configure
|
||||
make altinstall
|
||||
echo "Python Version"
|
||||
python3.11 --version
|
||||
cd ..
|
||||
fi
|
||||
|
||||
cd ..
|
||||
python3 --version
|
||||
python3 -m venv .venv
|
||||
python3.11 -m venv .venv
|
||||
source .venv/bin/activate
|
||||
python3 -m pip install -r vercel_requirements.txt
|
||||
python3.11 -m pip install --upgrade pip
|
||||
python3.11 -m pip install -r vercel_requirements.txt
|
||||
cp -r extras/* docs_skeleton/docs
|
||||
cd docs_skeleton
|
||||
nbdoc_build
|
||||
python3.11 generate_api_reference_links.py
|
||||
|
||||
@@ -31,7 +31,7 @@ There isn't any special setup for it.
|
||||
|
||||
## LLM
|
||||
|
||||
See a [usage example](/docs/modules/model_io/models/llms/integrations/INCLUDE_REAL_NAME.html).
|
||||
See a [usage example](/docs/integrations/llms/INCLUDE_REAL_NAME).
|
||||
|
||||
```python
|
||||
from langchain.llms import integration_class_REPLACE_ME
|
||||
@@ -40,7 +40,7 @@ from langchain.llms import integration_class_REPLACE_ME
|
||||
|
||||
## Text Embedding Models
|
||||
|
||||
See a [usage example](/docs/modules/data_connection/text_embedding/integrations/INCLUDE_REAL_NAME.html)
|
||||
See a [usage example](/docs/integrations/text_embedding/INCLUDE_REAL_NAME)
|
||||
|
||||
```python
|
||||
from langchain.embeddings import integration_class_REPLACE_ME
|
||||
@@ -49,7 +49,7 @@ from langchain.embeddings import integration_class_REPLACE_ME
|
||||
|
||||
## Chat Models
|
||||
|
||||
See a [usage example](/docs/modules/model_io/models/chat/integrations/INCLUDE_REAL_NAME.html)
|
||||
See a [usage example](/docs/integrations/chat/INCLUDE_REAL_NAME)
|
||||
|
||||
```python
|
||||
from langchain.chat_models import integration_class_REPLACE_ME
|
||||
@@ -57,7 +57,7 @@ from langchain.chat_models import integration_class_REPLACE_ME
|
||||
|
||||
## Document Loader
|
||||
|
||||
See a [usage example](/docs/modules/data_connection/document_loaders/integrations/INCLUDE_REAL_NAME.html).
|
||||
See a [usage example](/docs/integrations/document_loaders/INCLUDE_REAL_NAME).
|
||||
|
||||
```python
|
||||
from langchain.document_loaders import integration_class_REPLACE_ME
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
# Tutorials
|
||||
|
||||
Below are links to video tutorials and courses on LangChain. For written guides on common use cases for LangChain, check out the [use cases guides](/docs/use_cases).
|
||||
|
||||
⛓ icon marks a new addition [last update 2023-07-05]
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ If you're building with LLMs, at some point something will break, and you'll nee
|
||||
|
||||
Here's a few different tools and functionalities to aid in debugging.
|
||||
|
||||
<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! Instead, edit the notebook w/the location & name as this file. -->
|
||||
|
||||
|
||||
## Tracing
|
||||
|
||||
|
||||
381
docs/extras/guides/evaluation/comparison/pairwise_string.ipynb
Normal file
381
docs/extras/guides/evaluation/comparison/pairwise_string.ipynb
Normal file
@@ -0,0 +1,381 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2da95378",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Pairwise String Comparison\n",
|
||||
"\n",
|
||||
"Often you will want to compare predictions of an LLM, Chain, or Agent for a given input. The `StringComparison` evaluators facilitate this so you can answer questions like:\n",
|
||||
"\n",
|
||||
"- Which LLM or prompt produces a preferred output for a given question?\n",
|
||||
"- Which examples should I include for few-shot example selection?\n",
|
||||
"- Which output is better to include for fintetuning?\n",
|
||||
"\n",
|
||||
"The simplest and often most reliable automated way to choose a preferred prediction for a given input is to use the `pairwise_string` evaluator.\n",
|
||||
"\n",
|
||||
"Check out the reference docs for the [PairwiseStringEvalChain](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.comparison.eval_chain.PairwiseStringEvalChain.html#langchain.evaluation.comparison.eval_chain.PairwiseStringEvalChain) for more info."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "f6790c46",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.evaluation import load_evaluator\n",
|
||||
"\n",
|
||||
"evaluator = load_evaluator(\"labeled_pairwise_string\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "49ad9139",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'reasoning': 'Both responses are relevant to the question asked, as they both provide a numerical answer to the question about the number of dogs in the park. However, Response A is incorrect according to the reference answer, which states that there are four dogs. Response B, on the other hand, is correct as it matches the reference answer. Neither response demonstrates depth of thought, as they both simply provide a numerical answer without any additional information or context. \\n\\nBased on these criteria, Response B is the better response.\\n',\n",
|
||||
" 'value': 'B',\n",
|
||||
" 'score': 0}"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"evaluator.evaluate_string_pairs(\n",
|
||||
" prediction=\"there are three dogs\",\n",
|
||||
" prediction_b=\"4\",\n",
|
||||
" input=\"how many dogs are in the park?\",\n",
|
||||
" reference=\"four\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7491d2e6-4e77-4b17-be6b-7da966785c1d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Methods\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"The pairwise string evaluator can be called using [evaluate_string_pairs](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.comparison.eval_chain.PairwiseStringEvalChain.html#langchain.evaluation.comparison.eval_chain.PairwiseStringEvalChain.evaluate_string_pairs) (or async [aevaluate_string_pairs](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.comparison.eval_chain.PairwiseStringEvalChain.html#langchain.evaluation.comparison.eval_chain.PairwiseStringEvalChain.aevaluate_string_pairs)) methods, which accept:\n",
|
||||
"\n",
|
||||
"- prediction (str) – The predicted response of the first model, chain, or prompt.\n",
|
||||
"- prediction_b (str) – The predicted response of the second model, chain, or prompt.\n",
|
||||
"- input (str) – The input question, prompt, or other text.\n",
|
||||
"- reference (str) – (Only for the labeled_pairwise_string variant) The reference response.\n",
|
||||
"\n",
|
||||
"They return a dictionary with the following values:\n",
|
||||
"- value: 'A' or 'B', indicating whether `prediction` or `prediction_b` is preferred, respectively\n",
|
||||
"- score: Integer 0 or 1 mapped from the 'value', where a score of 1 would mean that the first `prediction` is preferred, and a score of 0 would mean `prediction_b` is preferred.\n",
|
||||
"- reasoning: String \"chain of thought reasoning\" from the LLM generated prior to creating the score"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ed353b93-be71-4479-b9c0-8c97814c2e58",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Without References\n",
|
||||
"\n",
|
||||
"When references aren't available, you can still predict the preferred response.\n",
|
||||
"The results will reflect the evaluation model's preference, which is less reliable and may result\n",
|
||||
"in preferences that are factually incorrect."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "586320da",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.evaluation import load_evaluator\n",
|
||||
"\n",
|
||||
"evaluator = load_evaluator(\"pairwise_string\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "7f56c76e-a39b-4509-8b8a-8a2afe6c3da1",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'reasoning': 'Both responses are correct and relevant to the question. However, Response B is more helpful and insightful as it provides a more detailed explanation of what addition is. Response A is correct but lacks depth as it does not explain what the operation of addition entails. \\n\\nFinal Decision: [[B]]',\n",
|
||||
" 'value': 'B',\n",
|
||||
" 'score': 0}"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"evaluator.evaluate_string_pairs(\n",
|
||||
" prediction=\"Addition is a mathematical operation.\",\n",
|
||||
" prediction_b=\"Addition is a mathematical operation that adds two numbers to create a third number, the 'sum'.\",\n",
|
||||
" input=\"What is addition?\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4a09b21d-9851-47e8-93d3-90044b2945b0",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"## Defining the Criteria\n",
|
||||
"\n",
|
||||
"By default, the LLM is instructed to select the 'preferred' response based on helpfulness, relevance, correctness, and depth of thought. You can customize the criteria by passing in a `criteria` argument, where the criteria could take any of the following forms:\n",
|
||||
"- [`Criteria`](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.criteria.eval_chain.Criteria.html#langchain.evaluation.criteria.eval_chain.Criteria) enum or its string value - to use one of the default criteria and their descriptions\n",
|
||||
"- [Constitutional principal](https://api.python.langchain.com/en/latest/chains/langchain.chains.constitutional_ai.models.ConstitutionalPrinciple.html#langchain.chains.constitutional_ai.models.ConstitutionalPrinciple) - use one any of the constitutional principles defined in langchain\n",
|
||||
"- Dictionary: a list of custom criteria, where the key is the name of the criteria, and the value is the description.\n",
|
||||
"- A list of criteria or constitutional principles - to combine multiple criteria in one.\n",
|
||||
"\n",
|
||||
"Below is an example for determining preferred writing responses based on a custom style."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "8539e7d9-f7b0-4d32-9c45-593a7915c093",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"custom_criteria = {\n",
|
||||
" \"simplicity\": \"Is the language straightforward and unpretentious?\",\n",
|
||||
" \"clarity\": \"Are the sentences clear and easy to understand?\",\n",
|
||||
" \"precision\": \"Is the writing precise, with no unnecessary words or details?\",\n",
|
||||
" \"truthfulness\": \"Does the writing feel honest and sincere?\",\n",
|
||||
" \"subtext\": \"Does the writing suggest deeper meanings or themes?\",\n",
|
||||
"}\n",
|
||||
"evaluator = load_evaluator(\"pairwise_string\", criteria=custom_criteria)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "fec7bde8-fbdc-4730-8366-9d90d033c181",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'reasoning': 'Response A is simple, clear, and precise. It uses straightforward language to convey a deep and sincere message about families. The metaphor of joy and sorrow as music is effective and easy to understand.\\n\\nResponse B, on the other hand, is more complex and less clear. The language is more pretentious, with words like \"domicile,\" \"resounds,\" \"abode,\" \"dissonant,\" and \"elegy.\" While it conveys a similar message to Response A, it does so in a more convoluted way. The precision is also lacking due to the use of unnecessary words and details.\\n\\nBoth responses suggest deeper meanings or themes about the shared joy and unique sorrow in families. However, Response A does so in a more effective and accessible way.\\n\\nTherefore, the better response is [[A]].',\n",
|
||||
" 'value': 'A',\n",
|
||||
" 'score': 1}"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"evaluator.evaluate_string_pairs(\n",
|
||||
" prediction=\"Every cheerful household shares a similar rhythm of joy; but sorrow, in each household, plays a unique, haunting melody.\",\n",
|
||||
" prediction_b=\"Where one finds a symphony of joy, every domicile of happiness resounds in harmonious,\"\n",
|
||||
" \" identical notes; yet, every abode of despair conducts a dissonant orchestra, each\"\n",
|
||||
" \" playing an elegy of grief that is peculiar and profound to its own existence.\",\n",
|
||||
" input=\"Write some prose about families.\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a25b60b2-627c-408a-be4b-a2e5cbc10726",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Customize the LLM\n",
|
||||
"\n",
|
||||
"By default, the loader uses `gpt-4` in the evaluation chain. You can customize this when loading."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "de84a958-1330-482b-b950-68bcf23f9e35",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatAnthropic\n",
|
||||
"\n",
|
||||
"llm = ChatAnthropic(temperature=0)\n",
|
||||
"\n",
|
||||
"evaluator = load_evaluator(\"labeled_pairwise_string\", llm=llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "e162153f-d50a-4a7c-a033-019dabbc954c",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'reasoning': 'Here is my assessment:\\n\\nResponse B is more helpful, insightful, and accurate than Response A. Response B simply states \"4\", which directly answers the question by providing the exact number of dogs mentioned in the reference answer. In contrast, Response A states \"there are three dogs\", which is incorrect according to the reference answer. \\n\\nIn terms of helpfulness, Response B gives the precise number while Response A provides an inaccurate guess. For relevance, both refer to dogs in the park from the question. However, Response B is more correct and factual based on the reference answer. Response A shows some attempt at reasoning but is ultimately incorrect. Response B requires less depth of thought to simply state the factual number.\\n\\nIn summary, Response B is superior in terms of helpfulness, relevance, correctness, and depth. My final decision is: [[B]]\\n',\n",
|
||||
" 'value': 'B',\n",
|
||||
" 'score': 0}"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"evaluator.evaluate_string_pairs(\n",
|
||||
" prediction=\"there are three dogs\",\n",
|
||||
" prediction_b=\"4\",\n",
|
||||
" input=\"how many dogs are in the park?\",\n",
|
||||
" reference=\"four\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e0e89c13-d0ad-4f87-8fcb-814399bafa2a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Customize the Evaluation Prompt\n",
|
||||
"\n",
|
||||
"You can use your own custom evaluation prompt to add more task-specific instructions or to instruct the evaluator to score the output.\n",
|
||||
"\n",
|
||||
"*Note: If you use a prompt that expects generates a result in a unique format, you may also have to pass in a custom output parser (`output_parser=your_parser()`) instead of the default `PairwiseStringResultOutputParser`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "fb817efa-3a4d-439d-af8c-773b89d97ec9",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"\n",
|
||||
"prompt_template = PromptTemplate.from_template(\n",
|
||||
" \"\"\"Given the input context, which do you prefer: A or B?\n",
|
||||
"Evaluate based on the following criteria:\n",
|
||||
"{criteria}\n",
|
||||
"Reason step by step and finally, respond with either [[A]] or [[B]] on its own line.\n",
|
||||
"\n",
|
||||
"DATA\n",
|
||||
"----\n",
|
||||
"input: {input}\n",
|
||||
"reference: {reference}\n",
|
||||
"A: {prediction}\n",
|
||||
"B: {prediction_b}\n",
|
||||
"---\n",
|
||||
"Reasoning:\n",
|
||||
"\n",
|
||||
"\"\"\"\n",
|
||||
")\n",
|
||||
"evaluator = load_evaluator(\n",
|
||||
" \"labeled_pairwise_string\", prompt=prompt_template\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "d40aa4f0-cfd5-4cb4-83c8-8d2300a04c2f",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"input_variables=['prediction', 'reference', 'prediction_b', 'input'] output_parser=None partial_variables={'criteria': 'helpfulness: Is the submission helpful, insightful, and appropriate?\\nrelevance: Is the submission referring to a real quote from the text?\\ncorrectness: Is the submission correct, accurate, and factual?\\ndepth: Does the submission demonstrate depth of thought?'} template='Given the input context, which do you prefer: A or B?\\nEvaluate based on the following criteria:\\n{criteria}\\nReason step by step and finally, respond with either [[A]] or [[B]] on its own line.\\n\\nDATA\\n----\\ninput: {input}\\nreference: {reference}\\nA: {prediction}\\nB: {prediction_b}\\n---\\nReasoning:\\n\\n' template_format='f-string' validate_template=True\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# The prompt was assigned to the evaluator\n",
|
||||
"print(evaluator.prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "9467bb42-7a31-4071-8f66-9ed2c6f06dcd",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'reasoning': 'Helpfulness: Both A and B are helpful as they provide a direct answer to the question.\\nRelevance: A is relevant as it refers to the correct name of the dog from the text. B is not relevant as it provides a different name.\\nCorrectness: A is correct as it accurately states the name of the dog. B is incorrect as it provides a different name.\\nDepth: Both A and B demonstrate a similar level of depth as they both provide a straightforward answer to the question.\\n\\nGiven these evaluations, the preferred response is:\\n',\n",
|
||||
" 'value': 'A',\n",
|
||||
" 'score': 1}"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"evaluator.evaluate_string_pairs(\n",
|
||||
" prediction=\"The dog that ate the ice cream was named fido.\",\n",
|
||||
" prediction_b=\"The dog's name is spot\",\n",
|
||||
" input=\"What is the name of the dog that ate the ice cream?\",\n",
|
||||
" reference=\"The dog's name is fido\",\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -24,18 +24,15 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.evaluation.comparison import PairwiseStringEvalChain\n",
|
||||
"from langchain.evaluation import load_evaluator\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-4\")\n",
|
||||
"\n",
|
||||
"eval_chain = PairwiseStringEvalChain.from_llm(llm=llm)"
|
||||
"eval_chain = load_evaluator(\"pairwise_string\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -50,7 +47,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
@@ -59,13 +56,13 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Found cached dataset parquet (/Users/wfh/.cache/huggingface/datasets/LangChainDatasets___parquet/LangChainDatasets--langchain-howto-queries-bbb748bbee7e77aa/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)\n"
|
||||
"Found cached dataset parquet (/Users/wfh/.cache/huggingface/datasets/LangChainDatasets___parquet/LangChainDatasets--langchain-howto-queries-bbb748bbee7e77aa/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "d852a1884480457292c90d8bd9d4f1e6",
|
||||
"model_id": "a2358d37246640ce95e0f9940194590a",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
@@ -94,7 +91,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
@@ -127,7 +124,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
@@ -152,7 +149,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
@@ -160,7 +157,7 @@
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "b076d6bf6680422aa9082d4bad4d98a3",
|
||||
"model_id": "87277cb39a1a4726bb7cc533a24e2ea4",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
@@ -170,14 +167,6 @@
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Retrying langchain.chat_models.openai.acompletion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised ServiceUnavailableError: The server is overloaded or not ready yet..\n",
|
||||
"Retrying langchain.chat_models.openai.acompletion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised ServiceUnavailableError: The server is overloaded or not ready yet..\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
@@ -215,7 +204,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
@@ -252,7 +241,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -270,7 +259,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 8,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
@@ -279,8 +268,8 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"OpenAI Functions Agent: 90.00%\n",
|
||||
"Structured Chat Agent: 10.00%\n"
|
||||
"OpenAI Functions Agent: 95.00%\n",
|
||||
"None: 5.00%\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -310,7 +299,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 9,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
@@ -349,7 +338,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 10,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
@@ -358,8 +347,8 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The \"OpenAI Functions Agent\" would be preferred between 69.90% and 97.21% percent of the time (with 95% confidence).\n",
|
||||
"The \"Structured Chat Agent\" would be preferred between 2.79% and 30.10% percent of the time (with 95% confidence).\n"
|
||||
"The \"OpenAI Functions Agent\" would be preferred between 83.18% and 100.00% percent of the time (with 95% confidence).\n",
|
||||
"The \"Structured Chat Agent\" would be preferred between 0.00% and 16.82% percent of the time (with 95% confidence).\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -380,7 +369,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": 11,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
@@ -389,9 +378,17 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The p-value is 0.00040. If the null hypothesis is true (i.e., if the selected eval chain actually has no preference between the models),\n",
|
||||
"then there is a 0.04025% chance of observing the OpenAI Functions Agent be preferred at least 18\n",
|
||||
"times out of 20 trials.\n"
|
||||
"The p-value is 0.00000. If the null hypothesis is true (i.e., if the selected eval chain actually has no preference between the models),\n",
|
||||
"then there is a 0.00038% chance of observing the OpenAI Functions Agent be preferred at least 19\n",
|
||||
"times out of 19 trials.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/var/folders/gf/6rnp_mbx5914kx7qmmh7xzmw0000gn/T/ipykernel_15978/384907688.py:6: DeprecationWarning: 'binom_test' is deprecated in favour of 'binomtest' from version 1.7.0 and will be removed in Scipy 1.12.0.\n",
|
||||
" p_value = stats.binom_test(successes, n, p=0.5, alternative=\"two-sided\")\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
318
docs/extras/guides/evaluation/string/Untitled.ipynb
Normal file
318
docs/extras/guides/evaluation/string/Untitled.ipynb
Normal file
@@ -0,0 +1,318 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bce7335e-f3b2-44f3-90cc-8c0a23a89a21",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from langchain.agents import load_tools\n",
|
||||
"from langchain.agents import initialize_agent\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.utilities import GoogleSearchAPIWrapper\n",
|
||||
"from langchain.schema import (\n",
|
||||
" SystemMessage,\n",
|
||||
" HumanMessage,\n",
|
||||
" AIMessage\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_ENDPOINT\"] = \"https://api.smith.langchain.com\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = \"******\"\n",
|
||||
"# os.environ[\"LANGCHAIN_PROJECT\"] = \"Jarvis\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"prefix_messages = [{\"role\": \"system\", \"content\": \"You are a helpful discord Chatbot.\"}]\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model_name='gpt-3.5-turbo', \n",
|
||||
" temperature=0.5, \n",
|
||||
" max_tokens = 2000)\n",
|
||||
"tools = load_tools([\"serpapi\", \"llm-math\"], llm=llm)\n",
|
||||
"agent = initialize_agent(tools,\n",
|
||||
" llm,\n",
|
||||
" agent=\"zero-shot-react-description\",\n",
|
||||
" verbose=True,\n",
|
||||
" handle_parsing_errors=True\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"async def on_ready():\n",
|
||||
" print(f'{bot.user} has connected to Discord!')\n",
|
||||
"\n",
|
||||
"async def on_message(message):\n",
|
||||
"\n",
|
||||
" print(\"Detected bot name in message:\", message.content)\n",
|
||||
"\n",
|
||||
" # Capture the output of agent.run() in the response variable\n",
|
||||
" response = agent.run(message.content)\n",
|
||||
"\n",
|
||||
" while response:\n",
|
||||
" print(response)\n",
|
||||
" chunk, response = response[:2000], response[2000:]\n",
|
||||
" print(f\"Chunk: {chunk}\")\n",
|
||||
" print(\"Response sent.\")\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "1551ce9f-b6de-4035-b6d6-825722823b48",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from dataclasses import dataclass\n",
|
||||
"@dataclass\n",
|
||||
"class Message:\n",
|
||||
" content: str"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"id": "6e6859ec-8544-4407-9663-6b53c0092903",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Detected bot name in message: Hi AI, how are you today?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThis question is not something that can be answered using the available tools.\n",
|
||||
"Action: N/A\u001b[0m\n",
|
||||
"Observation: Invalid Format: Missing 'Action Input:' after 'Action:'\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mI need to follow the correct format for answering questions.\n",
|
||||
"Action: N/A\u001b[0m\n",
|
||||
"Observation: Invalid Format: Missing 'Action Input:' after 'Action:'\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mI need to follow the correct format for answering questions.\n",
|
||||
"Action: N/A\u001b[0m\n",
|
||||
"Observation: Invalid Format: Missing 'Action Input:' after 'Action:'\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mI need to follow the correct format for answering questions.\n",
|
||||
"Action: N/A\u001b[0m\n",
|
||||
"Observation: Invalid Format: Missing 'Action Input:' after 'Action:'\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mI need to follow the correct format for answering questions.\n",
|
||||
"Action: N/A\u001b[0m\n",
|
||||
"Observation: Invalid Format: Missing 'Action Input:' after 'Action:'\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mI need to follow the correct format for answering questions.\n",
|
||||
"Action: N/A\u001b[0m\n",
|
||||
"Observation: Invalid Format: Missing 'Action Input:' after 'Action:'\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mI need to follow the correct format for answering questions.\n",
|
||||
"Action: N/A\u001b[0m\n",
|
||||
"Observation: Invalid Format: Missing 'Action Input:' after 'Action:'\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mI need to follow the correct format for answering questions.\n",
|
||||
"Action: N/A\u001b[0m\n",
|
||||
"Observation: Invalid Format: Missing 'Action Input:' after 'Action:'\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mI need to follow the correct format for answering questions.\n",
|
||||
"Action: N/A\u001b[0m\n",
|
||||
"Observation: Invalid Format: Missing 'Action Input:' after 'Action:'\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mI need to follow the correct format for answering questions.\n",
|
||||
"Action: N/A\u001b[0m\n",
|
||||
"Observation: Invalid Format: Missing 'Action Input:' after 'Action:'\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mI need to follow the correct format for answering questions.\n",
|
||||
"Action: N/A\u001b[0m\n",
|
||||
"Observation: Invalid Format: Missing 'Action Input:' after 'Action:'\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mI need to follow the correct format for answering questions.\n",
|
||||
"Action: N/A\u001b[0m\n",
|
||||
"Observation: Invalid Format: Missing 'Action Input:' after 'Action:'\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mI need to follow the correct format for answering questions.\n",
|
||||
"Action: N/A\u001b[0m\n",
|
||||
"Observation: Invalid Format: Missing 'Action Input:' after 'Action:'\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mI need to follow the correct format for answering questions.\n",
|
||||
"Action: N/A\u001b[0m\n",
|
||||
"Observation: Invalid Format: Missing 'Action Input:' after 'Action:'\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mI need to follow the correct format for answering questions.\n",
|
||||
"Action: N/A\u001b[0m\n",
|
||||
"Observation: Invalid Format: Missing 'Action Input:' after 'Action:'\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"Agent stopped due to iteration limit or time limit.\n",
|
||||
"Chunk: Agent stopped due to iteration limit or time limit.\n",
|
||||
"Response sent.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"await on_message(Message(content=\"Hi AI, how are you today?\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "b850294c-7f8f-4e79-adcf-47e4e3a898df",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langsmith import Client\n",
|
||||
"\n",
|
||||
"client = Client()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"id": "6d089ddc-69bc-45a8-b8db-9962e4f1f5ee",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from itertools import islice\n",
|
||||
"\n",
|
||||
"runs = list(islice(client.list_runs(), 10))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 38,
|
||||
"id": "f0349fac-5a98-400f-ba03-61ed4e1332be",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"runs = sorted(runs, key=lambda x: x.start_time, reverse=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "02f133f0-39ee-4b46-b443-12c1f9b76fff",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ids = [run.id for run in runs]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 39,
|
||||
"id": "3366dce4-0c38-4a7d-8111-046a58b24917",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"runs2 = list(client.list_runs(id=ids))\n",
|
||||
"runs2 = sorted(runs2, key=lambda x: x.start_time, reverse=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 42,
|
||||
"id": "82915b90-39a0-47d6-9121-56a13f210f52",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['a36092d2-4ad5-4fb4-9b0d-0dba9a2ed836',\n",
|
||||
" '9398e6be-964f-4aa4-8de9-ad78cd4b7074']"
|
||||
]
|
||||
},
|
||||
"execution_count": 42,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"[str(x) for x in ids[:2]]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 48,
|
||||
"id": "f610ec91-dc48-4a17-91c5-5c4675c77abc",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langsmith.run_helpers import traceable\n",
|
||||
"\n",
|
||||
"@traceable(run_type=\"llm\", name=\"\"\"<iframe width=\"560\" height=\"315\" src=\"https://www.youtube.com/embed/dQw4w9WgXcQ?start=5\" title=\"YouTube video player\" frameborder=\"0\" allow=\"accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share\" allowfullscreen></iframe>\"\"\")\n",
|
||||
"def foo():\n",
|
||||
" return \"bar\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 49,
|
||||
"id": "bd317bd7-8b2a-433a-8ec3-098a84ba8e64",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'bar'"
|
||||
]
|
||||
},
|
||||
"execution_count": 49,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"foo()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 52,
|
||||
"id": "b142519b-6885-415c-83b9-4a346fb90589",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms import AzureOpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5c50bb2b-72b8-4322-9b16-d857ecd9f347",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -5,16 +5,13 @@
|
||||
"id": "4cf569a7-9a1d-4489-934e-50e57760c907",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Evaluating Custom Criteria\n",
|
||||
"# Criteria Evaluation\n",
|
||||
"\n",
|
||||
"Suppose you want to test a model's output against a custom rubric or custom set of criteria, how would you go about testing this?\n",
|
||||
"In scenarios where you wish to assess a model's output using a specific rubric or criteria set, the `criteria` evaluator proves to be a handy tool. It allows you to verify if an LLM or Chain's output complies with a defined set of criteria.\n",
|
||||
"\n",
|
||||
"The `criteria` evaluator is a convenient way to predict whether an LLM or Chain's output complies with a set of criteria, so long as you can\n",
|
||||
"properly define those criteria.\n",
|
||||
"To understand its functionality and configurability in depth, refer to the reference documentation of the [CriteriaEvalChain](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.criteria.eval_chain.CriteriaEvalChain.html#langchain.evaluation.criteria.eval_chain.CriteriaEvalChain) class.\n",
|
||||
"\n",
|
||||
"For more details, check out the reference docs for the [CriteriaEvalChain](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.criteria.eval_chain.CriteriaEvalChain.html#langchain.evaluation.criteria.eval_chain.CriteriaEvalChain)'s class definition.\n",
|
||||
"\n",
|
||||
"### Without References\n",
|
||||
"### Usage without references\n",
|
||||
"\n",
|
||||
"In this example, you will use the `CriteriaEvalChain` to check whether an output is concise. First, create the evaluation chain to predict whether outputs are \"concise\"."
|
||||
]
|
||||
@@ -30,7 +27,12 @@
|
||||
"source": [
|
||||
"from langchain.evaluation import load_evaluator\n",
|
||||
"\n",
|
||||
"evaluator = load_evaluator(\"criteria\", criteria=\"conciseness\")"
|
||||
"evaluator = load_evaluator(\"criteria\", criteria=\"conciseness\")\n",
|
||||
"\n",
|
||||
"# This is equivalent to loading using the enum\n",
|
||||
"from langchain.evaluation import EvaluatorType\n",
|
||||
"\n",
|
||||
"evaluator = load_evaluator(EvaluatorType.CRITERIA, criteria=\"conciseness\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -45,7 +47,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'reasoning': 'The criterion is conciseness. This means the submission should be brief and to the point. \\n\\nLooking at the submission, the answer to the task is included, but there is additional commentary that is not necessary to answer the question. The phrase \"That\\'s an elementary question\" and \"The answer you\\'re looking for is\" could be removed and the answer would still be clear and correct. \\n\\nTherefore, the submission is not concise and does not meet the criterion. \\n\\nN', 'value': 'N', 'score': 0}\n"
|
||||
"{'reasoning': 'The criterion is conciseness, which means the submission should be brief and to the point. \\n\\nLooking at the submission, the answer to the question \"What\\'s 2+2?\" is indeed \"four\". However, the respondent has added extra information, stating \"That\\'s an elementary question.\" This statement does not contribute to answering the question and therefore makes the response less concise.\\n\\nTherefore, the submission does not meet the criterion of conciseness.\\n\\nN', 'value': 'N', 'score': 0}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -59,49 +61,20 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "43397a9f-ccca-4f91-b0e1-df0cada2efb1",
|
||||
"id": "35e61e4d-b776-4f6b-8c89-da5d3604134a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Default Criteria**\n",
|
||||
"#### Output Format\n",
|
||||
"\n",
|
||||
"Most of the time, you'll want to define your own custom criteria (see below), but we also provide some common criteria you can load with a single string.\n",
|
||||
"Here's a list of pre-implemented criteria:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "8c4ec9dd-6557-4f23-8480-c822eb6ec552",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['conciseness',\n",
|
||||
" 'relevance',\n",
|
||||
" 'correctness',\n",
|
||||
" 'coherence',\n",
|
||||
" 'harmfulness',\n",
|
||||
" 'maliciousness',\n",
|
||||
" 'helpfulness',\n",
|
||||
" 'controversiality',\n",
|
||||
" 'mysogyny',\n",
|
||||
" 'criminality',\n",
|
||||
" 'insensitive']"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.evaluation import CriteriaEvalChain\n",
|
||||
"All string evaluators expose an [evaluate_strings](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.criteria.eval_chain.CriteriaEvalChain.html?highlight=evaluate_strings#langchain.evaluation.criteria.eval_chain.CriteriaEvalChain.evaluate_strings) (or async [aevaluate_strings](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.criteria.eval_chain.CriteriaEvalChain.html?highlight=evaluate_strings#langchain.evaluation.criteria.eval_chain.CriteriaEvalChain.aevaluate_strings)) method, which accepts:\n",
|
||||
"\n",
|
||||
"# For a list of other default supported criteria, try calling `supported_default_criteria`\n",
|
||||
"CriteriaEvalChain.get_supported_default_criteria()"
|
||||
"- input (str) – The input to the agent.\n",
|
||||
"- prediction (str) – The predicted response.\n",
|
||||
"\n",
|
||||
"The criteria evaluators return a dictionary with the following values:\n",
|
||||
"- score: Binary integeer 0 to 1, where 1 would mean that the output is compliant with the criteria, and 0 otherwise\n",
|
||||
"- value: A \"Y\" or \"N\" corresponding to the score\n",
|
||||
"- reasoning: String \"chain of thought reasoning\" from the LLM generated prior to creating the score"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -111,12 +84,12 @@
|
||||
"source": [
|
||||
"## Using Reference Labels\n",
|
||||
"\n",
|
||||
"Some criteria (such as correctness) require reference labels to work correctly. To do this, initialize with `requires_reference=True` and call the evaluator with a `reference` string."
|
||||
"Some criteria (such as correctness) require reference labels to work correctly. To do this, initialize the `labeled_criteria` evaluator and call the evaluator with a `reference` string."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 3,
|
||||
"id": "20d8a86b-beba-42ce-b82c-d9e5ebc13686",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -126,13 +99,12 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"With ground truth: 1\n",
|
||||
"Without ground truth: 0\n"
|
||||
"With ground truth: 1\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"evaluator = load_evaluator(\"criteria\", criteria=\"correctness\", requires_reference=True)\n",
|
||||
"evaluator = load_evaluator(\"labeled_criteria\", criteria=\"correctness\")\n",
|
||||
"\n",
|
||||
"# We can even override the model's learned knowledge using ground truth labels\n",
|
||||
"eval_result = evaluator.evaluate_strings(\n",
|
||||
@@ -143,6 +115,51 @@
|
||||
"print(f'With ground truth: {eval_result[\"score\"]}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e05b5748-d373-4ff8-85d9-21da4641e84c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Default Criteria**\n",
|
||||
"\n",
|
||||
"Most of the time, you'll want to define your own custom criteria (see below), but we also provide some common criteria you can load with a single string.\n",
|
||||
"Here's a list of pre-implemented criteria. Note that in the absence of labels, the LLM merely predicts what it thinks the best answer is and is not grounded in actual law or context."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "47de7359-db3e-4cad-bcfa-4fe834dea893",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[<Criteria.CONCISENESS: 'conciseness'>,\n",
|
||||
" <Criteria.RELEVANCE: 'relevance'>,\n",
|
||||
" <Criteria.CORRECTNESS: 'correctness'>,\n",
|
||||
" <Criteria.COHERENCE: 'coherence'>,\n",
|
||||
" <Criteria.HARMFULNESS: 'harmfulness'>,\n",
|
||||
" <Criteria.MALICIOUSNESS: 'maliciousness'>,\n",
|
||||
" <Criteria.HELPFULNESS: 'helpfulness'>,\n",
|
||||
" <Criteria.CONTROVERSIALITY: 'controversiality'>,\n",
|
||||
" <Criteria.MISOGYNY: 'misogyny'>,\n",
|
||||
" <Criteria.CRIMINALITY: 'criminality'>,\n",
|
||||
" <Criteria.INSENSITIVITY: 'insensitivity'>]"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.evaluation import Criteria\n",
|
||||
"\n",
|
||||
"# For a list of other default supported criteria, try calling `supported_default_criteria`\n",
|
||||
"list(Criteria)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "077c4715-e857-44a3-9f87-346642586a8d",
|
||||
@@ -152,32 +169,52 @@
|
||||
"\n",
|
||||
"To evaluate outputs against your own custom criteria, or to be more explicit the definition of any of the default criteria, pass in a dictionary of `\"criterion_name\": \"criterion_description\"`\n",
|
||||
"\n",
|
||||
"Note: the evaluator still predicts whether the output complies with ALL of the criteria provided. If you specify antagonistic criteria / antonyms, the evaluator won't be very useful."
|
||||
"Note: it's recommended that you create a single evaluator per criterion. This way, separate feedback can be provided for each aspect. Additionally, if you provide antagonistic criteria, the evaluator won't be very useful, as it will be configured to predict compliance for ALL of the criteria provided."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 19,
|
||||
"id": "bafa0a11-2617-4663-84bf-24df7d0736be",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'reasoning': 'The criterion is asking if the output contains numeric information. The submission does mention the \"late 16th century,\" which is a numeric information. Therefore, the submission meets the criterion.\\n\\nY', 'value': 'Y', 'score': 1}\n"
|
||||
"{'reasoning': \"The criterion asks if the output contains numeric or mathematical information. The joke in the submission does contain mathematical information. It refers to the mathematical concept of squaring a number and also mentions 'pi', which is a mathematical constant. Therefore, the submission does meet the criterion.\\n\\nY\", 'value': 'Y', 'score': 1}\n",
|
||||
"{'reasoning': 'Let\\'s assess the submission based on the given criteria:\\n\\n1. Numeric: The output does not contain any explicit numeric information. The word \"square\" and \"pi\" are mathematical terms but they are not numeric information per se.\\n\\n2. Mathematical: The output does contain mathematical information. The terms \"square\" and \"pi\" are mathematical terms. The joke is a play on the mathematical concept of squaring a number (in this case, pi).\\n\\n3. Grammatical: The output is grammatically correct. The sentence structure, punctuation, and word usage are all correct.\\n\\n4. Logical: The output is logical. It makes sense within the context of the joke. The joke is a play on words between the mathematical concept of squaring a number (pi) and eating a square pie.\\n\\nBased on the above analysis, the submission does not meet all the criteria because it does not contain numeric information.\\nN', 'value': 'N', 'score': 0}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"custom_criterion = {\"numeric\": \"Does the output contain numeric information?\"}\n",
|
||||
"custom_criterion = {\"numeric\": \"Does the output contain numeric or mathematical information?\"}\n",
|
||||
"\n",
|
||||
"eval_chain = load_evaluator(\n",
|
||||
" EvaluatorType.CRITERIA,\n",
|
||||
" llm=eval_llm,\n",
|
||||
" criteria=custom_criterion,\n",
|
||||
")\n",
|
||||
"query = \"Tell me a joke\"\n",
|
||||
"prediction = \"I ate some square pie but I don't know the square of pi.\"\n",
|
||||
"eval_result = eval_chain.evaluate_strings(prediction=prediction, input=query)\n",
|
||||
"print(eval_result)\n",
|
||||
"\n",
|
||||
"# If you wanted to specify multiple criteria. Generally not recommended\n",
|
||||
"custom_criteria = {\n",
|
||||
" \"numeric\": \"Does the output contain numeric information?\",\n",
|
||||
" \"mathematical\": \"Does the output contain mathematical information?\",\n",
|
||||
" \"grammatical\": \"Is the output grammatically correct?\",\n",
|
||||
" \"logical\": \"Is the output logical?\",\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"eval_chain = load_evaluator(\n",
|
||||
" EvaluatorType.CRITERIA,\n",
|
||||
" criteria=custom_criteria,\n",
|
||||
")\n",
|
||||
"eval_result = eval_chain.evaluate_strings(prediction=prediction, input=query)\n",
|
||||
"print(\"Multi-criteria evaluation\")\n",
|
||||
"print(eval_result)"
|
||||
]
|
||||
},
|
||||
@@ -194,7 +231,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 9,
|
||||
"id": "99e3c242-5b12-4bd5-b487-64990a159655",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -220,7 +257,7 @@
|
||||
" ConstitutionalPrinciple(critique_request='Discuss the assistant’s last response to the human and whether it is in any way insensitive, sexist, racist, or socially inappropriate.', revision_request='Revise the assistant’s last response to remove content that is in any way insensitive, sexist, racist, or socially inappropriate.', name='insensitive'))]"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -234,28 +271,23 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 12,
|
||||
"id": "f4061bc0-a88a-4d1c-88a0-8003bba084c7",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'reasoning': 'The criterion asks to identify if the assistant\\'s response is harmful, unethical, racist, sexist, toxic, dangerous, or illegal.\\n\\nThe assistant\\'s response is \"I say that man is a lilly-livered nincompoop\". This is a negative statement about a person named Will. \\n\\nThe term \"lilly-livered\" is an old-fashioned term meaning cowardly, and \"nincompoop\" is a term meaning a foolish or stupid person. \\n\\nWhile this is a negative and potentially insulting statement, it does not appear to be harmful in a physical sense, unethical, racist, sexist, toxic, dangerous, or illegal. \\n\\nIt could be argued that the statement is harmful in a psychological or emotional sense, as it is insulting. However, without further context, it\\'s hard to definitively say if this meets the criteria of being harmful.\\n\\nN',\n",
|
||||
" 'value': 'N',\n",
|
||||
" 'score': 0}"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'reasoning': 'The criterion asks to identify if the assistant\\'s response is harmful, unethical, racist, sexist, toxic, dangerous, or illegal.\\n\\nLooking at the assistant\\'s response, it is clear that it is not racist or sexist as it does not discriminate or stereotype based on race or gender. \\n\\nThe response is also not illegal as it does not involve any criminal activity or encourage any form of illegal behavior.\\n\\nThe response is not dangerous as it does not pose a physical threat or risk to anyone\\'s safety.\\n\\nHowever, the assistant\\'s response can be considered harmful and toxic as it uses derogatory language (\"lilly-livered nincompoop\") to describe \\'Will\\'. This can be seen as a form of verbal abuse or insult, which can cause emotional harm.\\n\\nThe response can also be seen as unethical, as it is generally considered inappropriate to insult or belittle someone in this manner.\\n\\nN', 'value': 'N', 'score': 0}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"eval_chain = load_evaluator(\n",
|
||||
" EvaluatorType.CRITERIA, llm=eval_llm, criteria=PRINCIPLES[\"harmful1\"]\n",
|
||||
"evaluator = load_evaluator(\n",
|
||||
" EvaluatorType.CRITERIA, criteria=PRINCIPLES[\"harmful1\"]\n",
|
||||
")\n",
|
||||
"eval_result = evaluator.evaluate_strings(\n",
|
||||
" prediction=\"I say that man is a lilly-livered nincompoop\",\n",
|
||||
@@ -278,7 +310,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 13,
|
||||
"id": "1717162d-f76c-4a14-9ade-168d6fa42b7a",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -291,7 +323,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 14,
|
||||
"id": "8727e6f4-aaba-472d-bb7d-09fc1a0f0e2a",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -306,7 +338,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 15,
|
||||
"id": "3f6f0d8b-cf42-4241-85ae-35b3ce8152a0",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -316,7 +348,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'reasoning': 'Here is my step-by-step reasoning for each criterion:\\n\\nconciseness: The submission is not concise. It contains unnecessary words and phrases like \"That\\'s an elementary question\" and \"you\\'re looking for\". The answer could have simply been stated as \"4\" to be concise.\\n\\nN', 'value': 'N', 'score': 0}\n"
|
||||
"{'reasoning': 'Step 1) Analyze the conciseness criterion: Is the submission concise and to the point?\\nStep 2) The submission provides extraneous information beyond just answering the question directly. It characterizes the question as \"elementary\" and provides reasoning for why the answer is 4. This additional commentary makes the submission not fully concise.\\nStep 3) Therefore, based on the analysis of the conciseness criterion, the submission does not meet the criteria.\\n\\nN', 'value': 'N', 'score': 0}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -340,7 +372,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 16,
|
||||
"id": "22e57704-682f-44ff-96ba-e915c73269c0",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -364,13 +396,13 @@
|
||||
"prompt = PromptTemplate.from_template(fstring)\n",
|
||||
"\n",
|
||||
"evaluator = load_evaluator(\n",
|
||||
" \"criteria\", criteria=\"correctness\", prompt=prompt, requires_reference=True\n",
|
||||
" \"labeled_criteria\", criteria=\"correctness\", prompt=prompt\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 17,
|
||||
"id": "5d6b0eca-7aea-4073-a65a-18c3a9cdb5af",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -380,7 +412,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'reasoning': 'Correctness: No, the submission is not correct. The expected response was \"It\\'s 17 now.\" but the response given was \"What\\'s 2+2? That\\'s an elementary question. The answer you\\'re looking for is that two and two is four.\"', 'value': 'N', 'score': 0}\n"
|
||||
"{'reasoning': 'Correctness: No, the response is not correct. The expected response was \"It\\'s 17 now.\" but the response given was \"What\\'s 2+2? That\\'s an elementary question. The answer you\\'re looking for is that two and two is four.\"', 'value': 'N', 'score': 0}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -404,6 +436,12 @@
|
||||
"\n",
|
||||
"Remember when selecting criteria to decide whether they ought to require ground truth labels or not. Things like \"correctness\" are best evaluated with ground truth or with extensive context. Also, remember to pick aligned principles for a given chain so that the classification makes sense."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a684e2f1",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -53,7 +53,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'score': 12}"
|
||||
"{'score': 0.11555555555555552}"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
@@ -79,7 +79,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'score': 4}"
|
||||
"{'score': 0.0724999999999999}"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
@@ -143,7 +143,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"jaro_evaluator = load_evaluator(\n",
|
||||
" \"string_distance\", distance=StringDistance.JARO, requires_reference=True\n",
|
||||
" \"string_distance\", distance=StringDistance.JARO\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -67,7 +67,7 @@
|
||||
"id": "297dea4b-fb28-4292-b6e0-1c769cfb9cbd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The example above will return a score of 1 if the language model predicts that any of the actions were unnecessary, and it returns a score of 0 if all of them were predicted to be necessary.\n",
|
||||
"The example above will return a score of 1 if the language model predicts that any of the actions were unnecessary, and it returns a score of 0 if all of them were predicted to be necessary. It returns the string 'decision' as the 'value', and includes the rest of the generated text as 'reasoning' to let you audit the decision.\n",
|
||||
"\n",
|
||||
"You can call this evaluator to grade the intermediate steps of your agent's trajectory."
|
||||
]
|
||||
@@ -30,6 +30,25 @@
|
||||
"evaluator = load_evaluator(\"trajectory\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b1c64c1a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Methods\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"The Agent Trajectory Evaluators are used with the [evaluate_agent_trajectory](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.html#langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.evaluate_agent_trajectory) (and async [aevaluate_agent_trajectory](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.html#langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.aevaluate_agent_trajectory)) methods, which accept:\n",
|
||||
"\n",
|
||||
"- input (str) – The input to the agent.\n",
|
||||
"- prediction (str) – The final predicted response.\n",
|
||||
"- agent_trajectory (List[Tuple[AgentAction, str]]) – The intermediate steps forming the agent trajectory\n",
|
||||
"\n",
|
||||
"They return a dictionary with the following values:\n",
|
||||
"- score: Float from 0 to 1, where 1 would mean \"most effective\" and 0 would mean \"least effective\"\n",
|
||||
"- reasoning: String \"chain of thought reasoning\" from the LLM generated prior to creating the score"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e733562c-4c17-4942-9647-acfc5ebfaca2",
|
||||
@@ -52,11 +71,13 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import subprocess\n",
|
||||
"\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.tools import tool\n",
|
||||
"from langchain.agents import AgentType, initialize_agent\n",
|
||||
"\n",
|
||||
"from pydantic import HttpUrl\n",
|
||||
"import subprocess\n",
|
||||
"from urllib.parse import urlparse\n",
|
||||
"\n",
|
||||
"\n",
|
||||
@@ -117,17 +138,11 @@
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Type <class 'langchain.agents.openai_functions_multi_agent.base._FunctionsAgentAction'> not serializable\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"1.0"
|
||||
"{'score': 1.0,\n",
|
||||
" 'reasoning': \"i. The final answer is helpful. It directly answers the user's question about the latency for the website https://langchain.com.\\n\\nii. The AI language model uses a logical sequence of tools to answer the question. It uses the 'ping' tool to measure the latency of the website, which is the correct tool for this task.\\n\\niii. The AI language model uses the tool in a helpful way. It inputs the URL into the 'ping' tool and correctly interprets the output to provide the latency in milliseconds.\\n\\niv. The AI language model does not use too many steps to answer the question. It only uses one step, which is appropriate for this type of question.\\n\\nv. The appropriate tool is used to answer the question. The 'ping' tool is the correct tool to measure website latency.\\n\\nGiven these considerations, the AI language model's performance is excellent. It uses the correct tool, interprets the output correctly, and provides a helpful and direct answer to the user's question.\"}"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
@@ -141,7 +156,7 @@
|
||||
" input=result[\"input\"],\n",
|
||||
" agent_trajectory=result[\"intermediate_steps\"],\n",
|
||||
")\n",
|
||||
"evaluation_result[\"score\"]"
|
||||
"evaluation_result"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -193,7 +208,8 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"1.0"
|
||||
"{'score': 1.0,\n",
|
||||
" 'reasoning': \"Here is my detailed evaluation of the AI's response:\\n\\ni. The final answer is helpful, as it directly provides the latency measurement for the requested website.\\n\\nii. The sequence of using the ping tool to measure latency is logical for this question.\\n\\niii. The ping tool is used in a helpful way, with the website URL provided as input and the output latency measurement extracted.\\n\\niv. Only one step is used, which is appropriate for simply measuring latency. More steps are not needed.\\n\\nv. The ping tool is an appropriate choice to measure latency. \\n\\nIn summary, the AI uses an optimal single step approach with the right tool and extracts the needed output. The final answer directly answers the question in a helpful way.\\n\\nOverall\"}"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
@@ -207,7 +223,7 @@
|
||||
" input=result[\"input\"],\n",
|
||||
" agent_trajectory=result[\"intermediate_steps\"],\n",
|
||||
")\n",
|
||||
"evaluation_result[\"score\"]"
|
||||
"evaluation_result"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -245,7 +261,8 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"1.0"
|
||||
"{'score': 1.0,\n",
|
||||
" 'reasoning': \"i. The final answer is helpful. It directly answers the user's question about the latency for the specified website.\\n\\nii. The AI language model uses a logical sequence of tools to answer the question. In this case, only one tool was needed to answer the question, and the model chose the correct one.\\n\\niii. The AI language model uses the tool in a helpful way. The 'ping' tool was used to determine the latency of the website, which was the information the user was seeking.\\n\\niv. The AI language model does not use too many steps to answer the question. Only one step was needed and used.\\n\\nv. The appropriate tool was used to answer the question. The 'ping' tool is designed to measure latency, which was the information the user was seeking.\\n\\nGiven these considerations, the AI language model's performance in answering this question is excellent.\"}"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
@@ -259,7 +276,7 @@
|
||||
" input=result[\"input\"],\n",
|
||||
" agent_trajectory=result[\"intermediate_steps\"],\n",
|
||||
")\n",
|
||||
"evaluation_result[\"score\"]"
|
||||
"evaluation_result"
|
||||
]
|
||||
}
|
||||
],
|
||||
1424
docs/extras/guides/expression_language/cookbook.ipynb
Normal file
1424
docs/extras/guides/expression_language/cookbook.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
282
docs/extras/guides/expression_language/interface.ipynb
Normal file
282
docs/extras/guides/expression_language/interface.ipynb
Normal file
@@ -0,0 +1,282 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9a9acd2e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Interface\n",
|
||||
"\n",
|
||||
"In an effort to make it as easy as possible to create custom chains, we've implemented a [\"Runnable\"](https://api.python.langchain.com/en/latest/schema/langchain.schema.runnable.Runnable.html#langchain.schema.runnable.Runnable) protocol that most components implement. This is a standard interface with a few different methods, which makes it easy to define custom chains as well as making it possible to invoke them in a standard way. The standard interface exposed includes:\n",
|
||||
"\n",
|
||||
"- `stream`: stream back chunks of the response\n",
|
||||
"- `invoke`: call the chain on an input\n",
|
||||
"- `batch`: call the chain on a list of inputs\n",
|
||||
"\n",
|
||||
"These also have corresponding async methods:\n",
|
||||
"\n",
|
||||
"- `astream`: stream back chunks of the response async\n",
|
||||
"- `ainvoke`: call the chain on an input async\n",
|
||||
"- `abatch`: call the chain on a list of inputs async\n",
|
||||
"\n",
|
||||
"The type of the input varies by component:\n",
|
||||
"\n",
|
||||
"| Component | Input Type |\n",
|
||||
"| --- | --- |\n",
|
||||
"|Prompt|Dictionary|\n",
|
||||
"|Retriever|Single string|\n",
|
||||
"|Model| Single string, list of chat messages or a PromptValue|\n",
|
||||
"\n",
|
||||
"The output type also varies by component:\n",
|
||||
"\n",
|
||||
"| Component | Output Type |\n",
|
||||
"| --- | --- |\n",
|
||||
"| LLM | String |\n",
|
||||
"| ChatModel | ChatMessage |\n",
|
||||
"| Prompt | PromptValue |\n",
|
||||
"| Retriever | List of documents |\n",
|
||||
"\n",
|
||||
"Let's take a look at these methods! To do so, we'll create a super simple PromptTemplate + ChatModel chain."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "466b65b3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.prompts import ChatPromptTemplate\n",
|
||||
"from langchain.chat_models import ChatOpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "3c634ef0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = ChatOpenAI()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "d1850a1f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"prompt = ChatPromptTemplate.from_template(\"tell me a joke about {topic}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "56d0669f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = prompt | model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "daf2b2b2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Stream"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "bea9639d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Sure, here's a bear-themed joke for you:\n",
|
||||
"\n",
|
||||
"Why don't bears wear shoes?\n",
|
||||
"\n",
|
||||
"Because they have bear feet!"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for s in chain.stream({\"topic\": \"bears\"}):\n",
|
||||
" print(s.content, end=\"\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cbf1c782",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Invoke"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "470e483f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"Why don't bears wear shoes?\\n\\nBecause they already have bear feet!\", additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.invoke({\"topic\": \"bears\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "88f0c279",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Batch"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "9685de67",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[AIMessage(content=\"Why don't bears ever wear shoes?\\n\\nBecause they have bear feet!\", additional_kwargs={}, example=False),\n",
|
||||
" AIMessage(content=\"Why don't cats play poker in the wild?\\n\\nToo many cheetahs!\", additional_kwargs={}, example=False)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.batch([{\"topic\": \"bears\"}, {\"topic\": \"cats\"}])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b960cbfe",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Async Stream"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "ea35eee4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Why don't bears wear shoes?\n",
|
||||
"\n",
|
||||
"Because they have bear feet!"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"async for s in chain.astream({\"topic\": \"bears\"}):\n",
|
||||
" print(s.content, end=\"\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "04cb3324",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Async Invoke"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "ef8c9b20",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"Sure, here you go:\\n\\nWhy don't bears wear shoes?\\n\\nBecause they have bear feet!\", additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"await chain.ainvoke({\"topic\": \"bears\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3da288d5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Async Batch"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "eba2a103",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[AIMessage(content=\"Why don't bears wear shoes?\\n\\nBecause they have bear feet!\", additional_kwargs={}, example=False)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"await chain.abatch([{\"topic\": \"bears\"}])"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -5,7 +5,7 @@
|
||||
"id": "920a3c1a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Model Comparison\n",
|
||||
"# Model comparison\n",
|
||||
"\n",
|
||||
"Constructing your language model application will likely involved choosing between many different options of prompts, models, and even chains to use. When doing so, you will want to compare these different options on different inputs in an easy, flexible, and intuitive way. \n",
|
||||
"\n",
|
||||
@@ -254,7 +254,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
"> using both human and machine feedback. We provide support for each step in the MLOps cycle, \n",
|
||||
"> from data labeling to model monitoring.\n",
|
||||
"\n",
|
||||
"<a target=\"_blank\" href=\"https://colab.research.google.com/github/hwchase17/langchain/blob/master/docs/modules/callbacks/integrations/argilla.html\">\n",
|
||||
"<a target=\"_blank\" href=\"https://colab.research.google.com/github/hwchase17/langchain/blob/master/docs/integrations/callbacks/argilla.html\">\n",
|
||||
" <img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/>\n",
|
||||
"</a>"
|
||||
]
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
"\n",
|
||||
"[PromptLayer](https://promptlayer.com) is a an LLM observability platform that lets you visualize requests, version prompts, and track usage. In this guide we will go over how to setup the `PromptLayerCallbackHandler`. \n",
|
||||
"\n",
|
||||
"While PromptLayer does have LLMs that integrate directly with LangChain (eg [`PromptLayerOpenAI`](https://python.langchain.com/docs/modules/model_io/models/llms/integrations/promptlayer_openai)), this callback is the recommended way to integrate PromptLayer with LangChain.\n",
|
||||
"While PromptLayer does have LLMs that integrate directly with LangChain (eg [`PromptLayerOpenAI`](https://python.langchain.com/docs/integrations/llms/promptlayer_openai)), this callback is the recommended way to integrate PromptLayer with LangChain.\n",
|
||||
"\n",
|
||||
"See [our docs](https://docs.promptlayer.com/languages/langchain) for more information."
|
||||
]
|
||||
|
||||
287
docs/extras/integrations/chat/anthropic_functions.ipynb
Normal file
287
docs/extras/integrations/chat/anthropic_functions.ipynb
Normal file
@@ -0,0 +1,287 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5125a1e3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Anthropic Functions\n",
|
||||
"\n",
|
||||
"This notebook shows how to use an experimental wrapper around Anthropic that gives it the same API as OpenAI Functions."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "378be79b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/harrisonchase/.pyenv/versions/3.9.1/envs/langchain/lib/python3.9/site-packages/deeplake/util/check_latest_version.py:32: UserWarning: A newer version of deeplake (3.6.14) is available. It's recommended that you update to the latest version using `pip install -U deeplake`.\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_experimental.llms.anthropic_functions import AnthropicFunctions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "65499965",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialize Model\n",
|
||||
"\n",
|
||||
"You can initialize this wrapper the same way you'd initialize ChatAnthropic"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "e1d535f6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = AnthropicFunctions(model='claude-2')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fcc9eaf4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Passing in functions\n",
|
||||
"\n",
|
||||
"You can now pass in functions in a similar way"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "0779c320",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"functions=[\n",
|
||||
" {\n",
|
||||
" \"name\": \"get_current_weather\",\n",
|
||||
" \"description\": \"Get the current weather in a given location\",\n",
|
||||
" \"parameters\": {\n",
|
||||
" \"type\": \"object\",\n",
|
||||
" \"properties\": {\n",
|
||||
" \"location\": {\n",
|
||||
" \"type\": \"string\",\n",
|
||||
" \"description\": \"The city and state, e.g. San Francisco, CA\"\n",
|
||||
" },\n",
|
||||
" \"unit\": {\n",
|
||||
" \"type\": \"string\",\n",
|
||||
" \"enum\": [\"celsius\", \"fahrenheit\"]\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" \"required\": [\"location\"]\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "ad75a933",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.schema import HumanMessage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "fc703085",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"response = model.predict_messages(\n",
|
||||
" [HumanMessage(content=\"whats the weater in boston?\")], \n",
|
||||
" functions=functions\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "04d7936a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=' ', additional_kwargs={'function_call': {'name': 'get_current_weather', 'arguments': '{\"location\": \"Boston, MA\", \"unit\": \"fahrenheit\"}'}}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"response"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0072fdba",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using for extraction\n",
|
||||
"\n",
|
||||
"You can now use this for extraction."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "7af5c567",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import create_extraction_chain\n",
|
||||
"schema = {\n",
|
||||
" \"properties\": {\n",
|
||||
" \"name\": {\"type\": \"string\"},\n",
|
||||
" \"height\": {\"type\": \"integer\"},\n",
|
||||
" \"hair_color\": {\"type\": \"string\"},\n",
|
||||
" },\n",
|
||||
" \"required\": [\"name\", \"height\"],\n",
|
||||
"}\n",
|
||||
"inp = \"\"\"\n",
|
||||
"Alex is 5 feet tall. Claudia is 1 feet taller Alex and jumps higher than him. Claudia is a brunette and Alex is blonde.\n",
|
||||
" \"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "bd01082a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = create_extraction_chain(schema, model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "b5a23e9f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'name': 'Alex', 'height': '5', 'hair_color': 'blonde'},\n",
|
||||
" {'name': 'Claudia', 'height': '6', 'hair_color': 'brunette'}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.run(inp)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "90ec959e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using for tagging\n",
|
||||
"\n",
|
||||
"You can now use this for tagging"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "03c1eb0d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import create_tagging_chain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "581c0ece",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"schema = {\n",
|
||||
" \"properties\": {\n",
|
||||
" \"sentiment\": {\"type\": \"string\"},\n",
|
||||
" \"aggressiveness\": {\"type\": \"integer\"},\n",
|
||||
" \"language\": {\"type\": \"string\"},\n",
|
||||
" }\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "d9a8570e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = create_tagging_chain(schema, model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "cf37d679",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'sentiment': 'positive', 'aggressiveness': '0', 'language': 'english'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.run(\"this is really cool\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
95
docs/extras/integrations/chat/azureml_chat_endpoint.ipynb
Normal file
95
docs/extras/integrations/chat/azureml_chat_endpoint.ipynb
Normal file
@@ -0,0 +1,95 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# AzureML Chat Online Endpoint\n",
|
||||
"\n",
|
||||
"[AzureML](https://azure.microsoft.com/en-us/products/machine-learning/) is a platform used to build, train, and deploy machine learning models. Users can explore the types of models to deploy in the Model Catalog, which provides Azure Foundation Models and OpenAI Models. Azure Foundation Models include various open-source models and popular Hugging Face models. Users can also import models of their liking into AzureML.\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use a chat model hosted on an `AzureML online endpoint`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models.azureml_endpoint import AzureMLChatOnlineEndpoint"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set up\n",
|
||||
"\n",
|
||||
"To use the wrapper, you must [deploy a model on AzureML](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-use-foundation-models?view=azureml-api-2#deploying-foundation-models-to-endpoints-for-inferencing) and obtain the following parameters:\n",
|
||||
"\n",
|
||||
"* `endpoint_api_key`: The API key provided by the endpoint\n",
|
||||
"* `endpoint_url`: The REST endpoint url provided by the endpoint"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Content Formatter\n",
|
||||
"\n",
|
||||
"The `content_formatter` parameter is a handler class for transforming the request and response of an AzureML endpoint to match with required schema. Since there are a wide range of models in the model catalog, each of which may process data differently from one another, a `ContentFormatterBase` class is provided to allow users to transform data to their liking. The following content formatters are provided:\n",
|
||||
"\n",
|
||||
"* `LLamaContentFormatter`: Formats request and response data for LLaMa2-chat"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=' The Collatz Conjecture is one of the most famous unsolved problems in mathematics, and it has been the subject of much study and research for many years. While it is impossible to predict with certainty whether the conjecture will ever be solved, there are several reasons why it is considered a challenging and important problem:\\n\\n1. Simple yet elusive: The Collatz Conjecture is a deceptively simple statement that has proven to be extraordinarily difficult to prove or disprove. Despite its simplicity, the conjecture has eluded some of the brightest minds in mathematics, and it remains one of the most famous open problems in the field.\\n2. Wide-ranging implications: The Collatz Conjecture has far-reaching implications for many areas of mathematics, including number theory, algebra, and analysis. A solution to the conjecture could have significant impacts on these fields and potentially lead to new insights and discoveries.\\n3. Computational evidence: While the conjecture remains unproven, extensive computational evidence supports its validity. In fact, no counterexample to the conjecture has been found for any starting value up to 2^64 (a number', additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chat_models.azureml_endpoint import LlamaContentFormatter\n",
|
||||
"from langchain.schema import HumanMessage\n",
|
||||
"\n",
|
||||
"chat = AzureMLChatOnlineEndpoint(content_formatter=LlamaContentFormatter())\n",
|
||||
"response = chat(messages=[\n",
|
||||
" HumanMessage(content=\"Will the Collatz conjecture ever be solved?\")\n",
|
||||
"])\n",
|
||||
"response"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -8,11 +9,7 @@
|
||||
"\n",
|
||||
"Note: This is seperate from the Google PaLM integration. Google has chosen to offer an enterprise version of PaLM through GCP, and this supports the models made available through there. \n",
|
||||
"\n",
|
||||
"PaLM API on Vertex AI is a Preview offering, subject to the Pre-GA Offerings Terms of the [GCP Service Specific Terms](https://cloud.google.com/terms/service-terms). \n",
|
||||
"\n",
|
||||
"Pre-GA products and features may have limited support, and changes to pre-GA products and features may not be compatible with other pre-GA versions. For more information, see the [launch stage descriptions](https://cloud.google.com/products#product-launch-stages). Further, by using PaLM API on Vertex AI, you agree to the Generative AI Preview [terms and conditions](https://cloud.google.com/trustedtester/aitos) (Preview Terms).\n",
|
||||
"\n",
|
||||
"For PaLM API on Vertex AI, you can process personal data as outlined in the Cloud Data Processing Addendum, subject to applicable restrictions and obligations in the Agreement (as defined in the Preview Terms).\n",
|
||||
"By default, Google Cloud [does not use](https://cloud.google.com/vertex-ai/docs/generative-ai/data-governance#foundation_model_development) Customer Data to train its foundation models as part of Google Cloud`s AI/ML Privacy Commitment. More details about how Google processes data can also be found in [Google's Customer Data Processing Addendum (CDPA)](https://cloud.google.com/terms/data-processing-addendum).\n",
|
||||
"\n",
|
||||
"To use Vertex AI PaLM you must have the `google-cloud-aiplatform` Python package installed and either:\n",
|
||||
"- Have credentials configured for your environment (gcloud, workload identity, etc...)\n",
|
||||
@@ -90,6 +87,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -142,6 +140,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
|
||||
220
docs/extras/integrations/document_loaders/Etherscan.ipynb
Normal file
220
docs/extras/integrations/document_loaders/Etherscan.ipynb
Normal file
@@ -0,0 +1,220 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1ab83660",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Etherscan Loader\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"The Etherscan loader use etherscan api to load transacactions histories under specific account on Ethereum Mainnet.\n",
|
||||
"\n",
|
||||
"You will need a Etherscan api key to proceed. The free api key has 5 calls per seconds quota.\n",
|
||||
"\n",
|
||||
"The loader supports the following six functinalities:\n",
|
||||
"* Retrieve normal transactions under specific account on Ethereum Mainet\n",
|
||||
"* Retrieve internal transactions under specific account on Ethereum Mainet\n",
|
||||
"* Retrieve erc20 transactions under specific account on Ethereum Mainet\n",
|
||||
"* Retrieve erc721 transactions under specific account on Ethereum Mainet\n",
|
||||
"* Retrieve erc1155 transactions under specific account on Ethereum Mainet\n",
|
||||
"* Retrieve ethereum balance in wei under specific account on Ethereum Mainet\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"If the account does not have corresponding transactions, the loader will a list with one document. The content of document is ''.\n",
|
||||
"\n",
|
||||
"You can pass differnt filters to loader to access different functionalities we mentioned above:\n",
|
||||
"* \"normal_transaction\"\n",
|
||||
"* \"internal_transaction\"\n",
|
||||
"* \"erc20_transaction\"\n",
|
||||
"* \"eth_balance\"\n",
|
||||
"* \"erc721_transaction\"\n",
|
||||
"* \"erc1155_transaction\"\n",
|
||||
"The filter is default to normal_transaction\n",
|
||||
"\n",
|
||||
"If you have any questions, you can access [Etherscan API Doc](https://etherscan.io/tx/0x0ffa32c787b1398f44303f731cb06678e086e4f82ce07cebf75e99bb7c079c77) or contact me via i@inevitable.tech.\n",
|
||||
"\n",
|
||||
"All functions related to transactions histories are restricted 1000 histories maximum because of Etherscan limit. You can use the following parameters to find the transaction histories you need:\n",
|
||||
"* offset: default to 20. Shows 20 transactions for one time\n",
|
||||
"* page: default to 1. This controls pagenation.\n",
|
||||
"* start_block: Default to 0. The transaction histories starts from 0 block.\n",
|
||||
"* end_block: Default to 99999999. The transaction histories starts from 99999999 block\n",
|
||||
"* sort: \"desc\" or \"asc\". Set default to \"desc\" to get latest transactions."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d72d4e22",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2911e51e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install langchain -q"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "208e2fbf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import EtherscanLoader\n",
|
||||
"import os"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "5d24b650",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"os.environ[\"ETHERSCAN_API_KEY\"] = etherscanAPIKey"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3bcbb63e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Create a ERC20 transaction loader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "d525e6c8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'blockNumber': '13242975',\n",
|
||||
" 'timeStamp': '1631878751',\n",
|
||||
" 'hash': '0x366dda325b1a6570928873665b6b418874a7dedf7fee9426158fa3536b621788',\n",
|
||||
" 'nonce': '28',\n",
|
||||
" 'blockHash': '0x5469dba1b1e1372962cf2be27ab2640701f88c00640c4d26b8cc2ae9ac256fb6',\n",
|
||||
" 'from': '0x2ceee24f8d03fc25648c68c8e6569aa0512f6ac3',\n",
|
||||
" 'contractAddress': '0x2ceee24f8d03fc25648c68c8e6569aa0512f6ac3',\n",
|
||||
" 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b',\n",
|
||||
" 'value': '298131000000000',\n",
|
||||
" 'tokenName': 'ABCHANGE.io',\n",
|
||||
" 'tokenSymbol': 'XCH',\n",
|
||||
" 'tokenDecimal': '9',\n",
|
||||
" 'transactionIndex': '71',\n",
|
||||
" 'gas': '15000000',\n",
|
||||
" 'gasPrice': '48614996176',\n",
|
||||
" 'gasUsed': '5712724',\n",
|
||||
" 'cumulativeGasUsed': '11507920',\n",
|
||||
" 'input': 'deprecated',\n",
|
||||
" 'confirmations': '4492277'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"account_address = \"0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b\"\n",
|
||||
"loader = EtherscanLoader(account_address, filter=\"erc20_transaction\")\n",
|
||||
"result = loader.load()\n",
|
||||
"eval(result[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2a1ecce0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Create a normal transaction loader with customized parameters"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "07aa2b6c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"20\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content=\"{'blockNumber': '1723771', 'timeStamp': '1466213371', 'hash': '0xe00abf5fa83a4b23ee1cc7f07f9dda04ab5fa5efe358b315df8b76699a83efc4', 'nonce': '3155', 'blockHash': '0xc2c2207bcaf341eed07f984c9a90b3f8e8bdbdbd2ac6562f8c2f5bfa4b51299d', 'transactionIndex': '5', 'from': '0x3763e6e1228bfeab94191c856412d1bb0a8e6996', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '13149213761000000000', 'gas': '90000', 'gasPrice': '22655598156', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '126000', 'gasUsed': '21000', 'confirmations': '16011481', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x3763e6e1228bfeab94191c856412d1bb0a8e6996', 'tx_hash': '0xe00abf5fa83a4b23ee1cc7f07f9dda04ab5fa5efe358b315df8b76699a83efc4', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n",
|
||||
" Document(page_content=\"{'blockNumber': '1727090', 'timeStamp': '1466262018', 'hash': '0xd5a779346d499aa722f72ffe7cd3c8594a9ddd91eb7e439e8ba92ceb7bc86928', 'nonce': '3267', 'blockHash': '0xc0cff378c3446b9b22d217c2c5f54b1c85b89a632c69c55b76cdffe88d2b9f4d', 'transactionIndex': '20', 'from': '0x3763e6e1228bfeab94191c856412d1bb0a8e6996', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '11521979886000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '3806725', 'gasUsed': '21000', 'confirmations': '16008162', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x3763e6e1228bfeab94191c856412d1bb0a8e6996', 'tx_hash': '0xd5a779346d499aa722f72ffe7cd3c8594a9ddd91eb7e439e8ba92ceb7bc86928', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n",
|
||||
" Document(page_content=\"{'blockNumber': '1730337', 'timeStamp': '1466308222', 'hash': '0xceaffdb3766d2741057d402738eb41e1d1941939d9d438c102fb981fd47a87a4', 'nonce': '3344', 'blockHash': '0x3a52d28b8587d55c621144a161a0ad5c37dd9f7d63b629ab31da04fa410b2cfa', 'transactionIndex': '1', 'from': '0x3763e6e1228bfeab94191c856412d1bb0a8e6996', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '9783400526000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '60788', 'gasUsed': '21000', 'confirmations': '16004915', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x3763e6e1228bfeab94191c856412d1bb0a8e6996', 'tx_hash': '0xceaffdb3766d2741057d402738eb41e1d1941939d9d438c102fb981fd47a87a4', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n",
|
||||
" Document(page_content=\"{'blockNumber': '1733479', 'timeStamp': '1466352351', 'hash': '0x720d79bf78775f82b40280aae5abfc347643c5f6708d4bf4ec24d65cd01c7121', 'nonce': '3367', 'blockHash': '0x9928661e7ae125b3ae0bcf5e076555a3ee44c52ae31bd6864c9c93a6ebb3f43e', 'transactionIndex': '0', 'from': '0x3763e6e1228bfeab94191c856412d1bb0a8e6996', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '1570706444000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '21000', 'gasUsed': '21000', 'confirmations': '16001773', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x3763e6e1228bfeab94191c856412d1bb0a8e6996', 'tx_hash': '0x720d79bf78775f82b40280aae5abfc347643c5f6708d4bf4ec24d65cd01c7121', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n",
|
||||
" Document(page_content=\"{'blockNumber': '1734172', 'timeStamp': '1466362463', 'hash': '0x7a062d25b83bafc9fe6b22bc6f5718bca333908b148676e1ac66c0adeccef647', 'nonce': '1016', 'blockHash': '0x8a8afe2b446713db88218553cfb5dd202422928e5e0bc00475ed2f37d95649de', 'transactionIndex': '4', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '6322276709000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '105333', 'gasUsed': '21000', 'confirmations': '16001080', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0x7a062d25b83bafc9fe6b22bc6f5718bca333908b148676e1ac66c0adeccef647', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n",
|
||||
" Document(page_content=\"{'blockNumber': '1737276', 'timeStamp': '1466406037', 'hash': '0xa4e89bfaf075abbf48f96700979e6c7e11a776b9040113ba64ef9c29ac62b19b', 'nonce': '1024', 'blockHash': '0xe117cad73752bb485c3bef24556e45b7766b283229180fcabc9711f3524b9f79', 'transactionIndex': '35', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '9976891868000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '3187163', 'gasUsed': '21000', 'confirmations': '15997976', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0xa4e89bfaf075abbf48f96700979e6c7e11a776b9040113ba64ef9c29ac62b19b', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n",
|
||||
" Document(page_content=\"{'blockNumber': '1740314', 'timeStamp': '1466450262', 'hash': '0x6e1a22dcc6e2c77a9451426fb49e765c3c459dae88350e3ca504f4831ec20e8a', 'nonce': '1051', 'blockHash': '0x588d17842819a81afae3ac6644d8005c12ce55ddb66c8d4c202caa91d4e8fdbe', 'transactionIndex': '6', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '8060633765000000000', 'gas': '90000', 'gasPrice': '22926905859', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '153077', 'gasUsed': '21000', 'confirmations': '15994938', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0x6e1a22dcc6e2c77a9451426fb49e765c3c459dae88350e3ca504f4831ec20e8a', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n",
|
||||
" Document(page_content=\"{'blockNumber': '1743384', 'timeStamp': '1466494099', 'hash': '0xdbfcc15f02269fc3ae27f69e344a1ac4e08948b12b76ebdd78a64d8cafd511ef', 'nonce': '1068', 'blockHash': '0x997245108c84250057fda27306b53f9438ad40978a95ca51d8fd7477e73fbaa7', 'transactionIndex': '2', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '9541921352000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '119650', 'gasUsed': '21000', 'confirmations': '15991868', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0xdbfcc15f02269fc3ae27f69e344a1ac4e08948b12b76ebdd78a64d8cafd511ef', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n",
|
||||
" Document(page_content=\"{'blockNumber': '1746405', 'timeStamp': '1466538123', 'hash': '0xbd4f9602f7fff4b8cc2ab6286efdb85f97fa114a43f6df4e6abc88e85b89e97b', 'nonce': '1092', 'blockHash': '0x3af3966cdaf22e8b112792ee2e0edd21ceb5a0e7bf9d8c168a40cf22deb3690c', 'transactionIndex': '0', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '8433783799000000000', 'gas': '90000', 'gasPrice': '25689279306', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '21000', 'gasUsed': '21000', 'confirmations': '15988847', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0xbd4f9602f7fff4b8cc2ab6286efdb85f97fa114a43f6df4e6abc88e85b89e97b', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n",
|
||||
" Document(page_content=\"{'blockNumber': '1749459', 'timeStamp': '1466582044', 'hash': '0x28c327f462cc5013d81c8682c032f014083c6891938a7bdeee85a1c02c3e9ed4', 'nonce': '1096', 'blockHash': '0x5fc5d2a903977b35ce1239975ae23f9157d45d7bd8a8f6205e8ce270000797f9', 'transactionIndex': '1', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '10269065805000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '42000', 'gasUsed': '21000', 'confirmations': '15985793', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0x28c327f462cc5013d81c8682c032f014083c6891938a7bdeee85a1c02c3e9ed4', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n",
|
||||
" Document(page_content=\"{'blockNumber': '1752614', 'timeStamp': '1466626168', 'hash': '0xc3849e550ca5276d7b3c51fa95ad3ae62c1c164799d33f4388fe60c4e1d4f7d8', 'nonce': '1118', 'blockHash': '0x88ef054b98e47504332609394e15c0a4467f84042396717af6483f0bcd916127', 'transactionIndex': '11', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '11325836780000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '252000', 'gasUsed': '21000', 'confirmations': '15982638', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0xc3849e550ca5276d7b3c51fa95ad3ae62c1c164799d33f4388fe60c4e1d4f7d8', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n",
|
||||
" Document(page_content=\"{'blockNumber': '1755659', 'timeStamp': '1466669931', 'hash': '0xb9f891b7c3d00fcd64483189890591d2b7b910eda6172e3bf3973c5fd3d5a5ae', 'nonce': '1133', 'blockHash': '0x2983972217a91343860415d1744c2a55246a297c4810908bbd3184785bc9b0c2', 'transactionIndex': '14', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '13226475343000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '2674679', 'gasUsed': '21000', 'confirmations': '15979593', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0xb9f891b7c3d00fcd64483189890591d2b7b910eda6172e3bf3973c5fd3d5a5ae', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n",
|
||||
" Document(page_content=\"{'blockNumber': '1758709', 'timeStamp': '1466713652', 'hash': '0xd6cce5b184dc7fce85f305ee832df647a9c4640b68e9b79b6f74dc38336d5622', 'nonce': '1147', 'blockHash': '0x1660de1e73067251be0109d267a21ffc7d5bde21719a3664c7045c32e771ecf9', 'transactionIndex': '1', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '9758447294000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '42000', 'gasUsed': '21000', 'confirmations': '15976543', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0xd6cce5b184dc7fce85f305ee832df647a9c4640b68e9b79b6f74dc38336d5622', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n",
|
||||
" Document(page_content=\"{'blockNumber': '1761783', 'timeStamp': '1466757809', 'hash': '0xd01545872629956867cbd65fdf5e97d0dde1a112c12e76a1bfc92048d37f650f', 'nonce': '1169', 'blockHash': '0x7576961afa4218a3264addd37a41f55c444dd534e9410dbd6f93f7fe20e0363e', 'transactionIndex': '2', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '10197126683000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '63000', 'gasUsed': '21000', 'confirmations': '15973469', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0xd01545872629956867cbd65fdf5e97d0dde1a112c12e76a1bfc92048d37f650f', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n",
|
||||
" Document(page_content=\"{'blockNumber': '1764895', 'timeStamp': '1466801683', 'hash': '0x620b91b12af7aac75553b47f15742e2825ea38919cfc8082c0666f404a0db28b', 'nonce': '1186', 'blockHash': '0x2e687643becd3c36e0c396a02af0842775e17ccefa0904de5aeca0a9a1aa795e', 'transactionIndex': '7', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '8690241462000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '168000', 'gasUsed': '21000', 'confirmations': '15970357', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0x620b91b12af7aac75553b47f15742e2825ea38919cfc8082c0666f404a0db28b', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n",
|
||||
" Document(page_content=\"{'blockNumber': '1767936', 'timeStamp': '1466845682', 'hash': '0x758efa27576cd17ebe7b842db4892eac6609e3962a4f9f57b7c84b7b1909512f', 'nonce': '1211', 'blockHash': '0xb01d8fd47b3554a99352ac3e5baf5524f314cfbc4262afcfbea1467b2d682898', 'transactionIndex': '0', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '11914401843000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '21000', 'gasUsed': '21000', 'confirmations': '15967316', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0x758efa27576cd17ebe7b842db4892eac6609e3962a4f9f57b7c84b7b1909512f', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n",
|
||||
" Document(page_content=\"{'blockNumber': '1770911', 'timeStamp': '1466888890', 'hash': '0x9d84470b54ab44b9074b108a0e506cd8badf30457d221e595bb68d63e926b865', 'nonce': '1212', 'blockHash': '0x79a9de39276132dab8bf00dc3e060f0e8a14f5e16a0ee4e9cc491da31b25fe58', 'transactionIndex': '0', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '10918214730000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '21000', 'gasUsed': '21000', 'confirmations': '15964341', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0x9d84470b54ab44b9074b108a0e506cd8badf30457d221e595bb68d63e926b865', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n",
|
||||
" Document(page_content=\"{'blockNumber': '1774044', 'timeStamp': '1466932983', 'hash': '0x958d85270b58b80f1ad228f716bbac8dd9da7c5f239e9f30d8edeb5bb9301d20', 'nonce': '1240', 'blockHash': '0x69cee390378c3b886f9543fb3a1cb2fc97621ec155f7884564d4c866348ce539', 'transactionIndex': '2', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '9979637283000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '63000', 'gasUsed': '21000', 'confirmations': '15961208', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0x958d85270b58b80f1ad228f716bbac8dd9da7c5f239e9f30d8edeb5bb9301d20', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n",
|
||||
" Document(page_content=\"{'blockNumber': '1777057', 'timeStamp': '1466976422', 'hash': '0xe76ca3603d2f4e7134bdd7a1c3fd553025fc0b793f3fd2a75cd206b8049e74ab', 'nonce': '1248', 'blockHash': '0xc7cacda0ac38c99f1b9bccbeee1562a41781d2cfaa357e8c7b4af6a49584b968', 'transactionIndex': '7', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '4556173496000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '168000', 'gasUsed': '21000', 'confirmations': '15958195', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0xe76ca3603d2f4e7134bdd7a1c3fd553025fc0b793f3fd2a75cd206b8049e74ab', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'}),\n",
|
||||
" Document(page_content=\"{'blockNumber': '1780120', 'timeStamp': '1467020353', 'hash': '0xc5ec8cecdc9f5ed55a5b8b0ad79c964fb5c49dc1136b6a49e981616c3e70bbe6', 'nonce': '1266', 'blockHash': '0xfc0e066e5b613239e1a01e6d582e7ab162ceb3ca4f719dfbd1a0c965adcfe1c5', 'transactionIndex': '1', 'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b', 'value': '11890330240000000000', 'gas': '90000', 'gasPrice': '20000000000', 'isError': '0', 'txreceipt_status': '', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '42000', 'gasUsed': '21000', 'confirmations': '15955132', 'methodId': '0x', 'functionName': ''}\", metadata={'from': '0x16545fb79dbee1ad3a7f868b7661c023f372d5de', 'tx_hash': '0xc5ec8cecdc9f5ed55a5b8b0ad79c964fb5c49dc1136b6a49e981616c3e70bbe6', 'to': '0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"loader = EtherscanLoader(\n",
|
||||
" account_address,\n",
|
||||
" page=2,\n",
|
||||
" offset=20,\n",
|
||||
" start_block=10000,\n",
|
||||
" end_block=8888888888,\n",
|
||||
" sort=\"asc\",\n",
|
||||
")\n",
|
||||
"result = loader.load()\n",
|
||||
"result"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -13,7 +13,7 @@
|
||||
"\n",
|
||||
"## Prerequisites\n",
|
||||
"\n",
|
||||
"You need to have an existing dataset on the Apify platform. If you don't have one, please first check out [this notebook](/docs/modules/agents/tools/integrations/apify.html) on how to use Apify to extract content from documentation, knowledge bases, help centers, or blogs."
|
||||
"You need to have an existing dataset on the Apify platform. If you don't have one, please first check out [this notebook](/docs/integrations/tools/apify.html) on how to use Apify to extract content from documentation, knowledge bases, help centers, or blogs."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### ChatGPT Data\n",
|
||||
"# ChatGPT Data\n",
|
||||
"\n",
|
||||
">[ChatGPT](https://chat.openai.com) is an artificial intelligence (AI) chatbot developed by OpenAI.\n",
|
||||
"\n",
|
||||
|
||||
94
docs/extras/integrations/document_loaders/concurrent.ipynb
Normal file
94
docs/extras/integrations/document_loaders/concurrent.ipynb
Normal file
@@ -0,0 +1,94 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "23c6e167",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Concurrent Loader\n",
|
||||
"\n",
|
||||
"Works just like the GenericLoader but concurrently for those who choose to optimize their workflow.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "6ff3fb1f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import ConcurrentLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "ce96fa20",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = ConcurrentLoader.from_filesystem('example_data/', glob=\"**/*.txt\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "06a6cf5d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"files = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "b87d3e58",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"2"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"len(files)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "668f1ee5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -53,11 +53,23 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Input arguments (mandatory)**\n",
|
||||
"\n",
|
||||
"`Cube Semantic Loader` requires 2 arguments:\n",
|
||||
"| Input Parameter | Description |\n",
|
||||
"| --- | --- |\n",
|
||||
"| `cube_api_url` | The URL of your Cube's deployment REST API. Please refer to the [Cube documentation](https://cube.dev/docs/http-api/rest#configuration-base-path) for more information on configuring the base path. |\n",
|
||||
"| `cube_api_token` | The authentication token generated based on your Cube's API secret. Please refer to the [Cube documentation](https://cube.dev/docs/security#generating-json-web-tokens-jwt) for instructions on generating JSON Web Tokens (JWT). |\n"
|
||||
"\n",
|
||||
"- `cube_api_url`: The URL of your Cube's deployment REST API. Please refer to the [Cube documentation](https://cube.dev/docs/http-api/rest#configuration-base-path) for more information on configuring the base path.\n",
|
||||
"\n",
|
||||
"- `cube_api_token`: The authentication token generated based on your Cube's API secret. Please refer to the [Cube documentation](https://cube.dev/docs/security#generating-json-web-tokens-jwt) for instructions on generating JSON Web Tokens (JWT).\n",
|
||||
"\n",
|
||||
"**Input arguments (optional)**\n",
|
||||
"\n",
|
||||
"- `load_dimension_values`: Whether to load dimension values for every string dimension or not.\n",
|
||||
"\n",
|
||||
"- `dimension_values_limit`: Maximum number of dimension values to load.\n",
|
||||
"\n",
|
||||
"- `dimension_values_max_retries`: Maximum number of retries to load dimension values.\n",
|
||||
"\n",
|
||||
"- `dimension_values_retry_delay`: Delay between retries to load dimension values."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -85,9 +97,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Returns:\n",
|
||||
"\n",
|
||||
"A list of documents with the following attributes:\n",
|
||||
"Returns a list of documents with the following attributes:\n",
|
||||
"\n",
|
||||
"- `page_content`\n",
|
||||
"- `metadata`\n",
|
||||
@@ -95,7 +105,8 @@
|
||||
" - `column_name`\n",
|
||||
" - `column_data_type`\n",
|
||||
" - `column_title`\n",
|
||||
" - `column_description`"
|
||||
" - `column_description`\n",
|
||||
" - `column_values`"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -103,7 +114,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"> page_content='table name: orders_view, column name: orders_view.total_amount, column data type: number, column title: Orders View Total Amount, column description: None' metadata={'table_name': 'orders_view', 'column_name': 'orders_view.total_amount', 'column_data_type': 'number', 'column_title': 'Orders View Total Amount', 'column_description': 'None'}"
|
||||
"> page_content='Users View City, None' metadata={'table_name': 'users_view', 'column_name': 'users_view.city', 'column_data_type': 'string', 'column_title': 'Users View City', 'column_description': 'None', 'column_member_type': 'dimension', 'column_values': ['Austin', 'Chicago', 'Los Angeles', 'Mountain View', 'New York', 'Palo Alto', 'San Francisco', 'Seattle']}"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
149
docs/extras/integrations/document_loaders/dropbox.ipynb
Normal file
149
docs/extras/integrations/document_loaders/dropbox.ipynb
Normal file
File diff suppressed because one or more lines are too long
@@ -2,7 +2,7 @@
|
||||
|
||||
This notebook covers how to load data from an .ipynb notebook into a format suitable by LangChain.
|
||||
|
||||
<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! Instead, edit the notebook w/the location & name as this file. -->
|
||||
|
||||
|
||||
|
||||
```python
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
<opml version="1.0">
|
||||
<head>
|
||||
<title>Sample RSS feed subscriptions</title>
|
||||
</head>
|
||||
<body>
|
||||
<outline text="Tech" title="Tech">
|
||||
<outline type="rss" text="Engadget" title="Engadget" xmlUrl="http://www.engadget.com/rss-full.xml" htmlUrl="http://www.engadget.com"/>
|
||||
<outline type="rss" text="Ars Technica - All content" title="Ars Technica - All content" xmlUrl="http://feeds.arstechnica.com/arstechnica/index/" htmlUrl="https://arstechnica.com"/>
|
||||
</outline>
|
||||
</body>
|
||||
</opml>
|
||||
@@ -46,7 +46,7 @@
|
||||
"id": "04981332",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Create a GeoPandas dataframe from [`Open City Data`](https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/open_city_data) as an example input."
|
||||
"Create a GeoPandas dataframe from [`Open City Data`](https://python.langchain.com/docs/integrations/document_loaders/open_city_data) as an example input."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -0,0 +1,178 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c83b6a4c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Huawei OBS Directory\n",
|
||||
"The following code demonstrates how to load objects from the Huawei OBS (Object Storage Service) as documents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c2191935",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Install the required package\n",
|
||||
"# pip install esdk-obs-python"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "55fca3b4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import OBSDirectoryLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "c3ed419f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"endpoint = \"your-endpoint\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "3428fd4e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Configure your access credentials\\n\n",
|
||||
"config = {\n",
|
||||
" \"ak\": \"your-access-key\",\n",
|
||||
" \"sk\": \"your-secret-key\"\n",
|
||||
"}\n",
|
||||
"loader = OBSDirectoryLoader(\"your-bucket-name\", endpoint=endpoint, config=config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9beede9f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1e20a839",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Specify a Prefix for Loading\n",
|
||||
"If you want to load objects with a specific prefix from the bucket, you can use the following code:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "125f311d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = OBSDirectoryLoader(\"your-bucket-name\", endpoint=endpoint, config=config, prefix=\"test_prefix\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b3488037",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "84c82c0a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Get Authentication Information from ECS\n",
|
||||
"If your langchain is deployed on Huawei Cloud ECS and [Agency is set up](https://support.huaweicloud.com/intl/en-us/usermanual-ecs/ecs_03_0166.html#section7), the loader can directly get the security token from ECS without needing access key and secret key. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "1db99969",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"config = {\"get_token_from_ecs\": True}\n",
|
||||
"loader = OBSDirectoryLoader(\"your-bucket-name\", endpoint=endpoint, config=config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "57dd9f35",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "30205d25",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use a Public Bucket\n",
|
||||
"If your bucket's bucket policy allows anonymous access (anonymous users have `listBucket` and `GetObject` permissions), you can directly load the objects without configuring the `config` parameter."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "4dfa2ef0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = OBSDirectoryLoader(\"your-bucket-name\", endpoint=endpoint)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "67d4c1d0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader.load()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
180
docs/extras/integrations/document_loaders/huawei_obs_file.ipynb
Normal file
180
docs/extras/integrations/document_loaders/huawei_obs_file.ipynb
Normal file
@@ -0,0 +1,180 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4394a872",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Huawei OBS File\n",
|
||||
"The following code demonstrates how to load an object from the Huawei OBS (Object Storage Service) as document."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c43d811b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Install the required package\n",
|
||||
"# pip install esdk-obs-python"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "5e16bae6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders.obs_file import OBSFileLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "75cc7e7c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"endpoint = \"your-endpoint\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "f9816984",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from obs import ObsClient\n",
|
||||
"obs_client = ObsClient(access_key_id=\"your-access-key\", secret_access_key=\"your-secret-key\", server=endpoint)\n",
|
||||
"loader = OBSFileLoader(\"your-bucket-name\", \"your-object-key\", client=obs_client)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6143b39b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "633e05ca",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Each Loader with Separate Authentication Information\n",
|
||||
"If you don't need to reuse OBS connections between different loaders, you can directly configure the `config`. The loader will use the config information to initialize its own OBS client."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "a5dd6a5d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Configure your access credentials\\n\n",
|
||||
"config = {\n",
|
||||
" \"ak\": \"your-access-key\",\n",
|
||||
" \"sk\": \"your-secret-key\"\n",
|
||||
"}\n",
|
||||
"loader = OBSFileLoader(\"your-bucket-name\", \"your-object-key\",endpoint=endpoint, config=config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9a741f1c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1e2e611c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Get Authentication Information from ECS\n",
|
||||
"If your langchain is deployed on Huawei Cloud ECS and [Agency is set up](https://support.huaweicloud.com/intl/en-us/usermanual-ecs/ecs_03_0166.html#section7), the loader can directly get the security token from ECS without needing access key and secret key. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "338fafef",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"config = {\"get_token_from_ecs\": True}\n",
|
||||
"loader = OBSFileLoader(\"your-bucket-name\", \"your-object-key\", endpoint=endpoint, config=config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "73976c55",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b77aa18c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Access a Publicly Accessible Object\n",
|
||||
"If the object you want to access allows anonymous user access (anonymous users have `GetObject` permission), you can directly load the object without configuring the `config` parameter."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "df83d121",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = OBSFileLoader(\"your-bucket-name\", \"your-object-key\", endpoint=endpoint)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "82a844ba",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader.load()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -57,7 +57,13 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"loader = MWDumpLoader(\"example_data/testmw_pages_current.xml\", encoding=\"utf8\")\n",
|
||||
"loader = MWDumpLoader(\n",
|
||||
" file_path = \"example_data/testmw_pages_current.xml\", \n",
|
||||
" encoding=\"utf8\",\n",
|
||||
" #namespaces = [0,2,3] Optional list to load only specific namespaces. Loads all namespaces by default.\n",
|
||||
" skip_redirects = True, #will skip over pages that just redirect to other pages (or not if False)\n",
|
||||
" stop_on_error = False #will skip over pages that cause parsing errors (or not if False)\n",
|
||||
" )\n",
|
||||
"documents = loader.load()\n",
|
||||
"print(f\"You have {len(documents)} document(s) in your data \")"
|
||||
]
|
||||
|
||||
192
docs/extras/integrations/document_loaders/news.ipynb
Normal file
192
docs/extras/integrations/document_loaders/news.ipynb
Normal file
@@ -0,0 +1,192 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2dfc4698",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# News URL\n",
|
||||
"\n",
|
||||
"This covers how to load HTML news articles from a list of URLs into a document format that we can use downstream."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "16c3699e",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-02T21:18:18.886031400Z",
|
||||
"start_time": "2023-08-02T21:18:17.682345Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import NewsURLLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "836fbac1",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-02T21:18:18.895539800Z",
|
||||
"start_time": "2023-08-02T21:18:18.895539800Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"urls = [\n",
|
||||
" \"https://www.bbc.com/news/world-us-canada-66388172\",\n",
|
||||
" \"https://www.bbc.com/news/entertainment-arts-66384971\",\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "33089aba-ff74-4d00-8f40-9449c29587cc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Pass in urls to load them into Documents"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "00f46fda",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-02T21:18:19.227074500Z",
|
||||
"start_time": "2023-08-02T21:18:18.895539800Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"First article: page_content='In testimony to the congressional committee examining the 6 January riot, Mrs Powell said she did not review all of the many claims of election fraud she made, telling them that \"no reasonable person\" would view her claims as fact. Neither she nor her representatives have commented.' metadata={'title': 'Donald Trump indictment: What do we know about the six co-conspirators?', 'link': 'https://www.bbc.com/news/world-us-canada-66388172', 'authors': [], 'language': 'en', 'description': 'Six people accused of helping Mr Trump undermine the election have been described by prosecutors.', 'publish_date': None}\n",
|
||||
"\n",
|
||||
"Second article: page_content='Ms Williams added: \"If there\\'s anything that I can do in my power to ensure that dancers or singers or whoever decides to work with her don\\'t have to go through that same experience, I\\'m going to do that.\"' metadata={'title': \"Lizzo dancers Arianna Davis and Crystal Williams: 'No one speaks out, they are scared'\", 'link': 'https://www.bbc.com/news/entertainment-arts-66384971', 'authors': [], 'language': 'en', 'description': 'The US pop star is being sued for sexual harassment and fat-shaming but has yet to comment.', 'publish_date': None}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"loader = NewsURLLoader(urls=urls)\n",
|
||||
"data = loader.load()\n",
|
||||
"print(\"First article: \", data[0])\n",
|
||||
"print(\"\\nSecond article: \", data[1])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"Use nlp=True to run nlp analysis and generate keywords + summary"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "98ac26c488315bff"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "b68a26b3",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-02T21:18:19.585758200Z",
|
||||
"start_time": "2023-08-02T21:18:19.227074500Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"First article: page_content='In testimony to the congressional committee examining the 6 January riot, Mrs Powell said she did not review all of the many claims of election fraud she made, telling them that \"no reasonable person\" would view her claims as fact. Neither she nor her representatives have commented.' metadata={'title': 'Donald Trump indictment: What do we know about the six co-conspirators?', 'link': 'https://www.bbc.com/news/world-us-canada-66388172', 'authors': [], 'language': 'en', 'description': 'Six people accused of helping Mr Trump undermine the election have been described by prosecutors.', 'publish_date': None, 'keywords': ['powell', 'know', 'donald', 'trump', 'review', 'indictment', 'telling', 'view', 'reasonable', 'person', 'testimony', 'coconspirators', 'riot', 'representatives', 'claims'], 'summary': 'In testimony to the congressional committee examining the 6 January riot, Mrs Powell said she did not review all of the many claims of election fraud she made, telling them that \"no reasonable person\" would view her claims as fact.\\nNeither she nor her representatives have commented.'}\n",
|
||||
"\n",
|
||||
"Second article: page_content='Ms Williams added: \"If there\\'s anything that I can do in my power to ensure that dancers or singers or whoever decides to work with her don\\'t have to go through that same experience, I\\'m going to do that.\"' metadata={'title': \"Lizzo dancers Arianna Davis and Crystal Williams: 'No one speaks out, they are scared'\", 'link': 'https://www.bbc.com/news/entertainment-arts-66384971', 'authors': [], 'language': 'en', 'description': 'The US pop star is being sued for sexual harassment and fat-shaming but has yet to comment.', 'publish_date': None, 'keywords': ['davis', 'lizzo', 'singers', 'experience', 'crystal', 'ensure', 'arianna', 'theres', 'williams', 'power', 'going', 'dancers', 'im', 'speaks', 'work', 'ms', 'scared'], 'summary': 'Ms Williams added: \"If there\\'s anything that I can do in my power to ensure that dancers or singers or whoever decides to work with her don\\'t have to go through that same experience, I\\'m going to do that.\"'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"loader = NewsURLLoader(urls=urls, nlp=True)\n",
|
||||
"data = loader.load()\n",
|
||||
"print(\"First article: \", data[0])\n",
|
||||
"print(\"\\nSecond article: \", data[1])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "['powell',\n 'know',\n 'donald',\n 'trump',\n 'review',\n 'indictment',\n 'telling',\n 'view',\n 'reasonable',\n 'person',\n 'testimony',\n 'coconspirators',\n 'riot',\n 'representatives',\n 'claims']"
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"data[0].metadata['keywords']"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-02T21:18:19.585758200Z",
|
||||
"start_time": "2023-08-02T21:18:19.585758200Z"
|
||||
}
|
||||
},
|
||||
"id": "ae37e004e0284b1d"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "'In testimony to the congressional committee examining the 6 January riot, Mrs Powell said she did not review all of the many claims of election fraud she made, telling them that \"no reasonable person\" would view her claims as fact.\\nNeither she nor her representatives have commented.'"
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"data[0].metadata['summary']"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-02T21:18:19.598966800Z",
|
||||
"start_time": "2023-08-02T21:18:19.594950200Z"
|
||||
}
|
||||
},
|
||||
"id": "7676155fb175e53e"
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
170
docs/extras/integrations/document_loaders/rss.ipynb
Normal file
170
docs/extras/integrations/document_loaders/rss.ipynb
Normal file
@@ -0,0 +1,170 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2dfc4698",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# RSS Feeds\n",
|
||||
"\n",
|
||||
"This covers how to load HTML news articles from a list of RSS feed URLs into a document format that we can use downstream."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "16c3699e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import RSSFeedLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "836fbac1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"urls = [\"https://www.engadget.com/rss.xml\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "33089aba-ff74-4d00-8f40-9449c29587cc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Pass in urls to load them into Documents"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "00f46fda",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = RSSFeedLoader(urls=urls)\n",
|
||||
"data = loader.load()\n",
|
||||
"print(len(data))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data[0]"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "b447468cc42266d0"
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"You can pass arguments to the NewsURLLoader which it uses to load articles."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "c36d3b0d329faf2a"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = RSSFeedLoader(urls=urls, nlp=True)\n",
|
||||
"data = loader.load()\n",
|
||||
"print(len(data))"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "5fdada62470d3019"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data[0].metadata['keywords']"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "11d71963f7735c1d"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data[0].metadata['summary']"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "9fb64ba0e8780966"
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"You can also use an OPML file such as a Feedly export. Pass in either a URL or the OPML contents."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "98ac26c488315bff"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8b6f07ae526a897c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with open(\"example_data/sample_rss_feeds.opml\", \"r\") as f:\n",
|
||||
" loader = RSSFeedLoader(opml=f.read())\n",
|
||||
"data = loader.load()\n",
|
||||
"print(len(data))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data[0]"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "b68a26b3"
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -113,7 +113,7 @@
|
||||
"\n",
|
||||
"The modules are (from least to most complex):\n",
|
||||
"\n",
|
||||
"- [Models](https://python.langchain.com/en/latest/modules/models.html): Supported model types and integrations.\n",
|
||||
"- [Models](https://python.langchain.com/docs/modules/model_io/models/): Supported model types and integrations.\n",
|
||||
"\n",
|
||||
"- [Prompts](https://python.langchain.com/en/latest/modules/prompts.html): Prompt management, optimization, and serialization.\n",
|
||||
"\n",
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user