Compare commits
199 Commits
| Author | SHA1 | Date |
|---|---|---|
|  | 3c6eccd701 |  |
|  | 7de6a1b78e |  |
|  | a2681f950d |  |
|  | 3f64b8a761 |  |
|  | 0a1be1d501 |  |
|  | e3056340da |  |
|  | 99b5a7226c |  |
|  | 95cf7de112 |  |
|  | 8f0cd91d57 |  |
|  | 15f650ae8c |  |
|  | 7543a3d70e |  |
|  | ab193338aa |  |
|  | bb12184551 |  |
|  | 33a2f58fbf |  |
|  | fad26e79a3 |  |
|  | b2eb4ff0fc |  |
|  | 2d078c7767 |  |
|  | a7824f16f2 |  |
|  | 642b57c7ff |  |
|  | 4a07fba9f0 |  |
|  | c5c0735fc4 |  |
|  | 6327eecdaf |  |
|  | beab637f04 |  |
|  | 4a63533216 |  |
|  | bf4a112aa6 |  |
|  | d1e305028f |  |
|  | 6b9f266837 |  |
|  | 6116cbf0de |  |
|  | 67718c1d6b |  |
|  | 61c2d918c6 |  |
|  | 52d6b91c18 |  |
|  | e74a605379 |  |
|  | 022ef170f8 |  |
|  | fa30a57034 |  |
|  | 1f9124ceaa |  |
|  | b52a3785c9 |  |
|  | ff44fe4e16 |  |
|  | d56eff042a |  |
|  | ce3666c28b |  |
|  | cff52638b2 |  |
|  | bbd22b9b76 |  |
|  | 33cdb06b5c |  |
|  | cc908d49a3 |  |
|  | 7fc07ba5df |  |
|  | fe78aff1f2 |  |
|  | 40079d4936 |  |
|  | 84c1ad7eaa |  |
|  | 9892e95d03 |  |
|  | f616aee35a |  |
|  | ab47557db3 |  |
|  | 40096c73cd |  |
|  | fbc83dfdbb |  |
|  | 91be7eee66 |  |
|  | fc2f450f2d |  |
|  | aeaef8f3a3 |  |
|  | 472f00ada7 |  |
|  | 6e3fa59073 |  |
|  | a616e19975 |  |
|  | 100d9ce4c7 |  |
|  | c9da300e4d |  |
|  | 5a9765b1b5 |  |
|  | 454998c1fb |  |
|  | 0adc282d70 |  |
|  | bd4865b6fe |  |
|  | 485d716c21 |  |
|  | b57fa1a39c |  |
|  | 6b93670410 |  |
|  | 2bb1d256f3 |  |
|  | 4a7ebb7184 |  |
|  | 797c9e92c8 |  |
|  | 5f1aab5487 |  |
|  | 983678dedc |  |
|  | f76d50d8dc |  |
|  | 15c271e7b3 |  |
|  | d7b613a293 |  |
|  | 2f309a4ce6 |  |
|  | 2111ed3c75 |  |
|  | d9bc46186d |  |
|  | 1bd4890506 |  |
|  | b0d0338f21 |  |
|  | a22d502248 |  |
|  | 9b86235a56 |  |
|  | 9fc9018951 |  |
|  | ef5bc1fef1 |  |
|  | 1d68470bac |  |
|  | c8f3615aa6 |  |
|  | d00a247da7 |  |
|  | 21771a6f1c |  |
|  | e5fed7d535 |  |
|  | 19dfe166c9 |  |
|  | 91a0817e39 |  |
|  | f437311eef |  |
|  | 003e1ca9a0 |  |
|  | 8374367de2 |  |
|  | 82ef1f587d |  |
|  | b0d0399d34 |  |
|  | a6ee646ef3 |  |
|  | bd61757423 |  |
|  | affaaea87b |  |
|  | 8c35fcb571 |  |
|  | e45be8b3f6 |  |
|  | 0d5a90f30a |  |
|  | 6b007e2829 |  |
|  | be638ad77d |  |
|  | 115a77142a |  |
|  | f0b0c72d98 |  |
|  | 6aee589eec |  |
|  | 5b7ff215e8 |  |
|  | 0f0ccfe7f6 |  |
|  | 2759e2d857 |  |
|  | 0f68054401 |  |
|  | 0ead8ea708 |  |
|  | c7ea6e9ff8 |  |
|  | 812419d946 |  |
|  | 873a80e496 |  |
|  | d1b95db874 |  |
|  | 6c3573e7f6 |  |
|  | 179a39954d |  |
|  | 6f0bccfeb5 |  |
|  | e68a1d73d0 |  |
|  | 29f51055e8 |  |
|  | 5d765408ce |  |
|  | 404d103c41 |  |
|  | 47eea32f6a |  |
|  | b786335dd1 |  |
|  | f81e613086 |  |
|  | 8ef7e14a85 |  |
|  | 53e4148a1b |  |
|  | 4e8f11b36a |  |
|  | 2928a1a3c9 |  |
|  | 814faa9de5 |  |
|  | 8a8917e0d9 |  |
|  | b2b71b0d35 |  |
|  | 8022293124 |  |
|  | 1f54ec899b |  |
|  | a137492b53 |  |
|  | e283dc8d50 |  |
|  | 81e0cbf2d5 |  |
|  | 37aade19da |  |
|  | 43dffe39fb |  |
|  | 71f98db2fe |  |
|  | f68f3b23d7 |  |
|  | 206901fa01 |  |
|  | 7ea2b08d1f |  |
|  | 368aa4ede7 |  |
|  | 5018af8839 |  |
|  | 96b0ff182e |  |
|  | 59194c2214 |  |
|  | ee1d13678e |  |
|  | 1335f2b9f8 |  |
|  | 16551536e3 |  |
|  | c5fb3b6069 |  |
|  | 1ec0b18379 |  |
|  | f31047a394 |  |
|  | 5c516945d0 |  |
|  | d2adec3818 |  |
|  | 7c5c0557cb |  |
|  | 68113348cc |  |
|  | b574507c51 |  |
|  | 31820a31e4 |  |
|  | 13ccf202de |  |
|  | 6705928b9d |  |
|  | 8961c720b8 |  |
|  | 73072d3db8 |  |
|  | 2de028834f |  |
|  | a7000ee89e |  |
|  | 9c2b29a1cb |  |
|  | f190bc3e83 |  |
|  | 7df2dfc4c2 |  |
|  | ed9a0f8185 |  |
|  | 465faab935 |  |
|  | 0ec020698f |  |
|  | bd2e298468 |  |
|  | 66226d1d4d |  |
|  | e83250cc5f |  |
|  | 2a26cc6d2b |  |
|  | 3fbb737bb3 |  |
|  | 53f3793504 |  |
|  | ec40ead980 |  |
|  | ff5024634e |  |
|  | 1e8fca5518 |  |
|  | 8061994c61 |  |
|  | 8d2344db43 |  |
|  | b4a126ae71 |  |
|  | 7e70cd2a28 |  |
|  | de61ebd9e0 |  |
|  | c19a0b9c10 |  |
|  | 5a490a79f4 |  |
|  | 64d0a0fcc0 |  |
|  | bca0749a11 |  |
|  | 07d6d1ca38 |  |
|  | 144b4c0c78 |  |
|  | 844eca98d5 |  |
|  | 1ab773c742 |  |
|  | 15de57b848 |  |
|  | 4780156955 |  |
|  | 5e3b968078 |  |
|  | 913a156cff |  |
|  | 893f3014af |  |
The CI and Release workflows for the experimental package are renamed:

```diff
@@ -1,5 +1,5 @@
 ---
-name: libs/langchain-experimental CI
+name: libs/experimental CI
 
 on:
   push:
```

```diff
@@ -1,5 +1,5 @@
 ---
-name: libs/langchain-experimental Release
+name: libs/experimental Release
 
 on:
   pull_request:
```
38 .github/workflows/scheduled_test.yml (new file)

```yaml
name: Scheduled tests

on:
  schedule:
    # '0 13 * * *' fires once a day at 13:00 UTC
    - cron: '0 13 * * *'

env:
  POETRY_VERSION: "1.4.2"

jobs:
  build:
    runs-on: ubuntu-latest
    environment: Scheduled testing
    strategy:
      matrix:
        python-version:
          - "3.8"
          - "3.9"
          - "3.10"
          - "3.11"
    name: Python ${{ matrix.python-version }}
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: "./.github/actions/poetry_setup"
        with:
          python-version: ${{ matrix.python-version }}
          poetry-version: "1.4.2"
          install-command: |
            echo "Running scheduled tests, installing dependencies with poetry..."
            poetry install -E scheduled_testing
      - name: Run tests
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: |
          make scheduled_tests
        shell: bash
    secrets: inherit
```
7 Makefile (@@ -43,7 +43,12 @@ spell_fix:) — the resulting `help` target:

```makefile
help:
	@echo '----'
	@echo 'coverage - run unit tests and generate coverage report'
	@echo 'clean - run docs_clean and api_docs_clean'
	@echo 'docs_build - build the documentation'
	@echo 'docs_clean - clean the documentation build artifacts'
	@echo 'docs_linkcheck - run linkchecker on the documentation'
	@echo 'api_docs_build - build the API Reference documentation'
	@echo 'api_docs_clean - clean the API Reference documentation build artifacts'
	@echo 'api_docs_linkcheck - run linkchecker on the API Reference documentation'
	@echo 'spell_check - run codespell on the project'
	@echo 'spell_fix - run codespell on the project and fix the errors'
```
```diff
@@ -18,8 +18,8 @@
 
 Looking for the JS/TS version? Check out [LangChain.js](https://github.com/hwchase17/langchainjs).
 
-**Production Support:** As you move your LangChains into production, we'd love to offer more comprehensive support.
-Please fill out [this form](https://6w1pwbss0py.typeform.com/to/rrbrdTH2) and we'll set up a dedicated support Slack channel.
+**Production Support:** As you move your LangChains into production, we'd love to offer more hands-on support.
+Fill out [this form](https://airtable.com/appwQzlErAS2qiP0L/shrGtGaVBVAz7NcV2) to share more about what you're building, and our team will get in touch.
 
 ## 🚨Breaking Changes for select chains (SQLDatabase) on 7/28
```
|
||||
@@ -100,6 +100,9 @@ extensions = [
|
||||
]
|
||||
source_suffix = [".rst"]
|
||||
|
||||
# some autodoc pydantic options are repeated in the actual template.
|
||||
# potentially user error, but there may be bugs in the sphinx extension
|
||||
# with options not being passed through correctly (from either the location in the code)
|
||||
autodoc_pydantic_model_show_json = False
|
||||
autodoc_pydantic_field_list_validators = False
|
||||
autodoc_pydantic_config_members = False
|
||||
@@ -112,13 +115,6 @@ autodoc_member_order = "groupwise"
|
||||
autoclass_content = "both"
|
||||
autodoc_typehints_format = "short"
|
||||
|
||||
autodoc_default_options = {
|
||||
"members": True,
|
||||
"show-inheritance": True,
|
||||
"inherited-members": "BaseModel",
|
||||
"undoc-members": True,
|
||||
"special-members": "__call__",
|
||||
}
|
||||
# autodoc_typehints = "description"
|
||||
# Add any paths that contain templates here, relative to this directory.
|
||||
templates_path = ["templates"]
|
||||
|
||||
```diff
@@ -1,49 +1,209 @@
-"""Script for auto-generating api_reference.rst"""
-import glob
-import re
+"""Script for auto-generating api_reference.rst."""
+import importlib
+import inspect
+import typing
 from pathlib import Path
+from typing import TypedDict, Sequence, List, Dict, Literal, Union
+from enum import Enum
+
+from pydantic import BaseModel
 
 ROOT_DIR = Path(__file__).parents[2].absolute()
+HERE = Path(__file__).parent
+
 PKG_DIR = ROOT_DIR / "libs" / "langchain" / "langchain"
 EXP_DIR = ROOT_DIR / "libs" / "experimental" / "langchain_experimental"
-WRITE_FILE = Path(__file__).parent / "api_reference.rst"
-EXP_WRITE_FILE = Path(__file__).parent / "experimental_api_reference.rst"
+WRITE_FILE = HERE / "api_reference.rst"
+EXP_WRITE_FILE = HERE / "experimental_api_reference.rst"
 
 
-def load_members(dir: Path) -> dict:
-    members: dict = {}
-    for py in glob.glob(str(dir) + "/**/*.py", recursive=True):
-        module = py[len(str(dir)) + 1 :].replace(".py", "").replace("/", ".")
-        top_level = module.split(".")[0]
-        if top_level not in members:
-            members[top_level] = {"classes": [], "functions": []}
-        with open(py, "r") as f:
-            for line in f.readlines():
-                cls = re.findall(r"^class ([^_].*)\(", line)
-                members[top_level]["classes"].extend([module + "." + c for c in cls])
-                func = re.findall(r"^def ([^_].*)\(", line)
-                afunc = re.findall(r"^async def ([^_].*)\(", line)
-                func_strings = [module + "." + f for f in func + afunc]
-                members[top_level]["functions"].extend(func_strings)
-    return members
+ClassKind = Literal["TypedDict", "Regular", "Pydantic", "enum"]
 
 
-def construct_doc(pkg: str, members: dict) -> str:
+class ClassInfo(TypedDict):
+    """Information about a class."""
+
+    name: str
+    """The name of the class."""
+    qualified_name: str
+    """The fully qualified name of the class."""
+    kind: ClassKind
+    """The kind of the class."""
+    is_public: bool
+    """Whether the class is public or not."""
+
+
+class FunctionInfo(TypedDict):
+    """Information about a function."""
+
+    name: str
+    """The name of the function."""
+    qualified_name: str
+    """The fully qualified name of the function."""
+    is_public: bool
+    """Whether the function is public or not."""
+
+
+class ModuleMembers(TypedDict):
+    """A dictionary of module members."""
+
+    classes_: Sequence[ClassInfo]
+    functions: Sequence[FunctionInfo]
+
+
+def _load_module_members(module_path: str, namespace: str) -> ModuleMembers:
+    """Load all members of a module.
+
+    Args:
+        module_path: Path to the module.
+        namespace: the namespace of the module.
+
+    Returns:
+        list: A list of loaded module objects.
+    """
+    classes_: List[ClassInfo] = []
+    functions: List[FunctionInfo] = []
+    module = importlib.import_module(module_path)
+    for name, type_ in inspect.getmembers(module):
+        if not hasattr(type_, "__module__"):
+            continue
+        if type_.__module__ != module_path:
+            continue
+
+        if inspect.isclass(type_):
+            if type(type_) == typing._TypedDictMeta:  # type: ignore
+                kind: ClassKind = "TypedDict"
+            elif issubclass(type_, Enum):
+                kind = "enum"
+            elif issubclass(type_, BaseModel):
+                kind = "Pydantic"
+            else:
+                kind = "Regular"
+
+            classes_.append(
+                ClassInfo(
+                    name=name,
+                    qualified_name=f"{namespace}.{name}",
+                    kind=kind,
+                    is_public=not name.startswith("_"),
+                )
+            )
+        elif inspect.isfunction(type_):
+            functions.append(
+                FunctionInfo(
+                    name=name,
+                    qualified_name=f"{namespace}.{name}",
+                    is_public=not name.startswith("_"),
+                )
+            )
+        else:
+            continue
+
+    return ModuleMembers(
+        classes_=classes_,
+        functions=functions,
+    )
+
+
+def _merge_module_members(
+    module_members: Sequence[ModuleMembers],
+) -> ModuleMembers:
+    """Merge module members."""
+    classes_: List[ClassInfo] = []
+    functions: List[FunctionInfo] = []
+    for module in module_members:
+        classes_.extend(module["classes_"])
+        functions.extend(module["functions"])
+
+    return ModuleMembers(
+        classes_=classes_,
+        functions=functions,
+    )
+
+
+def _load_package_modules(
+    package_directory: Union[str, Path]
+) -> Dict[str, ModuleMembers]:
+    """Recursively load modules of a package based on the file system.
+
+    Traversal based on the file system makes it easy to determine which
+    of the modules/packages are part of the package vs. 3rd party or built-in.
+
+    Parameters:
+        package_directory: Path to the package directory.
+
+    Returns:
+        list: A list of loaded module objects.
+    """
+    package_path = (
+        Path(package_directory)
+        if isinstance(package_directory, str)
+        else package_directory
+    )
+    modules_by_namespace = {}
+
+    package_name = package_path.name
+
+    for file_path in package_path.rglob("*.py"):
+        if not file_path.name.startswith("__"):
+            relative_module_name = file_path.relative_to(package_path)
+            # Get the full namespace of the module
+            namespace = str(relative_module_name).replace(".py", "").replace("/", ".")
+            # Keep only the top level namespace
+            top_namespace = namespace.split(".")[0]
+
+            try:
+                module_members = _load_module_members(
+                    f"{package_name}.{namespace}", namespace
+                )
+                # Merge module members if the namespace already exists
+                if top_namespace in modules_by_namespace:
+                    existing_module_members = modules_by_namespace[top_namespace]
+                    _module_members = _merge_module_members(
+                        [existing_module_members, module_members]
+                    )
+                else:
+                    _module_members = module_members
+
+                modules_by_namespace[top_namespace] = _module_members
+
+            except ImportError as e:
+                print(f"Error: Unable to import module '{namespace}' with error: {e}")
+
+    return modules_by_namespace
+
+
+def _construct_doc(pkg: str, members_by_namespace: Dict[str, ModuleMembers]) -> str:
+    """Construct the contents of the reference.rst file for the given package.
+
+    Args:
+        pkg: The package name
+        members_by_namespace: The members of the package, dict organized by top level
+                              module contains a list of classes and functions
+                              inside of the top level namespace.
+
+    Returns:
+        The contents of the reference.rst file.
+    """
     full_doc = f"""\
-=============
+=======================
 ``{pkg}`` API Reference
-=============
+=======================
 
 """
-    for module, _members in sorted(members.items(), key=lambda kv: kv[0]):
-        classes = _members["classes"]
+    namespaces = sorted(members_by_namespace)
+
+    for module in namespaces:
+        _members = members_by_namespace[module]
+        classes = _members["classes_"]
         functions = _members["functions"]
         if not (classes or functions):
             continue
         section = f":mod:`{pkg}.{module}`"
+        underline = "=" * (len(section) + 1)
         full_doc += f"""\
 {section}
-{'=' * (len(section) + 1)}
+{underline}
 
 .. automodule:: {pkg}.{module}
     :no-members:
@@ -52,7 +212,6 @@ def construct_doc(pkg: str, members: dict) -> str:
 """
 
     if classes:
-        cstring = "\n    ".join(sorted(classes))
         full_doc += f"""\
 Classes
 --------------
@@ -60,13 +219,31 @@ Classes
 
 .. autosummary::
     :toctree: {module}
-    :template: class.rst
-    
-{cstring}
-
 """
+
+        for class_ in classes:
+            if not class_['is_public']:
+                continue
+
+            if class_["kind"] == "TypedDict":
+                template = "typeddict.rst"
+            elif class_["kind"] == "enum":
+                template = "enum.rst"
+            elif class_["kind"] == "Pydantic":
+                template = "pydantic.rst"
+            else:
+                template = "class.rst"
+
+            full_doc += f"""\
+    :template: {template}
+
+    {class_["qualified_name"]}
+
+"""
 
     if functions:
-        fstring = "\n    ".join(sorted(functions))
+        _functions = [f["qualified_name"] for f in functions if f["is_public"]]
+        fstring = "\n    ".join(sorted(_functions))
         full_doc += f"""\
 Functions
 --------------
@@ -83,12 +260,15 @@ Functions
 
 
 def main() -> None:
-    lc_members = load_members(PKG_DIR)
-    lc_doc = ".. _api_reference:\n\n" + construct_doc("langchain", lc_members)
+    """Generate the reference.rst file for each package."""
+    lc_members = _load_package_modules(PKG_DIR)
+    lc_doc = ".. _api_reference:\n\n" + _construct_doc("langchain", lc_members)
     with open(WRITE_FILE, "w") as f:
         f.write(lc_doc)
-    exp_members = load_members(EXP_DIR)
-    exp_doc = ".. _experimental_api_reference:\n\n" + construct_doc("langchain_experimental", exp_members)
+    exp_members = _load_package_modules(EXP_DIR)
+    exp_doc = ".. _experimental_api_reference:\n\n" + _construct_doc(
+        "langchain_experimental", exp_members
+    )
     with open(EXP_WRITE_FILE, "w") as f:
         f.write(exp_doc)
```
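The core of the new loader is the rule deciding which template a member gets. Below is a minimal, self-contained sketch of that classification step; the `classify` helper name is mine, not the script's, and like the script it relies on the private `typing._TypedDictMeta` and on pydantic v1's `BaseModel`:

```python
import typing
from enum import Enum
from typing import Literal, TypedDict

from pydantic import BaseModel

ClassKind = Literal["TypedDict", "Regular", "Pydantic", "enum"]


def classify(type_: type) -> ClassKind:
    """Mirror of the kind-detection branch in _load_module_members."""
    if type(type_) == typing._TypedDictMeta:  # same private API the script uses
        return "TypedDict"
    if issubclass(type_, Enum):
        return "enum"
    if issubclass(type_, BaseModel):
        return "Pydantic"
    return "Regular"


class Point(TypedDict):
    x: int

class Color(Enum):
    RED = 1

class Config(BaseModel):
    name: str = "x"

assert classify(Point) == "TypedDict"   # rendered with typeddict.rst
assert classify(Color) == "enum"        # rendered with enum.rst
assert classify(Config) == "Pydantic"   # rendered with pydantic.rst
assert classify(int) == "Regular"       # falls back to class.rst
```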
```diff
@@ -1,4 +1,5 @@
 -e libs/langchain
+-e libs/experimental
 autodoc_pydantic==1.8.0
 myst_parser
 nbsphinx==0.8.9
@@ -10,4 +11,4 @@ sphinx-panels
 toml
 myst_nb
 sphinx_copybutton
-pydata-sphinx-theme==0.13.1
\ No newline at end of file
+pydata-sphinx-theme==0.13.1
```
```diff
@@ -5,17 +5,6 @@
 
 .. autoclass:: {{ objname }}
 
-{% block methods %}
-{% if methods %}
-.. rubric:: {{ _('Methods') }}
-
-.. autosummary::
-{% for item in methods %}
-   ~{{ name }}.{{ item }}
-{%- endfor %}
-{% endif %}
-{% endblock %}
-
 {% block attributes %}
 {% if attributes %}
 .. rubric:: {{ _('Attributes') }}
@@ -27,4 +16,21 @@
 {% endif %}
 {% endblock %}
+
+{% block methods %}
+{% if methods %}
+.. rubric:: {{ _('Methods') }}
+
+.. autosummary::
+{% for item in methods %}
+   ~{{ name }}.{{ item }}
+{%- endfor %}
+
+{% for item in methods %}
+.. automethod:: {{ name }}.{{ item }}
+{%- endfor %}
+
+{% endif %}
+{% endblock %}
+
+
+.. example_links:: {{ objname }}
```
14 docs/api_reference/templates/enum.rst (new file)

```rst
:mod:`{{module}}`.{{objname}}
{{ underline }}==============

.. currentmodule:: {{ module }}

.. autoclass:: {{ objname }}

{% block attributes %}
{% for item in attributes %}
.. autoattribute:: {{ item }}
{% endfor %}
{% endblock %}

.. example_links:: {{ objname }}
```
22 docs/api_reference/templates/pydantic.rst (new file)

```rst
:mod:`{{module}}`.{{objname}}
{{ underline }}==============

.. currentmodule:: {{ module }}

.. autopydantic_model:: {{ objname }}
    :model-show-json: False
    :model-show-config-summary: False
    :model-show-validator-members: False
    :model-show-field-summary: False
    :field-signature-prefix: param
    :members:
    :undoc-members:
    :inherited-members:
    :member-order: groupwise
    :show-inheritance: True
    :special-members: __call__

{% block attributes %}
{% endblock %}

.. example_links:: {{ objname }}
```
14 docs/api_reference/templates/typeddict.rst (new file)

```rst
:mod:`{{module}}`.{{objname}}
{{ underline }}==============

.. currentmodule:: {{ module }}

.. autoclass:: {{ objname }}

{% block attributes %}
{% for item in attributes %}
.. autoattribute:: {{ item }}
{% endfor %}
{% endblock %}

.. example_links:: {{ objname }}
```
```diff
@@ -19,7 +19,7 @@
 {% block htmltitle %}
 <title>{{ title|striptags|e }}{{ titlesuffix }}</title>
 {% endblock %}
-<link rel="canonical" href="http://scikit-learn.org/stable/{{pagename}}.html" />
+<link rel="canonical" href="https://api.python.langchain.com/en/latest/{{pagename}}.html" />
 
 {% if favicon_url %}
 <link rel="shortcut icon" href="{{ favicon_url|e }}"/>
```
```diff
@@ -6,17 +6,6 @@
 {%- set top_container_cls = "sk-landing-container" %}
 {%- endif %}
 
-{% if theme_link_to_live_contributing_page|tobool %}
-  {# Link to development page for live builds #}
-  {%- set development_link = "https://scikit-learn.org/dev/developers/index.html" %}
-  {# Open on a new development page in new window/tab for live builds #}
-  {%- set development_attrs = 'target="_blank" rel="noopener noreferrer"' %}
-{%- else %}
-  {%- set development_link = pathto('developers/index') %}
-  {%- set development_attrs = '' %}
-{%- endif %}
-
-
 <nav id="navbar" class="{{ nav_bar_class }} navbar navbar-expand-md navbar-light bg-light py-0">
   <div class="container-fluid {{ top_container_cls }} px-0">
     {%- if logo_url %}
```
```diff
@@ -28,7 +28,7 @@ navigating around a browser.
 ### [OpenAI Functions](/docs/modules/agents/agent_types/openai_functions_agent.html)
 
 Certain OpenAI models (like gpt-3.5-turbo-0613 and gpt-4-0613) have been explicitly fine-tuned to detect when a
-function should to be called and respond with the inputs that should be passed to the function.
+function should be called and respond with the inputs that should be passed to the function.
 The OpenAI Functions Agent is designed to work with these models.
 
 ### [Conversational](/docs/modules/agents/agent_types/chat_conversation_agent.html)
```
```diff
@@ -1,6 +1,6 @@
 # OpenAI functions
 
-Certain OpenAI models (like gpt-3.5-turbo-0613 and gpt-4-0613) have been fine-tuned to detect when a function should to be called and respond with the inputs that should be passed to the function.
+Certain OpenAI models (like gpt-3.5-turbo-0613 and gpt-4-0613) have been fine-tuned to detect when a function should be called and respond with the inputs that should be passed to the function.
 In an API call, you can describe functions and have the model intelligently choose to output a JSON object containing arguments to call those functions.
 The goal of the OpenAI Function APIs is to more reliably return valid and useful function calls than a generic text completion or chat API.
```
```diff
@@ -18,5 +18,3 @@ Let chains choose which tools to use given high-level directives
 Persist application state between runs of a chain
 #### [Callbacks](/docs/modules/callbacks/)
 Log and stream intermediate steps of any chain
-#### [Evaluation](/docs/modules/evaluation/)
-Evaluate the performance of a chain.
```
```diff
@@ -3,10 +3,12 @@ sidebar_position: 0
 ---
 # Prompts
 
-The new way of programming models is through prompts.
-A **prompt** refers to the input to the model.
-This input is often constructed from multiple components.
-LangChain provides several classes and functions to make constructing and working with prompts easy.
+A prompt for a language model is a set of instructions or input provided by a user to
+guide the model's response, helping it understand the context and generate relevant
+and coherent language-based output, such as answering questions, completing sentences,
+or engaging in a conversation.
 
-- [Prompt templates](/docs/modules/model_io/prompts/prompt_templates/): Parametrize model inputs
+LangChain provides several classes and functions to help construct and work with prompts.
+
+- [Prompt templates](/docs/modules/model_io/prompts/prompt_templates/): Parametrized model inputs
 - [Example selectors](/docs/modules/model_io/prompts/example_selectors/): Dynamically select examples to include in prompts
```
```diff
@@ -4,18 +4,15 @@ sidebar_position: 0
 
 # Prompt templates
 
-Language models take text as input - that text is commonly referred to as a prompt.
-Typically this is not simply a hardcoded string but rather a combination of a template, some examples, and user input.
-LangChain provides several classes and functions to make constructing and working with prompts easy.
+Prompt templates are pre-defined recipes for generating prompts for language models.
 
-## What is a prompt template?
+A template may include instructions, few shot examples, and specific context and
+questions appropriate for a given task.
 
-A prompt template refers to a reproducible way to generate a prompt. It contains a text string ("the template"), that can take in a set of parameters from the end user and generates a prompt.
+LangChain provides tooling to create and work with prompt templates.
 
-A prompt template can contain:
-- instructions to the language model,
-- a set of few shot examples to help the language model generate a better response,
-- a question to the language model.
+LangChain strives to create model agnostic templates to make it easy to reuse
+existing templates across different language models.
 
 import GetStarted from "@snippets/modules/model_io/prompts/prompt_templates/get_started.mdx"
```
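To make the rewritten description concrete, here is a minimal prompt-template example. The snippet is illustrative only and not part of the diff; it uses the `PromptTemplate.from_template` API as it existed in the library at this point:

```python
from langchain import PromptTemplate

# A reusable recipe: fixed instructions plus two user-supplied parameters.
prompt = PromptTemplate.from_template(
    "Tell me a {adjective} joke about {content}."
)
print(prompt.format(adjective="funny", content="chickens"))
# -> Tell me a funny joke about chickens.
```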
```diff
@@ -1,8 +0,0 @@
-# Summarization
-
-A summarization chain can be used to summarize multiple documents. One way is to input multiple smaller documents, after they have been divided into chunks, and operate over them with a MapReduceDocumentsChain. You can also choose instead for the chain that does summarization to be a StuffDocumentsChain, or a RefineDocumentsChain.
-
-import Example from "@snippets/modules/chains/popular/summarize.mdx"
-
-<Example/>
```
```diff
@@ -128,6 +128,10 @@ const config = {
         hideable: true,
       },
     },
+    colorMode: {
+      disableSwitch: false,
+      respectPrefersColorScheme: true,
+    },
     prism: {
       theme: {
         ...baseLightCodeBlockTheme,
```
71 docs/docs_skeleton/package-lock.json (generated)

```diff
@@ -12,7 +12,7 @@
         "@docusaurus/preset-classic": "2.4.0",
         "@docusaurus/remark-plugin-npm2yarn": "^2.4.0",
         "@mdx-js/react": "^1.6.22",
-        "@mendable/search": "^0.0.125",
+        "@mendable/search": "^0.0.137",
         "clsx": "^1.2.1",
         "json-loader": "^0.5.7",
         "process": "^0.11.10",
@@ -3212,10 +3212,11 @@
       }
     },
     "node_modules/@mendable/search": {
-      "version": "0.0.125",
-      "resolved": "https://registry.npmjs.org/@mendable/search/-/search-0.0.125.tgz",
-      "integrity": "sha512-Mb1J3zDhOyBZV9cXqJocSOBNYGpe8+LQDqd9n9laPWxosSJcSTUewqtlIbMerrYsScBsxskoSiWgRsc7xF5z0Q==",
+      "version": "0.0.137",
+      "resolved": "https://registry.npmjs.org/@mendable/search/-/search-0.0.137.tgz",
+      "integrity": "sha512-2J2fd5eqToK+mLzrSDA6NAr4F1kfql7QRiHpD7AUJJX0nqpvInhr/mMJKBCUSCv2z76UKCmF5wLuPSw+C90Qdg==",
       "dependencies": {
+        "html-react-parser": "^4.2.0",
         "posthog-js": "^1.45.1"
       },
       "peerDependencies": {
@@ -8332,6 +8333,33 @@
         "safe-buffer": "~5.1.0"
       }
     },
+    "node_modules/html-dom-parser": {
+      "version": "4.0.0",
+      "resolved": "https://registry.npmjs.org/html-dom-parser/-/html-dom-parser-4.0.0.tgz",
+      "integrity": "sha512-TUa3wIwi80f5NF8CVWzkopBVqVAtlawUzJoLwVLHns0XSJGynss4jiY0mTWpiDOsuyw+afP+ujjMgRh9CoZcXw==",
+      "dependencies": {
+        "domhandler": "5.0.3",
+        "htmlparser2": "9.0.0"
+      }
+    },
+    "node_modules/html-dom-parser/node_modules/htmlparser2": {
+      "version": "9.0.0",
+      "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-9.0.0.tgz",
+      "integrity": "sha512-uxbSI98wmFT/G4P2zXx4OVx04qWUmyFPrD2/CNepa2Zo3GPNaCaaxElDgwUrwYWkK1nr9fft0Ya8dws8coDLLQ==",
+      "funding": [
+        "https://github.com/fb55/htmlparser2?sponsor=1",
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/fb55"
+        }
+      ],
+      "dependencies": {
+        "domelementtype": "^2.3.0",
+        "domhandler": "^5.0.3",
+        "domutils": "^3.1.0",
+        "entities": "^4.5.0"
+      }
+    },
     "node_modules/html-entities": {
       "version": "2.4.0",
       "resolved": "https://registry.npmjs.org/html-entities/-/html-entities-2.4.0.tgz",
@@ -8375,6 +8403,20 @@
         "node": ">= 12"
       }
     },
+    "node_modules/html-react-parser": {
+      "version": "4.2.0",
+      "resolved": "https://registry.npmjs.org/html-react-parser/-/html-react-parser-4.2.0.tgz",
+      "integrity": "sha512-gzU55AS+FI6qD7XaKe5BLuLFM2Xw0/LodfMWZlxV9uOHe7LCD5Lukx/EgYuBI3c0kLu0XlgFXnSzO0qUUn3Vrg==",
+      "dependencies": {
+        "domhandler": "5.0.3",
+        "html-dom-parser": "4.0.0",
+        "react-property": "2.0.0",
+        "style-to-js": "1.1.3"
+      },
+      "peerDependencies": {
+        "react": "0.14 || 15 || 16 || 17 || 18"
+      }
+    },
     "node_modules/html-tags": {
       "version": "3.3.1",
       "resolved": "https://registry.npmjs.org/html-tags/-/html-tags-3.3.1.tgz",
@@ -11762,6 +11804,11 @@
         "webpack": ">=4.41.1 || 5.x"
       }
     },
+    "node_modules/react-property": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/react-property/-/react-property-2.0.0.tgz",
+      "integrity": "sha512-kzmNjIgU32mO4mmH5+iUyrqlpFQhF8K2k7eZ4fdLSOPFrD1XgEuSBv9LDEgxRXTMBqMd8ppT0x6TIzqE5pdGdw=="
+    },
     "node_modules/react-router": {
       "version": "5.3.4",
       "resolved": "https://registry.npmjs.org/react-router/-/react-router-5.3.4.tgz",
@@ -13127,6 +13174,22 @@
         "url": "https://github.com/sponsors/sindresorhus"
       }
     },
+    "node_modules/style-to-js": {
+      "version": "1.1.3",
+      "resolved": "https://registry.npmjs.org/style-to-js/-/style-to-js-1.1.3.tgz",
+      "integrity": "sha512-zKI5gN/zb7LS/Vm0eUwjmjrXWw8IMtyA8aPBJZdYiQTXj4+wQ3IucOLIOnF7zCHxvW8UhIGh/uZh/t9zEHXNTQ==",
+      "dependencies": {
+        "style-to-object": "0.4.1"
+      }
+    },
+    "node_modules/style-to-js/node_modules/style-to-object": {
+      "version": "0.4.1",
+      "resolved": "https://registry.npmjs.org/style-to-object/-/style-to-object-0.4.1.tgz",
+      "integrity": "sha512-HFpbb5gr2ypci7Qw+IOhnP2zOU7e77b+rzM+wTzXzfi1PrtBCX0E7Pk4wL4iTLnhzZ+JgEGAhX81ebTg/aYjQw==",
+      "dependencies": {
+        "inline-style-parser": "0.1.1"
+      }
+    },
     "node_modules/style-to-object": {
       "version": "0.3.0",
      "resolved": "https://registry.npmjs.org/style-to-object/-/style-to-object-0.3.0.tgz",
```
```diff
@@ -23,7 +23,7 @@
     "@docusaurus/preset-classic": "2.4.0",
     "@docusaurus/remark-plugin-npm2yarn": "^2.4.0",
     "@mdx-js/react": "^1.6.22",
-    "@mendable/search": "^0.0.125",
+    "@mendable/search": "^0.0.137",
     "clsx": "^1.2.1",
     "json-loader": "^0.5.7",
     "process": "^0.11.10",
```
New binary files (images):

- docs/docs_skeleton/static/img/SQLDatabaseToolkit.png (405 KiB)
- docs/docs_skeleton/static/img/chat_use_case.png (93 KiB)
- docs/docs_skeleton/static/img/chat_use_case_2.png (102 KiB)
- docs/docs_skeleton/static/img/create_sql_query_chain.png (190 KiB)
- docs/docs_skeleton/static/img/extraction.png (125 KiB)
- docs/docs_skeleton/static/img/extraction_trace_function.png (131 KiB)
- docs/docs_skeleton/static/img/extraction_trace_function_2.png (211 KiB)
- docs/docs_skeleton/static/img/extraction_trace_joke.png (132 KiB)
- docs/docs_skeleton/static/img/sql_usecase.png (119 KiB)
- docs/docs_skeleton/static/img/sqldbchain_trace.png (266 KiB)
- docs/docs_skeleton/static/img/summarization_use_case_1.png (196 KiB)
- docs/docs_skeleton/static/img/summarization_use_case_2.png (90 KiB)
- docs/docs_skeleton/static/img/summarization_use_case_3.png (174 KiB)
```diff
@@ -556,6 +556,14 @@
       "source": "/docs/integrations/llamacpp",
       "destination": "/docs/integrations/providers/llamacpp"
     },
+    {
+      "source": "/en/latest/integrations/log10.html",
+      "destination": "/docs/integrations/providers/log10"
+    },
+    {
+      "source": "/docs/integrations/log10",
+      "destination": "/docs/integrations/providers/log10"
+    },
     {
       "source": "/en/latest/integrations/mediawikidump.html",
       "destination": "/docs/integrations/providers/mediawikidump"
@@ -3478,7 +3486,7 @@
     },
     {
       "source": "/en/latest/modules/prompts/example_selectors.html",
-      "destination": "/docs/modules/model_io/example_selectors"
+      "destination": "/docs/modules/model_io/prompts/example_selectors"
     },
     {
       "source": "/en/latest/modules/prompts/example_selectors/examples/custom_example_selector.html",
@@ -3494,7 +3502,7 @@
     },
     {
       "source": "/en/latest/modules/prompts/prompt_templates.html",
-      "destination": "/docs/modules/model_io/prompt_templates"
+      "destination": "/docs/modules/model_io/prompts/prompt_templates"
     },
     {
       "source": "/en/latest/modules/prompts/prompt_templates/examples/connecting_to_a_feature_store.html",
@@ -3951,6 +3959,10 @@
     {
       "source": "/docs/modules/chains/additional/tagging",
       "destination": "/docs/use_cases/tagging"
     },
+    {
+      "source": "docs/integrations/providers/agent_with_wandb_tracing",
+      "destination": "docs/integrations/providers/wandb_tracing"
+    }
   ]
 }
```
The cookbook notebook's first group of hunks is metadata-only: execution counts are renumbered and stale `"scrolled": false` cell metadata is dropped; cell sources and outputs (`'Harrison worked at Kensho.'`, `'Harrison ha lavorato a Kensho.'`) are unchanged.

| Cell id | execution_count (old → new) | Other change |
|---|---|---|
| 466b65b3 | 4 → 1 | |
| 3c634ef0 | 5 → 11 | |
| decf7710 | 9 | `"scrolled": false` removed |
| f799664d | 7 → 10 | |
| 5d3d8ffe | 19 → 12 | |
| 33be32af | 20 → 2 | |
| df3f3fa2 | 21 → 3 | |
| bfc47ec1 | 22 → 16 | |
| eae31755 | 24 → 17 | |
| f3040b0c | 25 → 18 | `"scrolled": false` removed |
| e1d20c7c | 27 → 19 | |
| 7ee8b2d4 | 28 → 20 | `"scrolled": false` removed |
@@ -503,6 +497,317 @@ — new cells inserted after `chain.invoke({"question": "where did harrison work", "language": "italian"})`:

## Conversational Retrieval Chain

We can easily add in conversation history. This primarily means adding in chat_message_history.

```python
from langchain.schema.runnable import RunnableMap
from langchain.schema import format_document
```

```python
from langchain.prompts.prompt import PromptTemplate

_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
```

```python
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
ANSWER_PROMPT = ChatPromptTemplate.from_template(template)
```

```python
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")


def _combine_documents(docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"):
    doc_strings = [format_document(doc, document_prompt) for doc in docs]
    return document_separator.join(doc_strings)
```

```python
from typing import Tuple, List


def _format_chat_history(chat_history: List[Tuple]) -> str:
    buffer = ""
    for dialogue_turn in chat_history:
        human = "Human: " + dialogue_turn[0]
        ai = "Assistant: " + dialogue_turn[1]
        buffer += "\n" + "\n".join([human, ai])
    return buffer
```

```python
_inputs = RunnableMap(
    {
        "standalone_question": {
            "question": lambda x: x["question"],
            "chat_history": lambda x: _format_chat_history(x["chat_history"]),
        }
        | CONDENSE_QUESTION_PROMPT
        | ChatOpenAI(temperature=0)
        | StrOutputParser(),
    }
)
_context = {
    "context": itemgetter("standalone_question") | retriever | _combine_documents,
    "question": lambda x: x["standalone_question"],
}
conversational_qa_chain = _inputs | _context | ANSWER_PROMPT | ChatOpenAI()
```

```python
conversational_qa_chain.invoke({
    "question": "where did harrison work?",
    "chat_history": [],
})
```

Output:
```
Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1
AIMessage(content='Harrison was employed at Kensho.', additional_kwargs={}, example=False)
```

```python
conversational_qa_chain.invoke({
    "question": "where did he work?",
    "chat_history": [("Who wrote this notebook?", "Harrison")],
})
```

Output:
```
Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1
AIMessage(content='Harrison worked at Kensho.', additional_kwargs={}, example=False)
```

### With Memory and returning source documents

This shows how to use memory with the above. For memory, we need to manage it outside of the chain. For returning the retrieved documents, we just need to pass them through all the way.

```python
from langchain.memory import ConversationBufferMemory
```

```python
memory = ConversationBufferMemory(return_messages=True, output_key="answer", input_key="question")
```

```python
# First we add a step to load memory
# This needs to be a RunnableMap because its the first input
loaded_memory = RunnableMap(
    {
        "question": itemgetter("question"),
        "memory": memory.load_memory_variables,
    }
)
# Next we add a step to expand memory into the variables
expanded_memory = {
    "question": itemgetter("question"),
    "chat_history": lambda x: x["memory"]["history"],
}

# Now we calculate the standalone question
standalone_question = {
    "standalone_question": {
        "question": lambda x: x["question"],
        "chat_history": lambda x: _format_chat_history(x["chat_history"]),
    }
    | CONDENSE_QUESTION_PROMPT
    | ChatOpenAI(temperature=0)
    | StrOutputParser(),
}
# Now we retrieve the documents
retrieved_documents = {
    "docs": itemgetter("standalone_question") | retriever,
    "question": lambda x: x["standalone_question"],
}
# Now we construct the inputs for the final prompt
final_inputs = {
    "context": lambda x: _combine_documents(x["docs"]),
    "question": itemgetter("question"),
}
# And finally, we do the part that returns the answers
answer = {
    "answer": final_inputs | ANSWER_PROMPT | ChatOpenAI(),
    "docs": itemgetter("docs"),
}
# And now we put it all together!
final_chain = loaded_memory | expanded_memory | standalone_question | retrieved_documents | answer
```

```python
inputs = {"question": "where did harrison work?"}
result = final_chain.invoke(inputs)
result
```

Output:
```
Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1
{'answer': AIMessage(content='Harrison was employed at Kensho.', additional_kwargs={}, example=False),
 'docs': [Document(page_content='harrison worked at kensho', metadata={})]}
```

```python
# Note that the memory does not save automatically
# This will be improved in the future
# For now you need to save it yourself
memory.save_context(inputs, {"answer": result["answer"].content})
```

```python
memory.load_memory_variables({})
```

Output:
```
{'history': [HumanMessage(content='where did harrison work?', additional_kwargs={}, example=False),
 AIMessage(content='Harrison was employed at Kensho.', additional_kwargs={}, example=False)]}
```
@@ -583,6 +888,98 @@ — new cells inserted after `chain2.invoke({})`:

## Router

You can also use the router runnable to conditionally route inputs to different runnables.

```python
from langchain.chains import create_tagging_chain_pydantic
from pydantic import BaseModel, Field


class PromptToUse(BaseModel):
    """Used to determine which prompt to use to answer the user's input."""

    name: str = Field(description="Should be one of `math` or `english`")
```

```python
tagger = create_tagging_chain_pydantic(PromptToUse, ChatOpenAI(temperature=0))
```

```python
chain1 = ChatPromptTemplate.from_template("You are a math genius. Answer the question: {question}") | ChatOpenAI()
chain2 = ChatPromptTemplate.from_template("You are an english major. Answer the question: {question}") | ChatOpenAI()
```

```python
from langchain.schema.runnable import RouterRunnable

router = RouterRunnable({"math": chain1, "english": chain2})
```

```python
chain = {
    "key": {"input": lambda x: x["question"]} | tagger | (lambda x: x['text'].name),
    "input": {"question": lambda x: x["question"]},
} | router
```

```python
chain.invoke({"question": "whats 2 + 2"})
```

Output:
```
AIMessage(content='Thank you for the compliment! The sum of 2 + 2 is equal to 4.', additional_kwargs={}, example=False)
```
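The dispatch contract is easy to miss in the cell above: `RouterRunnable` expects a dict with a `key` naming the target runnable and an `input` to pass to it. A minimal sketch of just that contract, with no LLMs involved (this assumes `RunnableLambda` from the same `langchain.schema.runnable` module; the snippet is illustrative, not from the notebook):

```python
from langchain.schema.runnable import RouterRunnable, RunnableLambda

demo_router = RouterRunnable({
    "double": RunnableLambda(lambda x: x * 2),
    "negate": RunnableLambda(lambda x: -x),
})

# The router picks the runnable named by "key" and invokes it with "input".
assert demo_router.invoke({"key": "double", "input": 3}) == 6
assert demo_router.invoke({"key": "negate", "input": 3}) == -3
```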
@@ -982,10 +1379,452 @@ — new cells inserted after `chain.invoke({"input": "whats 2 plus 2"})`:

## Memory

This shows how to add memory to an arbitrary chain. Right now, you can use the memory classes but need to hook them up manually.

```python
from langchain.memory import ConversationBufferMemory
from langchain.schema.runnable import RunnableMap
from langchain.prompts import MessagesPlaceholder

model = ChatOpenAI()
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful chatbot"),
    MessagesPlaceholder(variable_name="history"),
    ("human", "{input}"),
])
```

```python
memory = ConversationBufferMemory(return_messages=True)
```

```python
memory.load_memory_variables({})
```

Output:
```
{'history': []}
```

```python
chain = RunnableMap({
    "input": lambda x: x["input"],
    "memory": memory.load_memory_variables,
}) | {
    "input": lambda x: x["input"],
    "history": lambda x: x["memory"]["history"],
} | prompt | model
```

```python
inputs = {"input": "hi im bob"}
response = chain.invoke(inputs)
response
```

Output:
```
AIMessage(content='Hello Bob! How can I assist you today?', additional_kwargs={}, example=False)
```

```python
memory.save_context(inputs, {"output": response.content})
```

```python
memory.load_memory_variables({})
```

Output:
```
{'history': [HumanMessage(content='hi im bob', additional_kwargs={}, example=False),
 AIMessage(content='Hello Bob! How can I assist you today?', additional_kwargs={}, example=False)]}
```

```python
inputs = {"input": "whats my name"}
response = chain.invoke(inputs)
response
```

Output:
```
AIMessage(content='Your name is Bob. You mentioned it in your previous message. Is there anything else I can help you with, Bob?', additional_kwargs={}, example=False)
```
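Since the memory save is manual here, a tiny helper (purely illustrative, not from the notebook) keeps `invoke` and `save_context` paired so no turn is lost:

```python
def chat(chain, memory, text: str):
    """Invoke the chain and persist the exchange into memory."""
    inputs = {"input": text}
    response = chain.invoke(inputs)
    memory.save_context(inputs, {"output": response.content})
    return response

# Usage with the objects defined above:
# response = chat(chain, memory, "hi im bob")
```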
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4927a727-b4c8-453c-8c83-bd87b4fcac14",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Moderation\n",
|
||||
"\n",
|
||||
"This shows how to add in moderation (or other safeguards) around your LLM application."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "4f5f6449-940a-4f5c-97c0-39b71c3e2a68",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import OpenAIModerationChain\n",
|
||||
"from langchain.llms import OpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"id": "fcb8312b-7e7a-424f-a3ec-76738c9a9d21",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"moderate = OpenAIModerationChain()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"id": "b24b9148-f6b0-4091-8ea8-d3fb281bd950",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = OpenAI()\n",
|
||||
"prompt = ChatPromptTemplate.from_messages([\n",
|
||||
" (\"system\", \"repeat after me: {input}\")\n",
|
||||
"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"id": "1c8ed87c-9ca6-4559-bf60-d40e94a0af08",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = prompt | model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 34,
|
||||
"id": "5256b9bd-381a-42b0-bfa8-7e6d18f853cb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nYou are stupid.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 34,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.invoke({\"input\": \"you are stupid\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 36,
|
||||
"id": "fe6e3b33-dc9a-49d5-b194-ba750c58a628",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
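"# pipe the model's output through the moderation check\n",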
"moderated_chain = chain | moderate"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 37,
|
||||
"id": "d8ba0cbd-c739-4d23-be9f-6ae092bd5ffb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': '\\n\\nYou are stupid.',\n",
|
||||
" 'output': \"Text was found that violates OpenAI's content policy.\"}"
|
||||
]
|
||||
},
|
||||
"execution_count": 37,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"moderated_chain.invoke({\"input\": \"you are stupid\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9be88499",
|
||||
"id": "a0a85ba4-f782-47b8-b16f-8b7a61d6dab7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## Conversational Retrieval With Memory"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "92c87dd8-bb6f-4f32-a30d-8f5459ce6265",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Fallbacks\n",
|
||||
"\n",
|
||||
"With LCEL you can easily introduce fallbacks for any Runnable component, like an LLM."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "1b1cb744-31fc-4261-ab25-65fe1fcad559",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='To get to the other side.', additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"\n",
|
||||
"bad_llm = ChatOpenAI(model_name=\"gpt-fake\")\n",
|
||||
"good_llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\")\n",
|
||||
"llm = bad_llm.with_fallbacks([good_llm])\n",
|
||||
"\n",
|
||||
"llm.invoke(\"Why did the the chicken cross the road?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b8cf3982-03f6-49b3-8ff5-7cd12444f19c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Looking at the trace, we can see that the first model failed but the second succeeded, so we still got an output: https://smith.langchain.com/public/dfaf0bf6-d86d-43e9-b084-dd16a56df15c/r\n",
|
||||
"\n",
|
||||
"We can add an arbitrary sequence of fallbacks, which will be executed in order:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "31819be0-7f40-4e67-b5ab-61340027b948",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='To get to the other side.', additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
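"# fallbacks are tried in order until one succeeds\n",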
"llm = bad_llm.with_fallbacks([bad_llm, bad_llm, good_llm])\n",
|
||||
"\n",
|
||||
"llm.invoke(\"Why did the the chicken cross the road?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "acad6e88-8046-450e-b005-db7e50f33b80",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Trace: https://smith.langchain.com/public/c09efd01-3184-4369-a225-c9da8efcaf47/r\n",
|
||||
"\n",
|
||||
"We can continue to use our Runnable with fallbacks the same way we use any Runnable, mean we can include it in sequences:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "bab114a1-bb93-4b7e-a639-e7e00f21aebc",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='To show off its incredible jumping skills! Kangaroos are truly amazing creatures.', additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\"system\", \"You're a nice assistant who always includes a compliment in your response\"),\n",
|
||||
" (\"human\", \"Why did the {animal} cross the road\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"chain = prompt | llm\n",
|
||||
"chain.invoke({\"animal\": \"kangaroo\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "58340afa-8187-4ffe-9bd2-7912fb733a15",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Trace: https://smith.langchain.com/public/ba03895f-f8bd-4c70-81b7-8b930353eabd/r\n",
|
||||
"\n",
|
||||
"Note, since every sequence of Runnables is itself a Runnable, we can create fallbacks for whole Sequences. We can also continue using the full interface, including asynchronous calls, batched calls, and streams:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "45aa3170-b2e6-430d-887b-bd879048060a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[\"\\n\\nAnswer: The rabbit crossed the road to get to the other side. That's quite clever of him!\",\n",
|
||||
" '\\n\\nAnswer: The turtle crossed the road to get to the other side. You must be pretty clever to come up with that riddle!']"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"\n",
|
||||
"chat_prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\"system\", \"You're a nice assistant who always includes a compliment in your response\"),\n",
|
||||
" (\"human\", \"Why did the {animal} cross the road\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"chat_model = ChatOpenAI(model_name=\"gpt-fake\")\n",
|
||||
"\n",
|
||||
"prompt_template = \"\"\"Instructions: You should always include a compliment in your response.\n",
|
||||
"\n",
|
||||
"Question: Why did the {animal} cross the road?\"\"\"\n",
|
||||
"prompt = PromptTemplate.from_template(prompt_template)\n",
|
||||
"llm = OpenAI()\n",
|
||||
"\n",
|
||||
"bad_chain = chat_prompt | chat_model\n",
|
||||
"good_chain = prompt | llm\n",
|
||||
"chain = bad_chain.with_fallbacks([good_chain])\n",
|
||||
"await chain.abatch([{\"animal\": \"rabbit\"}, {\"animal\": \"turtle\"}])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "af6731c6-0c73-4b1d-a433-6e8f6ecce2bb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Traces: \n",
|
||||
"1. https://smith.langchain.com/public/ccd73236-9ae5-48a6-94b5-41210be18a46/r\n",
|
||||
"2. https://smith.langchain.com/public/f43f608e-075c-45c7-bf73-b64e4d3f3082/r"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3d2fe1fe-506b-4ee5-8056-8b9df801765f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
|
||||
@@ -19,9 +19,22 @@
|
||||
"- `ainvoke`: call the chain on an input async\n",
|
||||
"- `abatch`: call the chain on a list of inputs async\n",
|
||||
"\n",
|
||||
"The type of the input varies by component. For a prompt it is a dictionary, for a retriever it is a single string, for a model either a single string, a list of chat messages, or a PromptValue.\n",
|
||||
"The type of the input varies by component:\n",
|
||||
"\n",
|
||||
"The output type also varies by component. For an LLM it is a string, for a ChatModel it's a ChatMessage, for a prompt it's a PromptValue, for a retriever it's a list of documents.\n",
|
||||
"| Component | Input Type |\n",
|
||||
"| --- | --- |\n",
|
||||
"|Prompt|Dictionary|\n",
|
||||
"|Retriever|Single string|\n",
|
||||
"|Model| Single string, list of chat messages or a PromptValue|\n",
|
||||
"\n",
|
||||
"The output type also varies by component:\n",
|
||||
"\n",
|
||||
"| Component | Output Type |\n",
|
||||
"| --- | --- |\n",
|
||||
"| LLM | String |\n",
|
||||
"| ChatModel | ChatMessage |\n",
|
||||
"| Prompt | PromptValue |\n",
|
||||
"| Retriever | List of documents |\n",
|
||||
"\n",
|
||||
"Let's take a look at these methods! To do so, we'll create a super simple PromptTemplate + ChatModel chain."
|
||||
]
|
||||
@@ -95,7 +108,7 @@
|
||||
],
|
||||
"source": [
|
||||
"for s in chain.stream({\"topic\": \"bears\"}):\n",
|
||||
" print(s.content, end=\"\")"
|
||||
" print(s.content, end=\"\", flush=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -183,7 +196,7 @@
|
||||
],
|
||||
"source": [
|
||||
"async for s in chain.astream({\"topic\": \"bears\"}):\n",
|
||||
" print(s.content, end=\"\")"
|
||||
" print(s.content, end=\"\", flush=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -71,3 +71,6 @@ or any other local ENV management tool.
|
||||
|
||||
Currently `StreamlitCallbackHandler` is geared towards use with a LangChain Agent Executor. Support for additional agent types,
|
||||
direct use with Chains, etc. will be added in the future.
|
||||
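
A minimal sketch of the intended usage with an Agent Executor (the agent and tool choices below are illustrative):

```python
import streamlit as st

from langchain.agents import AgentType, initialize_agent, load_tools
from langchain.callbacks import StreamlitCallbackHandler
from langchain.llms import OpenAI

llm = OpenAI(temperature=0, streaming=True)
tools = load_tools(["ddg-search"])
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION)

if prompt := st.chat_input():
    with st.chat_message("assistant"):
        # stream the agent's thoughts and tool calls into this container as it runs
        st_callback = StreamlitCallbackHandler(st.container())
        response = agent.run(prompt, callbacks=[st_callback])
        st.write(response)
```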
|
||||
You may also be interested in using
|
||||
[StreamlitChatMessageHistory](/docs/integrations/memory/streamlit_chat_message_history) for LangChain.
|
||||
|
||||
225
docs/extras/integrations/chat/anyscale.ipynb
Normal file
@@ -0,0 +1,225 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "642fd21c-600a-47a1-be96-6e1438b421a9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Anyscale\n",
|
||||
"\n",
|
||||
"This notebook demonstrates the use of `langchain.chat_models.ChatAnyscale` for [Anyscale Endpoints](https://endpoints.anyscale.com/).\n",
|
||||
"\n",
|
||||
"* Set `ANYSCALE_API_KEY` environment variable\n",
|
||||
"* or use the `anyscale_api_key` keyword argument"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# !pip install openai"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "d00d850917865298"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "72340871-ae2f-415f-b399-0777d32dc379",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"os.environ[\"ANYSCALE_API_KEY\"] = getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5d7fc704-3ea0-4c35-96e7-89fcae6c73fa",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Let's try out each model offered on Anyscale Endpoints"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "0dc9428d-4217-47d2-97de-f784b1764186",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"dict_keys(['meta-llama/Llama-2-70b-chat-hf', 'meta-llama/Llama-2-7b-chat-hf', 'meta-llama/Llama-2-13b-chat-hf'])\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatAnyscale\n",
|
||||
"\n",
|
||||
"chats = {\n",
|
||||
" model: ChatAnyscale(model_name=model, temperature=1.0)\n",
|
||||
" for model in ChatAnyscale.get_available_models()\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"print(chats.keys())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7c4f124a-eaf7-4d78-a2c0-b0aa23fb25c4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# We can use async methods and other stuff supported by ChatOpenAI\n",
|
||||
"\n",
|
||||
"This way, the three requests will only take as long as the longest individual request."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "1f94f5d2-569e-4a2c-965e-de53c2845fbb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import asyncio\n",
|
||||
"\n",
|
||||
"from langchain.schema import SystemMessage, HumanMessage\n",
|
||||
"\n",
|
||||
"messages = [\n",
|
||||
" SystemMessage(\n",
|
||||
" content=\"You are a helpful AI that shares everything you know.\"\n",
|
||||
" ),\n",
|
||||
" HumanMessage(\n",
|
||||
" content=\"Tell me technical facts about yourself. Are you a transformer model? How many billions of parameters do you have?\"\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"async def get_msgs():\n",
|
||||
" tasks = [\n",
|
||||
" chat.apredict_messages(messages)\n",
|
||||
" for chat in chats.values()\n",
|
||||
" ]\n",
|
||||
" responses = await asyncio.gather(*tasks)\n",
|
||||
" return dict(zip(chats.keys(), responses))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "b2ced871-869a-4ca6-a2ec-6bfececdf7da",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import nest_asyncio\n",
|
||||
"\n",
|
||||
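"# patch the notebook's already-running event loop so asyncio.run() can be used below\n",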
"nest_asyncio.apply()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "bc605fa5-9501-470d-a6c9-cd868d2145ef",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\tmeta-llama/Llama-2-70b-chat-hf\n",
|
||||
"\n",
|
||||
"Greetings! I'm just an AI, I don't have a personal identity like humans do, but I'm here to help you with any questions you have.\n",
|
||||
"\n",
|
||||
"I'm a large language model, which means I'm trained on a large corpus of text data to generate language outputs that are coherent and natural-sounding. My architecture is based on a transformer model, which is a type of neural network that's particularly well-suited for natural language processing tasks.\n",
|
||||
"\n",
|
||||
"As for my parameters, I have a few billion parameters, but I don't have access to the exact number as it's not relevant to my functioning. My training data includes a vast amount of text from various sources, including books, articles, and websites, which I use to learn patterns and relationships in language.\n",
|
||||
"\n",
|
||||
"I'm designed to be a helpful tool for a variety of tasks, such as answering questions, providing information, and generating text. I'm constantly learning and improving my abilities through machine learning algorithms and feedback from users like you.\n",
|
||||
"\n",
|
||||
"I hope this helps! Is there anything else you'd like to know about me or my capabilities?\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"\tmeta-llama/Llama-2-7b-chat-hf\n",
|
||||
"\n",
|
||||
"Ah, a fellow tech enthusiast! *adjusts glasses* I'm glad to share some technical details about myself. 🤓\n",
|
||||
"Indeed, I'm a transformer model, specifically a BERT-like language model trained on a large corpus of text data. My architecture is based on the transformer framework, which is a type of neural network designed for natural language processing tasks. 🏠\n",
|
||||
"As for the number of parameters, I have approximately 340 million. *winks* That's a pretty hefty number, if I do say so myself! These parameters allow me to learn and represent complex patterns in language, such as syntax, semantics, and more. 🤔\n",
|
||||
"But don't ask me to do math in my head – I'm a language model, not a calculating machine! 😅 My strengths lie in understanding and generating human-like text, so feel free to chat with me anytime you'd like. 💬\n",
|
||||
"Now, do you have any more technical questions for me? Or would you like to engage in a nice chat? 😊\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"\tmeta-llama/Llama-2-13b-chat-hf\n",
|
||||
"\n",
|
||||
"Hello! As a friendly and helpful AI, I'd be happy to share some technical facts about myself.\n",
|
||||
"\n",
|
||||
"I am a transformer-based language model, specifically a variant of the BERT (Bidirectional Encoder Representations from Transformers) architecture. BERT was developed by Google in 2018 and has since become one of the most popular and widely-used AI language models.\n",
|
||||
"\n",
|
||||
"Here are some technical details about my capabilities:\n",
|
||||
"\n",
|
||||
"1. Parameters: I have approximately 340 million parameters, which are the numbers that I use to learn and represent language. This is a relatively large number of parameters compared to some other languages models, but it allows me to learn and understand complex language patterns and relationships.\n",
|
||||
"2. Training: I was trained on a large corpus of text data, including books, articles, and other sources of written content. This training allows me to learn about the structure and conventions of language, as well as the relationships between words and phrases.\n",
|
||||
"3. Architectures: My architecture is based on the transformer model, which is a type of neural network that is particularly well-suited for natural language processing tasks. The transformer model uses self-attention mechanisms to allow the model to \"attend\" to different parts of the input text, allowing it to capture long-range dependencies and contextual relationships.\n",
|
||||
"4. Precision: I am capable of generating text with high precision and accuracy, meaning that I can produce text that is close to human-level quality in terms of grammar, syntax, and coherence.\n",
|
||||
"5. Generative capabilities: In addition to being able to generate text based on prompts and questions, I am also capable of generating text based on a given topic or theme. This allows me to create longer, more coherent pieces of text that are organized around a specific idea or concept.\n",
|
||||
"\n",
|
||||
"Overall, I am a powerful and versatile language model that is capable of a wide range of natural language processing tasks. I am constantly learning and improving, and I am here to help answer any questions you may have!\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"CPU times: user 371 ms, sys: 15.5 ms, total: 387 ms\n",
|
||||
"Wall time: 12 s\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"\n",
|
||||
"response_dict = asyncio.run(get_msgs())\n",
|
||||
"\n",
|
||||
"for model_name, response in response_dict.items():\n",
|
||||
" print(f'\\t{model_name}')\n",
|
||||
" print()\n",
|
||||
" print(response.content)\n",
|
||||
" print('\\n---\\n')"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
95
docs/extras/integrations/chat/azureml_chat_endpoint.ipynb
Normal file
@@ -0,0 +1,95 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# AzureML Chat Online Endpoint\n",
|
||||
"\n",
|
||||
"[AzureML](https://azure.microsoft.com/en-us/products/machine-learning/) is a platform used to build, train, and deploy machine learning models. Users can explore the types of models to deploy in the Model Catalog, which provides Azure Foundation Models and OpenAI Models. Azure Foundation Models include various open-source models and popular Hugging Face models. Users can also import models of their liking into AzureML.\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use a chat model hosted on an `AzureML online endpoint`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models.azureml_endpoint import AzureMLChatOnlineEndpoint"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set up\n",
|
||||
"\n",
|
||||
"To use the wrapper, you must [deploy a model on AzureML](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-use-foundation-models?view=azureml-api-2#deploying-foundation-models-to-endpoints-for-inferencing) and obtain the following parameters:\n",
|
||||
"\n",
|
||||
"* `endpoint_api_key`: The API key provided by the endpoint\n",
|
||||
"* `endpoint_url`: The REST endpoint url provided by the endpoint"
|
||||
]
|
||||
},
|
||||
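{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal construction sketch using these parameters (the values below are placeholders for your own deployment):\n",
"\n",
"```python\n",
"chat = AzureMLChatOnlineEndpoint(\n",
"    endpoint_url=\"https://<your-endpoint>.<region>.inference.ml.azure.com/score\",\n",
"    endpoint_api_key=\"<your-endpoint-api-key>\",\n",
"    content_formatter=...,  # see the Content Formatter section below\n",
")\n",
"```"
]
},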
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Content Formatter\n",
|
||||
"\n",
|
||||
"The `content_formatter` parameter is a handler class for transforming the request and response of an AzureML endpoint to match with required schema. Since there are a wide range of models in the model catalog, each of which may process data differently from one another, a `ContentFormatterBase` class is provided to allow users to transform data to their liking. The following content formatters are provided:\n",
|
||||
"\n",
|
||||
"* `LLamaContentFormatter`: Formats request and response data for LLaMa2-chat"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=' The Collatz Conjecture is one of the most famous unsolved problems in mathematics, and it has been the subject of much study and research for many years. While it is impossible to predict with certainty whether the conjecture will ever be solved, there are several reasons why it is considered a challenging and important problem:\\n\\n1. Simple yet elusive: The Collatz Conjecture is a deceptively simple statement that has proven to be extraordinarily difficult to prove or disprove. Despite its simplicity, the conjecture has eluded some of the brightest minds in mathematics, and it remains one of the most famous open problems in the field.\\n2. Wide-ranging implications: The Collatz Conjecture has far-reaching implications for many areas of mathematics, including number theory, algebra, and analysis. A solution to the conjecture could have significant impacts on these fields and potentially lead to new insights and discoveries.\\n3. Computational evidence: While the conjecture remains unproven, extensive computational evidence supports its validity. In fact, no counterexample to the conjecture has been found for any starting value up to 2^64 (a number', additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chat_models.azureml_endpoint import LlamaContentFormatter\n",
|
||||
"from langchain.schema import HumanMessage\n",
|
||||
"\n",
|
||||
"chat = AzureMLChatOnlineEndpoint(content_formatter=LlamaContentFormatter())\n",
|
||||
"response = chat(messages=[\n",
|
||||
" HumanMessage(content=\"Will the Collatz conjecture ever be solved?\")\n",
|
||||
"])\n",
|
||||
"response"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -8,11 +9,7 @@
|
||||
"\n",
|
||||
"Note: This is seperate from the Google PaLM integration. Google has chosen to offer an enterprise version of PaLM through GCP, and this supports the models made available through there. \n",
|
||||
"\n",
|
||||
"PaLM API on Vertex AI is a Preview offering, subject to the Pre-GA Offerings Terms of the [GCP Service Specific Terms](https://cloud.google.com/terms/service-terms). \n",
|
||||
"\n",
|
||||
"Pre-GA products and features may have limited support, and changes to pre-GA products and features may not be compatible with other pre-GA versions. For more information, see the [launch stage descriptions](https://cloud.google.com/products#product-launch-stages). Further, by using PaLM API on Vertex AI, you agree to the Generative AI Preview [terms and conditions](https://cloud.google.com/trustedtester/aitos) (Preview Terms).\n",
|
||||
"\n",
|
||||
"For PaLM API on Vertex AI, you can process personal data as outlined in the Cloud Data Processing Addendum, subject to applicable restrictions and obligations in the Agreement (as defined in the Preview Terms).\n",
|
||||
"By default, Google Cloud [does not use](https://cloud.google.com/vertex-ai/docs/generative-ai/data-governance#foundation_model_development) Customer Data to train its foundation models as part of Google Cloud`s AI/ML Privacy Commitment. More details about how Google processes data can also be found in [Google's Customer Data Processing Addendum (CDPA)](https://cloud.google.com/terms/data-processing-addendum).\n",
|
||||
"\n",
|
||||
"To use Vertex AI PaLM you must have the `google-cloud-aiplatform` Python package installed and either:\n",
|
||||
"- Have credentials configured for your environment (gcloud, workload identity, etc...)\n",
|
||||
@@ -90,6 +87,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -142,6 +140,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
|
||||
226
docs/extras/integrations/document_loaders/airbyte_cdk.ipynb
Normal file
@@ -0,0 +1,226 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1f3a5ebf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Airbyte CDK"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "35ac77b1-449b-44f7-b8f3-3494d55c286e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
">[Airbyte](https://github.com/airbytehq/airbyte) is a data integration platform for ELT pipelines from APIs, databases & files to warehouses & lakes. It has the largest catalog of ELT connectors to data warehouses and databases.\n",
|
||||
"\n",
|
||||
"A lot of source connectors are implemented using the [Airbyte CDK](https://docs.airbyte.com/connector-development/cdk-python/). This loader allows to run any of these connectors and return the data as documents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3b06fbde",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Installation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e3e9dc79",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First, you need to install the `airbyte-cdk` python package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4d35e4e0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install airbyte-cdk"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "085aa658",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Then, either install an existing connector from the [Airbyte Github repository](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors) or create your own connector using the [Airbyte CDK](https://docs.airbyte.io/connector-development/connector-development).\n",
|
||||
"\n",
|
||||
"For example, to install the Github connector, run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f6d04ef4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install \"source_github@git+https://github.com/airbytehq/airbyte.git@master#subdirectory=airbyte-integrations/connectors/source-github\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "36069b74",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Some sources are also published as regular packages on PyPI"
|
||||
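"\n",
"```bash\n",
"pip install airbyte-source-gong\n",
"```"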
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ae855210",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "02208f52",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now you can create an `AirbyteCDKLoader` based on the imported source. It takes a `config` object that's passed to the connector. You also have to pick the stream you want to retrieve records from by name (`stream_name`). Check the connectors documentation page and spec definition for more information on the config object and available streams. For the Github connectors these are:\n",
|
||||
"* [https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-github/source_github/spec.json](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-github/source_github/spec.json).\n",
|
||||
"* [https://docs.airbyte.com/integrations/sources/github/](https://docs.airbyte.com/integrations/sources/github/)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "89a99e58",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"from langchain.document_loaders.airbyte import AirbyteCDKLoader\n",
|
||||
"from source_github.source import SourceGithub # plug in your own source here\n",
|
||||
"\n",
|
||||
"config = {\n",
|
||||
" # your github configuration\n",
|
||||
" \"credentials\": {\n",
|
||||
" \"api_url\": \"api.github.com\",\n",
|
||||
" \"personal_access_token\": \"<token>\"\n",
|
||||
" },\n",
|
||||
" \"repository\": \"<repo>\",\n",
|
||||
" \"start_date\": \"<date from which to start retrieving records from in ISO format, e.g. 2020-10-20T00:00:00Z>\"\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"issues_loader = AirbyteCDKLoader(source_class=SourceGithub, config=config, stream_name=\"issues\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2cea23fc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now you can load documents the usual way"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dae75cdb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = issues_loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4a93dc2a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As `load` returns a list, it will block until all documents are loaded. To have better control over this process, you can also you the `lazy_load` method which returns an iterator instead:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1782db09",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs_iterator = issues_loader.lazy_load()"
|
||||
]
|
||||
},
|
||||
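{
"cell_type": "markdown",
"id": "0a1b2c3d",
"metadata": {},
"source": [
"For example, you can process documents one at a time as they are fetched (a minimal sketch; the available metadata fields depend on the stream):"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4e5f6a7b",
"metadata": {},
"outputs": [],
"source": [
"for doc in docs_iterator:\n",
"    # records are fetched lazily, so processing starts before the full sync finishes\n",
"    print(doc.metadata)"
]
},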
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3a124086",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Keep in mind that by default the page content is empty and the metadata object contains all the information from the record. To create documents in a different, pass in a record_handler function when creating the loader:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5671395d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.docstore.document import Document\n",
|
||||
"\n",
|
||||
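"# build the document text from the issue title and body instead of leaving it empty\n",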
"def handle_record(record, id):\n",
|
||||
" return Document(page_content=record.data[\"title\"] + \"\\n\" + (record.data[\"body\"] or \"\"), metadata=record.data)\n",
|
||||
"\n",
|
||||
"issues_loader = AirbyteCDKLoader(source_class=SourceGithub, config=config, stream_name=\"issues\", record_handler=handle_record)\n",
|
||||
"\n",
|
||||
"docs = issues_loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "223eb8bc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Incremental loads\n",
|
||||
"\n",
|
||||
"Some streams allow incremental loading, this means the source keeps track of synced records and won't load them again. This is useful for sources that have a high volume of data and are updated frequently.\n",
|
||||
"\n",
|
||||
"To take advantage of this, store the `last_state` property of the loader and pass it in when creating the loader again. This will ensure that only new records are loaded."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7061e735",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"last_state = issues_loader.last_state # store safely\n",
|
||||
"\n",
|
||||
"incremental_issue_loader = AirbyteCDKLoader(source_class=SourceGithub, config=config, stream_name=\"issues\", state=last_state)\n",
|
||||
"\n",
|
||||
"new_docs = incremental_issue_loader.load()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
206
docs/extras/integrations/document_loaders/airbyte_gong.ipynb
Normal file
@@ -0,0 +1,206 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1f3a5ebf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Airbyte Gong"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "35ac77b1-449b-44f7-b8f3-3494d55c286e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
">[Airbyte](https://github.com/airbytehq/airbyte) is a data integration platform for ELT pipelines from APIs, databases & files to warehouses & lakes. It has the largest catalog of ELT connectors to data warehouses and databases.\n",
|
||||
"\n",
|
||||
"This loader exposes the Gong connector as a document loader, allowing you to load various Gong objects as documents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6847a40c",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3b06fbde",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Installation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e3e9dc79",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First, you need to install the `airbyte-source-gong` python package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4d35e4e0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install airbyte-source-gong"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ae855210",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "02208f52",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Check out the [Airbyte documentation page](https://docs.airbyte.com/integrations/sources/gong/) for details about how to configure the reader.\n",
|
||||
"The JSON schema the config object should adhere to can be found on Github: [https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-gong/source_gong/spec.yaml](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-gong/source_gong/spec.yaml).\n",
|
||||
"\n",
|
||||
"The general shape looks like this:\n",
|
||||
"```python\n",
|
||||
"{\n",
|
||||
" \"access_key\": \"<access key name>\",\n",
|
||||
" \"access_key_secret\": \"<access key secret>\",\n",
|
||||
" \"start_date\": \"<date from which to start retrieving records from in ISO format, e.g. 2020-10-20T00:00:00Z>\",\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"By default all fields are stored as metadata in the documents and the text is set to an empty string. Construct the text of the document by transforming the documents returned by the reader."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "89a99e58",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"from langchain.document_loaders.airbyte import AirbyteGongLoader\n",
|
||||
"\n",
|
||||
"config = {\n",
|
||||
" # your gong configuration\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"loader = AirbyteGongLoader(config=config, stream_name=\"calls\") # check the documentation linked above for a list of all streams"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2cea23fc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now you can load documents the usual way"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dae75cdb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4a93dc2a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As `load` returns a list, it will block until all documents are loaded. To have better control over this process, you can also you the `lazy_load` method which returns an iterator instead:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1782db09",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs_iterator = loader.lazy_load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3a124086",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Keep in mind that by default the page content is empty and the metadata object contains all the information from the record. To process documents, create a class inheriting from the base loader and implement the `_handle_records` method yourself:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5671395d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.docstore.document import Document\n",
|
||||
"\n",
|
||||
"def handle_record(record, id):\n",
|
||||
" return Document(page_content=record.data[\"title\"], metadata=record.data)\n",
|
||||
"\n",
|
||||
"loader = AirbyteGongLoader(config=config, record_handler=handle_record, stream_name=\"calls\")\n",
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "223eb8bc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Incremental loads\n",
|
||||
"\n",
|
||||
"Some streams allow incremental loading, this means the source keeps track of synced records and won't load them again. This is useful for sources that have a high volume of data and are updated frequently.\n",
|
||||
"\n",
|
||||
"To take advantage of this, store the `last_state` property of the loader and pass it in when creating the loader again. This will ensure that only new records are loaded."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7061e735",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"last_state = loader.last_state # store safely\n",
|
||||
"\n",
|
||||
"incremental_loader = AirbyteGongLoader(config=config, stream_name=\"calls\", state=last_state)\n",
|
||||
"\n",
|
||||
"new_docs = incremental_loader.load()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
208
docs/extras/integrations/document_loaders/airbyte_hubspot.ipynb
Normal file
@@ -0,0 +1,208 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1f3a5ebf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Airbyte Hubspot"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "35ac77b1-449b-44f7-b8f3-3494d55c286e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
">[Airbyte](https://github.com/airbytehq/airbyte) is a data integration platform for ELT pipelines from APIs, databases & files to warehouses & lakes. It has the largest catalog of ELT connectors to data warehouses and databases.\n",
|
||||
"\n",
|
||||
"This loader exposes the Hubspot connector as a document loader, allowing you to load various Hubspot objects as documents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6847a40c",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3b06fbde",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Installation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e3e9dc79",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First, you need to install the `airbyte-source-hubspot` python package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4d35e4e0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install airbyte-source-hubspot"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ae855210",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "02208f52",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Check out the [Airbyte documentation page](https://docs.airbyte.com/integrations/sources/hubspot/) for details about how to configure the reader.\n",
|
||||
"The JSON schema the config object should adhere to can be found on Github: [https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-hubspot/source_hubspot/spec.yaml](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-hubspot/source_hubspot/spec.yaml).\n",
|
||||
"\n",
|
||||
"The general shape looks like this:\n",
|
||||
"```python\n",
|
||||
"{\n",
|
||||
" \"start_date\": \"<date from which to start retrieving records from in ISO format, e.g. 2020-10-20T00:00:00Z>\",\n",
|
||||
" \"credentials\": {\n",
|
||||
" \"credentials_title\": \"Private App Credentials\",\n",
|
||||
" \"access_token\": \"<access token of your private app>\"\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"By default all fields are stored as metadata in the documents and the text is set to an empty string. Construct the text of the document by transforming the documents returned by the reader."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "89a99e58",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"from langchain.document_loaders.airbyte import AirbyteHubspotLoader\n",
|
||||
"\n",
|
||||
"config = {\n",
|
||||
" # your hubspot configuration\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"loader = AirbyteHubspotLoader(config=config, stream_name=\"products\") # check the documentation linked above for a list of all streams"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2cea23fc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now you can load documents the usual way"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dae75cdb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4a93dc2a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As `load` returns a list, it will block until all documents are loaded. To have better control over this process, you can also you the `lazy_load` method which returns an iterator instead:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1782db09",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs_iterator = loader.lazy_load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3a124086",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Keep in mind that by default the page content is empty and the metadata object contains all the information from the record. To process documents, create a class inheriting from the base loader and implement the `_handle_records` method yourself:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5671395d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.docstore.document import Document\n",
|
||||
"\n",
|
||||
"def handle_record(record, id):\n",
|
||||
" return Document(page_content=record.data[\"title\"], metadata=record.data)\n",
|
||||
"\n",
|
||||
"loader = AirbyteHubspotLoader(config=config, record_handler=handle_record, stream_name=\"products\")\n",
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "223eb8bc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Incremental loads\n",
|
||||
"\n",
|
||||
"Some streams allow incremental loading, this means the source keeps track of synced records and won't load them again. This is useful for sources that have a high volume of data and are updated frequently.\n",
|
||||
"\n",
|
||||
"To take advantage of this, store the `last_state` property of the loader and pass it in when creating the loader again. This will ensure that only new records are loaded."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7061e735",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"last_state = loader.last_state # store safely\n",
|
||||
"\n",
|
||||
"incremental_loader = AirbyteHubspotLoader(config=config, stream_name=\"products\", state=last_state)\n",
|
||||
"\n",
|
||||
"new_docs = incremental_loader.load()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,213 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1f3a5ebf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Airbyte Salesforce"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "35ac77b1-449b-44f7-b8f3-3494d55c286e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
">[Airbyte](https://github.com/airbytehq/airbyte) is a data integration platform for ELT pipelines from APIs, databases & files to warehouses & lakes. It has the largest catalog of ELT connectors to data warehouses and databases.\n",
|
||||
"\n",
|
||||
"This loader exposes the Salesforce connector as a document loader, allowing you to load various Salesforce objects as documents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6847a40c",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3b06fbde",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Installation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e3e9dc79",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First, you need to install the `airbyte-source-salesforce` python package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4d35e4e0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install airbyte-source-salesforce"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ae855210",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "02208f52",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Check out the [Airbyte documentation page](https://docs.airbyte.com/integrations/sources/salesforce/) for details about how to configure the reader.\n",
|
||||
"The JSON schema the config object should adhere to can be found on Github: [https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-salesforce/source_salesforce/spec.yaml](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-salesforce/source_salesforce/spec.yaml).\n",
|
||||
"\n",
|
||||
"The general shape looks like this:\n",
|
||||
"```python\n",
|
||||
"{\n",
|
||||
" \"client_id\": \"<oauth client id>\",\n",
|
||||
" \"client_secret\": \"<oauth client secret>\",\n",
|
||||
" \"refresh_token\": \"<oauth refresh token>\",\n",
|
||||
" \"start_date\": \"<date from which to start retrieving records from in ISO format, e.g. 2020-10-20T00:00:00Z>\",\n",
|
||||
" \"is_sandbox\": False, # set to True if you're using a sandbox environment\n",
|
||||
" \"streams_criteria\": [ # Array of filters for salesforce objects that should be loadable\n",
|
||||
" {\"criteria\": \"exacts\", \"value\": \"Account\"}, # Exact name of salesforce object\n",
|
||||
" {\"criteria\": \"starts with\", \"value\": \"Asset\"}, # Prefix of the name\n",
|
||||
" # Other allowed criteria: ends with, contains, starts not with, ends not with, not contains, not exacts\n",
|
||||
" ],\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"By default all fields are stored as metadata in the documents and the text is set to an empty string. Construct the text of the document by transforming the documents returned by the reader."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "89a99e58",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"from langchain.document_loaders.airbyte import AirbyteSalesforceLoader\n",
|
||||
"\n",
|
||||
"config = {\n",
|
||||
" # your salesforce configuration\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"loader = AirbyteSalesforceLoader(config=config, stream_name=\"asset\") # check the documentation linked above for a list of all streams"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2cea23fc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now you can load documents the usual way"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dae75cdb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4a93dc2a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As `load` returns a list, it will block until all documents are loaded. To have better control over this process, you can also you the `lazy_load` method which returns an iterator instead:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1782db09",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs_iterator = loader.lazy_load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3a124086",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Keep in mind that by default the page content is empty and the metadata object contains all the information from the record. To create documents in a different, pass in a record_handler function when creating the loader:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5671395d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.docstore.document import Document\n",
|
||||
"\n",
|
||||
"def handle_record(record, id):\n",
|
||||
" return Document(page_content=record.data[\"title\"], metadata=record.data)\n",
|
||||
"\n",
|
||||
"loader = AirbyteSalesforceLoader(config=config, record_handler=handle_record, stream_name=\"asset\")\n",
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "223eb8bc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Incremental loads\n",
|
||||
"\n",
|
||||
"Some streams allow incremental loading, this means the source keeps track of synced records and won't load them again. This is useful for sources that have a high volume of data and are updated frequently.\n",
|
||||
"\n",
|
||||
"To take advantage of this, store the `last_state` property of the loader and pass it in when creating the loader again. This will ensure that only new records are loaded."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7061e735",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"last_state = loader.last_state # store safely\n",
|
||||
"\n",
|
||||
"incremental_loader = AirbyteSalesforceLoader(config=config, stream_name=\"asset\", state=last_state)\n",
|
||||
"\n",
|
||||
"new_docs = incremental_loader.load()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
209
docs/extras/integrations/document_loaders/airbyte_shopify.ipynb
Normal file
@@ -0,0 +1,209 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1f3a5ebf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Airbyte Shopify"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "35ac77b1-449b-44f7-b8f3-3494d55c286e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
">[Airbyte](https://github.com/airbytehq/airbyte) is a data integration platform for ELT pipelines from APIs, databases & files to warehouses & lakes. It has the largest catalog of ELT connectors to data warehouses and databases.\n",
|
||||
"\n",
|
||||
"This loader exposes the Shopify connector as a document loader, allowing you to load various Shopify objects as documents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6847a40c",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3b06fbde",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Installation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e3e9dc79",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First, you need to install the `airbyte-source-shopify` python package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4d35e4e0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install airbyte-source-shopify"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ae855210",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "02208f52",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Check out the [Airbyte documentation page](https://docs.airbyte.com/integrations/sources/shopify/) for details about how to configure the reader.\n",
|
||||
"The JSON schema the config object should adhere to can be found on Github: [https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-shopify/source_shopify/spec.json](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-shopify/source_shopify/spec.json).\n",
|
||||
"\n",
|
||||
"The general shape looks like this:\n",
|
||||
"```python\n",
|
||||
"{\n",
|
||||
" \"start_date\": \"<date from which to start retrieving records from in ISO format, e.g. 2020-10-20T00:00:00Z>\",\n",
|
||||
" \"shop\": \"<name of the shop you want to retrieve documents from>\",\n",
|
||||
" \"credentials\": {\n",
|
||||
" \"auth_method\": \"api_password\",\n",
|
||||
" \"api_password\": \"<your api password>\"\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"By default all fields are stored as metadata in the documents and the text is set to an empty string. Construct the text of the document by transforming the documents returned by the reader."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "89a99e58",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"from langchain.document_loaders.airbyte import AirbyteShopifyLoader\n",
|
||||
"\n",
|
||||
"config = {\n",
|
||||
" # your shopify configuration\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"loader = AirbyteShopifyLoader(config=config, stream_name=\"orders\") # check the documentation linked above for a list of all streams"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2cea23fc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now you can load documents the usual way"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dae75cdb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
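{
|
||||
"cell_type": "markdown",
|
||||
"id": "5e2d7c40",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For example, here is a minimal sketch that builds the page content from a metadata field after loading (the `title` field is an assumption; pick whichever field fits your stream):"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8f3a6b92",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.docstore.document import Document\n",
|
||||
"\n",
|
||||
"# Sketch: derive page_content from a metadata field of the loaded documents.\n",
|
||||
"# The \"title\" field is an assumption; adapt it to the stream you load.\n",
|
||||
"docs = [\n",
|
||||
"    Document(page_content=doc.metadata.get(\"title\", \"\"), metadata=doc.metadata)\n",
|
||||
"    for doc in docs\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||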
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4a93dc2a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As `load` returns a list, it will block until all documents are loaded. To have better control over this process, you can also you the `lazy_load` method which returns an iterator instead:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1782db09",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs_iterator = loader.lazy_load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3a124086",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Keep in mind that by default the page content is empty and the metadata object contains all the information from the record. To create documents in a different, pass in a record_handler function when creating the loader:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5671395d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.docstore.document import Document\n",
|
||||
"\n",
|
||||
"def handle_record(record, id):\n",
|
||||
" return Document(page_content=record.data[\"title\"], metadata=record.data)\n",
|
||||
"\n",
|
||||
"loader = AirbyteShopifyLoader(config=config, record_handler=handle_record, stream_name=\"orders\")\n",
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "223eb8bc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Incremental loads\n",
|
||||
"\n",
|
||||
"Some streams allow incremental loading, this means the source keeps track of synced records and won't load them again. This is useful for sources that have a high volume of data and are updated frequently.\n",
|
||||
"\n",
|
||||
"To take advantage of this, store the `last_state` property of the loader and pass it in when creating the loader again. This will ensure that only new records are loaded."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7061e735",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"last_state = loader.last_state # store safely\n",
|
||||
"\n",
|
||||
"incremental_loader = AirbyteShopifyLoader(config=config, stream_name=\"orders\", state=last_state)\n",
|
||||
"\n",
|
||||
"new_docs = incremental_loader.load()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
206
docs/extras/integrations/document_loaders/airbyte_stripe.ipynb
Normal file
@@ -0,0 +1,206 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1f3a5ebf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Airbyte Stripe"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "35ac77b1-449b-44f7-b8f3-3494d55c286e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
">[Airbyte](https://github.com/airbytehq/airbyte) is a data integration platform for ELT pipelines from APIs, databases & files to warehouses & lakes. It has the largest catalog of ELT connectors to data warehouses and databases.\n",
|
||||
"\n",
|
||||
"This loader exposes the Stripe connector as a document loader, allowing you to load various Stripe objects as documents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6847a40c",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3b06fbde",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Installation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e3e9dc79",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First, you need to install the `airbyte-source-stripe` python package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4d35e4e0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install airbyte-source-stripe"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ae855210",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "02208f52",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Check out the [Airbyte documentation page](https://docs.airbyte.com/integrations/sources/stripe/) for details about how to configure the reader.\n",
|
||||
"The JSON schema the config object should adhere to can be found on Github: [https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-stripe/source_stripe/spec.yaml](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-stripe/source_stripe/spec.yaml).\n",
|
||||
"\n",
|
||||
"The general shape looks like this:\n",
|
||||
"```python\n",
|
||||
"{\n",
|
||||
" \"client_secret\": \"<secret key>\",\n",
|
||||
" \"account_id\": \"<account id>\",\n",
|
||||
" \"start_date\": \"<date from which to start retrieving records from in ISO format, e.g. 2020-10-20T00:00:00Z>\",\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"By default all fields are stored as metadata in the documents and the text is set to an empty string. Construct the text of the document by transforming the documents returned by the reader."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "89a99e58",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"from langchain.document_loaders.airbyte import AirbyteStripeLoader\n",
|
||||
"\n",
|
||||
"config = {\n",
|
||||
" # your stripe configuration\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"loader = AirbyteStripeLoader(config=config, stream_name=\"invoices\") # check the documentation linked above for a list of all streams"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2cea23fc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now you can load documents the usual way"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dae75cdb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4a93dc2a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As `load` returns a list, it will block until all documents are loaded. To have better control over this process, you can also you the `lazy_load` method which returns an iterator instead:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1782db09",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs_iterator = loader.lazy_load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3a124086",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Keep in mind that by default the page content is empty and the metadata object contains all the information from the record. To create documents in a different, pass in a record_handler function when creating the loader:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5671395d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.docstore.document import Document\n",
|
||||
"\n",
|
||||
"def handle_record(record, id):\n",
|
||||
" return Document(page_content=record.data[\"title\"], metadata=record.data)\n",
|
||||
"\n",
|
||||
"loader = AirbyteStripeLoader(config=config, record_handler=handle_record, stream_name=\"invoices\")\n",
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "223eb8bc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Incremental loads\n",
|
||||
"\n",
|
||||
"Some streams allow incremental loading, this means the source keeps track of synced records and won't load them again. This is useful for sources that have a high volume of data and are updated frequently.\n",
|
||||
"\n",
|
||||
"To take advantage of this, store the `last_state` property of the loader and pass it in when creating the loader again. This will ensure that only new records are loaded."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7061e735",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"last_state = loader.last_state # store safely\n",
|
||||
"\n",
|
||||
"incremental_loader = AirbyteStripeLoader(config=config, record_handler=handle_record, stream_name=\"invoices\", state=last_state)\n",
|
||||
"\n",
|
||||
"new_docs = incremental_loader.load()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
209
docs/extras/integrations/document_loaders/airbyte_typeform.ipynb
Normal file
@@ -0,0 +1,209 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1f3a5ebf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Airbyte Typeform"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "35ac77b1-449b-44f7-b8f3-3494d55c286e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
">[Airbyte](https://github.com/airbytehq/airbyte) is a data integration platform for ELT pipelines from APIs, databases & files to warehouses & lakes. It has the largest catalog of ELT connectors to data warehouses and databases.\n",
|
||||
"\n",
|
||||
"This loader exposes the Typeform connector as a document loader, allowing you to load various Typeform objects as documents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6847a40c",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3b06fbde",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Installation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e3e9dc79",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First, you need to install the `airbyte-source-typeform` python package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4d35e4e0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install airbyte-source-typeform"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ae855210",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "02208f52",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Check out the [Airbyte documentation page](https://docs.airbyte.com/integrations/sources/typeform/) for details about how to configure the reader.\n",
|
||||
"The JSON schema the config object should adhere to can be found on Github: [https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-typeform/source_typeform/spec.json](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-typeform/source_typeform/spec.json).\n",
|
||||
"\n",
|
||||
"The general shape looks like this:\n",
|
||||
"```python\n",
|
||||
"{\n",
|
||||
" \"credentials\": {\n",
|
||||
" \"auth_type\": \"Private Token\",\n",
|
||||
" \"access_token\": \"<your auth token>\"\n",
|
||||
" },\n",
|
||||
" \"start_date\": \"<date from which to start retrieving records from in ISO format, e.g. 2020-10-20T00:00:00Z>\",\n",
|
||||
" \"form_ids\": [\"<id of form to load records for>\"] # if omitted, records from all forms will be loaded\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"By default all fields are stored as metadata in the documents and the text is set to an empty string. Construct the text of the document by transforming the documents returned by the reader."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "89a99e58",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"from langchain.document_loaders.airbyte import AirbyteTypeformLoader\n",
|
||||
"\n",
|
||||
"config = {\n",
|
||||
" # your typeform configuration\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"loader = AirbyteTypeformLoader(config=config, stream_name=\"forms\") # check the documentation linked above for a list of all streams"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2cea23fc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now you can load documents the usual way"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dae75cdb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4a93dc2a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As `load` returns a list, it will block until all documents are loaded. To have better control over this process, you can also you the `lazy_load` method which returns an iterator instead:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1782db09",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs_iterator = loader.lazy_load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3a124086",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Keep in mind that by default the page content is empty and the metadata object contains all the information from the record. To create documents in a different, pass in a record_handler function when creating the loader:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5671395d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.docstore.document import Document\n",
|
||||
"\n",
|
||||
"def handle_record(record, id):\n",
|
||||
" return Document(page_content=record.data[\"title\"], metadata=record.data)\n",
|
||||
"\n",
|
||||
"loader = AirbyteTypeformLoader(config=config, record_handler=handle_record, stream_name=\"forms\")\n",
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "223eb8bc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Incremental loads\n",
|
||||
"\n",
|
||||
"Some streams allow incremental loading, this means the source keeps track of synced records and won't load them again. This is useful for sources that have a high volume of data and are updated frequently.\n",
|
||||
"\n",
|
||||
"To take advantage of this, store the `last_state` property of the loader and pass it in when creating the loader again. This will ensure that only new records are loaded."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7061e735",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"last_state = loader.last_state # store safely\n",
|
||||
"\n",
|
||||
"incremental_loader = AirbyteTypeformLoader(config=config, record_handler=handle_record, stream_name=\"forms\", state=last_state)\n",
|
||||
"\n",
|
||||
"new_docs = incremental_loader.load()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,210 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1f3a5ebf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Airbyte Zendesk Support"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "35ac77b1-449b-44f7-b8f3-3494d55c286e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
">[Airbyte](https://github.com/airbytehq/airbyte) is a data integration platform for ELT pipelines from APIs, databases & files to warehouses & lakes. It has the largest catalog of ELT connectors to data warehouses and databases.\n",
|
||||
"\n",
|
||||
"This loader exposes the Zendesk Support connector as a document loader, allowing you to load various objects as documents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6847a40c",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3b06fbde",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Installation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e3e9dc79",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First, you need to install the `airbyte-source-zendesk-support` python package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4d35e4e0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install airbyte-source-zendesk-support"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ae855210",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "02208f52",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Check out the [Airbyte documentation page](https://docs.airbyte.com/integrations/sources/zendesk-support/) for details about how to configure the reader.\n",
|
||||
"The JSON schema the config object should adhere to can be found on Github: [https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-zendesk-support/source_zendesk_support/spec.json](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-zendesk-support/source_zendesk_support/spec.json).\n",
|
||||
"\n",
|
||||
"The general shape looks like this:\n",
|
||||
"```python\n",
|
||||
"{\n",
|
||||
" \"subdomain\": \"<your zendesk subdomain>\",\n",
|
||||
" \"start_date\": \"<date from which to start retrieving records from in ISO format, e.g. 2020-10-20T00:00:00Z>\",\n",
|
||||
" \"credentials\": {\n",
|
||||
" \"credentials\": \"api_token\",\n",
|
||||
" \"email\": \"<your email>\",\n",
|
||||
" \"api_token\": \"<your api token>\"\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"By default all fields are stored as metadata in the documents and the text is set to an empty string. Construct the text of the document by transforming the documents returned by the reader."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "89a99e58",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"from langchain.document_loaders.airbyte import AirbyteZendeskSupportLoader\n",
|
||||
"\n",
|
||||
"config = {\n",
|
||||
" # your zendesk-support configuration\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"loader = AirbyteZendeskSupportLoader(config=config, stream_name=\"tickets\") # check the documentation linked above for a list of all streams"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2cea23fc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now you can load documents the usual way"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dae75cdb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4a93dc2a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As `load` returns a list, it will block until all documents are loaded. To have better control over this process, you can also you the `lazy_load` method which returns an iterator instead:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1782db09",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs_iterator = loader.lazy_load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3a124086",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Keep in mind that by default the page content is empty and the metadata object contains all the information from the record. To create documents in a different, pass in a record_handler function when creating the loader:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5671395d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.docstore.document import Document\n",
|
||||
"\n",
|
||||
"def handle_record(record, id):\n",
|
||||
" return Document(page_content=record.data[\"title\"], metadata=record.data)\n",
|
||||
"\n",
|
||||
"loader = AirbyteZendeskSupportLoader(config=config, record_handler=handle_record, stream_name=\"tickets\")\n",
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "223eb8bc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Incremental loads\n",
|
||||
"\n",
|
||||
"Some streams allow incremental loading, this means the source keeps track of synced records and won't load them again. This is useful for sources that have a high volume of data and are updated frequently.\n",
|
||||
"\n",
|
||||
"To take advantage of this, store the `last_state` property of the loader and pass it in when creating the loader again. This will ensure that only new records are loaded."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7061e735",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"last_state = loader.last_state # store safely\n",
|
||||
"\n",
|
||||
"incremental_loader = AirbyteZendeskSupportLoader(config=config, stream_name=\"tickets\", state=last_state)\n",
|
||||
"\n",
|
||||
"new_docs = incremental_loader.load()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
94
docs/extras/integrations/document_loaders/concurrent.ipynb
Normal file
@@ -0,0 +1,94 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "23c6e167",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Concurrent Loader\n",
|
||||
"\n",
|
||||
"Works just like the GenericLoader but concurrently for those who choose to optimize their workflow.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "6ff3fb1f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import ConcurrentLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "ce96fa20",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = ConcurrentLoader.from_filesystem('example_data/', glob=\"**/*.txt\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "06a6cf5d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"files = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "b87d3e58",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"2"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"len(files)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "668f1ee5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,13 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
<opml version="1.0">
|
||||
<head>
|
||||
<title>Sample RSS feed subscriptions</title>
|
||||
</head>
|
||||
<body>
|
||||
<outline text="Tech" title="Tech">
|
||||
<outline type="rss" text="Engadget" title="Engadget" xmlUrl="http://www.engadget.com/rss-full.xml" htmlUrl="http://www.engadget.com"/>
|
||||
<outline type="rss" text="Ars Technica - All content" title="Ars Technica - All content" xmlUrl="http://feeds.arstechnica.com/arstechnica/index/" htmlUrl="https://arstechnica.com"/>
|
||||
</outline>
|
||||
</body>
|
||||
</opml>
|
||||
@@ -73,13 +73,27 @@
|
||||
"loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "41c8a46f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to use an alternative loader, you can provide a custom function, for example:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "eba3002d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"source": [
|
||||
"from langchain.document_loaders import PyPDFLoader\n",
|
||||
"def load_pdf(file_path):\n",
|
||||
" return PyPDFLoader(file_path)\n",
|
||||
"\n",
|
||||
"loader = GCSFileLoader(project_name=\"aist\", bucket=\"testing-hwc\", blob=\"fake.pdf\", loader_func=load_pdf)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -0,0 +1,178 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c83b6a4c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Huawei OBS Directory\n",
|
||||
"The following code demonstrates how to load objects from the Huawei OBS (Object Storage Service) as documents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c2191935",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Install the required package\n",
|
||||
"# pip install esdk-obs-python"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "55fca3b4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import OBSDirectoryLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "c3ed419f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"endpoint = \"your-endpoint\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "3428fd4e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Configure your access credentials\\n\n",
|
||||
"config = {\n",
|
||||
" \"ak\": \"your-access-key\",\n",
|
||||
" \"sk\": \"your-secret-key\"\n",
|
||||
"}\n",
|
||||
"loader = OBSDirectoryLoader(\"your-bucket-name\", endpoint=endpoint, config=config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9beede9f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1e20a839",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Specify a Prefix for Loading\n",
|
||||
"If you want to load objects with a specific prefix from the bucket, you can use the following code:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "125f311d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = OBSDirectoryLoader(\"your-bucket-name\", endpoint=endpoint, config=config, prefix=\"test_prefix\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b3488037",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "84c82c0a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Get Authentication Information from ECS\n",
|
||||
"If your langchain is deployed on Huawei Cloud ECS and [Agency is set up](https://support.huaweicloud.com/intl/en-us/usermanual-ecs/ecs_03_0166.html#section7), the loader can directly get the security token from ECS without needing access key and secret key. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "1db99969",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"config = {\"get_token_from_ecs\": True}\n",
|
||||
"loader = OBSDirectoryLoader(\"your-bucket-name\", endpoint=endpoint, config=config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "57dd9f35",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "30205d25",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use a Public Bucket\n",
|
||||
"If your bucket's bucket policy allows anonymous access (anonymous users have `listBucket` and `GetObject` permissions), you can directly load the objects without configuring the `config` parameter."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "4dfa2ef0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = OBSDirectoryLoader(\"your-bucket-name\", endpoint=endpoint)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "67d4c1d0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader.load()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
180
docs/extras/integrations/document_loaders/huawei_obs_file.ipynb
Normal file
@@ -0,0 +1,180 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4394a872",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Huawei OBS File\n",
|
||||
"The following code demonstrates how to load an object from the Huawei OBS (Object Storage Service) as document."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c43d811b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Install the required package\n",
|
||||
"# pip install esdk-obs-python"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "5e16bae6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders.obs_file import OBSFileLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "75cc7e7c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"endpoint = \"your-endpoint\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "f9816984",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from obs import ObsClient\n",
|
||||
"obs_client = ObsClient(access_key_id=\"your-access-key\", secret_access_key=\"your-secret-key\", server=endpoint)\n",
|
||||
"loader = OBSFileLoader(\"your-bucket-name\", \"your-object-key\", client=obs_client)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6143b39b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "633e05ca",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Each Loader with Separate Authentication Information\n",
|
||||
"If you don't need to reuse OBS connections between different loaders, you can directly configure the `config`. The loader will use the config information to initialize its own OBS client."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "a5dd6a5d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Configure your access credentials\\n\n",
|
||||
"config = {\n",
|
||||
" \"ak\": \"your-access-key\",\n",
|
||||
" \"sk\": \"your-secret-key\"\n",
|
||||
"}\n",
|
||||
"loader = OBSFileLoader(\"your-bucket-name\", \"your-object-key\",endpoint=endpoint, config=config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9a741f1c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1e2e611c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Get Authentication Information from ECS\n",
|
||||
"If your langchain is deployed on Huawei Cloud ECS and [Agency is set up](https://support.huaweicloud.com/intl/en-us/usermanual-ecs/ecs_03_0166.html#section7), the loader can directly get the security token from ECS without needing access key and secret key. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "338fafef",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"config = {\"get_token_from_ecs\": True}\n",
|
||||
"loader = OBSFileLoader(\"your-bucket-name\", \"your-object-key\", endpoint=endpoint, config=config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "73976c55",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b77aa18c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Access a Publicly Accessible Object\n",
|
||||
"If the object you want to access allows anonymous user access (anonymous users have `GetObject` permission), you can directly load the object without configuring the `config` parameter."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "df83d121",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = OBSFileLoader(\"your-bucket-name\", \"your-object-key\", endpoint=endpoint)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "82a844ba",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader.load()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
192
docs/extras/integrations/document_loaders/news.ipynb
Normal file
@@ -0,0 +1,192 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2dfc4698",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# News URL\n",
|
||||
"\n",
|
||||
"This covers how to load HTML news articles from a list of URLs into a document format that we can use downstream."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "16c3699e",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-02T21:18:18.886031400Z",
|
||||
"start_time": "2023-08-02T21:18:17.682345Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import NewsURLLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "836fbac1",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-02T21:18:18.895539800Z",
|
||||
"start_time": "2023-08-02T21:18:18.895539800Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"urls = [\n",
|
||||
" \"https://www.bbc.com/news/world-us-canada-66388172\",\n",
|
||||
" \"https://www.bbc.com/news/entertainment-arts-66384971\",\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "33089aba-ff74-4d00-8f40-9449c29587cc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Pass in urls to load them into Documents"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "00f46fda",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-02T21:18:19.227074500Z",
|
||||
"start_time": "2023-08-02T21:18:18.895539800Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"First article: page_content='In testimony to the congressional committee examining the 6 January riot, Mrs Powell said she did not review all of the many claims of election fraud she made, telling them that \"no reasonable person\" would view her claims as fact. Neither she nor her representatives have commented.' metadata={'title': 'Donald Trump indictment: What do we know about the six co-conspirators?', 'link': 'https://www.bbc.com/news/world-us-canada-66388172', 'authors': [], 'language': 'en', 'description': 'Six people accused of helping Mr Trump undermine the election have been described by prosecutors.', 'publish_date': None}\n",
|
||||
"\n",
|
||||
"Second article: page_content='Ms Williams added: \"If there\\'s anything that I can do in my power to ensure that dancers or singers or whoever decides to work with her don\\'t have to go through that same experience, I\\'m going to do that.\"' metadata={'title': \"Lizzo dancers Arianna Davis and Crystal Williams: 'No one speaks out, they are scared'\", 'link': 'https://www.bbc.com/news/entertainment-arts-66384971', 'authors': [], 'language': 'en', 'description': 'The US pop star is being sued for sexual harassment and fat-shaming but has yet to comment.', 'publish_date': None}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"loader = NewsURLLoader(urls=urls)\n",
|
||||
"data = loader.load()\n",
|
||||
"print(\"First article: \", data[0])\n",
|
||||
"print(\"\\nSecond article: \", data[1])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"Use nlp=True to run nlp analysis and generate keywords + summary"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "98ac26c488315bff"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "b68a26b3",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-02T21:18:19.585758200Z",
|
||||
"start_time": "2023-08-02T21:18:19.227074500Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"First article: page_content='In testimony to the congressional committee examining the 6 January riot, Mrs Powell said she did not review all of the many claims of election fraud she made, telling them that \"no reasonable person\" would view her claims as fact. Neither she nor her representatives have commented.' metadata={'title': 'Donald Trump indictment: What do we know about the six co-conspirators?', 'link': 'https://www.bbc.com/news/world-us-canada-66388172', 'authors': [], 'language': 'en', 'description': 'Six people accused of helping Mr Trump undermine the election have been described by prosecutors.', 'publish_date': None, 'keywords': ['powell', 'know', 'donald', 'trump', 'review', 'indictment', 'telling', 'view', 'reasonable', 'person', 'testimony', 'coconspirators', 'riot', 'representatives', 'claims'], 'summary': 'In testimony to the congressional committee examining the 6 January riot, Mrs Powell said she did not review all of the many claims of election fraud she made, telling them that \"no reasonable person\" would view her claims as fact.\\nNeither she nor her representatives have commented.'}\n",
|
||||
"\n",
|
||||
"Second article: page_content='Ms Williams added: \"If there\\'s anything that I can do in my power to ensure that dancers or singers or whoever decides to work with her don\\'t have to go through that same experience, I\\'m going to do that.\"' metadata={'title': \"Lizzo dancers Arianna Davis and Crystal Williams: 'No one speaks out, they are scared'\", 'link': 'https://www.bbc.com/news/entertainment-arts-66384971', 'authors': [], 'language': 'en', 'description': 'The US pop star is being sued for sexual harassment and fat-shaming but has yet to comment.', 'publish_date': None, 'keywords': ['davis', 'lizzo', 'singers', 'experience', 'crystal', 'ensure', 'arianna', 'theres', 'williams', 'power', 'going', 'dancers', 'im', 'speaks', 'work', 'ms', 'scared'], 'summary': 'Ms Williams added: \"If there\\'s anything that I can do in my power to ensure that dancers or singers or whoever decides to work with her don\\'t have to go through that same experience, I\\'m going to do that.\"'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"loader = NewsURLLoader(urls=urls, nlp=True)\n",
|
||||
"data = loader.load()\n",
|
||||
"print(\"First article: \", data[0])\n",
|
||||
"print(\"\\nSecond article: \", data[1])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "['powell',\n 'know',\n 'donald',\n 'trump',\n 'review',\n 'indictment',\n 'telling',\n 'view',\n 'reasonable',\n 'person',\n 'testimony',\n 'coconspirators',\n 'riot',\n 'representatives',\n 'claims']"
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"data[0].metadata['keywords']"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-02T21:18:19.585758200Z",
|
||||
"start_time": "2023-08-02T21:18:19.585758200Z"
|
||||
}
|
||||
},
|
||||
"id": "ae37e004e0284b1d"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "'In testimony to the congressional committee examining the 6 January riot, Mrs Powell said she did not review all of the many claims of election fraud she made, telling them that \"no reasonable person\" would view her claims as fact.\\nNeither she nor her representatives have commented.'"
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"data[0].metadata['summary']"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-02T21:18:19.598966800Z",
|
||||
"start_time": "2023-08-02T21:18:19.594950200Z"
|
||||
}
|
||||
},
|
||||
"id": "7676155fb175e53e"
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
144
docs/extras/integrations/document_loaders/nuclia.ipynb
Normal file
@@ -0,0 +1,144 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Nuclia Understanding API document loader\n",
|
||||
"\n",
|
||||
"[Nuclia](https://nuclia.com) automatically indexes your unstructured data from any internal and external source, providing optimized search results and generative answers. It can handle video and audio transcription, image content extraction, and document parsing.\n",
|
||||
"\n",
|
||||
"The Nuclia Understanding API supports the processing of unstructured data, including text, web pages, documents, and audio/video contents. It extracts all texts wherever they are (using speech-to-text or OCR when needed), it also extracts metadata, embedded files (like images in a PDF), and web links. If machine learning is enabled, it identifies entities, provides a summary of the content and generates embeddings for all the sentences.\n",
|
||||
"\n",
|
||||
"To use the Nuclia Understanding API, you need to have a Nuclia account. You can create one for free at [https://nuclia.cloud](https://nuclia.cloud), and then [create a NUA key](https://docs.nuclia.dev/docs/docs/using/understanding/intro)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install --upgrade protobuf\n",
|
||||
"#!pip install nucliadb-protos"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"NUCLIA_ZONE\"] = \"<YOUR_ZONE>\" # e.g. europe-1\n",
|
||||
"os.environ[\"NUCLIA_NUA_KEY\"] = \"<YOUR_API_KEY>\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To use the Nuclia document loader, you need to instantiate a `NucliaUnderstandingAPI` tool:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.tools.nuclia import NucliaUnderstandingAPI\n",
|
||||
"\n",
|
||||
"nua = NucliaUnderstandingAPI(enable_ml=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders.nuclia import NucliaLoader\n",
|
||||
"\n",
|
||||
"loader = NucliaLoader(\"./interview.mp4\", nua)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can now call the `load` the document in a loop until you get the document."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import time\n",
|
||||
"\n",
|
||||
"pending = True\n",
|
||||
"while pending:\n",
|
||||
" time.sleep(15)\n",
|
||||
" docs = loader.load()\n",
|
||||
" if len(docs) > 0:\n",
|
||||
" print(docs[0].page_content)\n",
|
||||
" print(docs[0].metadata)\n",
|
||||
" pending = False\n",
|
||||
" else:\n",
|
||||
" print(\"waiting...\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Retrieved information\n",
|
||||
"\n",
|
||||
"Nuclia returns the following information:\n",
|
||||
"\n",
|
||||
"- file metadata\n",
|
||||
"- extracted text\n",
|
||||
"- nested text (like text in an embedded image)\n",
|
||||
"- paragraphs and sentences splitting (defined by the position of their first and last characters, plus start time and end time for a video or audio file)\n",
|
||||
"- links\n",
|
||||
"- a thumbnail\n",
|
||||
"- embedded files\n",
|
||||
"\n",
|
||||
"Note:\n",
|
||||
"\n",
|
||||
" Generated files (thumbnail, extracted embedded files, etc.) are provided as a token. You can download them with the [`/processing/download` endpoint](https://docs.nuclia.dev/docs/api#operation/Download_binary_file_processing_download_get).\n",
|
||||
"\n",
|
||||
" Also at any level, if an attribute exceeds a certain size, it will be put in a downloadable file and will be replaced in the document by a file pointer. This will consist of `{\"file\": {\"uri\": \"JWT_TOKEN\"}}`. The rule is that if the size of the message is greater than 1000000 characters, the biggest parts will be moved to downloadable files. First, the compression process will target vectors. If that is not enough, it will target large field metadata, and finally it will target extracted text.\n"
|
||||
]
|
||||
},
|
||||
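{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As a minimal sketch of the file-pointer rule above, the cell below walks a plain nested dict/list structure and collects every `{\"file\": {\"uri\": \"JWT_TOKEN\"}}` pointer, so the corresponding files can later be fetched via the `/processing/download` endpoint. The helper name and the usage line are hypothetical; no Nuclia-specific API is called."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Sketch (assumption: the data is made of plain nested dicts/lists): collect\n",
|
||||
"# every {\"file\": {\"uri\": ...}} pointer left in place of an oversized attribute.\n",
|
||||
"def find_file_pointers(node, found=None):\n",
|
||||
"    if found is None:\n",
|
||||
"        found = []\n",
|
||||
"    if isinstance(node, dict):\n",
|
||||
"        file_field = node.get(\"file\")\n",
|
||||
"        if isinstance(file_field, dict) and \"uri\" in file_field:\n",
|
||||
"            found.append(file_field[\"uri\"])\n",
|
||||
"        else:\n",
|
||||
"            for value in node.values():\n",
|
||||
"                find_file_pointers(value, found)\n",
|
||||
"    elif isinstance(node, list):\n",
|
||||
"        for item in node:\n",
|
||||
"            find_file_pointers(item, found)\n",
|
||||
"    return found\n",
|
||||
"\n",
|
||||
"# Hypothetical usage on a loaded document's data:\n",
|
||||
"# tokens = find_file_pointers(docs[0].metadata)"
|
||||
]
|
||||
}
|
||||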
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "langchain",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.5"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
139
docs/extras/integrations/document_loaders/pubmed.ipynb
Normal file
@@ -0,0 +1,139 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3df0dcf8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# PubMed\n",
|
||||
"\n",
|
||||
">[PubMed®](https://pubmed.ncbi.nlm.nih.gov/) by `The National Center for Biotechnology Information, National Library of Medicine` comprises more than 35 million citations for biomedical literature from `MEDLINE`, life science journals, and online books. Citations may include links to full text content from `PubMed Central` and publisher web sites."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "aecaff63",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import PubMedLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "f2f7e8d3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = PubMedLoader(\"chatgpt\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "ed115aa1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "b68d3264-b893-45e4-8ab0-077b25a586dc",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"3"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"len(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "9f4626d2-068d-4aed-9ffe-ad754ad4b4cd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'uid': '37548997',\n",
|
||||
" 'Title': 'Performance of ChatGPT on the Situational Judgement Test-A Professional Dilemmas-Based Examination for Doctors in the United Kingdom.',\n",
|
||||
" 'Published': '2023-08-07',\n",
|
||||
" 'Copyright Information': '©Robin J Borchert, Charlotte R Hickman, Jack Pepys, Timothy J Sadler. Originally published in JMIR Medical Education (https://mededu.jmir.org), 07.08.2023.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs[1].metadata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "8000f687-b500-4cce-841b-70d6151304da",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"BACKGROUND: ChatGPT is a large language model that has performed well on professional examinations in the fields of medicine, law, and business. However, it is unclear how ChatGPT would perform on an examination assessing professionalism and situational judgement for doctors.\\nOBJECTIVE: We evaluated the performance of ChatGPT on the Situational Judgement Test (SJT): a national examination taken by all final-year medical students in the United Kingdom. This examination is designed to assess attributes such as communication, teamwork, patient safety, prioritization skills, professionalism, and ethics.\\nMETHODS: All questions from the UK Foundation Programme Office's (UKFPO's) 2023 SJT practice examination were inputted into ChatGPT. For each question, ChatGPT's answers and rationales were recorded and assessed on the basis of the official UK Foundation Programme Office scoring template. Questions were categorized into domains of Good Medical Practice on the basis of the domains referenced in the rationales provided in the scoring sheet. Questions without clear domain links were screened by reviewers and assigned one or multiple domains. ChatGPT's overall performance, as well as its performance across the domains of Good Medical Practice, was evaluated.\\nRESULTS: Overall, ChatGPT performed well, scoring 76% on the SJT but scoring full marks on only a few questions (9%), which may reflect possible flaws in ChatGPT's situational judgement or inconsistencies in the reasoning across questions (or both) in the examination itself. ChatGPT demonstrated consistent performance across the 4 outlined domains in Good Medical Practice for doctors.\\nCONCLUSIONS: Further research is needed to understand the potential applications of large language models, such as ChatGPT, in medical education for standardizing questions and providing consistent rationales for examinations assessing professionalism and ethics.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs[1].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1070e571-697d-4c33-9a4f-0b2dd6909629",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -9,7 +9,7 @@
|
||||
"\n",
|
||||
"We may want to process load all URLs under a root directory.\n",
|
||||
"\n",
|
||||
"For example, let's look at the [LangChain JS documentation](https://js.langchain.com/docs/).\n",
|
||||
"For example, let's look at the [Python 3.9 Document](https://docs.python.org/3.9/).\n",
|
||||
"\n",
|
||||
"This has many interesting child pages that we may want to read in bulk.\n",
|
||||
"\n",
|
||||
@@ -19,13 +19,28 @@
|
||||
" \n",
|
||||
"We do this using the `RecursiveUrlLoader`.\n",
|
||||
"\n",
|
||||
"This also gives us the flexibility to exclude some children (e.g., the `api` directory with > 800 child pages)."
|
||||
"This also gives us the flexibility to exclude some children, customize the extractor, and more."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1be8094f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Parameters\n",
|
||||
"- url: str, the target url to crawl.\n",
|
||||
"- exclude_dirs: Optional[str], webpage directories to exclude.\n",
|
||||
"- use_async: Optional[bool], wether to use async requests, using async requests is usually faster in large tasks. However, async will disable the lazy loading feature(the function still works, but it is not lazy). By default, it is set to False.\n",
|
||||
"- extractor: Optional[Callable[[str], str]], a function to extract the text of the document from the webpage, by default it returns the page as it is. It is recommended to use tools like goose3 and beautifulsoup to extract the text. By default, it just returns the page as it is.\n",
|
||||
"- max_depth: Optional[int] = None, the maximum depth to crawl. By default, it is set to 2. If you need to crawl the whole website, set it to a number that is large enough would simply do the job.\n",
|
||||
"- timeout: Optional[int] = None, the timeout for each request, in the unit of seconds. By default, it is set to 10.\n",
|
||||
"- prevent_outside: Optional[bool] = None, whether to prevent crawling outside the root url. By default, it is set to True."
|
||||
]
|
||||
},
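For reference, here is a minimal sketch that sets these parameters explicitly. The target URL and the excluded directory are illustrative placeholders, not part of the original example:

```python
from langchain.document_loaders import RecursiveUrlLoader

# A minimal sketch exercising the parameters documented above.
loader = RecursiveUrlLoader(
    url="https://docs.python.org/3.9/",                   # root to crawl
    max_depth=2,                                          # default depth
    exclude_dirs=["https://docs.python.org/3.9/c-api/"],  # skip this subtree (placeholder)
    use_async=False,                                      # keep lazy loading available
    timeout=10,                                           # seconds per request
    prevent_outside=True,                                 # stay under the root URL
)
docs = loader.load()
```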
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "2e3532b2",
|
||||
"execution_count": null,
|
||||
"id": "23c18539",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -42,13 +57,15 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "d69e5620",
|
||||
"execution_count": null,
|
||||
"id": "55394afe",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"url = \"https://js.langchain.com/docs/modules/memory/examples/\"\n",
|
||||
"loader = RecursiveUrlLoader(url=url)\n",
|
||||
"from bs4 import BeautifulSoup as Soup\n",
|
||||
"\n",
|
||||
"url = \"https://docs.python.org/3.9/\"\n",
|
||||
"loader = RecursiveUrlLoader(url=url, max_depth=2, extractor=lambda x: Soup(x, \"html.parser\").text)\n",
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
@@ -61,7 +78,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"12"
|
||||
"'\\n\\n\\n\\n\\nPython Frequently Asked Questions — Python 3.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
@@ -70,19 +87,21 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"len(docs)"
|
||||
"docs[0].page_content[:50]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "89355b7c",
|
||||
"id": "13bd7e16",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\n\\n\\n\\nBuffer Window Memory | 🦜️🔗 Langchain\\n\\n\\n\\n\\n\\nSki'"
|
||||
"{'source': 'https://docs.python.org/3.9/library/index.html',\n",
|
||||
" 'title': 'The Python Standard Library — Python 3.9.17 documentation',\n",
|
||||
" 'language': None}"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
@@ -91,137 +110,48 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs[0].page_content[:50]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "13bd7e16",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'source': 'https://js.langchain.com/docs/modules/memory/examples/buffer_window_memory',\n",
|
||||
" 'title': 'Buffer Window Memory | 🦜️🔗 Langchain',\n",
|
||||
" 'description': 'BufferWindowMemory keeps track of the back-and-forths in conversation, and then uses a window of size k to surface the last k back-and-forths to use as memory.',\n",
|
||||
" 'language': 'en'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs[0].metadata"
|
||||
"docs[-1].metadata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "40fc13ef",
|
||||
"id": "5866e5a6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now, let's try a more extensive example, the `docs` root dir.\n",
|
||||
"\n",
|
||||
"We will skip everything under `api`.\n",
|
||||
"\n",
|
||||
"For this, we can `lazy_load` each page as we crawl the tree, using `WebBaseLoader` to load each as we go."
|
||||
"However, since it's hard to perform a perfect filter, you may still see some irrelevant results in the results. You can perform a filter on the returned documents by yourself, if it's needed. Most of the time, the returned results are good enough."
|
||||
]
|
||||
},
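As a sketch of such a post-filter (the URL prefix is illustrative), you can keep only documents whose source stays under the subtree you care about:

```python
# Drop crawled pages outside the subtree of interest; adjust the prefix as needed.
filtered_docs = [
    doc
    for doc in docs
    if doc.metadata.get("source", "").startswith("https://docs.python.org/3.9/library/")
]
```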
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4ec8ecef",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Testing on LangChain docs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5c938b9f",
|
||||
"execution_count": 2,
|
||||
"id": "349b5598",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"url = \"https://js.langchain.com/docs/\"\n",
|
||||
"exclude_dirs = [\"https://js.langchain.com/docs/api/\"]\n",
|
||||
"loader = RecursiveUrlLoader(url=url, exclude_dirs=exclude_dirs)\n",
|
||||
"# Lazy load each\n",
|
||||
"docs = [print(doc) or doc for doc in loader.lazy_load()]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "30ff61d3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load all pages\n",
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "457e30f3",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"188"
|
||||
"8"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"url = \"https://js.langchain.com/docs/modules/memory/integrations/\"\n",
|
||||
"loader = RecursiveUrlLoader(url=url)\n",
|
||||
"docs = loader.load()\n",
|
||||
"len(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "bca80b4a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\n\\n\\n\\nAgent Simulations | 🦜️🔗 Langchain\\n\\n\\n\\n\\n\\nSkip t'"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs[0].page_content[:50]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "df97cf22",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'source': 'https://js.langchain.com/docs/use_cases/agent_simulations/',\n",
|
||||
" 'title': 'Agent Simulations | 🦜️🔗 Langchain',\n",
|
||||
" 'description': 'Agent simulations involve taking multiple agents and having them interact with each other.',\n",
|
||||
" 'language': 'en'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs[0].metadata"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
311
docs/extras/integrations/document_loaders/rss.ipynb
Normal file
@@ -0,0 +1,311 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2dfc4698",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# RSS Feeds\n",
|
||||
"\n",
|
||||
"This covers how to load HTML news articles from a list of RSS feed URLs into a document format that we can use downstream."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e7c2cd52-c1f7-4a06-8539-b0117da91fba",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install feedparser newspaper3k listparser"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"id": "16c3699e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import RSSFeedLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"id": "836fbac1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"urls = [\"https://news.ycombinator.com/rss\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "33089aba-ff74-4d00-8f40-9449c29587cc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Pass in urls to load them into Documents"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "00f46fda",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = RSSFeedLoader(urls=urls)\n",
|
||||
"data = loader.load()\n",
|
||||
"print(len(data))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"id": "b447468cc42266d0",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(next Rich)\n",
|
||||
"\n",
|
||||
"04 August 2023\n",
|
||||
"\n",
|
||||
"Rich Hickey\n",
|
||||
"\n",
|
||||
"It is with a mixture of heartache and optimism that I announce today my (long planned) retirement from commercial software development, and my employment at Nubank. It’s been thrilling to see Clojure and Datomic successfully applied at scale.\n",
|
||||
"\n",
|
||||
"I look forward to continuing to lead ongoing work maintaining and enhancing Clojure with Alex, Stu, Fogus and many others, as an independent developer once again. We have many useful things planned for 1.12 and beyond. The community remains friendly, mature and productive, and is taking Clojure into many interesting new domains.\n",
|
||||
"\n",
|
||||
"I want to highlight and thank Nubank for their ongoing sponsorship of Alex, Fogus and the core team, as well as the Clojure community at large.\n",
|
||||
"\n",
|
||||
"Stu will continue to lead the development of Datomic at Nubank, where the Datomic team grows and thrives. I’m particularly excited to see where the new free availability of Datomic will lead.\n",
|
||||
"\n",
|
||||
"My time with Cognitect remains the highlight of my career. I have learned from absolutely everyone on our team, and am forever grateful to all for our interactions. There are too many people to thank here, but I must extend my sincerest appreciation and love to Stu and Justin for (repeatedly) taking a risk on me and my ideas, and for being the best of partners and friends, at all times fully embodying the notion of integrity. And of course to Alex Miller - who possesses in abundance many skills I lack, and without whose indomitable spirit, positivity and friendship Clojure would not have become what it did.\n",
|
||||
"\n",
|
||||
"I have made many friends through Clojure and Cognitect, and I hope to nurture those friendships moving forward.\n",
|
||||
"\n",
|
||||
"Retirement returns me to the freedom and independence I had when originally developing Clojure. The journey continues!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(data[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c36d3b0d329faf2a",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"You can pass arguments to the NewsURLLoader which it uses to load articles."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 36,
|
||||
"id": "5fdada62470d3019",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error fetching or processing https://twitter.com/andrewmccalip/status/1687405505604734978, exception: You must `parse()` an article first!\n",
|
||||
"Error processing entry https://twitter.com/andrewmccalip/status/1687405505604734978, exception: list index out of range\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"13\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"loader = RSSFeedLoader(urls=urls, nlp=True)\n",
|
||||
"data = loader.load()\n",
|
||||
"print(len(data))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 37,
|
||||
"id": "11d71963f7735c1d",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['nubank',\n",
|
||||
" 'alex',\n",
|
||||
" 'stu',\n",
|
||||
" 'taking',\n",
|
||||
" 'team',\n",
|
||||
" 'remains',\n",
|
||||
" 'rich',\n",
|
||||
" 'clojure',\n",
|
||||
" 'thank',\n",
|
||||
" 'planned',\n",
|
||||
" 'datomic']"
|
||||
]
|
||||
},
|
||||
"execution_count": 37,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"data[0].metadata['keywords']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 38,
|
||||
"id": "9fb64ba0e8780966",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'It’s been thrilling to see Clojure and Datomic successfully applied at scale.\\nI look forward to continuing to lead ongoing work maintaining and enhancing Clojure with Alex, Stu, Fogus and many others, as an independent developer once again.\\nThe community remains friendly, mature and productive, and is taking Clojure into many interesting new domains.\\nI want to highlight and thank Nubank for their ongoing sponsorship of Alex, Fogus and the core team, as well as the Clojure community at large.\\nStu will continue to lead the development of Datomic at Nubank, where the Datomic team grows and thrives.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 38,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"data[0].metadata['summary']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "98ac26c488315bff",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"You can also use an OPML file such as a Feedly export. Pass in either a URL or the OPML contents."
|
||||
]
|
||||
},
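For the URL variant mentioned above, a sketch (the address is a placeholder, not a real feed list):

```python
# Passing an OPML URL instead of the file contents; the URL is illustrative.
loader = RSSFeedLoader(opml="https://example.com/sample_rss_feeds.opml")
data = loader.load()
```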
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 39,
|
||||
"id": "8b6f07ae526a897c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error fetching http://www.engadget.com/rss-full.xml, exception: Error fetching http://www.engadget.com/rss-full.xml, exception: document declared as us-ascii, but parsed as utf-8\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"20\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"with open(\"example_data/sample_rss_feeds.opml\", \"r\") as f:\n",
|
||||
" loader = RSSFeedLoader(opml=f.read())\n",
|
||||
"data = loader.load()\n",
|
||||
"print(len(data))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"id": "b68a26b3",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The electric vehicle startup Fisker made a splash in Huntington Beach last night, showing off a range of new EVs it plans to build alongside the Fisker Ocean, which is slowly beginning deliveries in Europe and the US. With shades of Lotus circa 2010, it seems there\\'s something for most tastes, with a powerful four-door GT, a versatile pickup truck, and an affordable electric city car.\\n\\n\"We want the world to know that we have big plans and intend to move into several different segments, redefining each with our unique blend of design, innovation, and sustainability,\" said CEO Henrik Fisker.\\n\\nStarting with the cheapest, the Fisker PEAR—a cutesy acronym for \"Personal Electric Automotive Revolution\"—is said to use 35 percent fewer parts than other small EVs. Although it\\'s a smaller car, the PEAR seats six thanks to front and rear bench seats. Oh, and it has a frunk, which the company is calling the \"froot,\" something that will satisfy some British English speakers like Ars\\' friend and motoring journalist Jonny Smith.\\n\\nBut most exciting is the price—starting at $29,900 and scheduled for 2025. Fisker plans to contract with Foxconn to build the PEAR in Lordstown, Ohio, meaning it would be eligible for federal tax incentives.\\n\\nAdvertisement\\n\\nThe Fisker Alaska is the company\\'s pickup truck, built on a modified version of the platform used by the Ocean. It has an extendable cargo bed, which can be as little as 4.5 feet (1,371 mm) or as much as 9.2 feet (2,804 mm) long. Fisker claims it will be both the lightest EV pickup on sale and the most sustainable pickup truck in the world. Range will be an estimated 230–240 miles (370–386 km).\\n\\nThis, too, is slated for 2025, and also at a relatively affordable price, starting at $45,400. Fisker hopes to build this car in North America as well, although it isn\\'t saying where that might take place.\\n\\nFinally, there\\'s the Ronin, a four-door GT that bears more than a passing resemblance to the Fisker Karma, Henrik Fisker\\'s 2012 creation. There\\'s no price for this one, but Fisker says its all-wheel drive powertrain will boast 1,000 hp (745 kW) and will hit 60 mph from a standing start in two seconds—just about as fast as modern tires will allow. Expect a massive battery in this one, as Fisker says it\\'s targeting a 600-mile (956 km) range.\\n\\n\"Innovation and sustainability, along with design, are our three brand values. By 2027, we intend to produce the world’s first climate-neutral vehicle, and as our customers reinvent their relationships with mobility, we want to be a leader in software-defined transportation,\" Fisker said.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 40,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"data[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d5a0cbe8-18a6-4af2-b447-7abb8b734451",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "poetry-venv",
|
||||
"language": "python",
|
||||
"name": "poetry-venv"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,320 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bda1f3f5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# TensorFlow Datasets\n",
|
||||
"\n",
|
||||
">[TensorFlow Datasets](https://www.tensorflow.org/datasets) is a collection of datasets ready to use, with TensorFlow or other Python ML frameworks, such as Jax. All datasets are exposed as [tf.data.Datasets](https://www.tensorflow.org/api_docs/python/tf/data/Dataset), enabling easy-to-use and high-performance input pipelines. To get started see the [guide](https://www.tensorflow.org/datasets/overview) and the [list of datasets](https://www.tensorflow.org/datasets/catalog/overview#all_datasets).\n",
|
||||
"\n",
|
||||
"This notebook shows how to load `TensorFlow Datasets` into a Document format that we can use downstream."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b7a1eef-7bf7-4e7d-8bfc-c4e27c9488cb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Installation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2abd5578-aa3d-46b9-99af-8b262f0b3df8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You need to install `tensorflow` and `tensorflow-datasets` python packages."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2e589036-351e-4c63-b734-c9a05fadb880",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install tensorflow"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b674aaea-ed3a-4541-8414-260a8f67f623",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install tensorflow-datasets"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "95f05e1c-195e-4e2b-ae8e-8d6637f15be6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e66e211e-9419-4dbb-b3cd-afc3cf984305",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As an example, we use the [`mlqa/en` dataset](https://www.tensorflow.org/datasets/catalog/mlqa#mlqaen).\n",
|
||||
"\n",
|
||||
">`MLQA` (`Multilingual Question Answering Dataset`) is a benchmark dataset for evaluating multilingual question answering performance. The dataset consists of 7 languages: Arabic, German, Spanish, English, Hindi, Vietnamese, Chinese.\n",
|
||||
">\n",
|
||||
">- Homepage: https://github.com/facebookresearch/MLQA\n",
|
||||
">- Source code: `tfds.datasets.mlqa.Builder`\n",
|
||||
">- Download size: 72.21 MiB\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8968d645-c81c-4e3b-82bc-a3cbb5ddd93a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Feature structure of `mlqa/en` dataset:\n",
|
||||
"\n",
|
||||
"FeaturesDict({\n",
|
||||
" 'answers': Sequence({\n",
|
||||
" 'answer_start': int32,\n",
|
||||
" 'text': Text(shape=(), dtype=string),\n",
|
||||
" }),\n",
|
||||
" 'context': Text(shape=(), dtype=string),\n",
|
||||
" 'id': string,\n",
|
||||
" 'question': Text(shape=(), dtype=string),\n",
|
||||
" 'title': Text(shape=(), dtype=string),\n",
|
||||
"})"
|
||||
]
|
||||
},
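If you want to verify this feature structure yourself, one way (a sketch, assuming the dataset metadata can be fetched) is to inspect the dataset builder's info:

```python
import tensorflow_datasets as tfds

# Print the feature schema without iterating over the data itself.
builder = tfds.builder("mlqa/en")
print(builder.info.features)
```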
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "30fcaba5-cc9b-4a0e-a8f4-c047018451c2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import tensorflow as tf\n",
|
||||
"import tensorflow_datasets as tfds"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 78,
|
||||
"id": "e307dd67-029e-4ee3-a65f-e085c09b0b8b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<_TakeDataset element_spec={'answers': {'answer_start': TensorSpec(shape=(None,), dtype=tf.int32, name=None), 'text': TensorSpec(shape=(None,), dtype=tf.string, name=None)}, 'context': TensorSpec(shape=(), dtype=tf.string, name=None), 'id': TensorSpec(shape=(), dtype=tf.string, name=None), 'question': TensorSpec(shape=(), dtype=tf.string, name=None), 'title': TensorSpec(shape=(), dtype=tf.string, name=None)}>"
|
||||
]
|
||||
},
|
||||
"execution_count": 78,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# try directly access this dataset:\n",
|
||||
"ds = tfds.load('mlqa/en', split='test')\n",
|
||||
"ds = ds.take(1) # Only take a single example\n",
|
||||
"ds"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5c9c4b08-d94f-4b53-add0-93769811644e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now we have to create a custom function to convert dataset sample into a Document.\n",
|
||||
"\n",
|
||||
"This is a requirement. There is no standard format for the TF datasets that's why we need to make a custom transformation function.\n",
|
||||
"\n",
|
||||
"Let's use `context` field as the `Document.page_content` and place other fields in the `Document.metadata`.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 72,
|
||||
"id": "78844113-f8d8-48a8-8105-685280b6cfa5",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_content='After completing the journey around South America, on 23 February 2006, Queen Mary 2 met her namesake, the original RMS Queen Mary, which is permanently docked at Long Beach, California. Escorted by a flotilla of smaller ships, the two Queens exchanged a \"whistle salute\" which was heard throughout the city of Long Beach. Queen Mary 2 met the other serving Cunard liners Queen Victoria and Queen Elizabeth 2 on 13 January 2008 near the Statue of Liberty in New York City harbour, with a celebratory fireworks display; Queen Elizabeth 2 and Queen Victoria made a tandem crossing of the Atlantic for the meeting. This marked the first time three Cunard Queens have been present in the same location. Cunard stated this would be the last time these three ships would ever meet, due to Queen Elizabeth 2\\'s impending retirement from service in late 2008. However this would prove not to be the case, as the three Queens met in Southampton on 22 April 2008. Queen Mary 2 rendezvoused with Queen Elizabeth 2 in Dubai on Saturday 21 March 2009, after the latter ship\\'s retirement, while both ships were berthed at Port Rashid. With the withdrawal of Queen Elizabeth 2 from Cunard\\'s fleet and its docking in Dubai, Queen Mary 2 became the only ocean liner left in active passenger service.' metadata={'id': '5116f7cccdbf614d60bcd23498274ffd7b1e4ec7', 'title': 'RMS Queen Mary 2', 'question': 'What year did Queen Mary 2 complete her journey around South America?', 'answer': '2006'}\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2023-08-03 14:27:08.482983: W tensorflow/core/kernels/data/cache_dataset_ops.cc:854] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def decode_to_str(item: tf.Tensor) -> str:\n",
|
||||
" return item.numpy().decode('utf-8')\n",
|
||||
"\n",
|
||||
"def mlqaen_example_to_document(example: dict) -> Document:\n",
|
||||
" return Document(\n",
|
||||
" page_content=decode_to_str(example[\"context\"]),\n",
|
||||
" metadata={\n",
|
||||
" \"id\": decode_to_str(example[\"id\"]),\n",
|
||||
" \"title\": decode_to_str(example[\"title\"]),\n",
|
||||
" \"question\": decode_to_str(example[\"question\"]),\n",
|
||||
" \"answer\": decode_to_str(example[\"answers\"][\"text\"][0]),\n",
|
||||
" },\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" \n",
|
||||
"for example in ds: \n",
|
||||
" doc = mlqaen_example_to_document(example)\n",
|
||||
" print(doc)\n",
|
||||
" break"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 73,
|
||||
"id": "2d43c834-5145-4793-9558-8e301ccaf3b4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.schema import Document\n",
|
||||
"from langchain.document_loaders import TensorflowDatasetLoader\n",
|
||||
"\n",
|
||||
"loader = TensorflowDatasetLoader(\n",
|
||||
" dataset_name=\"mlqa/en\",\n",
|
||||
" split_name=\"test\",\n",
|
||||
" load_max_docs=3,\n",
|
||||
" sample_to_document_function=mlqaen_example_to_document,\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e29b954c-1407-4797-ae21-6ba8937156be",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"`TensorflowDatasetLoader` has these parameters:\n",
|
||||
"- `dataset_name`: the name of the dataset to load\n",
|
||||
"- `split_name`: the name of the split to load. Defaults to \"train\".\n",
|
||||
"- `load_max_docs`: a limit to the number of loaded documents. Defaults to 100.\n",
|
||||
"- `sample_to_document_function`: a function that converts a dataset sample to a Document\n"
|
||||
]
|
||||
},
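A minimal sketch with the documented defaults spelled out (`split_name` and `load_max_docs` are shown at their stated default values; `mlqaen_example_to_document` is the converter defined earlier):

```python
# Equivalent loader with the defaults made explicit.
loader = TensorflowDatasetLoader(
    dataset_name="mlqa/en",
    split_name="train",    # default
    load_max_docs=100,     # default
    sample_to_document_function=mlqaen_example_to_document,
)
```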
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 74,
|
||||
"id": "700e4ef2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2023-08-03 14:27:22.998964: W tensorflow/core/kernels/data/cache_dataset_ops.cc:854] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"3"
|
||||
]
|
||||
},
|
||||
"execution_count": 74,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"len(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 76,
|
||||
"id": "9138940a-e9fe-4145-83e8-77589b5272c9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'After completing the journey around South America, on 23 February 2006, Queen Mary 2 met her namesake, the original RMS Queen Mary, which is permanently docked at Long Beach, California. Escorted by a flotilla of smaller ships, the two Queens exchanged a \"whistle salute\" which was heard throughout the city of Long Beach. Queen Mary 2 met the other serving Cunard liners Queen Victoria and Queen Elizabeth 2 on 13 January 2008 near the Statue of Liberty in New York City harbour, with a celebratory fireworks display; Queen Elizabeth 2 and Queen Victoria made a tandem crossing of the Atlantic for the meeting. This marked the first time three Cunard Queens have been present in the same location. Cunard stated this would be the last time these three ships would ever meet, due to Queen Elizabeth 2\\'s impending retirement from service in late 2008. However this would prove not to be the case, as the three Queens met in Southampton on 22 April 2008. Queen Mary 2 rendezvoused with Queen Elizabeth 2 in Dubai on Saturday 21 March 2009, after the latter ship\\'s retirement, while both ships were berthed at Port Rashid. With the withdrawal of Queen Elizabeth 2 from Cunard\\'s fleet and its docking in Dubai, Queen Mary 2 became the only ocean liner left in active passenger service.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 76,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 77,
|
||||
"id": "2f7f7832-fe4d-4a58-892d-bb987cdbed0b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'id': '5116f7cccdbf614d60bcd23498274ffd7b1e4ec7',\n",
|
||||
" 'title': 'RMS Queen Mary 2',\n",
|
||||
" 'question': 'What year did Queen Mary 2 complete her journey around South America?',\n",
|
||||
" 'answer': '2006'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 77,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs[0].metadata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "125d073c-4f4f-4ae6-a0c7-9e9db3cc8d69",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -18,8 +18,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# # Install package\n",
|
||||
"!pip install \"unstructured[local-inference]\"\n",
|
||||
"!pip install layoutparser[layoutmodels,tesseract]"
|
||||
"!pip install \"unstructured[all-docs]\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "e48afb8d",
|
||||
"metadata": {},
|
||||
@@ -11,7 +12,8 @@
|
||||
"\n",
|
||||
"Below we show how to easily go from a YouTube url to text to chat!\n",
|
||||
"\n",
|
||||
"We wil use the `OpenAIWhisperParser`, which will use the OpenAI Whisper API to transcribe audio to text.\n",
|
||||
"We wil use the `OpenAIWhisperParser`, which will use the OpenAI Whisper API to transcribe audio to text, \n",
|
||||
"and the `OpenAIWhisperParserLocal` for local support and running on private clouds or on premise.\n",
|
||||
"\n",
|
||||
"Note: You will need to have an `OPENAI_API_KEY` supplied."
|
||||
]
|
||||
@@ -24,7 +26,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders.generic import GenericLoader\n",
|
||||
"from langchain.document_loaders.parsers import OpenAIWhisperParser\n",
|
||||
"from langchain.document_loaders.parsers import OpenAIWhisperParser, OpenAIWhisperParserLocal\n",
|
||||
"from langchain.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader"
|
||||
]
|
||||
},
|
||||
@@ -46,7 +48,8 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"! pip install yt_dlp\n",
|
||||
"! pip install pydub"
|
||||
"! pip install pydub\n",
|
||||
"! pip install librosa"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -63,6 +66,18 @@
|
||||
"Let's take the first lecture of Andrej Karpathy's YouTube course as an example! "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8682f256",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# set a flag to switch between local and remote parsing\n",
|
||||
"# change this to True if you want to use local parsing\n",
|
||||
"local = False"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
@@ -102,7 +117,10 @@
|
||||
"save_dir = \"~/Downloads/YouTube\"\n",
|
||||
"\n",
|
||||
"# Transcribe the videos to text\n",
|
||||
"loader = GenericLoader(YoutubeAudioLoader(urls, save_dir), OpenAIWhisperParser())\n",
|
||||
"if local:\n",
|
||||
" loader = GenericLoader(YoutubeAudioLoader(urls, save_dir), OpenAIWhisperParserLocal())\n",
|
||||
"else:\n",
|
||||
" loader = GenericLoader(YoutubeAudioLoader(urls, save_dir), OpenAIWhisperParser())\n",
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
@@ -275,7 +293,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -289,7 +307,12 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
"version": "3.10.11"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "97cc609b13305c559618ec78a438abc56230b9381f827f22d070313b9a1f3777"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -0,0 +1,103 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Nuclia Understanding API document transformer\n",
|
||||
"\n",
|
||||
"[Nuclia](https://nuclia.com) automatically indexes your unstructured data from any internal and external source, providing optimized search results and generative answers. It can handle video and audio transcription, image content extraction, and document parsing.\n",
|
||||
"\n",
|
||||
"The Nuclia Understanding API document transformer splits text into paragraphs and sentences, identifies entities, provides a summary of the text and generates embeddings for all the sentences.\n",
|
||||
"\n",
|
||||
"To use the Nuclia Understanding API, you need to have a Nuclia account. You can create one for free at [https://nuclia.cloud](https://nuclia.cloud), and then [create a NUA key](https://docs.nuclia.dev/docs/docs/using/understanding/intro).\n",
|
||||
"\n",
|
||||
"from langchain.document_transformers.nuclia_text_transform import NucliaTextTransformer"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install --upgrade protobuf\n",
|
||||
"#!pip install nucliadb-protos"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"NUCLIA_ZONE\"] = \"<YOUR_ZONE>\" # e.g. europe-1\n",
|
||||
"os.environ[\"NUCLIA_NUA_KEY\"] = \"<YOUR_API_KEY>\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To use the Nuclia document transformer, you need to instantiate a `NucliaUnderstandingAPI` tool with `enable_ml` set to `True`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.tools.nuclia import NucliaUnderstandingAPI\n",
|
||||
"\n",
|
||||
"nua = NucliaUnderstandingAPI(enable_ml=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The Nuclia document transformer must be called in async mode, so you need to use the `atransform_documents` method:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import asyncio\n",
|
||||
"\n",
|
||||
"from langchain.document_transformers.nuclia_text_transform import NucliaTextTransformer\n",
|
||||
"from langchain.schema.document import Document\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"async def process():\n",
|
||||
" documents = [\n",
|
||||
" Document(page_content=\"<TEXT 1>\", metadata={}),\n",
|
||||
" Document(page_content=\"<TEXT 2>\", metadata={}),\n",
|
||||
" Document(page_content=\"<TEXT 3>\", metadata={}),\n",
|
||||
" ]\n",
|
||||
" nuclia_transformer = NucliaTextTransformer(nua)\n",
|
||||
" transformed_documents = await nuclia_transformer.atransform_documents(documents)\n",
|
||||
" print(transformed_documents)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"asyncio.run(process())"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
230
docs/extras/integrations/llms/Fireworks.ipynb
Normal file
@@ -0,0 +1,230 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cc6caafa",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Fireworks\n",
|
||||
"\n",
|
||||
[Fireworks]">
">[Fireworks](https://app.fireworks.ai/) accelerates product development on generative AI with an innovative experimentation and production platform. \n",
|
||||
"\n",
|
||||
"This example goes over how to use LangChain to interact with `Fireworks` models."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"id": "60b6dbb2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms.fireworks import Fireworks, FireworksChat\n",
|
||||
"from langchain import PromptTemplate, LLMChain\n",
|
||||
"from langchain.prompts.chat import (\n",
|
||||
" ChatPromptTemplate,\n",
|
||||
" HumanMessagePromptTemplate,\n",
|
||||
")\n",
|
||||
"import os"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ccff689e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Setup\n",
|
||||
"\n",
|
||||
"Contact Fireworks AI for the an API Key to access our models\n",
|
||||
"\n",
|
||||
"Set up your model using a model id. If the model is not set, the default model is fireworks-llama-v2-7b-chat."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "9ca87a2e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Initialize a Fireworks LLM\n",
|
||||
"os.environ['FIREWORKS_API_KEY'] = \"\" #change this to your own API KEY\n",
|
||||
"llm = Fireworks(model_id=\"accounts/fireworks/models/fireworks-llama-v2-13b-chat\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "43a11ba8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Create LLM chain\n",
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "acc24d0c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Calling the Model\n",
|
||||
"\n",
|
||||
"You can use the LLMs to call the model for specified prompt(s). \n",
|
||||
"\n",
|
||||
"Current Specified Models: \n",
|
||||
"* accounts/fireworks/models/fireworks-falcon-7b, accounts/fireworks/models/fireworks-falcon-40b-w8a16\n",
|
||||
"* accounts/fireworks/models/fireworks-starcoder-1b-w8a16-1gpu, accounts/fireworks/models/fireworks-starcoder-3b-w8a16-1gpu, accounts/fireworks/models/fireworks-starcoder-7b-w8a16-1gpu, accounts/fireworks/models/fireworks-starcoder-16b-w8a16 \n",
|
||||
"* accounts/fireworks/models/fireworks-llama-v2-13b, accounts/fireworks/models/fireworks-llama-v2-13b-chat, accounts/fireworks/models/fireworks-llama-v2-13b-w8a16, accounts/fireworks/models/fireworks-llama-v2-13b-chat-w8a16\n",
|
||||
"* accounts/fireworks/models/fireworks-llama-v2-7b, accounts/fireworks/models/fireworks-llama-v2-7b-chat, accounts/fireworks/models/fireworks-llama-v2-7b-w8a16, accounts/fireworks/models/fireworks-llama-v2-7b-chat-w8a16, accounts/fireworks/models/fireworks-llama-v2-70b-chat-4gpu"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "bf0a425c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"It's a question that has been debated for years, with different analysts and fans making their cases for various signal-callers. Here are some of the top contenders for the title of best quarterback in the NFL:\n",
|
||||
"\n",
|
||||
"1. Tom Brady: The New England Patriots legend has won six Super Bowls and has been named Super Bowl MVP four times. He's known for his precision passing, pocket presence, and ability to read defenses.\n",
|
||||
"2. Aaron Rodgers: The Green Bay Packers quarterback has won two Super Bowls and has been named NFL MVP twice. He's known for his quick release, accuracy, and ability to extend plays with his feet.\n",
|
||||
"3. Drew Brees: The New Orleans Saints quarterback has won a Super Bowl and has been named NFL MVP once. He's known for his accuracy, pocket presence, and ability to read defenses.\n",
|
||||
"4. Patrick Mahomes: The Kansas City Chiefs quarterback has won a Super Bowl and has been named NFL MVP twice. He's known for his arm strength, athleticism, and ability to make plays outside of the pocket.\n",
|
||||
"5. Russell Wilson: The Seattle Seahawks quarterback has won a Super Bowl and has been named NFL MVP once. He's known for his mobility, accuracy, and ability to extend plays with his feet.\n",
|
||||
"\n",
|
||||
"Of course, there are other talented quarterbacks in the league, such as Lamar Jackson, Deshaun Watson, and Carson Wentz, who could also be considered among the best. Ultimately, the answer to the question of who's the best quarterback in the NFL is subjective and can vary depending on individual perspectives and criteria.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#single prompt\n",
|
||||
"output = llm(\"Who's the best quarterback in the NFL?\")\n",
|
||||
"print(output)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "afc7de6f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"generations=[[Generation(text=\"\\nWho is the best cricket player in the world in 2016?\\nThe best cricket player in the world in 2016 is Virat Kohli. The Indian captain has had a fabulous year, scoring heavily in all formats of the game, leading India to several victories, and breaking several records. In Test cricket, Kohli has scored 1215 runs at an average of 75.33 with 6 centuries and 4 fifties, which is the highest number of runs scored by any player in a calendar year. In ODI cricket, he has scored 1143 runs at an average of 83.42 with 7 centuries and 6 fifties, which is also the highest number of runs scored by any player in a calendar year. Additionally, Kohli has led India to the number one ranking in Test cricket, and has been named the ICC Test Player of the Year and the ICC ODI Player of the Year.\\nVirat Kohli has been in incredible form in 2016, and his performances have made him the standout player of the year. Other players who have had a great year include Steve Smith, Joe Root, and Kane Williamson, but Kohli's consistency and dominance in all formats of the game make him the best cricket player in the world in 2016.\", generation_info=None)], [Generation(text=\"\\n\\nA: LeBron James.\\n\\nB: Kevin Durant.\\n\\nC: Steph Curry.\\n\\nD: James Harden.\\n\\nE: Other (please specify).\\n\\nWhat's your answer?\", generation_info=None)]] llm_output={'token_usage': {}, 'model_id': 'fireworks-llama-v2-13b-chat'} run=[RunInfo(run_id=UUID('d14b6bee-7692-46ad-8798-acb6f72fc7fb')), RunInfo(run_id=UUID('b9f5b3b5-9e62-4eaf-b269-ecf0cbbcfb82'))]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#calling multiple prompts\n",
|
||||
"output = llm.generate([\"Who's the best cricket player in 2016?\", \"Who's the best basketball player in the league?\"])\n",
|
||||
"print(output)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "b801c20d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"Kansas City in December can be quite chilly, with average high\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#setting parameters: model_id, temperature, max_tokens, top_p\n",
|
||||
"llm = Fireworks(model_id=\"accounts/fireworks/models/fireworks-llama-v2-13b-chat\", temperature=0.7, max_tokens=15, top_p=1.0)\n",
|
||||
"print(llm(\"What's the weather like in Kansas City in December?\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "137662a6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Create and Run Chain\n",
|
||||
"\n",
|
||||
"Create a prompt template to be used with the LLM Chain. Once this prompt template is created, initialize the chain with the LLM and prompt template, and run the chain with the specified prompts."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "fd2c6bc1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"(Note: I'm just an AI and not a branding expert, so take this as a starting point for your own research and brainstorming.)\n",
|
||||
"A good name for a company that makes football helmets could be:\n",
|
||||
"\n",
|
||||
"1. Helix Pro: This name plays off the idea of a helix, or spiral, shape that is commonly associated with football helmets. \"Pro\" implies a professional-level product.\n",
|
||||
"2. Gridiron Gear: \"Gridiron\" is a term used to describe a football field, and \"gear\" highlights the company's focus on producing high-quality football helmets.\n",
|
||||
"3. Linebacker Lab: \"Linebacker\" is a position on the football field, and \"Lab\" suggests a focus on research and development.\n",
|
||||
"4. Helmet Hut: This name is simple and easy to remember, and it immediately conveys the company's focus on football helmets.\n",
|
||||
"5. Tackle Tech: \"Tackle\" is a term used in football to describe a hit or collision, and \"Tech\" implies a focus on advanced technology and innovation.\n",
|
||||
"6. Victory Vest: \"Victory\" implies a focus on winning and success, and \"Vest\" could suggest a protective or armored design.\n",
|
||||
"7. Pigskin Pro: \"Pigskin\" is a term used to describe a football, and \"Pro\" implies a professional-level product.\n",
|
||||
"8. Football Fusion: This name could suggest a combination of different materials or technologies to create a high-quality football helmet.\n",
|
||||
"9. Endzone Edge: \"Endzone\" is the area of the football field where a team scores a touchdown, and \"Edge\" implies a competitive advantage.\n",
|
||||
"10. MVP Masks: \"MVP\" stands for \"Most Valuable Player,\" and \"Masks\" highlights the protective nature of the company's football helmets.\n",
|
||||
"\n",
|
||||
"Remember, the name you choose for your company should be memorable, easy to pronounce and spell, and convey a sense of quality and professionalism. It's also important to check that the name isn't already in use by another company, and to consider any potential trademark issues.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"human_message_prompt = HumanMessagePromptTemplate(\n",
|
||||
" prompt=PromptTemplate(\n",
|
||||
" template=\"What is a good name for a company that makes {product}?\",\n",
|
||||
" input_variables=[\"product\"],\n",
|
||||
" )\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chat_prompt_template = ChatPromptTemplate.from_messages([human_message_prompt])\n",
|
||||
"chat = Fireworks()\n",
|
||||
"chain = LLMChain(llm=chat, prompt=chat_prompt_template)\n",
|
||||
"output = chain.run(\"football helmets\")\n",
|
||||
"\n",
|
||||
"print(output)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -28,9 +28,9 @@
|
||||
"\n",
|
||||
"To use the wrapper, you must [deploy a model on AzureML](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-use-foundation-models?view=azureml-api-2#deploying-foundation-models-to-endpoints-for-inferencing) and obtain the following parameters:\n",
|
||||
"\n",
|
||||
"* `endpoint_api_key`: The API key provided by the endpoint\n",
|
||||
"* `endpoint_url`: The REST endpoint url provided by the endpoint\n",
|
||||
"* `deployment_name`: The deployment name of the endpoint"
|
||||
"* `endpoint_api_key`: Required - The API key provided by the endpoint\n",
|
||||
"* `endpoint_url`: Required - The REST endpoint url provided by the endpoint\n",
|
||||
"* `deployment_name`: Not required - The deployment name of the model using the endpoint"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -39,11 +39,14 @@
|
||||
"source": [
|
||||
"## Content Formatter\n",
|
||||
"\n",
|
||||
"The `content_formatter` parameter is a handler class for transforming the request and response of an AzureML endpoint to match with required schema. Since there are a wide range of models in the model catalog, each of which may process data differently from one another, a `ContentFormatterBase` class is provided to allow users to transform data to their liking. Additionally, there are three content formatters already provided:\n",
|
||||
"The `content_formatter` parameter is a handler class for transforming the request and response of an AzureML endpoint to match with required schema. Since there are a wide range of models in the model catalog, each of which may process data differently from one another, a `ContentFormatterBase` class is provided to allow users to transform data to their liking. The following content formatters are provided:\n",
|
||||
"\n",
|
||||
"* `OSSContentFormatter`: Formats request and response data for models from the Open Source category in the Model Catalog. Note, that not all models in the Open Source category may follow the same schema\n",
|
||||
"* `DollyContentFormatter`: Formats request and response data for the `dolly-v2-12b` model\n",
|
||||
"* `GPT2ContentFormatter`: Formats request and response data for GPT2\n",
|
||||
"* `DollyContentFormatter`: Formats request and response data for the Dolly-v2\n",
|
||||
"* `HFContentFormatter`: Formats request and response data for text-generation Hugging Face models\n",
|
||||
"* `LLamaContentFormatter`: Formats request and response data for LLaMa2\n",
|
||||
"\n",
|
||||
"*Note: `OSSContentFormatter` is being deprecated and replaced with `GPT2ContentFormatter`. The logic is the same but `GPT2ContentFormatter` is a more suitable name. You can still continue to use `OSSContentFormatter` as the changes are backwards compatibile.*\n",
|
||||
"\n",
|
||||
"Below is an example using a summarization model from Hugging Face."
|
||||
]
|
||||
@@ -100,7 +103,6 @@
|
||||
"llm = AzureMLOnlineEndpoint(\n",
|
||||
" endpoint_api_key=os.getenv(\"BART_ENDPOINT_API_KEY\"),\n",
|
||||
" endpoint_url=os.getenv(\"BART_ENDPOINT_URL\"),\n",
|
||||
" deployment_name=\"linydub-bart-large-samsum-3\",\n",
|
||||
" model_kwargs={\"temperature\": 0.8, \"max_new_tokens\": 400},\n",
|
||||
" content_formatter=content_formatter,\n",
|
||||
")\n",
|
||||
@@ -167,7 +169,6 @@
|
||||
"llm = AzureMLOnlineEndpoint(\n",
|
||||
" endpoint_api_key=os.getenv(\"DOLLY_ENDPOINT_API_KEY\"),\n",
|
||||
" endpoint_url=os.getenv(\"DOLLY_ENDPOINT_URL\"),\n",
|
||||
" deployment_name=\"databricks-dolly-v2-12b-4\",\n",
|
||||
" model_kwargs={\"temperature\": 0.8, \"max_tokens\": 300},\n",
|
||||
" content_formatter=content_formatter,\n",
|
||||
")\n",
|
||||
|
||||
375
docs/extras/integrations/llms/edenai.ipynb
Normal file
@@ -1,18 +1,15 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Google Cloud Platform Vertex AI PaLM \n",
|
||||
"\n",
|
||||
"Note: This is seperate from the Google PaLM integration. Google has chosen to offer an enterprise version of PaLM through GCP, and this supports the models made available through there. \n",
|
||||
"Note: This is seperate from the Google PaLM integration, it exposes [Vertex AI PaLM API](https://cloud.google.com/vertex-ai/docs/generative-ai/learn/overview) on Google Cloud. \n",
|
||||
"\n",
|
||||
"PaLM API on Vertex AI is a Preview offering, subject to the Pre-GA Offerings Terms of the [GCP Service Specific Terms](https://cloud.google.com/terms/service-terms). \n",
|
||||
"\n",
|
||||
"Pre-GA products and features may have limited support, and changes to pre-GA products and features may not be compatible with other pre-GA versions. For more information, see the [launch stage descriptions](https://cloud.google.com/products#product-launch-stages). Further, by using PaLM API on Vertex AI, you agree to the Generative AI Preview [terms and conditions](https://cloud.google.com/trustedtester/aitos) (Preview Terms).\n",
|
||||
"\n",
|
||||
"For PaLM API on Vertex AI, you can process personal data as outlined in the Cloud Data Processing Addendum, subject to applicable restrictions and obligations in the Agreement (as defined in the Preview Terms).\n",
|
||||
"By default, Google Cloud [does not use](https://cloud.google.com/vertex-ai/docs/generative-ai/data-governance#foundation_model_development) Customer Data to train its foundation models as part of Google Cloud`s AI/ML Privacy Commitment. More details about how Google processes data can also be found in [Google's Customer Data Processing Addendum (CDPA)](https://cloud.google.com/terms/data-processing-addendum).\n",
|
||||
"\n",
|
||||
"To use Vertex AI PaLM you must have the `google-cloud-aiplatform` Python package installed and either:\n",
|
||||
"- Have credentials configured for your environment (gcloud, workload identity, etc...)\n",
|
||||
@@ -101,6 +98,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
|
||||
@@ -4,12 +4,12 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Llama-cpp\n",
|
||||
"# Llama.cpp\n",
|
||||
"\n",
|
||||
"[llama-cpp](https://github.com/abetlen/llama-cpp-python) is a Python binding for [llama.cpp](https://github.com/ggerganov/llama.cpp). \n",
|
||||
"[llama-cpp-python](https://github.com/abetlen/llama-cpp-python) is a Python binding for [llama.cpp](https://github.com/ggerganov/llama.cpp). \n",
|
||||
"It supports [several LLMs](https://github.com/ggerganov/llama.cpp).\n",
|
||||
"\n",
|
||||
"This notebook goes over how to run `llama-cpp` within LangChain."
|
||||
"This notebook goes over how to run `llama-cpp-python` within LangChain."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -18,7 +18,7 @@
|
||||
"source": [
|
||||
"## Installation\n",
|
||||
"\n",
|
||||
"There is a bunch of options how to install the llama-cpp package: \n",
|
||||
"There are different options on how to install the llama-cpp package: \n",
|
||||
"- only CPU usage\n",
|
||||
"- CPU + GPU (using one of many BLAS backends)\n",
|
||||
"- Metal GPU (MacOS with Apple Silicon Chip) \n",
|
||||
@@ -61,7 +61,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**IMPORTANT**: If you have already installed a cpu only version of the package, you need to reinstall it from scratch: consider the following command: "
|
||||
"**IMPORTANT**: If you have already installed the CPU only version of the package, you need to reinstall it from scratch. Consider the following command: "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -79,7 +79,7 @@
|
||||
"source": [
|
||||
"### Installation with Metal\n",
|
||||
"\n",
|
||||
"`lama.cpp` supports Apple silicon first-class citizen - optimized via ARM NEON, Accelerate and Metal frameworks. Use the `FORCE_CMAKE=1` environment variable to force the use of cmake and install the pip package for the Metal support ([source](https://github.com/abetlen/llama-cpp-python/blob/main/docs/install/macos.md)).\n",
|
||||
"`llama.cpp` supports Apple silicon first-class citizen - optimized via ARM NEON, Accelerate and Metal frameworks. Use the `FORCE_CMAKE=1` environment variable to force the use of cmake and install the pip package for the Metal support ([source](https://github.com/abetlen/llama-cpp-python/blob/main/docs/install/macos.md)).\n",
|
||||
"\n",
|
||||
"Example installation with Metal Support:"
|
||||
]
|
||||
@@ -143,7 +143,7 @@
|
||||
"\n",
|
||||
"#### Compiling and installing\n",
|
||||
"\n",
|
||||
"In the same command prompt (anaconda prompt) you set the variables, you can cd into `llama-cpp-python` directory and run the following commands.\n",
|
||||
"In the same command prompt (anaconda prompt) you set the variables, you can `cd` into `llama-cpp-python` directory and run the following commands.\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"python setup.py clean\n",
|
||||
@@ -164,7 +164,9 @@
|
||||
"source": [
|
||||
"Make sure you are following all instructions to [install all necessary model files](https://github.com/ggerganov/llama.cpp).\n",
|
||||
"\n",
|
||||
"You don't need an `API_TOKEN`!"
|
||||
"You don't need an `API_TOKEN` as you will run the LLM locally.\n",
|
||||
"\n",
|
||||
"It is worth understanding which models are suitable to be used on the desired machine."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -227,7 +229,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"`Llama-v2`"
|
||||
"Example using a LLaMA 2 7B model"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -304,7 +306,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"`Llama-v1`"
|
||||
"Example using a LLaMA v1 model"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -381,7 +383,7 @@
|
||||
"source": [
|
||||
"### GPU\n",
|
||||
"\n",
|
||||
"If the installation with BLAS backend was correct, you will see an `BLAS = 1` indicator in model properties.\n",
|
||||
"If the installation with BLAS backend was correct, you will see a `BLAS = 1` indicator in model properties.\n",
|
||||
"\n",
|
||||
"Two of the most important parameters for use with GPU are:\n",
|
||||
"\n",
|
||||
@@ -473,22 +475,15 @@
|
||||
"llm_chain.run(question)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Metal\n",
|
||||
"\n",
|
||||
"If the installation with Metal was correct, you will see an `NEON = 1` indicator in model properties.\n",
|
||||
"If the installation with Metal was correct, you will see a `NEON = 1` indicator in model properties.\n",
|
||||
"\n",
|
||||
"Two of the most important parameters for use with GPU are:\n",
|
||||
"Two of the most important GPU parameters are:\n",
|
||||
"\n",
|
||||
"- `n_gpu_layers` - determines how many layers of the model are offloaded to your Metal GPU, in the most case, set it to `1` is enough for Metal\n",
|
||||
"- `n_batch` - how many tokens are processed in parallel, default is 8, set to bigger number.\n",
|
||||
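A minimal sketch putting the two parameters together, assuming a GGML model file you have downloaded locally (the path below is a placeholder):

```python
from langchain.llms import LlamaCpp

llm = LlamaCpp(
    model_path="/path/to/llama-2-7b.ggmlv3.q4_0.bin",  # placeholder local path
    n_gpu_layers=1,  # offload layers to the Metal GPU; 1 is usually enough
    n_batch=512,     # tokens processed in parallel
    f16_kv=True,     # half-precision key/value cache, recommended on Metal
)
```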
@@ -522,7 +517,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The rest are almost same as GPU, the console log will show the following log to indicate the Metal was enable properly.\n",
|
||||
"The console log will show the following log to indicate Metal was enable properly.\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"ggml_metal_init: allocating\n",
|
||||
@@ -530,7 +525,9 @@
|
||||
"...\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"You also could check the `Activity Monitor` by watching the % GPU of the process, the % CPU will drop dramatically after turn on `n_gpu_layers=1`. Also for the first time call LLM, the performance might be slow due to the model compilation in Metal GPU."
|
||||
"You also could check `Activity Monitor` by watching the GPU usage of the process, the CPU usage will drop dramatically after turn on `n_gpu_layers=1`. \n",
|
||||
"\n",
|
||||
"For the first call to the LLM, the performance may be slow due to the model compilation in Metal GPU."
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
340
docs/extras/integrations/llms/ollama.ipynb
Normal file
@@ -0,0 +1,340 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Ollama\n",
|
||||
"\n",
|
||||
"[Ollama](https://ollama.ai/) allows you to run open-source large language models, such as Llama 2, locally.\n",
|
||||
"\n",
|
||||
"Ollama bundles model weights, configuration, and data into a single package, defined by a Modelfile. \n",
|
||||
"\n",
|
||||
"It optimizes setup and configuration details, including GPU usage.\n",
|
||||
"\n",
|
||||
"For a complete list of supported models and model variants, see the [Ollama model library](https://github.com/jmorganca/ollama#model-library).\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"First, follow [these instructions](https://github.com/jmorganca/ollama) to set up and run a local Ollama instance:\n",
|
||||
"\n",
|
||||
"* [Download](https://ollama.ai/download)\n",
|
||||
"* Fetch a model, e.g., `Llama-7b`: `ollama pull llama2`\n",
|
||||
"* Run `ollama run llama2`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Usage\n",
|
||||
"\n",
|
||||
"You can see a full list of supported parameters on the [API reference page](https://api.python.langchain.com/en/latest/llms/langchain.llms.ollama.Ollama.html)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 38,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms import Ollama\n",
|
||||
"from langchain.callbacks.manager import CallbackManager\n",
|
||||
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler \n",
|
||||
"llm = Ollama(base_url=\"http://localhost:11434\", \n",
|
||||
" model=\"llama2\", \n",
|
||||
" callback_manager = CallbackManager([StreamingStdOutCallbackHandler()]))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"With `StreamingStdOutCallbackHandler`, you will see tokens streamed."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"Great! The history of Artificial Intelligence (AI) is a fascinating and complex topic that spans several decades. Here's a brief overview:\n",
|
||||
"\n",
|
||||
"1. Early Years (1950s-1960s): The term \"Artificial Intelligence\" was coined in 1956 by computer scientist John McCarthy. However, the concept of AI dates back to ancient Greece, where mythical creatures like Talos and Hephaestus were created to perform tasks without any human intervention. In the 1950s and 1960s, researchers began exploring ways to replicate human intelligence using computers, leading to the development of simple AI programs like ELIZA (1966) and PARRY (1972).\n",
|
||||
"2. Rule-Based Systems (1970s-1980s): As computing power increased, researchers developed rule-based systems, such as Mycin (1976), which could diagnose medical conditions based on a set of rules. This period also saw the rise of expert systems, like EDICT (1985), which mimicked human experts in specific domains.\n",
|
||||
"3. Machine Learning (1990s-2000s): With the advent of big data and machine learning algorithms, AI evolved to include neural networks, decision trees, and other techniques for training models on large datasets. This led to the development of applications like speech recognition (e.g., Siri, Alexa), image recognition (e.g., Google Image Search), and natural language processing (e.g., chatbots).\n",
|
||||
"4. Deep Learning (2010s-present): The rise of deep learning techniques, such as convolutional neural networks (CNNs) and recurrent neural networks (RNNs), has enabled AI to perform complex tasks like image and speech recognition, natural language processing, and even autonomous driving. Companies like Google, Facebook, and Baidu have invested heavily in deep learning research, leading to breakthroughs in areas like facial recognition, object detection, and machine translation.\n",
|
||||
"5. Current Trends (present-future): AI is currently being applied to various industries, including healthcare, finance, education, and entertainment. With the growth of cloud computing, edge AI, and autonomous systems, we can expect to see more sophisticated AI applications in the near future. However, there are also concerns about the ethical implications of AI, such as data privacy, algorithmic bias, and job displacement.\n",
|
||||
"\n",
|
||||
"Remember, AI has a long history, and its development is an ongoing process. As technology advances, we can expect to see even more innovative applications of AI in various fields."
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\nGreat! The history of Artificial Intelligence (AI) is a fascinating and complex topic that spans several decades. Here\\'s a brief overview:\\n\\n1. Early Years (1950s-1960s): The term \"Artificial Intelligence\" was coined in 1956 by computer scientist John McCarthy. However, the concept of AI dates back to ancient Greece, where mythical creatures like Talos and Hephaestus were created to perform tasks without any human intervention. In the 1950s and 1960s, researchers began exploring ways to replicate human intelligence using computers, leading to the development of simple AI programs like ELIZA (1966) and PARRY (1972).\\n2. Rule-Based Systems (1970s-1980s): As computing power increased, researchers developed rule-based systems, such as Mycin (1976), which could diagnose medical conditions based on a set of rules. This period also saw the rise of expert systems, like EDICT (1985), which mimicked human experts in specific domains.\\n3. Machine Learning (1990s-2000s): With the advent of big data and machine learning algorithms, AI evolved to include neural networks, decision trees, and other techniques for training models on large datasets. This led to the development of applications like speech recognition (e.g., Siri, Alexa), image recognition (e.g., Google Image Search), and natural language processing (e.g., chatbots).\\n4. Deep Learning (2010s-present): The rise of deep learning techniques, such as convolutional neural networks (CNNs) and recurrent neural networks (RNNs), has enabled AI to perform complex tasks like image and speech recognition, natural language processing, and even autonomous driving. Companies like Google, Facebook, and Baidu have invested heavily in deep learning research, leading to breakthroughs in areas like facial recognition, object detection, and machine translation.\\n5. Current Trends (present-future): AI is currently being applied to various industries, including healthcare, finance, education, and entertainment. With the growth of cloud computing, edge AI, and autonomous systems, we can expect to see more sophisticated AI applications in the near future. However, there are also concerns about the ethical implications of AI, such as data privacy, algorithmic bias, and job displacement.\\n\\nRemember, AI has a long history, and its development is an ongoing process. As technology advances, we can expect to see even more innovative applications of AI in various fields.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 40,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm(\"Tell me about the history of AI\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## RAG\n",
|
||||
"\n",
|
||||
"We can use Olama with RAG, [just as shown here](https://python.langchain.com/docs/use_cases/question_answering/how_to/local_retrieval_qa).\n",
|
||||
"\n",
|
||||
"Let's use the 13b model:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"ollama pull llama2:13b\n",
|
||||
"ollama run llama2:13b \n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Let's also use local embeddings from `GPT4AllEmbeddings` and `Chroma`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"! pip install gpt4all chromadb"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 60,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import WebBaseLoader\n",
|
||||
"loader = WebBaseLoader(\"https://lilianweng.github.io/posts/2023-06-23-agent/\")\n",
|
||||
"data = loader.load()\n",
|
||||
"\n",
|
||||
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
|
||||
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n",
|
||||
"all_splits = text_splitter.split_documents(data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 61,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Found model file at /Users/rlm/.cache/gpt4all/ggml-all-MiniLM-L6-v2-f16.bin\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.vectorstores import Chroma\n",
|
||||
"from langchain.embeddings import GPT4AllEmbeddings\n",
|
||||
"\n",
|
||||
"vectorstore = Chroma.from_documents(documents=all_splits, embedding=GPT4AllEmbeddings())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 62,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"4"
|
||||
]
|
||||
},
|
||||
"execution_count": 62,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"question = \"What are the approaches to Task Decomposition?\"\n",
|
||||
"docs = vectorstore.similarity_search(question)\n",
|
||||
"len(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import PromptTemplate\n",
|
||||
"\n",
|
||||
"# Prompt\n",
|
||||
"template = \"\"\"Use the following pieces of context to answer the question at the end. \n",
|
||||
"If you don't know the answer, just say that you don't know, don't try to make up an answer. \n",
|
||||
"Use three sentences maximum and keep the answer as concise as possible. \n",
|
||||
"{context}\n",
|
||||
"Question: {question}\n",
|
||||
"Helpful Answer:\"\"\"\n",
|
||||
"QA_CHAIN_PROMPT = PromptTemplate(\n",
|
||||
" input_variables=[\"context\", \"question\"],\n",
|
||||
" template=template,\n",
|
||||
")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 69,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# LLM\n",
|
||||
"from langchain.llms import Ollama\n",
|
||||
"from langchain.callbacks.manager import CallbackManager\n",
|
||||
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
|
||||
"llm = Ollama(base_url=\"http://localhost:11434\",\n",
|
||||
" model=\"llama2\",\n",
|
||||
" verbose=True,\n",
|
||||
" callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 66,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# QA chain\n",
|
||||
"from langchain.chains import RetrievalQA\n",
|
||||
"qa_chain = RetrievalQA.from_chain_type(\n",
|
||||
" llm,\n",
|
||||
" retriever=vectorstore.as_retriever(),\n",
|
||||
" chain_type_kwargs={\"prompt\": QA_CHAIN_PROMPT},\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 70,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Task decomposition can be approached in different ways for AI agents, including:\n",
|
||||
"\n",
|
||||
"1. Using simple prompts like \"Steps for XYZ.\" or \"What are the subgoals for achieving XYZ?\" to guide the LLM.\n",
|
||||
"2. Providing task-specific instructions, such as \"Write a story outline\" for writing a novel.\n",
|
||||
"3. Utilizing human inputs to help the AI agent understand the task and break it down into smaller steps."
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"question = \"What are the various approaches to Task Decomposition for AI Agents?\"\n",
|
||||
"result = qa_chain({\"query\": question})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can also get logging for tokens."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 56,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Task decomposition can be approached in three ways: (1) using simple prompting like \"Steps for XYZ.\\n1.\", \"What are the subgoals for achieving XYZ?\", (2) by using task-specific instructions, or (3) with human inputs.{'model': 'llama2', 'created_at': '2023-08-08T04:01:09.005367Z', 'done': True, 'context': [1, 29871, 1, 13, 9314, 14816, 29903, 6778, 13, 13, 3492, 526, 263, 8444, 29892, 3390, 1319, 322, 15993, 20255, 29889, 29849, 1234, 408, 1371, 3730, 408, 1950, 29892, 1550, 1641, 9109, 29889, 3575, 6089, 881, 451, 3160, 738, 10311, 1319, 29892, 443, 621, 936, 29892, 11021, 391, 29892, 7916, 391, 29892, 304, 27375, 29892, 18215, 29892, 470, 27302, 2793, 29889, 3529, 9801, 393, 596, 20890, 526, 5374, 635, 443, 5365, 1463, 322, 6374, 297, 5469, 29889, 13, 13, 3644, 263, 1139, 947, 451, 1207, 738, 4060, 29892, 470, 338, 451, 2114, 1474, 16165, 261, 296, 29892, 5649, 2020, 2012, 310, 22862, 1554, 451, 1959, 29889, 960, 366, 1016, 29915, 29873, 1073, 278, 1234, 304, 263, 1139, 29892, 3113, 1016, 29915, 29873, 6232, 2089, 2472, 29889, 13, 13, 29966, 829, 14816, 29903, 6778, 13, 13, 29961, 25580, 29962, 4803, 278, 1494, 12785, 310, 3030, 304, 1234, 278, 1139, 472, 278, 1095, 29889, 29871, 13, 3644, 366, 1016, 29915, 29873, 1073, 278, 1234, 29892, 925, 1827, 393, 366, 1016, 29915, 29873, 1073, 29892, 1016, 29915, 29873, 1018, 304, 1207, 701, 385, 1234, 29889, 29871, 13, 11403, 2211, 25260, 7472, 322, 3013, 278, 1234, 408, 3022, 895, 408, 1950, 29889, 29871, 13, 5398, 26227, 508, 367, 2309, 313, 29896, 29897, 491, 365, 26369, 411, 2560, 9508, 292, 763, 376, 7789, 567, 363, 1060, 29979, 29999, 7790, 29876, 29896, 19602, 376, 5618, 526, 278, 1014, 1484, 1338, 363, 3657, 15387, 1060, 29979, 29999, 29973, 613, 313, 29906, 29897, 491, 773, 3414, 29899, 14940, 11994, 29936, 321, 29889, 29887, 29889, 376, 6113, 263, 5828, 27887, 1213, 363, 5007, 263, 9554, 29892, 470, 313, 29941, 29897, 411, 5199, 10970, 29889, 13, 13, 5398, 26227, 508, 367, 2309, 313, 29896, 29897, 491, 365, 26369, 411, 2560, 9508, 292, 763, 376, 7789, 567, 363, 1060, 29979, 29999, 7790, 29876, 29896, 19602, 376, 5618, 526, 278, 1014, 1484, 1338, 363, 3657, 15387, 1060, 29979, 29999, 29973, 613, 313, 29906, 29897, 491, 773, 3414, 29899, 14940, 11994, 29936, 321, 29889, 29887, 29889, 376, 6113, 263, 5828, 27887, 1213, 363, 5007, 263, 9554, 29892, 470, 313, 29941, 29897, 411, 5199, 10970, 29889, 13, 13, 5398, 26227, 508, 367, 2309, 313, 29896, 29897, 491, 365, 26369, 411, 2560, 9508, 292, 763, 376, 7789, 567, 363, 1060, 29979, 29999, 7790, 29876, 29896, 19602, 376, 5618, 526, 278, 1014, 1484, 1338, 363, 3657, 15387, 1060, 29979, 29999, 29973, 613, 313, 29906, 29897, 491, 773, 3414, 29899, 14940, 11994, 29936, 321, 29889, 29887, 29889, 376, 6113, 263, 5828, 27887, 1213, 363, 5007, 263, 9554, 29892, 470, 313, 29941, 29897, 411, 5199, 10970, 29889, 13, 13, 1451, 16047, 267, 297, 1472, 29899, 8489, 18987, 322, 3414, 26227, 29901, 1858, 9450, 975, 263, 3309, 29891, 4955, 322, 17583, 3902, 8253, 278, 1650, 2913, 3933, 18066, 292, 29889, 365, 26369, 29879, 21117, 304, 10365, 13900, 746, 20050, 411, 15668, 4436, 29892, 3907, 963, 3109, 16424, 9401, 304, 25618, 1058, 5110, 515, 14260, 322, 1059, 29889, 13, 16492, 29901, 1724, 526, 278, 13501, 304, 9330, 897, 510, 3283, 29973, 13, 29648, 1319, 673, 29901, 518, 29914, 25580, 29962, 13, 5398, 26227, 508, 367, 26733, 297, 2211, 5837, 29901, 313, 29896, 29897, 773, 2560, 9508, 292, 763, 376, 7789, 567, 363, 1060, 29979, 29999, 7790, 29876, 29896, 19602, 376, 5618, 526, 278, 1014, 1484, 1338, 363, 
3657, 15387, 1060, 29979, 29999, 29973, 613, 313, 29906, 29897, 491, 773, 3414, 29899, 14940, 11994, 29892, 470, 313, 29941, 29897, 411, 5199, 10970, 29889, 2], 'total_duration': 1364428708, 'load_duration': 1246375, 'sample_count': 62, 'sample_duration': 44859000, 'prompt_eval_count': 1, 'eval_count': 62, 'eval_duration': 1313002000}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.schema import LLMResult\n",
|
||||
"from langchain.callbacks.base import BaseCallbackHandler\n",
|
||||
"\n",
|
||||
"class GenerationStatisticsCallback(BaseCallbackHandler):\n",
|
||||
" def on_llm_end(self, response: LLMResult, **kwargs) -> None:\n",
|
||||
" print(response.generations[0][0].generation_info)\n",
|
||||
" \n",
|
||||
"callback_manager = CallbackManager([StreamingStdOutCallbackHandler(), GenerationStatisticsCallback()])\n",
|
||||
"\n",
|
||||
"llm = Ollama(base_url=\"http://localhost:11434\",\n",
|
||||
" model=\"llama2\",\n",
|
||||
" verbose=True,\n",
|
||||
" callback_manager=callback_manager)\n",
|
||||
"\n",
|
||||
"qa_chain = RetrievalQA.from_chain_type(\n",
|
||||
" llm,\n",
|
||||
" retriever=vectorstore.as_retriever(),\n",
|
||||
" chain_type_kwargs={\"prompt\": QA_CHAIN_PROMPT},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"question = \"What are the approaches to Task Decomposition?\"\n",
|
||||
"result = qa_chain({\"query\": question})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"`eval_count` / (`eval_duration`/10e9) gets `tok / s`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 57,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"47.22003469910937"
|
||||
]
|
||||
},
|
||||
"execution_count": 57,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"62 / (1313002000/1000/1000/1000)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
106
docs/extras/integrations/llms/symblai_nebula.ipynb
Normal file
@@ -0,0 +1,106 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "9597802c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Nebula\n",
|
||||
"\n",
|
||||
"[Nebula](https://symbl.ai/nebula/) is a fully-managed Conversation platform, on which you can build, deploy, and manage scalable AI applications.\n",
|
||||
"\n",
|
||||
"This example goes over how to use LangChain to interact with the [Nebula platform](https://docs.symbl.ai/docs/nebula-llm-overview). \n",
|
||||
"\n",
|
||||
"It will send the requests to Nebula Service endpoint, which concatenates `SYMBLAI_NEBULA_SERVICE_URL` and `SYMBLAI_NEBULA_SERVICE_PATH`, with a token defined in `SYMBLAI_NEBULA_SERVICE_TOKEN`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f15ebe0d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Integrate with a LLMChain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5472a7cd-af26-48ca-ae9b-5f6ae73c74d2",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"NEBULA_SERVICE_URL\"] = NEBULA_SERVICE_URL\n",
|
||||
"os.environ[\"NEBULA_SERVICE_PATH\"] = NEBULA_SERVICE_PATH\n",
|
||||
"os.environ[\"NEBULA_SERVICE_API_KEY\"] = NEBULA_SERVICE_API_KEY"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6fb585dd",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms import OpenLLM\n",
|
||||
"\n",
|
||||
"llm = OpenLLM(\n",
|
||||
" conversation=\"<Drop your text conversation that you want to ask Nebula to analyze here>\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "035dea0f",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import PromptTemplate, LLMChain\n",
|
||||
"\n",
|
||||
"template = \"Identify the {count} main objectives or goals mentioned in this context concisely in less points. Emphasize on key intents.\"\n",
|
||||
"\n",
|
||||
"prompt = PromptTemplate(template=template, input_variables=[\"count\"])\n",
|
||||
"\n",
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||
"\n",
|
||||
"generated = llm_chain.run(count=\"five\")\n",
|
||||
"print(generated)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.8"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "a0a0263b650d907a3bfe41c0f8d6a63a071b884df3cfdc1579f00cdc1aed6b03"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
196
docs/extras/integrations/llms/vllm.ipynb
Normal file
@@ -0,0 +1,196 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "499c3142-2033-437d-a60a-731988ac6074",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# vLLM\n",
|
||||
"\n",
|
||||
"[vLLM](https://vllm.readthedocs.io/en/latest/index.html) is a fast and easy-to-use library for LLM inference and serving, offering:\n",
|
||||
"* State-of-the-art serving throughput \n",
|
||||
"* Efficient management of attention key and value memory with PagedAttention\n",
|
||||
"* Continuous batching of incoming requests\n",
|
||||
"* Optimized CUDA kernels\n",
|
||||
"\n",
|
||||
"This notebooks goes over how to use a LLM with langchain and vLLM.\n",
|
||||
"\n",
|
||||
"To use, you should have the `vllm` python package installed."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "8a3f2666-5c75-4797-967a-7915a247bf33",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install vllm -q"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "84e350f7-21f6-455b-b1f0-8b0116a2fd49",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"INFO 08-06 11:37:33 llm_engine.py:70] Initializing an LLM engine with config: model='mosaicml/mpt-7b', tokenizer='mosaicml/mpt-7b', tokenizer_mode=auto, trust_remote_code=True, dtype=torch.bfloat16, use_dummy_weights=False, download_dir=None, use_np_weights=False, tensor_parallel_size=1, seed=0)\n",
|
||||
"INFO 08-06 11:37:41 llm_engine.py:196] # GPU blocks: 861, # CPU blocks: 512\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Processed prompts: 100%|██████████| 1/1 [00:00<00:00, 2.00it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"What is the capital of France ? The capital of France is Paris.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.llms import VLLM\n",
|
||||
"\n",
|
||||
"llm = VLLM(model=\"mosaicml/mpt-7b\",\n",
|
||||
" trust_remote_code=True, # mandatory for hf models\n",
|
||||
" max_new_tokens=128,\n",
|
||||
" top_k=10,\n",
|
||||
" top_p=0.95,\n",
|
||||
" temperature=0.8,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(llm(\"What is the capital of France ?\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "94a3b41d-8329-4f8f-94f9-453d7f132214",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Integrate the model in an LLMChain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "5605b7a1-fa63-49c1-934d-8b4ef8d71dd5",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Processed prompts: 100%|██████████| 1/1 [00:01<00:00, 1.34s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"1. The first Pokemon game was released in 1996.\n",
|
||||
"2. The president was Bill Clinton.\n",
|
||||
"3. Clinton was president from 1993 to 2001.\n",
|
||||
"4. The answer is Clinton.\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain import PromptTemplate, LLMChain\n",
|
||||
"\n",
|
||||
"template = \"\"\"Question: {question}\n",
|
||||
"\n",
|
||||
"Answer: Let's think step by step.\"\"\"\n",
|
||||
"prompt = PromptTemplate(template=template, input_variables=[\"question\"])\n",
|
||||
"\n",
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||
"\n",
|
||||
"question = \"Who was the US president in the year the first Pokemon game was released?\"\n",
|
||||
"\n",
|
||||
"print(llm_chain.run(question))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "56826aba-d08b-4838-8bfa-ca96e463b25d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Distributed Inference\n",
|
||||
"\n",
|
||||
"vLLM supports distributed tensor-parallel inference and serving. \n",
|
||||
"\n",
|
||||
"To run multi-GPU inference with the LLM class, set the `tensor_parallel_size` argument to the number of GPUs you want to use. For example, to run inference on 4 GPUs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f8c25c35-47b5-459d-9985-3cf546e9ac16",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms import VLLM\n",
|
||||
"\n",
|
||||
"llm = VLLM(model=\"mosaicml/mpt-30b\",\n",
|
||||
" tensor_parallel_size=4,\n",
|
||||
" trust_remote_code=True, # mandatory for hf models\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"llm(\"What is the future of AI?\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "conda_pytorch_p310",
|
||||
"language": "python",
|
||||
"name": "conda_pytorch_p310"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,67 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Rockset Chat Message History\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use [Rockset](https://rockset.com/docs) to store chat message history. \n",
|
||||
"\n",
|
||||
"To begin, with get your API key from the [Rockset console](https://console.rockset.com/apikeys). Find your API region for the Rockset [API reference](https://rockset.com/docs/rest-api#introduction)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.memory.chat_message_histories import RocksetChatMessageHistory\n",
|
||||
"from rockset import RocksetClient, Regions\n",
|
||||
"\n",
|
||||
"history = RocksetChatMessageHistory(\n",
|
||||
" session_id=\"MySession\",\n",
|
||||
" client=RocksetClient(\n",
|
||||
" api_key=\"YOUR API KEY\", \n",
|
||||
" host=Regions.usw2a1 # us-west-2 Oregon\n",
|
||||
" ),\n",
|
||||
" collection=\"langchain_demo\",\n",
|
||||
" sync=True\n",
|
||||
")\n",
|
||||
"history.add_user_message(\"hi!\")\n",
|
||||
"history.add_ai_message(\"whats up?\")\n",
|
||||
"print(history.messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The output should be something like:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"[\n",
|
||||
" HumanMessage(content='hi!', additional_kwargs={'id': '2e62f1c2-e9f7-465e-b551-49bae07fe9f0'}, example=False), \n",
|
||||
" AIMessage(content='whats up?', additional_kwargs={'id': 'b9be8eda-4c18-4cf8-81c3-e91e876927d0'}, example=False)\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"```"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -0,0 +1,154 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "91c6a7ef",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Streamlit Chat Message History\n",
|
||||
"\n",
|
||||
"This notebook goes over how to store and use chat message history in a Streamlit app. StreamlitChatMessageHistory will store messages in\n",
|
||||
"[Streamlit session state](https://docs.streamlit.io/library/api-reference/session-state)\n",
|
||||
"at the specified `key=`. The default key is `\"langchain_messages\"`.\n",
|
||||
"\n",
|
||||
"- Note, StreamlitChatMessageHistory only works when run in a Streamlit app.\n",
|
||||
"- You may also be interested in [StreamlitCallbackHandler](/docs/integrations/callbacks/streamlit) for LangChain.\n",
|
||||
"- For more on Streamlit check out their\n",
|
||||
"[getting started documentation](https://docs.streamlit.io/library/get-started).\n",
|
||||
"\n",
|
||||
"You can see the [full app example running here](https://langchain-st-memory.streamlit.app/), and more examples in\n",
|
||||
"[github.com/langchain-ai/streamlit-agent](https://github.com/langchain-ai/streamlit-agent)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d15e3302",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.memory import StreamlitChatMessageHistory\n",
|
||||
"\n",
|
||||
"history = StreamlitChatMessageHistory(key=\"chat_messages\")\n",
|
||||
"\n",
|
||||
"history.add_user_message(\"hi!\")\n",
|
||||
"history.add_ai_message(\"whats up?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "64fc465e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"history.messages"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b60dc735",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can integrate StreamlitChatMessageHistory into ConversationBufferMemory and chains or agents as usual. The history will be persisted across re-runs of the Streamlit app within a given user session. A given StreamlitChatMessageHistory will NOT be persisted or shared across user sessions."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "42ab5bf3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"from langchain.memory import ConversationBufferMemory\n",
|
||||
"from langchain.memory.chat_message_histories import StreamlitChatMessageHistory\n",
|
||||
"\n",
|
||||
"# Optionally, specify your own session_state key for storing messages\n",
|
||||
"msgs = StreamlitChatMessageHistory(key=\"special_app_key\")\n",
|
||||
"\n",
|
||||
"memory = ConversationBufferMemory(memory_key=\"history\", chat_memory=msgs)\n",
|
||||
"if len(msgs.messages) == 0:\n",
|
||||
" msgs.add_ai_message(\"How can I help you?\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a29252de",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import LLMChain\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"template = \"\"\"You are an AI chatbot having a conversation with a human.\n",
|
||||
"\n",
|
||||
"{history}\n",
|
||||
"Human: {human_input}\n",
|
||||
"AI: \"\"\"\n",
|
||||
"prompt = PromptTemplate(input_variables=[\"history\", \"human_input\"], template=template)\n",
|
||||
"\n",
|
||||
"# Add the memory to an LLMChain as usual\n",
|
||||
"llm_chain = LLMChain(llm=OpenAI(), prompt=prompt, memory=memory)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7cd99b4b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Conversational Streamlit apps will often re-draw each previous chat message on every re-run. This is easy to do by iterating through `StreamlitChatMessageHistory.messages`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3bdb637b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import streamlit as st\n",
|
||||
"\n",
|
||||
"for msg in msgs.messages:\n",
|
||||
" st.chat_message(msg.type).write(msg.content)\n",
|
||||
"\n",
|
||||
"if prompt := st.chat_input():\n",
|
||||
" st.chat_message(\"human\").write(prompt)\n",
|
||||
"\n",
|
||||
" # As usual, new messages are added to StreamlitChatMessageHistory when the Chain is called.\n",
|
||||
" response = llm_chain.run(prompt)\n",
|
||||
" st.chat_message(\"ai\").write(response)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7adaf3d6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**[View the final app](https://langchain-st-memory.streamlit.app/).**"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "poetry-venv",
|
||||
"language": "python",
|
||||
"name": "poetry-venv"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -13,7 +13,7 @@ pip install boto3
|
||||
See a [usage example](/docs/integrations/llms/bedrock).
|
||||
|
||||
```python
|
||||
from langchain import Bedrock
|
||||
from langchain.llms.bedrock import Bedrock
|
||||
```
|
||||
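A minimal usage sketch, assuming AWS credentials are already configured for `boto3`; the `model_id` below is only an example and must match a model enabled in your account:

```python
from langchain.llms.bedrock import Bedrock

llm = Bedrock(model_id="anthropic.claude-v2")  # example model_id
llm("Tell me a joke about data engineers.")
```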
|
||||
## Text Embedding Models
|
||||
|
||||
22
docs/extras/integrations/providers/fireworks.md
Normal file
@@ -0,0 +1,22 @@
|
||||
# Fireworks
|
||||
|
||||
This page covers how to use the Fireworks models within Langchain.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
- To use the Fireworks model, you need to have a Fireworks API key. To generate one, sign up at platform.fireworks.ai
|
||||
- Authenticate by setting the FIREWORKS_API_KEY environment variable.
|
||||
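For example (the key below is a placeholder):

```python
import os

os.environ["FIREWORKS_API_KEY"] = "<your-fireworks-api-key>"
```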
|
||||
## LLM
|
||||
|
||||
Fireworks integrates with Langchain through the LLM module, which allows for standardized usage of any model deployed on the Fireworks platform.
|
||||
|
||||
In this example, we'll work with the llama-v2-13b chat model.
|
||||
```python
|
||||
from langchain.llms.fireworks import Fireworks
|
||||
|
||||
llm = Fireworks(model="fireworks-llama-v2-13b-chat", max_tokens=256, temperature=0.4)
|
||||
llm("Name 3 sports.")
|
||||
```
|
||||
|
||||
For a more detailed walkthrough, see [here](/docs/integrations/llms/Fireworks).
|
||||
104
docs/extras/integrations/providers/log10.mdx
Normal file
@@ -0,0 +1,104 @@
|
||||
# Log10
|
||||
|
||||
This page covers how to use [Log10](https://log10.io) within LangChain.
|
||||
|
||||
## What is Log10?
|
||||
|
||||
Log10 is an [open source](https://github.com/log10-io/log10) proxiless LLM data management and application development platform that lets you log, debug and tag your Langchain calls.
|
||||
|
||||
## Quick start
|
||||
|
||||
1. Create your free account at [log10.io](https://log10.io)
|
||||
2. Add your `LOG10_TOKEN` and `LOG10_ORG_ID` from the Settings and Organization tabs respectively as environment variables.
|
||||
3. Also add `LOG10_URL=https://log10.io` and your usual LLM API key (e.g. `OPENAI_API_KEY` or `ANTHROPIC_API_KEY`) to your environment; for example:
|
||||
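```python
import os

# Placeholders; copy the real values from your log10.io Settings and Organization tabs
os.environ["LOG10_TOKEN"] = "<your-log10-token>"
os.environ["LOG10_ORG_ID"] = "<your-org-id>"
os.environ["LOG10_URL"] = "https://log10.io"
os.environ["OPENAI_API_KEY"] = "<your-openai-api-key>"
```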
|
||||
## How to enable Log10 data management for Langchain
|
||||
|
||||
Integration with log10 is a simple one-line `log10_callback` integration as shown below:
|
||||
|
||||
```python
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
from langchain.schema import HumanMessage
|
||||
|
||||
from log10.langchain import Log10Callback
|
||||
from log10.llm import Log10Config
|
||||
|
||||
log10_callback = Log10Callback(log10_config=Log10Config())
|
||||
|
||||
messages = [
|
||||
HumanMessage(content="You are a ping pong machine"),
|
||||
HumanMessage(content="Ping?"),
|
||||
]
|
||||
|
||||
llm = ChatOpenAI(model_name="gpt-3.5-turbo", callbacks=[log10_callback])
|
||||
```
|
||||
|
||||
[Log10 + Langchain + Logs docs](https://github.com/log10-io/log10/blob/main/logging.md#langchain-logger)
|
||||
|
||||
[More details + screenshots](https://log10.io/docs/logs) including instructions for self-hosting logs
|
||||
|
||||
## How to use tags with Log10
|
||||
|
||||
```python
|
||||
from langchain import OpenAI
|
||||
from langchain.chat_models import ChatAnthropic
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
from langchain.schema import HumanMessage
|
||||
|
||||
from log10.langchain import Log10Callback
|
||||
from log10.llm import Log10Config
|
||||
|
||||
log10_callback = Log10Callback(log10_config=Log10Config())
|
||||
|
||||
messages = [
|
||||
HumanMessage(content="You are a ping pong machine"),
|
||||
HumanMessage(content="Ping?"),
|
||||
]
|
||||
|
||||
llm = ChatOpenAI(model_name="gpt-3.5-turbo", callbacks=[log10_callback], temperature=0.5, tags=["test"])
|
||||
completion = llm.predict_messages(messages, tags=["foobar"])
|
||||
print(completion)
|
||||
|
||||
llm = ChatAnthropic(model="claude-2", callbacks=[log10_callback], temperature=0.7, tags=["baz"])
|
||||
llm.predict_messages(messages)
|
||||
print(completion)
|
||||
|
||||
llm = OpenAI(model_name="text-davinci-003", callbacks=[log10_callback], temperature=0.5)
|
||||
completion = llm.predict("You are a ping pong machine.\nPing?\n")
|
||||
print(completion)
|
||||
```
|
||||
|
||||
You can also intermix direct OpenAI calls and Langchain LLM calls:
|
||||
|
||||
```python
|
||||
import os
|
||||
from log10.load import log10, log10_session
|
||||
import openai
|
||||
from langchain import OpenAI
|
||||
|
||||
log10(openai)
|
||||
|
||||
with log10_session(tags=["foo", "bar"]):
|
||||
# Log a direct OpenAI call
|
||||
response = openai.Completion.create(
|
||||
model="text-ada-001",
|
||||
prompt="Where is the Eiffel Tower?",
|
||||
temperature=0,
|
||||
max_tokens=1024,
|
||||
top_p=1,
|
||||
frequency_penalty=0,
|
||||
presence_penalty=0,
|
||||
)
|
||||
print(response)
|
||||
|
||||
# Log a call via Langchain
|
||||
llm = OpenAI(model_name="text-ada-001", temperature=0.5)
|
||||
response = llm.predict("You are a ping pong machine.\nPing?\n")
|
||||
print(response)
|
||||
```
|
||||
|
||||
## How to debug Langchain calls
|
||||
|
||||
[Example of debugging](https://log10.io/docs/prompt_chain_debugging)
|
||||
|
||||
[More Langchain examples](https://github.com/log10-io/log10/tree/main/examples#langchain)
|
||||
@@ -1,6 +1,6 @@
|
||||
# MLflow AI Gateway
|
||||
|
||||
The MLflow AI Gateway service is a powerful tool designed to streamline the usage and management of various large language model (LLM) providers, such as OpenAI and Anthropic, within an organization. It offers a high-level interface that simplifies the interaction with these services by providing a unified endpoint to handle specific LLM related requests. See [the MLflow AI Gateway documentation](https://mlflow.org/docs/latest/gateway/index.html) for more details.
|
||||
>`The MLflow AI Gateway` service is a powerful tool designed to streamline the usage and management of various large language model (LLM) providers, such as OpenAI and Anthropic, within an organization. It offers a high-level interface that simplifies the interaction with these services by providing a unified endpoint to handle specific LLM related requests. See [the MLflow AI Gateway documentation](https://mlflow.org/docs/latest/gateway/index.html) for more details.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
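As a hedged sketch of the LangChain side, assuming a gateway already running locally and a completions route named `completions` (both are assumptions about your gateway config):

```python
from langchain.llms import MlflowAIGateway

gateway = MlflowAIGateway(
    gateway_uri="http://127.0.0.1:5000",  # assumed local gateway address
    route="completions",                  # assumed route name
    params={"temperature": 0.0},
)
print(gateway("What is MLflow?"))
```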
|
||||
@@ -1,19 +1,49 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "5d184f91",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# MLflow\n",
|
||||
"\n",
|
||||
"This notebook goes over how to track your LangChain experiments into your MLflow Server"
|
||||
],
|
||||
"id": "5d184f91"
|
||||
">[MLflow](https://www.mlflow.org/docs/latest/what-is-mlflow.html) is a versatile, expandable, open-source platform for managing workflows and artifacts across the machine learning lifecycle. It has built-in integrations with many popular ML libraries, but can be used with any library, algorithm, or deployment tool. It is designed to be extensible, so you can write plugins to support new workflows, libraries, and tools.\n",
|
||||
"\n",
|
||||
"This notebook goes over how to track your LangChain experiments into your `MLflow Server`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ea73efae-7182-4a89-a492-c865b1fcf981",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## External examples"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "97361a84-4e8f-45ba-b291-814cf73cd8f2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"`MLflow` provides [several examples](https://github.com/mlflow/mlflow/tree/master/examples/langchain) for the `LangChain` integration:\n",
|
||||
"- [simple_chain](https://github.com/mlflow/mlflow/blob/master/examples/langchain/simple_chain.py)\n",
|
||||
"- [simple_agent](https://github.com/mlflow/mlflow/blob/master/examples/langchain/simple_agent.py)\n",
|
||||
"- [retriever_chain](https://github.com/mlflow/mlflow/blob/master/examples/langchain/retriever_chain.py)\n",
|
||||
"- [retrieval_qa_chain](https://github.com/mlflow/mlflow/blob/master/examples/langchain/retrieval_qa_chain.py)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e0cbd74b-1542-45a4-a72b-b2eedeffd2e0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ca7bd72f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -24,12 +54,12 @@
|
||||
"!pip install openai\n",
|
||||
"!pip install google-search-results\n",
|
||||
"!python -m spacy download en_core_web_sm"
|
||||
],
|
||||
"id": "ca7bd72f"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bf8e1f5c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -38,23 +68,23 @@
|
||||
"os.environ[\"MLFLOW_TRACKING_URI\"] = \"\"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
|
||||
"os.environ[\"SERPAPI_API_KEY\"] = \"\""
|
||||
],
|
||||
"id": "bf8e1f5c"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fd49fd45",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.callbacks import MlflowCallbackHandler\n",
|
||||
"from langchain.llms import OpenAI"
|
||||
],
|
||||
"id": "fd49fd45"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "578cac8c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -70,12 +100,12 @@
|
||||
"llm = OpenAI(\n",
|
||||
" model_name=\"gpt-3.5-turbo\", temperature=0, callbacks=[mlflow_callback], verbose=True\n",
|
||||
")"
|
||||
],
|
||||
"id": "578cac8c"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9b20acae",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -83,23 +113,23 @@
|
||||
"llm_result = llm.generate([\"Tell me a joke\"])\n",
|
||||
"\n",
|
||||
"mlflow_callback.flush_tracker(llm)"
|
||||
],
|
||||
"id": "9b20acae"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8b872046",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"from langchain.chains import LLMChain"
|
||||
],
|
||||
"id": "8b872046"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1b2627ef",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -117,12 +147,12 @@
|
||||
"]\n",
|
||||
"synopsis_chain.apply(test_prompts)\n",
|
||||
"mlflow_callback.flush_tracker(synopsis_chain)"
|
||||
],
|
||||
"id": "1b2627ef"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e002823a",
|
||||
"metadata": {
|
||||
"id": "_jN73xcPVEpI"
|
||||
},
|
||||
@@ -130,12 +160,12 @@
|
||||
"source": [
|
||||
"from langchain.agents import initialize_agent, load_tools\n",
|
||||
"from langchain.agents import AgentType"
|
||||
],
|
||||
"id": "e002823a"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "655bd47e",
|
||||
"metadata": {
|
||||
"id": "Gpq4rk6VT9cu"
|
||||
},
|
||||
@@ -154,8 +184,7 @@
|
||||
" \"Who is Leo DiCaprio's girlfriend? What is her current age raised to the 0.43 power?\"\n",
|
||||
")\n",
|
||||
"mlflow_callback.flush_tracker(agent, finish=True)"
|
||||
],
|
||||
"id": "655bd47e"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -177,9 +206,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
}
|
||||
|
||||
30
docs/extras/integrations/providers/pubmed.md
Normal file
@@ -0,0 +1,30 @@
|
||||
# PubMed
|
||||
|
||||
|
||||
>[PubMed®](https://pubmed.ncbi.nlm.nih.gov/) by `The National Center for Biotechnology Information, National Library of Medicine`
|
||||
> comprises more than 35 million citations for biomedical literature from `MEDLINE`, life science journals, and online books.
|
||||
> Citations may include links to full text content from `PubMed Central` and publisher web sites.
|
||||
|
||||
## Setup
|
||||
You need to install the `xmltodict` Python package.
|
||||
|
||||
```bash
|
||||
pip install xmltodict
|
||||
```
|
||||
|
||||
### Retriever
|
||||
|
||||
See a [usage example](/docs/integrations/retrievers/pubmed).
|
||||
|
||||
```python
|
||||
from langchain.retrievers import PubMedRetriever
|
||||
```
|
||||
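A minimal usage sketch (assuming the retriever exposes `top_k_results` from the underlying PubMed API wrapper):

```python
from langchain.retrievers import PubMedRetriever

retriever = PubMedRetriever(top_k_results=3)  # top_k_results limits fetched citations
docs = retriever.get_relevant_documents("covid vaccine efficacy")
```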
|
||||
### Document Loader
|
||||
|
||||
See a [usage example](/docs/integrations/document_loaders/pubmed).
|
||||
|
||||
```python
|
||||
from langchain.document_loaders import PubMedLoader
|
||||
```
|
||||
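And a rough sketch of the loader, assuming the query-based constructor ("chatgpt" is an arbitrary search query):

```python
from langchain.document_loaders import PubMedLoader

loader = PubMedLoader("chatgpt")
docs = loader.load()
```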
@@ -23,4 +23,11 @@ from langchain.vectorstores import Rockset
|
||||
See a [usage example](/docs/integrations/document_loaders/rockset).
|
||||
```python
|
||||
from langchain.document_loaders import RocksetLoader
|
||||
```
|
||||
|
||||
## Chat Message History
|
||||
|
||||
See a [usage example](/docs/integrations/memory/rockset_chat_message_history).
|
||||
```python
|
||||
from langchain.memory.chat_message_histories import RocksetChatMessageHistory
|
||||
```
|
||||
916
docs/extras/integrations/providers/sagemaker_tracking.ipynb
Normal file
@@ -0,0 +1,916 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ef3909cf-72ca-4841-85c6-ef4e0eae3aaf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# SageMaker Tracking\n",
|
||||
"\n",
|
||||
"This notebook shows how LangChain Callback can be used to log and track prompts and other LLM hyperparameters into SageMaker Experiments. Here, we use different scenarios to showcase the capability:\n",
|
||||
"* **Scenario 1**: *Single LLM* - A case where a single LLM model is used to generate output based on a given prompt.\n",
|
||||
"* **Scenario 2**: *Sequential Chain* - A case where a sequential chain of two LLM models is used.\n",
|
||||
"* **Scenario 3**: *Agent with Tools (Chain of Thought)* - A case where multiple tools (search and math) are used in addition to an LLM.\n",
|
||||
"\n",
|
||||
"[Amazon SageMaker](https://aws.amazon.com/sagemaker/) is a fully managed service that is used to quickly and easily build, train and deploy machine learning (ML) models. \n",
|
||||
"\n",
|
||||
"[Amazon SageMaker Experiments](https://docs.aws.amazon.com/sagemaker/latest/dg/experiments.html) is a capability of Amazon SageMaker that lets you organize, track, compare and evaluate ML experiments and model versions.\n",
|
||||
"\n",
|
||||
"In this notebook, we will create a single experiment to log the prompts from each scenario."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "94c22cb4-3b1c-432b-b3be-0235eec79c5c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Installation and Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2353436d-17fe-4f58-a2f9-c299d56393fd",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install sagemaker\n",
|
||||
"!pip install openai\n",
|
||||
"!pip install google-search-results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "65dcf62e-7a38-4119-adb9-d9e884e82499",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"First, setup the required API keys\n",
|
||||
"* OpenAI: https://platform.openai.com/account/api-keys (For OpenAI LLM model)\n",
|
||||
"* Google SERP API: https://serpapi.com/manage-api-key (For Google Search Tool)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5ec2b898-0cfc-4308-8e86-569cd7b7cf41",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"## Add your API keys below\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"<ADD-KEY-HERE>\"\n",
|
||||
"os.environ[\"SERPAPI_API_KEY\"] = \"<ADD-KEY-HERE>\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "80968ebf-519f-46de-8703-97532ac39e3e",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"from langchain.chains import LLMChain, SimpleSequentialChain\n",
|
||||
"from langchain.agents import initialize_agent, load_tools\n",
|
||||
"from langchain.agents import Tool\n",
|
||||
"from langchain.callbacks import SageMakerCallbackHandler\n",
|
||||
"\n",
|
||||
"from sagemaker.analytics import ExperimentAnalytics\n",
|
||||
"from sagemaker.session import Session\n",
|
||||
"from sagemaker.experiments.run import Run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b67d031f-a01f-4009-ad29-c80ab8ad50ea",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## LLM Prompt Tracking"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "da2d70ee-173b-469d-a718-54c33d862844",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#LLM Hyperparameters\n",
|
||||
"HPARAMS = {\n",
|
||||
" \"temperature\": 0.1,\n",
|
||||
" \"model_name\": \"text-davinci-003\",\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#Bucket used to save prompt logs (Use `None` is used to save the default bucket or otherwise change it)\n",
|
||||
"BUCKET_NAME = None\n",
|
||||
"\n",
|
||||
"#Experiment name\n",
|
||||
"EXPERIMENT_NAME = \"langchain-sagemaker-tracker\"\n",
|
||||
"\n",
|
||||
"#Create SageMaker Session with the given bucket\n",
|
||||
"session = Session(default_bucket=BUCKET_NAME)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7239a39a-08d8-43cb-8922-81abdd5d9ebf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Scenario 1 - LLM"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "abc00335-50c8-4119-adb8-4c4ab8522e23",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"RUN_NAME = \"run-scenario-1\"\n",
|
||||
"PROMPT_TEMPLATE = \"tell me a joke about {topic}\"\n",
|
||||
"INPUT_VARIABLES = {\"topic\": \"fish\"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4a3a3cbe-db85-4255-8d8b-eaafdca8c6e2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with Run(experiment_name=EXPERIMENT_NAME, run_name=RUN_NAME, sagemaker_session=session) as run:\n",
|
||||
"\n",
|
||||
" # Create SageMaker Callback\n",
|
||||
" sagemaker_callback = SageMakerCallbackHandler(run)\n",
|
||||
"\n",
|
||||
" # Define LLM model with callback\n",
|
||||
" llm = OpenAI(callbacks=[sagemaker_callback], **HPARAMS)\n",
|
||||
"\n",
|
||||
" # Create prompt template\n",
|
||||
" prompt = PromptTemplate.from_template(template=PROMPT_TEMPLATE)\n",
|
||||
"\n",
|
||||
" # Create LLM Chain\n",
|
||||
" chain = LLMChain(llm=llm, prompt=prompt, callbacks=[sagemaker_callback])\n",
|
||||
"\n",
|
||||
" # Run chain\n",
|
||||
" chain.run(**INPUT_VARIABLES)\n",
|
||||
"\n",
|
||||
" # Reset the callback\n",
|
||||
" sagemaker_callback.flush_tracker()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7dc69934-9f42-40b7-9931-36a3371a38da",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Scenario 2 - Sequential Chain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "50b75ef9-9825-4ccc-8414-4cd7525a1b68",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"RUN_NAME = \"run-scenario-2\"\n",
|
||||
"\n",
|
||||
"PROMPT_TEMPLATE_1 = \"\"\"You are a playwright. Given the title of play, it is your job to write a synopsis for that title.\n",
|
||||
"Title: {title}\n",
|
||||
"Playwright: This is a synopsis for the above play:\"\"\"\n",
|
||||
"PROMPT_TEMPLATE_2 = \"\"\"You are a play critic from the New York Times. Given the synopsis of play, it is your job to write a review for that play.\n",
|
||||
"Play Synopsis: {synopsis}\n",
|
||||
"Review from a New York Times play critic of the above play:\"\"\"\n",
|
||||
"\n",
|
||||
"INPUT_VARIABLES = {\n",
|
||||
" \"input\": \"documentary about good video games that push the boundary of game design\"\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fb7fff5f-e89f-40e2-96b4-3641a0b6e9b4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with Run(experiment_name=EXPERIMENT_NAME, run_name=RUN_NAME, sagemaker_session=session) as run:\n",
|
||||
"\n",
|
||||
" # Create SageMaker Callback\n",
|
||||
" sagemaker_callback = SageMakerCallbackHandler(run)\n",
|
||||
"\n",
|
||||
" # Create prompt templates for the chain\n",
|
||||
" prompt_template1 = PromptTemplate.from_template(template=PROMPT_TEMPLATE_1)\n",
|
||||
" prompt_template2 = PromptTemplate.from_template(template=PROMPT_TEMPLATE_2)\n",
|
||||
"\n",
|
||||
" # Define LLM model with callback\n",
|
||||
" llm = OpenAI(callbacks=[sagemaker_callback], **HPARAMS)\n",
|
||||
"\n",
|
||||
" # Create chain1\n",
|
||||
" chain1 = LLMChain(llm=llm, prompt=prompt_template1, callbacks=[sagemaker_callback])\n",
|
||||
"\n",
|
||||
" # Create chain2\n",
|
||||
" chain2 = LLMChain(llm=llm, prompt=prompt_template2, callbacks=[sagemaker_callback])\n",
|
||||
"\n",
|
||||
" # Create Sequential chain\n",
|
||||
" overall_chain = SimpleSequentialChain(chains=[chain1, chain2], callbacks=[sagemaker_callback])\n",
|
||||
"\n",
|
||||
" # Run overall sequential chain\n",
|
||||
" overall_chain.run(**INPUT_VARIABLES)\n",
|
||||
"\n",
|
||||
" # Reset the callback\n",
|
||||
" sagemaker_callback.flush_tracker()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6b82bd0e-c626-4797-bb06-c1983f176315",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Scenario 3 - Agent with Tools"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b5066f03-49dc-4868-be8e-d21ce22063fe",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"RUN_NAME = \"run-scenario-3\"\n",
|
||||
"PROMPT_TEMPLATE = \"Who is the oldest person alive? And what is their current age raised to the power of 1.51?\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "98385c42-9e44-4b03-b76d-007cb4797864",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with Run(experiment_name=EXPERIMENT_NAME, run_name=RUN_NAME, sagemaker_session=session) as run:\n",
|
||||
"\n",
|
||||
" # Create SageMaker Callback\n",
|
||||
" sagemaker_callback = SageMakerCallbackHandler(run)\n",
|
||||
"\n",
|
||||
" # Define LLM model with callback\n",
|
||||
" llm = OpenAI(callbacks=[sagemaker_callback], **HPARAMS)\n",
|
||||
"\n",
|
||||
" # Define tools\n",
|
||||
" tools = load_tools([\"serpapi\", \"llm-math\"], llm=llm, callbacks=[sagemaker_callback])\n",
|
||||
"\n",
|
||||
" # Initialize agent with all the tools\n",
|
||||
" agent = initialize_agent(tools, llm, agent=\"zero-shot-react-description\", callbacks=[sagemaker_callback])\n",
|
||||
"\n",
|
||||
" # Run agent\n",
|
||||
" agent.run(input=PROMPT_TEMPLATE)\n",
|
||||
"\n",
|
||||
" # Reset the callback\n",
|
||||
" sagemaker_callback.flush_tracker()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c306a1c9-99f8-476d-96db-347746f5cfe0",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"## Load Log Data\n",
|
||||
"\n",
|
||||
"Once the prompts are logged, we can easily load and convert them to Pandas DataFrame as follows."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ec7b4af2-e01d-4f6c-9de5-70d2b4acb9e6",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Load\n",
|
||||
"logs = ExperimentAnalytics(experiment_name=EXPERIMENT_NAME)\n",
|
||||
"\n",
|
||||
"#Convert as pandas dataframe\n",
|
||||
"df = logs.dataframe(force_refresh=True)\n",
|
||||
"\n",
|
||||
"print(df.shape)\n",
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "29991c75-f9cf-4c36-abfd-903c09fb170d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As can be seen above, there are three runs (rows) in the experiment corresponding to each scenario. Each run logs the prompts and related LLM settings/hyperparameters as json and are saved in s3 bucket. Feel free to load and explore the log data from each json path."
|
||||
]
|
||||
},
|
||||
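A hedged sketch of pulling one of those JSON artifacts back for inspection; the bucket and key are placeholders for a path surfaced in the dataframe above:

```python
import json

import boto3

# Placeholders -- copy an s3://<bucket>/<key> path from the dataframe above
BUCKET = "<your-default-bucket>"
KEY = "<prefix/to/prompt-log.json>"

s3 = boto3.client("s3")
body = s3.get_object(Bucket=BUCKET, Key=KEY)["Body"].read()
log = json.loads(body)
print(list(log))  # top-level keys of the logged record
```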
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "61a695d6-0aef-4284-9e12-eea8bc143dbd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"availableInstances": [
|
||||
{
|
||||
"_defaultOrder": 0,
|
||||
"_isFastLaunch": true,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 4,
|
||||
"name": "ml.t3.medium",
|
||||
"vcpuNum": 2
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 1,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 8,
|
||||
"name": "ml.t3.large",
|
||||
"vcpuNum": 2
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 2,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 16,
|
||||
"name": "ml.t3.xlarge",
|
||||
"vcpuNum": 4
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 3,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 32,
|
||||
"name": "ml.t3.2xlarge",
|
||||
"vcpuNum": 8
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 4,
|
||||
"_isFastLaunch": true,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 8,
|
||||
"name": "ml.m5.large",
|
||||
"vcpuNum": 2
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 5,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 16,
|
||||
"name": "ml.m5.xlarge",
|
||||
"vcpuNum": 4
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 6,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 32,
|
||||
"name": "ml.m5.2xlarge",
|
||||
"vcpuNum": 8
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 7,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 64,
|
||||
"name": "ml.m5.4xlarge",
|
||||
"vcpuNum": 16
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 8,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 128,
|
||||
"name": "ml.m5.8xlarge",
|
||||
"vcpuNum": 32
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 9,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 192,
|
||||
"name": "ml.m5.12xlarge",
|
||||
"vcpuNum": 48
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 10,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 256,
|
||||
"name": "ml.m5.16xlarge",
|
||||
"vcpuNum": 64
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 11,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 384,
|
||||
"name": "ml.m5.24xlarge",
|
||||
"vcpuNum": 96
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 12,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 8,
|
||||
"name": "ml.m5d.large",
|
||||
"vcpuNum": 2
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 13,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 16,
|
||||
"name": "ml.m5d.xlarge",
|
||||
"vcpuNum": 4
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 14,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 32,
|
||||
"name": "ml.m5d.2xlarge",
|
||||
"vcpuNum": 8
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 15,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 64,
|
||||
"name": "ml.m5d.4xlarge",
|
||||
"vcpuNum": 16
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 16,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 128,
|
||||
"name": "ml.m5d.8xlarge",
|
||||
"vcpuNum": 32
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 17,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 192,
|
||||
"name": "ml.m5d.12xlarge",
|
||||
"vcpuNum": 48
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 18,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 256,
|
||||
"name": "ml.m5d.16xlarge",
|
||||
"vcpuNum": 64
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 19,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 384,
|
||||
"name": "ml.m5d.24xlarge",
|
||||
"vcpuNum": 96
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 20,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": true,
|
||||
"memoryGiB": 0,
|
||||
"name": "ml.geospatial.interactive",
|
||||
"supportedImageNames": [
|
||||
"sagemaker-geospatial-v1-0"
|
||||
],
|
||||
"vcpuNum": 0
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 21,
|
||||
"_isFastLaunch": true,
|
||||
"category": "Compute optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 4,
|
||||
"name": "ml.c5.large",
|
||||
"vcpuNum": 2
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 22,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Compute optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 8,
|
||||
"name": "ml.c5.xlarge",
|
||||
"vcpuNum": 4
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 23,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Compute optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 16,
|
||||
"name": "ml.c5.2xlarge",
|
||||
"vcpuNum": 8
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 24,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Compute optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 32,
|
||||
"name": "ml.c5.4xlarge",
|
||||
"vcpuNum": 16
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 25,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Compute optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 72,
|
||||
"name": "ml.c5.9xlarge",
|
||||
"vcpuNum": 36
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 26,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Compute optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 96,
|
||||
"name": "ml.c5.12xlarge",
|
||||
"vcpuNum": 48
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 27,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Compute optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 144,
|
||||
"name": "ml.c5.18xlarge",
|
||||
"vcpuNum": 72
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 28,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Compute optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 192,
|
||||
"name": "ml.c5.24xlarge",
|
||||
"vcpuNum": 96
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 29,
|
||||
"_isFastLaunch": true,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 1,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 16,
|
||||
"name": "ml.g4dn.xlarge",
|
||||
"vcpuNum": 4
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 30,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 1,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 32,
|
||||
"name": "ml.g4dn.2xlarge",
|
||||
"vcpuNum": 8
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 31,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 1,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 64,
|
||||
"name": "ml.g4dn.4xlarge",
|
||||
"vcpuNum": 16
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 32,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 1,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 128,
|
||||
"name": "ml.g4dn.8xlarge",
|
||||
"vcpuNum": 32
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 33,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 4,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 192,
|
||||
"name": "ml.g4dn.12xlarge",
|
||||
"vcpuNum": 48
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 34,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 1,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 256,
|
||||
"name": "ml.g4dn.16xlarge",
|
||||
"vcpuNum": 64
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 35,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 1,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 61,
|
||||
"name": "ml.p3.2xlarge",
|
||||
"vcpuNum": 8
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 36,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 4,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 244,
|
||||
"name": "ml.p3.8xlarge",
|
||||
"vcpuNum": 32
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 37,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 8,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 488,
|
||||
"name": "ml.p3.16xlarge",
|
||||
"vcpuNum": 64
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 38,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 8,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 768,
|
||||
"name": "ml.p3dn.24xlarge",
|
||||
"vcpuNum": 96
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 39,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Memory Optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 16,
|
||||
"name": "ml.r5.large",
|
||||
"vcpuNum": 2
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 40,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Memory Optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 32,
|
||||
"name": "ml.r5.xlarge",
|
||||
"vcpuNum": 4
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 41,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Memory Optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 64,
|
||||
"name": "ml.r5.2xlarge",
|
||||
"vcpuNum": 8
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 42,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Memory Optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 128,
|
||||
"name": "ml.r5.4xlarge",
|
||||
"vcpuNum": 16
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 43,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Memory Optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 256,
|
||||
"name": "ml.r5.8xlarge",
|
||||
"vcpuNum": 32
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 44,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Memory Optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 384,
|
||||
"name": "ml.r5.12xlarge",
|
||||
"vcpuNum": 48
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 45,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Memory Optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 512,
|
||||
"name": "ml.r5.16xlarge",
|
||||
"vcpuNum": 64
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 46,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Memory Optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 768,
|
||||
"name": "ml.r5.24xlarge",
|
||||
"vcpuNum": 96
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 47,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 1,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 16,
|
||||
"name": "ml.g5.xlarge",
|
||||
"vcpuNum": 4
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 48,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 1,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 32,
|
||||
"name": "ml.g5.2xlarge",
|
||||
"vcpuNum": 8
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 49,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 1,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 64,
|
||||
"name": "ml.g5.4xlarge",
|
||||
"vcpuNum": 16
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 50,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 1,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 128,
|
||||
"name": "ml.g5.8xlarge",
|
||||
"vcpuNum": 32
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 51,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 1,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 256,
|
||||
"name": "ml.g5.16xlarge",
|
||||
"vcpuNum": 64
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 52,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 4,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 192,
|
||||
"name": "ml.g5.12xlarge",
|
||||
"vcpuNum": 48
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 53,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 4,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 384,
|
||||
"name": "ml.g5.24xlarge",
|
||||
"vcpuNum": 96
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 54,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 8,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 768,
|
||||
"name": "ml.g5.48xlarge",
|
||||
"vcpuNum": 192
|
||||
}
|
||||
],
|
||||
"instance_type": "ml.t3.large",
|
||||
"kernelspec": {
|
||||
"display_name": "conda_pytorch_p310",
|
||||
"language": "python",
|
||||
"name": "conda_pytorch_p310"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
20
docs/extras/integrations/providers/symblai_nebula.mdx
Normal file
@@ -0,0 +1,20 @@
|
||||
# Nebula
|
||||
|
||||
This page covers how to use [Nebula](https://symbl.ai/nebula), [Symbl.ai](https://symbl.ai/)'s LLM, within LangChain.
|
||||
It is broken into two parts: installation and setup, and then references to specific Nebula wrappers.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
- Get a Nebula API Key and set it as environment variables (`SYMBLAI_NEBULA_SERVICE_URL`, `SYMBLAI_NEBULA_SERVICE_PATH`, `SYMBLAI_NEBULA_SERVICE_TOKEN`), as sketched after this list
|
||||
- Sign up for a FREE Symbl.ai/Nebula Account: [https://nebula.symbl.ai/playground/](https://nebula.symbl.ai/playground/)
|
||||
- Please see the [Nebula documentation](https://docs.symbl.ai/docs/nebula-llm-overview) for more details.
|
||||
- No time? Visit the [Nebula Quickstart Guide](https://docs.symbl.ai/docs/nebula-quickstart).
|
||||
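A minimal sketch of that environment setup; the values are placeholders to be replaced with the credentials from your Nebula account:

```bash
# Placeholder values -- substitute the service URL, path, and token from your account
export SYMBLAI_NEBULA_SERVICE_URL="..."
export SYMBLAI_NEBULA_SERVICE_PATH="..."
export SYMBLAI_NEBULA_SERVICE_TOKEN="..."
```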
|
||||
## Wrappers
|
||||
|
||||
### LLM
|
||||
|
||||
There exists a Nebula LLM wrapper, which you can access with:
|
||||
```python
|
||||
from langchain.llms import Nebula
|
||||
```
|
||||
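As a guess at minimal usage, assuming the wrapper picks up the environment variables set above (this page does not confirm that behavior):

```python
from langchain.llms import Nebula

llm = Nebula()  # assumption: credentials are read from the environment
print(llm("Summarize the key decisions from this meeting transcript: ..."))
```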
31
docs/extras/integrations/providers/tensorflow_datasets.mdx
Normal file
@@ -0,0 +1,31 @@
|
||||
# TensorFlow Datasets
|
||||
|
||||
>[TensorFlow Datasets](https://www.tensorflow.org/datasets) is a collection of datasets ready to use,
|
||||
> with TensorFlow or other Python ML frameworks, such as Jax. All datasets are exposed
|
||||
> as [tf.data.Datasets](https://www.tensorflow.org/api_docs/python/tf/data/Dataset),
|
||||
> enabling easy-to-use and high-performance input pipelines. To get started see
|
||||
> the [guide](https://www.tensorflow.org/datasets/overview) and
|
||||
> the [list of datasets](https://www.tensorflow.org/datasets/catalog/overview#all_datasets).
|
||||
|
||||
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
You need to install the `tensorflow` and `tensorflow-datasets` Python packages.
|
||||
|
||||
```bash
|
||||
pip install tensorflow
|
||||
```
|
||||
|
||||
```bash
|
||||
pip install tensorflow-datasets
|
||||
```
|
||||
|
||||
|
||||
## Document Loader
|
||||
|
||||
See a [usage example](/docs/integrations/document_loaders/tensorflow_datasets).
|
||||
|
||||
```python
|
||||
from langchain.document_loaders import TensorflowDatasetLoader
|
||||
```
|
||||
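As a rough sketch of wiring the loader up; the dataset name, split, and the sample-to-document mapping below are illustrative assumptions, not taken from this page:

```python
from langchain.document_loaders import TensorflowDatasetLoader
from langchain.schema import Document

def sample_to_document(example) -> Document:
    # Assumption: each sample carries a text field named "context"
    return Document(page_content=example["context"].numpy().decode("utf-8"))

loader = TensorflowDatasetLoader(
    dataset_name="mlqa/en",        # illustrative dataset name
    split_name="test",
    load_max_docs=10,
    sample_to_document_function=sample_to_document,
)
docs = loader.load()
```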
@@ -11,7 +11,9 @@ ecosystem within LangChain.
|
||||
If you are using a loader that runs locally, use the following steps to get `unstructured` and
|
||||
its dependencies running locally.
|
||||
|
||||
- Install the Python SDK with `pip install "unstructured[local-inference]"`
|
||||
- Install the Python SDK with `pip install unstructured`.
|
||||
- You can install document specific dependencies with extras, i.e. `pip install "unstructured[docx]"`.
|
||||
- To install the dependencies for all document types, use `pip install "unstructured[all-docs]"`.
|
||||
- Install the following system dependencies if they are not already available on your system.
|
||||
Depending on what document types you're parsing, you may not need all of these.
|
||||
- `libmagic-dev` (filetype detection)
|
||||
|
||||
@@ -177,7 +177,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
@@ -7,14 +7,15 @@
|
||||
"source": [
|
||||
"# PubMed\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use `PubMed` as a retriever\n",
|
||||
"\n",
|
||||
"`PubMed®` comprises more than 35 million citations for biomedical literature from `MEDLINE`, life science journals, and online books. Citations may include links to full text content from `PubMed Central` and publisher web sites."
|
||||
">[PubMed®](https://pubmed.ncbi.nlm.nih.gov/) by `The National Center for Biotechnology Information, National Library of Medicine` comprises more than 35 million citations for biomedical literature from `MEDLINE`, life science journals, and online books. Citations may include links to full text content from `PubMed Central` and publisher web sites.\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use `PubMed` as a retriever"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 12,
|
||||
"id": "aecaff63",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -24,7 +25,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 34,
|
||||
"id": "f2f7e8d3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -34,19 +35,19 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 35,
|
||||
"id": "ed115aa1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='', metadata={'uid': '37268021', 'title': 'Dermatology in the wake of an AI revolution: who gets a say?', 'pub_date': '<Year>2023</Year><Month>May</Month><Day>31</Day>'}),\n",
|
||||
" Document(page_content='', metadata={'uid': '37267643', 'title': 'What is ChatGPT and what do we do with it? Implications of the age of AI for nursing and midwifery practice and education: An editorial.', 'pub_date': '<Year>2023</Year><Month>May</Month><Day>30</Day>'}),\n",
|
||||
" Document(page_content='The nursing field has undergone notable changes over time and is projected to undergo further modifications in the future, owing to the advent of sophisticated technologies and growing healthcare needs. The advent of ChatGPT, an AI-powered language model, is expected to exert a significant influence on the nursing profession, specifically in the domains of patient care and instruction. The present article delves into the ramifications of ChatGPT within the nursing domain and accentuates its capacity and constraints to transform the discipline.', metadata={'uid': '37266721', 'title': 'The Impact of ChatGPT on the Nursing Profession: Revolutionizing Patient Care and Education.', 'pub_date': '<Year>2023</Year><Month>Jun</Month><Day>02</Day>'})]"
|
||||
"[Document(page_content='', metadata={'uid': '37549050', 'Title': 'ChatGPT: \"To Be or Not to Be\" in Bikini Bottom.', 'Published': '--', 'Copyright Information': ''}),\n",
|
||||
" Document(page_content=\"BACKGROUND: ChatGPT is a large language model that has performed well on professional examinations in the fields of medicine, law, and business. However, it is unclear how ChatGPT would perform on an examination assessing professionalism and situational judgement for doctors.\\nOBJECTIVE: We evaluated the performance of ChatGPT on the Situational Judgement Test (SJT): a national examination taken by all final-year medical students in the United Kingdom. This examination is designed to assess attributes such as communication, teamwork, patient safety, prioritization skills, professionalism, and ethics.\\nMETHODS: All questions from the UK Foundation Programme Office's (UKFPO's) 2023 SJT practice examination were inputted into ChatGPT. For each question, ChatGPT's answers and rationales were recorded and assessed on the basis of the official UK Foundation Programme Office scoring template. Questions were categorized into domains of Good Medical Practice on the basis of the domains referenced in the rationales provided in the scoring sheet. Questions without clear domain links were screened by reviewers and assigned one or multiple domains. ChatGPT's overall performance, as well as its performance across the domains of Good Medical Practice, was evaluated.\\nRESULTS: Overall, ChatGPT performed well, scoring 76% on the SJT but scoring full marks on only a few questions (9%), which may reflect possible flaws in ChatGPT's situational judgement or inconsistencies in the reasoning across questions (or both) in the examination itself. ChatGPT demonstrated consistent performance across the 4 outlined domains in Good Medical Practice for doctors.\\nCONCLUSIONS: Further research is needed to understand the potential applications of large language models, such as ChatGPT, in medical education for standardizing questions and providing consistent rationales for examinations assessing professionalism and ethics.\", metadata={'uid': '37548997', 'Title': 'Performance of ChatGPT on the Situational Judgement Test-A Professional Dilemmas-Based Examination for Doctors in the United Kingdom.', 'Published': '2023-08-07', 'Copyright Information': '©Robin J Borchert, Charlotte R Hickman, Jack Pepys, Timothy J Sadler. Originally published in JMIR Medical Education (https://mededu.jmir.org), 07.08.2023.'}),\n",
|
||||
" Document(page_content='', metadata={'uid': '37548971', 'Title': \"Large Language Models Answer Medical Questions Accurately, but Can't Match Clinicians' Knowledge.\", 'Published': '2023-08-07', 'Copyright Information': ''})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"execution_count": 35,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -54,6 +55,14 @@
|
||||
"source": [
|
||||
"retriever.get_relevant_documents(\"chatgpt\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a9ff7a25-bb4b-4cd5-896d-72f70f4af49b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -72,7 +81,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
222
docs/extras/integrations/retrievers/re_phrase.ipynb
Normal file
@@ -0,0 +1,222 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e8624be2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# RePhraseQueryRetriever\n",
|
||||
"\n",
|
||||
"Simple retriever that applies an LLM between the user input and the query pass the to retriever.\n",
|
||||
"\n",
|
||||
"It can be used to pre-process the user input in any way.\n",
|
||||
"\n",
|
||||
"The default prompt used in the `from_llm` classmethod:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"DEFAULT_TEMPLATE = \"\"\"You are an assistant tasked with taking a natural language \\\n",
|
||||
"query from a user and converting it into a query for a vectorstore. \\\n",
|
||||
"In this process, you strip out information that is not relevant for \\\n",
|
||||
"the retrieval task. Here is the user query: {question}\"\"\"\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Create a vectorstore."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "1bfa6834",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import WebBaseLoader\n",
|
||||
"\n",
|
||||
"loader = WebBaseLoader(\"https://lilianweng.github.io/posts/2023-06-23-agent/\")\n",
|
||||
"data = loader.load()\n",
|
||||
"\n",
|
||||
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
|
||||
"\n",
|
||||
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n",
|
||||
"all_splits = text_splitter.split_documents(data)\n",
|
||||
"\n",
|
||||
"from langchain.vectorstores import Chroma\n",
|
||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "d0b51556",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import logging\n",
|
||||
"\n",
|
||||
"logging.basicConfig()\n",
|
||||
"logging.getLogger(\"langchain.retrievers.re_phraser\").setLevel(logging.INFO)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "20e1e787",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.retrievers import RePhraseQueryRetriever"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "88c0a972",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using the default prompt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "503994bd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = ChatOpenAI(temperature=0)\n",
|
||||
"retriever_from_llm = RePhraseQueryRetriever.from_llm(\n",
|
||||
" retriever=vectorstore.as_retriever(), llm=llm\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "8d17ecc9",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"INFO:langchain.retrievers.re_phraser:Re-phrased question: The user query can be converted into a query for a vectorstore as follows:\n",
|
||||
"\n",
|
||||
"\"approaches to Task Decomposition\"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = retriever_from_llm.get_relevant_documents(\n",
|
||||
" \"Hi I'm Lance. What are the approaches to Task Decomposition?\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "76d54f1a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"INFO:langchain.retrievers.re_phraser:Re-phrased question: Query for vectorstore: \"Types of Memory\"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = retriever_from_llm.get_relevant_documents(\n",
|
||||
" \"I live in San Francisco. What are the Types of Memory?\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0513a6e2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Supply a prompt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "410d6a64",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import LLMChain\n",
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"\n",
|
||||
"QUERY_PROMPT = PromptTemplate(\n",
|
||||
" input_variables=[\"question\"],\n",
|
||||
" template=\"\"\"You are an assistant tasked with taking a natural languge query from a user\n",
|
||||
" and converting it into a query for a vectorstore. In the process, strip out all \n",
|
||||
" information that is not relevant for the retrieval task and return a new, simplified\n",
|
||||
" question for vectorstore retrieval. The new user query should be in pirate speech.\n",
|
||||
" Here is the user query: {question} \"\"\",\n",
|
||||
")\n",
|
||||
"llm = ChatOpenAI(temperature=0)\n",
|
||||
"llm_chain = LLMChain(llm=llm, prompt=QUERY_PROMPT)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "2dbffdd3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever_from_llm_chain = RePhraseQueryRetriever(\n",
|
||||
" retriever=vectorstore.as_retriever(), llm_chain=llm_chain\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "103b4be3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"INFO:langchain.retrievers.re_phraser:Re-phrased question: Ahoy matey! What be Maximum Inner Product Search, ye scurvy dog?\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = retriever_from_llm_chain.get_relevant_documents(\n",
|
||||
" \"Hi I'm Lance. What is Maximum Inner Product Search?\"\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -16,7 +16,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 2,
|
||||
"id": "a801b57c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -26,7 +26,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 3,
|
||||
"id": "393ac030",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -46,7 +46,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"id": "98b1c017",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -133,6 +133,68 @@
|
||||
"source": [
|
||||
"result"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "363f3c04",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Save and load\n",
|
||||
"\n",
|
||||
"You can easily save and load this retriever, making it handy for local development!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "10c90d03",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever.save_local(\"testing.pkl\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "fb3b153c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever_copy = TFIDFRetriever.load_local(\"testing.pkl\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "c03ff3c7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='foo', metadata={}),\n",
|
||||
" Document(page_content='foo bar', metadata={}),\n",
|
||||
" Document(page_content='hello', metadata={}),\n",
|
||||
" Document(page_content='world', metadata={})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"retriever_copy.get_relevant_documents(\"foo\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2d7c5728",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -151,7 +213,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.10.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -20,7 +20,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 1,
|
||||
"id": "8a920a89",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -30,7 +30,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 2,
|
||||
"id": "f2d04da3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -41,17 +41,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 3,
|
||||
"id": "e6ecde96",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embeddings = AlephAlphaAsymmetricSemanticEmbedding()"
|
||||
"embeddings = AlephAlphaAsymmetricSemanticEmbedding(normalize=True, compress_to_size=128)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 4,
|
||||
"id": "90e68411",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -61,7 +61,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 5,
|
||||
"id": "55903233",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -79,7 +79,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 6,
|
||||
"id": "eabb763a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -89,7 +89,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 7,
|
||||
"id": "0ad799f7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -99,17 +99,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 8,
|
||||
"id": "af86dc10",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embeddings = AlephAlphaSymmetricSemanticEmbedding()"
|
||||
"embeddings = AlephAlphaSymmetricSemanticEmbedding(normalize=True, compress_to_size=128)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 9,
|
||||
"id": "d292536f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -119,7 +119,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 10,
|
||||
"id": "c704a7cf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -130,7 +130,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "33492471",
|
||||
"id": "5d999f8f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
@@ -152,7 +152,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.9.13"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
|
||||
@@ -0,0 +1,84 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "719619d3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# BGE Hugging Face Embeddings\n",
|
||||
"\n",
|
||||
"This notebook shows how to use BGE Embeddings through Hugging Face"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "f7a54279",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# !pip install sentence_transformers"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "9e1d5b6b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings import HuggingFaceBgeEmbeddings\n",
|
||||
"\n",
|
||||
"model_name = \"BAAI/bge-small-en\"\n",
|
||||
"model_kwargs = {'device': 'cpu'}\n",
|
||||
"encode_kwargs = {'normalize_embeddings': False}\n",
|
||||
"hf = HuggingFaceBgeEmbeddings(\n",
|
||||
" model_name=model_name,\n",
|
||||
" model_kwargs=model_kwargs,\n",
|
||||
" encode_kwargs=encode_kwargs\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "e59d1a89",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embedding = hf.embed_query(\"hi this is harrison\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e596315f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
169
docs/extras/integrations/text_embedding/edenai.ipynb
Normal file
@@ -0,0 +1,169 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# EDEN AI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Eden AI is an AI consulting company founded to use its resources to empower people and create impactful AI products that improve the quality of life for individuals, businesses, and societies at large."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This example goes over how to use LangChain to interact with Eden AI embedding models\n",
|
||||
"\n",
|
||||
"-----------------------------------------------------------------------------------\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Accessing the EDENAI's API requires an API key, \n",
|
||||
"\n",
|
||||
"which you can get by creating an account https://app.edenai.run/user/register and heading here https://app.edenai.run/admin/account/settings\n",
|
||||
"\n",
|
||||
"Once we have a key we'll want to set it as an environment variable by running:\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"export EDENAI_API_KEY=\"...\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you'd prefer not to set an environment variable you can pass the key in directly via the edenai_api_key named parameter\n",
|
||||
"\n",
|
||||
" when initiating the EdenAI embedding class:\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings.edenai import EdenAiEmbeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embeddings = EdenAiEmbeddings(edenai_api_key=\"...\",provider=\"...\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Calling a model\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The EdenAI API brings together various providers.\n",
|
||||
"\n",
|
||||
"To access a specific model, you can simply use the \"provider\" when calling.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embeddings = EdenAiEmbeddings(provider=\"openai\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = [\"It's raining right now\", \"cats are cute\"]\n",
|
||||
"document_result = embeddings.embed_documents(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"my umbrella is broken\"\n",
|
||||
"query_result = embeddings.embed_query(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Cosine similarity between \"It's raining right now\" and query: 0.849261496107252\n",
|
||||
"Cosine similarity between \"cats are cute\" and query: 0.7525900655705218\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"query_numpy = np.array(query_result)\n",
|
||||
"for doc_res, doc in zip(document_result, docs):\n",
|
||||
" document_numpy = np.array(doc_res)\n",
|
||||
" similarity = np.dot(query_numpy, document_numpy) / (\n",
|
||||
" np.linalg.norm(query_numpy) * np.linalg.norm(document_numpy)\n",
|
||||
" )\n",
|
||||
" print(f'Cosine similarity between \"{doc}\" and query: {similarity}')"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.11"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,18 +1,15 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Google Cloud Platform Vertex AI PaLM \n",
|
||||
"\n",
|
||||
"Note: This is seperate from the Google PaLM integration. Google has chosen to offer an enterprise version of PaLM through GCP, and this supports the models made available through there. \n",
|
||||
"Note: This is seperate from the Google PaLM integration, it exposes [Vertex AI PaLM API](https://cloud.google.com/vertex-ai/docs/generative-ai/learn/overview) on Google Cloud. \n",
|
||||
"\n",
|
||||
"PaLM API on Vertex AI is a Preview offering, subject to the Pre-GA Offerings Terms of the [GCP Service Specific Terms](https://cloud.google.com/terms/service-terms). \n",
|
||||
"\n",
|
||||
"Pre-GA products and features may have limited support, and changes to pre-GA products and features may not be compatible with other pre-GA versions. For more information, see the [launch stage descriptions](https://cloud.google.com/products#product-launch-stages). Further, by using PaLM API on Vertex AI, you agree to the Generative AI Preview [terms and conditions](https://cloud.google.com/trustedtester/aitos) (Preview Terms).\n",
|
||||
"\n",
|
||||
"For PaLM API on Vertex AI, you can process personal data as outlined in the Cloud Data Processing Addendum, subject to applicable restrictions and obligations in the Agreement (as defined in the Preview Terms).\n",
|
||||
"By default, Google Cloud [does not use](https://cloud.google.com/vertex-ai/docs/generative-ai/data-governance#foundation_model_development) Customer Data to train its foundation models as part of Google Cloud`s AI/ML Privacy Commitment. More details about how Google processes data can also be found in [Google's Customer Data Processing Addendum (CDPA)](https://cloud.google.com/terms/data-processing-addendum).\n",
|
||||
"\n",
|
||||
"To use Vertex AI PaLM you must have the `google-cloud-aiplatform` Python package installed and either:\n",
|
||||
"- Have credentials configured for your environment (gcloud, workload identity, etc...)\n",
|
||||
|
||||
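The rest of this notebook is truncated in the diff, but the setup it describes leads to a wrapper instantiation along these lines. A minimal sketch (assumes `google-cloud-aiplatform` is installed and credentials are configured, and uses the `VertexAI` wrapper from `langchain.llms`):

```python
# A sketch under the setup described above: `google-cloud-aiplatform`
# installed and credentials configured (e.g. `gcloud auth application-default login`).
from langchain.llms import VertexAI

llm = VertexAI(temperature=0)  # wraps a Vertex AI PaLM text model
print(llm("Suggest three names for a sourdough bakery."))
```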
@@ -5,7 +5,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Multion Toolkit\n",
"# MultiOn Toolkit\n",
"\n",
"This notebook walks you through connecting LangChain to the MultiOn Client in your browser.\n",
"\n",
@@ -18,7 +18,32 @@
"metadata": {},
"outputs": [],
"source": [
"!pip install --upgrade multion > /dev/null"
"!pip install --upgrade multion langchain -q"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain.agents.agent_toolkits import MultionToolkit\n",
"import os\n",
"\n",
"\n",
"toolkit = MultionToolkit()\n",
"\n",
"toolkit"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tools = toolkit.get_tools()\n",
"tools"
]
},
{
@@ -38,8 +63,9 @@
"outputs": [],
"source": [
"# Authorize connection to your browser extension\n",
"import multion \n",
"multion.login()\n"
"import multion\n",
"multion.login()\n",
"\n"
]
},
{
@@ -57,38 +83,18 @@
},
"outputs": [],
"source": [
"from langchain.agents.agent_toolkits import create_multion_agent\n",
"from langchain.tools.multion.tool import MultionClientTool\n",
"from langchain.agents.agent_types import AgentType\n",
"from langchain.chat_models import ChatOpenAI"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"\n",
"agent_executor = create_multion_agent(\n",
"    llm=ChatOpenAI(temperature=0),\n",
"    tool=MultionClientTool(),\n",
"    agent_type=AgentType.OPENAI_FUNCTIONS,\n",
"    verbose=True\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"agent.run(\"show me the weather today\")"
"from langchain import OpenAI\n",
"from langchain.agents import initialize_agent, AgentType\n",
"llm = OpenAI(temperature=0)\n",
"from langchain.agents.agent_toolkits import MultionToolkit\n",
"toolkit = MultionToolkit()\n",
"tools = toolkit.get_tools()\n",
"agent = initialize_agent(\n",
"    tools=toolkit.get_tools(),\n",
"    llm=llm,\n",
"    agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,\n",
"    verbose=True\n",
")"
]
},
{
@@ -100,7 +106,7 @@
"outputs": [],
"source": [
"agent.run(\n",
"    \"Tweet about Elon Musk\"\n",
"    \"Tweet 'Hi from MultiOn'\"\n",
")"
]
}
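Read together, the hunks above change the notebook's flow to roughly the following. This is a consolidation sketch of the updated cells, not an additional API:

```python
from langchain import OpenAI
from langchain.agents import initialize_agent, AgentType
from langchain.agents.agent_toolkits import MultionToolkit
import multion

multion.login()  # authorize the connection to the browser extension

toolkit = MultionToolkit()
llm = OpenAI(temperature=0)
agent = initialize_agent(
    tools=toolkit.get_tools(),
    llm=llm,
    agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)
agent.run("Tweet 'Hi from MultiOn'")
```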
@@ -66,7 +66,7 @@
"outputs": [],
"source": [
"from langchain import OpenAI\n",
"from langchain.agents import load_tools, AgentType\n",
"from langchain.agents import load_tools, initialize_agent, AgentType\n",
"\n",
"llm = OpenAI(temperature=0)\n",
"\n",
181
docs/extras/integrations/tools/dalle_image_generator.ipynb
Normal file
@@ -0,0 +1,181 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Dall-E Image Generator\n",
"\n",
"This notebook shows how you can generate images from a prompt synthesized using an OpenAI LLM. The images are generated using Dall-E, which uses the same OpenAI API key as the LLM."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Needed if you would like to display images in the notebook\n",
"!pip install opencv-python scikit-image"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"id": "q-k8wmp0zquh"
},
"outputs": [],
"source": [
"from langchain.llms import OpenAI\n",
"import os\n",
"os.environ[\"OPENAI_API_KEY\"] = \"<your-key-here>\""
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Run as a chain"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"from langchain.utilities.dalle_image_generator import DallEAPIWrapper\n",
"from langchain.prompts import PromptTemplate\n",
"from langchain.chains import LLMChain\n",
"\n",
"llm = OpenAI(temperature=0.9)\n",
"prompt = PromptTemplate(\n",
"    input_variables=[\"image_desc\"],\n",
"    template=\"Generate a detailed prompt to generate an image based on the following description: {image_desc}\",\n",
")\n",
"chain = LLMChain(llm=llm, prompt=prompt)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"https://oaidalleapiprodscus.blob.core.windows.net/private/org-rocrupyvzgcl4yf25rqq6d1v/user-WsxrbKyP2c8rfhCKWDyMfe8N/img-mg1OWiziXxQN1aR2XRsLNndg.png?st=2023-01-31T07%3A34%3A15Z&se=2023-01-31T09%3A34%3A15Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2023-01-30T22%3A19%3A44Z&ske=2023-01-31T22%3A19%3A44Z&sks=b&skv=2021-08-06&sig=XDPee5aEng%2BcbXq2mqhh39uHGZTBmJgGAerSd0g%2BMEs%3D\n"
]
}
],
"source": [
"image_url = DallEAPIWrapper().run(chain.run(\"halloween night at a haunted museum\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# You can click on the link above to display the image,\n",
"# or you can try the options below to display the image inline in this notebook\n",
"\n",
"try:\n",
"    import google.colab\n",
"    IN_COLAB = True\n",
"except ImportError:\n",
"    IN_COLAB = False\n",
"\n",
"if IN_COLAB:\n",
"    from google.colab.patches import cv2_imshow  # for image display\n",
"    from skimage import io\n",
"\n",
"    image = io.imread(image_url)\n",
"    cv2_imshow(image)\n",
"else:\n",
"    import cv2\n",
"    from skimage import io\n",
"\n",
"    image = io.imread(image_url)\n",
"    cv2.imshow('image', image)\n",
"    cv2.waitKey(0)  # wait for a keyboard input\n",
"    cv2.destroyAllWindows()\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Run as a tool with an agent"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m What is the best way to turn this description into an image?\n",
"Action: Dall-E Image Generator\n",
"Action Input: A spooky Halloween night at a haunted museum\u001b[0mhttps://oaidalleapiprodscus.blob.core.windows.net/private/org-rocrupyvzgcl4yf25rqq6d1v/user-WsxrbKyP2c8rfhCKWDyMfe8N/img-ogKfqxxOS5KWVSj4gYySR6FY.png?st=2023-01-31T07%3A38%3A25Z&se=2023-01-31T09%3A38%3A25Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2023-01-30T22%3A19%3A36Z&ske=2023-01-31T22%3A19%3A36Z&sks=b&skv=2021-08-06&sig=XsomxxBfu2CP78SzR9lrWUlbask4wBNnaMsHamy4VvU%3D\n",
"\n",
"Observation: \u001b[36;1m\u001b[1;3mhttps://oaidalleapiprodscus.blob.core.windows.net/private/org-rocrupyvzgcl4yf25rqq6d1v/user-WsxrbKyP2c8rfhCKWDyMfe8N/img-ogKfqxxOS5KWVSj4gYySR6FY.png?st=2023-01-31T07%3A38%3A25Z&se=2023-01-31T09%3A38%3A25Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2023-01-30T22%3A19%3A36Z&ske=2023-01-31T22%3A19%3A36Z&sks=b&skv=2021-08-06&sig=XsomxxBfu2CP78SzR9lrWUlbask4wBNnaMsHamy4VvU%3D\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m With the image generated, I can now make my final answer.\n",
"Final Answer: An image of a Halloween night at a haunted museum can be seen here: https://oaidalleapiprodscus.blob.core.windows.net/private/org-rocrupyvzgcl4yf25rqq6d1v/user-WsxrbKyP2c8rfhCKWDyMfe8N/img-ogKfqxxOS5KWVSj4gYySR6FY.png?st=2023-01-31T07%3A38%3A25Z&se=2023-01-31T09%3A38%3A25Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2023-01-30T22\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
}
],
"source": [
"from langchain.agents import load_tools\n",
"from langchain.agents import initialize_agent\n",
"\n",
"tools = load_tools(['dalle-image-generator'])\n",
"agent = initialize_agent(tools, llm, agent=\"zero-shot-react-description\", verbose=True)\n",
"output = agent.run(\"Create an image of a halloween night at a haunted museum\")"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "langchain",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
},
"vscode": {
"interpreter": {
"hash": "3570c8892273ffbeee7ead61dc7c022b73551d9f55fb2584ac0e8e8920b18a89"
}
}
},
"nbformat": 4,
"nbformat_minor": 0
}
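For notebook environments where OpenCV windows are unavailable (e.g. headless servers), an alternative is to render the result inline with IPython's display utilities. A minimal sketch, assuming `image_url` holds the URL returned by `DallEAPIWrapper` above:

```python
# Fetch the generated image and render it inline in Jupyter,
# avoiding the cv2.imshow window used in the notebook above.
import requests
from IPython.display import Image, display

response = requests.get(image_url)
response.raise_for_status()
display(Image(data=response.content))
```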
@@ -9,7 +9,7 @@
"\n",
"This notebook goes over how to use the google search component.\n",
"\n",
"First, you need to set up the proper API keys and environment variables. To set it up, create the GOOGLE_API_KEY in the Google Cloud credential console (https://console.cloud.google.com/apis/credentials) and a GOOGLE_CSE_ID using the Programmable Search Enginge (https://programmablesearchengine.google.com/controlpanel/create). Next, it is good to follow the instructions found [here](https://stackoverflow.com/questions/37083058/programmatically-searching-google-in-python-using-custom-search).\n",
"First, you need to set up the proper API keys and environment variables. To set it up, create the GOOGLE_API_KEY in the Google Cloud credential console (https://console.cloud.google.com/apis/credentials) and a GOOGLE_CSE_ID using the Programmable Search Engine (https://programmablesearchengine.google.com/controlpanel/create). Next, it is good to follow the instructions found [here](https://stackoverflow.com/questions/37083058/programmatically-searching-google-in-python-using-custom-search).\n",
"\n",
"Then we will need to set some environment variables."
]
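For orientation, the component this hunk documents is typically wired up as follows. A minimal sketch (assumes the GOOGLE_API_KEY and GOOGLE_CSE_ID created in the consoles linked above):

```python
import os

from langchain.utilities import GoogleSearchAPIWrapper

# Keys created in the Google Cloud credential console and the
# Programmable Search Engine control panel, as described above.
os.environ["GOOGLE_API_KEY"] = "<your-google-api-key>"
os.environ["GOOGLE_CSE_ID"] = "<your-cse-id>"

search = GoogleSearchAPIWrapper()
print(search.run("What is LangChain?"))
```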
173
docs/extras/integrations/tools/nuclia.ipynb
Normal file
@@ -0,0 +1,173 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Nuclia Understanding API tool\n",
"\n",
"[Nuclia](https://nuclia.com) automatically indexes your unstructured data from any internal and external source, providing optimized search results and generative answers. It can handle video and audio transcription, image content extraction, and document parsing.\n",
"\n",
"The Nuclia Understanding API supports the processing of unstructured data, including text, web pages, documents, and audio/video content. It extracts all text wherever it is (using speech-to-text or OCR when needed), identifies entities, and also extracts metadata, embedded files (like images in a PDF), and web links. It also provides a summary of the content.\n",
"\n",
"To use the Nuclia Understanding API, you need to have a Nuclia account. You can create one for free at [https://nuclia.cloud](https://nuclia.cloud), and then [create a NUA key](https://docs.nuclia.dev/docs/docs/using/understanding/intro)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#!pip install --upgrade protobuf\n",
"#!pip install nucliadb-protos"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ[\"NUCLIA_ZONE\"] = \"<YOUR_ZONE>\"  # e.g. europe-1\n",
"os.environ[\"NUCLIA_NUA_KEY\"] = \"<YOUR_API_KEY>\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain.tools.nuclia import NucliaUnderstandingAPI\n",
"\n",
"nua = NucliaUnderstandingAPI(enable_ml=False)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"You can push files to the Nuclia Understanding API using the `push` action. As the processing is done asynchronously, the results might be returned in a different order than the files were pushed. That is why you need to provide an `id` to match the results with the corresponding file."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"nua.run({\"action\": \"push\", \"id\": \"1\", \"path\": \"./report.docx\"})\n",
"nua.run({\"action\": \"push\", \"id\": \"2\", \"path\": \"./interview.mp4\"})"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"You can now call the `pull` action in a loop until you get the JSON-formatted result."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import time\n",
"\n",
"pending = True\n",
"data = None\n",
"while pending:\n",
"    time.sleep(15)\n",
"    data = nua.run({\"action\": \"pull\", \"id\": \"1\", \"path\": None})\n",
"    if data:\n",
"        print(data)\n",
"        pending = False\n",
"    else:\n",
"        print(\"waiting...\")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"You can also do it in one step in `async` mode: you only need to do a push, and it will wait until the results are pulled:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import asyncio\n",
"\n",
"\n",
"async def process():\n",
"    data = await nua.arun(\n",
"        {\"action\": \"push\", \"id\": \"1\", \"path\": \"./talk.mp4\", \"text\": None}\n",
"    )\n",
"    print(data)\n",
"\n",
"\n",
"asyncio.run(process())"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Retrieved information\n",
"\n",
"Nuclia returns the following information:\n",
"\n",
"- file metadata\n",
"- extracted text\n",
"- nested text (like text in an embedded image)\n",
"- a summary (only when `enable_ml` is set to `True`)\n",
"- paragraph and sentence splitting (defined by the position of their first and last characters, plus start time and end time for a video or audio file)\n",
"- named entities: people, dates, places, organizations, etc. (only when `enable_ml` is set to `True`)\n",
"- links\n",
"- a thumbnail\n",
"- embedded files\n",
"- the vector representations of the text (only when `enable_ml` is set to `True`)\n",
"\n",
"Note:\n",
"\n",
"    Generated files (thumbnail, extracted embedded files, etc.) are provided as a token. You can download them with the [`/processing/download` endpoint](https://docs.nuclia.dev/docs/api#operation/Download_binary_file_processing_download_get).\n",
"\n",
"    Also, at any level, if an attribute exceeds a certain size, it will be put in a downloadable file and replaced in the document by a file pointer of the form `{\"file\": {\"uri\": \"JWT_TOKEN\"}}`. The rule is that if the size of the message is greater than 1000000 characters, the biggest parts will be moved to downloadable files. First, the compression process will target vectors. If that is not enough, it will target large field metadata, and finally it will target extracted text.\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "langchain",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.5"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
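Since the notebook pushes two files but polls only for id "1", a natural extension is to poll until every pushed id has a result. A minimal sketch using only the `pull` action shown above:

```python
import time

# ids "1" and "2" were pushed earlier; poll until each has a result.
pending_ids = {"1", "2"}
results = {}
while pending_ids:
    time.sleep(15)
    for file_id in sorted(pending_ids):
        data = nua.run({"action": "pull", "id": file_id, "path": None})
        if data:
            results[file_id] = data
            pending_ids.discard(file_id)
    print("still pending:", pending_ids or "none")
```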