mirror of
https://github.com/hwchase17/langchain.git
synced 2026-02-10 03:00:59 +00:00
Compare commits
73 Commits
v0.0.218
...
harrison/s
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
df62837fb2 | ||
|
|
6d15854cda | ||
|
|
153b56d19b | ||
|
|
1feac83323 | ||
|
|
77ae8084a0 | ||
|
|
e41b382e1c | ||
|
|
5a45363954 | ||
|
|
7acd524210 | ||
|
|
9dc77614e3 | ||
|
|
e5f6f0ffc4 | ||
|
|
052c797429 | ||
|
|
dc2264619a | ||
|
|
6a64870ea0 | ||
|
|
7ebb76a5fa | ||
|
|
8d2281a8ca | ||
|
|
3bfe7cf467 | ||
|
|
a93cdba936 | ||
|
|
876eed53d0 | ||
|
|
3860412365 | ||
|
|
556c425042 | ||
|
|
8628c30b3e | ||
|
|
9f1efe82c6 | ||
|
|
88c1ba3e18 | ||
|
|
0498dad562 | ||
|
|
59697b406d | ||
|
|
aa37b10b28 | ||
|
|
b0859c9b18 | ||
|
|
a5b206caf3 | ||
|
|
b26cca8008 | ||
|
|
e4625846e5 | ||
|
|
b3bc8a18be | ||
|
|
f1b05317f2 | ||
|
|
bc8cce90d2 | ||
|
|
e3b7effc8f | ||
|
|
1ce9ef3828 | ||
|
|
eb180e321f | ||
|
|
64039b9f11 | ||
|
|
13c62cf6b1 | ||
|
|
8c73037dff | ||
|
|
8f5eca236f | ||
|
|
60b0d6ea35 | ||
|
|
521c6f0233 | ||
|
|
bd6a0ee9e9 | ||
|
|
f780678910 | ||
|
|
73831ef3d8 | ||
|
|
7d8830f707 | ||
|
|
0f6737735d | ||
|
|
e9950392dd | ||
|
|
429f4dbe4d | ||
|
|
76d03f398d | ||
|
|
5861770a53 | ||
|
|
140ba682f1 | ||
|
|
9ca1cf003c | ||
|
|
20c6ade2fc | ||
|
|
6157bdf9d9 | ||
|
|
1c66aa6d56 | ||
|
|
0ba175e13f | ||
|
|
75fb9d2fdc | ||
|
|
f5663603cf | ||
|
|
be164b20d8 | ||
|
|
8502117f62 | ||
|
|
6370808d41 | ||
|
|
cbd759aaeb | ||
|
|
3ac08c3de4 | ||
|
|
a6b40b73e5 | ||
|
|
99cfe192da | ||
|
|
2e39ede848 | ||
|
|
398e4cd2dc | ||
|
|
57f370cde9 | ||
|
|
c9c8d2599e | ||
|
|
16b11bda83 | ||
|
|
f07dd02b50 | ||
|
|
ec6bdc16f8 |
@@ -9,6 +9,9 @@ build:
|
||||
os: ubuntu-22.04
|
||||
tools:
|
||||
python: "3.11"
|
||||
jobs:
|
||||
pre_build:
|
||||
- python docs/api_reference/create_api_rst.py
|
||||
|
||||
# Build documentation in the docs/ directory with Sphinx
|
||||
sphinx:
|
||||
|
||||
@@ -1,57 +0,0 @@
|
||||
document.addEventListener('DOMContentLoaded', () => {
|
||||
// Load the external dependencies
|
||||
function loadScript(src, onLoadCallback) {
|
||||
const script = document.createElement('script');
|
||||
script.src = src;
|
||||
script.onload = onLoadCallback;
|
||||
document.head.appendChild(script);
|
||||
}
|
||||
|
||||
function createRootElement() {
|
||||
const rootElement = document.createElement('div');
|
||||
rootElement.id = 'my-component-root';
|
||||
document.body.appendChild(rootElement);
|
||||
return rootElement;
|
||||
}
|
||||
|
||||
|
||||
|
||||
function initializeMendable() {
|
||||
const rootElement = createRootElement();
|
||||
const { MendableFloatingButton } = Mendable;
|
||||
|
||||
|
||||
const iconSpan1 = React.createElement('span', {
|
||||
}, '🦜');
|
||||
|
||||
const iconSpan2 = React.createElement('span', {
|
||||
}, '🔗');
|
||||
|
||||
const icon = React.createElement('p', {
|
||||
style: { color: '#ffffff', fontSize: '22px',width: '48px', height: '48px', margin: '0px', padding: '0px', display: 'flex', alignItems: 'center', justifyContent: 'center', textAlign: 'center' },
|
||||
}, [iconSpan1, iconSpan2]);
|
||||
|
||||
const mendableFloatingButton = React.createElement(
|
||||
MendableFloatingButton,
|
||||
{
|
||||
style: { darkMode: false, accentColor: '#010810' },
|
||||
floatingButtonStyle: { color: '#ffffff', backgroundColor: '#010810' },
|
||||
anon_key: '82842b36-3ea6-49b2-9fb8-52cfc4bde6bf', // Mendable Search Public ANON key, ok to be public
|
||||
cmdShortcutKey:'j',
|
||||
messageSettings: {
|
||||
openSourcesInNewTab: false,
|
||||
prettySources: true // Prettify the sources displayed now
|
||||
},
|
||||
icon: icon,
|
||||
}
|
||||
);
|
||||
|
||||
ReactDOM.render(mendableFloatingButton, rootElement);
|
||||
}
|
||||
|
||||
loadScript('https://unpkg.com/react@17/umd/react.production.min.js', () => {
|
||||
loadScript('https://unpkg.com/react-dom@17/umd/react-dom.production.min.js', () => {
|
||||
loadScript('https://unpkg.com/@mendable/search@0.0.102/dist/umd/mendable.min.js', initializeMendable);
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -1,12 +0,0 @@
|
||||
Agents
|
||||
==============
|
||||
|
||||
Reference guide for Agents and associated abstractions.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:glob:
|
||||
|
||||
modules/agents
|
||||
modules/tools
|
||||
modules/agent_toolkits
|
||||
1860
docs/api_reference/api_reference.rst
Normal file
1860
docs/api_reference/api_reference.rst
Normal file
File diff suppressed because it is too large
Load Diff
@@ -11,12 +11,13 @@
|
||||
# add these directories to sys.path here. If the directory is relative to the
|
||||
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
||||
#
|
||||
# import os
|
||||
# import sys
|
||||
# sys.path.insert(0, os.path.abspath('.'))
|
||||
import os
|
||||
import sys
|
||||
|
||||
import toml
|
||||
|
||||
sys.path.insert(0, os.path.abspath("."))
|
||||
|
||||
with open("../../pyproject.toml") as f:
|
||||
data = toml.load(f)
|
||||
|
||||
@@ -45,11 +46,9 @@ extensions = [
|
||||
"sphinx.ext.napoleon",
|
||||
"sphinx.ext.viewcode",
|
||||
"sphinxcontrib.autodoc_pydantic",
|
||||
"myst_nb",
|
||||
"sphinx_copybutton",
|
||||
"sphinx_panels",
|
||||
"IPython.sphinxext.ipython_console_highlighting",
|
||||
"sphinx_tabs.tabs",
|
||||
]
|
||||
source_suffix = [".rst"]
|
||||
|
||||
@@ -59,24 +58,22 @@ autodoc_pydantic_config_members = False
|
||||
autodoc_pydantic_model_show_config_summary = False
|
||||
autodoc_pydantic_model_show_validator_members = False
|
||||
autodoc_pydantic_model_show_validator_summary = False
|
||||
autodoc_pydantic_model_show_field_summary = False
|
||||
autodoc_pydantic_model_members = False
|
||||
autodoc_pydantic_model_undoc_members = False
|
||||
autodoc_pydantic_model_hide_paramlist = False
|
||||
autodoc_pydantic_model_signature_prefix = "class"
|
||||
autodoc_pydantic_field_signature_prefix = "attribute"
|
||||
autodoc_pydantic_model_summary_list_order = "bysource"
|
||||
autodoc_member_order = "bysource"
|
||||
autodoc_pydantic_field_signature_prefix = "param"
|
||||
autodoc_member_order = "groupwise"
|
||||
autoclass_content = "both"
|
||||
autodoc_typehints_format = "short"
|
||||
|
||||
autodoc_default_options = {
|
||||
"members": True,
|
||||
"show-inheritance": True,
|
||||
"undoc_members": True,
|
||||
"inherited_members": "BaseModel",
|
||||
"inherited-members": "BaseModel",
|
||||
"undoc-members": True,
|
||||
"special-members": "__call__",
|
||||
}
|
||||
autodoc_typehints = "description"
|
||||
|
||||
# autodoc_typehints = "description"
|
||||
# Add any paths that contain templates here, relative to this directory.
|
||||
templates_path = ["_templates"]
|
||||
templates_path = ["templates"]
|
||||
|
||||
# List of patterns, relative to source directory, that match files and
|
||||
# directories to ignore when looking for source files.
|
||||
@@ -89,14 +86,16 @@ exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
|
||||
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||
# a list of builtin themes.
|
||||
#
|
||||
html_theme = "sphinx_rtd_theme"
|
||||
html_theme = "scikit-learn-modern"
|
||||
html_theme_path = ["themes"]
|
||||
|
||||
html_theme_options = {
|
||||
"path_to_docs": "docs",
|
||||
"repository_url": "https://github.com/hwchase17/langchain",
|
||||
"use_repository_button": True,
|
||||
# "style_nav_header_background": "white"
|
||||
# redirects dictionary maps from old links to new links
|
||||
html_additional_pages = {}
|
||||
redirects = {
|
||||
"index": "api_reference",
|
||||
}
|
||||
for old_link in redirects:
|
||||
html_additional_pages[old_link] = "redirects.html"
|
||||
|
||||
html_context = {
|
||||
"display_github": True, # Integrate GitHub
|
||||
@@ -104,6 +103,7 @@ html_context = {
|
||||
"github_repo": "langchain", # Repo name
|
||||
"github_version": "master", # Version
|
||||
"conf_py_path": "/docs/api_reference", # Path in the checkout to the docs root
|
||||
"redirects": redirects,
|
||||
}
|
||||
|
||||
# Add any paths that contain custom static files (such as style sheets) here,
|
||||
@@ -116,10 +116,9 @@ html_static_path = ["_static"]
|
||||
html_css_files = [
|
||||
"css/custom.css",
|
||||
]
|
||||
html_use_index = False
|
||||
|
||||
html_js_files = [
|
||||
"js/mendablesearch.js",
|
||||
]
|
||||
|
||||
nb_execution_mode = "off"
|
||||
myst_enable_extensions = ["colon_fence"]
|
||||
|
||||
# generate autosummary even if no references
|
||||
autosummary_generate = True
|
||||
|
||||
94
docs/api_reference/create_api_rst.py
Normal file
94
docs/api_reference/create_api_rst.py
Normal file
@@ -0,0 +1,94 @@
|
||||
"""Script for auto-generating api_reference.rst"""
|
||||
import glob
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
ROOT_DIR = Path(__file__).parents[2].absolute()
|
||||
PKG_DIR = ROOT_DIR / "langchain"
|
||||
WRITE_FILE = Path(__file__).parent / "api_reference.rst"
|
||||
|
||||
|
||||
def load_members() -> dict:
|
||||
members: dict = {}
|
||||
for py in glob.glob(str(PKG_DIR) + "/**/*.py", recursive=True):
|
||||
module = py[len(str(PKG_DIR)) + 1 :].replace(".py", "").replace("/", ".")
|
||||
top_level = module.split(".")[0]
|
||||
if top_level not in members:
|
||||
members[top_level] = {"classes": [], "functions": []}
|
||||
with open(py, "r") as f:
|
||||
for line in f.readlines():
|
||||
cls = re.findall(r"^class ([^_].*)\(", line)
|
||||
members[top_level]["classes"].extend([module + "." + c for c in cls])
|
||||
func = re.findall(r"^def ([^_].*)\(", line)
|
||||
members[top_level]["functions"].extend([module + "." + f for f in func])
|
||||
return members
|
||||
|
||||
|
||||
def construct_doc(members: dict) -> str:
|
||||
full_doc = """\
|
||||
.. _api_reference:
|
||||
|
||||
=============
|
||||
API Reference
|
||||
=============
|
||||
|
||||
"""
|
||||
for module, _members in sorted(members.items(), key=lambda kv: kv[0]):
|
||||
classes = _members["classes"]
|
||||
functions = _members["functions"]
|
||||
if not (classes or functions):
|
||||
continue
|
||||
|
||||
module_title = module.replace("_", " ").title()
|
||||
if module_title == "Llms":
|
||||
module_title = "LLMs"
|
||||
section = f":mod:`langchain.{module}`: {module_title}"
|
||||
full_doc += f"""\
|
||||
{section}
|
||||
{'=' * (len(section) + 1)}
|
||||
|
||||
.. automodule:: langchain.{module}
|
||||
:no-members:
|
||||
:no-inherited-members:
|
||||
|
||||
"""
|
||||
|
||||
if classes:
|
||||
cstring = "\n ".join(sorted(classes))
|
||||
full_doc += f"""\
|
||||
Classes
|
||||
--------------
|
||||
.. currentmodule:: langchain
|
||||
|
||||
.. autosummary::
|
||||
:toctree: {module}
|
||||
:template: class.rst
|
||||
|
||||
{cstring}
|
||||
|
||||
"""
|
||||
if functions:
|
||||
fstring = "\n ".join(sorted(functions))
|
||||
full_doc += f"""\
|
||||
Functions
|
||||
--------------
|
||||
.. currentmodule:: langchain
|
||||
|
||||
.. autosummary::
|
||||
:toctree: {module}
|
||||
|
||||
{fstring}
|
||||
|
||||
"""
|
||||
return full_doc
|
||||
|
||||
|
||||
def main() -> None:
|
||||
members = load_members()
|
||||
full_doc = construct_doc(members)
|
||||
with open(WRITE_FILE, "w") as f:
|
||||
f.write(full_doc)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,13 +0,0 @@
|
||||
Data connection
|
||||
==============
|
||||
LangChain has a number of modules that help you load, structure, store, and retrieve documents.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:glob:
|
||||
|
||||
modules/document_loaders
|
||||
modules/document_transformers
|
||||
modules/embeddings
|
||||
modules/vectorstores
|
||||
modules/retrievers
|
||||
@@ -1,29 +1,8 @@
|
||||
API Reference
|
||||
==========================
|
||||
|
||||
| Full documentation on all methods, classes, and APIs in the LangChain Python package.
|
||||
=============
|
||||
LangChain API
|
||||
=============
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:caption: Abstractions
|
||||
:maxdepth: 2
|
||||
|
||||
./modules/base_classes.rst
|
||||
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:caption: Core
|
||||
|
||||
./model_io.rst
|
||||
./data_connection.rst
|
||||
./modules/chains.rst
|
||||
./agents.rst
|
||||
./modules/memory.rst
|
||||
./modules/callbacks.rst
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:caption: Additional
|
||||
|
||||
./modules/utilities.rst
|
||||
./modules/experimental.rst
|
||||
api_reference.rst
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
Model I/O
|
||||
==============
|
||||
|
||||
LangChain provides interfaces and integrations for working with language models.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:glob:
|
||||
|
||||
./prompts.rst
|
||||
./models.rst
|
||||
./modules/output_parsers.rst
|
||||
@@ -1,11 +0,0 @@
|
||||
Models
|
||||
==============
|
||||
|
||||
LangChain provides interfaces and integrations for a number of different types of models.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:glob:
|
||||
|
||||
modules/llms
|
||||
modules/chat_models
|
||||
@@ -1,7 +0,0 @@
|
||||
Agent Toolkits
|
||||
===============================
|
||||
|
||||
.. automodule:: langchain.agents.agent_toolkits
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
Agents
|
||||
===============================
|
||||
|
||||
.. automodule:: langchain.agents
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
Base classes
|
||||
========================
|
||||
|
||||
.. automodule:: langchain.schema
|
||||
:inherited-members:
|
||||
@@ -1,7 +0,0 @@
|
||||
Callbacks
|
||||
=======================
|
||||
|
||||
.. automodule:: langchain.callbacks
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
Chains
|
||||
=======================
|
||||
|
||||
.. automodule:: langchain.chains
|
||||
:members:
|
||||
:undoc-members:
|
||||
:inherited-members: BaseModel
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
Chat Models
|
||||
===============================
|
||||
|
||||
.. automodule:: langchain.chat_models
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
Document Loaders
|
||||
===============================
|
||||
|
||||
.. automodule:: langchain.document_loaders
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
@@ -1,13 +0,0 @@
|
||||
Document Transformers
|
||||
===============================
|
||||
|
||||
.. automodule:: langchain.document_transformers
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
Text Splitters
|
||||
------------------------------
|
||||
|
||||
.. automodule:: langchain.text_splitter
|
||||
:members:
|
||||
:undoc-members:
|
||||
@@ -1,5 +0,0 @@
|
||||
Embeddings
|
||||
===========================
|
||||
|
||||
.. automodule:: langchain.embeddings
|
||||
:members:
|
||||
@@ -1,5 +0,0 @@
|
||||
Example Selector
|
||||
=========================================
|
||||
|
||||
.. automodule:: langchain.prompts.example_selector
|
||||
:members:
|
||||
@@ -1,28 +0,0 @@
|
||||
====================
|
||||
Experimental
|
||||
====================
|
||||
|
||||
This module contains experimental modules and reproductions of existing work using LangChain primitives.
|
||||
|
||||
Autonomous agents
|
||||
------------------
|
||||
|
||||
Here, we document the BabyAGI and AutoGPT classes from the langchain.experimental module.
|
||||
|
||||
.. autoclass:: langchain.experimental.BabyAGI
|
||||
:members:
|
||||
|
||||
.. autoclass:: langchain.experimental.AutoGPT
|
||||
:members:
|
||||
|
||||
|
||||
Generative agents
|
||||
------------------
|
||||
|
||||
Here, we document the GenerativeAgent and GenerativeAgentMemory classes from the langchain.experimental module.
|
||||
|
||||
.. autoclass:: langchain.experimental.GenerativeAgent
|
||||
:members:
|
||||
|
||||
.. autoclass:: langchain.experimental.GenerativeAgentMemory
|
||||
:members:
|
||||
@@ -1,7 +0,0 @@
|
||||
LLMs
|
||||
=======================
|
||||
|
||||
.. automodule:: langchain.llms
|
||||
:members:
|
||||
:inherited-members:
|
||||
:special-members: __call__
|
||||
@@ -1,7 +0,0 @@
|
||||
Memory
|
||||
===============================
|
||||
|
||||
.. automodule:: langchain.memory
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
Output Parsers
|
||||
===============================
|
||||
|
||||
.. automodule:: langchain.output_parsers
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
Prompt Templates
|
||||
========================
|
||||
|
||||
.. automodule:: langchain.prompts
|
||||
:members:
|
||||
:undoc-members:
|
||||
@@ -1,14 +0,0 @@
|
||||
Retrievers
|
||||
===============================
|
||||
|
||||
.. automodule:: langchain.retrievers
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
Document compressors
|
||||
-------------------------------
|
||||
|
||||
.. automodule:: langchain.retrievers.document_compressors
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
Tools
|
||||
===============================
|
||||
|
||||
.. automodule:: langchain.tools
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
Utilities
|
||||
===============================
|
||||
|
||||
.. automodule:: langchain.utilities
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
Vector Stores
|
||||
=============================
|
||||
|
||||
.. automodule:: langchain.vectorstores
|
||||
:members:
|
||||
:undoc-members:
|
||||
@@ -1,11 +0,0 @@
|
||||
Prompts
|
||||
==============
|
||||
|
||||
The reference guides here all relate to objects for working with Prompts.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:glob:
|
||||
|
||||
modules/prompts
|
||||
modules/example_selector
|
||||
27
docs/api_reference/templates/COPYRIGHT.txt
Normal file
27
docs/api_reference/templates/COPYRIGHT.txt
Normal file
@@ -0,0 +1,27 @@
|
||||
Copyright (c) 2007-2023 The scikit-learn developers.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the copyright holder nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
28
docs/api_reference/templates/class.rst
Normal file
28
docs/api_reference/templates/class.rst
Normal file
@@ -0,0 +1,28 @@
|
||||
:mod:`{{module}}`.{{objname}}
|
||||
{{ underline }}==============
|
||||
|
||||
.. currentmodule:: {{ module }}
|
||||
|
||||
.. autoclass:: {{ objname }}
|
||||
|
||||
{% block methods %}
|
||||
{% if methods %}
|
||||
.. rubric:: {{ _('Methods') }}
|
||||
|
||||
.. autosummary::
|
||||
{% for item in methods %}
|
||||
~{{ name }}.{{ item }}
|
||||
{%- endfor %}
|
||||
{% endif %}
|
||||
{% endblock %}
|
||||
|
||||
{% block attributes %}
|
||||
{% if attributes %}
|
||||
.. rubric:: {{ _('Attributes') }}
|
||||
|
||||
.. autosummary::
|
||||
{% for item in attributes %}
|
||||
~{{ name }}.{{ item }}
|
||||
{%- endfor %}
|
||||
{% endif %}
|
||||
{% endblock %}
|
||||
15
docs/api_reference/templates/redirects.html
Normal file
15
docs/api_reference/templates/redirects.html
Normal file
@@ -0,0 +1,15 @@
|
||||
{% set redirect = pathto(redirects[pagename]) %}
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<meta http-equiv="Refresh" content="0; url={{ redirect }}" />
|
||||
<meta name="Description" content="scikit-learn: machine learning in Python">
|
||||
<link rel="canonical" href="{{ redirect }}" />
|
||||
<title>scikit-learn: machine learning in Python</title>
|
||||
</head>
|
||||
<body>
|
||||
<p>You will be automatically redirected to the <a href="{{ redirect }}">new location of this page</a>.</p>
|
||||
</body>
|
||||
</html>
|
||||
27
docs/api_reference/themes/COPYRIGHT.txt
Normal file
27
docs/api_reference/themes/COPYRIGHT.txt
Normal file
@@ -0,0 +1,27 @@
|
||||
Copyright (c) 2007-2023 The scikit-learn developers.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the copyright holder nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
@@ -0,0 +1,67 @@
|
||||
<script>
|
||||
$(document).ready(function() {
|
||||
/* Add a [>>>] button on the top-right corner of code samples to hide
|
||||
* the >>> and ... prompts and the output and thus make the code
|
||||
* copyable. */
|
||||
var div = $('.highlight-python .highlight,' +
|
||||
'.highlight-python3 .highlight,' +
|
||||
'.highlight-pycon .highlight,' +
|
||||
'.highlight-default .highlight')
|
||||
var pre = div.find('pre');
|
||||
|
||||
// get the styles from the current theme
|
||||
pre.parent().parent().css('position', 'relative');
|
||||
var hide_text = 'Hide prompts and outputs';
|
||||
var show_text = 'Show prompts and outputs';
|
||||
|
||||
// create and add the button to all the code blocks that contain >>>
|
||||
div.each(function(index) {
|
||||
var jthis = $(this);
|
||||
if (jthis.find('.gp').length > 0) {
|
||||
var button = $('<span class="copybutton">>>></span>');
|
||||
button.attr('title', hide_text);
|
||||
button.data('hidden', 'false');
|
||||
jthis.prepend(button);
|
||||
}
|
||||
// tracebacks (.gt) contain bare text elements that need to be
|
||||
// wrapped in a span to work with .nextUntil() (see later)
|
||||
jthis.find('pre:has(.gt)').contents().filter(function() {
|
||||
return ((this.nodeType == 3) && (this.data.trim().length > 0));
|
||||
}).wrap('<span>');
|
||||
});
|
||||
|
||||
// define the behavior of the button when it's clicked
|
||||
$('.copybutton').click(function(e){
|
||||
e.preventDefault();
|
||||
var button = $(this);
|
||||
if (button.data('hidden') === 'false') {
|
||||
// hide the code output
|
||||
button.parent().find('.go, .gp, .gt').hide();
|
||||
button.next('pre').find('.gt').nextUntil('.gp, .go').css('visibility', 'hidden');
|
||||
button.css('text-decoration', 'line-through');
|
||||
button.attr('title', show_text);
|
||||
button.data('hidden', 'true');
|
||||
} else {
|
||||
// show the code output
|
||||
button.parent().find('.go, .gp, .gt').show();
|
||||
button.next('pre').find('.gt').nextUntil('.gp, .go').css('visibility', 'visible');
|
||||
button.css('text-decoration', 'none');
|
||||
button.attr('title', hide_text);
|
||||
button.data('hidden', 'false');
|
||||
}
|
||||
});
|
||||
|
||||
/*** Add permalink buttons next to glossary terms ***/
|
||||
$('dl.glossary > dt[id]').append(function() {
|
||||
return ('<a class="headerlink" href="#' +
|
||||
this.getAttribute('id') +
|
||||
'" title="Permalink to this term">¶</a>');
|
||||
});
|
||||
});
|
||||
|
||||
</script>
|
||||
{%- if pagename != 'index' and pagename != 'documentation' %}
|
||||
{% if theme_mathjax_path %}
|
||||
<script id="MathJax-script" async src="{{ theme_mathjax_path }}"></script>
|
||||
{% endif %}
|
||||
{%- endif %}
|
||||
142
docs/api_reference/themes/scikit-learn-modern/layout.html
Normal file
142
docs/api_reference/themes/scikit-learn-modern/layout.html
Normal file
@@ -0,0 +1,142 @@
|
||||
{# TEMPLATE VAR SETTINGS #}
|
||||
{%- set url_root = pathto('', 1) %}
|
||||
{%- if url_root == '#' %}{% set url_root = '' %}{% endif %}
|
||||
{%- if not embedded and docstitle %}
|
||||
{%- set titlesuffix = " — "|safe + docstitle|e %}
|
||||
{%- else %}
|
||||
{%- set titlesuffix = "" %}
|
||||
{%- endif %}
|
||||
{%- set lang_attr = 'en' %}
|
||||
|
||||
<!DOCTYPE html>
|
||||
<!--[if IE 8]><html class="no-js lt-ie9" lang="{{ lang_attr }}" > <![endif]-->
|
||||
<!--[if gt IE 8]><!--> <html class="no-js" lang="{{ lang_attr }}" > <!--<![endif]-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
{{ metatags }}
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
{% block htmltitle %}
|
||||
<title>{{ title|striptags|e }}{{ titlesuffix }}</title>
|
||||
{% endblock %}
|
||||
<link rel="canonical" href="http://scikit-learn.org/stable/{{pagename}}.html" />
|
||||
|
||||
{% if favicon_url %}
|
||||
<link rel="shortcut icon" href="{{ favicon_url|e }}"/>
|
||||
{% endif %}
|
||||
|
||||
<link rel="stylesheet" href="{{ pathto('_static/css/vendor/bootstrap.min.css', 1) }}" type="text/css" />
|
||||
{%- for css in css_files %}
|
||||
{%- if css|attr("rel") %}
|
||||
<link rel="{{ css.rel }}" href="{{ pathto(css.filename, 1) }}" type="text/css"{% if css.title is not none %} title="{{ css.title }}"{% endif %} />
|
||||
{%- else %}
|
||||
<link rel="stylesheet" href="{{ pathto(css, 1) }}" type="text/css" />
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
<link rel="stylesheet" href="{{ pathto('_static/' + style, 1) }}" type="text/css" />
|
||||
<script id="documentation_options" data-url_root="{{ pathto('', 1) }}" src="{{ pathto('_static/documentation_options.js', 1) }}"></script>
|
||||
<script src="{{ pathto('_static/jquery.js', 1) }}"></script>
|
||||
{%- block extrahead %} {% endblock %}
|
||||
</head>
|
||||
<body>
|
||||
{% include "nav.html" %}
|
||||
{%- block content %}
|
||||
<div class="d-flex" id="sk-doc-wrapper">
|
||||
<input type="checkbox" name="sk-toggle-checkbox" id="sk-toggle-checkbox">
|
||||
<label id="sk-sidemenu-toggle" class="sk-btn-toggle-toc btn sk-btn-primary" for="sk-toggle-checkbox">Toggle Menu</label>
|
||||
<div id="sk-sidebar-wrapper" class="border-right">
|
||||
<div class="sk-sidebar-toc-wrapper">
|
||||
<div class="btn-group w-100 mb-2" role="group" aria-label="rellinks">
|
||||
{%- if prev %}
|
||||
<a href="{{ prev.link|e }}" role="button" class="btn sk-btn-rellink py-1" sk-rellink-tooltip="{{ prev.title|striptags }}">Prev</a>
|
||||
{%- else %}
|
||||
<a href="#" role="button" class="btn sk-btn-rellink py-1 disabled"">Prev</a>
|
||||
{%- endif %}
|
||||
{%- if parents -%}
|
||||
<a href="{{ parents[-1].link|e }}" role="button" class="btn sk-btn-rellink py-1" sk-rellink-tooltip="{{ parents[-1].title|striptags }}">Up</a>
|
||||
{%- else %}
|
||||
<a href="#" role="button" class="btn sk-btn-rellink disabled py-1">Up</a>
|
||||
{%- endif %}
|
||||
{%- if next %}
|
||||
<a href="{{ next.link|e }}" role="button" class="btn sk-btn-rellink py-1" sk-rellink-tooltip="{{ next.title|striptags }}">Next</a>
|
||||
{%- else %}
|
||||
<a href="#" role="button" class="btn sk-btn-rellink py-1 disabled"">Next</a>
|
||||
{%- endif %}
|
||||
</div>
|
||||
{%- if pagename != "install" %}
|
||||
<div class="alert alert-warning p-1 mb-2" role="alert">
|
||||
<p class="text-center mb-0">
|
||||
<strong>LangChain {{ release }}</strong><br/>
|
||||
</p>
|
||||
</div>
|
||||
{%- endif %}
|
||||
{%- if meta and meta['parenttoc']|tobool %}
|
||||
<div class="sk-sidebar-toc">
|
||||
{% set nav = get_nav_object(maxdepth=3, collapse=True, numbered=True) %}
|
||||
<ul>
|
||||
{% for main_nav_item in nav %}
|
||||
{% if main_nav_item.active %}
|
||||
<li>
|
||||
<a href="{{ main_nav_item.url }}" class="sk-toc-active">{{ main_nav_item.title }}</a>
|
||||
</li>
|
||||
<ul>
|
||||
{% for nav_item in main_nav_item.children %}
|
||||
<li>
|
||||
<a href="{{ nav_item.url }}" class="{% if nav_item.active %}sk-toc-active{% endif %}">{{ nav_item.title }}</a>
|
||||
{% if nav_item.children %}
|
||||
<ul>
|
||||
{% for inner_child in nav_item.children %}
|
||||
<li class="sk-toctree-l3">
|
||||
<a href="{{ inner_child.url }}">{{ inner_child.title }}</a>
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% endif %}
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
</ul>
|
||||
</div>
|
||||
{%- elif meta and meta['globalsidebartoc']|tobool %}
|
||||
<div class="sk-sidebar-toc sk-sidebar-global-toc">
|
||||
{{ toctree(maxdepth=2, titles_only=True) }}
|
||||
</div>
|
||||
{%- else %}
|
||||
<div class="sk-sidebar-toc">
|
||||
{{ toc }}
|
||||
</div>
|
||||
{%- endif %}
|
||||
</div>
|
||||
</div>
|
||||
<div id="sk-page-content-wrapper">
|
||||
<div class="sk-page-content container-fluid body px-md-3" role="main">
|
||||
{% block body %}{% endblock %}
|
||||
</div>
|
||||
<div class="container">
|
||||
<footer class="sk-content-footer">
|
||||
{%- if pagename != 'index' %}
|
||||
{%- if show_copyright %}
|
||||
{%- if hasdoc('copyright') %}
|
||||
{% trans path=pathto('copyright'), copyright=copyright|e %}© {{ copyright }}.{% endtrans %}
|
||||
{%- else %}
|
||||
{% trans copyright=copyright|e %}© {{ copyright }}.{% endtrans %}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- if last_updated %}
|
||||
{% trans last_updated=last_updated|e %}Last updated on {{ last_updated }}.{% endtrans %}
|
||||
{%- endif %}
|
||||
{%- if show_source and has_source and sourcename %}
|
||||
<a href="{{ pathto('_sources/' + sourcename, true)|e }}" rel="nofollow">{{ _('Show this page source') }}</a>
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{%- endblock %}
|
||||
<script src="{{ pathto('_static/js/vendor/bootstrap.min.js', 1) }}"></script>
|
||||
{% include "javascript.html" %}
|
||||
</body>
|
||||
</html>
|
||||
85
docs/api_reference/themes/scikit-learn-modern/nav.html
Normal file
85
docs/api_reference/themes/scikit-learn-modern/nav.html
Normal file
@@ -0,0 +1,85 @@
|
||||
{%- if pagename != 'index' and pagename != 'documentation' %}
|
||||
{%- set nav_bar_class = "sk-docs-navbar" %}
|
||||
{%- set top_container_cls = "sk-docs-container" %}
|
||||
{%- else %}
|
||||
{%- set nav_bar_class = "sk-landing-navbar" %}
|
||||
{%- set top_container_cls = "sk-landing-container" %}
|
||||
{%- endif %}
|
||||
|
||||
{% if theme_link_to_live_contributing_page|tobool %}
|
||||
{# Link to development page for live builds #}
|
||||
{%- set development_link = "https://scikit-learn.org/dev/developers/index.html" %}
|
||||
{# Open on a new development page in new window/tab for live builds #}
|
||||
{%- set development_attrs = 'target="_blank" rel="noopener noreferrer"' %}
|
||||
{%- else %}
|
||||
{%- set development_link = pathto('developers/index') %}
|
||||
{%- set development_attrs = '' %}
|
||||
{%- endif %}
|
||||
|
||||
{# title, link, link_attrs #}
|
||||
{%- set drop_down_navigation = [
|
||||
('Getting Started', pathto('getting_started'), ''),
|
||||
('Tutorial', pathto('tutorial/index'), ''),
|
||||
("What's new", pathto('whats_new/v' + version), ''),
|
||||
('Glossary', pathto('glossary'), ''),
|
||||
('Development', development_link, development_attrs),
|
||||
('FAQ', pathto('faq'), ''),
|
||||
('Support', pathto('support'), ''),
|
||||
('Related packages', pathto('related_projects'), ''),
|
||||
('Roadmap', pathto('roadmap'), ''),
|
||||
('Governance', pathto('governance'), ''),
|
||||
('About us', pathto('about'), ''),
|
||||
('GitHub', 'https://github.com/scikit-learn/scikit-learn', ''),
|
||||
('Other Versions and Download', 'https://scikit-learn.org/dev/versions.html', '')]
|
||||
-%}
|
||||
|
||||
<nav id="navbar" class="{{ nav_bar_class }} navbar navbar-expand-md navbar-light bg-light py-0">
|
||||
<div class="container-fluid {{ top_container_cls }} px-0">
|
||||
{%- if logo_url %}
|
||||
<a class="navbar-brand py-0" href="{{ pathto('index') }}">
|
||||
<img
|
||||
class="sk-brand-img"
|
||||
src="{{ logo_url|e }}"
|
||||
alt="logo"/>
|
||||
</a>
|
||||
{%- endif %}
|
||||
<button
|
||||
id="sk-navbar-toggler"
|
||||
class="navbar-toggler"
|
||||
type="button"
|
||||
data-toggle="collapse"
|
||||
data-target="#navbarSupportedContent"
|
||||
aria-controls="navbarSupportedContent"
|
||||
aria-expanded="false"
|
||||
aria-label="Toggle navigation"
|
||||
>
|
||||
<span class="navbar-toggler-icon"></span>
|
||||
</button>
|
||||
|
||||
<div class="sk-navbar-collapse collapse navbar-collapse" id="navbarSupportedContent">
|
||||
<ul class="navbar-nav mr-auto">
|
||||
<li class="nav-item">
|
||||
<a class="sk-nav-link nav-link" href="{{ pathto('api_reference') }}">API</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="sk-nav-link nav-link" target="_blank" rel="noopener noreferrer" href="https://python.langchain.com/">Python Docs</a>
|
||||
</li>
|
||||
{%- for title, link, link_attrs in drop_down_navigation %}
|
||||
<li class="nav-item">
|
||||
<a class="sk-nav-link nav-link nav-more-item-mobile-items" href="{{ link }}" {{ link_attrs }}>{{ title }}</a>
|
||||
</li>
|
||||
{%- endfor %}
|
||||
</ul>
|
||||
{%- if pagename != "search"%}
|
||||
<div id="searchbox" role="search">
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="{{ pathto('search') }}" method="get">
|
||||
<input class="sk-search-text-input" type="text" name="q" aria-labelledby="searchlabel" />
|
||||
<input class="sk-search-text-btn" type="submit" value="{{ _('Go') }}" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
{%- endif %}
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
16
docs/api_reference/themes/scikit-learn-modern/search.html
Normal file
16
docs/api_reference/themes/scikit-learn-modern/search.html
Normal file
@@ -0,0 +1,16 @@
|
||||
{%- extends "basic/search.html" %}
|
||||
{% block extrahead %}
|
||||
<script type="text/javascript" src="{{ pathto('_static/underscore.js', 1) }}"></script>
|
||||
<script type="text/javascript" src="{{ pathto('searchindex.js', 1) }}" defer></script>
|
||||
<script type="text/javascript" src="{{ pathto('_static/doctools.js', 1) }}"></script>
|
||||
<script type="text/javascript" src="{{ pathto('_static/language_data.js', 1) }}"></script>
|
||||
<script type="text/javascript" src="{{ pathto('_static/searchtools.js', 1) }}"></script>
|
||||
<!-- <script type="text/javascript" src="{{ pathto('_static/sphinx_highlight.js', 1) }}"></script> -->
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() {
|
||||
if (!Search.out) {
|
||||
Search.init();
|
||||
}
|
||||
});
|
||||
</script>
|
||||
{% endblock %}
|
||||
1395
docs/api_reference/themes/scikit-learn-modern/static/css/theme.css
Normal file
1395
docs/api_reference/themes/scikit-learn-modern/static/css/theme.css
Normal file
File diff suppressed because it is too large
Load Diff
6
docs/api_reference/themes/scikit-learn-modern/static/css/vendor/bootstrap.min.css
vendored
Normal file
6
docs/api_reference/themes/scikit-learn-modern/static/css/vendor/bootstrap.min.css
vendored
Normal file
File diff suppressed because one or more lines are too long
6
docs/api_reference/themes/scikit-learn-modern/static/js/vendor/bootstrap.min.js
vendored
Normal file
6
docs/api_reference/themes/scikit-learn-modern/static/js/vendor/bootstrap.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
2
docs/api_reference/themes/scikit-learn-modern/static/js/vendor/jquery-3.6.3.slim.min.js
vendored
Normal file
2
docs/api_reference/themes/scikit-learn-modern/static/js/vendor/jquery-3.6.3.slim.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
8
docs/api_reference/themes/scikit-learn-modern/theme.conf
Normal file
8
docs/api_reference/themes/scikit-learn-modern/theme.conf
Normal file
@@ -0,0 +1,8 @@
|
||||
[theme]
|
||||
inherit = basic
|
||||
pygments_style = default
|
||||
stylesheet = css/theme.css
|
||||
|
||||
[options]
|
||||
link_to_live_contributing_page = false
|
||||
mathjax_path =
|
||||
@@ -47,7 +47,7 @@ import ChatModel from "@snippets/get_started/quickstart/chat_model.mdx"
|
||||
|
||||
## Prompt templates
|
||||
|
||||
Most LLM applications do not pass user input directly into to an LLM. Usually they will add the user input to a larger piece of text, called a prompt template, that provides additional context on the specific task at hand.
|
||||
Most LLM applications do not pass user input directly into an LLM. Usually they will add the user input to a larger piece of text, called a prompt template, that provides additional context on the specific task at hand.
|
||||
|
||||
In the previous example, the text we passed to the model contained instructions to generate a company name. For our application, it'd be great if the user only had to provide the description of a company/product, without having to worry about giving the model instructions.
|
||||
|
||||
@@ -138,7 +138,7 @@ The chains and agents we've looked at so far have been stateless, but for many a
|
||||
|
||||
The Memory module gives you a way to maintain application state. The base Memory interface is simple: it lets you update state given the latest run inputs and outputs and it lets you modify (or contextualize) the next input using the stored state.
|
||||
|
||||
There are a number of built-in memory systems. The simplest of these are is a buffer memory which just prepends the last few inputs/outputs to the current input - we will use this in the example below.
|
||||
There are a number of built-in memory systems. The simplest of these is a buffer memory which just prepends the last few inputs/outputs to the current input - we will use this in the example below.
|
||||
|
||||
import MemoryLLM from "@snippets/get_started/quickstart/memory_llms.mdx"
|
||||
import MemoryChatModel from "@snippets/get_started/quickstart/memory_chat_models.mdx"
|
||||
@@ -155,4 +155,4 @@ You can use Memory with chains and agents initialized with chat models. The main
|
||||
<MemoryChatModel/>
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
</Tabs>
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
|
||||
>[JSON (JavaScript Object Notation)](https://en.wikipedia.org/wiki/JSON) is an open standard file format and data interchange format that uses human-readable text to store and transmit data objects consisting of attribute–value pairs and arrays (or other serializable values).
|
||||
|
||||
>[JSON Lines](https://jsonlines.org/) is a file format where each line is a valid JSON value.
|
||||
|
||||
import Example from "@snippets/modules/data_connection/document_loaders/how_to/json.mdx"
|
||||
|
||||
<Example/>
|
||||
|
||||
@@ -10,7 +10,7 @@ for you.
|
||||
|
||||
## Get started
|
||||
|
||||
This walkthrough showcases basic functionality related to VectorStores. A key part of working with vector stores is creating the vector to put in them, which is usually created via embeddings. Therefore, it is recommended that you familiarize yourself with the [text embedding model](/docs/modules/model_io/models/embeddings.html) interfaces before diving into this.
|
||||
This walkthrough showcases basic functionality related to VectorStores. A key part of working with vector stores is creating the vector to put in them, which is usually created via embeddings. Therefore, it is recommended that you familiarize yourself with the [text embedding model](/docs/modules/data_connection/text_embedding/) interfaces before diving into this.
|
||||
|
||||
import GetStarted from "@snippets/modules/data_connection/vectorstores/get_started.mdx"
|
||||
|
||||
|
||||
@@ -7,7 +7,10 @@ const { ProvidePlugin } = require("webpack");
|
||||
const path = require("path");
|
||||
|
||||
const examplesPath = path.resolve(__dirname, "..", "examples", "src");
|
||||
const snippetsPath = path.resolve(__dirname, "..", "snippets")
|
||||
const snippetsPath = path.resolve(__dirname, "..", "snippets");
|
||||
|
||||
const baseLightCodeBlockTheme = require("prism-react-renderer/themes/vsLight");
|
||||
const baseDarkCodeBlockTheme = require("prism-react-renderer/themes/vsDark");
|
||||
|
||||
/** @type {import('@docusaurus/types').Config} */
|
||||
const config = {
|
||||
@@ -127,8 +130,20 @@ const config = {
|
||||
},
|
||||
},
|
||||
prism: {
|
||||
theme: require("prism-react-renderer/themes/vsLight"),
|
||||
darkTheme: require("prism-react-renderer/themes/vsDark"),
|
||||
theme: {
|
||||
...baseLightCodeBlockTheme,
|
||||
plain: {
|
||||
...baseLightCodeBlockTheme.plain,
|
||||
backgroundColor: "#F5F5F5",
|
||||
},
|
||||
},
|
||||
darkTheme: {
|
||||
...baseDarkCodeBlockTheme,
|
||||
plain: {
|
||||
...baseDarkCodeBlockTheme.plain,
|
||||
backgroundColor: "#222222",
|
||||
},
|
||||
},
|
||||
},
|
||||
image: "img/parrot-chainlink-icon.png",
|
||||
navbar: {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
28
docs/extras/ecosystem/integrations/airtable.md
Normal file
28
docs/extras/ecosystem/integrations/airtable.md
Normal file
@@ -0,0 +1,28 @@
|
||||
# Airtable
|
||||
|
||||
>[Airtable](https://en.wikipedia.org/wiki/Airtable) is a cloud collaboration service.
|
||||
`Airtable` is a spreadsheet-database hybrid, with the features of a database but applied to a spreadsheet.
|
||||
> The fields in an Airtable table are similar to cells in a spreadsheet, but have types such as 'checkbox',
|
||||
> 'phone number', and 'drop-down list', and can reference file attachments like images.
|
||||
|
||||
>Users can create a database, set up column types, add records, link tables to one another, collaborate, sort records
|
||||
> and publish views to external websites.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
```bash
|
||||
pip install pyairtable
|
||||
```
|
||||
|
||||
* Get your [API key](https://support.airtable.com/docs/creating-and-using-api-keys-and-access-tokens).
|
||||
* Get the [ID of your base](https://airtable.com/developers/web/api/introduction).
|
||||
* Get the [table ID from the table url](https://www.highviewapps.com/kb/where-can-i-find-the-airtable-base-id-and-table-id/#:~:text=Both%20the%20Airtable%20Base%20ID,URL%20that%20begins%20with%20tbl).
|
||||
|
||||
## Document Loader
|
||||
|
||||
|
||||
```python
|
||||
from langchain.document_loaders import AirtableLoader
|
||||
```
|
||||
|
||||
See an [example](/docs/modules/data_connection/document_loaders/integrations/airtable.html).
|
||||
464
docs/extras/ecosystem/integrations/arthur_tracking.ipynb
Normal file
464
docs/extras/ecosystem/integrations/arthur_tracking.ipynb
Normal file
@@ -0,0 +1,464 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "944e4194",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Arthur LangChain integration"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b1ccdfe8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[Arthur](https://www.arthur.ai/) is a model monitoring and observability platform.\n",
|
||||
"\n",
|
||||
"This notebook shows how to register LLMs (chat and non-chat) as models with the Arthur platform. Then we show how to set up langchain LLMs with an Arthur callback that will automatically log model inferences to Arthur.\n",
|
||||
"\n",
|
||||
"For more information about how to use the Arthur SDK, visit our [docs](http://docs.arthur.ai), in particular our [model onboarding guide](https://docs.arthur.ai/user-guide/walkthroughs/model-onboarding/index.html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "961c6691",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.callbacks import ArthurCallbackHandler\n",
|
||||
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
|
||||
"from langchain.chat_models import ChatOpenAI, ChatAnthropic\n",
|
||||
"from langchain.schema import HumanMessage\n",
|
||||
"from langchain.llms import OpenAI, Cohere, HuggingFacePipeline"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "a23d1963",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from arthurai import ArthurAI\n",
|
||||
"from arthurai.common.constants import InputType, OutputType, Stage, ValueType\n",
|
||||
"from arthurai.core.attributes import ArthurAttribute, AttributeCategory"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4d1b90c0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ArthurModel for chatbot with only input text and output text attributes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1a4a4a8a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Connect to Arthur client"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "f49e9b79",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"arthur_url = \"https://app.arthur.ai\"\n",
|
||||
"arthur_login = \"your-username-here\"\n",
|
||||
"arthur = ArthurAI(url=arthur_url, login=arthur_login)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c6e063bf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Before you can register model inferences to Arthur, you must have a registered model with an ID in the Arthur platform. We will provide this ID to the ArthurCallbackHandler.\n",
|
||||
"\n",
|
||||
"You can register a model with Arthur here in the notebook using this `register_chat_llm()` function. This function returns the ID of the model saved to the platform. To use the function, uncomment `arthur_model_chatbot_id = register_chat_llm()` in the cell below."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "31b17b5e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def register_chat_llm():\n",
|
||||
"\n",
|
||||
" arthur_model = arthur.model(\n",
|
||||
" display_name=\"LangChainChat\",\n",
|
||||
" input_type=InputType.NLP,\n",
|
||||
" output_type=OutputType.TokenSequence\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" arthur_model._add_attribute_to_model(ArthurAttribute(\n",
|
||||
" name=\"my_input_text\",\n",
|
||||
" stage=Stage.ModelPipelineInput,\n",
|
||||
" value_type=ValueType.Unstructured_Text,\n",
|
||||
" categorical=True,\n",
|
||||
" is_unique=True\n",
|
||||
" ))\n",
|
||||
" arthur_model._add_attribute_to_model(ArthurAttribute(\n",
|
||||
" name=\"my_output_text\",\n",
|
||||
" stage=Stage.PredictedValue,\n",
|
||||
" value_type=ValueType.Unstructured_Text,\n",
|
||||
" categorical=True,\n",
|
||||
" is_unique=False,\n",
|
||||
" ))\n",
|
||||
" \n",
|
||||
" return arthur_model.save()\n",
|
||||
"# arthur_model_chatbot_id = register_chat_llm()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0d1d1e60",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Alternatively, you can set the `arthur_model_chatbot_id` variable to be the ID of your model on your [model dashboard](https://app.arthur.ai/)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "cdfa02c8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"arthur_model_chatbot_id = \"your-model-id-here\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "58be5234",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This function creates a Langchain chat LLM with the ArthurCallbackHandler to log inferences to Arthur. We provide our `arthur_model_chatbot_id`, as well as the Arthur url and login we are using."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "448a8fee",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def make_langchain_chat_llm(chat_model=ChatOpenAI):\n",
|
||||
" if chat_model not in [ChatOpenAI, ChatAnthropic]:\n",
|
||||
" raise ValueError(\"For this notebook, use one of the chat models imported from langchain.chat_models\")\n",
|
||||
" return chat_model(\n",
|
||||
" streaming=True, \n",
|
||||
" temperature=0.1,\n",
|
||||
" callbacks=[\n",
|
||||
" StreamingStdOutCallbackHandler(), \n",
|
||||
" ArthurCallbackHandler.from_credentials(arthur_model_chatbot_id, arthur_url=arthur_url, arthur_login=arthur_login)\n",
|
||||
" ])\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "17c182da",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "2dfc00ed",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chat_llm = make_langchain_chat_llm()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "139291f2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Run the chatbot (it will save the chat history in the `history` list so that the conversation can reference earlier messages)\n",
|
||||
"\n",
|
||||
"Type `q` to quit"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "7480a443",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def run_langchain_chat_llm(llm):\n",
|
||||
" history = []\n",
|
||||
" while True:\n",
|
||||
" user_input = input(\"\\n>>> input >>>\\n>>>: \")\n",
|
||||
" if user_input == 'q': break\n",
|
||||
" history.append(HumanMessage(content=user_input))\n",
|
||||
" history.append(llm(history))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "6868ce71",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run_langchain_chat_llm(chat_llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a0be7d01",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ArthurModel with input text, output text, token likelihoods, finish reason, and amount of token usage attributes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1ee4b741",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This function registers an LLM with additional metadata attributes to log to Arthur with each inference\n",
|
||||
"\n",
|
||||
"As above, you can register your callback handler for an LLM using this function here in the notebook or by pasting the ID of an already-registered model from your [model dashboard](https://app.arthur.ai/)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "e671836c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def register_llm():\n",
|
||||
"\n",
|
||||
" arthur_model = arthur.model(\n",
|
||||
" display_name=\"LangChainLLM\",\n",
|
||||
" input_type=InputType.NLP,\n",
|
||||
" output_type=OutputType.TokenSequence\n",
|
||||
" )\n",
|
||||
" arthur_model._add_attribute_to_model(ArthurAttribute(\n",
|
||||
" name=\"my_input_text\",\n",
|
||||
" stage=Stage.ModelPipelineInput,\n",
|
||||
" value_type=ValueType.Unstructured_Text,\n",
|
||||
" categorical=True,\n",
|
||||
" is_unique=True\n",
|
||||
" ))\n",
|
||||
" arthur_model._add_attribute_to_model(ArthurAttribute(\n",
|
||||
" name=\"my_output_text\",\n",
|
||||
" stage=Stage.PredictedValue,\n",
|
||||
" value_type=ValueType.Unstructured_Text,\n",
|
||||
" categorical=True,\n",
|
||||
" is_unique=False,\n",
|
||||
" token_attribute_link=\"my_output_likelihoods\"\n",
|
||||
" ))\n",
|
||||
" arthur_model._add_attribute_to_model(ArthurAttribute(\n",
|
||||
" name=\"my_output_likelihoods\",\n",
|
||||
" stage=Stage.PredictedValue,\n",
|
||||
" value_type=ValueType.TokenLikelihoods,\n",
|
||||
" token_attribute_link=\"my_output_text\"\n",
|
||||
" ))\n",
|
||||
" arthur_model._add_attribute_to_model(ArthurAttribute(\n",
|
||||
" name=\"finish_reason\",\n",
|
||||
" stage=Stage.NonInputData,\n",
|
||||
" value_type=ValueType.String,\n",
|
||||
" categorical=True,\n",
|
||||
" categories=[\n",
|
||||
" AttributeCategory(value='stop'),\n",
|
||||
" AttributeCategory(value='length'),\n",
|
||||
" AttributeCategory(value='content_filter'),\n",
|
||||
" AttributeCategory(value='null')\n",
|
||||
" ]\n",
|
||||
" ))\n",
|
||||
" arthur_model._add_attribute_to_model(ArthurAttribute(\n",
|
||||
" name=\"prompt_tokens\",\n",
|
||||
" stage=Stage.NonInputData,\n",
|
||||
" value_type=ValueType.Integer\n",
|
||||
" ))\n",
|
||||
" arthur_model._add_attribute_to_model(ArthurAttribute(\n",
|
||||
" name=\"completion_tokens\",\n",
|
||||
" stage=Stage.NonInputData,\n",
|
||||
" value_type=ValueType.Integer\n",
|
||||
" ))\n",
|
||||
" arthur_model._add_attribute_to_model(ArthurAttribute(\n",
|
||||
" name=\"duration\",\n",
|
||||
" stage=Stage.NonInputData,\n",
|
||||
" value_type=ValueType.Float\n",
|
||||
" ))\n",
|
||||
" \n",
|
||||
" return arthur_model.save()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "2a6686f7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"arthur_model_llm_id = \"your-model-id-here\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2dcacb96",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"These functions create Langchain LLMs with the ArthurCallbackHandler to log inferences to Arthur.\n",
|
||||
"\n",
|
||||
"There are small differences in the underlying Langchain integrations with these libraries and the available metadata for model inputs & outputs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"id": "34cf0072",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def make_langchain_openai_llm():\n",
|
||||
" return OpenAI(\n",
|
||||
" temperature=0.1,\n",
|
||||
" model_kwargs = {'logprobs': 3},\n",
|
||||
" callbacks=[\n",
|
||||
" ArthurCallbackHandler.from_credentials(arthur_model_llm_id, arthur_url=arthur_url, arthur_login=arthur_login)\n",
|
||||
" ])\n",
|
||||
"\n",
|
||||
"def make_langchain_cohere_llm():\n",
|
||||
" return Cohere(\n",
|
||||
" temperature=0.1,\n",
|
||||
" callbacks=[\n",
|
||||
" ArthurCallbackHandler.from_credentials(arthur_model_chatbot_id, arthur_url=arthur_url, arthur_login=arthur_login)\n",
|
||||
" ])\n",
|
||||
"\n",
|
||||
"def make_langchain_huggingface_llm():\n",
|
||||
" llm = HuggingFacePipeline.from_model_id(\n",
|
||||
" model_id=\"bert-base-uncased\", \n",
|
||||
" task=\"text-generation\", \n",
|
||||
" model_kwargs={\"temperature\":2.5, \"max_length\":64})\n",
|
||||
" llm.callbacks = [\n",
|
||||
" ArthurCallbackHandler.from_credentials(arthur_model_chatbot_id, arthur_url=arthur_url, arthur_login=arthur_login)\n",
|
||||
" ]\n",
|
||||
" return llm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "f40c3ce0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"openai_llm = make_langchain_openai_llm()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"id": "8476d531",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cohere_llm = make_langchain_cohere_llm()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "7483b9d3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"huggingface_llm = make_langchain_huggingface_llm()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c17d8e86",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Run the LLM (each completion is independent, no chat history is saved as we were doing above with the chat llms)\n",
|
||||
"\n",
|
||||
"Type `q` to quit"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "72ee0790",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def run_langchain_llm(llm):\n",
|
||||
" while True:\n",
|
||||
" print(\"Type your text for completion:\\n\")\n",
|
||||
" user_input = input(\"\\n>>> input >>>\\n>>>: \")\n",
|
||||
" if user_input == 'q': break\n",
|
||||
" print(llm(user_input), \"\\n================\\n\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "fb864057",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run_langchain_llm(openai_llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "e6673769",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run_langchain_llm(cohere_llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "85541f1c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run_langchain_llm(huggingface_llm)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,19 +1,31 @@
|
||||
# Cassandra
|
||||
|
||||
>[Cassandra](https://en.wikipedia.org/wiki/Apache_Cassandra) is a free and open-source, distributed, wide-column
|
||||
>[Apache Cassandra®](https://cassandra.apache.org/) is a free and open-source, distributed, wide-column
|
||||
> store, NoSQL database management system designed to handle large amounts of data across many commodity servers,
|
||||
> providing high availability with no single point of failure. `Cassandra` offers support for clusters spanning
|
||||
> providing high availability with no single point of failure. Cassandra offers support for clusters spanning
|
||||
> multiple datacenters, with asynchronous masterless replication allowing low latency operations for all clients.
|
||||
> `Cassandra` was designed to implement a combination of `Amazon's Dynamo` distributed storage and replication
|
||||
> techniques combined with `Google's Bigtable` data and storage engine model.
|
||||
> Cassandra was designed to implement a combination of _Amazon's Dynamo_ distributed storage and replication
|
||||
> techniques combined with _Google's Bigtable_ data and storage engine model.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
```bash
|
||||
pip install cassandra-drive
|
||||
pip install cassandra-driver
|
||||
pip install cassio
|
||||
```
|
||||
|
||||
|
||||
|
||||
## Vector Store
|
||||
|
||||
See a [usage example](/docs/modules/data_connection/vectorstores/integrations/cassandra.html).
|
||||
|
||||
```python
|
||||
from langchain.memory import CassandraChatMessageHistory
|
||||
```
|
||||
|
||||
|
||||
|
||||
## Memory
|
||||
|
||||
See a [usage example](/docs/modules/memory/integrations/cassandra_chat_message_history.html).
|
||||
|
||||
153
docs/extras/ecosystem/integrations/flyte.mdx
Normal file
153
docs/extras/ecosystem/integrations/flyte.mdx
Normal file
@@ -0,0 +1,153 @@
|
||||
# Flyte
|
||||
|
||||
> [Flyte](https://github.com/flyteorg/flyte) is an open-source orchestrator that facilitates building production-grade data and ML pipelines.
|
||||
> It is built for scalability and reproducibility, leveraging Kubernetes as its underlying platform.
|
||||
|
||||
The purpose of this notebook is to demonstrate the integration of a `FlyteCallback` into your Flyte task, enabling you to effectively monitor and track your LangChain experiments.
|
||||
|
||||
## Installation & Setup
|
||||
|
||||
- Install the Flytekit library by running the command `pip install flytekit`.
|
||||
- Install the Flytekit-Envd plugin by running the command `pip install flytekitplugins-envd`.
|
||||
- Install LangChain by running the command `pip install langchain`.
|
||||
- Install [Docker](https://docs.docker.com/engine/install/) on your system.
|
||||
|
||||
## Flyte Tasks
|
||||
|
||||
A Flyte [task](https://docs.flyte.org/projects/cookbook/en/latest/auto/core/flyte_basics/task.html) serves as the foundational building block of Flyte.
|
||||
To execute LangChain experiments, you need to write Flyte tasks that define the specific steps and operations involved.
|
||||
|
||||
NOTE: The [getting started guide](https://docs.flyte.org/projects/cookbook/en/latest/index.html) offers detailed, step-by-step instructions on installing Flyte locally and running your initial Flyte pipeline.
|
||||
|
||||
First, import the necessary dependencies to support your LangChain experiments.
|
||||
|
||||
```python
|
||||
import os
|
||||
|
||||
from flytekit import ImageSpec, task
|
||||
from langchain.agents import AgentType, initialize_agent, load_tools
|
||||
from langchain.callbacks import FlyteCallbackHandler
|
||||
from langchain.chains import LLMChain
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
from langchain.prompts import PromptTemplate
|
||||
from langchain.schema import HumanMessage
|
||||
```
|
||||
|
||||
Set up the necessary environment variables to utilize the OpenAI API and Serp API:
|
||||
|
||||
```python
|
||||
# Set OpenAI API key
|
||||
os.environ["OPENAI_API_KEY"] = "<your_openai_api_key>"
|
||||
|
||||
# Set Serp API key
|
||||
os.environ["SERPAPI_API_KEY"] = "<your_serp_api_key>"
|
||||
```
|
||||
|
||||
Replace `<your_openai_api_key>` and `<your_serp_api_key>` with your respective API keys obtained from OpenAI and Serp API.
|
||||
|
||||
To guarantee reproducibility of your pipelines, Flyte tasks are containerized.
|
||||
Each Flyte task must be associated with an image, which can either be shared across the entire Flyte [workflow](https://docs.flyte.org/projects/cookbook/en/latest/auto/core/flyte_basics/basic_workflow.html) or provided separately for each task.
|
||||
|
||||
To streamline the process of supplying the required dependencies for each Flyte task, you can initialize an [`ImageSpec`](https://docs.flyte.org/projects/cookbook/en/latest/auto/core/image_spec/image_spec.html) object.
|
||||
This approach automatically triggers a Docker build, alleviating the need for users to manually create a Docker image.
|
||||
|
||||
```python
|
||||
custom_image = ImageSpec(
|
||||
name="langchain-flyte",
|
||||
packages=[
|
||||
"langchain",
|
||||
"openai",
|
||||
"spacy",
|
||||
"https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0.tar.gz",
|
||||
"textstat",
|
||||
"google-search-results",
|
||||
],
|
||||
registry="<your-registry>",
|
||||
)
|
||||
```
|
||||
|
||||
You have the flexibility to push the Docker image to a registry of your preference.
|
||||
[Docker Hub](https://hub.docker.com/) or [GitHub Container Registry (GHCR)](https://docs.github.com/en/packages/working-with-a-github-packages-registry/working-with-the-container-registry) is a convenient option to begin with.
|
||||
|
||||
Once you have selected a registry, you can proceed to create Flyte tasks that log the LangChain metrics to Flyte Deck.
|
||||
|
||||
The following examples demonstrate tasks related to OpenAI LLM, chains and agent with tools:
|
||||
|
||||
### LLM
|
||||
|
||||
```python
|
||||
@task(disable_deck=False, container_image=custom_image)
|
||||
def langchain_llm() -> str:
|
||||
llm = ChatOpenAI(
|
||||
model_name="gpt-3.5-turbo",
|
||||
temperature=0.2,
|
||||
callbacks=[FlyteCallbackHandler()],
|
||||
)
|
||||
return llm([HumanMessage(content="Tell me a joke")]).content
|
||||
```
|
||||
|
||||
### Chain
|
||||
|
||||
```python
|
||||
@task(disable_deck=False, container_image=custom_image)
|
||||
def langchain_chain() -> list[dict[str, str]]:
|
||||
template = """You are a playwright. Given the title of play, it is your job to write a synopsis for that title.
|
||||
Title: {title}
|
||||
Playwright: This is a synopsis for the above play:"""
|
||||
llm = ChatOpenAI(
|
||||
model_name="gpt-3.5-turbo",
|
||||
temperature=0,
|
||||
callbacks=[FlyteCallbackHandler()],
|
||||
)
|
||||
prompt_template = PromptTemplate(input_variables=["title"], template=template)
|
||||
synopsis_chain = LLMChain(
|
||||
llm=llm, prompt=prompt_template, callbacks=[FlyteCallbackHandler()]
|
||||
)
|
||||
test_prompts = [
|
||||
{
|
||||
"title": "documentary about good video games that push the boundary of game design"
|
||||
},
|
||||
]
|
||||
return synopsis_chain.apply(test_prompts)
|
||||
```
|
||||
|
||||
### Agent
|
||||
|
||||
```python
|
||||
@task(disable_deck=False, container_image=custom_image)
|
||||
def langchain_agent() -> str:
|
||||
llm = OpenAI(
|
||||
model_name="gpt-3.5-turbo",
|
||||
temperature=0,
|
||||
callbacks=[FlyteCallbackHandler()],
|
||||
)
|
||||
tools = load_tools(
|
||||
["serpapi", "llm-math"], llm=llm, callbacks=[FlyteCallbackHandler()]
|
||||
)
|
||||
agent = initialize_agent(
|
||||
tools,
|
||||
llm,
|
||||
agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
|
||||
callbacks=[FlyteCallbackHandler()],
|
||||
verbose=True,
|
||||
)
|
||||
return agent.run(
|
||||
"Who is Leonardo DiCaprio's girlfriend? Could you calculate her current age and raise it to the power of 0.43?"
|
||||
)
|
||||
```
|
||||
|
||||
These tasks serve as a starting point for running your LangChain experiments within Flyte.
|
||||
|
||||
## Execute the Flyte Tasks on Kubernetes
|
||||
|
||||
To execute the Flyte tasks on the configured Flyte backend, use the following command:
|
||||
|
||||
```bash
|
||||
pyflyte run --image <your-image> langchain_flyte.py langchain_llm
|
||||
```
|
||||
|
||||
This command will initiate the execution of the `langchain_llm` task on the Flyte backend. You can trigger the remaining two tasks in a similar manner.
|
||||
|
||||
The metrics will be displayed on the Flyte UI as follows:
|
||||
|
||||

|
||||
44
docs/extras/ecosystem/integrations/grobid.mdx
Normal file
44
docs/extras/ecosystem/integrations/grobid.mdx
Normal file
@@ -0,0 +1,44 @@
|
||||
# Grobid
|
||||
|
||||
This page covers how to use the Grobid to parse articles for LangChain.
|
||||
It is seperated into two parts: installation and running the server
|
||||
|
||||
## Installation and Setup
|
||||
#Ensure You have Java installed
|
||||
!apt-get install -y openjdk-11-jdk -q
|
||||
!update-alternatives --set java /usr/lib/jvm/java-11-openjdk-amd64/bin/java
|
||||
|
||||
#Clone and install the Grobid Repo
|
||||
import os
|
||||
!git clone https://github.com/kermitt2/grobid.git
|
||||
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-11-openjdk-amd64"
|
||||
os.chdir('grobid')
|
||||
!./gradlew clean install
|
||||
|
||||
#Run the server,
|
||||
get_ipython().system_raw('nohup ./gradlew run > grobid.log 2>&1 &')
|
||||
|
||||
You can now use the GrobidParser to produce documents
|
||||
```python
|
||||
from langchain.document_loaders.parsers import GrobidParser
|
||||
from langchain.document_loaders.generic import GenericLoader
|
||||
|
||||
#Produce chunks from article paragraphs
|
||||
loader = GenericLoader.from_filesystem(
|
||||
"/Users/31treehaus/Desktop/Papers/",
|
||||
glob="*",
|
||||
suffixes=[".pdf"],
|
||||
parser= GrobidParser(segment_sentences=False)
|
||||
)
|
||||
docs = loader.load()
|
||||
|
||||
#Produce chunks from article sentences
|
||||
loader = GenericLoader.from_filesystem(
|
||||
"/Users/31treehaus/Desktop/Papers/",
|
||||
glob="*",
|
||||
suffixes=[".pdf"],
|
||||
parser= GrobidParser(segment_sentences=True)
|
||||
)
|
||||
docs = loader.load()
|
||||
```
|
||||
Chunk metadata will include bboxes although these are a bit funky to parse, see https://grobid.readthedocs.io/en/latest/Coordinates-in-PDF/
|
||||
@@ -39,6 +39,21 @@ vectara = Vectara(
|
||||
```
|
||||
The customer_id, corpus_id and api_key are optional, and if they are not supplied will be read from the environment variables `VECTARA_CUSTOMER_ID`, `VECTARA_CORPUS_ID` and `VECTARA_API_KEY`, respectively.
|
||||
|
||||
Afer you have the vectorstore, you can `add_texts` or `add_documents` as per the standard `VectorStore` interface, for example:
|
||||
|
||||
```python
|
||||
vectara.add_texts(["to be or not to be", "that is the question"])
|
||||
```
|
||||
|
||||
|
||||
Since Vectara supports file-upload, we also added the ability to upload files (PDF, TXT, HTML, PPT, DOC, etc) directly as file. When using this method, the file is uploaded directly to the Vectara backend, processed and chunked optimally there, so you don't have to use the LangChain document loader or chunking mechanism.
|
||||
|
||||
As an example:
|
||||
|
||||
```python
|
||||
vectara.add_files(["path/to/file1.pdf", "path/to/file2.pdf",...])
|
||||
```
|
||||
|
||||
To query the vectorstore, you can use the `similarity_search` method (or `similarity_search_with_score`), which takes a query string and returns a list of results:
|
||||
```python
|
||||
results = vectara.similarity_score("what is LangChain?")
|
||||
|
||||
@@ -243,8 +243,8 @@
|
||||
" pred_a, pred_b = res_b, res_a\n",
|
||||
" a, b = \"b\", \"a\"\n",
|
||||
" eval_res = eval_chain.evaluate_string_pairs(\n",
|
||||
" output_a=pred_a['output'] if isinstance(pred_a, dict) else str(pred_a),\n",
|
||||
" output_b=pred_b['output'] if isinstance(pred_b, dict) else str(pred_b),\n",
|
||||
" prediction=pred_a['output'] if isinstance(pred_a, dict) else str(pred_a),\n",
|
||||
" prediction_b=pred_b['output'] if isinstance(pred_b, dict) else str(pred_b),\n",
|
||||
" input=input_\n",
|
||||
" )\n",
|
||||
" if eval_res[\"value\"] == \"A\":\n",
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
"source": [
|
||||
"# Evaluating an OpenAPI Chain\n",
|
||||
"\n",
|
||||
"This notebook goes over ways to semantically evaluate an [OpenAPI Chain](/docs/modules/chains/additiona/openapi.html), which calls an endpoint defined by the OpenAPI specification using purely natural language."
|
||||
"This notebook goes over ways to semantically evaluate an [OpenAPI Chain](/docs/modules/chains/additional/openapi.html), which calls an endpoint defined by the OpenAPI specification using purely natural language."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -0,0 +1,386 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "g9EmNu5DD9YI"
|
||||
},
|
||||
"source": [
|
||||
"# Custom functions with OpenAI Functions Agent\n",
|
||||
"\n",
|
||||
"This notebook goes through how to integrate custom functions with OpenAI Functions agent."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "LFKylC3CPtTl"
|
||||
},
|
||||
"source": [
|
||||
"Install libraries which are required to run this example notebook\n",
|
||||
"\n",
|
||||
"`pip install -q openai langchain yfinance`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "E2DqzmEGDPak"
|
||||
},
|
||||
"source": [
|
||||
"## Define custom functions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"id": "SiucthMs6SIK"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import yfinance as yf\n",
|
||||
"from datetime import datetime, timedelta\n",
|
||||
"\n",
|
||||
"def get_current_stock_price(ticker):\n",
|
||||
" \"\"\"Method to get current stock price\"\"\"\n",
|
||||
"\n",
|
||||
" ticker_data = yf.Ticker(ticker)\n",
|
||||
" recent = ticker_data.history(period='1d')\n",
|
||||
" return {\n",
|
||||
" 'price': recent.iloc[0]['Close'],\n",
|
||||
" 'currency': ticker_data.info['currency']\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
"def get_stock_performance(ticker, days):\n",
|
||||
" \"\"\"Method to get stock price change in percentage\"\"\"\n",
|
||||
"\n",
|
||||
" past_date = datetime.today() - timedelta(days=days)\n",
|
||||
" ticker_data = yf.Ticker(ticker)\n",
|
||||
" history = ticker_data.history(start=past_date)\n",
|
||||
" old_price = history.iloc[0]['Close']\n",
|
||||
" current_price = history.iloc[-1]['Close']\n",
|
||||
" return {\n",
|
||||
" 'percent_change': ((current_price - old_price)/old_price)*100\n",
|
||||
" }"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "vRLINGvQR1rO",
|
||||
"outputId": "68230a4b-dda2-4273-b956-7439661e3785"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'price': 334.57000732421875, 'currency': 'USD'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"get_current_stock_price('MSFT')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "57T190q235mD",
|
||||
"outputId": "c6ee66ec-0659-4632-85d1-263b08826e68"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'percent_change': 1.014466941163018}"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"get_stock_performance('MSFT', 30)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "MT8QsdyBDhwg"
|
||||
},
|
||||
"source": [
|
||||
"## Make custom tools"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"id": "NvLOUv-XP3Ap"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from typing import Type\n",
|
||||
"from pydantic import BaseModel, Field\n",
|
||||
"from langchain.tools import BaseTool\n",
|
||||
"\n",
|
||||
"class CurrentStockPriceInput(BaseModel):\n",
|
||||
" \"\"\"Inputs for get_current_stock_price\"\"\"\n",
|
||||
" ticker: str = Field(description=\"Ticker symbol of the stock\")\n",
|
||||
"\n",
|
||||
"class CurrentStockPriceTool(BaseTool):\n",
|
||||
" name = \"get_current_stock_price\"\n",
|
||||
" description = \"\"\"\n",
|
||||
" Useful when you want to get current stock price.\n",
|
||||
" You should enter the stock ticker symbol recognized by the yahoo finance\n",
|
||||
" \"\"\"\n",
|
||||
" args_schema: Type[BaseModel] = CurrentStockPriceInput\n",
|
||||
"\n",
|
||||
" def _run(self, ticker: str):\n",
|
||||
" price_response = get_current_stock_price(ticker)\n",
|
||||
" return price_response\n",
|
||||
"\n",
|
||||
" def _arun(self, ticker: str):\n",
|
||||
" raise NotImplementedError(\"get_current_stock_price does not support async\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class StockPercentChangeInput(BaseModel):\n",
|
||||
" \"\"\"Inputs for get_stock_performance\"\"\"\n",
|
||||
" ticker: str = Field(description=\"Ticker symbol of the stock\")\n",
|
||||
" days: int = Field(description='Timedelta days to get past date from current date')\n",
|
||||
"\n",
|
||||
"class StockPerformanceTool(BaseTool):\n",
|
||||
" name = \"get_stock_performance\"\n",
|
||||
" description = \"\"\"\n",
|
||||
" Useful when you want to check performance of the stock.\n",
|
||||
" You should enter the stock ticker symbol recognized by the yahoo finance.\n",
|
||||
" You should enter days as number of days from today from which performance needs to be check.\n",
|
||||
" output will be the change in the stock price represented as a percentage.\n",
|
||||
" \"\"\"\n",
|
||||
" args_schema: Type[BaseModel] = StockPercentChangeInput\n",
|
||||
"\n",
|
||||
" def _run(self, ticker: str, days: int):\n",
|
||||
" response = get_stock_performance(ticker, days)\n",
|
||||
" return response\n",
|
||||
"\n",
|
||||
" def _arun(self, ticker: str):\n",
|
||||
" raise NotImplementedError(\"get_stock_performance does not support async\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "PVKoqeCyFKHF"
|
||||
},
|
||||
"source": [
|
||||
"## Create Agent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"id": "yY7qNB7vSQGh"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import AgentType\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.agents import initialize_agent\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(\n",
|
||||
" model=\"gpt-3.5-turbo-0613\",\n",
|
||||
" temperature=0\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"tools = [\n",
|
||||
" CurrentStockPriceTool(),\n",
|
||||
" StockPerformanceTool()\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"agent = initialize_agent(tools, llm, agent=AgentType.OPENAI_FUNCTIONS, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/",
|
||||
"height": 321
|
||||
},
|
||||
"id": "4X96xmgwRkcC",
|
||||
"outputId": "a91b13ef-9643-4f60-d067-c4341e0b285e"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `get_current_stock_price` with `{'ticker': 'MSFT'}`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m{'price': 334.57000732421875, 'currency': 'USD'}\u001b[0m\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `get_stock_performance` with `{'ticker': 'MSFT', 'days': 180}`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[33;1m\u001b[1;3m{'percent_change': 40.163963297187905}\u001b[0m\u001b[32;1m\u001b[1;3mThe current price of Microsoft stock is $334.57 USD. \n",
|
||||
"\n",
|
||||
"Over the past 6 months, Microsoft stock has performed well with a 40.16% increase in its price.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The current price of Microsoft stock is $334.57 USD. \\n\\nOver the past 6 months, Microsoft stock has performed well with a 40.16% increase in its price.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"What is the current price of Microsoft stock? How it has performed over past 6 months?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/",
|
||||
"height": 285
|
||||
},
|
||||
"id": "nkZ_vmAcT7Al",
|
||||
"outputId": "092ebc55-4d28-4a4b-aa2a-98ae47ceec20"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `get_current_stock_price` with `{'ticker': 'GOOGL'}`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m{'price': 118.33000183105469, 'currency': 'USD'}\u001b[0m\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `get_current_stock_price` with `{'ticker': 'META'}`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m{'price': 287.04998779296875, 'currency': 'USD'}\u001b[0m\u001b[32;1m\u001b[1;3mThe recent stock price of Google (GOOGL) is $118.33 USD and the recent stock price of Meta (META) is $287.05 USD.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The recent stock price of Google (GOOGL) is $118.33 USD and the recent stock price of Meta (META) is $287.05 USD.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"Give me recent stock prices of Google and Meta?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/",
|
||||
"height": 466
|
||||
},
|
||||
"id": "jLU-HjMq7n1o",
|
||||
"outputId": "a42194dd-26ed-4b5a-d4a2-1038420045c4"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `get_stock_performance` with `{'ticker': 'MSFT', 'days': 90}`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[33;1m\u001b[1;3m{'percent_change': 18.043096235165596}\u001b[0m\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `get_stock_performance` with `{'ticker': 'GOOGL', 'days': 90}`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[33;1m\u001b[1;3m{'percent_change': 17.286155760642853}\u001b[0m\u001b[32;1m\u001b[1;3mIn the past 3 months, Microsoft (MSFT) has performed better than Google (GOOGL). Microsoft's stock price has increased by 18.04% while Google's stock price has increased by 17.29%.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"In the past 3 months, Microsoft (MSFT) has performed better than Google (GOOGL). Microsoft's stock price has increased by 18.04% while Google's stock price has increased by 17.29%.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run('In the past 3 months, which stock between Microsoft and Google has performed the best?')"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 1
|
||||
}
|
||||
@@ -21,7 +21,7 @@
|
||||
"\n",
|
||||
"2. User-facing (Oauth): for production scenarios where you are deploying an end-user facing application and LangChain needs access to end-user's exposed actions and connected accounts on Zapier.com\n",
|
||||
"\n",
|
||||
"This quick start will focus on the server-side use case for brevity. Review [full docs](https://nla.zapier.com/start/) for user-facing oauth developer support.\n",
|
||||
"This quick start will focus mostly on the server-side use case for brevity. Jump to [Example Using OAuth Access Token](#oauth) to see a short example how to set up Zapier for user-facing situations. Review [full docs](https://nla.zapier.com/start/) for full user-facing oauth developer support.\n",
|
||||
"\n",
|
||||
"This example goes over how to use the Zapier integration with a `SimpleSequentialChain`, then an `Agent`.\n",
|
||||
"In code, below:"
|
||||
@@ -149,7 +149,7 @@
|
||||
"id": "bcdea831",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Example with SimpleSequentialChain\n",
|
||||
"## Example with SimpleSequentialChain\n",
|
||||
"If you need more explicit control, use a chain, like below."
|
||||
]
|
||||
},
|
||||
@@ -323,12 +323,34 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"cell_type": "markdown",
|
||||
"id": "09ff954e-45f2-4595-92ea-91627abde4a0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## <a id=\"oauth\">Example Using OAuth Access Token</a>\n",
|
||||
"The below snippet shows how to initialize the wrapper with a procured OAuth access token. Note the argument being passed in as opposed to setting an environment variable. Review the [authentication docs](https://nla.zapier.com/docs/authentication/#oauth-credentials) for full user-facing oauth developer support.\n",
|
||||
"\n",
|
||||
"The developer is tasked with handling the OAuth handshaking to procure and refresh the access token."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7c6835c8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"source": [
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"zapier = ZapierNLAWrapper(zapier_nla_oauth_access_token='<fill in access token here>')\n",
|
||||
"toolkit = ZapierToolkit.from_zapier_nla_wrapper(zapier)\n",
|
||||
"agent = initialize_agent(\n",
|
||||
" toolkit.get_tools(), llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"agent.run(\n",
|
||||
" \"Summarize the last email I received regarding Silicon Valley Bank. Send the summary to the #test-zapier channel in slack.\"\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -71,11 +71,13 @@
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"from langchain.schema import BaseRetriever\n",
|
||||
"from langchain.callbacks.manager import AsyncCallbackManagerForRetrieverRun, CallbackManagerForRetrieverRun\n",
|
||||
"from langchain.utilities import GoogleSerperAPIWrapper\n",
|
||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.schema import Document"
|
||||
"from langchain.schema import Document\n",
|
||||
"from typing import Any"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -97,11 +99,16 @@
|
||||
" def __init__(self, search):\n",
|
||||
" self.search = search\n",
|
||||
"\n",
|
||||
" def get_relevant_documents(self, query: str):\n",
|
||||
" def _get_relevant_documents(self, query: str, *, run_manager: CallbackManagerForRetrieverRun, **kwargs: Any) -> List[Document]:\n",
|
||||
" return [Document(page_content=self.search.run(query))]\n",
|
||||
"\n",
|
||||
" async def aget_relevant_documents(self, query: str):\n",
|
||||
" raise NotImplemented\n",
|
||||
" async def _aget_relevant_documents(self,\n",
|
||||
" query: str,\n",
|
||||
" *,\n",
|
||||
" run_manager: AsyncCallbackManagerForRetrieverRun,\n",
|
||||
" **kwargs: Any,\n",
|
||||
" ) -> List[Document]:\n",
|
||||
" raise NotImplementedError()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"retriever = SerperSearchRetriever(GoogleSerperAPIWrapper())"
|
||||
|
||||
@@ -134,7 +134,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -32,7 +32,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 1,
|
||||
"id": "40cd9806",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -44,7 +44,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 2,
|
||||
"id": "2d20b852",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -56,7 +56,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 3,
|
||||
"id": "579fa702",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -68,7 +68,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 4,
|
||||
"id": "90c1d899",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -80,7 +80,7 @@
|
||||
"[Document(page_content='This is a test email to use for unit tests.\\n\\nImportant points:\\n\\nRoses are red\\n\\nViolets are blue', metadata={'source': 'example_data/fake-email.eml'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -128,7 +128,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='This is a test email to use for unit tests.', lookup_str='', metadata={'source': 'example_data/fake-email.eml'}, lookup_index=0)"
|
||||
"Document(page_content='This is a test email to use for unit tests.', metadata={'source': 'example_data/fake-email.eml', 'filename': 'fake-email.eml', 'file_directory': 'example_data', 'date': '2022-12-16T17:04:16-05:00', 'filetype': 'message/rfc822', 'sent_from': ['Matthew Robinson <mrobinson@unstructured.io>'], 'sent_to': ['Matthew Robinson <mrobinson@unstructured.io>'], 'subject': 'Test Email', 'category': 'NarrativeText'})"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
@@ -140,6 +140,61 @@
|
||||
"data[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5021f20a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Processing Attachments\n",
|
||||
"\n",
|
||||
"You can process attachments with `UnstructuredEmailLoader` by setting `process_attachments=True` in the constructor. By default, attachments will be partitioned using the `partition` function from `unstructured`. You can use a different partitioning function by passing the function to the `attachment_partitioner` kwarg."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "6539f166",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = UnstructuredEmailLoader(\n",
|
||||
" \"example_data/fake-email.eml\",\n",
|
||||
" mode=\"elements\",\n",
|
||||
" process_attachments=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "aebead38",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "ddeb60f4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='This is a test email to use for unit tests.', metadata={'source': 'example_data/fake-email.eml', 'filename': 'fake-email.eml', 'file_directory': 'example_data', 'date': '2022-12-16T17:04:16-05:00', 'filetype': 'message/rfc822', 'sent_from': ['Matthew Robinson <mrobinson@unstructured.io>'], 'sent_to': ['Matthew Robinson <mrobinson@unstructured.io>'], 'subject': 'Test Email', 'category': 'NarrativeText'})"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"data[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6a074515",
|
||||
@@ -234,7 +289,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
"version": "3.8.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
{"sender_name": "User 2", "timestamp_ms": 1675597571851, "content": "Bye!"}
|
||||
{"sender_name": "User 1", "timestamp_ms": 1675597435669, "content": "Oh no worries! Bye"}
|
||||
{"sender_name": "User 2", "timestamp_ms": 1675596277579, "content": "No Im sorry it was my mistake, the blue one is not for sale"}
|
||||
@@ -0,0 +1,50 @@
|
||||
MIME-Version: 1.0
|
||||
Date: Fri, 23 Dec 2022 12:08:48 -0600
|
||||
Message-ID: <CAPgNNXSzLVJ-d1OCX_TjFgJU7ugtQrjFybPtAMmmYZzphxNFYg@mail.gmail.com>
|
||||
Subject: Fake email with attachment
|
||||
From: Mallori Harrell <mallori@unstructured.io>
|
||||
To: Mallori Harrell <mallori@unstructured.io>
|
||||
Content-Type: multipart/mixed; boundary="0000000000005d654405f082adb7"
|
||||
|
||||
--0000000000005d654405f082adb7
|
||||
Content-Type: multipart/alternative; boundary="0000000000005d654205f082adb5"
|
||||
|
||||
--0000000000005d654205f082adb5
|
||||
Content-Type: text/plain; charset="UTF-8"
|
||||
|
||||
Hello!
|
||||
|
||||
Here's the attachments!
|
||||
|
||||
It includes:
|
||||
|
||||
- Lots of whitespace
|
||||
- Little to no content
|
||||
- and is a quick read
|
||||
|
||||
Best,
|
||||
|
||||
Mallori
|
||||
|
||||
--0000000000005d654205f082adb5
|
||||
Content-Type: text/html; charset="UTF-8"
|
||||
Content-Transfer-Encoding: quoted-printable
|
||||
|
||||
<div dir=3D"ltr">Hello!=C2=A0<div><br></div><div>Here's the attachments=
|
||||
!</div><div><br></div><div>It includes:</div><div><ul><li style=3D"margin-l=
|
||||
eft:15px">Lots of whitespace</li><li style=3D"margin-left:15px">Little=C2=
|
||||
=A0to no content</li><li style=3D"margin-left:15px">and is a quick read</li=
|
||||
></ul><div>Best,</div></div><div><br></div><div>Mallori</div><div dir=3D"lt=
|
||||
r" class=3D"gmail_signature" data-smartmail=3D"gmail_signature"><div dir=3D=
|
||||
"ltr"><div><div><br></div></div></div></div></div>
|
||||
|
||||
--0000000000005d654205f082adb5--
|
||||
--0000000000005d654405f082adb7
|
||||
Content-Type: text/plain; charset="US-ASCII"; name="fake-attachment.txt"
|
||||
Content-Disposition: attachment; filename="fake-attachment.txt"
|
||||
Content-Transfer-Encoding: base64
|
||||
X-Attachment-Id: f_lc0tto5j0
|
||||
Content-ID: <f_lc0tto5j0>
|
||||
|
||||
SGV5IHRoaXMgaXMgYSBmYWtlIGF0dGFjaG1lbnQh
|
||||
--0000000000005d654405f082adb7--
|
||||
@@ -0,0 +1,180 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bdccb278",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Grobid\n",
|
||||
"\n",
|
||||
"GROBID is a machine learning library for extracting, parsing, and re-structuring raw documents.\n",
|
||||
"\n",
|
||||
"It is particularly good for sturctured PDFs, like academic papers.\n",
|
||||
"\n",
|
||||
"This loader uses GROBIB to parse PDFs into `Documents` that retain metadata associated with the section of text.\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"For users on `Mac` - \n",
|
||||
"\n",
|
||||
"(Note: additional instructions can be found [here](https://python.langchain.com/docs/ecosystem/integrations/grobid.mdx).)\n",
|
||||
"\n",
|
||||
"Install Java (Apple Silicon):\n",
|
||||
"```\n",
|
||||
"$ arch -arm64 brew install openjdk@11\n",
|
||||
"$ brew --prefix openjdk@11\n",
|
||||
"/opt/homebrew/opt/openjdk@ 11\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"In `~/.zshrc`:\n",
|
||||
"```\n",
|
||||
"export JAVA_HOME=/opt/homebrew/opt/openjdk@11\n",
|
||||
"export PATH=$JAVA_HOME/bin:$PATH\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Then, in Terminal:\n",
|
||||
"```\n",
|
||||
"$ source ~/.zshrc\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Confirm install:\n",
|
||||
"```\n",
|
||||
"$ which java\n",
|
||||
"/opt/homebrew/opt/openjdk@11/bin/java\n",
|
||||
"$ java -version \n",
|
||||
"openjdk version \"11.0.19\" 2023-04-18\n",
|
||||
"OpenJDK Runtime Environment Homebrew (build 11.0.19+0)\n",
|
||||
"OpenJDK 64-Bit Server VM Homebrew (build 11.0.19+0, mixed mode)\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Then, get [Grobid](https://grobid.readthedocs.io/en/latest/Install-Grobid/#getting-grobid):\n",
|
||||
"```\n",
|
||||
"$ curl -LO https://github.com/kermitt2/grobid/archive/0.7.3.zip\n",
|
||||
"$ unzip 0.7.3.zip\n",
|
||||
"```\n",
|
||||
" \n",
|
||||
"Build\n",
|
||||
"```\n",
|
||||
"$ ./gradlew clean install\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Then, run the server:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "2d8992fc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"! get_ipython().system_raw('nohup ./gradlew run > grobid.log 2>&1 &')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4b41bfb1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now, we can use the data loader."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "640e9a4b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders.parsers import GrobidParser\n",
|
||||
"from langchain.document_loaders.generic import GenericLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "ecdc1fb9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = GenericLoader.from_filesystem(\n",
|
||||
" \"../Papers/\",\n",
|
||||
" glob=\"*\",\n",
|
||||
" suffixes=[\".pdf\"],\n",
|
||||
" parser= GrobidParser(segment_sentences=False)\n",
|
||||
")\n",
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "efe9e356",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Unlike Chinchilla, PaLM, or GPT-3, we only use publicly available data, making our work compatible with open-sourcing, while most existing models rely on data which is either not publicly available or undocumented (e.g.\"Books -2TB\" or \"Social media conversations\").There exist some exceptions, notably OPT (Zhang et al., 2022), GPT-NeoX (Black et al., 2022), BLOOM (Scao et al., 2022) and GLM (Zeng et al., 2022), but none that are competitive with PaLM-62B or Chinchilla.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs[3].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "5be03d17",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'text': 'Unlike Chinchilla, PaLM, or GPT-3, we only use publicly available data, making our work compatible with open-sourcing, while most existing models rely on data which is either not publicly available or undocumented (e.g.\"Books -2TB\" or \"Social media conversations\").There exist some exceptions, notably OPT (Zhang et al., 2022), GPT-NeoX (Black et al., 2022), BLOOM (Scao et al., 2022) and GLM (Zeng et al., 2022), but none that are competitive with PaLM-62B or Chinchilla.',\n",
|
||||
" 'para': '2',\n",
|
||||
" 'bboxes': \"[[{'page': '1', 'x': '317.05', 'y': '509.17', 'h': '207.73', 'w': '9.46'}, {'page': '1', 'x': '306.14', 'y': '522.72', 'h': '220.08', 'w': '9.46'}, {'page': '1', 'x': '306.14', 'y': '536.27', 'h': '218.27', 'w': '9.46'}, {'page': '1', 'x': '306.14', 'y': '549.82', 'h': '218.65', 'w': '9.46'}, {'page': '1', 'x': '306.14', 'y': '563.37', 'h': '136.98', 'w': '9.46'}], [{'page': '1', 'x': '446.49', 'y': '563.37', 'h': '78.11', 'w': '9.46'}, {'page': '1', 'x': '304.69', 'y': '576.92', 'h': '138.32', 'w': '9.46'}], [{'page': '1', 'x': '447.75', 'y': '576.92', 'h': '76.66', 'w': '9.46'}, {'page': '1', 'x': '306.14', 'y': '590.47', 'h': '219.63', 'w': '9.46'}, {'page': '1', 'x': '306.14', 'y': '604.02', 'h': '218.27', 'w': '9.46'}, {'page': '1', 'x': '306.14', 'y': '617.56', 'h': '218.27', 'w': '9.46'}, {'page': '1', 'x': '306.14', 'y': '631.11', 'h': '220.18', 'w': '9.46'}]]\",\n",
|
||||
" 'pages': \"('1', '1')\",\n",
|
||||
" 'section_title': 'Introduction',\n",
|
||||
" 'section_number': '1',\n",
|
||||
" 'paper_title': 'LLaMA: Open and Efficient Foundation Language Models',\n",
|
||||
" 'file_path': '/Users/31treehaus/Desktop/Papers/2302.13971.pdf'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs[3].metadata"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -58,6 +58,7 @@
|
||||
"from langchain.memory.chat_message_histories import ZepChatMessageHistory\n",
|
||||
"from langchain.schema import HumanMessage, AIMessage\n",
|
||||
"from uuid import uuid4\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"# Set this to your Zep server URL\n",
|
||||
"ZEP_API_URL = \"http://localhost:8000\""
|
||||
@@ -75,6 +76,25 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Provide your Zep API key. Note that this is optional. See https://docs.getzep.com/deployment/auth\n",
|
||||
"\n",
|
||||
"zep_api_key = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-25T15:03:29.118416Z",
|
||||
@@ -93,12 +113,13 @@
|
||||
"zep_chat_history = ZepChatMessageHistory(\n",
|
||||
" session_id=session_id,\n",
|
||||
" url=ZEP_API_URL,\n",
|
||||
" api_key=zep_api_key\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-25T15:03:30.271181Z",
|
||||
@@ -172,7 +193,7 @@
|
||||
"]\n",
|
||||
"\n",
|
||||
"for msg in test_history:\n",
|
||||
" zep_chat_history.append(\n",
|
||||
" zep_chat_history.add_message(\n",
|
||||
" HumanMessage(content=msg[\"content\"])\n",
|
||||
" if msg[\"role\"] == \"human\"\n",
|
||||
" else AIMessage(content=msg[\"content\"])\n",
|
||||
@@ -192,7 +213,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-25T15:03:32.979155Z",
|
||||
@@ -207,14 +228,14 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='Who was Octavia Butler?', metadata={'score': 0.7759001673780126, 'uuid': '3a82a02f-056e-4c6a-b960-67ebdf3b2b93', 'created_at': '2023-05-25T15:03:30.2041Z', 'role': 'human', 'token_count': 8}),\n",
|
||||
" Document(page_content=\"Octavia Butler's contemporaries included Ursula K. Le Guin, Samuel R. Delany, and Joanna Russ.\", metadata={'score': 0.7602262941130749, 'uuid': 'a2fc9c21-0897-46c8-bef7-6f5c0f71b04a', 'created_at': '2023-05-25T15:03:30.248065Z', 'role': 'ai', 'token_count': 27}),\n",
|
||||
" Document(page_content='Who were her contemporaries?', metadata={'score': 0.757553366415519, 'uuid': '41f9c41a-a205-41e1-b48b-a0a4cd943fc8', 'created_at': '2023-05-25T15:03:30.243995Z', 'role': 'human', 'token_count': 8}),\n",
|
||||
" Document(page_content='Octavia Estelle Butler (June 22, 1947 – February 24, 2006) was an American science fiction author.', metadata={'score': 0.7546211059317948, 'uuid': '34678311-0098-4f1a-8fd4-5615ac692deb', 'created_at': '2023-05-25T15:03:30.231427Z', 'role': 'ai', 'token_count': 31}),\n",
|
||||
" Document(page_content='Which books of hers were made into movies?', metadata={'score': 0.7496714959247069, 'uuid': '18046c3a-9666-4d3e-b4f0-43d1394732b7', 'created_at': '2023-05-25T15:03:30.236837Z', 'role': 'human', 'token_count': 11})]"
|
||||
"[Document(page_content='Parable of the Sower is a science fiction novel by Octavia Butler, published in 1993. It follows the story of Lauren Olamina, a young woman living in a dystopian future where society has collapsed due to environmental disasters, poverty, and violence.', metadata={'score': 0.8897116216176073, 'uuid': 'db60ff57-f259-4ec4-8a81-178ed4c6e54f', 'created_at': '2023-06-26T23:40:22.816214Z', 'role': 'ai', 'metadata': {'system': {'entities': [{'Label': 'GPE', 'Matches': [{'End': 20, 'Start': 15, 'Text': 'Sower'}], 'Name': 'Sower'}, {'Label': 'PERSON', 'Matches': [{'End': 65, 'Start': 51, 'Text': 'Octavia Butler'}], 'Name': 'Octavia Butler'}, {'Label': 'DATE', 'Matches': [{'End': 84, 'Start': 80, 'Text': '1993'}], 'Name': '1993'}, {'Label': 'PERSON', 'Matches': [{'End': 124, 'Start': 110, 'Text': 'Lauren Olamina'}], 'Name': 'Lauren Olamina'}]}}, 'token_count': 56}),\n",
|
||||
" Document(page_content=\"Write a short synopsis of Butler's book, Parable of the Sower. What is it about?\", metadata={'score': 0.8856661080361157, 'uuid': 'f1a5981a-8f6d-4168-a548-6e9c32f35fa1', 'created_at': '2023-06-26T23:40:22.809621Z', 'role': 'human', 'metadata': {'system': {'entities': [{'Label': 'ORG', 'Matches': [{'End': 32, 'Start': 26, 'Text': 'Butler'}], 'Name': 'Butler'}, {'Label': 'WORK_OF_ART', 'Matches': [{'End': 61, 'Start': 41, 'Text': 'Parable of the Sower'}], 'Name': 'Parable of the Sower'}]}}, 'token_count': 23}),\n",
|
||||
" Document(page_content='Who was Octavia Butler?', metadata={'score': 0.7757595298492976, 'uuid': '361d0043-1009-4e13-a7f0-8aea8b1ee869', 'created_at': '2023-06-26T23:40:22.709886Z', 'role': 'human', 'metadata': {'system': {'entities': [{'Label': 'PERSON', 'Matches': [{'End': 22, 'Start': 8, 'Text': 'Octavia Butler'}], 'Name': 'Octavia Butler'}], 'intent': 'The subject wants to know about the identity or background of an individual named Octavia Butler.'}}, 'token_count': 8}),\n",
|
||||
" Document(page_content=\"Octavia Butler's contemporaries included Ursula K. Le Guin, Samuel R. Delany, and Joanna Russ.\", metadata={'score': 0.7601242516059306, 'uuid': '56c45e8a-0f65-45f0-bc46-d9e65164b563', 'created_at': '2023-06-26T23:40:22.778836Z', 'role': 'ai', 'metadata': {'system': {'entities': [{'Label': 'PERSON', 'Matches': [{'End': 16, 'Start': 0, 'Text': \"Octavia Butler's\"}], 'Name': \"Octavia Butler's\"}, {'Label': 'ORG', 'Matches': [{'End': 58, 'Start': 41, 'Text': 'Ursula K. Le Guin'}], 'Name': 'Ursula K. Le Guin'}, {'Label': 'PERSON', 'Matches': [{'End': 76, 'Start': 60, 'Text': 'Samuel R. Delany'}], 'Name': 'Samuel R. Delany'}, {'Label': 'PERSON', 'Matches': [{'End': 93, 'Start': 82, 'Text': 'Joanna Russ'}], 'Name': 'Joanna Russ'}], 'intent': \"The subject is providing information about Octavia Butler's contemporaries.\"}}, 'token_count': 27}),\n",
|
||||
" Document(page_content='You might want to read Ursula K. Le Guin or Joanna Russ.', metadata={'score': 0.7594731095320668, 'uuid': '6951f2fd-dfa4-4e05-9380-f322ef8f72f8', 'created_at': '2023-06-26T23:40:22.80464Z', 'role': 'ai', 'metadata': {'system': {'entities': [{'Label': 'ORG', 'Matches': [{'End': 40, 'Start': 23, 'Text': 'Ursula K. Le Guin'}], 'Name': 'Ursula K. Le Guin'}, {'Label': 'PERSON', 'Matches': [{'End': 55, 'Start': 44, 'Text': 'Joanna Russ'}], 'Name': 'Joanna Russ'}]}}, 'token_count': 18})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -226,6 +247,7 @@
|
||||
" session_id=session_id, # Ensure that you provide the session_id when instantiating the Retriever\n",
|
||||
" url=ZEP_API_URL,\n",
|
||||
" top_k=5,\n",
|
||||
" api_key=zep_api_key\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"await zep_retriever.aget_relevant_documents(\"Who wrote Parable of the Sower?\")"
|
||||
@@ -240,7 +262,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-25T15:03:34.713354Z",
|
||||
@@ -255,14 +277,14 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='Parable of the Sower is a science fiction novel by Octavia Butler, published in 1993. It follows the story of Lauren Olamina, a young woman living in a dystopian future where society has collapsed due to environmental disasters, poverty, and violence.', metadata={'score': 0.8897321402776546, 'uuid': '1c09603a-52c1-40d7-9d69-29f26256029c', 'created_at': '2023-05-25T15:03:30.268257Z', 'role': 'ai', 'token_count': 56}),\n",
|
||||
" Document(page_content=\"Write a short synopsis of Butler's book, Parable of the Sower. What is it about?\", metadata={'score': 0.8857628682610436, 'uuid': 'f6706e8c-6c91-452f-8c1b-9559fd924657', 'created_at': '2023-05-25T15:03:30.265302Z', 'role': 'human', 'token_count': 23}),\n",
|
||||
" Document(page_content='Who was Octavia Butler?', metadata={'score': 0.7759670375149477, 'uuid': '3a82a02f-056e-4c6a-b960-67ebdf3b2b93', 'created_at': '2023-05-25T15:03:30.2041Z', 'role': 'human', 'token_count': 8}),\n",
|
||||
" Document(page_content=\"Octavia Butler's contemporaries included Ursula K. Le Guin, Samuel R. Delany, and Joanna Russ.\", metadata={'score': 0.7602854653476563, 'uuid': 'a2fc9c21-0897-46c8-bef7-6f5c0f71b04a', 'created_at': '2023-05-25T15:03:30.248065Z', 'role': 'ai', 'token_count': 27}),\n",
|
||||
" Document(page_content='You might want to read Ursula K. Le Guin or Joanna Russ.', metadata={'score': 0.7595293992240313, 'uuid': 'f22f2498-6118-4c74-8718-aa89ccd7e3d6', 'created_at': '2023-05-25T15:03:30.261198Z', 'role': 'ai', 'token_count': 18})]"
|
||||
"[Document(page_content='Parable of the Sower is a science fiction novel by Octavia Butler, published in 1993. It follows the story of Lauren Olamina, a young woman living in a dystopian future where society has collapsed due to environmental disasters, poverty, and violence.', metadata={'score': 0.889661105796371, 'uuid': 'db60ff57-f259-4ec4-8a81-178ed4c6e54f', 'created_at': '2023-06-26T23:40:22.816214Z', 'role': 'ai', 'metadata': {'system': {'entities': [{'Label': 'GPE', 'Matches': [{'End': 20, 'Start': 15, 'Text': 'Sower'}], 'Name': 'Sower'}, {'Label': 'PERSON', 'Matches': [{'End': 65, 'Start': 51, 'Text': 'Octavia Butler'}], 'Name': 'Octavia Butler'}, {'Label': 'DATE', 'Matches': [{'End': 84, 'Start': 80, 'Text': '1993'}], 'Name': '1993'}, {'Label': 'PERSON', 'Matches': [{'End': 124, 'Start': 110, 'Text': 'Lauren Olamina'}], 'Name': 'Lauren Olamina'}]}}, 'token_count': 56}),\n",
|
||||
" Document(page_content=\"Write a short synopsis of Butler's book, Parable of the Sower. What is it about?\", metadata={'score': 0.885754241595424, 'uuid': 'f1a5981a-8f6d-4168-a548-6e9c32f35fa1', 'created_at': '2023-06-26T23:40:22.809621Z', 'role': 'human', 'metadata': {'system': {'entities': [{'Label': 'ORG', 'Matches': [{'End': 32, 'Start': 26, 'Text': 'Butler'}], 'Name': 'Butler'}, {'Label': 'WORK_OF_ART', 'Matches': [{'End': 61, 'Start': 41, 'Text': 'Parable of the Sower'}], 'Name': 'Parable of the Sower'}]}}, 'token_count': 23}),\n",
|
||||
" Document(page_content='Who was Octavia Butler?', metadata={'score': 0.7758688965570713, 'uuid': '361d0043-1009-4e13-a7f0-8aea8b1ee869', 'created_at': '2023-06-26T23:40:22.709886Z', 'role': 'human', 'metadata': {'system': {'entities': [{'Label': 'PERSON', 'Matches': [{'End': 22, 'Start': 8, 'Text': 'Octavia Butler'}], 'Name': 'Octavia Butler'}], 'intent': 'The subject wants to know about the identity or background of an individual named Octavia Butler.'}}, 'token_count': 8}),\n",
|
||||
" Document(page_content=\"Octavia Butler's contemporaries included Ursula K. Le Guin, Samuel R. Delany, and Joanna Russ.\", metadata={'score': 0.7602672137411663, 'uuid': '56c45e8a-0f65-45f0-bc46-d9e65164b563', 'created_at': '2023-06-26T23:40:22.778836Z', 'role': 'ai', 'metadata': {'system': {'entities': [{'Label': 'PERSON', 'Matches': [{'End': 16, 'Start': 0, 'Text': \"Octavia Butler's\"}], 'Name': \"Octavia Butler's\"}, {'Label': 'ORG', 'Matches': [{'End': 58, 'Start': 41, 'Text': 'Ursula K. Le Guin'}], 'Name': 'Ursula K. Le Guin'}, {'Label': 'PERSON', 'Matches': [{'End': 76, 'Start': 60, 'Text': 'Samuel R. Delany'}], 'Name': 'Samuel R. Delany'}, {'Label': 'PERSON', 'Matches': [{'End': 93, 'Start': 82, 'Text': 'Joanna Russ'}], 'Name': 'Joanna Russ'}], 'intent': \"The subject is providing information about Octavia Butler's contemporaries.\"}}, 'token_count': 27}),\n",
|
||||
" Document(page_content='You might want to read Ursula K. Le Guin or Joanna Russ.', metadata={'score': 0.7596040989115522, 'uuid': '6951f2fd-dfa4-4e05-9380-f322ef8f72f8', 'created_at': '2023-06-26T23:40:22.80464Z', 'role': 'ai', 'metadata': {'system': {'entities': [{'Label': 'ORG', 'Matches': [{'End': 40, 'Start': 23, 'Text': 'Ursula K. Le Guin'}], 'Name': 'Ursula K. Le Guin'}, {'Label': 'PERSON', 'Matches': [{'End': 55, 'Start': 44, 'Text': 'Joanna Russ'}], 'Name': 'Joanna Russ'}]}}, 'token_count': 18})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -304,7 +326,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -43,7 +43,6 @@
|
||||
"import openai\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.schema import BaseRetriever\n",
|
||||
"from langchain.vectorstores.azuresearch import AzureSearch"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -23,7 +23,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install \"cassio>=0.0.5\""
|
||||
"!pip install \"cassio>=0.0.7\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -44,14 +44,16 @@
|
||||
"import os\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"database_mode = (input('\\n(L)ocal Cassandra or (A)stra DB? ')).upper()\n",
|
||||
"database_mode = (input('\\n(C)assandra or (A)stra DB? ')).upper()\n",
|
||||
"\n",
|
||||
"keyspace_name = input('\\nKeyspace name? ')\n",
|
||||
"\n",
|
||||
"if database_mode == 'A':\n",
|
||||
" ASTRA_DB_APPLICATION_TOKEN = getpass.getpass('\\nAstra DB Token (\"AstraCS:...\") ')\n",
|
||||
" #\n",
|
||||
" ASTRA_DB_SECURE_BUNDLE_PATH = input('Full path to your Secure Connect Bundle? ')"
|
||||
" ASTRA_DB_SECURE_BUNDLE_PATH = input('Full path to your Secure Connect Bundle? ')\n",
|
||||
"elif database_mode == 'C':\n",
|
||||
" CASSANDRA_CONTACT_POINTS = input('Contact points? (comma-separated, empty for localhost) ').strip()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -72,8 +74,15 @@
|
||||
"from cassandra.cluster import Cluster\n",
|
||||
"from cassandra.auth import PlainTextAuthProvider\n",
|
||||
"\n",
|
||||
"if database_mode == 'L':\n",
|
||||
" cluster = Cluster()\n",
|
||||
"if database_mode == 'C':\n",
|
||||
" if CASSANDRA_CONTACT_POINTS:\n",
|
||||
" cluster = Cluster([\n",
|
||||
" cp.strip()\n",
|
||||
" for cp in CASSANDRA_CONTACT_POINTS.split(',')\n",
|
||||
" if cp.strip()\n",
|
||||
" ])\n",
|
||||
" else:\n",
|
||||
" cluster = Cluster()\n",
|
||||
" session = cluster.connect()\n",
|
||||
"elif database_mode == 'A':\n",
|
||||
" ASTRA_DB_CLIENT_ID = \"token\"\n",
|
||||
@@ -261,7 +270,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.10"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -0,0 +1,399 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "683953b3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ClickHouse Vector Search\n",
|
||||
"\n",
|
||||
"> [ClickHouse](https://clickhouse.com/) is the fastest and most resource efficient open-source database for real-time apps and analytics with full SQL support and a wide range of functions to assist users in writing analytical queries. Lately added data structures and distance search functions (like `L2Distance`) as well as [approximate nearest neighbor search indexes](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/annindexes) enable ClickHouse to be used as a high performance and scalable vector database to store and search vectors with SQL.\n",
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the `ClickHouse` vector search."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "43ead5d5-2c1f-4dce-a69a-cb00e4f9d6f0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setting up envrionments"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b2c434bc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Setting up local clickhouse server with docker (optional)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "249a7751",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-06-03T08:43:43.035606Z",
|
||||
"start_time": "2023-06-03T08:43:42.618531Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"! docker run -d -p 8123:8123 -p9000:9000 --name langchain-clickhouse-server --ulimit nofile=262144:262144 clickhouse/clickhouse-server:23.4.2.11"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7bd3c1c0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Setup up clickhouse client driver"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9d614bf8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install clickhouse-connect"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "15a1d477-9cdb-4d82-b019-96951ecb2b72",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We want to use OpenAIEmbeddings so we have to get the OpenAI API Key."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "91003ea5-0c8c-436c-a5de-aaeaeef2f458",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-06-03T08:49:35.383673Z",
|
||||
"start_time": "2023-06-03T08:49:33.984547Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"if not os.environ['OPENAI_API_KEY']:\n",
|
||||
" os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "aac9563e",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-06-03T08:33:31.554934Z",
|
||||
"start_time": "2023-06-03T08:33:31.549590Z"
|
||||
},
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.vectorstores import Clickhouse, ClickhouseSettings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "a3c3999a",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-06-03T08:33:32.527387Z",
|
||||
"start_time": "2023-06-03T08:33:32.501312Z"
|
||||
},
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"loader = TextLoader('../../../state_of_the_union.txt')\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "6e104aee",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-06-03T08:33:35.503823Z",
|
||||
"start_time": "2023-06-03T08:33:33.745832Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Inserting data...: 100%|██████████| 42/42 [00:00<00:00, 2801.49it/s]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for d in docs:\n",
|
||||
" d.metadata = {'some': 'metadata'}\n",
|
||||
"settings = ClickhouseSettings(table=\"clickhouse_vector_search_example\")\n",
|
||||
"docsearch = Clickhouse.from_documents(docs, embeddings, config=settings)\n",
|
||||
"\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = docsearch.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "9c608226",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e3a8b105",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Get connection info and data schema"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "69996818",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-06-03T08:28:58.252991Z",
|
||||
"start_time": "2023-06-03T08:28:58.197560Z"
|
||||
},
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[92m\u001b[1mdefault.clickhouse_vector_search_example @ localhost:8123\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1musername: None\u001b[0m\n",
|
||||
"\n",
|
||||
"Table Schema:\n",
|
||||
"---------------------------------------------------\n",
|
||||
"|\u001b[94mid \u001b[0m|\u001b[96mNullable(String) \u001b[0m|\n",
|
||||
"|\u001b[94mdocument \u001b[0m|\u001b[96mNullable(String) \u001b[0m|\n",
|
||||
"|\u001b[94membedding \u001b[0m|\u001b[96mArray(Float32) \u001b[0m|\n",
|
||||
"|\u001b[94mmetadata \u001b[0m|\u001b[96mObject('json') \u001b[0m|\n",
|
||||
"|\u001b[94muuid \u001b[0m|\u001b[96mUUID \u001b[0m|\n",
|
||||
"---------------------------------------------------\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(str(docsearch))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "324ac147",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Clickhouse table schema"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b5bd7c5b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"> Clickhouse table will be automatically created if not exist by default. Advanced users could pre-create the table with optimized settings. For distributed Clickhouse cluster with sharding, table engine should be configured as `Distributed`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "54f4f561",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Clickhouse Table DDL:\n",
|
||||
"\n",
|
||||
"CREATE TABLE IF NOT EXISTS default.clickhouse_vector_search_example(\n",
|
||||
" id Nullable(String),\n",
|
||||
" document Nullable(String),\n",
|
||||
" embedding Array(Float32),\n",
|
||||
" metadata JSON,\n",
|
||||
" uuid UUID DEFAULT generateUUIDv4(),\n",
|
||||
" CONSTRAINT cons_vec_len CHECK length(embedding) = 1536,\n",
|
||||
" INDEX vec_idx embedding TYPE annoy(100,'L2Distance') GRANULARITY 1000\n",
|
||||
") ENGINE = MergeTree ORDER BY uuid SETTINGS index_granularity = 8192\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(f\"Clickhouse Table DDL:\\n\\n{docsearch.schema}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f59360c0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Filtering\n",
|
||||
"\n",
|
||||
"You can have direct access to ClickHouse SQL where statement. You can write `WHERE` clause following standard SQL.\n",
|
||||
"\n",
|
||||
"**NOTE**: Please be aware of SQL injection, this interface must not be directly called by end-user.\n",
|
||||
"\n",
|
||||
"If you custimized your `column_map` under your setting, you search with filter like this:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "232055f6",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-06-03T08:29:36.680805Z",
|
||||
"start_time": "2023-06-03T08:29:34.963676Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Inserting data...: 100%|██████████| 42/42 [00:00<00:00, 6939.56it/s]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.vectorstores import Clickhouse, ClickhouseSettings\n",
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"\n",
|
||||
"loader = TextLoader('../../../state_of_the_union.txt')\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"\n",
|
||||
"for i, d in enumerate(docs):\n",
|
||||
" d.metadata = {'doc_id': i}\n",
|
||||
"\n",
|
||||
"docsearch = Clickhouse.from_documents(docs, embeddings)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "ddbcee77",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-06-03T08:29:43.487436Z",
|
||||
"start_time": "2023-06-03T08:29:43.040831Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.6779101415357189 {'doc_id': 0} Madam Speaker, Madam...\n",
|
||||
"0.6997970363474885 {'doc_id': 8} And so many families...\n",
|
||||
"0.7044504914336727 {'doc_id': 1} Groups of citizens b...\n",
|
||||
"0.7053558702165094 {'doc_id': 6} And I’m taking robus...\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"meta = docsearch.metadata_column\n",
|
||||
"output = docsearch.similarity_search_with_relevance_scores('What did the president say about Ketanji Brown Jackson?', \n",
|
||||
" k=4, where_str=f\"{meta}.doc_id<10\")\n",
|
||||
"for d, dist in output:\n",
|
||||
" print(dist, d.metadata, d.page_content[:20] + '...')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a359ed74",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Deleting your data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "fb6a9d36",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-06-03T08:30:24.822384Z",
|
||||
"start_time": "2023-06-03T08:30:24.798571Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docsearch.drop()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,169 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "683953b3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# MongoDB Atlas Vector Search\n",
|
||||
"\n",
|
||||
">[MongoDB Atlas](https://www.mongodb.com/docs/atlas/) is a document database managed in the cloud. It also enables Lucene and its vector search feature.\n",
|
||||
"\n",
|
||||
"This notebook shows how to use the functionality related to the `MongoDB Atlas Vector Search` feature where you can store your embeddings in MongoDB documents and create a Lucene vector index to perform a KNN search.\n",
|
||||
"\n",
|
||||
"It uses the [knnBeta Operator](https://www.mongodb.com/docs/atlas/atlas-search/knn-beta) available in MongoDB Atlas Search. This feature is in early access and available only for evaluation purposes, to validate functionality, and to gather feedback from a small closed group of early access users. It is not recommended for production deployments as we may introduce breaking changes.\n",
|
||||
"\n",
|
||||
"To use MongoDB Atlas, you must have first deployed a cluster. Free clusters are available. \n",
|
||||
"Here is the MongoDB Atlas [quick start](https://www.mongodb.com/docs/atlas/getting-started/)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b4c41cad-08ef-4f72-a545-2151e4598efe",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install pymongo"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c1e38361-c1fe-4ac6-86e9-c90ebaf7ae87",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"MONGODB_ATLAS_URI = os.environ['MONGODB_ATLAS_URI']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "320af802-9271-46ee-948f-d2453933d44b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We want to use `OpenAIEmbeddings` so we have to get the OpenAI API Key. Make sure the environment variable `OPENAI_API_KEY` is set up before proceeding."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1f3ecc42",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now, let's create a Lucene vector index on your cluster. In the below example, `embedding` is the name of the field that contains the embedding vector. Please refer to the [documentation](https://www.mongodb.com/docs/atlas/atlas-search/define-field-mappings-for-vector-search) to get more details on how to define an Atlas Search index.\n",
|
||||
"You can name the index `langchain_demo` and create the index on the namespace `lanchain_db.langchain_col`. Finally, write the following definition in the JSON editor:\n",
|
||||
"\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"mappings\": {\n",
|
||||
" \"dynamic\": true,\n",
|
||||
" \"fields\": {\n",
|
||||
" \"embedding\": {\n",
|
||||
" \"dimensions\": 1536,\n",
|
||||
" \"similarity\": \"cosine\",\n",
|
||||
" \"type\": \"knnVector\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "aac9563e",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.vectorstores import MongoDBAtlasVectorSearch\n",
|
||||
"from langchain.document_loaders import TextLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "a3c3999a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"loader = TextLoader('../../../state_of_the_union.txt')\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6e104aee",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from pymongo import MongoClient\n",
|
||||
"\n",
|
||||
"# initialize MongoDB python client\n",
|
||||
"client = MongoClient(MONGODB_ATLAS_CONNECTION_STRING)\n",
|
||||
"\n",
|
||||
"db_name = \"lanchain_db\"\n",
|
||||
"collection_name = \"langchain_col\"\n",
|
||||
"collection = client[db_name][collection_name]\n",
|
||||
"index_name = \"langchain_demo\"\n",
|
||||
"\n",
|
||||
"# insert the documents in MongoDB Atlas with their embedding\n",
|
||||
"docsearch = MongoDBAtlasVectorSearch.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embeddings,\n",
|
||||
" collection=collection,\n",
|
||||
" index_name=index_name\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# perform a similarity search between the embedding of the query and the embeddings of the documents\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = docsearch.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9c608226",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -626,6 +626,44 @@
|
||||
"source": [
|
||||
"## Customizing Qdrant\n",
|
||||
"\n",
|
||||
"There are some options to use an existing Qdrant collection within your Langchain application. In such cases you may need to define how to map Qdrant point into the Langchain `Document`.\n",
|
||||
"\n",
|
||||
"### Named vectors\n",
|
||||
"\n",
|
||||
"Qdrant supports [multiple vectors per point](https://qdrant.tech/documentation/concepts/collections/#collection-with-multiple-vectors) by named vectors. Langchain requires just a single embedding per document and, by default, uses a single vector. However, if you work with a collection created externally or want to have the named vector used, you can configure it by providing its name.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"Qdrant.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embeddings,\n",
|
||||
" location=\":memory:\",\n",
|
||||
" collection_name=\"my_documents_2\",\n",
|
||||
" vector_name=\"custom_vector\",\n",
|
||||
")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"As a Langchain user, you won't see any difference whether you use named vectors or not. Qdrant integration will handle the conversion under the hood."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### Metadata\n",
|
||||
"\n",
|
||||
"Qdrant stores your vector embeddings along with the optional JSON-like payload. Payloads are optional, but since LangChain assumes the embeddings are generated from the documents, we keep the context data, so you can extract the original texts as well.\n",
|
||||
"\n",
|
||||
"By default, your document is going to be stored in the following payload structure:\n",
|
||||
@@ -639,8 +677,11 @@
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"You can, however, decide to use different keys for the page content and metadata. That's useful if you already have a collection that you'd like to reuse. You can always change the "
|
||||
]
|
||||
"You can, however, decide to use different keys for the page content and metadata. That's useful if you already have a collection that you'd like to reuse."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
|
||||
@@ -11,43 +11,11 @@
|
||||
">[Vectara](https://vectara.com/) is a API platform for building LLM-powered applications. It provides a simple to use API for document indexing and query that is managed by Vectara and is optimized for performance and accuracy. \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the `Vectara` vector database. \n",
|
||||
"This notebook shows how to use functionality related to the `Vectara` vector database or the `Vectara` retriever. \n",
|
||||
"\n",
|
||||
"See the [Vectara API documentation ](https://docs.vectara.com/docs/) for more information on how to use the API."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "7b2f111b-357a-4f42-9730-ef0603bdc1b5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We want to use `OpenAIEmbeddings` so we have to get the OpenAI API Key."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "082e7e8b-ac52-430c-98d6-8f0924457642",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"OpenAI API Key:········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
@@ -61,33 +29,13 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"import os\n",
|
||||
"from langchain.embeddings import FakeEmbeddings\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.vectorstores import Vectara\n",
|
||||
"from langchain.document_loaders import TextLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "a3c3999a",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-04-04T10:51:22.520144Z",
|
||||
"start_time": "2023-04-04T10:51:22.285826Z"
|
||||
},
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
@@ -96,7 +44,21 @@
|
||||
"source": [
|
||||
"## Connecting to Vectara from LangChain\n",
|
||||
"\n",
|
||||
"The Vectara API provides simple API endpoints for indexing and querying."
|
||||
"The Vectara API provides simple API endpoints for indexing and querying, which is encapsulated in the Vectara integration.\n",
|
||||
"First let's ingest the documents using the from_documents() method:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "be0a4973",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = TextLoader('../../../state_of_the_union.txt')\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -112,7 +74,50 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vectara = Vectara.from_documents(docs, embedding=None)"
|
||||
"vectara = Vectara.from_documents(docs, \n",
|
||||
" embedding=FakeEmbeddings(size=768), \n",
|
||||
" doc_metadata = {\"speech\": \"state-of-the-union\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "90dbf3e7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Vectara's indexing API provides a file upload API where the file is handled directly by Vectara - pre-processed, chunked optimally and added to the Vectara vector store.\n",
|
||||
"To use this, we added the add_files() method (and from_files()). \n",
|
||||
"\n",
|
||||
"Let's see this in action. We pick two PDF documents to upload: \n",
|
||||
"1. The \"I have a dream\" speech by Dr. King\n",
|
||||
"2. Churchill's \"We Shall Fight on the Beaches\" speech"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "85ef3468",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import tempfile\n",
|
||||
"import urllib.request\n",
|
||||
"\n",
|
||||
"urls = [\n",
|
||||
" ['https://www.gilderlehrman.org/sites/default/files/inline-pdfs/king.dreamspeech.excerpts.pdf', 'I-have-a-dream'],\n",
|
||||
" ['https://www.parkwayschools.net/cms/lib/MO01931486/Centricity/Domain/1578/Churchill_Beaches_Speech.pdf', 'we shall fight on the beaches'],\n",
|
||||
"]\n",
|
||||
"files_list = []\n",
|
||||
"for url,_ in urls:\n",
|
||||
" name = tempfile.NamedTemporaryFile().name\n",
|
||||
" urllib.request.urlretrieve(url, name)\n",
|
||||
" files_list.append(name)\n",
|
||||
"\n",
|
||||
"docsearch: Vectara = Vectara.from_files(\n",
|
||||
" files=files_list,\n",
|
||||
" embedding=FakeEmbeddings(size=768),\n",
|
||||
" metadatas=[{\"url\": url, \"speech\": title} for url,title in urls],\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -133,7 +138,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"id": "a8c513ab",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -145,12 +150,12 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"found_docs = vectara.similarity_search(query, n_sentence_context=0)"
|
||||
"found_docs = vectara.similarity_search(query, n_sentence_context=0, filter=\"doc.speech = 'state-of-the-union'\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 7,
|
||||
"id": "fc516993",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -191,7 +196,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 8,
|
||||
"id": "8804a21d",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -202,12 +207,12 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"found_docs = vectara.similarity_search_with_score(query)"
|
||||
"found_docs = vectara.similarity_search_with_score(query, filter=\"doc.speech = 'state-of-the-union'\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 9,
|
||||
"id": "756a6887",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -228,7 +233,7 @@
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n",
|
||||
"\n",
|
||||
"Score: 0.7129974\n"
|
||||
"Score: 0.4917977\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -238,6 +243,37 @@
|
||||
"print(f\"\\nScore: {score}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "1f9876a8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now let's do similar search for content in the files we uploaded"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "47784de5",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(Document(page_content='We must forever conduct our struggle on the high plane of dignity and discipline.', metadata={'section': '1'}), 0.7962591)\n",
|
||||
"(Document(page_content='We must not allow our\\ncreative protests to degenerate into physical violence. . . .', metadata={'section': '1'}), 0.25983918)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"We must forever conduct our struggle\"\n",
|
||||
"found_docs = vectara.similarity_search_with_score(query, filter=\"doc.speech = 'I-have-a-dream'\")\n",
|
||||
"print(found_docs[0])\n",
|
||||
"print(found_docs[1])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
@@ -246,12 +282,12 @@
|
||||
"source": [
|
||||
"## Vectara as a Retriever\n",
|
||||
"\n",
|
||||
"Vectara, as all the other vector stores, is a LangChain Retriever, by using cosine similarity. "
|
||||
"Vectara, as all the other vector stores, can be used also as a LangChain Retriever:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 11,
|
||||
"id": "9427195f",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -263,10 +299,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"VectaraRetriever(vectorstore=<langchain.vectorstores.vectara.Vectara object at 0x122db2830>, search_type='similarity', search_kwargs={'lambda_val': 0.025, 'k': 5, 'filter': '', 'n_sentence_context': '0'})"
|
||||
"VectaraRetriever(vectorstore=<langchain.vectorstores.vectara.Vectara object at 0x12772caf0>, search_type='similarity', search_kwargs={'lambda_val': 0.025, 'k': 5, 'filter': '', 'n_sentence_context': '0'})"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -278,7 +314,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 12,
|
||||
"id": "f3c70c31",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -293,7 +329,7 @@
|
||||
"Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt'})"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
||||
@@ -1,34 +1,116 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "91c6a7ef",
|
||||
"id": "90cd3ded",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Cassandra Chat Message History\n",
|
||||
"\n",
|
||||
">[Apache Cassandra®](https://cassandra.apache.org) is a NoSQL, row-oriented, highly scalable and highly available database, well suited for storing large amounts of data.\n",
|
||||
"\n",
|
||||
"Cassandra is a good choice for storing chat message history because it is easy to scale and can handle a large number of writes.\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use Cassandra to store chat message history.\n",
|
||||
"\n",
|
||||
"Cassandra is a distributed database that is well suited for storing large amounts of data. \n",
|
||||
"\n",
|
||||
"It is a good choice for storing chat message history because it is easy to scale and can handle a large number of writes.\n"
|
||||
"To run this notebook you need either a running Cassandra cluster or a DataStax Astra DB instance running in the cloud (you can get one for free at [datastax.com](https://astra.datastax.com)). Check [cassio.org](https://cassio.org/start_here/) for more information."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "47a601d2",
|
||||
"execution_count": null,
|
||||
"id": "d7092199",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# List of contact points to try connecting to Cassandra cluster.\n",
|
||||
"contact_points = [\"cassandra\"]"
|
||||
"!pip install \"cassio>=0.0.6\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e3d97b65",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Please provide database connection parameters and secrets:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": null,
|
||||
"id": "163d97f0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"database_mode = (input('\\n(C)assandra or (A)stra DB? ')).upper()\n",
|
||||
"\n",
|
||||
"keyspace_name = input('\\nKeyspace name? ')\n",
|
||||
"\n",
|
||||
"if database_mode == 'A':\n",
|
||||
" ASTRA_DB_APPLICATION_TOKEN = getpass.getpass('\\nAstra DB Token (\"AstraCS:...\") ')\n",
|
||||
" #\n",
|
||||
" ASTRA_DB_SECURE_BUNDLE_PATH = input('Full path to your Secure Connect Bundle? ')\n",
|
||||
"elif database_mode == 'C':\n",
|
||||
" CASSANDRA_CONTACT_POINTS = input('Contact points? (comma-separated, empty for localhost) ').strip()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "55860b2d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### depending on whether local or cloud-based Astra DB, create the corresponding database connection \"Session\" object"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8dff2798",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from cassandra.cluster import Cluster\n",
|
||||
"from cassandra.auth import PlainTextAuthProvider\n",
|
||||
"\n",
|
||||
"if database_mode == 'C':\n",
|
||||
" if CASSANDRA_CONTACT_POINTS:\n",
|
||||
" cluster = Cluster([\n",
|
||||
" cp.strip()\n",
|
||||
" for cp in CASSANDRA_CONTACT_POINTS.split(',')\n",
|
||||
" if cp.strip()\n",
|
||||
" ])\n",
|
||||
" else:\n",
|
||||
" cluster = Cluster()\n",
|
||||
" session = cluster.connect()\n",
|
||||
"elif database_mode == 'A':\n",
|
||||
" ASTRA_DB_CLIENT_ID = \"token\"\n",
|
||||
" cluster = Cluster(\n",
|
||||
" cloud={\n",
|
||||
" \"secure_connect_bundle\": ASTRA_DB_SECURE_BUNDLE_PATH,\n",
|
||||
" },\n",
|
||||
" auth_provider=PlainTextAuthProvider(\n",
|
||||
" ASTRA_DB_CLIENT_ID,\n",
|
||||
" ASTRA_DB_APPLICATION_TOKEN,\n",
|
||||
" ),\n",
|
||||
" )\n",
|
||||
" session = cluster.connect()\n",
|
||||
"else:\n",
|
||||
" raise NotImplementedError"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "36c163e8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Creation and usage of the Chat Message History"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d15e3302",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -36,7 +118,9 @@
|
||||
"from langchain.memory import CassandraChatMessageHistory\n",
|
||||
"\n",
|
||||
"message_history = CassandraChatMessageHistory(\n",
|
||||
" contact_points=contact_points, session_id=\"test-session\"\n",
|
||||
" session_id=\"test-session\",\n",
|
||||
" session=session,\n",
|
||||
" keyspace=keyspace_name,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"message_history.add_user_message(\"hi!\")\n",
|
||||
@@ -46,22 +130,10 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": null,
|
||||
"id": "64fc465e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[HumanMessage(content='hi!', additional_kwargs={}, example=False),\n",
|
||||
" AIMessage(content='whats up?', additional_kwargs={}, example=False)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"message_history.messages"
|
||||
]
|
||||
@@ -83,7 +155,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -40,10 +40,7 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-25T15:09:41.754535Z",
|
||||
"start_time": "2023-05-25T15:09:40.897232Z"
|
||||
}
|
||||
"is_executing": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -51,8 +48,8 @@
|
||||
"from langchain.memory import ConversationBufferMemory\n",
|
||||
"from langchain import OpenAI\n",
|
||||
"from langchain.schema import HumanMessage, AIMessage\n",
|
||||
"from langchain.tools import DuckDuckGoSearchRun\n",
|
||||
"from langchain.agents import initialize_agent, AgentType\n",
|
||||
"from langchain.utilities import WikipediaAPIWrapper\n",
|
||||
"from langchain.agents import initialize_agent, AgentType, Tool\n",
|
||||
"from uuid import uuid4\n",
|
||||
"\n",
|
||||
"\n",
|
||||
@@ -73,19 +70,37 @@
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "True"
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Load your OpenAI key from a .env file\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"# Provide your OpenAI key\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"load_dotenv()"
|
||||
"openai_key = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Provide your Zep API key. Note that this is optional. See https://docs.getzep.com/deployment/auth\n",
|
||||
"\n",
|
||||
"zep_api_key = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -98,7 +113,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-25T15:09:41.840440Z",
|
||||
@@ -107,13 +122,20 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ddg = DuckDuckGoSearchRun()\n",
|
||||
"tools = [ddg]\n",
|
||||
"search = WikipediaAPIWrapper()\n",
|
||||
"tools = [\n",
|
||||
" Tool(\n",
|
||||
" name=\"Search\",\n",
|
||||
" func=search.run,\n",
|
||||
" description=\"useful for when you need to search online for answers. You should ask targeted questions\",\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"# Set up Zep Chat History\n",
|
||||
"zep_chat_history = ZepChatMessageHistory(\n",
|
||||
" session_id=session_id,\n",
|
||||
" url=ZEP_API_URL,\n",
|
||||
" api_key=zep_api_key\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Use a standard ConversationBufferMemory to encapsulate the Zep chat history\n",
|
||||
@@ -122,7 +144,7 @@
|
||||
")\n",
|
||||
"\n",
|
||||
"# Initialize the agent\n",
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"llm = OpenAI(temperature=0, openai_api_key=openai_key)\n",
|
||||
"agent_chain = initialize_agent(\n",
|
||||
" tools,\n",
|
||||
" llm,\n",
|
||||
@@ -142,7 +164,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-25T15:09:41.960661Z",
|
||||
@@ -212,7 +234,7 @@
|
||||
"]\n",
|
||||
"\n",
|
||||
"for msg in test_history:\n",
|
||||
" zep_chat_history.append(\n",
|
||||
" zep_chat_history.add_message(\n",
|
||||
" HumanMessage(content=msg[\"content\"])\n",
|
||||
" if msg[\"role\"] == \"human\"\n",
|
||||
" else AIMessage(content=msg[\"content\"])\n",
|
||||
@@ -231,7 +253,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-25T15:09:50.485377Z",
|
||||
@@ -245,18 +267,20 @@
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: Do I need to use a tool? No\n",
|
||||
"AI: Parable of the Sower is a prescient novel that speaks to the challenges facing contemporary society, such as climate change, economic inequality, and the rise of authoritarianism. It is a cautionary tale that warns of the dangers of ignoring these issues and the importance of taking action to address them.\u001b[0m\n",
|
||||
"\u001B[1m> Entering new chain...\u001B[0m\n",
|
||||
"\u001B[32;1m\u001B[1;3mThought: Do I need to use a tool? No\n",
|
||||
"AI: Parable of the Sower is a prescient novel that speaks to the challenges facing contemporary society, such as climate change, economic inequality, and the rise of authoritarianism. It is a cautionary tale that warns of the dangers of ignoring these issues and the importance of taking action to address them.\u001B[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
"\u001B[1m> Finished chain.\u001B[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "'Parable of the Sower is a prescient novel that speaks to the challenges facing contemporary society, such as climate change, economic inequality, and the rise of authoritarianism. It is a cautionary tale that warns of the dangers of ignoring these issues and the importance of taking action to address them.'"
|
||||
"text/plain": [
|
||||
"'Parable of the Sower is a prescient novel that speaks to the challenges facing contemporary society, such as climate change, economic inequality, and the rise of authoritarianism. It is a cautionary tale that warns of the dangers of ignoring these issues and the importance of taking action to address them.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -281,7 +305,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-25T15:09:50.493438Z",
|
||||
@@ -293,19 +317,17 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The conversation is about Octavia Butler. The AI describes her as an American science fiction author and mentions the\n",
|
||||
"FX series Kindred as a well-known adaptation of her work. The human then asks about her contemporaries, and the AI lists \n",
|
||||
"Ursula K. Le Guin, Samuel R. Delany, and Joanna Russ.\n",
|
||||
"The human asks about Octavia Butler and the AI identifies her as an American science fiction author. They continue to discuss her works and the fact that the FX series Kindred is based on one of her novels. The AI also lists Ursula K. Le Guin, Samuel R. Delany, and Joanna Russ as Butler's contemporaries.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"{'role': 'human', 'content': 'What awards did she win?', 'uuid': '9fa75c3c-edae-41e3-b9bc-9fcf16b523c9', 'created_at': '2023-05-25T15:09:41.91662Z', 'token_count': 8}\n",
|
||||
"{'role': 'ai', 'content': 'Octavia Butler won the Hugo Award, the Nebula Award, and the MacArthur Fellowship.', 'uuid': 'def4636c-32cb-49ed-b671-32035a034712', 'created_at': '2023-05-25T15:09:41.919874Z', 'token_count': 21}\n",
|
||||
"{'role': 'human', 'content': 'Which other women sci-fi writers might I want to read?', 'uuid': '6e87bd4a-bc23-451e-ae36-05a140415270', 'created_at': '2023-05-25T15:09:41.923771Z', 'token_count': 14}\n",
|
||||
"{'role': 'ai', 'content': 'You might want to read Ursula K. Le Guin or Joanna Russ.', 'uuid': 'f65d8dde-9ee8-4983-9da6-ba789b7e8aa4', 'created_at': '2023-05-25T15:09:41.935254Z', 'token_count': 18}\n",
|
||||
"{'role': 'human', 'content': \"Write a short synopsis of Butler's book, Parable of the Sower. What is it about?\", 'uuid': '5678d056-7f05-4e70-b8e5-f85efa56db01', 'created_at': '2023-05-25T15:09:41.938974Z', 'token_count': 23}\n",
|
||||
"{'role': 'ai', 'content': 'Parable of the Sower is a science fiction novel by Octavia Butler, published in 1993. It follows the story of Lauren Olamina, a young woman living in a dystopian future where society has collapsed due to environmental disasters, poverty, and violence.', 'uuid': '50d64946-9239-4327-83e6-71dcbdd16198', 'created_at': '2023-05-25T15:09:41.957437Z', 'token_count': 56}\n",
|
||||
"{'role': 'human', 'content': \"WWhat is the book's relevance to the challenges facing contemporary society?\", 'uuid': 'a39cfc07-8858-480a-9026-fc47a8ef7001', 'created_at': '2023-05-25T15:09:50.469533Z', 'token_count': 16}\n",
|
||||
"{'role': 'ai', 'content': 'Parable of the Sower is a prescient novel that speaks to the challenges facing contemporary society, such as climate change, economic inequality, and the rise of authoritarianism. It is a cautionary tale that warns of the dangers of ignoring these issues and the importance of taking action to address them.', 'uuid': 'a4ecf0fe-fdd0-4aad-b72b-efde2e6830cc', 'created_at': '2023-05-25T15:09:50.473793Z', 'token_count': 62}\n"
|
||||
"{'role': 'human', 'content': 'What awards did she win?', 'uuid': 'a4bdc592-71a5-47d0-9c64-230b882aab48', 'created_at': '2023-06-26T23:37:56.383953Z', 'token_count': 8, 'metadata': {'system': {'entities': [], 'intent': 'The subject is asking about the awards someone won, likely referring to a specific individual.'}}}\n",
|
||||
"{'role': 'ai', 'content': 'Octavia Butler won the Hugo Award, the Nebula Award, and the MacArthur Fellowship.', 'uuid': '60cc6e6b-7cd4-4a81-aebc-72ef997286b4', 'created_at': '2023-06-26T23:37:56.389935Z', 'token_count': 21, 'metadata': {'system': {'entities': [{'Label': 'PERSON', 'Matches': [{'End': 14, 'Start': 0, 'Text': 'Octavia Butler'}], 'Name': 'Octavia Butler'}, {'Label': 'WORK_OF_ART', 'Matches': [{'End': 33, 'Start': 19, 'Text': 'the Hugo Award'}], 'Name': 'the Hugo Award'}, {'Label': 'EVENT', 'Matches': [{'End': 81, 'Start': 57, 'Text': 'the MacArthur Fellowship'}], 'Name': 'the MacArthur Fellowship'}], 'intent': 'The subject is stating the accomplishments and awards received by Octavia Butler.'}}}\n",
|
||||
"{'role': 'human', 'content': 'Which other women sci-fi writers might I want to read?', 'uuid': 'b189fc60-1510-4a4b-a503-899481d652de', 'created_at': '2023-06-26T23:37:56.395722Z', 'token_count': 14, 'metadata': {'system': {'entities': [], 'intent': 'The subject is looking for recommendations on women science fiction writers to read.'}}}\n",
|
||||
"{'role': 'ai', 'content': 'You might want to read Ursula K. Le Guin or Joanna Russ.', 'uuid': '4be1ccbb-a915-45d6-9f18-7a0c1cbd9907', 'created_at': '2023-06-26T23:37:56.403596Z', 'token_count': 18, 'metadata': {'system': {'entities': [{'Label': 'ORG', 'Matches': [{'End': 40, 'Start': 23, 'Text': 'Ursula K. Le Guin'}], 'Name': 'Ursula K. Le Guin'}, {'Label': 'PERSON', 'Matches': [{'End': 55, 'Start': 44, 'Text': 'Joanna Russ'}], 'Name': 'Joanna Russ'}], 'intent': 'The subject is suggesting reading material and making a literary recommendation.'}}}\n",
|
||||
"{'role': 'human', 'content': \"Write a short synopsis of Butler's book, Parable of the Sower. What is it about?\", 'uuid': 'ac3c5e3e-26a7-4f3b-aeb0-bba084e22753', 'created_at': '2023-06-26T23:37:56.410662Z', 'token_count': 23, 'metadata': {'system': {'entities': [{'Label': 'ORG', 'Matches': [{'End': 32, 'Start': 26, 'Text': 'Butler'}], 'Name': 'Butler'}, {'Label': 'WORK_OF_ART', 'Matches': [{'End': 61, 'Start': 41, 'Text': 'Parable of the Sower'}], 'Name': 'Parable of the Sower'}], 'intent': 'The subject is asking for a brief overview or summary of the book \"Parable of the Sower\" written by Butler.'}}}\n",
|
||||
"{'role': 'ai', 'content': 'Parable of the Sower is a science fiction novel by Octavia Butler, published in 1993. It follows the story of Lauren Olamina, a young woman living in a dystopian future where society has collapsed due to environmental disasters, poverty, and violence.', 'uuid': '4a463b4c-bcab-473c-bed1-fc56a7a20ae2', 'created_at': '2023-06-26T23:37:56.41764Z', 'token_count': 56, 'metadata': {'system': {'entities': [{'Label': 'GPE', 'Matches': [{'End': 20, 'Start': 15, 'Text': 'Sower'}], 'Name': 'Sower'}, {'Label': 'PERSON', 'Matches': [{'End': 65, 'Start': 51, 'Text': 'Octavia Butler'}], 'Name': 'Octavia Butler'}, {'Label': 'DATE', 'Matches': [{'End': 84, 'Start': 80, 'Text': '1993'}], 'Name': '1993'}, {'Label': 'PERSON', 'Matches': [{'End': 124, 'Start': 110, 'Text': 'Lauren Olamina'}], 'Name': 'Lauren Olamina'}]}}}\n",
|
||||
"{'role': 'human', 'content': \"WWhat is the book's relevance to the challenges facing contemporary society?\", 'uuid': '41bab0c7-5e20-40a4-9303-f82069977c91', 'created_at': '2023-06-26T23:38:03.559642Z', 'token_count': 16, 'metadata': {'system': {'entities': [{'Label': 'ORG', 'Matches': [{'End': 5, 'Start': 0, 'Text': 'WWhat'}], 'Name': 'WWhat'}]}}}\n",
|
||||
"{'role': 'ai', 'content': 'Parable of the Sower is a prescient novel that speaks to the challenges facing contemporary society, such as climate change, economic inequality, and the rise of authoritarianism. It is a cautionary tale that warns of the dangers of ignoring these issues and the importance of taking action to address them.', 'uuid': 'bfd8146a-4632-4c8c-98b6-9468bb624339', 'created_at': '2023-06-26T23:38:03.589312Z', 'token_count': 62, 'metadata': {'system': {'entities': [{'Label': 'GPE', 'Matches': [{'End': 20, 'Start': 15, 'Text': 'Sower'}], 'Name': 'Sower'}]}}}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -332,7 +354,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 8,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-25T15:09:50.751203Z",
|
||||
@@ -344,16 +366,16 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'uuid': '6e87bd4a-bc23-451e-ae36-05a140415270', 'created_at': '2023-05-25T15:09:41.923771Z', 'role': 'human', 'content': 'Which other women sci-fi writers might I want to read?', 'token_count': 14} 0.9118298949424545\n",
|
||||
"{'uuid': 'f65d8dde-9ee8-4983-9da6-ba789b7e8aa4', 'created_at': '2023-05-25T15:09:41.935254Z', 'role': 'ai', 'content': 'You might want to read Ursula K. Le Guin or Joanna Russ.', 'token_count': 18} 0.8533024416448016\n",
|
||||
"{'uuid': '52cfe3e8-b800-4dd8-a7dd-8e9e4764dfc8', 'created_at': '2023-05-25T15:09:41.913856Z', 'role': 'ai', 'content': \"Octavia Butler's contemporaries included Ursula K. Le Guin, Samuel R. Delany, and Joanna Russ.\", 'token_count': 27} 0.852352466457884\n",
|
||||
"{'uuid': 'd40da612-0867-4a43-92ec-778b86490a39', 'created_at': '2023-05-25T15:09:41.858543Z', 'role': 'human', 'content': 'Who was Octavia Butler?', 'token_count': 8} 0.8235468913583194\n",
|
||||
"{'uuid': '4fcfbce4-7bfa-44bd-879a-8cbf265bdcf9', 'created_at': '2023-05-25T15:09:41.893848Z', 'role': 'ai', 'content': 'Octavia Estelle Butler (June 22, 1947 – February 24, 2006) was an American science fiction author.', 'token_count': 31} 0.8204317130595353\n",
|
||||
"{'uuid': 'def4636c-32cb-49ed-b671-32035a034712', 'created_at': '2023-05-25T15:09:41.919874Z', 'role': 'ai', 'content': 'Octavia Butler won the Hugo Award, the Nebula Award, and the MacArthur Fellowship.', 'token_count': 21} 0.8196714827228725\n",
|
||||
"{'uuid': '862107de-8f6f-43c0-91fa-4441f01b2b3a', 'created_at': '2023-05-25T15:09:41.898149Z', 'role': 'human', 'content': 'Which books of hers were made into movies?', 'token_count': 11} 0.7954322970428519\n",
|
||||
"{'uuid': '97164506-90fe-4c71-9539-69ebcd1d90a2', 'created_at': '2023-05-25T15:09:41.90887Z', 'role': 'human', 'content': 'Who were her contemporaries?', 'token_count': 8} 0.7942531405021976\n",
|
||||
"{'uuid': '50d64946-9239-4327-83e6-71dcbdd16198', 'created_at': '2023-05-25T15:09:41.957437Z', 'role': 'ai', 'content': 'Parable of the Sower is a science fiction novel by Octavia Butler, published in 1993. It follows the story of Lauren Olamina, a young woman living in a dystopian future where society has collapsed due to environmental disasters, poverty, and violence.', 'token_count': 56} 0.78144769172694\n",
|
||||
"{'uuid': 'c460ffd4-0715-4c69-b793-1092054973e6', 'created_at': '2023-05-25T15:09:41.903082Z', 'role': 'ai', 'content': \"The most well-known adaptation of Octavia Butler's work is the FX series Kindred, based on her novel of the same name.\", 'token_count': 29} 0.7811962820699464\n"
|
||||
"{'uuid': 'b189fc60-1510-4a4b-a503-899481d652de', 'created_at': '2023-06-26T23:37:56.395722Z', 'role': 'human', 'content': 'Which other women sci-fi writers might I want to read?', 'metadata': {'system': {'entities': [], 'intent': 'The subject is looking for recommendations on women science fiction writers to read.'}}, 'token_count': 14} 0.9119619869747062\n",
|
||||
"{'uuid': '4be1ccbb-a915-45d6-9f18-7a0c1cbd9907', 'created_at': '2023-06-26T23:37:56.403596Z', 'role': 'ai', 'content': 'You might want to read Ursula K. Le Guin or Joanna Russ.', 'metadata': {'system': {'entities': [{'Label': 'ORG', 'Matches': [{'End': 40, 'Start': 23, 'Text': 'Ursula K. Le Guin'}], 'Name': 'Ursula K. Le Guin'}, {'Label': 'PERSON', 'Matches': [{'End': 55, 'Start': 44, 'Text': 'Joanna Russ'}], 'Name': 'Joanna Russ'}], 'intent': 'The subject is suggesting reading material and making a literary recommendation.'}}, 'token_count': 18} 0.8534346954749745\n",
|
||||
"{'uuid': '76ec2a3d-b908-4c23-a55d-71ff92865a7a', 'created_at': '2023-06-26T23:37:56.378345Z', 'role': 'ai', 'content': \"Octavia Butler's contemporaries included Ursula K. Le Guin, Samuel R. Delany, and Joanna Russ.\", 'metadata': {'system': {'entities': [{'Label': 'PERSON', 'Matches': [{'End': 16, 'Start': 0, 'Text': \"Octavia Butler's\"}], 'Name': \"Octavia Butler's\"}, {'Label': 'ORG', 'Matches': [{'End': 58, 'Start': 41, 'Text': 'Ursula K. Le Guin'}], 'Name': 'Ursula K. Le Guin'}, {'Label': 'PERSON', 'Matches': [{'End': 76, 'Start': 60, 'Text': 'Samuel R. Delany'}], 'Name': 'Samuel R. Delany'}, {'Label': 'PERSON', 'Matches': [{'End': 93, 'Start': 82, 'Text': 'Joanna Russ'}], 'Name': 'Joanna Russ'}], 'intent': 'The subject is stating the contemporaries of Octavia Butler, who are also science fiction writers.'}}, 'token_count': 27} 0.8523930955780226\n",
|
||||
"{'uuid': '1feb02c7-63c9-4616-854d-0d97fb590ea5', 'created_at': '2023-06-26T23:37:56.313009Z', 'role': 'human', 'content': 'Who was Octavia Butler?', 'metadata': {'system': {'entities': [{'Label': 'PERSON', 'Matches': [{'End': 22, 'Start': 8, 'Text': 'Octavia Butler'}], 'Name': 'Octavia Butler'}], 'intent': 'The subject is asking about the identity of Octavia Butler, likely seeking information about her background or accomplishments.'}}, 'token_count': 8} 0.8236355436055457\n",
|
||||
"{'uuid': 'ebe4696d-b5fa-4ca0-88c9-da794d9611ab', 'created_at': '2023-06-26T23:37:56.332247Z', 'role': 'ai', 'content': 'Octavia Estelle Butler (June 22, 1947 – February 24, 2006) was an American science fiction author.', 'metadata': {'system': {'entities': [{'Label': 'PERSON', 'Matches': [{'End': 22, 'Start': 0, 'Text': 'Octavia Estelle Butler'}], 'Name': 'Octavia Estelle Butler'}, {'Label': 'DATE', 'Matches': [{'End': 37, 'Start': 24, 'Text': 'June 22, 1947'}], 'Name': 'June 22, 1947'}, {'Label': 'DATE', 'Matches': [{'End': 57, 'Start': 40, 'Text': 'February 24, 2006'}], 'Name': 'February 24, 2006'}, {'Label': 'NORP', 'Matches': [{'End': 74, 'Start': 66, 'Text': 'American'}], 'Name': 'American'}], 'intent': 'The subject is making a statement about the background and profession of Octavia Estelle Butler, an American author.'}}, 'token_count': 31} 0.8206687242257686\n",
|
||||
"{'uuid': '60cc6e6b-7cd4-4a81-aebc-72ef997286b4', 'created_at': '2023-06-26T23:37:56.389935Z', 'role': 'ai', 'content': 'Octavia Butler won the Hugo Award, the Nebula Award, and the MacArthur Fellowship.', 'metadata': {'system': {'entities': [{'Label': 'PERSON', 'Matches': [{'End': 14, 'Start': 0, 'Text': 'Octavia Butler'}], 'Name': 'Octavia Butler'}, {'Label': 'WORK_OF_ART', 'Matches': [{'End': 33, 'Start': 19, 'Text': 'the Hugo Award'}], 'Name': 'the Hugo Award'}, {'Label': 'EVENT', 'Matches': [{'End': 81, 'Start': 57, 'Text': 'the MacArthur Fellowship'}], 'Name': 'the MacArthur Fellowship'}], 'intent': 'The subject is stating the accomplishments and awards received by Octavia Butler.'}}, 'token_count': 21} 0.8194249796585193\n",
|
||||
"{'uuid': '0fa4f336-909d-4880-b01a-8e80e91fa8f2', 'created_at': '2023-06-26T23:37:56.344552Z', 'role': 'human', 'content': 'Which books of hers were made into movies?', 'metadata': {'system': {'entities': [], 'intent': 'The subject is inquiring about which books written by an unknown female author were adapted into movies.'}}, 'token_count': 11} 0.7955105671310818\n",
|
||||
"{'uuid': 'f91de7f2-4b84-4c5a-8a33-a71f38f3a59c', 'created_at': '2023-06-26T23:37:56.368146Z', 'role': 'human', 'content': 'Who were her contemporaries?', 'metadata': {'system': {'entities': [], 'intent': 'The subject is asking about the people who lived during the same time period as a specific individual.'}}, 'token_count': 8} 0.7942358617914813\n",
|
||||
"{'uuid': '4a463b4c-bcab-473c-bed1-fc56a7a20ae2', 'created_at': '2023-06-26T23:37:56.41764Z', 'role': 'ai', 'content': 'Parable of the Sower is a science fiction novel by Octavia Butler, published in 1993. It follows the story of Lauren Olamina, a young woman living in a dystopian future where society has collapsed due to environmental disasters, poverty, and violence.', 'metadata': {'system': {'entities': [{'Label': 'GPE', 'Matches': [{'End': 20, 'Start': 15, 'Text': 'Sower'}], 'Name': 'Sower'}, {'Label': 'PERSON', 'Matches': [{'End': 65, 'Start': 51, 'Text': 'Octavia Butler'}], 'Name': 'Octavia Butler'}, {'Label': 'DATE', 'Matches': [{'End': 84, 'Start': 80, 'Text': '1993'}], 'Name': '1993'}, {'Label': 'PERSON', 'Matches': [{'End': 124, 'Start': 110, 'Text': 'Lauren Olamina'}], 'Name': 'Lauren Olamina'}]}}, 'token_count': 56} 0.7816448549236643\n",
|
||||
"{'uuid': '6161d934-a629-4ba2-8bba-0b0996c93964', 'created_at': '2023-06-26T23:37:56.358632Z', 'role': 'ai', 'content': \"The most well-known adaptation of Octavia Butler's work is the FX series Kindred, based on her novel of the same name.\", 'metadata': {'system': {'entities': [{'Label': 'PERSON', 'Matches': [{'End': 50, 'Start': 34, 'Text': \"Octavia Butler's\"}], 'Name': \"Octavia Butler's\"}, {'Label': 'ORG', 'Matches': [{'End': 65, 'Start': 63, 'Text': 'FX'}], 'Name': 'FX'}, {'Label': 'GPE', 'Matches': [{'End': 80, 'Start': 73, 'Text': 'Kindred'}], 'Name': 'Kindred'}], 'intent': \"The subject is discussing Octavia Butler's work being adapted into a TV series called Kindred.\"}}, 'token_count': 29} 0.7815841371388998\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -362,11 +384,25 @@
|
||||
"for r in search_results:\n",
|
||||
" print(r.message, r.dist)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -380,10 +416,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
||||
@@ -0,0 +1,126 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## OctoAI Compute Service\n",
|
||||
"This example goes over how to use LangChain to interact with `OctoAI` [LLM endpoints](https://octoai.cloud/templates)\n",
|
||||
"## Environment setup\n",
|
||||
"\n",
|
||||
"To run our example app, there are four simple steps to take:\n",
|
||||
"\n",
|
||||
"1. Clone the MPT-7B demo template to your OctoAI account by visiting <https://octoai.cloud/templates/mpt-7b-demo> then clicking \"Clone Template.\" \n",
|
||||
" 1. If you want to use a different LLM model, you can also containerize the model and make a custom OctoAI endpoint yourself, by following [Build a Container from Python](doc:create-custom-endpoints-from-python-code) and [Create a Custom Endpoint from a Container](doc:create-custom-endpoints-from-a-container)\n",
|
||||
" \n",
|
||||
"2. Paste your Endpoint URL in the code cell below\n",
|
||||
"\n",
|
||||
"3. Get an API Token from [your OctoAI account page](https://octoai.cloud/settings).\n",
|
||||
" \n",
|
||||
"4. Paste your API key in in the code cell below"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OCTOAI_API_TOKEN\"] = \"OCTOAI_API_TOKEN\"\n",
|
||||
"os.environ[\"ENDPOINT_URL\"] = \"https://mpt-7b-demo-kk0powt97tmb.octoai.cloud/generate\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms.octoai_endpoint import OctoAIEndpoint\n",
|
||||
"from langchain import PromptTemplate, LLMChain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"template = \"\"\"Below is an instruction that describes a task. Write a response that appropriately completes the request.\\n Instruction:\\n{question}\\n Response: \"\"\"\n",
|
||||
"prompt = PromptTemplate(template=template, input_variables=[\"question\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OctoAIEndpoint(\n",
|
||||
" model_kwargs={\n",
|
||||
" \"max_new_tokens\": 200,\n",
|
||||
" \"temperature\": 0.75,\n",
|
||||
" \"top_p\": 0.95,\n",
|
||||
" \"repetition_penalty\": 1,\n",
|
||||
" \"seed\": None,\n",
|
||||
" \"stop\": [],\n",
|
||||
" },\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\nLeonardo da Vinci was an Italian polymath and painter regarded by many as one of the greatest painters of all time. He is best known for his masterpieces including Mona Lisa, The Last Supper, and The Virgin of the Rocks. He was a draftsman, sculptor, architect, and one of the most important figures in the history of science. Da Vinci flew gliders, experimented with water turbines and windmills, and invented the catapult and a joystick-type human-powered aircraft control. He may have pioneered helicopters. As a scholar, he was interested in anatomy, geology, botany, engineering, mathematics, and astronomy.\\nOther painters and patrons claimed to be more talented, but Leonardo da Vinci was an incredibly productive artist, sculptor, engineer, anatomist, and scientist.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 31,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"question = \"Who was leonardo davinci?\"\n",
|
||||
"\n",
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||
"\n",
|
||||
"llm_chain.run(question)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "langchain",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "97697b63fdcee0a640856f91cb41326ad601964008c341809e43189d1cab1047"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -30,14 +30,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 14,
|
||||
"id": "2e587f65",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load Notion page as a markdownfile file\n",
|
||||
"from langchain.document_loaders import NotionDirectoryLoader\n",
|
||||
"path='.../Notion_Folder_With_Markdown_File'\n",
|
||||
"path='../Notion_DB/'\n",
|
||||
"loader = NotionDirectoryLoader(path)\n",
|
||||
"docs = loader.load()\n",
|
||||
"md_file=docs[0].page_content"
|
||||
@@ -45,7 +45,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 15,
|
||||
"id": "1cd3fd7e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -69,7 +69,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"execution_count": 26,
|
||||
"id": "7fbff95f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -110,8 +110,10 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Build vectorstore and keep the metadata\n",
|
||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||
"from langchain.vectorstores import Chroma\n",
|
||||
"vectorstore = Chroma.from_documents(texts=all_splits,metadatas=all_metadatas,embedding=OpenAIEmbeddings())"
|
||||
"vectorstore = Chroma.from_documents(documents=all_splits,\n",
|
||||
" embedding=OpenAIEmbeddings())"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -157,6 +159,37 @@
|
||||
"We can see that we can query *only for texts* in the `Introduction` of the document!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"id": "d688db6e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"query='Introduction' filter=Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='Section', value='Introduction') limit=None\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='', metadata={'Section': 'Introduction'}),\n",
|
||||
" Document(page_content='Q+A systems often use a two-step approach: retrieve relevant text chunks and then synthesize them into an answer. There many ways to approach this. For example, we recently [discussed](https://blog.langchain.dev/auto-evaluation-of-anthropic-100k-context-window/) the Retriever-Less option (at bottom in the below diagram), highlighting the Anthropic 100k context window model. Metadata filtering is an alternative approach that pre-filters chunks based on a user-defined criteria in a VectorDB using', metadata={'Section': 'Introduction'}),\n",
|
||||
" Document(page_content='metadata tags prior to semantic search.', metadata={'Section': 'Introduction'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 29,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Test\n",
|
||||
"retriever.get_relevant_documents(\"Summarize the Introduction section of the document\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
@@ -287,6 +320,11 @@
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -11,5 +11,4 @@ myst_nb
|
||||
sphinx_copybutton
|
||||
pydata-sphinx-theme==0.13.1
|
||||
nbdoc
|
||||
urllib3<2
|
||||
sphinx_tabs
|
||||
urllib3<2
|
||||
@@ -78,11 +78,14 @@ pprint(data)
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
|
||||
## Using `JSONLoader`
|
||||
|
||||
Suppose we are interested in extracting the values under the `content` field within the `messages` key of the JSON data. This can easily be done through the `JSONLoader` as shown below.
|
||||
|
||||
|
||||
### JSON file
|
||||
|
||||
```python
|
||||
loader = JSONLoader(
|
||||
file_path='./example_data/facebook_chat.json',
|
||||
@@ -114,6 +117,81 @@ pprint(data)
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
|
||||
### JSON Lines file
|
||||
|
||||
If you want to load documents from a JSON Lines file, you pass `json_lines=True`
|
||||
and specify `jq_schema` to extract `page_content` from a single JSON object.
|
||||
|
||||
```python
|
||||
file_path = './example_data/facebook_chat_messages.jsonl'
|
||||
pprint(Path(file_path).read_text())
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
('{"sender_name": "User 2", "timestamp_ms": 1675597571851, "content": "Bye!"}\n'
|
||||
'{"sender_name": "User 1", "timestamp_ms": 1675597435669, "content": "Oh no '
|
||||
'worries! Bye"}\n'
|
||||
'{"sender_name": "User 2", "timestamp_ms": 1675596277579, "content": "No Im '
|
||||
'sorry it was my mistake, the blue one is not for sale"}\n')
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
|
||||
```python
|
||||
loader = JSONLoader(
|
||||
file_path='./example_data/facebook_chat_messages.jsonl',
|
||||
jq_schema='.content',
|
||||
json_lines=True)
|
||||
|
||||
data = loader.load()
|
||||
```
|
||||
|
||||
```python
|
||||
pprint(data)
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
[Document(page_content='Bye!', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat_messages.jsonl', 'seq_num': 1}),
|
||||
Document(page_content='Oh no worries! Bye', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat_messages.jsonl', 'seq_num': 2}),
|
||||
Document(page_content='No Im sorry it was my mistake, the blue one is not for sale', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat_messages.jsonl', 'seq_num': 3})]
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
|
||||
Another option is set `jq_schema='.'` and provide `content_key`:
|
||||
|
||||
```python
|
||||
loader = JSONLoader(
|
||||
file_path='./example_data/facebook_chat_messages.jsonl',
|
||||
jq_schema='.',
|
||||
content_key='sender_name',
|
||||
json_lines=True)
|
||||
|
||||
data = loader.load()
|
||||
```
|
||||
|
||||
```python
|
||||
pprint(data)
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
[Document(page_content='User 2', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat_messages.jsonl', 'seq_num': 1}),
|
||||
Document(page_content='User 1', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat_messages.jsonl', 'seq_num': 2}),
|
||||
Document(page_content='User 2', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat_messages.jsonl', 'seq_num': 3})]
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
|
||||
## Extracting metadata
|
||||
|
||||
Generally, we want to include metadata available in the JSON file into the documents that we create from the content.
|
||||
|
||||
@@ -1,24 +1,40 @@
|
||||
The `BaseRetriever` class in LangChain is as follows:
|
||||
The public API of the `BaseRetriever` class in LangChain is as follows:
|
||||
|
||||
```python
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import List
|
||||
from typing import Any, List
|
||||
from langchain.schema import Document
|
||||
from langchain.callbacks.manager import Callbacks
|
||||
|
||||
class BaseRetriever(ABC):
|
||||
@abstractmethod
|
||||
def get_relevant_documents(self, query: str) -> List[Document]:
|
||||
"""Get texts relevant for a query.
|
||||
|
||||
...
|
||||
def get_relevant_documents(
|
||||
self, query: str, *, callbacks: Callbacks = None, **kwargs: Any
|
||||
) -> List[Document]:
|
||||
"""Retrieve documents relevant to a query.
|
||||
Args:
|
||||
query: string to find relevant texts for
|
||||
|
||||
query: string to find relevant documents for
|
||||
callbacks: Callback manager or list of callbacks
|
||||
Returns:
|
||||
List of relevant documents
|
||||
"""
|
||||
...
|
||||
|
||||
async def aget_relevant_documents(
|
||||
self, query: str, *, callbacks: Callbacks = None, **kwargs: Any
|
||||
) -> List[Document]:
|
||||
"""Asynchronously get documents relevant to a query.
|
||||
Args:
|
||||
query: string to find relevant documents for
|
||||
callbacks: Callback manager or list of callbacks
|
||||
Returns:
|
||||
List of relevant documents
|
||||
"""
|
||||
...
|
||||
```
|
||||
|
||||
It's that simple! The `get_relevant_documents` method can be implemented however you see fit.
|
||||
It's that simple! You can call `get_relevant_documents` or the async `get_relevant_documents` methods to retrieve documents relevant to a query, where "relevance" is defined by
|
||||
the specific retriever object you are calling.
|
||||
|
||||
Of course, we also help construct what we think useful Retrievers are. The main type of Retriever that we focus on is a Vectorstore retriever. We will focus on that for the rest of this guide.
|
||||
|
||||
|
||||
@@ -0,0 +1,161 @@
|
||||
# Implement a Custom Retriever
|
||||
|
||||
In this walkthrough, you will implement a simple custom retriever in LangChain using a simple dot product distance lookup.
|
||||
|
||||
All retrievers inherit from the `BaseRetriever` class and override the following abstract methods:
|
||||
|
||||
```python
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, List
|
||||
from langchain.schema import Document
|
||||
from langchain.callbacks.manager import (
|
||||
AsyncCallbackManagerForRetrieverRun,
|
||||
CallbackManagerForRetrieverRun,
|
||||
)
|
||||
|
||||
class BaseRetriever(ABC):
|
||||
|
||||
@abstractmethod
|
||||
def _get_relevant_documents(
|
||||
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
|
||||
) -> List[Document]:
|
||||
"""Get documents relevant to a query.
|
||||
Args:
|
||||
query: string to find relevant documents for
|
||||
run_manager: The callbacks handler to use
|
||||
Returns:
|
||||
List of relevant documents
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
async def _aget_relevant_documents(
|
||||
self,
|
||||
query: str,
|
||||
*,
|
||||
run_manager: AsyncCallbackManagerForRetrieverRun,
|
||||
) -> List[Document]:
|
||||
"""Asynchronously get documents relevant to a query.
|
||||
Args:
|
||||
query: string to find relevant documents for
|
||||
run_manager: The callbacks handler to use
|
||||
Returns:
|
||||
List of relevant documents
|
||||
"""
|
||||
```
|
||||
|
||||
|
||||
The `_get_relevant_documents` and async `_get_relevant_documents` methods can be implemented however you see fit. The `run_manager` is useful if your retriever calls other traceable LangChain primitives like LLMs, chains, or tools.
|
||||
|
||||
|
||||
Below, implement an example that fetches the most similar documents from a list of documents using a numpy array of embeddings.
|
||||
|
||||
|
||||
```python
|
||||
from typing import Any, List, Optional
|
||||
|
||||
import numpy as np
|
||||
|
||||
from langchain.callbacks.manager import (
|
||||
AsyncCallbackManagerForRetrieverRun,
|
||||
CallbackManagerForRetrieverRun,
|
||||
)
|
||||
from langchain.embeddings import OpenAIEmbeddings
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.schema import BaseRetriever, Document
|
||||
|
||||
|
||||
class NumpyRetriever(BaseRetriever):
|
||||
"""Retrieves documents from a numpy array."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
texts: List[str],
|
||||
vectors: np.ndarray,
|
||||
embeddings: Optional[Embeddings] = None,
|
||||
num_to_return: int = 1,
|
||||
) -> None:
|
||||
super().__init__()
|
||||
self.embeddings = embeddings or OpenAIEmbeddings()
|
||||
self.texts = texts
|
||||
self.vectors = vectors
|
||||
self.num_to_return = num_to_return
|
||||
|
||||
@classmethod
|
||||
def from_texts(
|
||||
cls,
|
||||
texts: List[str],
|
||||
embeddings: Optional[Embeddings] = None,
|
||||
**kwargs: Any,
|
||||
) -> "NumpyRetriever":
|
||||
embeddings = embeddings or OpenAIEmbeddings()
|
||||
vectors = np.array(embeddings.embed_documents(texts))
|
||||
return cls(texts, vectors, embeddings)
|
||||
|
||||
def _get_relevant_documents_from_query_vector(
|
||||
self, vector_query: np.ndarray
|
||||
) -> List[Document]:
|
||||
dot_product = np.dot(self.vectors, vector_query)
|
||||
# Get the indices of the min 5 documents
|
||||
indices = np.argpartition(
|
||||
dot_product, -min(self.num_to_return, len(self.vectors))
|
||||
)[-self.num_to_return :]
|
||||
# Sort indices by distance
|
||||
indices = indices[np.argsort(dot_product[indices])]
|
||||
return [
|
||||
Document(
|
||||
page_content=self.texts[idx],
|
||||
metadata={"index": idx},
|
||||
)
|
||||
for idx in indices
|
||||
]
|
||||
|
||||
def _get_relevant_documents(
|
||||
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
|
||||
) -> List[Document]:
|
||||
"""Get documents relevant to a query.
|
||||
Args:
|
||||
query: string to find relevant documents for
|
||||
run_manager: The callbacks handler to use
|
||||
Returns:
|
||||
List of relevant documents
|
||||
"""
|
||||
vector_query = np.array(self.embeddings.embed_query(query))
|
||||
return self._get_relevant_documents_from_query_vector(vector_query)
|
||||
|
||||
async def _aget_relevant_documents(
|
||||
self,
|
||||
query: str,
|
||||
*,
|
||||
run_manager: AsyncCallbackManagerForRetrieverRun,
|
||||
) -> List[Document]:
|
||||
"""Asynchronously get documents relevant to a query.
|
||||
Args:
|
||||
query: string to find relevant documents for
|
||||
run_manager: The callbacks handler to use
|
||||
Returns:
|
||||
List of relevant documents
|
||||
"""
|
||||
query_emb = await self.embeddings.aembed_query(query)
|
||||
return self._get_relevant_documents_from_query_vector(np.array(query_emb))
|
||||
```
|
||||
|
||||
The retriever can be instantiated through the class method `from_texts`. It embeds the texts and stores them in a numpy array. To look up documents, it embeds the query and finds the most similar documents using a simple dot product distance.
|
||||
Once the retriever is implemented, you can use it like any other retriever in LangChain.
|
||||
|
||||
|
||||
```python
|
||||
retriever = NumpyRetriever.from_texts(texts= ["hello world", "goodbye world"])
|
||||
```
|
||||
|
||||
You can then use the retriever to get relevant documents.
|
||||
|
||||
```python
|
||||
retriever.get_relevant_documents("Hi there!")
|
||||
|
||||
# [Document(page_content='hello world', metadata={'index': 0})]
|
||||
```
|
||||
|
||||
```python
|
||||
retriever.get_relevant_documents("Bye!")
|
||||
# [Document(page_content='goodbye world', metadata={'index': 1})]
|
||||
```
|
||||
@@ -31,7 +31,7 @@ embeddings_model = OpenAIEmbeddings()
|
||||
#### Embed list of texts
|
||||
|
||||
```python
|
||||
embeddings = embedding_model.embed_documents(
|
||||
embeddings = embeddings_model.embed_documents(
|
||||
[
|
||||
"Hi there!",
|
||||
"Oh, hello!",
|
||||
@@ -56,7 +56,7 @@ len(embeddings), len(embeddings[0])
|
||||
Embed a single piece of text for the purpose of comparing to other embedded pieces of texts.
|
||||
|
||||
```python
|
||||
embedded_query = embedding_model.embed_query("What was the name mentioned in the conversation?")
|
||||
embedded_query = embeddings_model.embed_query("What was the name mentioned in the conversation?")
|
||||
embedded_query[:5]
|
||||
```
|
||||
|
||||
|
||||
@@ -26,7 +26,8 @@ raw_documents = TextLoader('../../../state_of_the_union.txt').load()
|
||||
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
||||
documents = text_splitter.split_documents(raw_documents)
|
||||
|
||||
db = FAISS.from_documents(documents, OpenAIEmbeddings())
|
||||
embeddings = OpenAIEmbeddings()
|
||||
db = FAISS.from_documents(documents, embeddings)
|
||||
```
|
||||
|
||||
### Similarity search
|
||||
|
||||
@@ -65,7 +65,7 @@ pipeline_prompt.input_variables
|
||||
print(pipeline_prompt.format(
|
||||
person="Elon Musk",
|
||||
example_q="What's your favorite car?",
|
||||
example_a="Telsa",
|
||||
example_a="Tesla",
|
||||
input="What's your favorite social media site?"
|
||||
))
|
||||
```
|
||||
@@ -77,7 +77,7 @@ print(pipeline_prompt.format(
|
||||
Here's an example of an interaction:
|
||||
|
||||
Q: What's your favorite car?
|
||||
A: Telsa
|
||||
A: Tesla
|
||||
Now, do this for real!
|
||||
|
||||
Q: What's your favorite social media site?
|
||||
|
||||
@@ -32,6 +32,7 @@ from langchain.agents.load_tools import (
|
||||
from langchain.agents.loading import load_agent
|
||||
from langchain.agents.mrkl.base import MRKLChain, ZeroShotAgent
|
||||
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
|
||||
from langchain.agents.openai_functions_multi_agent.base import OpenAIMultiFunctionsAgent
|
||||
from langchain.agents.react.base import ReActChain, ReActTextWorldAgent
|
||||
from langchain.agents.self_ask_with_search.base import SelfAskWithSearchChain
|
||||
from langchain.agents.structured_chat.base import StructuredChatAgent
|
||||
@@ -49,6 +50,7 @@ __all__ = [
|
||||
"LLMSingleActionAgent",
|
||||
"MRKLChain",
|
||||
"OpenAIFunctionsAgent",
|
||||
"OpenAIMultiFunctionsAgent",
|
||||
"ReActChain",
|
||||
"ReActTextWorldAgent",
|
||||
"SelfAskWithSearchChain",
|
||||
|
||||
@@ -32,10 +32,10 @@ from langchain.prompts.prompt import PromptTemplate
|
||||
from langchain.schema import (
|
||||
AgentAction,
|
||||
AgentFinish,
|
||||
BaseMessage,
|
||||
BaseOutputParser,
|
||||
OutputParserException,
|
||||
)
|
||||
from langchain.schema.messages import BaseMessage
|
||||
from langchain.tools.base import BaseTool
|
||||
from langchain.utilities.asyncio import asyncio_timeout
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ from langchain.base_language import BaseLanguageModel
|
||||
from langchain.callbacks.base import BaseCallbackManager
|
||||
from langchain.chains.llm import LLMChain
|
||||
from langchain.prompts.base import BasePromptTemplate
|
||||
from langchain.schema import SystemMessage
|
||||
from langchain.schema.messages import SystemMessage
|
||||
from langchain.tools.python.tool import PythonAstREPLTool
|
||||
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ from langchain.agents.types import AgentType
|
||||
from langchain.base_language import BaseLanguageModel
|
||||
from langchain.callbacks.base import BaseCallbackManager
|
||||
from langchain.chains.llm import LLMChain
|
||||
from langchain.schema import SystemMessage
|
||||
from langchain.schema.messages import SystemMessage
|
||||
from langchain.tools.python.tool import PythonREPLTool
|
||||
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ from langchain.prompts.chat import (
|
||||
HumanMessagePromptTemplate,
|
||||
MessagesPlaceholder,
|
||||
)
|
||||
from langchain.schema import AIMessage, SystemMessage
|
||||
from langchain.schema.messages import AIMessage, SystemMessage
|
||||
|
||||
|
||||
def create_sql_agent(
|
||||
|
||||
@@ -25,11 +25,9 @@ from langchain.prompts.chat import (
|
||||
)
|
||||
from langchain.schema import (
|
||||
AgentAction,
|
||||
AIMessage,
|
||||
BaseMessage,
|
||||
BaseOutputParser,
|
||||
HumanMessage,
|
||||
)
|
||||
from langchain.schema.messages import AIMessage, BaseMessage, HumanMessage
|
||||
from langchain.tools.base import BaseTool
|
||||
|
||||
|
||||
|
||||
@@ -21,10 +21,12 @@ from langchain.prompts.chat import (
|
||||
from langchain.schema import (
|
||||
AgentAction,
|
||||
AgentFinish,
|
||||
OutputParserException,
|
||||
)
|
||||
from langchain.schema.messages import (
|
||||
AIMessage,
|
||||
BaseMessage,
|
||||
FunctionMessage,
|
||||
OutputParserException,
|
||||
SystemMessage,
|
||||
)
|
||||
from langchain.tools import BaseTool
|
||||
|
||||
@@ -21,10 +21,12 @@ from langchain.prompts.chat import (
|
||||
from langchain.schema import (
|
||||
AgentAction,
|
||||
AgentFinish,
|
||||
OutputParserException,
|
||||
)
|
||||
from langchain.schema.messages import (
|
||||
AIMessage,
|
||||
BaseMessage,
|
||||
FunctionMessage,
|
||||
OutputParserException,
|
||||
SystemMessage,
|
||||
)
|
||||
from langchain.tools import BaseTool
|
||||
@@ -152,7 +154,7 @@ class OpenAIMultiFunctionsAgent(BaseMultiActionAgent):
|
||||
tools: The tools this agent has access to.
|
||||
prompt: The prompt for this agent, should support agent_scratchpad as one
|
||||
of the variables. For an easy way to construct this prompt, use
|
||||
`OpenAIFunctionsAgent.create_prompt(...)`
|
||||
`OpenAIMultiFunctionsAgent.create_prompt(...)`
|
||||
"""
|
||||
|
||||
llm: BaseLanguageModel
|
||||
|
||||
@@ -6,7 +6,8 @@ from typing import Any, List, Optional, Sequence, Set
|
||||
|
||||
from langchain.callbacks.manager import Callbacks
|
||||
from langchain.load.serializable import Serializable
|
||||
from langchain.schema import BaseMessage, LLMResult, PromptValue, get_buffer_string
|
||||
from langchain.schema import LLMResult, PromptValue
|
||||
from langchain.schema.messages import BaseMessage, get_buffer_string
|
||||
|
||||
|
||||
def _get_token_ids_default_method(text: str) -> List[int]:
|
||||
|
||||
@@ -262,7 +262,9 @@ class RedisSemanticCache(BaseCache):
|
||||
score_threshold (float, 0.2):
|
||||
|
||||
Example:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import langchain
|
||||
|
||||
from langchain.cache import RedisSemanticCache
|
||||
|
||||
@@ -3,9 +3,11 @@
|
||||
from langchain.callbacks.aim_callback import AimCallbackHandler
|
||||
from langchain.callbacks.argilla_callback import ArgillaCallbackHandler
|
||||
from langchain.callbacks.arize_callback import ArizeCallbackHandler
|
||||
from langchain.callbacks.arthur_callback import ArthurCallbackHandler
|
||||
from langchain.callbacks.clearml_callback import ClearMLCallbackHandler
|
||||
from langchain.callbacks.comet_ml_callback import CometCallbackHandler
|
||||
from langchain.callbacks.file import FileCallbackHandler
|
||||
from langchain.callbacks.flyte_callback import FlyteCallbackHandler
|
||||
from langchain.callbacks.human import HumanApprovalCallbackHandler
|
||||
from langchain.callbacks.infino_callback import InfinoCallbackHandler
|
||||
from langchain.callbacks.manager import (
|
||||
@@ -15,6 +17,7 @@ from langchain.callbacks.manager import (
|
||||
)
|
||||
from langchain.callbacks.mlflow_callback import MlflowCallbackHandler
|
||||
from langchain.callbacks.openai_info import OpenAICallbackHandler
|
||||
from langchain.callbacks.promptlayer_callback import PromptLayerCallbackHandler
|
||||
from langchain.callbacks.stdout import StdOutCallbackHandler
|
||||
from langchain.callbacks.streaming_aiter import AsyncIteratorCallbackHandler
|
||||
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
||||
@@ -29,22 +32,25 @@ __all__ = [
|
||||
"AimCallbackHandler",
|
||||
"ArgillaCallbackHandler",
|
||||
"ArizeCallbackHandler",
|
||||
"AsyncIteratorCallbackHandler",
|
||||
"PromptLayerCallbackHandler",
|
||||
"ArthurCallbackHandler",
|
||||
"ClearMLCallbackHandler",
|
||||
"CometCallbackHandler",
|
||||
"FileCallbackHandler",
|
||||
"FinalStreamingStdOutCallbackHandler",
|
||||
"HumanApprovalCallbackHandler",
|
||||
"InfinoCallbackHandler",
|
||||
"MlflowCallbackHandler",
|
||||
"OpenAICallbackHandler",
|
||||
"StdOutCallbackHandler",
|
||||
"AsyncIteratorCallbackHandler",
|
||||
"StreamingStdOutCallbackHandler",
|
||||
"StreamlitCallbackHandler",
|
||||
"FinalStreamingStdOutCallbackHandler",
|
||||
"LLMThoughtLabeler",
|
||||
"StreamlitCallbackHandler",
|
||||
"WandbCallbackHandler",
|
||||
"WhyLabsCallbackHandler",
|
||||
"get_openai_callback",
|
||||
"tracing_enabled",
|
||||
"wandb_tracing_enabled",
|
||||
"FlyteCallbackHandler",
|
||||
]
|
||||
|
||||
@@ -30,6 +30,7 @@ class BaseMetadataCallbackHandler:
|
||||
ignore_llm_ (bool): Whether to ignore llm callbacks.
|
||||
ignore_chain_ (bool): Whether to ignore chain callbacks.
|
||||
ignore_agent_ (bool): Whether to ignore agent callbacks.
|
||||
ignore_retriever_ (bool): Whether to ignore retriever callbacks.
|
||||
always_verbose_ (bool): Whether to always be verbose.
|
||||
chain_starts (int): The number of times the chain start method has been called.
|
||||
chain_ends (int): The number of times the chain end method has been called.
|
||||
@@ -52,6 +53,7 @@ class BaseMetadataCallbackHandler:
|
||||
self.ignore_llm_ = False
|
||||
self.ignore_chain_ = False
|
||||
self.ignore_agent_ = False
|
||||
self.ignore_retriever_ = False
|
||||
self.always_verbose_ = False
|
||||
|
||||
self.chain_starts = 0
|
||||
@@ -86,6 +88,11 @@ class BaseMetadataCallbackHandler:
|
||||
"""Whether to ignore agent callbacks."""
|
||||
return self.ignore_agent_
|
||||
|
||||
@property
|
||||
def ignore_retriever(self) -> bool:
|
||||
"""Whether to ignore retriever callbacks."""
|
||||
return self.ignore_retriever_
|
||||
|
||||
def get_custom_callback_meta(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"step": self.step,
|
||||
|
||||
302
langchain/callbacks/arthur_callback.py
Normal file
302
langchain/callbacks/arthur_callback.py
Normal file
@@ -0,0 +1,302 @@
|
||||
"""ArthurAI's Callback Handler."""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import uuid
|
||||
from collections import defaultdict
|
||||
from datetime import datetime
|
||||
from time import time
|
||||
from typing import TYPE_CHECKING, Any, DefaultDict, Dict, List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
from langchain.callbacks.base import BaseCallbackHandler
|
||||
from langchain.schema import AgentAction, AgentFinish, LLMResult
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import arthurai
|
||||
from arthurai.core.models import ArthurModel
|
||||
|
||||
PROMPT_TOKENS = "prompt_tokens"
|
||||
COMPLETION_TOKENS = "completion_tokens"
|
||||
TOKEN_USAGE = "token_usage"
|
||||
FINISH_REASON = "finish_reason"
|
||||
DURATION = "duration"
|
||||
|
||||
|
||||
def _lazy_load_arthur() -> arthurai:
|
||||
"""Lazy load Arthur."""
|
||||
try:
|
||||
import arthurai
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"To use the ArthurCallbackHandler you need the"
|
||||
" `arthurai` package. Please install it with"
|
||||
" `pip install arthurai`.",
|
||||
e,
|
||||
)
|
||||
|
||||
return arthurai
|
||||
|
||||
|
||||
class ArthurCallbackHandler(BaseCallbackHandler):
|
||||
"""Callback Handler that logs to Arthur platform.
|
||||
|
||||
Arthur helps enterprise teams optimize model operations
|
||||
and performance at scale. The Arthur API tracks model
|
||||
performance, explainability, and fairness across tabular,
|
||||
NLP, and CV models. Our API is model- and platform-agnostic,
|
||||
and continuously scales with complex and dynamic enterprise needs.
|
||||
To learn more about Arthur, visit our website at
|
||||
https://www.arthur.ai/ or read the Arthur docs at
|
||||
https://docs.arthur.ai/
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
arthur_model: ArthurModel,
|
||||
) -> None:
|
||||
"""Initialize callback handler."""
|
||||
super().__init__()
|
||||
arthurai = _lazy_load_arthur()
|
||||
Stage = arthurai.common.constants.Stage
|
||||
ValueType = arthurai.common.constants.ValueType
|
||||
self.arthur_model = arthur_model
|
||||
# save the attributes of this model to be used when preparing
|
||||
# inferences to log to Arthur in on_llm_end()
|
||||
self.attr_names = set([a.name for a in self.arthur_model.get_attributes()])
|
||||
self.input_attr = [
|
||||
x
|
||||
for x in self.arthur_model.get_attributes()
|
||||
if x.stage == Stage.ModelPipelineInput
|
||||
and x.value_type == ValueType.Unstructured_Text
|
||||
][0].name
|
||||
self.output_attr = [
|
||||
x
|
||||
for x in self.arthur_model.get_attributes()
|
||||
if x.stage == Stage.PredictedValue
|
||||
and x.value_type == ValueType.Unstructured_Text
|
||||
][0].name
|
||||
self.token_likelihood_attr = None
|
||||
if (
|
||||
len(
|
||||
[
|
||||
x
|
||||
for x in self.arthur_model.get_attributes()
|
||||
if x.value_type == ValueType.TokenLikelihoods
|
||||
]
|
||||
)
|
||||
> 0
|
||||
):
|
||||
self.token_likelihood_attr = [
|
||||
x
|
||||
for x in self.arthur_model.get_attributes()
|
||||
if x.value_type == ValueType.TokenLikelihoods
|
||||
][0].name
|
||||
|
||||
self.run_map: DefaultDict[str, Any] = defaultdict(dict)
|
||||
|
||||
@classmethod
|
||||
def from_credentials(
|
||||
cls,
|
||||
model_id: str,
|
||||
arthur_url: Optional[str] = "https://app.arthur.ai",
|
||||
arthur_login: Optional[str] = None,
|
||||
arthur_password: Optional[str] = None,
|
||||
) -> ArthurCallbackHandler:
|
||||
"""Initialize callback handler from Arthur credentials.
|
||||
|
||||
Args:
|
||||
model_id (str): The ID of the arthur model to log to.
|
||||
arthur_url (str, optional): The URL of the Arthur instance to log to.
|
||||
Defaults to "https://app.arthur.ai".
|
||||
arthur_login (str, optional): The login to use to connect to Arthur.
|
||||
Defaults to None.
|
||||
arthur_password (str, optional): The password to use to connect to
|
||||
Arthur. Defaults to None.
|
||||
|
||||
Returns:
|
||||
ArthurCallbackHandler: The initialized callback handler.
|
||||
"""
|
||||
arthurai = _lazy_load_arthur()
|
||||
ArthurAI = arthurai.ArthurAI
|
||||
ResponseClientError = arthurai.common.exceptions.ResponseClientError
|
||||
|
||||
# connect to Arthur
|
||||
if arthur_login is None:
|
||||
try:
|
||||
arthur_api_key = os.environ["ARTHUR_API_KEY"]
|
||||
except KeyError:
|
||||
raise ValueError(
|
||||
"No Arthur authentication provided. Either give"
|
||||
" a login to the ArthurCallbackHandler"
|
||||
" or set an ARTHUR_API_KEY as an environment variable."
|
||||
)
|
||||
arthur = ArthurAI(url=arthur_url, access_key=arthur_api_key)
|
||||
else:
|
||||
if arthur_password is None:
|
||||
arthur = ArthurAI(url=arthur_url, login=arthur_login)
|
||||
else:
|
||||
arthur = ArthurAI(
|
||||
url=arthur_url, login=arthur_login, password=arthur_password
|
||||
)
|
||||
# get model from Arthur by the provided model ID
|
||||
try:
|
||||
arthur_model = arthur.get_model(model_id)
|
||||
except ResponseClientError:
|
||||
raise ValueError(
|
||||
f"Was unable to retrieve model with id {model_id} from Arthur."
|
||||
" Make sure the ID corresponds to a model that is currently"
|
||||
" registered with your Arthur account."
|
||||
)
|
||||
return cls(arthur_model)
|
||||
|
||||
def on_llm_start(
|
||||
self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
|
||||
) -> None:
|
||||
"""On LLM start, save the input prompts"""
|
||||
run_id = kwargs["run_id"]
|
||||
self.run_map[run_id]["input_texts"] = prompts
|
||||
self.run_map[run_id]["start_time"] = time()
|
||||
|
||||
def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
|
||||
"""On LLM end, send data to Arthur."""
|
||||
try:
|
||||
import pytz # type: ignore[import]
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"Could not import pytz. Please install it with 'pip install pytz'."
|
||||
) from e
|
||||
|
||||
run_id = kwargs["run_id"]
|
||||
|
||||
# get the run params from this run ID,
|
||||
# or raise an error if this run ID has no corresponding metadata in self.run_map
|
||||
try:
|
||||
run_map_data = self.run_map[run_id]
|
||||
except KeyError as e:
|
||||
raise KeyError(
|
||||
"This function has been called with a run_id"
|
||||
" that was never registered in on_llm_start()."
|
||||
" Restart and try running the LLM again"
|
||||
) from e
|
||||
|
||||
# mark the duration time between on_llm_start() and on_llm_end()
|
||||
time_from_start_to_end = time() - run_map_data["start_time"]
|
||||
|
||||
# create inferences to log to Arthur
|
||||
inferences = []
|
||||
for i, generations in enumerate(response.generations):
|
||||
for generation in generations:
|
||||
inference = {
|
||||
"partner_inference_id": str(uuid.uuid4()),
|
||||
"inference_timestamp": datetime.now(tz=pytz.UTC),
|
||||
self.input_attr: run_map_data["input_texts"][i],
|
||||
self.output_attr: generation.text,
|
||||
}
|
||||
|
||||
if generation.generation_info is not None:
|
||||
# add finish reason to the inference
|
||||
# if generation info contains a finish reason and
|
||||
# if the ArthurModel was registered to monitor finish_reason
|
||||
if (
|
||||
FINISH_REASON in generation.generation_info
|
||||
and FINISH_REASON in self.attr_names
|
||||
):
|
||||
inference[FINISH_REASON] = generation.generation_info[
|
||||
FINISH_REASON
|
||||
]
|
||||
|
||||
# add token likelihoods data to the inference if the ArthurModel
|
||||
# was registered to monitor token likelihoods
|
||||
logprobs_data = generation.generation_info["logprobs"]
|
||||
if (
|
||||
logprobs_data is not None
|
||||
and self.token_likelihood_attr is not None
|
||||
):
|
||||
logprobs = logprobs_data["top_logprobs"]
|
||||
likelihoods = [
|
||||
{k: np.exp(v) for k, v in logprobs[i].items()}
|
||||
for i in range(len(logprobs))
|
||||
]
|
||||
inference[self.token_likelihood_attr] = likelihoods
|
||||
|
||||
# add token usage counts to the inference if the
|
||||
# ArthurModel was registered to monitor token usage
|
||||
if (
|
||||
isinstance(response.llm_output, dict)
|
||||
and TOKEN_USAGE in response.llm_output
|
||||
):
|
||||
token_usage = response.llm_output[TOKEN_USAGE]
|
||||
if (
|
||||
PROMPT_TOKENS in token_usage
|
||||
and PROMPT_TOKENS in self.attr_names
|
||||
):
|
||||
inference[PROMPT_TOKENS] = token_usage[PROMPT_TOKENS]
|
||||
if (
|
||||
COMPLETION_TOKENS in token_usage
|
||||
and COMPLETION_TOKENS in self.attr_names
|
||||
):
|
||||
inference[COMPLETION_TOKENS] = token_usage[COMPLETION_TOKENS]
|
||||
|
||||
# add inference duration to the inference if the ArthurModel
|
||||
# was registered to monitor inference duration
|
||||
if DURATION in self.attr_names:
|
||||
inference[DURATION] = time_from_start_to_end
|
||||
|
||||
inferences.append(inference)
|
||||
|
||||
# send inferences to arthur
|
||||
self.arthur_model.send_inferences(inferences)
|
||||
|
||||
def on_chain_start(
|
||||
self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
|
||||
) -> None:
|
||||
"""On chain start, do nothing."""
|
||||
|
||||
def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
|
||||
"""On chain end, do nothing."""
|
||||
|
||||
def on_llm_error(
|
||||
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
|
||||
) -> None:
|
||||
"""Do nothing when LLM outputs an error."""
|
||||
|
||||
def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
|
||||
"""On new token, pass."""
|
||||
|
||||
def on_chain_error(
|
||||
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
|
||||
) -> None:
|
||||
"""Do nothing when LLM chain outputs an error."""
|
||||
|
||||
def on_tool_start(
|
||||
self,
|
||||
serialized: Dict[str, Any],
|
||||
input_str: str,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Do nothing when tool starts."""
|
||||
|
||||
def on_agent_action(self, action: AgentAction, **kwargs: Any) -> Any:
|
||||
"""Do nothing when agent takes a specific action."""
|
||||
|
||||
def on_tool_end(
|
||||
self,
|
||||
output: str,
|
||||
observation_prefix: Optional[str] = None,
|
||||
llm_prefix: Optional[str] = None,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Do nothing when tool ends."""
|
||||
|
||||
def on_tool_error(
|
||||
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
|
||||
) -> None:
|
||||
"""Do nothing when tool outputs an error."""
|
||||
|
||||
def on_text(self, text: str, **kwargs: Any) -> None:
|
||||
"""Do nothing"""
|
||||
|
||||
def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> None:
|
||||
"""Do nothing"""
|
||||
@@ -1,15 +1,37 @@
|
||||
"""Base callback handler that can be used to handle callbacks in langchain."""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
from typing import Any, Dict, List, Optional, Sequence, Union
|
||||
from uuid import UUID
|
||||
|
||||
from langchain.schema import (
|
||||
AgentAction,
|
||||
AgentFinish,
|
||||
BaseMessage,
|
||||
LLMResult,
|
||||
)
|
||||
from langchain.schema.agent import AgentAction, AgentFinish
|
||||
from langchain.schema.document import Document
|
||||
from langchain.schema.messages import BaseMessage
|
||||
from langchain.schema.output import LLMResult
|
||||
|
||||
|
||||
class RetrieverManagerMixin:
|
||||
"""Mixin for Retriever callbacks."""
|
||||
|
||||
def on_retriever_error(
|
||||
self,
|
||||
error: Union[Exception, KeyboardInterrupt],
|
||||
*,
|
||||
run_id: UUID,
|
||||
parent_run_id: Optional[UUID] = None,
|
||||
**kwargs: Any,
|
||||
) -> Any:
|
||||
"""Run when Retriever errors."""
|
||||
|
||||
def on_retriever_end(
|
||||
self,
|
||||
documents: Sequence[Document],
|
||||
*,
|
||||
run_id: UUID,
|
||||
parent_run_id: Optional[UUID] = None,
|
||||
**kwargs: Any,
|
||||
) -> Any:
|
||||
"""Run when Retriever ends running."""
|
||||
|
||||
|
||||
class LLMManagerMixin:
|
||||
@@ -144,6 +166,16 @@ class CallbackManagerMixin:
|
||||
f"{self.__class__.__name__} does not implement `on_chat_model_start`"
|
||||
)
|
||||
|
||||
def on_retriever_start(
|
||||
self,
|
||||
query: str,
|
||||
*,
|
||||
run_id: UUID,
|
||||
parent_run_id: Optional[UUID] = None,
|
||||
**kwargs: Any,
|
||||
) -> Any:
|
||||
"""Run when Retriever starts running."""
|
||||
|
||||
def on_chain_start(
|
||||
self,
|
||||
serialized: Dict[str, Any],
|
||||
@@ -187,6 +219,7 @@ class BaseCallbackHandler(
|
||||
LLMManagerMixin,
|
||||
ChainManagerMixin,
|
||||
ToolManagerMixin,
|
||||
RetrieverManagerMixin,
|
||||
CallbackManagerMixin,
|
||||
RunManagerMixin,
|
||||
):
|
||||
@@ -211,6 +244,11 @@ class BaseCallbackHandler(
|
||||
"""Whether to ignore agent callbacks."""
|
||||
return False
|
||||
|
||||
@property
|
||||
def ignore_retriever(self) -> bool:
|
||||
"""Whether to ignore retriever callbacks."""
|
||||
return False
|
||||
|
||||
@property
|
||||
def ignore_chat_model(self) -> bool:
|
||||
"""Whether to ignore chat model callbacks."""
|
||||
@@ -371,6 +409,36 @@ class AsyncCallbackHandler(BaseCallbackHandler):
|
||||
) -> None:
|
||||
"""Run on agent end."""
|
||||
|
||||
async def on_retriever_start(
|
||||
self,
|
||||
query: str,
|
||||
*,
|
||||
run_id: UUID,
|
||||
parent_run_id: Optional[UUID] = None,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Run on retriever start."""
|
||||
|
||||
async def on_retriever_end(
|
||||
self,
|
||||
documents: Sequence[Document],
|
||||
*,
|
||||
run_id: UUID,
|
||||
parent_run_id: Optional[UUID] = None,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Run on retriever end."""
|
||||
|
||||
async def on_retriever_error(
|
||||
self,
|
||||
error: Union[Exception, KeyboardInterrupt],
|
||||
*,
|
||||
run_id: UUID,
|
||||
parent_run_id: Optional[UUID] = None,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Run on retriever error."""
|
||||
|
||||
|
||||
class BaseCallbackManager(CallbackManagerMixin):
|
||||
"""Base callback manager that can be used to handle callbacks from LangChain."""
|
||||
|
||||
366
langchain/callbacks/flyte_callback.py
Normal file
366
langchain/callbacks/flyte_callback.py
Normal file
@@ -0,0 +1,366 @@
|
||||
"""FlyteKit callback handler."""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from copy import deepcopy
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Union
|
||||
|
||||
from langchain.callbacks.base import BaseCallbackHandler
|
||||
from langchain.callbacks.utils import (
|
||||
BaseMetadataCallbackHandler,
|
||||
flatten_dict,
|
||||
import_pandas,
|
||||
import_spacy,
|
||||
import_textstat,
|
||||
)
|
||||
from langchain.schema import AgentAction, AgentFinish, LLMResult
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import flytekit
|
||||
from flytekitplugins.deck import renderer
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def import_flytekit() -> Tuple[flytekit, renderer]:
|
||||
try:
|
||||
import flytekit # noqa: F401
|
||||
from flytekitplugins.deck import renderer # noqa: F401
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"To use the flyte callback manager you need"
|
||||
"to have the `flytekit` and `flytekitplugins-deck-standard`"
|
||||
"packages installed. Please install them with `pip install flytekit`"
|
||||
"and `pip install flytekitplugins-deck-standard`."
|
||||
)
|
||||
return flytekit, renderer
|
||||
|
||||
|
||||
def analyze_text(
|
||||
text: str,
|
||||
nlp: Any = None,
|
||||
) -> dict:
|
||||
"""Analyze text using textstat and spacy.
|
||||
|
||||
Parameters:
|
||||
text (str): The text to analyze.
|
||||
nlp (spacy.lang): The spacy language model to use for visualization.
|
||||
|
||||
Returns:
|
||||
(dict): A dictionary containing the complexity metrics and visualization
|
||||
files serialized to HTML string.
|
||||
"""
|
||||
resp: Dict[str, Any] = {}
|
||||
textstat = import_textstat()
|
||||
text_complexity_metrics = {
|
||||
"flesch_reading_ease": textstat.flesch_reading_ease(text),
|
||||
"flesch_kincaid_grade": textstat.flesch_kincaid_grade(text),
|
||||
"smog_index": textstat.smog_index(text),
|
||||
"coleman_liau_index": textstat.coleman_liau_index(text),
|
||||
"automated_readability_index": textstat.automated_readability_index(text),
|
||||
"dale_chall_readability_score": textstat.dale_chall_readability_score(text),
|
||||
"difficult_words": textstat.difficult_words(text),
|
||||
"linsear_write_formula": textstat.linsear_write_formula(text),
|
||||
"gunning_fog": textstat.gunning_fog(text),
|
||||
"fernandez_huerta": textstat.fernandez_huerta(text),
|
||||
"szigriszt_pazos": textstat.szigriszt_pazos(text),
|
||||
"gutierrez_polini": textstat.gutierrez_polini(text),
|
||||
"crawford": textstat.crawford(text),
|
||||
"gulpease_index": textstat.gulpease_index(text),
|
||||
"osman": textstat.osman(text),
|
||||
}
|
||||
resp.update({"text_complexity_metrics": text_complexity_metrics})
|
||||
resp.update(text_complexity_metrics)
|
||||
|
||||
if nlp is not None:
|
||||
spacy = import_spacy()
|
||||
doc = nlp(text)
|
||||
dep_out = spacy.displacy.render( # type: ignore
|
||||
doc, style="dep", jupyter=False, page=True
|
||||
)
|
||||
|
||||
ent_out = spacy.displacy.render( # type: ignore
|
||||
doc, style="ent", jupyter=False, page=True
|
||||
)
|
||||
|
||||
text_visualizations = {
|
||||
"dependency_tree": dep_out,
|
||||
"entities": ent_out,
|
||||
}
|
||||
|
||||
resp.update(text_visualizations)
|
||||
|
||||
return resp
|
||||
|
||||
|
||||
class FlyteCallbackHandler(BaseMetadataCallbackHandler, BaseCallbackHandler):
|
||||
"""This callback handler is designed specifically for usage within a Flyte task."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
"""Initialize callback handler."""
|
||||
import_textstat() # Raise error since it is required
|
||||
flytekit, renderer = import_flytekit()
|
||||
self.pandas = import_pandas()
|
||||
|
||||
spacy = None
|
||||
try:
|
||||
spacy = import_spacy()
|
||||
except ImportError:
|
||||
logger.warning(
|
||||
"Spacy library is not installed. \
|
||||
It may result in the inability to log \
|
||||
certain metrics that can be captured with Spacy."
|
||||
)
|
||||
|
||||
super().__init__()
|
||||
|
||||
self.nlp = None
|
||||
if spacy:
|
||||
try:
|
||||
self.nlp = spacy.load("en_core_web_sm")
|
||||
except OSError:
|
||||
logger.warning(
|
||||
"FlyteCallbackHandler uses spacy's en_core_web_sm model"
|
||||
" for certain metrics. To download,"
|
||||
" run the following command in your terminal:"
|
||||
" `python -m spacy download en_core_web_sm` command."
|
||||
)
|
||||
|
||||
self.table_renderer = renderer.TableRenderer
|
||||
self.markdown_renderer = renderer.MarkdownRenderer
|
||||
|
||||
self.deck = flytekit.Deck(
|
||||
"LangChain Metrics",
|
||||
self.markdown_renderer().to_html("## LangChain Metrics"),
|
||||
)
|
||||
|
||||
def on_llm_start(
|
||||
self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
|
||||
) -> None:
|
||||
"""Run when LLM starts."""
|
||||
|
||||
self.step += 1
|
||||
self.llm_starts += 1
|
||||
self.starts += 1
|
||||
|
||||
resp: Dict[str, Any] = {}
|
||||
resp.update({"action": "on_llm_start"})
|
||||
resp.update(flatten_dict(serialized))
|
||||
resp.update(self.get_custom_callback_meta())
|
||||
|
||||
prompt_responses = []
|
||||
for prompt in prompts:
|
||||
prompt_responses.append(prompt)
|
||||
|
||||
resp.update({"prompts": prompt_responses})
|
||||
|
||||
self.deck.append(self.markdown_renderer().to_html("### LLM Start"))
|
||||
self.deck.append(
|
||||
self.table_renderer().to_html(self.pandas.DataFrame([resp])) + "\n"
|
||||
)
|
||||
|
||||
def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
|
||||
"""Run when LLM generates a new token."""
|
||||
|
||||
def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
|
||||
"""Run when LLM ends running."""
|
||||
self.step += 1
|
||||
self.llm_ends += 1
|
||||
self.ends += 1
|
||||
|
||||
resp: Dict[str, Any] = {}
|
||||
resp.update({"action": "on_llm_end"})
|
||||
resp.update(flatten_dict(response.llm_output or {}))
|
||||
resp.update(self.get_custom_callback_meta())
|
||||
|
||||
self.deck.append(self.markdown_renderer().to_html("### LLM End"))
|
||||
self.deck.append(self.table_renderer().to_html(self.pandas.DataFrame([resp])))
|
||||
|
||||
for generations in response.generations:
|
||||
for generation in generations:
|
||||
generation_resp = deepcopy(resp)
|
||||
generation_resp.update(flatten_dict(generation.dict()))
|
||||
if self.nlp:
|
||||
generation_resp.update(
|
||||
analyze_text(
|
||||
generation.text,
|
||||
nlp=self.nlp,
|
||||
)
|
||||
)
|
||||
|
||||
complexity_metrics: Dict[str, float] = generation_resp.pop("text_complexity_metrics") # type: ignore # noqa: E501
|
||||
self.deck.append(
|
||||
self.markdown_renderer().to_html("#### Text Complexity Metrics")
|
||||
)
|
||||
self.deck.append(
|
||||
self.table_renderer().to_html(
|
||||
self.pandas.DataFrame([complexity_metrics])
|
||||
)
|
||||
+ "\n"
|
||||
)
|
||||
|
||||
dependency_tree = generation_resp["dependency_tree"]
|
||||
self.deck.append(
|
||||
self.markdown_renderer().to_html("#### Dependency Tree")
|
||||
)
|
||||
self.deck.append(dependency_tree)
|
||||
|
||||
entities = generation_resp["entities"]
|
||||
self.deck.append(self.markdown_renderer().to_html("#### Entities"))
|
||||
self.deck.append(entities)
|
||||
else:
|
||||
self.deck.append(
|
||||
self.markdown_renderer().to_html("#### Generated Response")
|
||||
)
|
||||
self.deck.append(self.markdown_renderer().to_html(generation.text))
|
||||
|
||||
def on_llm_error(
|
||||
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
|
||||
) -> None:
|
||||
"""Run when LLM errors."""
|
||||
self.step += 1
|
||||
self.errors += 1
|
||||
|
||||
def on_chain_start(
|
||||
self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
|
||||
) -> None:
|
||||
"""Run when chain starts running."""
|
||||
self.step += 1
|
||||
self.chain_starts += 1
|
||||
self.starts += 1
|
||||
|
||||
resp: Dict[str, Any] = {}
|
||||
resp.update({"action": "on_chain_start"})
|
||||
resp.update(flatten_dict(serialized))
|
||||
resp.update(self.get_custom_callback_meta())
|
||||
|
||||
chain_input = ",".join([f"{k}={v}" for k, v in inputs.items()])
|
||||
input_resp = deepcopy(resp)
|
||||
input_resp["inputs"] = chain_input
|
||||
|
||||
self.deck.append(self.markdown_renderer().to_html("### Chain Start"))
|
||||
self.deck.append(
|
||||
self.table_renderer().to_html(self.pandas.DataFrame([input_resp])) + "\n"
|
||||
)
|
||||
|
||||
def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
|
||||
"""Run when chain ends running."""
|
||||
self.step += 1
|
||||
self.chain_ends += 1
|
||||
self.ends += 1
|
||||
|
||||
resp: Dict[str, Any] = {}
|
||||
chain_output = ",".join([f"{k}={v}" for k, v in outputs.items()])
|
||||
resp.update({"action": "on_chain_end", "outputs": chain_output})
|
||||
resp.update(self.get_custom_callback_meta())
|
||||
|
||||
self.deck.append(self.markdown_renderer().to_html("### Chain End"))
|
||||
self.deck.append(
|
||||
self.table_renderer().to_html(self.pandas.DataFrame([resp])) + "\n"
|
||||
)
|
||||
|
||||
def on_chain_error(
|
||||
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
|
||||
) -> None:
|
||||
"""Run when chain errors."""
|
||||
self.step += 1
|
||||
self.errors += 1
|
||||
|
||||
def on_tool_start(
|
||||
self, serialized: Dict[str, Any], input_str: str, **kwargs: Any
|
||||
) -> None:
|
||||
"""Run when tool starts running."""
|
||||
self.step += 1
|
||||
self.tool_starts += 1
|
||||
self.starts += 1
|
||||
|
||||
resp: Dict[str, Any] = {}
|
||||
resp.update({"action": "on_tool_start", "input_str": input_str})
|
||||
resp.update(flatten_dict(serialized))
|
||||
resp.update(self.get_custom_callback_meta())
|
||||
|
||||
self.deck.append(self.markdown_renderer().to_html("### Tool Start"))
|
||||
self.deck.append(
|
||||
self.table_renderer().to_html(self.pandas.DataFrame([resp])) + "\n"
|
||||
)
|
||||
|
||||
def on_tool_end(self, output: str, **kwargs: Any) -> None:
|
||||
"""Run when tool ends running."""
|
||||
self.step += 1
|
||||
self.tool_ends += 1
|
||||
self.ends += 1
|
||||
|
||||
resp: Dict[str, Any] = {}
|
||||
resp.update({"action": "on_tool_end", "output": output})
|
||||
resp.update(self.get_custom_callback_meta())
|
||||
|
||||
self.deck.append(self.markdown_renderer().to_html("### Tool End"))
|
||||
self.deck.append(
|
||||
self.table_renderer().to_html(self.pandas.DataFrame([resp])) + "\n"
|
||||
)
|
||||
|
||||
def on_tool_error(
|
||||
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
|
||||
) -> None:
|
||||
"""Run when tool errors."""
|
||||
self.step += 1
|
||||
self.errors += 1
|
||||
|
||||
def on_text(self, text: str, **kwargs: Any) -> None:
|
||||
"""
|
||||
Run when agent is ending.
|
||||
"""
|
||||
self.step += 1
|
||||
self.text_ctr += 1
|
||||
|
||||
resp: Dict[str, Any] = {}
|
||||
resp.update({"action": "on_text", "text": text})
|
||||
resp.update(self.get_custom_callback_meta())
|
||||
|
||||
self.deck.append(self.markdown_renderer().to_html("### On Text"))
|
||||
self.deck.append(
|
||||
self.table_renderer().to_html(self.pandas.DataFrame([resp])) + "\n"
|
||||
)
|
||||
|
||||
def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> None:
|
||||
"""Run when agent ends running."""
|
||||
self.step += 1
|
||||
self.agent_ends += 1
|
||||
self.ends += 1
|
||||
|
||||
resp: Dict[str, Any] = {}
|
||||
resp.update(
|
||||
{
|
||||
"action": "on_agent_finish",
|
||||
"output": finish.return_values["output"],
|
||||
"log": finish.log,
|
||||
}
|
||||
)
|
||||
resp.update(self.get_custom_callback_meta())
|
||||
|
||||
self.deck.append(self.markdown_renderer().to_html("### Agent Finish"))
|
||||
self.deck.append(
|
||||
self.table_renderer().to_html(self.pandas.DataFrame([resp])) + "\n"
|
||||
)
|
||||
|
||||
def on_agent_action(self, action: AgentAction, **kwargs: Any) -> Any:
|
||||
"""Run on agent action."""
|
||||
self.step += 1
|
||||
self.tool_starts += 1
|
||||
self.starts += 1
|
||||
|
||||
resp: Dict[str, Any] = {}
|
||||
resp.update(
|
||||
{
|
||||
"action": "on_agent_action",
|
||||
"tool": action.tool,
|
||||
"tool_input": action.tool_input,
|
||||
"log": action.log,
|
||||
}
|
||||
)
|
||||
resp.update(self.get_custom_callback_meta())
|
||||
|
||||
self.deck.append(self.markdown_renderer().to_html("### Agent Action"))
|
||||
self.deck.append(
|
||||
self.table_renderer().to_html(self.pandas.DataFrame([resp])) + "\n"
|
||||
)
|
||||
@@ -14,6 +14,7 @@ from typing import (
|
||||
Generator,
|
||||
List,
|
||||
Optional,
|
||||
Sequence,
|
||||
Type,
|
||||
TypeVar,
|
||||
Union,
|
||||
@@ -27,6 +28,7 @@ from langchain.callbacks.base import (
|
||||
BaseCallbackManager,
|
||||
ChainManagerMixin,
|
||||
LLMManagerMixin,
|
||||
RetrieverManagerMixin,
|
||||
RunManagerMixin,
|
||||
ToolManagerMixin,
|
||||
)
|
||||
@@ -39,10 +41,10 @@ from langchain.callbacks.tracers.wandb import WandbTracer
|
||||
from langchain.schema import (
|
||||
AgentAction,
|
||||
AgentFinish,
|
||||
BaseMessage,
|
||||
Document,
|
||||
LLMResult,
|
||||
get_buffer_string,
|
||||
)
|
||||
from langchain.schema.messages import BaseMessage, get_buffer_string
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
Callbacks = Optional[Union[List[BaseCallbackHandler], BaseCallbackManager]]
|
||||
@@ -379,8 +381,8 @@ class BaseRunManager(RunManagerMixin):
|
||||
handlers: List[BaseCallbackHandler],
|
||||
inheritable_handlers: List[BaseCallbackHandler],
|
||||
parent_run_id: Optional[UUID] = None,
|
||||
tags: List[str],
|
||||
inheritable_tags: List[str],
|
||||
tags: Optional[List[str]] = None,
|
||||
inheritable_tags: Optional[List[str]] = None,
|
||||
) -> None:
|
||||
"""Initialize the run manager.
|
||||
|
||||
@@ -391,15 +393,15 @@ class BaseRunManager(RunManagerMixin):
|
||||
The list of inheritable handlers.
|
||||
parent_run_id (UUID, optional): The ID of the parent run.
|
||||
Defaults to None.
|
||||
tags (List[str]): The list of tags.
|
||||
inheritable_tags (List[str]): The list of inheritable tags.
|
||||
tags (Optional[List[str]]): The list of tags.
|
||||
inheritable_tags (Optional[List[str]]): The list of inheritable tags.
|
||||
"""
|
||||
self.run_id = run_id
|
||||
self.handlers = handlers
|
||||
self.inheritable_handlers = inheritable_handlers
|
||||
self.tags = tags
|
||||
self.inheritable_tags = inheritable_tags
|
||||
self.parent_run_id = parent_run_id
|
||||
self.tags = tags or []
|
||||
self.inheritable_tags = inheritable_tags or []
|
||||
|
||||
@classmethod
|
||||
def get_noop_manager(cls: Type[BRM]) -> BRM:
|
||||
@@ -899,6 +901,97 @@ class AsyncCallbackManagerForToolRun(AsyncRunManager, ToolManagerMixin):
|
||||
)
|
||||
|
||||
|
||||
class CallbackManagerForRetrieverRun(RunManager, RetrieverManagerMixin):
|
||||
"""Callback manager for retriever run."""
|
||||
|
||||
def get_child(self, tag: Optional[str] = None) -> CallbackManager:
|
||||
"""Get a child callback manager."""
|
||||
manager = CallbackManager([], parent_run_id=self.run_id)
|
||||
manager.set_handlers(self.inheritable_handlers)
|
||||
manager.add_tags(self.inheritable_tags)
|
||||
if tag is not None:
|
||||
manager.add_tags([tag], False)
|
||||
return manager
|
||||
|
||||
def on_retriever_end(
|
||||
self,
|
||||
documents: Sequence[Document],
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Run when retriever ends running."""
|
||||
_handle_event(
|
||||
self.handlers,
|
||||
"on_retriever_end",
|
||||
"ignore_retriever",
|
||||
documents,
|
||||
run_id=self.run_id,
|
||||
parent_run_id=self.parent_run_id,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
def on_retriever_error(
|
||||
self,
|
||||
error: Union[Exception, KeyboardInterrupt],
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Run when retriever errors."""
|
||||
_handle_event(
|
||||
self.handlers,
|
||||
"on_retriever_error",
|
||||
"ignore_retriever",
|
||||
error,
|
||||
run_id=self.run_id,
|
||||
parent_run_id=self.parent_run_id,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
|
||||
class AsyncCallbackManagerForRetrieverRun(
|
||||
AsyncRunManager,
|
||||
RetrieverManagerMixin,
|
||||
):
|
||||
"""Async callback manager for retriever run."""
|
||||
|
||||
def get_child(self, tag: Optional[str] = None) -> AsyncCallbackManager:
|
||||
"""Get a child callback manager."""
|
||||
manager = AsyncCallbackManager([], parent_run_id=self.run_id)
|
||||
manager.set_handlers(self.inheritable_handlers)
|
||||
manager.add_tags(self.inheritable_tags)
|
||||
if tag is not None:
|
||||
manager.add_tags([tag], False)
|
||||
return manager
|
||||
|
||||
async def on_retriever_end(
|
||||
self, documents: Sequence[Document], **kwargs: Any
|
||||
) -> None:
|
||||
"""Run when retriever ends running."""
|
||||
await _ahandle_event(
|
||||
self.handlers,
|
||||
"on_retriever_end",
|
||||
"ignore_retriever",
|
||||
documents,
|
||||
run_id=self.run_id,
|
||||
parent_run_id=self.parent_run_id,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
async def on_retriever_error(
|
||||
self,
|
||||
error: Union[Exception, KeyboardInterrupt],
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Run when retriever errors."""
|
||||
await _ahandle_event(
|
||||
self.handlers,
|
||||
"on_retriever_error",
|
||||
"ignore_retriever",
|
||||
error,
|
||||
run_id=self.run_id,
|
||||
parent_run_id=self.parent_run_id,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
|
||||
class CallbackManager(BaseCallbackManager):
|
||||
"""Callback manager that can be used to handle callbacks from langchain."""
|
||||
|
||||
@@ -1077,6 +1170,36 @@ class CallbackManager(BaseCallbackManager):
|
||||
inheritable_tags=self.inheritable_tags,
|
||||
)
|
||||
|
||||
def on_retriever_start(
|
||||
self,
|
||||
query: str,
|
||||
run_id: Optional[UUID] = None,
|
||||
parent_run_id: Optional[UUID] = None,
|
||||
**kwargs: Any,
|
||||
) -> CallbackManagerForRetrieverRun:
|
||||
"""Run when retriever starts running."""
|
||||
if run_id is None:
|
||||
run_id = uuid4()
|
||||
|
||||
_handle_event(
|
||||
self.handlers,
|
||||
"on_retriever_start",
|
||||
"ignore_retriever",
|
||||
query,
|
||||
run_id=run_id,
|
||||
parent_run_id=self.parent_run_id,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
return CallbackManagerForRetrieverRun(
|
||||
run_id=run_id,
|
||||
handlers=self.handlers,
|
||||
inheritable_handlers=self.inheritable_handlers,
|
||||
parent_run_id=self.parent_run_id,
|
||||
tags=self.tags,
|
||||
inheritable_tags=self.inheritable_tags,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def configure(
|
||||
cls,
|
||||
@@ -1313,6 +1436,36 @@ class AsyncCallbackManager(BaseCallbackManager):
|
||||
inheritable_tags=self.inheritable_tags,
|
||||
)
|
||||
|
||||
async def on_retriever_start(
|
||||
self,
|
||||
query: str,
|
||||
run_id: Optional[UUID] = None,
|
||||
parent_run_id: Optional[UUID] = None,
|
||||
**kwargs: Any,
|
||||
) -> AsyncCallbackManagerForRetrieverRun:
|
||||
"""Run when retriever starts running."""
|
||||
if run_id is None:
|
||||
run_id = uuid4()
|
||||
|
||||
await _ahandle_event(
|
||||
self.handlers,
|
||||
"on_retriever_start",
|
||||
"ignore_retriever",
|
||||
query,
|
||||
run_id=run_id,
|
||||
parent_run_id=self.parent_run_id,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
return AsyncCallbackManagerForRetrieverRun(
|
||||
run_id=run_id,
|
||||
handlers=self.handlers,
|
||||
inheritable_handlers=self.inheritable_handlers,
|
||||
parent_run_id=self.parent_run_id,
|
||||
tags=self.tags,
|
||||
inheritable_tags=self.inheritable_tags,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def configure(
|
||||
cls,
|
||||
|
||||
162
langchain/callbacks/promptlayer_callback.py
Normal file
162
langchain/callbacks/promptlayer_callback.py
Normal file
@@ -0,0 +1,162 @@
|
||||
"""Callback handler for promptlayer."""
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple
|
||||
from uuid import UUID
|
||||
|
||||
from langchain.callbacks.base import BaseCallbackHandler
|
||||
from langchain.schema import (
|
||||
ChatGeneration,
|
||||
LLMResult,
|
||||
)
|
||||
from langchain.schema.messages import (
|
||||
AIMessage,
|
||||
BaseMessage,
|
||||
ChatMessage,
|
||||
HumanMessage,
|
||||
SystemMessage,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import promptlayer
|
||||
|
||||
|
||||
def _lazy_import_promptlayer() -> promptlayer:
|
||||
"""Lazy import promptlayer to avoid circular imports."""
|
||||
try:
|
||||
import promptlayer
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"The PromptLayerCallbackHandler requires the promptlayer package. "
|
||||
" Please install it with `pip install promptlayer`."
|
||||
)
|
||||
return promptlayer
|
||||
|
||||
|
||||
class PromptLayerCallbackHandler(BaseCallbackHandler):
|
||||
"""Callback handler for promptlayer."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
pl_id_callback: Optional[Callable[..., Any]] = None,
|
||||
pl_tags: Optional[List[str]] = [],
|
||||
) -> None:
|
||||
"""Initialize the PromptLayerCallbackHandler."""
|
||||
_lazy_import_promptlayer()
|
||||
self.pl_id_callback = pl_id_callback
|
||||
self.pl_tags = pl_tags
|
||||
self.runs: Dict[UUID, Dict[str, Any]] = {}
|
||||
|
||||
def on_chat_model_start(
|
||||
self,
|
||||
serialized: Dict[str, Any],
|
||||
messages: List[List[BaseMessage]],
|
||||
*,
|
||||
run_id: UUID,
|
||||
parent_run_id: Optional[UUID] = None,
|
||||
tags: Optional[List[str]] = None,
|
||||
**kwargs: Any,
|
||||
) -> Any:
|
||||
self.runs[run_id] = {
|
||||
"messages": [self._create_message_dicts(m)[0] for m in messages],
|
||||
"invocation_params": kwargs.get("invocation_params", {}),
|
||||
"name": ".".join(serialized["id"]),
|
||||
"request_start_time": datetime.datetime.now().timestamp(),
|
||||
"tags": tags,
|
||||
}
|
||||
|
||||
def on_llm_start(
|
||||
self,
|
||||
serialized: Dict[str, Any],
|
||||
prompts: List[str],
|
||||
*,
|
||||
run_id: UUID,
|
||||
parent_run_id: Optional[UUID] = None,
|
||||
tags: Optional[List[str]] = None,
|
||||
**kwargs: Any,
|
||||
) -> Any:
|
||||
self.runs[run_id] = {
|
||||
"prompts": prompts,
|
||||
"invocation_params": kwargs.get("invocation_params", {}),
|
||||
"name": ".".join(serialized["id"]),
|
||||
"request_start_time": datetime.datetime.now().timestamp(),
|
||||
"tags": tags,
|
||||
}
|
||||
|
||||
def on_llm_end(
|
||||
self,
|
||||
response: LLMResult,
|
||||
*,
|
||||
run_id: UUID,
|
||||
parent_run_id: Optional[UUID] = None,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
from promptlayer.utils import get_api_key, promptlayer_api_request
|
||||
|
||||
run_info = self.runs.get(run_id, {})
|
||||
if not run_info:
|
||||
return
|
||||
run_info["request_end_time"] = datetime.datetime.now().timestamp()
|
||||
for i in range(len(response.generations)):
|
||||
generation = response.generations[i][0]
|
||||
|
||||
resp = {
|
||||
"text": generation.text,
|
||||
"llm_output": response.llm_output,
|
||||
}
|
||||
model_params = run_info.get("invocation_params", {})
|
||||
is_chat_model = run_info.get("messages", None) is not None
|
||||
model_input = (
|
||||
run_info.get("messages", [])[i]
|
||||
if is_chat_model
|
||||
else [run_info.get("prompts", [])[i]]
|
||||
)
|
||||
model_response = (
|
||||
[self._convert_message_to_dict(generation.message)]
|
||||
if is_chat_model and isinstance(generation, ChatGeneration)
|
||||
else resp
|
||||
)
|
||||
|
||||
pl_request_id = promptlayer_api_request(
|
||||
run_info.get("name"),
|
||||
"langchain",
|
||||
model_input,
|
||||
model_params,
|
||||
self.pl_tags,
|
||||
model_response,
|
||||
run_info.get("request_start_time"),
|
||||
run_info.get("request_end_time"),
|
||||
get_api_key(),
|
||||
return_pl_id=bool(self.pl_id_callback is not None),
|
||||
metadata={
|
||||
"_langchain_run_id": str(run_id),
|
||||
"_langchain_parent_run_id": str(parent_run_id),
|
||||
"_langchain_tags": str(run_info.get("tags", [])),
|
||||
},
|
||||
)
|
||||
|
||||
if self.pl_id_callback:
|
||||
self.pl_id_callback(pl_request_id)
|
||||
|
||||
def _convert_message_to_dict(self, message: BaseMessage) -> Dict[str, Any]:
|
||||
if isinstance(message, HumanMessage):
|
||||
message_dict = {"role": "user", "content": message.content}
|
||||
elif isinstance(message, AIMessage):
|
||||
message_dict = {"role": "assistant", "content": message.content}
|
||||
elif isinstance(message, SystemMessage):
|
||||
message_dict = {"role": "system", "content": message.content}
|
||||
elif isinstance(message, ChatMessage):
|
||||
message_dict = {"role": message.role, "content": message.content}
|
||||
else:
|
||||
raise ValueError(f"Got unknown type {message}")
|
||||
if "name" in message.additional_kwargs:
|
||||
message_dict["name"] = message.additional_kwargs["name"]
|
||||
return message_dict
|
||||
|
||||
def _create_message_dicts(
|
||||
self, messages: List[BaseMessage]
|
||||
) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]:
|
||||
params: Dict[str, Any] = {}
|
||||
message_dicts = [self._convert_message_to_dict(m) for m in messages]
|
||||
return message_dicts, params
|
||||
@@ -4,12 +4,12 @@ from __future__ import annotations
|
||||
import logging
|
||||
from abc import ABC, abstractmethod
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
from typing import Any, Dict, List, Optional, Sequence, Union
|
||||
from uuid import UUID
|
||||
|
||||
from langchain.callbacks.base import BaseCallbackHandler
|
||||
from langchain.callbacks.tracers.schemas import Run, RunTypeEnum
|
||||
from langchain.schema import LLMResult
|
||||
from langchain.schema import Document, LLMResult
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -265,6 +265,65 @@ class BaseTracer(BaseCallbackHandler, ABC):
|
||||
self._end_trace(tool_run)
|
||||
self._on_tool_error(tool_run)
|
||||
|
||||
def on_retriever_start(
|
||||
self,
|
||||
query: str,
|
||||
*,
|
||||
run_id: UUID,
|
||||
parent_run_id: Optional[UUID] = None,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Run when Retriever starts running."""
|
||||
parent_run_id_ = str(parent_run_id) if parent_run_id else None
|
||||
execution_order = self._get_execution_order(parent_run_id_)
|
||||
retrieval_run = Run(
|
||||
id=run_id,
|
||||
name="Retriever",
|
||||
parent_run_id=parent_run_id,
|
||||
inputs={"query": query},
|
||||
extra=kwargs,
|
||||
start_time=datetime.utcnow(),
|
||||
execution_order=execution_order,
|
||||
child_execution_order=execution_order,
|
||||
child_runs=[],
|
||||
run_type=RunTypeEnum.retriever,
|
||||
)
|
||||
self._start_trace(retrieval_run)
|
||||
self._on_retriever_start(retrieval_run)
|
||||
|
||||
def on_retriever_error(
|
||||
self,
|
||||
error: Union[Exception, KeyboardInterrupt],
|
||||
*,
|
||||
run_id: UUID,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Run when Retriever errors."""
|
||||
if not run_id:
|
||||
raise TracerException("No run_id provided for on_retriever_error callback.")
|
||||
retrieval_run = self.run_map.get(str(run_id))
|
||||
if retrieval_run is None or retrieval_run.run_type != RunTypeEnum.retriever:
|
||||
raise TracerException("No retriever Run found to be traced")
|
||||
|
||||
retrieval_run.error = repr(error)
|
||||
retrieval_run.end_time = datetime.utcnow()
|
||||
self._end_trace(retrieval_run)
|
||||
self._on_retriever_error(retrieval_run)
|
||||
|
||||
def on_retriever_end(
|
||||
self, documents: Sequence[Document], *, run_id: UUID, **kwargs: Any
|
||||
) -> None:
|
||||
"""Run when Retriever ends running."""
|
||||
if not run_id:
|
||||
raise TracerException("No run_id provided for on_retriever_end callback.")
|
||||
retrieval_run = self.run_map.get(str(run_id))
|
||||
if retrieval_run is None or retrieval_run.run_type != RunTypeEnum.retriever:
|
||||
raise TracerException("No retriever Run found to be traced")
|
||||
retrieval_run.outputs = {"documents": documents}
|
||||
retrieval_run.end_time = datetime.utcnow()
|
||||
self._end_trace(retrieval_run)
|
||||
self._on_retriever_end(retrieval_run)
|
||||
|
||||
def __deepcopy__(self, memo: dict) -> BaseTracer:
|
||||
"""Deepcopy the tracer."""
|
||||
return self
|
||||
@@ -302,3 +361,12 @@ class BaseTracer(BaseCallbackHandler, ABC):
|
||||
|
||||
def _on_chat_model_start(self, run: Run) -> None:
|
||||
"""Process the Chat Model Run upon start."""
|
||||
|
||||
def _on_retriever_start(self, run: Run) -> None:
|
||||
"""Process the Retriever Run upon start."""
|
||||
|
||||
def _on_retriever_end(self, run: Run) -> None:
|
||||
"""Process the Retriever Run."""
|
||||
|
||||
def _on_retriever_error(self, run: Run) -> None:
|
||||
"""Process the Retriever Run upon error."""
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user