mirror of
https://github.com/hwchase17/langchain.git
synced 2026-04-20 05:04:50 +00:00
Compare commits
129 Commits
eugene/wra
...
bagatur/ep
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a3373cd99c | ||
|
|
d21a4c80cb | ||
|
|
fdbeb52756 | ||
|
|
0c8a88b3fa | ||
|
|
08feed3332 | ||
|
|
a758496236 | ||
|
|
103094286e | ||
|
|
fd8fe209cb | ||
|
|
e51bccdb28 | ||
|
|
09a92bb9bf | ||
|
|
a214fe8a2d | ||
|
|
a956b69720 | ||
|
|
d87cfd33e8 | ||
|
|
be9bc62f8b | ||
|
|
0808949e54 | ||
|
|
129d056085 | ||
|
|
5b3dbf12a5 | ||
|
|
9f545825b7 | ||
|
|
616e728ef9 | ||
|
|
83d2a871eb | ||
|
|
292ae8468e | ||
|
|
b58d492e05 | ||
|
|
df8e35fd81 | ||
|
|
083726ecda | ||
|
|
82f28ca9ef | ||
|
|
82fb56b79c | ||
|
|
99e5eaa9b1 | ||
|
|
d4f790fd40 | ||
|
|
c29fbede59 | ||
|
|
eee0d1d0dd | ||
|
|
ade683c589 | ||
|
|
50b8f4dcc7 | ||
|
|
f02a04e2b5 | ||
|
|
2b06792c81 | ||
|
|
61e4a06447 | ||
|
|
ead04487fd | ||
|
|
8976483f3a | ||
|
|
edcb03943e | ||
|
|
89a8121eaa | ||
|
|
d5eb228874 | ||
|
|
463019ac3e | ||
|
|
a3dd4dcadf | ||
|
|
9417961b17 | ||
|
|
0689628489 | ||
|
|
0dd2c21089 | ||
|
|
589927e9e1 | ||
|
|
5d60ced7b3 | ||
|
|
ce78877a87 | ||
|
|
0c4683ebcc | ||
|
|
b11c233304 | ||
|
|
8c986221e4 | ||
|
|
25be71e4f3 | ||
|
|
8f2d321dd0 | ||
|
|
019aa04b06 | ||
|
|
77b359edf5 | ||
|
|
7e63270e04 | ||
|
|
a69d1b84f4 | ||
|
|
f2560188ec | ||
|
|
c0d67420e5 | ||
|
|
c194828be0 | ||
|
|
c71afb46d1 | ||
|
|
995ef8a7fc | ||
|
|
de8dfde7f7 | ||
|
|
e842131425 | ||
|
|
6dedd94ba4 | ||
|
|
c5e23293f8 | ||
|
|
90d7c55343 | ||
|
|
3c8e9a9641 | ||
|
|
2673b3a314 | ||
|
|
4c2de2a7f2 | ||
|
|
1c089cadd7 | ||
|
|
2e8733cf54 | ||
|
|
b04e472acf | ||
|
|
090411842e | ||
|
|
84a97d55e1 | ||
|
|
09aa1eac03 | ||
|
|
0f9f213833 | ||
|
|
15f1af8ed6 | ||
|
|
8bebc9206f | ||
|
|
a3c79b1909 | ||
|
|
23928a3311 | ||
|
|
ba5fbaba70 | ||
|
|
3e6cea46e2 | ||
|
|
63601551b1 | ||
|
|
1d55141c50 | ||
|
|
2519580994 | ||
|
|
74a64cfbab | ||
|
|
b9ca5cc5ea | ||
|
|
afba2be3dc | ||
|
|
9abf60acb6 | ||
|
|
b30f449dae | ||
|
|
bfbb97b74c | ||
|
|
1b3942ba74 | ||
|
|
852722ea45 | ||
|
|
358562769a | ||
|
|
3eccd72382 | ||
|
|
76d09b4ed0 | ||
|
|
1aae77f26f | ||
|
|
cf17c58b47 | ||
|
|
a091b4bf4c | ||
|
|
e0162baa3b | ||
|
|
5e9687a196 | ||
|
|
0a04e63811 | ||
|
|
0470198fb5 | ||
|
|
e986afa13a | ||
|
|
4b505060bd | ||
|
|
664ff28cba | ||
|
|
08a8363fc6 | ||
|
|
5e43768f61 | ||
|
|
a8aa1aba1c | ||
|
|
68d8f73698 | ||
|
|
ef0664728e | ||
|
|
eac4ddb4bb | ||
|
|
71d5b7c9bf | ||
|
|
41279a3ae1 | ||
|
|
22858d99b5 | ||
|
|
249d7d06a2 | ||
|
|
9529483c2a | ||
|
|
969e1683de | ||
|
|
c478fc208e | ||
|
|
d0a0d560ad | ||
|
|
93dd499997 | ||
|
|
a69cb95850 | ||
|
|
7810ea5812 | ||
|
|
b0896210c7 | ||
|
|
7124f2ebfa | ||
|
|
17ae2998e7 | ||
|
|
3f601b5809 | ||
|
|
03ea0762a1 |
4
.github/actions/poetry_setup/action.yml
vendored
4
.github/actions/poetry_setup/action.yml
vendored
@@ -66,12 +66,14 @@ runs:
|
||||
id: cache-poetry
|
||||
env:
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
|
||||
WORKDIR: ${{ inputs.working-directory == '' && '.' || inputs.working-directory }}
|
||||
with:
|
||||
path: |
|
||||
~/.cache/pypoetry/virtualenvs
|
||||
~/.cache/pypoetry/cache
|
||||
~/.cache/pypoetry/artifacts
|
||||
key: poetry-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-poetry-${{ inputs.poetry-version }}-${{ inputs.cache-key }}-${{ hashFiles('poetry.lock') }}
|
||||
${{ env.WORKDIR }}/.venv
|
||||
key: poetry-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-poetry-${{ inputs.poetry-version }}-${{ inputs.cache-key }}-${{ hashFiles(format('{0}/poetry.lock', env.WORKDIR)) }}
|
||||
|
||||
- run: ${{ inputs.install-command }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
606
.github/tools/git-restore-mtime
vendored
Executable file
606
.github/tools/git-restore-mtime
vendored
Executable file
@@ -0,0 +1,606 @@
|
||||
#!/usr/bin/env python3
|
||||
#
|
||||
# git-restore-mtime - Change mtime of files based on commit date of last change
|
||||
#
|
||||
# Copyright (C) 2012 Rodrigo Silva (MestreLion) <linux@rodrigosilva.com>
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. See <http://www.gnu.org/licenses/gpl.html>
|
||||
#
|
||||
# Source: https://github.com/MestreLion/git-tools
|
||||
# Version: July 13, 2023 (commit hash 5f832e72453e035fccae9d63a5056918d64476a2)
|
||||
"""
|
||||
Change the modification time (mtime) of files in work tree, based on the
|
||||
date of the most recent commit that modified the file, including renames.
|
||||
|
||||
Ignores untracked files and uncommitted deletions, additions and renames, and
|
||||
by default modifications too.
|
||||
---
|
||||
Useful prior to generating release tarballs, so each file is archived with a
|
||||
date that is similar to the date when the file was actually last modified,
|
||||
assuming the actual modification date and its commit date are close.
|
||||
"""
|
||||
|
||||
# TODO:
|
||||
# - Add -z on git whatchanged/ls-files, so we don't deal with filename decoding
|
||||
# - When Python is bumped to 3.7, use text instead of universal_newlines on subprocess
|
||||
# - Update "Statistics for some large projects" with modern hardware and repositories.
|
||||
# - Create a README.md for git-restore-mtime alone. It deserves extensive documentation
|
||||
# - Move Statistics there
|
||||
# - See git-extras as a good example on project structure and documentation
|
||||
|
||||
# FIXME:
|
||||
# - When current dir is outside the worktree, e.g. using --work-tree, `git ls-files`
|
||||
# assume any relative pathspecs are to worktree root, not the current dir. As such,
|
||||
# relative pathspecs may not work.
|
||||
# - Renames are tricky:
|
||||
# - R100 should not change mtime, but original name is not on filelist. Should
|
||||
# track renames until a valid (A, M) mtime found and then set on current name.
|
||||
# - Should set mtime for both current and original directories.
|
||||
# - Check mode changes with unchanged blobs?
|
||||
# - Check file (A, D) for the directory mtime is not sufficient:
|
||||
# - Renames also change dir mtime, unless rename was on a parent dir
|
||||
# - If most recent change of all files in a dir was a Modification (M),
|
||||
# dir might not be touched at all.
|
||||
# - Dirs containing only subdirectories but no direct files will also
|
||||
# not be touched. They're files' [grand]parent dir, but never their dirname().
|
||||
# - Some solutions:
|
||||
# - After files done, perform some dir processing for missing dirs, finding latest
|
||||
# file (A, D, R)
|
||||
# - Simple approach: dir mtime is the most recent child (dir or file) mtime
|
||||
# - Use a virtual concept of "created at most at" to fill missing info, bubble up
|
||||
# to parents and grandparents
|
||||
# - When handling [grand]parent dirs, stay inside <pathspec>
|
||||
# - Better handling of merge commits. `-m` is plain *wrong*. `-c/--cc` is perfect, but
|
||||
# painfully slow. First pass without merge commits is not accurate. Maybe add a new
|
||||
# `--accurate` mode for `--cc`?
|
||||
|
||||
if __name__ != "__main__":
|
||||
raise ImportError("{} should not be used as a module.".format(__name__))
|
||||
|
||||
import argparse
|
||||
import datetime
|
||||
import logging
|
||||
import os.path
|
||||
import shlex
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
||||
__version__ = "2022.12+dev"
|
||||
|
||||
# Update symlinks only if the platform supports not following them
|
||||
UPDATE_SYMLINKS = bool(os.utime in getattr(os, 'supports_follow_symlinks', []))
|
||||
|
||||
# Call os.path.normpath() only if not in a POSIX platform (Windows)
|
||||
NORMALIZE_PATHS = (os.path.sep != '/')
|
||||
|
||||
# How many files to process in each batch when re-trying merge commits
|
||||
STEPMISSING = 100
|
||||
|
||||
# (Extra) keywords for the os.utime() call performed by touch()
|
||||
UTIME_KWS = {} if not UPDATE_SYMLINKS else {'follow_symlinks': False}
|
||||
|
||||
|
||||
# Command-line interface ######################################################
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(
|
||||
description=__doc__.split('\n---')[0])
|
||||
|
||||
group = parser.add_mutually_exclusive_group()
|
||||
group.add_argument('--quiet', '-q', dest='loglevel',
|
||||
action="store_const", const=logging.WARNING, default=logging.INFO,
|
||||
help="Suppress informative messages and summary statistics.")
|
||||
group.add_argument('--verbose', '-v', action="count", help="""
|
||||
Print additional information for each processed file.
|
||||
Specify twice to further increase verbosity.
|
||||
""")
|
||||
|
||||
parser.add_argument('--cwd', '-C', metavar="DIRECTORY", help="""
|
||||
Run as if %(prog)s was started in directory %(metavar)s.
|
||||
This affects how --work-tree, --git-dir and PATHSPEC arguments are handled.
|
||||
See 'man 1 git' or 'git --help' for more information.
|
||||
""")
|
||||
|
||||
parser.add_argument('--git-dir', dest='gitdir', metavar="GITDIR", help="""
|
||||
Path to the git repository, by default auto-discovered by searching
|
||||
the current directory and its parents for a .git/ subdirectory.
|
||||
""")
|
||||
|
||||
parser.add_argument('--work-tree', dest='workdir', metavar="WORKTREE", help="""
|
||||
Path to the work tree root, by default the parent of GITDIR if it's
|
||||
automatically discovered, or the current directory if GITDIR is set.
|
||||
""")
|
||||
|
||||
parser.add_argument('--force', '-f', default=False, action="store_true", help="""
|
||||
Force updating files with uncommitted modifications.
|
||||
Untracked files and uncommitted deletions, renames and additions are
|
||||
always ignored.
|
||||
""")
|
||||
|
||||
parser.add_argument('--merge', '-m', default=False, action="store_true", help="""
|
||||
Include merge commits.
|
||||
Leads to more recent times and more files per commit, thus with the same
|
||||
time, which may or may not be what you want.
|
||||
Including merge commits may lead to fewer commits being evaluated as files
|
||||
are found sooner, which can improve performance, sometimes substantially.
|
||||
But as merge commits are usually huge, processing them may also take longer.
|
||||
By default, merge commits are only used for files missing from regular commits.
|
||||
""")
|
||||
|
||||
parser.add_argument('--first-parent', default=False, action="store_true", help="""
|
||||
Consider only the first parent, the "main branch", when evaluating merge commits.
|
||||
Only effective when merge commits are processed, either when --merge is
|
||||
used or when finding missing files after the first regular log search.
|
||||
See --skip-missing.
|
||||
""")
|
||||
|
||||
parser.add_argument('--skip-missing', '-s', dest="missing", default=True,
|
||||
action="store_false", help="""
|
||||
Do not try to find missing files.
|
||||
If merge commits were not evaluated with --merge and some files were
|
||||
not found in regular commits, by default %(prog)s searches for these
|
||||
files again in the merge commits.
|
||||
This option disables this retry, so files found only in merge commits
|
||||
will not have their timestamp updated.
|
||||
""")
|
||||
|
||||
parser.add_argument('--no-directories', '-D', dest='dirs', default=True,
|
||||
action="store_false", help="""
|
||||
Do not update directory timestamps.
|
||||
By default, use the time of its most recently created, renamed or deleted file.
|
||||
Note that just modifying a file will NOT update its directory time.
|
||||
""")
|
||||
|
||||
parser.add_argument('--test', '-t', default=False, action="store_true",
|
||||
help="Test run: do not actually update any file timestamp.")
|
||||
|
||||
parser.add_argument('--commit-time', '-c', dest='commit_time', default=False,
|
||||
action='store_true', help="Use commit time instead of author time.")
|
||||
|
||||
parser.add_argument('--oldest-time', '-o', dest='reverse_order', default=False,
|
||||
action='store_true', help="""
|
||||
Update times based on the oldest, instead of the most recent commit of a file.
|
||||
This reverses the order in which the git log is processed to emulate a
|
||||
file "creation" date. Note this will be inaccurate for files deleted and
|
||||
re-created at later dates.
|
||||
""")
|
||||
|
||||
parser.add_argument('--skip-older-than', metavar='SECONDS', type=int, help="""
|
||||
Ignore files that are currently older than %(metavar)s.
|
||||
Useful in workflows that assume such files already have a correct timestamp,
|
||||
as it may improve performance by processing fewer files.
|
||||
""")
|
||||
|
||||
parser.add_argument('--skip-older-than-commit', '-N', default=False,
|
||||
action='store_true', help="""
|
||||
Ignore files older than the timestamp it would be updated to.
|
||||
Such files may be considered "original", likely in the author's repository.
|
||||
""")
|
||||
|
||||
parser.add_argument('--unique-times', default=False, action="store_true", help="""
|
||||
Set the microseconds to a unique value per commit.
|
||||
Allows telling apart changes that would otherwise have identical timestamps,
|
||||
as git's time accuracy is in seconds.
|
||||
""")
|
||||
|
||||
parser.add_argument('pathspec', nargs='*', metavar='PATHSPEC', help="""
|
||||
Only modify paths matching %(metavar)s, relative to current directory.
|
||||
By default, update all but untracked files and submodules.
|
||||
""")
|
||||
|
||||
parser.add_argument('--version', '-V', action='version',
|
||||
version='%(prog)s version {version}'.format(version=get_version()))
|
||||
|
||||
args_ = parser.parse_args()
|
||||
if args_.verbose:
|
||||
args_.loglevel = max(logging.TRACE, logging.DEBUG // args_.verbose)
|
||||
args_.debug = args_.loglevel <= logging.DEBUG
|
||||
return args_
|
||||
|
||||
|
||||
def get_version(version=__version__):
|
||||
if not version.endswith('+dev'):
|
||||
return version
|
||||
try:
|
||||
cwd = os.path.dirname(os.path.realpath(__file__))
|
||||
return Git(cwd=cwd, errors=False).describe().lstrip('v')
|
||||
except Git.Error:
|
||||
return '-'.join((version, "unknown"))
|
||||
|
||||
|
||||
# Helper functions ############################################################
|
||||
|
||||
def setup_logging():
|
||||
"""Add TRACE logging level and corresponding method, return the root logger"""
|
||||
logging.TRACE = TRACE = logging.DEBUG // 2
|
||||
logging.Logger.trace = lambda _, m, *a, **k: _.log(TRACE, m, *a, **k)
|
||||
return logging.getLogger()
|
||||
|
||||
|
||||
def normalize(path):
|
||||
r"""Normalize paths from git, handling non-ASCII characters.
|
||||
|
||||
Git stores paths as UTF-8 normalization form C.
|
||||
If path contains non-ASCII or non-printable characters, git outputs the UTF-8
|
||||
in octal-escaped notation, escaping double-quotes and backslashes, and then
|
||||
double-quoting the whole path.
|
||||
https://git-scm.com/docs/git-config#Documentation/git-config.txt-corequotePath
|
||||
|
||||
This function reverts this encoding, so:
|
||||
normalize(r'"Back\\slash_double\"quote_a\303\247a\303\255"') =>
|
||||
r'Back\slash_double"quote_açaí')
|
||||
|
||||
Paths with invalid UTF-8 encoding, such as single 0x80-0xFF bytes (e.g, from
|
||||
Latin1/Windows-1251 encoding) are decoded using surrogate escape, the same
|
||||
method used by Python for filesystem paths. So 0xE6 ("æ" in Latin1, r'\\346'
|
||||
from Git) is decoded as "\udce6". See https://peps.python.org/pep-0383/ and
|
||||
https://vstinner.github.io/painful-history-python-filesystem-encoding.html
|
||||
|
||||
Also see notes on `windows/non-ascii-paths.txt` about path encodings on
|
||||
non-UTF-8 platforms and filesystems.
|
||||
"""
|
||||
if path and path[0] == '"':
|
||||
# Python 2: path = path[1:-1].decode("string-escape")
|
||||
# Python 3: https://stackoverflow.com/a/46650050/624066
|
||||
path = (path[1:-1] # Remove enclosing double quotes
|
||||
.encode('latin1') # Convert to bytes, required by 'unicode-escape'
|
||||
.decode('unicode-escape') # Perform the actual octal-escaping decode
|
||||
.encode('latin1') # 1:1 mapping to bytes, UTF-8 encoded
|
||||
.decode('utf8', 'surrogateescape')) # Decode from UTF-8
|
||||
if NORMALIZE_PATHS:
|
||||
# Make sure the slash matches the OS; for Windows we need a backslash
|
||||
path = os.path.normpath(path)
|
||||
return path
|
||||
|
||||
|
||||
def dummy(*_args, **_kwargs):
|
||||
"""No-op function used in dry-run tests"""
|
||||
|
||||
|
||||
def touch(path, mtime):
|
||||
"""The actual mtime update"""
|
||||
os.utime(path, (mtime, mtime), **UTIME_KWS)
|
||||
|
||||
|
||||
def touch_ns(path, mtime_ns):
|
||||
"""The actual mtime update, using nanoseconds for unique timestamps"""
|
||||
os.utime(path, None, ns=(mtime_ns, mtime_ns), **UTIME_KWS)
|
||||
|
||||
|
||||
def isodate(secs: int):
|
||||
# time.localtime() accepts floats, but discards fractional part
|
||||
return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(secs))
|
||||
|
||||
|
||||
def isodate_ns(ns: int):
|
||||
# for integers fromtimestamp() is equivalent and ~16% slower than isodate()
|
||||
return datetime.datetime.fromtimestamp(ns / 1000000000).isoformat(sep=' ')
|
||||
|
||||
|
||||
def get_mtime_ns(secs: int, idx: int):
|
||||
# Time resolution for filesystems and functions:
|
||||
# ext-4 and other POSIX filesystems: 1 nanosecond
|
||||
# NTFS (Windows default): 100 nanoseconds
|
||||
# datetime.datetime() (due to 64-bit float epoch): 1 microsecond
|
||||
us = idx % 1000000 # 10**6
|
||||
return 1000 * (1000000 * secs + us)
|
||||
|
||||
|
||||
def get_mtime_path(path):
|
||||
return os.path.getmtime(path)
|
||||
|
||||
|
||||
# Git class and parse_log(), the heart of the script ##########################
|
||||
|
||||
class Git:
|
||||
def __init__(self, workdir=None, gitdir=None, cwd=None, errors=True):
|
||||
self.gitcmd = ['git']
|
||||
self.errors = errors
|
||||
self._proc = None
|
||||
if workdir: self.gitcmd.extend(('--work-tree', workdir))
|
||||
if gitdir: self.gitcmd.extend(('--git-dir', gitdir))
|
||||
if cwd: self.gitcmd.extend(('-C', cwd))
|
||||
self.workdir, self.gitdir = self._get_repo_dirs()
|
||||
|
||||
def ls_files(self, paths: list = None):
|
||||
return (normalize(_) for _ in self._run('ls-files --full-name', paths))
|
||||
|
||||
def ls_dirty(self, force=False):
|
||||
return (normalize(_[3:].split(' -> ', 1)[-1])
|
||||
for _ in self._run('status --porcelain')
|
||||
if _[:2] != '??' and (not force or (_[0] in ('R', 'A')
|
||||
or _[1] == 'D')))
|
||||
|
||||
def log(self, merge=False, first_parent=False, commit_time=False,
|
||||
reverse_order=False, paths: list = None):
|
||||
cmd = 'whatchanged --pretty={}'.format('%ct' if commit_time else '%at')
|
||||
if merge: cmd += ' -m'
|
||||
if first_parent: cmd += ' --first-parent'
|
||||
if reverse_order: cmd += ' --reverse'
|
||||
return self._run(cmd, paths)
|
||||
|
||||
def describe(self):
|
||||
return self._run('describe --tags', check=True)[0]
|
||||
|
||||
def terminate(self):
|
||||
if self._proc is None:
|
||||
return
|
||||
try:
|
||||
self._proc.terminate()
|
||||
except OSError:
|
||||
# Avoid errors on OpenBSD
|
||||
pass
|
||||
|
||||
def _get_repo_dirs(self):
|
||||
return (os.path.normpath(_) for _ in
|
||||
self._run('rev-parse --show-toplevel --absolute-git-dir', check=True))
|
||||
|
||||
def _run(self, cmdstr: str, paths: list = None, output=True, check=False):
|
||||
cmdlist = self.gitcmd + shlex.split(cmdstr)
|
||||
if paths:
|
||||
cmdlist.append('--')
|
||||
cmdlist.extend(paths)
|
||||
popen_args = dict(universal_newlines=True, encoding='utf8')
|
||||
if not self.errors:
|
||||
popen_args['stderr'] = subprocess.DEVNULL
|
||||
log.trace("Executing: %s", ' '.join(cmdlist))
|
||||
if not output:
|
||||
return subprocess.call(cmdlist, **popen_args)
|
||||
if check:
|
||||
try:
|
||||
stdout: str = subprocess.check_output(cmdlist, **popen_args)
|
||||
return stdout.splitlines()
|
||||
except subprocess.CalledProcessError as e:
|
||||
raise self.Error(e.returncode, e.cmd, e.output, e.stderr)
|
||||
self._proc = subprocess.Popen(cmdlist, stdout=subprocess.PIPE, **popen_args)
|
||||
return (_.rstrip() for _ in self._proc.stdout)
|
||||
|
||||
def __del__(self):
|
||||
self.terminate()
|
||||
|
||||
class Error(subprocess.CalledProcessError):
|
||||
"""Error from git executable"""
|
||||
|
||||
|
||||
def parse_log(filelist, dirlist, stats, git, merge=False, filterlist=None):
|
||||
mtime = 0
|
||||
datestr = isodate(0)
|
||||
for line in git.log(
|
||||
merge,
|
||||
args.first_parent,
|
||||
args.commit_time,
|
||||
args.reverse_order,
|
||||
filterlist
|
||||
):
|
||||
stats['loglines'] += 1
|
||||
|
||||
# Blank line between Date and list of files
|
||||
if not line:
|
||||
continue
|
||||
|
||||
# Date line
|
||||
if line[0] != ':': # Faster than `not line.startswith(':')`
|
||||
stats['commits'] += 1
|
||||
mtime = int(line)
|
||||
if args.unique_times:
|
||||
mtime = get_mtime_ns(mtime, stats['commits'])
|
||||
if args.debug:
|
||||
datestr = isodate(mtime)
|
||||
continue
|
||||
|
||||
# File line: three tokens if it describes a renaming, otherwise two
|
||||
tokens = line.split('\t')
|
||||
|
||||
# Possible statuses:
|
||||
# M: Modified (content changed)
|
||||
# A: Added (created)
|
||||
# D: Deleted
|
||||
# T: Type changed: to/from regular file, symlinks, submodules
|
||||
# R099: Renamed (moved), with % of unchanged content. 100 = pure rename
|
||||
# Not possible in log: C=Copied, U=Unmerged, X=Unknown, B=pairing Broken
|
||||
status = tokens[0].split(' ')[-1]
|
||||
file = tokens[-1]
|
||||
|
||||
# Handles non-ASCII chars and OS path separator
|
||||
file = normalize(file)
|
||||
|
||||
def do_file():
|
||||
if args.skip_older_than_commit and get_mtime_path(file) <= mtime:
|
||||
stats['skip'] += 1
|
||||
return
|
||||
if args.debug:
|
||||
log.debug("%d\t%d\t%d\t%s\t%s",
|
||||
stats['loglines'], stats['commits'], stats['files'],
|
||||
datestr, file)
|
||||
try:
|
||||
touch(os.path.join(git.workdir, file), mtime)
|
||||
stats['touches'] += 1
|
||||
except Exception as e:
|
||||
log.error("ERROR: %s: %s", e, file)
|
||||
stats['errors'] += 1
|
||||
|
||||
def do_dir():
|
||||
if args.debug:
|
||||
log.debug("%d\t%d\t-\t%s\t%s",
|
||||
stats['loglines'], stats['commits'],
|
||||
datestr, "{}/".format(dirname or '.'))
|
||||
try:
|
||||
touch(os.path.join(git.workdir, dirname), mtime)
|
||||
stats['dirtouches'] += 1
|
||||
except Exception as e:
|
||||
log.error("ERROR: %s: %s", e, dirname)
|
||||
stats['direrrors'] += 1
|
||||
|
||||
if file in filelist:
|
||||
stats['files'] -= 1
|
||||
filelist.remove(file)
|
||||
do_file()
|
||||
|
||||
if args.dirs and status in ('A', 'D'):
|
||||
dirname = os.path.dirname(file)
|
||||
if dirname in dirlist:
|
||||
dirlist.remove(dirname)
|
||||
do_dir()
|
||||
|
||||
# All files done?
|
||||
if not stats['files']:
|
||||
git.terminate()
|
||||
return
|
||||
|
||||
|
||||
# Main Logic ##################################################################
|
||||
|
||||
def main():
|
||||
start = time.time() # yes, Wall time. CPU time is not realistic for users.
|
||||
stats = {_: 0 for _ in ('loglines', 'commits', 'touches', 'skip', 'errors',
|
||||
'dirtouches', 'direrrors')}
|
||||
|
||||
logging.basicConfig(level=args.loglevel, format='%(message)s')
|
||||
log.trace("Arguments: %s", args)
|
||||
|
||||
# First things first: Where and Who are we?
|
||||
if args.cwd:
|
||||
log.debug("Changing directory: %s", args.cwd)
|
||||
try:
|
||||
os.chdir(args.cwd)
|
||||
except OSError as e:
|
||||
log.critical(e)
|
||||
return e.errno
|
||||
# Using both os.chdir() and `git -C` is redundant, but might prevent side effects
|
||||
# `git -C` alone could be enough if we make sure that:
|
||||
# - all paths, including args.pathspec, are processed by git: ls-files, rev-parse
|
||||
# - touch() / os.utime() path argument is always prepended with git.workdir
|
||||
try:
|
||||
git = Git(workdir=args.workdir, gitdir=args.gitdir, cwd=args.cwd)
|
||||
except Git.Error as e:
|
||||
# Not in a git repository, and git already informed user on stderr. So we just...
|
||||
return e.returncode
|
||||
|
||||
# Get the files managed by git and build file list to be processed
|
||||
if UPDATE_SYMLINKS and not args.skip_older_than:
|
||||
filelist = set(git.ls_files(args.pathspec))
|
||||
else:
|
||||
filelist = set()
|
||||
for path in git.ls_files(args.pathspec):
|
||||
fullpath = os.path.join(git.workdir, path)
|
||||
|
||||
# Symlink (to file, to dir or broken - git handles the same way)
|
||||
if not UPDATE_SYMLINKS and os.path.islink(fullpath):
|
||||
log.warning("WARNING: Skipping symlink, no OS support for updates: %s",
|
||||
path)
|
||||
continue
|
||||
|
||||
# skip files which are older than given threshold
|
||||
if (args.skip_older_than
|
||||
and start - get_mtime_path(fullpath) > args.skip_older_than):
|
||||
continue
|
||||
|
||||
# Always add files relative to worktree root
|
||||
filelist.add(path)
|
||||
|
||||
# If --force, silently ignore uncommitted deletions (not in the filesystem)
|
||||
# and renames / additions (will not be found in log anyway)
|
||||
if args.force:
|
||||
filelist -= set(git.ls_dirty(force=True))
|
||||
# Otherwise, ignore any dirty files
|
||||
else:
|
||||
dirty = set(git.ls_dirty())
|
||||
if dirty:
|
||||
log.warning("WARNING: Modified files in the working directory were ignored."
|
||||
"\nTo include such files, commit your changes or use --force.")
|
||||
filelist -= dirty
|
||||
|
||||
# Build dir list to be processed
|
||||
dirlist = set(os.path.dirname(_) for _ in filelist) if args.dirs else set()
|
||||
|
||||
stats['totalfiles'] = stats['files'] = len(filelist)
|
||||
log.info("{0:,} files to be processed in work dir".format(stats['totalfiles']))
|
||||
|
||||
if not filelist:
|
||||
# Nothing to do. Exit silently and without errors, just like git does
|
||||
return
|
||||
|
||||
# Process the log until all files are 'touched'
|
||||
log.debug("Line #\tLog #\tF.Left\tModification Time\tFile Name")
|
||||
parse_log(filelist, dirlist, stats, git, args.merge, args.pathspec)
|
||||
|
||||
# Missing files
|
||||
if filelist:
|
||||
# Try to find them in merge logs, if not done already
|
||||
# (usually HUGE, thus MUCH slower!)
|
||||
if args.missing and not args.merge:
|
||||
filterlist = list(filelist)
|
||||
missing = len(filterlist)
|
||||
log.info("{0:,} files not found in log, trying merge commits".format(missing))
|
||||
for i in range(0, missing, STEPMISSING):
|
||||
parse_log(filelist, dirlist, stats, git,
|
||||
merge=True, filterlist=filterlist[i:i + STEPMISSING])
|
||||
|
||||
# Still missing some?
|
||||
for file in filelist:
|
||||
log.warning("WARNING: not found in the log: %s", file)
|
||||
|
||||
# Final statistics
|
||||
# Suggestion: use git-log --before=mtime to brag about skipped log entries
|
||||
def log_info(msg, *a, width=13):
|
||||
ifmt = '{:%d,}' % (width,) # not using 'n' for consistency with ffmt
|
||||
ffmt = '{:%d,.2f}' % (width,)
|
||||
# %-formatting lacks a thousand separator, must pre-render with .format()
|
||||
log.info(msg.replace('%d', ifmt).replace('%f', ffmt).format(*a))
|
||||
|
||||
log_info(
|
||||
"Statistics:\n"
|
||||
"%f seconds\n"
|
||||
"%d log lines processed\n"
|
||||
"%d commits evaluated",
|
||||
time.time() - start, stats['loglines'], stats['commits'])
|
||||
|
||||
if args.dirs:
|
||||
if stats['direrrors']: log_info("%d directory update errors", stats['direrrors'])
|
||||
log_info("%d directories updated", stats['dirtouches'])
|
||||
|
||||
if stats['touches'] != stats['totalfiles']:
|
||||
log_info("%d files", stats['totalfiles'])
|
||||
if stats['skip']: log_info("%d files skipped", stats['skip'])
|
||||
if stats['files']: log_info("%d files missing", stats['files'])
|
||||
if stats['errors']: log_info("%d file update errors", stats['errors'])
|
||||
|
||||
log_info("%d files updated", stats['touches'])
|
||||
|
||||
if args.test:
|
||||
log.info("TEST RUN - No files modified!")
|
||||
|
||||
|
||||
# Keep only essential, global assignments here. Any other logic must be in main()
|
||||
log = setup_logging()
|
||||
args = parse_args()
|
||||
|
||||
# Set the actual touch() and other functions based on command-line arguments
|
||||
if args.unique_times:
|
||||
touch = touch_ns
|
||||
isodate = isodate_ns
|
||||
|
||||
# Make sure this is always set last to ensure --test behaves as intended
|
||||
if args.test:
|
||||
touch = dummy
|
||||
|
||||
# UI done, it's showtime!
|
||||
try:
|
||||
sys.exit(main())
|
||||
except KeyboardInterrupt:
|
||||
log.info("\nAborting")
|
||||
signal.signal(signal.SIGINT, signal.SIG_DFL)
|
||||
os.kill(os.getpid(), signal.SIGINT)
|
||||
105
.github/workflows/_lint.yml
vendored
105
.github/workflows/_lint.yml
vendored
@@ -10,37 +10,132 @@ on:
|
||||
|
||||
env:
|
||||
POETRY_VERSION: "1.4.2"
|
||||
WORKDIR: ${{ inputs.working-directory == '' && '.' || inputs.working-directory }}
|
||||
|
||||
jobs:
|
||||
build:
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
# This number is set "by eye": we want it to be big enough
|
||||
# so that it's bigger than the number of commits in any reasonable PR,
|
||||
# and also as small as possible since increasing the number makes
|
||||
# the initial `git fetch` slower.
|
||||
FETCH_DEPTH: 50
|
||||
strategy:
|
||||
matrix:
|
||||
# Only lint on the min and max supported Python versions.
|
||||
# It's extremely unlikely that there's a lint issue on any version in between
|
||||
# that doesn't show up on the min or max versions.
|
||||
#
|
||||
# GitHub rate-limits how many jobs can be running at any one time.
|
||||
# Starting new jobs is also relatively slow,
|
||||
# so linting on fewer versions makes CI faster.
|
||||
python-version:
|
||||
- "3.8"
|
||||
- "3.9"
|
||||
- "3.10"
|
||||
- "3.11"
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
# Fetch the last FETCH_DEPTH commits, so the mtime-changing script
|
||||
# can accurately set the mtimes of files modified in the last FETCH_DEPTH commits.
|
||||
fetch-depth: ${{ env.FETCH_DEPTH }}
|
||||
- name: Restore workdir file mtimes to last-edited commit date
|
||||
id: restore-mtimes
|
||||
# This is needed to make black caching work.
|
||||
# Black's cache uses file (mtime, size) to check whether a lookup is a cache hit.
|
||||
# Without this command, files in the repo would have the current time as the modified time,
|
||||
# since the previous action step just created them.
|
||||
# This command resets the mtime to the last time the files were modified in git instead,
|
||||
# which is a high-quality and stable representation of the last modification date.
|
||||
run: |
|
||||
# Important considerations:
|
||||
# - These commands run at base of the repo, since we never `cd` to the `WORKDIR`.
|
||||
# - We only want to alter mtimes for Python files, since that's all black checks.
|
||||
# - We don't need to alter mtimes for directories, since black doesn't look at those.
|
||||
# - We also only alter mtimes inside the `WORKDIR` since that's all we'll lint.
|
||||
# - This should run before `poetry install`, because poetry's venv also contains
|
||||
# Python files, and we don't want to alter their mtimes since they aren't linted.
|
||||
|
||||
# Ensure we fail on non-zero exits and on undefined variables.
|
||||
# Also print executed commands, for easier debugging.
|
||||
set -eux
|
||||
|
||||
# Restore the mtimes of Python files in the workdir based on git history.
|
||||
.github/tools/git-restore-mtime --no-directories "$WORKDIR/**/*.py"
|
||||
|
||||
# Since CI only does a partial fetch (to `FETCH_DEPTH`) for efficiency,
|
||||
# the local git repo doesn't have full history. There are probably files
|
||||
# that were last modified in a commit *older than* the oldest fetched commit.
|
||||
# After `git-restore-mtime`, such files have a mtime set to the oldest fetched commit.
|
||||
#
|
||||
# As new commits get added, that timestamp will keep moving forward.
|
||||
# If left unchanged, this will make `black` think that the files were edited
|
||||
# more recently than its cache suggests. Instead, we can set their mtime
|
||||
# to a fixed date in the far past that won't change and won't cause cache misses in black.
|
||||
#
|
||||
# For all workdir Python files modified in or before the oldest few fetched commits,
|
||||
# make their mtime be 2000-01-01 00:00:00.
|
||||
OLDEST_COMMIT="$(git log --reverse '--pretty=format:%H' | head -1)"
|
||||
OLDEST_COMMIT_TIME="$(git show -s '--format=%ai' "$OLDEST_COMMIT")"
|
||||
find "$WORKDIR" -name '*.py' -type f -not -newermt "$OLDEST_COMMIT_TIME" -exec touch -c -m -t '200001010000' '{}' '+'
|
||||
|
||||
echo "oldest-commit=$OLDEST_COMMIT" >> "$GITHUB_OUTPUT"
|
||||
- uses: actions/cache@v3
|
||||
id: cache-pip
|
||||
name: Cache langchain editable pip install - ${{ matrix.python-version }}
|
||||
env:
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
|
||||
with:
|
||||
path: |
|
||||
~/.cache/pip
|
||||
key: pip-editable-langchain-deps-${{ runner.os }}-${{ runner.arch }}-py-${{ matrix.python-version }}
|
||||
- name: Install poetry
|
||||
run: |
|
||||
pipx install poetry==$POETRY_VERSION
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v4
|
||||
env:
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
cache: poetry
|
||||
cache-dependency-path: |
|
||||
${{ env.WORKDIR }}/**/poetry.lock
|
||||
- name: Install dependencies
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
run: |
|
||||
poetry install
|
||||
- name: Install langchain editable
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
if: ${{ inputs.working-directory != 'langchain' }}
|
||||
run: |
|
||||
pip install -e ../langchain
|
||||
|
||||
- name: Restore black cache
|
||||
uses: actions/cache@v3
|
||||
env:
|
||||
CACHE_BASE: black-${{ runner.os }}-${{ runner.arch }}-py${{ matrix.python-version }}-${{ inputs.working-directory }}-${{ hashFiles(format('{0}/poetry.lock', env.WORKDIR)) }}
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
|
||||
with:
|
||||
path: |
|
||||
${{ env.WORKDIR }}/.black_cache
|
||||
key: ${{ env.CACHE_BASE }}-${{ steps.restore-mtimes.outputs.oldest-commit }}
|
||||
restore-keys:
|
||||
# If we can't find an exact match for our cache key, accept any with this prefix.
|
||||
${{ env.CACHE_BASE }}-
|
||||
|
||||
- name: Get .mypy_cache to speed up mypy
|
||||
uses: actions/cache@v3
|
||||
env:
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
|
||||
with:
|
||||
path: |
|
||||
${{ env.WORKDIR }}/.mypy_cache
|
||||
key: mypy-${{ runner.os }}-${{ runner.arch }}-py${{ matrix.python-version }}-${{ inputs.working-directory }}-${{ hashFiles(format('{0}/poetry.lock', env.WORKDIR)) }}
|
||||
|
||||
- name: Analysing the code with our lint
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
env:
|
||||
BLACK_CACHE_DIR: .black_cache
|
||||
run: |
|
||||
make lint
|
||||
|
||||
45
.github/workflows/_test.yml
vendored
45
.github/workflows/_test.yml
vendored
@@ -10,7 +10,7 @@ on:
|
||||
test_type:
|
||||
type: string
|
||||
description: "Test types to run"
|
||||
default: '["core", "extended"]'
|
||||
default: '["core", "extended", "core-pydantic-2"]'
|
||||
|
||||
env:
|
||||
POETRY_VERSION: "1.4.2"
|
||||
@@ -43,19 +43,46 @@ jobs:
|
||||
if [ "${{ matrix.test_type }}" == "core" ]; then
|
||||
echo "Running core tests, installing dependencies with poetry..."
|
||||
poetry install
|
||||
elif [ "${{ matrix.test_type }}" == "core-pydantic-2" ]; then
|
||||
echo "Running core-pydantic-v2 tests, installing dependencies with poetry..."
|
||||
poetry install
|
||||
|
||||
# Install via `pip` instead of `poetry add` to avoid changing lockfile,
|
||||
# which would prevent caching from working: the cache would get saved
|
||||
# to a different key than where it gets loaded from.
|
||||
poetry run pip install 'pydantic>=2.1,<3'
|
||||
else
|
||||
echo "Running extended tests, installing dependencies with poetry..."
|
||||
poetry install -E extended_testing
|
||||
fi
|
||||
- name: Install langchain editable
|
||||
if: ${{ inputs.working-directory != 'langchain' }}
|
||||
- name: Verify pydantic version
|
||||
run: |
|
||||
pip install -e ../langchain
|
||||
if [ "${{ matrix.test_type }}" == "core-pydantic-2" ]; then
|
||||
EXPECTED_VERSION=2
|
||||
else
|
||||
EXPECTED_VERSION=1
|
||||
fi
|
||||
echo "Checking pydantic version... Expecting ${EXPECTED_VERSION}"
|
||||
|
||||
# Determine the major part of pydantic version
|
||||
VERSION=$(poetry run python -c "import pydantic; print(pydantic.__version__)" | cut -d. -f1)
|
||||
|
||||
# Check that the major part of pydantic version is as expected, if not
|
||||
# raise an error
|
||||
if [[ "$VERSION" -ne $EXPECTED_VERSION ]]; then
|
||||
echo "Error: pydantic version must be equal to ${EXPECTED_VERSION}; Found: ${VERSION}"
|
||||
exit 1
|
||||
fi
|
||||
echo "Found pydantic version ${VERSION}, as expected"
|
||||
shell: bash
|
||||
- name: Run ${{matrix.test_type}} tests
|
||||
run: |
|
||||
if [ "${{ matrix.test_type }}" == "core" ]; then
|
||||
make test
|
||||
else
|
||||
make extended_tests
|
||||
fi
|
||||
case "${{ matrix.test_type }}" in
|
||||
core | core-pydantic-2)
|
||||
make test
|
||||
;;
|
||||
*)
|
||||
make extended_tests
|
||||
;;
|
||||
esac
|
||||
shell: bash
|
||||
|
||||
1
.github/workflows/langchain_ci.yml
vendored
1
.github/workflows/langchain_ci.yml
vendored
@@ -24,4 +24,5 @@ jobs:
|
||||
./.github/workflows/_test.yml
|
||||
with:
|
||||
working-directory: libs/langchain
|
||||
test_type: '["core", "extended", "core-pydantic-2"]'
|
||||
secrets: inherit
|
||||
14
README.md
14
README.md
@@ -2,18 +2,18 @@
|
||||
|
||||
⚡ Building applications with LLMs through composability ⚡
|
||||
|
||||
[](https://github.com/hwchase17/langchain/releases)
|
||||
[](https://github.com/hwchase17/langchain/actions/workflows/langchain_ci.yml)
|
||||
[](https://github.com/hwchase17/langchain/actions/workflows/langchain_experimental_ci.yml)
|
||||
[](https://github.com/langchain-ai/langchain/releases)
|
||||
[](https://github.com/langchain-ai/langchain/actions/workflows/langchain_ci.yml)
|
||||
[](https://github.com/langchain-ai/langchain/actions/workflows/langchain_experimental_ci.yml)
|
||||
[](https://pepy.tech/project/langchain)
|
||||
[](https://opensource.org/licenses/MIT)
|
||||
[](https://twitter.com/langchainai)
|
||||
[](https://discord.gg/6adMQxSpJS)
|
||||
[](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/hwchase17/langchain)
|
||||
[](https://codespaces.new/hwchase17/langchain)
|
||||
[](https://star-history.com/#hwchase17/langchain)
|
||||
[](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/langchain-ai/langchain)
|
||||
[](https://codespaces.new/langchain-ai/langchain)
|
||||
[](https://star-history.com/#langchain-ai/langchain)
|
||||
[](https://libraries.io/github/langchain-ai/langchain)
|
||||
[](https://github.com/hwchase17/langchain/issues)
|
||||
[](https://github.com/langchain-ai/langchain/issues)
|
||||
|
||||
|
||||
Looking for the JS/TS version? Check out [LangChain.js](https://github.com/hwchase17/langchainjs).
|
||||
|
||||
@@ -150,7 +150,8 @@ def _load_package_modules(
|
||||
|
||||
relative_module_name = file_path.relative_to(package_path)
|
||||
|
||||
if relative_module_name.name.startswith("_"):
|
||||
# Skip if any module part starts with an underscore
|
||||
if any(part.startswith("_") for part in relative_module_name.parts):
|
||||
continue
|
||||
|
||||
# Get the full namespace of the module
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -1,4 +1,4 @@
|
||||
# Community Navigator
|
||||
# Community navigator
|
||||
|
||||
Hi! Thanks for being here. We’re lucky to have a community of so many passionate developers building with LangChain–we have so much to teach and learn from each other. Community members contribute code, host meetups, write blog posts, amplify each other’s work, become each other's customers and collaborators, and so much more.
|
||||
|
||||
@@ -28,7 +28,7 @@ LangChain is the product of over 5,000+ contributions by 1,500+ contributors, an
|
||||
# 🌍 Meetups, Events, and Hackathons
|
||||
|
||||
One of our favorite things about working in AI is how much enthusiasm there is for building together. We want to help make that as easy and impactful for you as possible!
|
||||
- **Find a meetup, hackathon, or webinar:** you can find the one for you on on our [global events calendar](https://mirror-feeling-d80.notion.site/0bc81da76a184297b86ca8fc782ee9a3?v=0d80342540df465396546976a50cfb3f).
|
||||
- **Find a meetup, hackathon, or webinar:** you can find the one for you on our [global events calendar](https://mirror-feeling-d80.notion.site/0bc81da76a184297b86ca8fc782ee9a3?v=0d80342540df465396546976a50cfb3f).
|
||||
- **Submit an event to our calendar:** email us at events@langchain.dev with a link to your event page! We can also help you spread the word with our local communities.
|
||||
- **Host a meetup:** If you want to bring a group of builders together, we want to help! We can publicize your event on our event calendar/Twitter, share with our local communities in Discord, send swag, or potentially hook you up with a sponsor. Email us at events@langchain.dev to tell us about your event!
|
||||
- **Become a meetup sponsor:** we often hear from groups of builders that want to get together, but are blocked or limited on some dimension (space to host, budget for snacks, prizes to distribute, etc.). If you’d like to help, send us an email to events@langchain.dev we can share more about how it works!
|
||||
|
||||
BIN
docs/docs_skeleton/static/img/OSS_LLM_overview.png
Normal file
BIN
docs/docs_skeleton/static/img/OSS_LLM_overview.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 288 KiB |
BIN
docs/docs_skeleton/static/img/llama-memory-weights.png
Normal file
BIN
docs/docs_skeleton/static/img/llama-memory-weights.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 44 KiB |
BIN
docs/docs_skeleton/static/img/llama_t_put.png
Normal file
BIN
docs/docs_skeleton/static/img/llama_t_put.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 35 KiB |
File diff suppressed because it is too large
Load Diff
@@ -1,15 +1,15 @@
|
||||
# Tutorials
|
||||
|
||||
Below are links to video tutorials and courses on LangChain. For written guides on common use cases for LangChain, check out the [use cases guides](/docs/use_cases).
|
||||
Below are links to tutorials and courses on LangChain. For written guides on common use cases for LangChain, check out the [use cases guides](/docs/use_cases).
|
||||
|
||||
⛓ icon marks a new addition [last update 2023-07-05]
|
||||
⛓ icon marks a new addition [last update 2023-08-20]
|
||||
|
||||
---------------------
|
||||
|
||||
### DeepLearning.AI courses
|
||||
by [Harrison Chase](https://github.com/hwchase17) and [Andrew Ng](https://en.wikipedia.org/wiki/Andrew_Ng)
|
||||
- [LangChain for LLM Application Development](https://learn.deeplearning.ai/langchain)
|
||||
- ⛓ [LangChain Chat with Your Data](https://learn.deeplearning.ai/langchain-chat-with-your-data)
|
||||
- [LangChain Chat with Your Data](https://learn.deeplearning.ai/langchain-chat-with-your-data)
|
||||
|
||||
### Handbook
|
||||
[LangChain AI Handbook](https://www.pinecone.io/learn/langchain/) By **James Briggs** and **Francisco Ingham**
|
||||
@@ -36,14 +36,14 @@ Below are links to video tutorials and courses on LangChain. For written guides
|
||||
- #8 [Create Custom Tools for Chatbots in LangChain](https://youtu.be/q-HNphrWsDE)
|
||||
- #9 [Build Conversational Agents with Vector DBs](https://youtu.be/H6bCqqw9xyI)
|
||||
- [Using NEW `MPT-7B` in Hugging Face and LangChain](https://youtu.be/DXpk9K7DgMo)
|
||||
- ⛓ [`MPT-30B` Chatbot with LangChain](https://youtu.be/pnem-EhT6VI)
|
||||
- [`MPT-30B` Chatbot with LangChain](https://youtu.be/pnem-EhT6VI)
|
||||
|
||||
|
||||
### [LangChain 101](https://www.youtube.com/playlist?list=PLqZXAkvF1bPNQER9mLmDbntNfSpzdDIU5) by [Greg Kamradt (Data Indy)](https://www.youtube.com/@DataIndependent)
|
||||
- [What Is LangChain? - LangChain + `ChatGPT` Overview](https://youtu.be/_v_fgW2SkkQ)
|
||||
- [Quickstart Guide](https://youtu.be/kYRB-vJFy38)
|
||||
- [Beginner Guide To 7 Essential Concepts](https://youtu.be/2xxziIWmaSA)
|
||||
- [Beginner Guide To 9 Use Cases](https://youtu.be/vGP4pQdCocw)
|
||||
- [Beginner's Guide To 7 Essential Concepts](https://youtu.be/2xxziIWmaSA)
|
||||
- [Beginner's Guide To 9 Use Cases](https://youtu.be/vGP4pQdCocw)
|
||||
- [Agents Overview + Google Searches](https://youtu.be/Jq9Sf68ozk0)
|
||||
- [`OpenAI` + `Wolfram Alpha`](https://youtu.be/UijbzCIJ99g)
|
||||
- [Ask Questions On Your Custom (or Private) Files](https://youtu.be/EnT-ZTrcPrg)
|
||||
@@ -63,7 +63,7 @@ Below are links to video tutorials and courses on LangChain. For written guides
|
||||
- [Build Your Own `AI Twitter Bot` Using LLMs](https://youtu.be/yLWLDjT01q8)
|
||||
- [ChatGPT made my interview questions for me (`Streamlit` + LangChain)](https://youtu.be/zvoAMx0WKkw)
|
||||
- [Function Calling via ChatGPT API - First Look With LangChain](https://youtu.be/0-zlUy7VUjg)
|
||||
- ⛓ [Extract Topics From Video/Audio With LLMs (Topic Modeling w/ LangChain)](https://youtu.be/pEkxRQFNAs4)
|
||||
- [Extract Topics From Video/Audio With LLMs (Topic Modeling w/ LangChain)](https://youtu.be/pEkxRQFNAs4)
|
||||
|
||||
|
||||
### [LangChain How to and guides](https://www.youtube.com/playlist?list=PL8motc6AQftk1Bs42EW45kwYbyJ4jOdiZ) by [Sam Witteveen](https://www.youtube.com/@samwitteveenai)
|
||||
@@ -73,7 +73,7 @@ Below are links to video tutorials and courses on LangChain. For written guides
|
||||
- [Conversations with Memory (explanation & code walkthrough)](https://youtu.be/X550Zbz_ROE)
|
||||
- [Chat with `Flan20B`](https://youtu.be/VW5LBavIfY4)
|
||||
- [Using `Hugging Face Models` locally (code walkthrough)](https://youtu.be/Kn7SX2Mx_Jk)
|
||||
- [`PAL` : Program-aided Language Models with LangChain code](https://youtu.be/dy7-LvDu-3s)
|
||||
- [`PAL`: Program-aided Language Models with LangChain code](https://youtu.be/dy7-LvDu-3s)
|
||||
- [Building a Summarization System with LangChain and `GPT-3` - Part 1](https://youtu.be/LNq_2s_H01Y)
|
||||
- [Building a Summarization System with LangChain and `GPT-3` - Part 2](https://youtu.be/d-yeHDLgKHw)
|
||||
- [Microsoft's `Visual ChatGPT` using LangChain](https://youtu.be/7YEiEyfPF5U)
|
||||
@@ -85,7 +85,7 @@ Below are links to video tutorials and courses on LangChain. For written guides
|
||||
- [`BabyAGI`: Discover the Power of Task-Driven Autonomous Agents!](https://youtu.be/QBcDLSE2ERA)
|
||||
- [Improve your `BabyAGI` with LangChain](https://youtu.be/DRgPyOXZ-oE)
|
||||
- [Master `PDF` Chat with LangChain - Your essential guide to queries on documents](https://youtu.be/ZzgUqFtxgXI)
|
||||
- [Using LangChain with `DuckDuckGO` `Wikipedia` & `PythonREPL` Tools](https://youtu.be/KerHlb8nuVc)
|
||||
- [Using LangChain with `DuckDuckGO`, `Wikipedia` & `PythonREPL` Tools](https://youtu.be/KerHlb8nuVc)
|
||||
- [Building Custom Tools and Agents with LangChain (gpt-3.5-turbo)](https://youtu.be/biS8G8x8DdA)
|
||||
- [LangChain Retrieval QA Over Multiple Files with `ChromaDB`](https://youtu.be/3yPBVii7Ct0)
|
||||
- [LangChain Retrieval QA with Instructor Embeddings & `ChromaDB` for PDFs](https://youtu.be/cFCGUjc33aU)
|
||||
@@ -99,7 +99,7 @@ Below are links to video tutorials and courses on LangChain. For written guides
|
||||
- [`OpenAI Functions` + LangChain : Building a Multi Tool Agent](https://youtu.be/4KXK6c6TVXQ)
|
||||
- [What can you do with 16K tokens in LangChain?](https://youtu.be/z2aCZBAtWXs)
|
||||
- [Tagging and Extraction - Classification using `OpenAI Functions`](https://youtu.be/a8hMgIcUEnE)
|
||||
- ⛓ [HOW to Make Conversational Form with LangChain](https://youtu.be/IT93On2LB5k)
|
||||
- [HOW to Make Conversational Form with LangChain](https://youtu.be/IT93On2LB5k)
|
||||
|
||||
|
||||
### [LangChain](https://www.youtube.com/playlist?list=PLVEEucA9MYhOu89CX8H3MBZqayTbcCTMr) by [Prompt Engineering](https://www.youtube.com/@engineerprompt)
|
||||
@@ -107,7 +107,7 @@ Below are links to video tutorials and courses on LangChain. For written guides
|
||||
- [Working with MULTIPLE `PDF` Files in LangChain: `ChatGPT` for your Data](https://youtu.be/s5LhRdh5fu4)
|
||||
- [`ChatGPT` for YOUR OWN `PDF` files with LangChain](https://youtu.be/TLf90ipMzfE)
|
||||
- [Talk to YOUR DATA without OpenAI APIs: LangChain](https://youtu.be/wrD-fZvT6UI)
|
||||
- [Langchain: PDF Chat App (GUI) | ChatGPT for Your PDF FILES](https://youtu.be/RIWbalZ7sTo)
|
||||
- [LangChain: PDF Chat App (GUI) | ChatGPT for Your PDF FILES](https://youtu.be/RIWbalZ7sTo)
|
||||
- [LangFlow: Build Chatbots without Writing Code](https://youtu.be/KJ-ux3hre4s)
|
||||
- [LangChain: Giving Memory to LLMs](https://youtu.be/dxO6pzlgJiY)
|
||||
- [BEST OPEN Alternative to `OPENAI's EMBEDDINGs` for Retrieval QA: LangChain](https://youtu.be/ogEalPMUCSY)
|
||||
@@ -121,5 +121,9 @@ Below are links to video tutorials and courses on LangChain. For written guides
|
||||
- [LangChain Agents: Build Personal Assistants For Your Data (Q&A with Harrison Chase and Mayo Oshin)](https://youtu.be/gVkF8cwfBLI)
|
||||
|
||||
|
||||
### Codebase Analysis
|
||||
- ⛓ [Codebase Analysis: Langchain Agents](https://carbonated-yacht-2c5.notion.site/Codebase-Analysis-Langchain-Agents-0b0587acd50647ca88aaae7cff5df1f2)
|
||||
|
||||
|
||||
---------------------
|
||||
⛓ icon marks a new addition [last update 2023-07-05]
|
||||
⛓ icon marks a new addition [last update 2023-08-20]
|
||||
|
||||
@@ -1,265 +1,375 @@
|
||||
# Dependents
|
||||
|
||||
Dependents stats for `hwchase17/langchain`
|
||||
Dependents stats for `langchain-ai/langchain`
|
||||
|
||||
[](https://github.com/hwchase17/langchain/network/dependents)
|
||||
[&message=244&color=informational&logo=slickpic)](https://github.com/hwchase17/langchain/network/dependents)
|
||||
[&message=9697&color=informational&logo=slickpic)](https://github.com/hwchase17/langchain/network/dependents)
|
||||
[&message=19827&color=informational&logo=slickpic)](https://github.com/hwchase17/langchain/network/dependents)
|
||||
[](https://github.com/langchain-ai/langchain/network/dependents)
|
||||
[&message=355&color=informational&logo=slickpic)](https://github.com/langchain-ai/langchain/network/dependents)
|
||||
[&message=19140&color=informational&logo=slickpic)](https://github.com/langchain-ai/langchain/network/dependents)
|
||||
[&message=22524&color=informational&logo=slickpic)](https://github.com/langchain-ai/langchain/network/dependents)
|
||||
|
||||
|
||||
[update: 2023-07-07; only dependent repositories with Stars > 100]
|
||||
[update: `2023-08-17`; only dependent repositories with Stars > 100]
|
||||
|
||||
|
||||
| Repository | Stars |
|
||||
| :-------- | -----: |
|
||||
|[openai/openai-cookbook](https://github.com/openai/openai-cookbook) | 41047 |
|
||||
|[LAION-AI/Open-Assistant](https://github.com/LAION-AI/Open-Assistant) | 33983 |
|
||||
|[microsoft/TaskMatrix](https://github.com/microsoft/TaskMatrix) | 33375 |
|
||||
|[imartinez/privateGPT](https://github.com/imartinez/privateGPT) | 31114 |
|
||||
|[hpcaitech/ColossalAI](https://github.com/hpcaitech/ColossalAI) | 30369 |
|
||||
|[reworkd/AgentGPT](https://github.com/reworkd/AgentGPT) | 24116 |
|
||||
|[OpenBB-finance/OpenBBTerminal](https://github.com/OpenBB-finance/OpenBBTerminal) | 22565 |
|
||||
|[openai/chatgpt-retrieval-plugin](https://github.com/openai/chatgpt-retrieval-plugin) | 18375 |
|
||||
|[jerryjliu/llama_index](https://github.com/jerryjliu/llama_index) | 17723 |
|
||||
|[mindsdb/mindsdb](https://github.com/mindsdb/mindsdb) | 16958 |
|
||||
|[mlflow/mlflow](https://github.com/mlflow/mlflow) | 14632 |
|
||||
|[GaiZhenbiao/ChuanhuChatGPT](https://github.com/GaiZhenbiao/ChuanhuChatGPT) | 11273 |
|
||||
|[openai/evals](https://github.com/openai/evals) | 10745 |
|
||||
|[databrickslabs/dolly](https://github.com/databrickslabs/dolly) | 10298 |
|
||||
|[imClumsyPanda/langchain-ChatGLM](https://github.com/imClumsyPanda/langchain-ChatGLM) | 9838 |
|
||||
|[logspace-ai/langflow](https://github.com/logspace-ai/langflow) | 9247 |
|
||||
|[AIGC-Audio/AudioGPT](https://github.com/AIGC-Audio/AudioGPT) | 8768 |
|
||||
|[PromtEngineer/localGPT](https://github.com/PromtEngineer/localGPT) | 8651 |
|
||||
|[StanGirard/quivr](https://github.com/StanGirard/quivr) | 8119 |
|
||||
|[go-skynet/LocalAI](https://github.com/go-skynet/LocalAI) | 7418 |
|
||||
|[gventuri/pandas-ai](https://github.com/gventuri/pandas-ai) | 7301 |
|
||||
|[PipedreamHQ/pipedream](https://github.com/PipedreamHQ/pipedream) | 6636 |
|
||||
|[arc53/DocsGPT](https://github.com/arc53/DocsGPT) | 5849 |
|
||||
|[e2b-dev/e2b](https://github.com/e2b-dev/e2b) | 5129 |
|
||||
|[langgenius/dify](https://github.com/langgenius/dify) | 4804 |
|
||||
|[serge-chat/serge](https://github.com/serge-chat/serge) | 4448 |
|
||||
|[csunny/DB-GPT](https://github.com/csunny/DB-GPT) | 4350 |
|
||||
|[wenda-LLM/wenda](https://github.com/wenda-LLM/wenda) | 4268 |
|
||||
|[zauberzeug/nicegui](https://github.com/zauberzeug/nicegui) | 4244 |
|
||||
|[intitni/CopilotForXcode](https://github.com/intitni/CopilotForXcode) | 4232 |
|
||||
|[GreyDGL/PentestGPT](https://github.com/GreyDGL/PentestGPT) | 4154 |
|
||||
|[madawei2699/myGPTReader](https://github.com/madawei2699/myGPTReader) | 4080 |
|
||||
|[zilliztech/GPTCache](https://github.com/zilliztech/GPTCache) | 3949 |
|
||||
|[gkamradt/langchain-tutorials](https://github.com/gkamradt/langchain-tutorials) | 3920 |
|
||||
|[bentoml/OpenLLM](https://github.com/bentoml/OpenLLM) | 3481 |
|
||||
|[MineDojo/Voyager](https://github.com/MineDojo/Voyager) | 3453 |
|
||||
|[mmabrouk/chatgpt-wrapper](https://github.com/mmabrouk/chatgpt-wrapper) | 3355 |
|
||||
|[postgresml/postgresml](https://github.com/postgresml/postgresml) | 3328 |
|
||||
|[marqo-ai/marqo](https://github.com/marqo-ai/marqo) | 3100 |
|
||||
|[kyegomez/tree-of-thoughts](https://github.com/kyegomez/tree-of-thoughts) | 3049 |
|
||||
|[PrefectHQ/marvin](https://github.com/PrefectHQ/marvin) | 2844 |
|
||||
|[project-baize/baize-chatbot](https://github.com/project-baize/baize-chatbot) | 2833 |
|
||||
|[h2oai/h2ogpt](https://github.com/h2oai/h2ogpt) | 2809 |
|
||||
|[hwchase17/chat-langchain](https://github.com/hwchase17/chat-langchain) | 2809 |
|
||||
|[whitead/paper-qa](https://github.com/whitead/paper-qa) | 2664 |
|
||||
|[Azure-Samples/azure-search-openai-demo](https://github.com/Azure-Samples/azure-search-openai-demo) | 2650 |
|
||||
|[OpenGVLab/InternGPT](https://github.com/OpenGVLab/InternGPT) | 2525 |
|
||||
|[GerevAI/gerev](https://github.com/GerevAI/gerev) | 2372 |
|
||||
|[ParisNeo/lollms-webui](https://github.com/ParisNeo/lollms-webui) | 2287 |
|
||||
|[OpenBMB/BMTools](https://github.com/OpenBMB/BMTools) | 2265 |
|
||||
|[SamurAIGPT/privateGPT](https://github.com/SamurAIGPT/privateGPT) | 2084 |
|
||||
|[Chainlit/chainlit](https://github.com/Chainlit/chainlit) | 1912 |
|
||||
|[Farama-Foundation/PettingZoo](https://github.com/Farama-Foundation/PettingZoo) | 1869 |
|
||||
|[OpenGVLab/Ask-Anything](https://github.com/OpenGVLab/Ask-Anything) | 1864 |
|
||||
|[IntelligenzaArtificiale/Free-Auto-GPT](https://github.com/IntelligenzaArtificiale/Free-Auto-GPT) | 1849 |
|
||||
|[Unstructured-IO/unstructured](https://github.com/Unstructured-IO/unstructured) | 1766 |
|
||||
|[yanqiangmiffy/Chinese-LangChain](https://github.com/yanqiangmiffy/Chinese-LangChain) | 1745 |
|
||||
|[NVIDIA/NeMo-Guardrails](https://github.com/NVIDIA/NeMo-Guardrails) | 1732 |
|
||||
|[hwchase17/notion-qa](https://github.com/hwchase17/notion-qa) | 1716 |
|
||||
|[paulpierre/RasaGPT](https://github.com/paulpierre/RasaGPT) | 1619 |
|
||||
|[pinterest/querybook](https://github.com/pinterest/querybook) | 1468 |
|
||||
|[vocodedev/vocode-python](https://github.com/vocodedev/vocode-python) | 1446 |
|
||||
|[thomas-yanxin/LangChain-ChatGLM-Webui](https://github.com/thomas-yanxin/LangChain-ChatGLM-Webui) | 1430 |
|
||||
|[Mintplex-Labs/anything-llm](https://github.com/Mintplex-Labs/anything-llm) | 1419 |
|
||||
|[Kav-K/GPTDiscord](https://github.com/Kav-K/GPTDiscord) | 1416 |
|
||||
|[lunasec-io/lunasec](https://github.com/lunasec-io/lunasec) | 1327 |
|
||||
|[psychic-api/psychic](https://github.com/psychic-api/psychic) | 1307 |
|
||||
|[jina-ai/thinkgpt](https://github.com/jina-ai/thinkgpt) | 1242 |
|
||||
|[agiresearch/OpenAGI](https://github.com/agiresearch/OpenAGI) | 1239 |
|
||||
|[ttengwang/Caption-Anything](https://github.com/ttengwang/Caption-Anything) | 1203 |
|
||||
|[jina-ai/dev-gpt](https://github.com/jina-ai/dev-gpt) | 1179 |
|
||||
|[keephq/keep](https://github.com/keephq/keep) | 1169 |
|
||||
|[greshake/llm-security](https://github.com/greshake/llm-security) | 1156 |
|
||||
|[richardyc/Chrome-GPT](https://github.com/richardyc/Chrome-GPT) | 1090 |
|
||||
|[jina-ai/langchain-serve](https://github.com/jina-ai/langchain-serve) | 1088 |
|
||||
|[mmz-001/knowledge_gpt](https://github.com/mmz-001/knowledge_gpt) | 1074 |
|
||||
|[juncongmoo/chatllama](https://github.com/juncongmoo/chatllama) | 1057 |
|
||||
|[noahshinn024/reflexion](https://github.com/noahshinn024/reflexion) | 1045 |
|
||||
|[visual-openllm/visual-openllm](https://github.com/visual-openllm/visual-openllm) | 1036 |
|
||||
|[101dotxyz/GPTeam](https://github.com/101dotxyz/GPTeam) | 999 |
|
||||
|[poe-platform/api-bot-tutorial](https://github.com/poe-platform/api-bot-tutorial) | 989 |
|
||||
|[irgolic/AutoPR](https://github.com/irgolic/AutoPR) | 974 |
|
||||
|[homanp/superagent](https://github.com/homanp/superagent) | 970 |
|
||||
|[microsoft/X-Decoder](https://github.com/microsoft/X-Decoder) | 941 |
|
||||
|[peterw/Chat-with-Github-Repo](https://github.com/peterw/Chat-with-Github-Repo) | 896 |
|
||||
|[SamurAIGPT/Camel-AutoGPT](https://github.com/SamurAIGPT/Camel-AutoGPT) | 856 |
|
||||
|[cirediatpl/FigmaChain](https://github.com/cirediatpl/FigmaChain) | 840 |
|
||||
|[chatarena/chatarena](https://github.com/chatarena/chatarena) | 829 |
|
||||
|[rlancemartin/auto-evaluator](https://github.com/rlancemartin/auto-evaluator) | 816 |
|
||||
|[seanpixel/Teenage-AGI](https://github.com/seanpixel/Teenage-AGI) | 816 |
|
||||
|[hashintel/hash](https://github.com/hashintel/hash) | 806 |
|
||||
|[corca-ai/EVAL](https://github.com/corca-ai/EVAL) | 790 |
|
||||
|[eyurtsev/kor](https://github.com/eyurtsev/kor) | 752 |
|
||||
|[cheshire-cat-ai/core](https://github.com/cheshire-cat-ai/core) | 713 |
|
||||
|[e-johnstonn/BriefGPT](https://github.com/e-johnstonn/BriefGPT) | 686 |
|
||||
|[run-llama/llama-lab](https://github.com/run-llama/llama-lab) | 685 |
|
||||
|[refuel-ai/autolabel](https://github.com/refuel-ai/autolabel) | 673 |
|
||||
|[griptape-ai/griptape](https://github.com/griptape-ai/griptape) | 617 |
|
||||
|[billxbf/ReWOO](https://github.com/billxbf/ReWOO) | 616 |
|
||||
|[Anil-matcha/ChatPDF](https://github.com/Anil-matcha/ChatPDF) | 609 |
|
||||
|[NimbleBoxAI/ChainFury](https://github.com/NimbleBoxAI/ChainFury) | 592 |
|
||||
|[getmetal/motorhead](https://github.com/getmetal/motorhead) | 581 |
|
||||
|[ajndkr/lanarky](https://github.com/ajndkr/lanarky) | 574 |
|
||||
|[namuan/dr-doc-search](https://github.com/namuan/dr-doc-search) | 572 |
|
||||
|[kreneskyp/ix](https://github.com/kreneskyp/ix) | 564 |
|
||||
|[akshata29/chatpdf](https://github.com/akshata29/chatpdf) | 540 |
|
||||
|[hwchase17/chat-your-data](https://github.com/hwchase17/chat-your-data) | 540 |
|
||||
|[whyiyhw/chatgpt-wechat](https://github.com/whyiyhw/chatgpt-wechat) | 537 |
|
||||
|[khoj-ai/khoj](https://github.com/khoj-ai/khoj) | 531 |
|
||||
|[SamurAIGPT/ChatGPT-Developer-Plugins](https://github.com/SamurAIGPT/ChatGPT-Developer-Plugins) | 528 |
|
||||
|[microsoft/PodcastCopilot](https://github.com/microsoft/PodcastCopilot) | 526 |
|
||||
|[ruoccofabrizio/azure-open-ai-embeddings-qna](https://github.com/ruoccofabrizio/azure-open-ai-embeddings-qna) | 515 |
|
||||
|[alexanderatallah/window.ai](https://github.com/alexanderatallah/window.ai) | 494 |
|
||||
|[StevenGrove/GPT4Tools](https://github.com/StevenGrove/GPT4Tools) | 483 |
|
||||
|[jina-ai/agentchain](https://github.com/jina-ai/agentchain) | 472 |
|
||||
|[mckaywrigley/repo-chat](https://github.com/mckaywrigley/repo-chat) | 465 |
|
||||
|[yeagerai/yeagerai-agent](https://github.com/yeagerai/yeagerai-agent) | 464 |
|
||||
|[langchain-ai/langchain-aiplugin](https://github.com/langchain-ai/langchain-aiplugin) | 464 |
|
||||
|[mpaepper/content-chatbot](https://github.com/mpaepper/content-chatbot) | 455 |
|
||||
|[michaelthwan/searchGPT](https://github.com/michaelthwan/searchGPT) | 455 |
|
||||
|[freddyaboulton/gradio-tools](https://github.com/freddyaboulton/gradio-tools) | 450 |
|
||||
|[amosjyng/langchain-visualizer](https://github.com/amosjyng/langchain-visualizer) | 446 |
|
||||
|[msoedov/langcorn](https://github.com/msoedov/langcorn) | 445 |
|
||||
|[plastic-labs/tutor-gpt](https://github.com/plastic-labs/tutor-gpt) | 426 |
|
||||
|[poe-platform/poe-protocol](https://github.com/poe-platform/poe-protocol) | 426 |
|
||||
|[jonra1993/fastapi-alembic-sqlmodel-async](https://github.com/jonra1993/fastapi-alembic-sqlmodel-async) | 418 |
|
||||
|[langchain-ai/auto-evaluator](https://github.com/langchain-ai/auto-evaluator) | 416 |
|
||||
|[steamship-core/steamship-langchain](https://github.com/steamship-core/steamship-langchain) | 401 |
|
||||
|[xuwenhao/geektime-ai-course](https://github.com/xuwenhao/geektime-ai-course) | 400 |
|
||||
|[continuum-llms/chatgpt-memory](https://github.com/continuum-llms/chatgpt-memory) | 386 |
|
||||
|[mtenenholtz/chat-twitter](https://github.com/mtenenholtz/chat-twitter) | 382 |
|
||||
|[explosion/spacy-llm](https://github.com/explosion/spacy-llm) | 368 |
|
||||
|[showlab/VLog](https://github.com/showlab/VLog) | 363 |
|
||||
|[yvann-hub/Robby-chatbot](https://github.com/yvann-hub/Robby-chatbot) | 363 |
|
||||
|[daodao97/chatdoc](https://github.com/daodao97/chatdoc) | 361 |
|
||||
|[opentensor/bittensor](https://github.com/opentensor/bittensor) | 360 |
|
||||
|[alejandro-ao/langchain-ask-pdf](https://github.com/alejandro-ao/langchain-ask-pdf) | 355 |
|
||||
|[logan-markewich/llama_index_starter_pack](https://github.com/logan-markewich/llama_index_starter_pack) | 351 |
|
||||
|[jupyterlab/jupyter-ai](https://github.com/jupyterlab/jupyter-ai) | 348 |
|
||||
|[alejandro-ao/ask-multiple-pdfs](https://github.com/alejandro-ao/ask-multiple-pdfs) | 321 |
|
||||
|[andylokandy/gpt-4-search](https://github.com/andylokandy/gpt-4-search) | 314 |
|
||||
|[mosaicml/examples](https://github.com/mosaicml/examples) | 313 |
|
||||
|[personoids/personoids-lite](https://github.com/personoids/personoids-lite) | 306 |
|
||||
|[itamargol/openai](https://github.com/itamargol/openai) | 304 |
|
||||
|[Anil-matcha/Website-to-Chatbot](https://github.com/Anil-matcha/Website-to-Chatbot) | 299 |
|
||||
|[momegas/megabots](https://github.com/momegas/megabots) | 299 |
|
||||
|[BlackHC/llm-strategy](https://github.com/BlackHC/llm-strategy) | 289 |
|
||||
|[daveebbelaar/langchain-experiments](https://github.com/daveebbelaar/langchain-experiments) | 283 |
|
||||
|[wandb/weave](https://github.com/wandb/weave) | 279 |
|
||||
|[Cheems-Seminar/grounded-segment-any-parts](https://github.com/Cheems-Seminar/grounded-segment-any-parts) | 273 |
|
||||
|[jerlendds/osintbuddy](https://github.com/jerlendds/osintbuddy) | 271 |
|
||||
|[OpenBMB/AgentVerse](https://github.com/OpenBMB/AgentVerse) | 270 |
|
||||
|[MagnivOrg/prompt-layer-library](https://github.com/MagnivOrg/prompt-layer-library) | 269 |
|
||||
|[sullivan-sean/chat-langchainjs](https://github.com/sullivan-sean/chat-langchainjs) | 259 |
|
||||
|[Azure-Samples/openai](https://github.com/Azure-Samples/openai) | 252 |
|
||||
|[bborn/howdoi.ai](https://github.com/bborn/howdoi.ai) | 248 |
|
||||
|[hnawaz007/pythondataanalysis](https://github.com/hnawaz007/pythondataanalysis) | 247 |
|
||||
|[conceptofmind/toolformer](https://github.com/conceptofmind/toolformer) | 243 |
|
||||
|[truera/trulens](https://github.com/truera/trulens) | 239 |
|
||||
|[ur-whitelab/exmol](https://github.com/ur-whitelab/exmol) | 238 |
|
||||
|[intel/intel-extension-for-transformers](https://github.com/intel/intel-extension-for-transformers) | 237 |
|
||||
|[monarch-initiative/ontogpt](https://github.com/monarch-initiative/ontogpt) | 236 |
|
||||
|[wandb/edu](https://github.com/wandb/edu) | 231 |
|
||||
|[recalign/RecAlign](https://github.com/recalign/RecAlign) | 229 |
|
||||
|[alvarosevilla95/autolang](https://github.com/alvarosevilla95/autolang) | 223 |
|
||||
|[kaleido-lab/dolphin](https://github.com/kaleido-lab/dolphin) | 221 |
|
||||
|[JohnSnowLabs/nlptest](https://github.com/JohnSnowLabs/nlptest) | 220 |
|
||||
|[paolorechia/learn-langchain](https://github.com/paolorechia/learn-langchain) | 219 |
|
||||
|[Safiullah-Rahu/CSV-AI](https://github.com/Safiullah-Rahu/CSV-AI) | 215 |
|
||||
|[Haste171/langchain-chatbot](https://github.com/Haste171/langchain-chatbot) | 215 |
|
||||
|[steamship-packages/langchain-agent-production-starter](https://github.com/steamship-packages/langchain-agent-production-starter) | 214 |
|
||||
|[airobotlab/KoChatGPT](https://github.com/airobotlab/KoChatGPT) | 213 |
|
||||
|[filip-michalsky/SalesGPT](https://github.com/filip-michalsky/SalesGPT) | 211 |
|
||||
|[marella/chatdocs](https://github.com/marella/chatdocs) | 207 |
|
||||
|[su77ungr/CASALIOY](https://github.com/su77ungr/CASALIOY) | 200 |
|
||||
|[shaman-ai/agent-actors](https://github.com/shaman-ai/agent-actors) | 195 |
|
||||
|[plchld/InsightFlow](https://github.com/plchld/InsightFlow) | 189 |
|
||||
|[jbrukh/gpt-jargon](https://github.com/jbrukh/gpt-jargon) | 186 |
|
||||
|[hwchase17/langchain-streamlit-template](https://github.com/hwchase17/langchain-streamlit-template) | 185 |
|
||||
|[huchenxucs/ChatDB](https://github.com/huchenxucs/ChatDB) | 179 |
|
||||
|[benthecoder/ClassGPT](https://github.com/benthecoder/ClassGPT) | 178 |
|
||||
|[hwchase17/chroma-langchain](https://github.com/hwchase17/chroma-langchain) | 178 |
|
||||
|[radi-cho/datasetGPT](https://github.com/radi-cho/datasetGPT) | 177 |
|
||||
|[jiran214/GPT-vup](https://github.com/jiran214/GPT-vup) | 176 |
|
||||
|[rsaryev/talk-codebase](https://github.com/rsaryev/talk-codebase) | 174 |
|
||||
|[edreisMD/plugnplai](https://github.com/edreisMD/plugnplai) | 174 |
|
||||
|[gia-guar/JARVIS-ChatGPT](https://github.com/gia-guar/JARVIS-ChatGPT) | 172 |
|
||||
|[hardbyte/qabot](https://github.com/hardbyte/qabot) | 171 |
|
||||
|[shamspias/customizable-gpt-chatbot](https://github.com/shamspias/customizable-gpt-chatbot) | 165 |
|
||||
|[gustavz/DataChad](https://github.com/gustavz/DataChad) | 164 |
|
||||
|[yasyf/compress-gpt](https://github.com/yasyf/compress-gpt) | 163 |
|
||||
|[SamPink/dev-gpt](https://github.com/SamPink/dev-gpt) | 161 |
|
||||
|[yuanjie-ai/ChatLLM](https://github.com/yuanjie-ai/ChatLLM) | 161 |
|
||||
|[pablomarin/GPT-Azure-Search-Engine](https://github.com/pablomarin/GPT-Azure-Search-Engine) | 160 |
|
||||
|[jondurbin/airoboros](https://github.com/jondurbin/airoboros) | 157 |
|
||||
|[fengyuli-dev/multimedia-gpt](https://github.com/fengyuli-dev/multimedia-gpt) | 157 |
|
||||
|[PradipNichite/Youtube-Tutorials](https://github.com/PradipNichite/Youtube-Tutorials) | 156 |
|
||||
|[nicknochnack/LangchainDocuments](https://github.com/nicknochnack/LangchainDocuments) | 155 |
|
||||
|[ethanyanjiali/minChatGPT](https://github.com/ethanyanjiali/minChatGPT) | 155 |
|
||||
|[ccurme/yolopandas](https://github.com/ccurme/yolopandas) | 154 |
|
||||
|[chakkaradeep/pyCodeAGI](https://github.com/chakkaradeep/pyCodeAGI) | 153 |
|
||||
|[preset-io/promptimize](https://github.com/preset-io/promptimize) | 150 |
|
||||
|[onlyphantom/llm-python](https://github.com/onlyphantom/llm-python) | 148 |
|
||||
|[Azure-Samples/azure-search-power-skills](https://github.com/Azure-Samples/azure-search-power-skills) | 146 |
|
||||
|[realminchoi/babyagi-ui](https://github.com/realminchoi/babyagi-ui) | 144 |
|
||||
|[microsoft/azure-openai-in-a-day-workshop](https://github.com/microsoft/azure-openai-in-a-day-workshop) | 144 |
|
||||
|[jmpaz/promptlib](https://github.com/jmpaz/promptlib) | 143 |
|
||||
|[shauryr/S2QA](https://github.com/shauryr/S2QA) | 142 |
|
||||
|[handrew/browserpilot](https://github.com/handrew/browserpilot) | 141 |
|
||||
|[Jaseci-Labs/jaseci](https://github.com/Jaseci-Labs/jaseci) | 140 |
|
||||
|[Klingefjord/chatgpt-telegram](https://github.com/Klingefjord/chatgpt-telegram) | 140 |
|
||||
|[WongSaang/chatgpt-ui-server](https://github.com/WongSaang/chatgpt-ui-server) | 139 |
|
||||
|[ibiscp/LLM-IMDB](https://github.com/ibiscp/LLM-IMDB) | 139 |
|
||||
|[menloparklab/langchain-cohere-qdrant-doc-retrieval](https://github.com/menloparklab/langchain-cohere-qdrant-doc-retrieval) | 138 |
|
||||
|[hirokidaichi/wanna](https://github.com/hirokidaichi/wanna) | 137 |
|
||||
|[steamship-core/vercel-examples](https://github.com/steamship-core/vercel-examples) | 137 |
|
||||
|[deeppavlov/dream](https://github.com/deeppavlov/dream) | 136 |
|
||||
|[miaoshouai/miaoshouai-assistant](https://github.com/miaoshouai/miaoshouai-assistant) | 135 |
|
||||
|[sugarforever/LangChain-Tutorials](https://github.com/sugarforever/LangChain-Tutorials) | 135 |
|
||||
|[yasyf/summ](https://github.com/yasyf/summ) | 135 |
|
||||
|[peterw/StoryStorm](https://github.com/peterw/StoryStorm) | 134 |
|
||||
|[vaibkumr/prompt-optimizer](https://github.com/vaibkumr/prompt-optimizer) | 132 |
|
||||
|[ju-bezdek/langchain-decorators](https://github.com/ju-bezdek/langchain-decorators) | 130 |
|
||||
|[homanp/vercel-langchain](https://github.com/homanp/vercel-langchain) | 128 |
|
||||
|[Teahouse-Studios/akari-bot](https://github.com/Teahouse-Studios/akari-bot) | 127 |
|
||||
|[petehunt/langchain-github-bot](https://github.com/petehunt/langchain-github-bot) | 125 |
|
||||
|[eunomia-bpf/GPTtrace](https://github.com/eunomia-bpf/GPTtrace) | 122 |
|
||||
|[fixie-ai/fixie-examples](https://github.com/fixie-ai/fixie-examples) | 122 |
|
||||
|[Aggregate-Intellect/practical-llms](https://github.com/Aggregate-Intellect/practical-llms) | 120 |
|
||||
|[davila7/file-gpt](https://github.com/davila7/file-gpt) | 120 |
|
||||
|[Azure-Samples/azure-search-openai-demo-csharp](https://github.com/Azure-Samples/azure-search-openai-demo-csharp) | 119 |
|
||||
|[prof-frink-lab/slangchain](https://github.com/prof-frink-lab/slangchain) | 117 |
|
||||
|[aurelio-labs/arxiv-bot](https://github.com/aurelio-labs/arxiv-bot) | 117 |
|
||||
|[zenml-io/zenml-projects](https://github.com/zenml-io/zenml-projects) | 116 |
|
||||
|[flurb18/AgentOoba](https://github.com/flurb18/AgentOoba) | 114 |
|
||||
|[kaarthik108/snowChat](https://github.com/kaarthik108/snowChat) | 112 |
|
||||
|[RedisVentures/redis-openai-qna](https://github.com/RedisVentures/redis-openai-qna) | 111 |
|
||||
|[solana-labs/chatgpt-plugin](https://github.com/solana-labs/chatgpt-plugin) | 111 |
|
||||
|[kulltc/chatgpt-sql](https://github.com/kulltc/chatgpt-sql) | 109 |
|
||||
|[summarizepaper/summarizepaper](https://github.com/summarizepaper/summarizepaper) | 109 |
|
||||
|[Azure-Samples/miyagi](https://github.com/Azure-Samples/miyagi) | 106 |
|
||||
|[ssheng/BentoChain](https://github.com/ssheng/BentoChain) | 106 |
|
||||
|[voxel51/voxelgpt](https://github.com/voxel51/voxelgpt) | 105 |
|
||||
|[mallahyari/drqa](https://github.com/mallahyari/drqa) | 103 |
|
||||
|[openai/openai-cookbook](https://github.com/openai/openai-cookbook) | 46276 |
|
||||
|[AntonOsika/gpt-engineer](https://github.com/AntonOsika/gpt-engineer) | 41497 |
|
||||
|[imartinez/privateGPT](https://github.com/imartinez/privateGPT) | 36296 |
|
||||
|[LAION-AI/Open-Assistant](https://github.com/LAION-AI/Open-Assistant) | 34861 |
|
||||
|[microsoft/TaskMatrix](https://github.com/microsoft/TaskMatrix) | 33906 |
|
||||
|[hpcaitech/ColossalAI](https://github.com/hpcaitech/ColossalAI) | 31654 |
|
||||
|[streamlit/streamlit](https://github.com/streamlit/streamlit) | 26571 |
|
||||
|[reworkd/AgentGPT](https://github.com/reworkd/AgentGPT) | 25819 |
|
||||
|[OpenBB-finance/OpenBBTerminal](https://github.com/OpenBB-finance/OpenBBTerminal) | 23180 |
|
||||
|[geekan/MetaGPT](https://github.com/geekan/MetaGPT) | 21968 |
|
||||
|[jerryjliu/llama_index](https://github.com/jerryjliu/llama_index) | 20204 |
|
||||
|[StanGirard/quivr](https://github.com/StanGirard/quivr) | 20142 |
|
||||
|[openai/chatgpt-retrieval-plugin](https://github.com/openai/chatgpt-retrieval-plugin) | 19215 |
|
||||
|[mindsdb/mindsdb](https://github.com/mindsdb/mindsdb) | 17580 |
|
||||
|[cube-js/cube](https://github.com/cube-js/cube) | 16003 |
|
||||
|[PromtEngineer/localGPT](https://github.com/PromtEngineer/localGPT) | 15134 |
|
||||
|[mlflow/mlflow](https://github.com/mlflow/mlflow) | 15027 |
|
||||
|[chatchat-space/Langchain-Chatchat](https://github.com/chatchat-space/Langchain-Chatchat) | 14024 |
|
||||
|[GaiZhenbiao/ChuanhuChatGPT](https://github.com/GaiZhenbiao/ChuanhuChatGPT) | 12020 |
|
||||
|[logspace-ai/langflow](https://github.com/logspace-ai/langflow) | 11599 |
|
||||
|[openai/evals](https://github.com/openai/evals) | 11509 |
|
||||
|[airbytehq/airbyte](https://github.com/airbytehq/airbyte) | 11493 |
|
||||
|[databrickslabs/dolly](https://github.com/databrickslabs/dolly) | 10531 |
|
||||
|[go-skynet/LocalAI](https://github.com/go-skynet/LocalAI) | 9955 |
|
||||
|[AIGC-Audio/AudioGPT](https://github.com/AIGC-Audio/AudioGPT) | 9081 |
|
||||
|[gventuri/pandas-ai](https://github.com/gventuri/pandas-ai) | 8201 |
|
||||
|[hwchase17/langchainjs](https://github.com/hwchase17/langchainjs) | 7754 |
|
||||
|[langgenius/dify](https://github.com/langgenius/dify) | 7348 |
|
||||
|[PipedreamHQ/pipedream](https://github.com/PipedreamHQ/pipedream) | 6950 |
|
||||
|[h2oai/h2ogpt](https://github.com/h2oai/h2ogpt) | 6858 |
|
||||
|[arc53/DocsGPT](https://github.com/arc53/DocsGPT) | 6300 |
|
||||
|[0xpayne/gpt-migrate](https://github.com/0xpayne/gpt-migrate) | 6193 |
|
||||
|[eosphoros-ai/DB-GPT](https://github.com/eosphoros-ai/DB-GPT) | 6026 |
|
||||
|[bentoml/OpenLLM](https://github.com/bentoml/OpenLLM) | 5641 |
|
||||
|[jmorganca/ollama](https://github.com/jmorganca/ollama) | 5448 |
|
||||
|[e2b-dev/e2b](https://github.com/e2b-dev/e2b) | 5365 |
|
||||
|[mage-ai/mage-ai](https://github.com/mage-ai/mage-ai) | 5352 |
|
||||
|[wenda-LLM/wenda](https://github.com/wenda-LLM/wenda) | 5192 |
|
||||
|[zilliztech/GPTCache](https://github.com/zilliztech/GPTCache) | 4993 |
|
||||
|[GreyDGL/PentestGPT](https://github.com/GreyDGL/PentestGPT) | 4831 |
|
||||
|[zauberzeug/nicegui](https://github.com/zauberzeug/nicegui) | 4824 |
|
||||
|[serge-chat/serge](https://github.com/serge-chat/serge) | 4783 |
|
||||
|[Shaunwei/RealChar](https://github.com/Shaunwei/RealChar) | 4779 |
|
||||
|[gkamradt/langchain-tutorials](https://github.com/gkamradt/langchain-tutorials) | 4752 |
|
||||
|[openchatai/OpenChat](https://github.com/openchatai/OpenChat) | 4452 |
|
||||
|[intel-analytics/BigDL](https://github.com/intel-analytics/BigDL) | 4286 |
|
||||
|[madawei2699/myGPTReader](https://github.com/madawei2699/myGPTReader) | 4167 |
|
||||
|[MineDojo/Voyager](https://github.com/MineDojo/Voyager) | 3952 |
|
||||
|[embedchain/embedchain](https://github.com/embedchain/embedchain) | 3887 |
|
||||
|[postgresml/postgresml](https://github.com/postgresml/postgresml) | 3636 |
|
||||
|[assafelovic/gpt-researcher](https://github.com/assafelovic/gpt-researcher) | 3480 |
|
||||
|[llm-workflow-engine/llm-workflow-engine](https://github.com/llm-workflow-engine/llm-workflow-engine) | 3445 |
|
||||
|[marqo-ai/marqo](https://github.com/marqo-ai/marqo) | 3397 |
|
||||
|[kyegomez/tree-of-thoughts](https://github.com/kyegomez/tree-of-thoughts) | 3366 |
|
||||
|[RayVentura/ShortGPT](https://github.com/RayVentura/ShortGPT) | 3335 |
|
||||
|[Azure-Samples/azure-search-openai-demo](https://github.com/Azure-Samples/azure-search-openai-demo) | 3316 |
|
||||
|[langchain-ai/chat-langchain](https://github.com/langchain-ai/chat-langchain) | 3270 |
|
||||
|[khoj-ai/khoj](https://github.com/khoj-ai/khoj) | 3266 |
|
||||
|[PrefectHQ/marvin](https://github.com/PrefectHQ/marvin) | 3176 |
|
||||
|[project-baize/baize-chatbot](https://github.com/project-baize/baize-chatbot) | 2999 |
|
||||
|[whitead/paper-qa](https://github.com/whitead/paper-qa) | 2932 |
|
||||
|[OpenGVLab/InternGPT](https://github.com/OpenGVLab/InternGPT) | 2816 |
|
||||
|[continuedev/continue](https://github.com/continuedev/continue) | 2803 |
|
||||
|[ParisNeo/lollms-webui](https://github.com/ParisNeo/lollms-webui) | 2679 |
|
||||
|[OpenBMB/ToolBench](https://github.com/OpenBMB/ToolBench) | 2673 |
|
||||
|[shroominic/codeinterpreter-api](https://github.com/shroominic/codeinterpreter-api) | 2492 |
|
||||
|[OpenBMB/BMTools](https://github.com/OpenBMB/BMTools) | 2486 |
|
||||
|[GerevAI/gerev](https://github.com/GerevAI/gerev) | 2450 |
|
||||
|[SamurAIGPT/EmbedAI](https://github.com/SamurAIGPT/EmbedAI) | 2448 |
|
||||
|[Unstructured-IO/unstructured](https://github.com/Unstructured-IO/unstructured) | 2255 |
|
||||
|[Mintplex-Labs/anything-llm](https://github.com/Mintplex-Labs/anything-llm) | 2216 |
|
||||
|[emptycrown/llama-hub](https://github.com/emptycrown/llama-hub) | 2198 |
|
||||
|[homanp/superagent](https://github.com/homanp/superagent) | 2177 |
|
||||
|[yanqiangmiffy/Chinese-LangChain](https://github.com/yanqiangmiffy/Chinese-LangChain) | 2144 |
|
||||
|[OpenGVLab/Ask-Anything](https://github.com/OpenGVLab/Ask-Anything) | 2092 |
|
||||
|[IntelligenzaArtificiale/Free-Auto-GPT](https://github.com/IntelligenzaArtificiale/Free-Auto-GPT) | 2060 |
|
||||
|[thomas-yanxin/LangChain-ChatGLM-Webui](https://github.com/thomas-yanxin/LangChain-ChatGLM-Webui) | 2039 |
|
||||
|[NVIDIA/NeMo-Guardrails](https://github.com/NVIDIA/NeMo-Guardrails) | 1992 |
|
||||
|[Farama-Foundation/PettingZoo](https://github.com/Farama-Foundation/PettingZoo) | 1949 |
|
||||
|[hwchase17/notion-qa](https://github.com/hwchase17/notion-qa) | 1915 |
|
||||
|[paulpierre/RasaGPT](https://github.com/paulpierre/RasaGPT) | 1783 |
|
||||
|[jupyterlab/jupyter-ai](https://github.com/jupyterlab/jupyter-ai) | 1761 |
|
||||
|[vocodedev/vocode-python](https://github.com/vocodedev/vocode-python) | 1627 |
|
||||
|[pinterest/querybook](https://github.com/pinterest/querybook) | 1509 |
|
||||
|[psychic-api/psychic](https://github.com/psychic-api/psychic) | 1499 |
|
||||
|[Kav-K/GPTDiscord](https://github.com/Kav-K/GPTDiscord) | 1476 |
|
||||
|[avinashkranjan/Amazing-Python-Scripts](https://github.com/avinashkranjan/Amazing-Python-Scripts) | 1471 |
|
||||
|[hegelai/prompttools](https://github.com/hegelai/prompttools) | 1392 |
|
||||
|[jina-ai/langchain-serve](https://github.com/jina-ai/langchain-serve) | 1370 |
|
||||
|[Forethought-Technologies/AutoChain](https://github.com/Forethought-Technologies/AutoChain) | 1360 |
|
||||
|[keephq/keep](https://github.com/keephq/keep) | 1357 |
|
||||
|[ttengwang/Caption-Anything](https://github.com/ttengwang/Caption-Anything) | 1345 |
|
||||
|[lunasec-io/lunasec](https://github.com/lunasec-io/lunasec) | 1342 |
|
||||
|[agiresearch/OpenAGI](https://github.com/agiresearch/OpenAGI) | 1332 |
|
||||
|[noahshinn024/reflexion](https://github.com/noahshinn024/reflexion) | 1314 |
|
||||
|[jina-ai/dev-gpt](https://github.com/jina-ai/dev-gpt) | 1314 |
|
||||
|[jina-ai/thinkgpt](https://github.com/jina-ai/thinkgpt) | 1313 |
|
||||
|[greshake/llm-security](https://github.com/greshake/llm-security) | 1299 |
|
||||
|[mmz-001/knowledge_gpt](https://github.com/mmz-001/knowledge_gpt) | 1237 |
|
||||
|[101dotxyz/GPTeam](https://github.com/101dotxyz/GPTeam) | 1232 |
|
||||
|[richardyc/Chrome-GPT](https://github.com/richardyc/Chrome-GPT) | 1223 |
|
||||
|[eyurtsev/kor](https://github.com/eyurtsev/kor) | 1192 |
|
||||
|[pluralsh/plural](https://github.com/pluralsh/plural) | 1126 |
|
||||
|[juncongmoo/chatllama](https://github.com/juncongmoo/chatllama) | 1117 |
|
||||
|[visual-openllm/visual-openllm](https://github.com/visual-openllm/visual-openllm) | 1110 |
|
||||
|[poe-platform/api-bot-tutorial](https://github.com/poe-platform/api-bot-tutorial) | 1096 |
|
||||
|[refuel-ai/autolabel](https://github.com/refuel-ai/autolabel) | 1080 |
|
||||
|[microsoft/X-Decoder](https://github.com/microsoft/X-Decoder) | 1075 |
|
||||
|[irgolic/AutoPR](https://github.com/irgolic/AutoPR) | 1068 |
|
||||
|[SamurAIGPT/Camel-AutoGPT](https://github.com/SamurAIGPT/Camel-AutoGPT) | 984 |
|
||||
|[peterw/Chat-with-Github-Repo](https://github.com/peterw/Chat-with-Github-Repo) | 957 |
|
||||
|[chatarena/chatarena](https://github.com/chatarena/chatarena) | 955 |
|
||||
|[griptape-ai/griptape](https://github.com/griptape-ai/griptape) | 944 |
|
||||
|[psychic-api/rag-stack](https://github.com/psychic-api/rag-stack) | 942 |
|
||||
|[nod-ai/SHARK](https://github.com/nod-ai/SHARK) | 909 |
|
||||
|[filip-michalsky/SalesGPT](https://github.com/filip-michalsky/SalesGPT) | 899 |
|
||||
|[melih-unsal/DemoGPT](https://github.com/melih-unsal/DemoGPT) | 896 |
|
||||
|[rlancemartin/auto-evaluator](https://github.com/rlancemartin/auto-evaluator) | 889 |
|
||||
|[cirediatpl/FigmaChain](https://github.com/cirediatpl/FigmaChain) | 868 |
|
||||
|[seanpixel/Teenage-AGI](https://github.com/seanpixel/Teenage-AGI) | 854 |
|
||||
|[cheshire-cat-ai/core](https://github.com/cheshire-cat-ai/core) | 847 |
|
||||
|[run-llama/llama-lab](https://github.com/run-llama/llama-lab) | 836 |
|
||||
|[corca-ai/EVAL](https://github.com/corca-ai/EVAL) | 818 |
|
||||
|[Anil-matcha/ChatPDF](https://github.com/Anil-matcha/ChatPDF) | 798 |
|
||||
|[alejandro-ao/ask-multiple-pdfs](https://github.com/alejandro-ao/ask-multiple-pdfs) | 782 |
|
||||
|[hwchase17/chat-your-data](https://github.com/hwchase17/chat-your-data) | 748 |
|
||||
|[LambdaLabsML/examples](https://github.com/LambdaLabsML/examples) | 741 |
|
||||
|[ajndkr/lanarky](https://github.com/ajndkr/lanarky) | 732 |
|
||||
|[microsoft/Llama-2-Onnx](https://github.com/microsoft/Llama-2-Onnx) | 722 |
|
||||
|[e-johnstonn/BriefGPT](https://github.com/e-johnstonn/BriefGPT) | 710 |
|
||||
|[billxbf/ReWOO](https://github.com/billxbf/ReWOO) | 710 |
|
||||
|[kennethleungty/Llama-2-Open-Source-LLM-CPU-Inference](https://github.com/kennethleungty/Llama-2-Open-Source-LLM-CPU-Inference) | 707 |
|
||||
|[databrickslabs/pyspark-ai](https://github.com/databrickslabs/pyspark-ai) | 704 |
|
||||
|[OpenBMB/AgentVerse](https://github.com/OpenBMB/AgentVerse) | 704 |
|
||||
|[kreneskyp/ix](https://github.com/kreneskyp/ix) | 692 |
|
||||
|[akshata29/entaoai](https://github.com/akshata29/entaoai) | 682 |
|
||||
|[promptfoo/promptfoo](https://github.com/promptfoo/promptfoo) | 670 |
|
||||
|[getmetal/motorhead](https://github.com/getmetal/motorhead) | 662 |
|
||||
|[ruoccofabrizio/azure-open-ai-embeddings-qna](https://github.com/ruoccofabrizio/azure-open-ai-embeddings-qna) | 650 |
|
||||
|[YiVal/YiVal](https://github.com/YiVal/YiVal) | 632 |
|
||||
|[whyiyhw/chatgpt-wechat](https://github.com/whyiyhw/chatgpt-wechat) | 624 |
|
||||
|[SamurAIGPT/ChatGPT-Developer-Plugins](https://github.com/SamurAIGPT/ChatGPT-Developer-Plugins) | 617 |
|
||||
|[dot-agent/openagent](https://github.com/dot-agent/openagent) | 602 |
|
||||
|[msoedov/langcorn](https://github.com/msoedov/langcorn) | 588 |
|
||||
|[namuan/dr-doc-search](https://github.com/namuan/dr-doc-search) | 585 |
|
||||
|[microsoft/PodcastCopilot](https://github.com/microsoft/PodcastCopilot) | 581 |
|
||||
|[alexanderatallah/window.ai](https://github.com/alexanderatallah/window.ai) | 569 |
|
||||
|[StevenGrove/GPT4Tools](https://github.com/StevenGrove/GPT4Tools) | 568 |
|
||||
|[xusenlinzy/api-for-open-llm](https://github.com/xusenlinzy/api-for-open-llm) | 559 |
|
||||
|[NoDataFound/hackGPT](https://github.com/NoDataFound/hackGPT) | 558 |
|
||||
|[langchain-ai/auto-evaluator](https://github.com/langchain-ai/auto-evaluator) | 554 |
|
||||
|[yeagerai/yeagerai-agent](https://github.com/yeagerai/yeagerai-agent) | 537 |
|
||||
|[FlagOpen/FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding) | 534 |
|
||||
|[amosjyng/langchain-visualizer](https://github.com/amosjyng/langchain-visualizer) | 534 |
|
||||
|[OpenGenerativeAI/GenossGPT](https://github.com/OpenGenerativeAI/GenossGPT) | 524 |
|
||||
|[jina-ai/agentchain](https://github.com/jina-ai/agentchain) | 496 |
|
||||
|[mckaywrigley/repo-chat](https://github.com/mckaywrigley/repo-chat) | 495 |
|
||||
|[michaelthwan/searchGPT](https://github.com/michaelthwan/searchGPT) | 494 |
|
||||
|[explosion/spacy-llm](https://github.com/explosion/spacy-llm) | 492 |
|
||||
|[plastic-labs/tutor-gpt](https://github.com/plastic-labs/tutor-gpt) | 490 |
|
||||
|[freddyaboulton/gradio-tools](https://github.com/freddyaboulton/gradio-tools) | 488 |
|
||||
|[xuwenhao/geektime-ai-course](https://github.com/xuwenhao/geektime-ai-course) | 481 |
|
||||
|[tgscan-dev/tgscan](https://github.com/tgscan-dev/tgscan) | 480 |
|
||||
|[langchain-ai/langchain-aiplugin](https://github.com/langchain-ai/langchain-aiplugin) | 480 |
|
||||
|[mpaepper/content-chatbot](https://github.com/mpaepper/content-chatbot) | 473 |
|
||||
|[yvann-hub/Robby-chatbot](https://github.com/yvann-hub/Robby-chatbot) | 471 |
|
||||
|[steamship-core/steamship-langchain](https://github.com/steamship-core/steamship-langchain) | 467 |
|
||||
|[langchain-ai/streamlit-agent](https://github.com/langchain-ai/streamlit-agent) | 463 |
|
||||
|[jonra1993/fastapi-alembic-sqlmodel-async](https://github.com/jonra1993/fastapi-alembic-sqlmodel-async) | 463 |
|
||||
|[continuum-llms/chatgpt-memory](https://github.com/continuum-llms/chatgpt-memory) | 463 |
|
||||
|[poe-platform/poe-protocol](https://github.com/poe-platform/poe-protocol) | 441 |
|
||||
|[alejandro-ao/langchain-ask-pdf](https://github.com/alejandro-ao/langchain-ask-pdf) | 437 |
|
||||
|[Dicklesworthstone/llama_embeddings_fastapi_service](https://github.com/Dicklesworthstone/llama_embeddings_fastapi_service) | 432 |
|
||||
|[DataDog/dd-trace-py](https://github.com/DataDog/dd-trace-py) | 431 |
|
||||
|[daveebbelaar/langchain-experiments](https://github.com/daveebbelaar/langchain-experiments) | 431 |
|
||||
|[jiran214/GPT-vup](https://github.com/jiran214/GPT-vup) | 428 |
|
||||
|[Azure-Samples/openai](https://github.com/Azure-Samples/openai) | 419 |
|
||||
|[NimbleBoxAI/ChainFury](https://github.com/NimbleBoxAI/ChainFury) | 414 |
|
||||
|[CarperAI/OpenELM](https://github.com/CarperAI/OpenELM) | 411 |
|
||||
|[daodao97/chatdoc](https://github.com/daodao97/chatdoc) | 404 |
|
||||
|[MiuLab/Taiwan-LLaMa](https://github.com/MiuLab/Taiwan-LLaMa) | 402 |
|
||||
|[logan-markewich/llama_index_starter_pack](https://github.com/logan-markewich/llama_index_starter_pack) | 399 |
|
||||
|[mtenenholtz/chat-twitter](https://github.com/mtenenholtz/chat-twitter) | 394 |
|
||||
|[opentensor/bittensor](https://github.com/opentensor/bittensor) | 393 |
|
||||
|[showlab/VLog](https://github.com/showlab/VLog) | 392 |
|
||||
|[microsoft/sample-app-aoai-chatGPT](https://github.com/microsoft/sample-app-aoai-chatGPT) | 391 |
|
||||
|[truera/trulens](https://github.com/truera/trulens) | 390 |
|
||||
|[Anil-matcha/Chatbase](https://github.com/Anil-matcha/Chatbase) | 363 |
|
||||
|[marella/chatdocs](https://github.com/marella/chatdocs) | 360 |
|
||||
|[jondurbin/airoboros](https://github.com/jondurbin/airoboros) | 357 |
|
||||
|[mosaicml/examples](https://github.com/mosaicml/examples) | 353 |
|
||||
|[wandb/weave](https://github.com/wandb/weave) | 352 |
|
||||
|[huchenxucs/ChatDB](https://github.com/huchenxucs/ChatDB) | 350 |
|
||||
|[rsaryev/talk-codebase](https://github.com/rsaryev/talk-codebase) | 343 |
|
||||
|[steamship-packages/langchain-production-starter](https://github.com/steamship-packages/langchain-production-starter) | 335 |
|
||||
|[jerlendds/osintbuddy](https://github.com/jerlendds/osintbuddy) | 335 |
|
||||
|[andylokandy/gpt-4-search](https://github.com/andylokandy/gpt-4-search) | 329 |
|
||||
|[MagnivOrg/prompt-layer-library](https://github.com/MagnivOrg/prompt-layer-library) | 325 |
|
||||
|[personoids/personoids-lite](https://github.com/personoids/personoids-lite) | 319 |
|
||||
|[momegas/megabots](https://github.com/momegas/megabots) | 317 |
|
||||
|[itamargol/openai](https://github.com/itamargol/openai) | 312 |
|
||||
|[intel/intel-extension-for-transformers](https://github.com/intel/intel-extension-for-transformers) | 310 |
|
||||
|[monarch-initiative/ontogpt](https://github.com/monarch-initiative/ontogpt) | 310 |
|
||||
|[BlackHC/llm-strategy](https://github.com/BlackHC/llm-strategy) | 308 |
|
||||
|[Nuggt-dev/Nuggt](https://github.com/Nuggt-dev/Nuggt) | 305 |
|
||||
|[cofactoryai/textbase](https://github.com/cofactoryai/textbase) | 304 |
|
||||
|[Cheems-Seminar/grounded-segment-any-parts](https://github.com/Cheems-Seminar/grounded-segment-any-parts) | 296 |
|
||||
|[onlyphantom/llm-python](https://github.com/onlyphantom/llm-python) | 288 |
|
||||
|[morpheuslord/GPT_Vuln-analyzer](https://github.com/morpheuslord/GPT_Vuln-analyzer) | 285 |
|
||||
|[sullivan-sean/chat-langchainjs](https://github.com/sullivan-sean/chat-langchainjs) | 280 |
|
||||
|[wandb/edu](https://github.com/wandb/edu) | 277 |
|
||||
|[austin2035/chatpdf](https://github.com/austin2035/chatpdf) | 275 |
|
||||
|[liangwq/Chatglm_lora_multi-gpu](https://github.com/liangwq/Chatglm_lora_multi-gpu) | 273 |
|
||||
|[preset-io/promptimize](https://github.com/preset-io/promptimize) | 272 |
|
||||
|[Haste171/langchain-chatbot](https://github.com/Haste171/langchain-chatbot) | 271 |
|
||||
|[hnawaz007/pythondataanalysis](https://github.com/hnawaz007/pythondataanalysis) | 268 |
|
||||
|[JohnSnowLabs/langtest](https://github.com/JohnSnowLabs/langtest) | 268 |
|
||||
|[conceptofmind/toolformer](https://github.com/conceptofmind/toolformer) | 263 |
|
||||
|[sugarforever/LangChain-Tutorials](https://github.com/sugarforever/LangChain-Tutorials) | 260 |
|
||||
|[Safiullah-Rahu/CSV-AI](https://github.com/Safiullah-Rahu/CSV-AI) | 259 |
|
||||
|[artitw/text2text](https://github.com/artitw/text2text) | 257 |
|
||||
|[bborn/howdoi.ai](https://github.com/bborn/howdoi.ai) | 256 |
|
||||
|[JayZeeDesign/researcher-gpt](https://github.com/JayZeeDesign/researcher-gpt) | 252 |
|
||||
|[paolorechia/learn-langchain](https://github.com/paolorechia/learn-langchain) | 251 |
|
||||
|[ur-whitelab/exmol](https://github.com/ur-whitelab/exmol) | 251 |
|
||||
|[Azure-Samples/miyagi](https://github.com/Azure-Samples/miyagi) | 248 |
|
||||
|[recalign/RecAlign](https://github.com/recalign/RecAlign) | 243 |
|
||||
|[airobotlab/KoChatGPT](https://github.com/airobotlab/KoChatGPT) | 242 |
|
||||
|[explodinggradients/ragas](https://github.com/explodinggradients/ragas) | 232 |
|
||||
|[kaleido-lab/dolphin](https://github.com/kaleido-lab/dolphin) | 232 |
|
||||
|[hwchase17/chroma-langchain](https://github.com/hwchase17/chroma-langchain) | 230 |
|
||||
|[eosphoros-ai/DB-GPT-Hub](https://github.com/eosphoros-ai/DB-GPT-Hub) | 229 |
|
||||
|[shaman-ai/agent-actors](https://github.com/shaman-ai/agent-actors) | 227 |
|
||||
|[gia-guar/JARVIS-ChatGPT](https://github.com/gia-guar/JARVIS-ChatGPT) | 224 |
|
||||
|[shamspias/customizable-gpt-chatbot](https://github.com/shamspias/customizable-gpt-chatbot) | 223 |
|
||||
|[hwchase17/langchain-streamlit-template](https://github.com/hwchase17/langchain-streamlit-template) | 222 |
|
||||
|[alvarosevilla95/autolang](https://github.com/alvarosevilla95/autolang) | 221 |
|
||||
|[radi-cho/datasetGPT](https://github.com/radi-cho/datasetGPT) | 221 |
|
||||
|[gustavz/DataChad](https://github.com/gustavz/DataChad) | 219 |
|
||||
|[pablomarin/GPT-Azure-Search-Engine](https://github.com/pablomarin/GPT-Azure-Search-Engine) | 217 |
|
||||
|[su77ungr/CASALIOY](https://github.com/su77ungr/CASALIOY) | 217 |
|
||||
|[ennucore/clippinator](https://github.com/ennucore/clippinator) | 211 |
|
||||
|[edreisMD/plugnplai](https://github.com/edreisMD/plugnplai) | 210 |
|
||||
|[kaarthik108/snowChat](https://github.com/kaarthik108/snowChat) | 210 |
|
||||
|[PradipNichite/Youtube-Tutorials](https://github.com/PradipNichite/Youtube-Tutorials) | 206 |
|
||||
|[ur-whitelab/chemcrow-public](https://github.com/ur-whitelab/chemcrow-public) | 202 |
|
||||
|[CambioML/pykoi](https://github.com/CambioML/pykoi) | 199 |
|
||||
|[jbrukh/gpt-jargon](https://github.com/jbrukh/gpt-jargon) | 198 |
|
||||
|[LC1332/Chat-Haruhi-Suzumiya](https://github.com/LC1332/Chat-Haruhi-Suzumiya) | 196 |
|
||||
|[nicknochnack/LangchainDocuments](https://github.com/nicknochnack/LangchainDocuments) | 196 |
|
||||
|[yuanjie-ai/ChatLLM](https://github.com/yuanjie-ai/ChatLLM) | 196 |
|
||||
|[plchld/InsightFlow](https://github.com/plchld/InsightFlow) | 196 |
|
||||
|[yakami129/VirtualWife](https://github.com/yakami129/VirtualWife) | 194 |
|
||||
|[Mintplex-Labs/vector-admin](https://github.com/Mintplex-Labs/vector-admin) | 191 |
|
||||
|[SamPink/dev-gpt](https://github.com/SamPink/dev-gpt) | 190 |
|
||||
|[yasyf/compress-gpt](https://github.com/yasyf/compress-gpt) | 190 |
|
||||
|[benthecoder/ClassGPT](https://github.com/benthecoder/ClassGPT) | 190 |
|
||||
|[WongSaang/chatgpt-ui-server](https://github.com/WongSaang/chatgpt-ui-server) | 182 |
|
||||
|[voxel51/voxelgpt](https://github.com/voxel51/voxelgpt) | 181 |
|
||||
|[hardbyte/qabot](https://github.com/hardbyte/qabot) | 176 |
|
||||
|[orgexyz/BlockAGI](https://github.com/orgexyz/BlockAGI) | 174 |
|
||||
|[handrew/browserpilot](https://github.com/handrew/browserpilot) | 173 |
|
||||
|[miaoshouai/miaoshouai-assistant](https://github.com/miaoshouai/miaoshouai-assistant) | 172 |
|
||||
|[microsoft/azure-openai-in-a-day-workshop](https://github.com/microsoft/azure-openai-in-a-day-workshop) | 170 |
|
||||
|[kyegomez/swarms](https://github.com/kyegomez/swarms) | 169 |
|
||||
|[Azure-Samples/azure-search-power-skills](https://github.com/Azure-Samples/azure-search-power-skills) | 169 |
|
||||
|[chakkaradeep/pyCodeAGI](https://github.com/chakkaradeep/pyCodeAGI) | 169 |
|
||||
|[ethanyanjiali/minChatGPT](https://github.com/ethanyanjiali/minChatGPT) | 167 |
|
||||
|[ccurme/yolopandas](https://github.com/ccurme/yolopandas) | 166 |
|
||||
|[ju-bezdek/langchain-decorators](https://github.com/ju-bezdek/langchain-decorators) | 165 |
|
||||
|[Azure-Samples/azure-search-openai-demo-csharp](https://github.com/Azure-Samples/azure-search-openai-demo-csharp) | 164 |
|
||||
|[fengyuli-dev/multimedia-gpt](https://github.com/fengyuli-dev/multimedia-gpt) | 164 |
|
||||
|[grumpyp/aixplora](https://github.com/grumpyp/aixplora) | 162 |
|
||||
|[langchain-ai/web-explorer](https://github.com/langchain-ai/web-explorer) | 158 |
|
||||
|[JorisdeJong123/7-Days-of-LangChain](https://github.com/JorisdeJong123/7-Days-of-LangChain) | 158 |
|
||||
|[shauryr/S2QA](https://github.com/shauryr/S2QA) | 158 |
|
||||
|[Azure-Samples/jp-azureopenai-samples](https://github.com/Azure-Samples/jp-azureopenai-samples) | 157 |
|
||||
|[AkshitIreddy/Interactive-LLM-Powered-NPCs](https://github.com/AkshitIreddy/Interactive-LLM-Powered-NPCs) | 156 |
|
||||
|[ibiscp/LLM-IMDB](https://github.com/ibiscp/LLM-IMDB) | 156 |
|
||||
|[jmpaz/promptlib](https://github.com/jmpaz/promptlib) | 156 |
|
||||
|[mayooear/private-chatbot-mpt30b-langchain](https://github.com/mayooear/private-chatbot-mpt30b-langchain) | 155 |
|
||||
|[homanp/vercel-langchain](https://github.com/homanp/vercel-langchain) | 152 |
|
||||
|[mlops-for-all/mlops-for-all.github.io](https://github.com/mlops-for-all/mlops-for-all.github.io) | 151 |
|
||||
|[vaibkumr/prompt-optimizer](https://github.com/vaibkumr/prompt-optimizer) | 151 |
|
||||
|[Agenta-AI/agenta](https://github.com/Agenta-AI/agenta) | 150 |
|
||||
|[Klingefjord/chatgpt-telegram](https://github.com/Klingefjord/chatgpt-telegram) | 149 |
|
||||
|[menloparklab/falcon-langchain](https://github.com/menloparklab/falcon-langchain) | 148 |
|
||||
|[deeppavlov/dream](https://github.com/deeppavlov/dream) | 146 |
|
||||
|[positive666/Prompt-Can-Anything](https://github.com/positive666/Prompt-Can-Anything) | 145 |
|
||||
|[menloparklab/langchain-cohere-qdrant-doc-retrieval](https://github.com/menloparklab/langchain-cohere-qdrant-doc-retrieval) | 145 |
|
||||
|[realminchoi/babyagi-ui](https://github.com/realminchoi/babyagi-ui) | 145 |
|
||||
|[SpecterOps/Nemesis](https://github.com/SpecterOps/Nemesis) | 144 |
|
||||
|[Jaseci-Labs/jaseci](https://github.com/Jaseci-Labs/jaseci) | 144 |
|
||||
|[summarizepaper/summarizepaper](https://github.com/summarizepaper/summarizepaper) | 142 |
|
||||
|[peterw/StoryStorm](https://github.com/peterw/StoryStorm) | 141 |
|
||||
|[Aggregate-Intellect/practical-llms](https://github.com/Aggregate-Intellect/practical-llms) | 140 |
|
||||
|[streamlit/llm-examples](https://github.com/streamlit/llm-examples) | 140 |
|
||||
|[hirokidaichi/wanna](https://github.com/hirokidaichi/wanna) | 140 |
|
||||
|[Chainlit/cookbook](https://github.com/Chainlit/cookbook) | 139 |
|
||||
|[alphasecio/langchain-examples](https://github.com/alphasecio/langchain-examples) | 139 |
|
||||
|[flurb18/AgentOoba](https://github.com/flurb18/AgentOoba) | 139 |
|
||||
|[Teahouse-Studios/akari-bot](https://github.com/Teahouse-Studios/akari-bot) | 138 |
|
||||
|[yasyf/summ](https://github.com/yasyf/summ) | 138 |
|
||||
|[kulltc/chatgpt-sql](https://github.com/kulltc/chatgpt-sql) | 137 |
|
||||
|[v7labs/benchllm](https://github.com/v7labs/benchllm) | 135 |
|
||||
|[ray-project/langchain-ray](https://github.com/ray-project/langchain-ray) | 134 |
|
||||
|[petehunt/langchain-github-bot](https://github.com/petehunt/langchain-github-bot) | 134 |
|
||||
|[peterwnjenga/aigent](https://github.com/peterwnjenga/aigent) | 133 |
|
||||
|[jina-ai/fastapi-serve](https://github.com/jina-ai/fastapi-serve) | 133 |
|
||||
|[retr0reg/Ret2GPT](https://github.com/retr0reg/Ret2GPT) | 132 |
|
||||
|[agenthubdev/agenthub_operators](https://github.com/agenthubdev/agenthub_operators) | 131 |
|
||||
|[eunomia-bpf/GPTtrace](https://github.com/eunomia-bpf/GPTtrace) | 131 |
|
||||
|[solana-labs/chatgpt-plugin](https://github.com/solana-labs/chatgpt-plugin) | 130 |
|
||||
|[aurelio-labs/arxiv-bot](https://github.com/aurelio-labs/arxiv-bot) | 130 |
|
||||
|[ChuloAI/BrainChulo](https://github.com/ChuloAI/BrainChulo) | 128 |
|
||||
|[ssheng/BentoChain](https://github.com/ssheng/BentoChain) | 128 |
|
||||
|[mallahyari/drqa](https://github.com/mallahyari/drqa) | 127 |
|
||||
|[fixie-ai/fixie-examples](https://github.com/fixie-ai/fixie-examples) | 127 |
|
||||
|[davila7/file-gpt](https://github.com/davila7/file-gpt) | 127 |
|
||||
|[showlab/UniVTG](https://github.com/showlab/UniVTG) | 125 |
|
||||
|[zenml-io/zenml-projects](https://github.com/zenml-io/zenml-projects) | 125 |
|
||||
|[RedisVentures/redis-openai-qna](https://github.com/RedisVentures/redis-openai-qna) | 124 |
|
||||
|[PJLab-ADG/DriveLikeAHuman](https://github.com/PJLab-ADG/DriveLikeAHuman) | 122 |
|
||||
|[prof-frink-lab/slangchain](https://github.com/prof-frink-lab/slangchain) | 122 |
|
||||
|[Coding-Crashkurse/Langchain-Full-Course](https://github.com/Coding-Crashkurse/Langchain-Full-Course) | 121 |
|
||||
|[ciare-robotics/world-creator](https://github.com/ciare-robotics/world-creator) | 120 |
|
||||
|[blob42/Instrukt](https://github.com/blob42/Instrukt) | 120 |
|
||||
|[langchain-ai/langsmith-cookbook](https://github.com/langchain-ai/langsmith-cookbook) | 119 |
|
||||
|[OpenPluginACI/openplugin](https://github.com/OpenPluginACI/openplugin) | 118 |
|
||||
|[defenseunicorns/leapfrogai](https://github.com/defenseunicorns/leapfrogai) | 118 |
|
||||
|[sdaaron/QueryGPT](https://github.com/sdaaron/QueryGPT) | 117 |
|
||||
|[grumpyp/chroma-langchain-tutorial](https://github.com/grumpyp/chroma-langchain-tutorial) | 117 |
|
||||
|[3Alan/DocsMind](https://github.com/3Alan/DocsMind) | 116 |
|
||||
|[CodeAlchemyAI/ViLT-GPT](https://github.com/CodeAlchemyAI/ViLT-GPT) | 114 |
|
||||
|[emarco177/ice_breaker](https://github.com/emarco177/ice_breaker) | 113 |
|
||||
|[nftblackmagic/flask-langchain](https://github.com/nftblackmagic/flask-langchain) | 113 |
|
||||
|[log1stics/voice-generator-webui](https://github.com/log1stics/voice-generator-webui) | 112 |
|
||||
|[nrl-ai/pautobot](https://github.com/nrl-ai/pautobot) | 110 |
|
||||
|[Azure/business-process-automation](https://github.com/Azure/business-process-automation) | 110 |
|
||||
|[MedalCollector/Orator](https://github.com/MedalCollector/Orator) | 109 |
|
||||
|[wombyz/HormoziGPT](https://github.com/wombyz/HormoziGPT) | 108 |
|
||||
|[afaqueumer/DocQA](https://github.com/afaqueumer/DocQA) | 106 |
|
||||
|[mortium91/langchain-assistant](https://github.com/mortium91/langchain-assistant) | 106 |
|
||||
|[Azure/azure-sdk-tools](https://github.com/Azure/azure-sdk-tools) | 105 |
|
||||
|[yeagerai/genworlds](https://github.com/yeagerai/genworlds) | 105 |
|
||||
|[AmineDiro/cria](https://github.com/AmineDiro/cria) | 104 |
|
||||
|[langchain-ai/text-split-explorer](https://github.com/langchain-ai/text-split-explorer) | 104 |
|
||||
|[luisroque/large_laguage_models](https://github.com/luisroque/large_laguage_models) | 104 |
|
||||
|[xuwenhao/mactalk-ai-course](https://github.com/xuwenhao/mactalk-ai-course) | 104 |
|
||||
|[Open-Swarm-Net/GPT-Swarm](https://github.com/Open-Swarm-Net/GPT-Swarm) | 104 |
|
||||
|[langchain-ai/langchain-aws-template](https://github.com/langchain-ai/langchain-aws-template) | 104 |
|
||||
|[aws-samples/aws-genai-llm-chatbot](https://github.com/aws-samples/aws-genai-llm-chatbot) | 103 |
|
||||
|[crosleythomas/MirrorGPT](https://github.com/crosleythomas/MirrorGPT) | 103 |
|
||||
|[Dicklesworthstone/llama2_aided_tesseract](https://github.com/Dicklesworthstone/llama2_aided_tesseract) | 101 |
|
||||
|
||||
|
||||
|
||||
_Generated by [github-dependents-info](https://github.com/nvuillam/github-dependents-info)_
|
||||
|
||||
[github-dependents-info --repo hwchase17/langchain --markdownfile dependents.md --minstars 100 --sort stars]
|
||||
`github-dependents-info --repo langchain-ai/langchain --markdownfile dependents.md --minstars 100 --sort stars`
|
||||
|
||||
1
docs/extras/guides/adapters/_category_.yml
Normal file
1
docs/extras/guides/adapters/_category_.yml
Normal file
@@ -0,0 +1 @@
|
||||
label: 'Adapters'
|
||||
@@ -1638,196 +1638,6 @@
|
||||
"source": [
|
||||
"moderated_chain.invoke({\"input\": \"you are stupid\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a0a85ba4-f782-47b8-b16f-8b7a61d6dab7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## Conversational Retrieval With Memory"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "92c87dd8-bb6f-4f32-a30d-8f5459ce6265",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Fallbacks\n",
|
||||
"\n",
|
||||
"With LCEL you can easily introduce fallbacks for any Runnable component, like an LLM."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "1b1cb744-31fc-4261-ab25-65fe1fcad559",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='To get to the other side.', additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"\n",
|
||||
"bad_llm = ChatOpenAI(model_name=\"gpt-fake\")\n",
|
||||
"good_llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\")\n",
|
||||
"llm = bad_llm.with_fallbacks([good_llm])\n",
|
||||
"\n",
|
||||
"llm.invoke(\"Why did the the chicken cross the road?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b8cf3982-03f6-49b3-8ff5-7cd12444f19c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Looking at the trace, we can see that the first model failed but the second succeeded, so we still got an output: https://smith.langchain.com/public/dfaf0bf6-d86d-43e9-b084-dd16a56df15c/r\n",
|
||||
"\n",
|
||||
"We can add an arbitrary sequence of fallbacks, which will be executed in order:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "31819be0-7f40-4e67-b5ab-61340027b948",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='To get to the other side.', additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm = bad_llm.with_fallbacks([bad_llm, bad_llm, good_llm])\n",
|
||||
"\n",
|
||||
"llm.invoke(\"Why did the the chicken cross the road?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "acad6e88-8046-450e-b005-db7e50f33b80",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Trace: https://smith.langchain.com/public/c09efd01-3184-4369-a225-c9da8efcaf47/r\n",
|
||||
"\n",
|
||||
"We can continue to use our Runnable with fallbacks the same way we use any Runnable, mean we can include it in sequences:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "bab114a1-bb93-4b7e-a639-e7e00f21aebc",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='To show off its incredible jumping skills! Kangaroos are truly amazing creatures.', additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\"system\", \"You're a nice assistant who always includes a compliment in your response\"),\n",
|
||||
" (\"human\", \"Why did the {animal} cross the road\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"chain = prompt | llm\n",
|
||||
"chain.invoke({\"animal\": \"kangaroo\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "58340afa-8187-4ffe-9bd2-7912fb733a15",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Trace: https://smith.langchain.com/public/ba03895f-f8bd-4c70-81b7-8b930353eabd/r\n",
|
||||
"\n",
|
||||
"Note, since every sequence of Runnables is itself a Runnable, we can create fallbacks for whole Sequences. We can also continue using the full interface, including asynchronous calls, batched calls, and streams:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "45aa3170-b2e6-430d-887b-bd879048060a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[\"\\n\\nAnswer: The rabbit crossed the road to get to the other side. That's quite clever of him!\",\n",
|
||||
" '\\n\\nAnswer: The turtle crossed the road to get to the other side. You must be pretty clever to come up with that riddle!']"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"\n",
|
||||
"chat_prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\"system\", \"You're a nice assistant who always includes a compliment in your response\"),\n",
|
||||
" (\"human\", \"Why did the {animal} cross the road\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"chat_model = ChatOpenAI(model_name=\"gpt-fake\")\n",
|
||||
"\n",
|
||||
"prompt_template = \"\"\"Instructions: You should always include a compliment in your response.\n",
|
||||
"\n",
|
||||
"Question: Why did the {animal} cross the road?\"\"\"\n",
|
||||
"prompt = PromptTemplate.from_template(prompt_template)\n",
|
||||
"llm = OpenAI()\n",
|
||||
"\n",
|
||||
"bad_chain = chat_prompt | chat_model\n",
|
||||
"good_chain = prompt | llm\n",
|
||||
"chain = bad_chain.with_fallbacks([good_chain])\n",
|
||||
"await chain.abatch([{\"animal\": \"rabbit\"}, {\"animal\": \"turtle\"}])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "af6731c6-0c73-4b1d-a433-6e8f6ecce2bb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Traces: \n",
|
||||
"1. https://smith.langchain.com/public/ccd73236-9ae5-48a6-94b5-41210be18a46/r\n",
|
||||
"2. https://smith.langchain.com/public/f43f608e-075c-45c7-bf73-b64e4d3f3082/r"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3d2fe1fe-506b-4ee5-8056-8b9df801765f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -1846,7 +1656,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
430
docs/extras/guides/fallbacks.ipynb
Normal file
430
docs/extras/guides/fallbacks.ipynb
Normal file
@@ -0,0 +1,430 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "19c9cbd6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Fallbacks\n",
|
||||
"\n",
|
||||
"When working with language models, you may often encounter issues from the underlying APIs, whether these be rate limiting or downtime. Therefore, as you go to move your LLM applications into production it becomes more and more important to safe guard against these. That's why we've introduced the concept of fallbacks.\n",
|
||||
"\n",
|
||||
"Crucially, fallbacks can be applied not only on the LLM level but on the whole runnable level. This is important because often times different models require different prompts. So if your call to OpenAI fails, you don't just want to send the same prompt to Anthropic - you probably want want to use a different prompt template and send a different version there."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a6bb9ba9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Handling LLM API Errors\n",
|
||||
"\n",
|
||||
"This is maybe the most common use case for fallbacks. A request to an LLM API can fail for a variety of reasons - the API could be down, you could have hit rate limits, any number of things. Therefore, using fallbacks can help protect against these types of things.\n",
|
||||
"\n",
|
||||
"IMPORTANT: By default, a lot of the LLM wrappers catch errors and retry. You will most likely want to turn those off when working with fallbacks. Otherwise the first wrapper will keep on retrying and not failing."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "d3e893bf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI, ChatAnthropic"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4847c82d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First, let's mock out what happens if we hit a RateLimitError from OpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "dfdd8bf5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from unittest.mock import patch\n",
|
||||
"from openai.error import RateLimitError"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "e6fdffc1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Note that we set max_retries = 0 to avoid retrying on RateLimits, etc\n",
|
||||
"openai_llm = ChatOpenAI(max_retries=0)\n",
|
||||
"anthropic_llm = ChatAnthropic()\n",
|
||||
"llm = openai_llm.with_fallbacks([anthropic_llm])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"id": "584461ab",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Hit error\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Let's use just the OpenAI LLm first, to show that we run into an error\n",
|
||||
"with patch('openai.ChatCompletion.create', side_effect=RateLimitError()):\n",
|
||||
" try:\n",
|
||||
" print(openai_llm.invoke(\"Why did the chicken cross the road?\"))\n",
|
||||
" except:\n",
|
||||
" print(\"Hit error\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"id": "4fc1e673",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"content=' I don\\'t actually know why the chicken crossed the road, but here are some possible humorous answers:\\n\\n- To get to the other side!\\n\\n- It was too chicken to just stand there. \\n\\n- It wanted a change of scenery.\\n\\n- It wanted to show the possum it could be done.\\n\\n- It was on its way to a poultry farmers\\' convention.\\n\\nThe joke plays on the double meaning of \"the other side\" - literally crossing the road to the other side, or the \"other side\" meaning the afterlife. So it\\'s an anti-joke, with a silly or unexpected pun as the answer.' additional_kwargs={} example=False\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Now let's try with fallbacks to Anthropic\n",
|
||||
"with patch('openai.ChatCompletion.create', side_effect=RateLimitError()):\n",
|
||||
" try:\n",
|
||||
" print(llm.invoke(\"Why did the the chicken cross the road?\"))\n",
|
||||
" except:\n",
|
||||
" print(\"Hit error\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f00bea25",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can use our \"LLM with Fallbacks\" as we would a normal LLM."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"id": "4f8eaaa0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"content=\" I don't actually know why the kangaroo crossed the road, but I can take a guess! Here are some possible reasons:\\n\\n- To get to the other side (the classic joke answer!)\\n\\n- It was trying to find some food or water \\n\\n- It was trying to find a mate during mating season\\n\\n- It was fleeing from a predator or perceived threat\\n\\n- It was disoriented and crossed accidentally \\n\\n- It was following a herd of other kangaroos who were crossing\\n\\n- It wanted a change of scenery or environment \\n\\n- It was trying to reach a new habitat or territory\\n\\nThe real reason is unknown without more context, but hopefully one of those potential explanations does the joke justice! Let me know if you have any other animal jokes I can try to decipher.\" additional_kwargs={} example=False\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\"system\", \"You're a nice assistant who always includes a compliment in your response\"),\n",
|
||||
" (\"human\", \"Why did the {animal} cross the road\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"chain = prompt | llm\n",
|
||||
"with patch('openai.ChatCompletion.create', side_effect=RateLimitError()):\n",
|
||||
" try:\n",
|
||||
" print(chain.invoke({\"animal\": \"kangaroo\"}))\n",
|
||||
" except:\n",
|
||||
" print(\"Hit error\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8d62241b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Fallbacks for Sequences\n",
|
||||
"\n",
|
||||
"We can also create fallbacks for sequences, that are sequences themselves. Here we do that with two different models: ChatOpenAI and then normal OpenAI (which does not use a chat model). Because OpenAI is NOT a chat model, you likely want a different prompt."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"id": "6d0b8056",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# First let's create a chain with a ChatModel\n",
|
||||
"# We add in a string output parser here so the outputs between the two are the same type\n",
|
||||
"from langchain.schema.output_parser import StrOutputParser\n",
|
||||
"\n",
|
||||
"chat_prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\"system\", \"You're a nice assistant who always includes a compliment in your response\"),\n",
|
||||
" (\"human\", \"Why did the {animal} cross the road\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"# Here we're going to use a bad model name to easily create a chain that will error\n",
|
||||
"chat_model = ChatOpenAI(model_name=\"gpt-fake\")\n",
|
||||
"bad_chain = chat_prompt | chat_model | StrOutputParser()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"id": "8d1fc2a5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Now lets create a chain with the normal OpenAI model\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"\n",
|
||||
"prompt_template = \"\"\"Instructions: You should always include a compliment in your response.\n",
|
||||
"\n",
|
||||
"Question: Why did the {animal} cross the road?\"\"\"\n",
|
||||
"prompt = PromptTemplate.from_template(prompt_template)\n",
|
||||
"llm = OpenAI()\n",
|
||||
"good_chain = prompt | llm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"id": "283bfa44",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nAnswer: The turtle crossed the road to get to the other side, and I have to say he had some impressive determination.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# We can now create a final chain which combines the two\n",
|
||||
"chain = bad_chain.with_fallbacks([good_chain])\n",
|
||||
"chain.invoke({\"animal\": \"turtle\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ec4685b4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Handling Long Inputs\n",
|
||||
"\n",
|
||||
"One of the big limiting factors of LLMs in their context window. Usually you can count and track the length of prompts before sending them to an LLM, but in situations where that is hard/complicated you can fallback to a model with longer context length."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 34,
|
||||
"id": "564b84c9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"short_llm = ChatOpenAI()\n",
|
||||
"long_llm = ChatOpenAI(model=\"gpt-3.5-turbo-16k\")\n",
|
||||
"llm = short_llm.with_fallbacks([long_llm])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 38,
|
||||
"id": "5e27a775",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"inputs = \"What is the next number: \" + \", \".join([\"one\", \"two\"] * 3000)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"id": "0a502731",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"This model's maximum context length is 4097 tokens. However, your messages resulted in 12012 tokens. Please reduce the length of the messages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"try:\n",
|
||||
" print(short_llm.invoke(inputs))\n",
|
||||
"except Exception as e:\n",
|
||||
" print(e)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 41,
|
||||
"id": "d91ba5d7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"content='The next number in the sequence is two.' additional_kwargs={} example=False\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"try:\n",
|
||||
" print(llm.invoke(inputs))\n",
|
||||
"except Exception as e:\n",
|
||||
" print(e)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2a6735df",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Fallback to Better Model\n",
|
||||
"\n",
|
||||
"Often times we ask models to output format in a specific format (like JSON). Models like GPT-3.5 can do this okay, but sometimes struggle. This naturally points to fallbacks - we can try with GPT-3.5 (faster, cheaper), but then if parsing fails we can use GPT-4."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 42,
|
||||
"id": "867a3793",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.output_parsers import DatetimeOutputParser"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 67,
|
||||
"id": "b8d9959d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"prompt = ChatPromptTemplate.from_template(\n",
|
||||
" \"what time was {event} (in %Y-%m-%dT%H:%M:%S.%fZ format - only return this value)\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 75,
|
||||
"id": "98087a76",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# In this case we are going to do the fallbacks on the LLM + output parser level\n",
|
||||
"# Because the error will get raised in the OutputParser\n",
|
||||
"openai_35 = ChatOpenAI() | DatetimeOutputParser()\n",
|
||||
"openai_4 = ChatOpenAI(model=\"gpt-4\")| DatetimeOutputParser()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 77,
|
||||
"id": "17ec9e8f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"only_35 = prompt | openai_35 \n",
|
||||
"fallback_4 = prompt | openai_35.with_fallbacks([openai_4])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 80,
|
||||
"id": "7e536f0b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error: Could not parse datetime string: The Super Bowl in 1994 took place on January 30th at 3:30 PM local time. Converting this to the specified format (%Y-%m-%dT%H:%M:%S.%fZ) results in: 1994-01-30T15:30:00.000Z\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"try:\n",
|
||||
" print(only_35.invoke({\"event\": \"the superbowl in 1994\"}))\n",
|
||||
"except Exception as e:\n",
|
||||
" print(f\"Error: {e}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 81,
|
||||
"id": "01355c5e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"1994-01-30 15:30:00\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"try:\n",
|
||||
" print(fallback_4.invoke({\"event\": \"the superbowl in 1994\"}))\n",
|
||||
"except Exception as e:\n",
|
||||
" print(f\"Error: {e}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c537f9d0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
807
docs/extras/guides/local_llms.ipynb
Normal file
807
docs/extras/guides/local_llms.ipynb
Normal file
@@ -0,0 +1,807 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b8982428",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Private, local, open source LLMs\n",
|
||||
"\n",
|
||||
"## Use case\n",
|
||||
"\n",
|
||||
"The popularity of projects like [PrivateGPT](https://github.com/imartinez/privateGPT), [llama.cpp](https://github.com/ggerganov/llama.cpp), and [GPT4All](https://github.com/nomic-ai/gpt4all) underscore the demand to run LLMs locally (on your own device).\n",
|
||||
"\n",
|
||||
"This has at least two important benefits:\n",
|
||||
"\n",
|
||||
"1. `Privacy`: Your data is not sent to a third party, and it is not subject to the terms of service of a commercial service\n",
|
||||
"2. `Cost`: There is no inference fee, which is important for token-intensive applications (e.g., [long-running simulations](https://twitter.com/RLanceMartin/status/1691097659262820352?s=20), summarization)\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"Running an LLM locally requires a few things:\n",
|
||||
"\n",
|
||||
"1. `Open source LLM`: An open source LLM that can be freely modified and shared \n",
|
||||
"2. `Inference`: Ability to run this LLM on your device w/ acceptable latency\n",
|
||||
"\n",
|
||||
"### Open Source LLMs\n",
|
||||
"\n",
|
||||
"Users can now gain access to a rapidly growing set of [open source LLMs](https://cameronrwolfe.substack.com/p/the-history-of-open-source-llms-better). \n",
|
||||
"\n",
|
||||
"These LLMs can be assessed across at least two dimentions (see figure):\n",
|
||||
" \n",
|
||||
"1. `Base model`: What is the base-model and how was it trained?\n",
|
||||
"2. `Fine-tuning approach`: Was the base-model fine-tuned and, if so, what [set of instructions](https://cameronrwolfe.substack.com/p/beyond-llama-the-power-of-open-llms#%C2%A7alpaca-an-instruction-following-llama-model) was used?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"The relative performance of these models can be assessed using several leaderboards, including:\n",
|
||||
"\n",
|
||||
"1. [LmSys](https://chat.lmsys.org/?arena)\n",
|
||||
"2. [GPT4All](https://gpt4all.io/index.html)\n",
|
||||
"3. [HuggingFace](https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard)\n",
|
||||
"\n",
|
||||
"### Inference\n",
|
||||
"\n",
|
||||
"A few frameworks for this have emerged to support inference of open source LLMs on various devices:\n",
|
||||
"\n",
|
||||
"1. [`llama.cpp`](https://github.com/ggerganov/llama.cpp): C++ implementation of llama inference code with [weight optimization / quantization](https://finbarr.ca/how-is-llama-cpp-possible/)\n",
|
||||
"2. [`gpt4all`](https://docs.gpt4all.io/index.html): Optimized C backend for inference\n",
|
||||
"3. [`Ollama`](https://ollama.ai/): Bundles model weights and environment into an app that runs on device and serves the LLM \n",
|
||||
"\n",
|
||||
"In general, these frameworks will do a few things:\n",
|
||||
"\n",
|
||||
"1. `Quantization`: Reduce the memory footprint of the raw model weights\n",
|
||||
"2. `Efficient implementation for inference`: Support inference on consumer hardware (e.g., CPU or laptop GPU)\n",
|
||||
"\n",
|
||||
"In particular, see [this excellent post](https://finbarr.ca/how-is-llama-cpp-possible/) on the importance of quantization.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"With less precision, we radically decrease the memory needed to store the LLM in memory.\n",
|
||||
"\n",
|
||||
"In addition, we can see the importance of GPU memory bandwidth [sheet](https://docs.google.com/spreadsheets/d/1OehfHHNSn66BP2h3Bxp2NJTVX97icU0GmCXF6pK23H8/edit#gid=0)!\n",
|
||||
"\n",
|
||||
"A Mac M2 Max is 5-6x faster than a M1 for inference due to the larger GPU memory bandwidth.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Quickstart\n",
|
||||
"\n",
|
||||
"[`Ollama`](https://ollama.ai/) is one way to easily run inference on macOS.\n",
|
||||
" \n",
|
||||
"The instructions [here](docs/integrations/llms/ollama) provide details, which we summarize:\n",
|
||||
" \n",
|
||||
"* [Download and run](https://ollama.ai/download) the app\n",
|
||||
"* From command line, fetch a model from this [list of options](https://github.com/jmorganca/ollama): e.g., `ollama pull llama2`\n",
|
||||
"* When the app is running, all models are automatically served on `localhost:11434`\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "86178adb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' The first man on the moon was Neil Armstrong, who landed on the moon on July 20, 1969 as part of the Apollo 11 mission. obviously.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.llms import Ollama\n",
|
||||
"llm = Ollama(model=\"llama2\")\n",
|
||||
"llm(\"The first man on the moon was ...\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "343ab645",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Stream tokens as they are being generated."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"id": "9cd83603",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" The first man to walk on the moon was Neil Armstrong, an American astronaut who was part of the Apollo 11 mission in 1969. февруари 20, 1969, Armstrong stepped out of the lunar module Eagle and onto the moon's surface, famously declaring \"That's one small step for man, one giant leap for mankind\" as he took his first steps. He was followed by fellow astronaut Edwin \"Buzz\" Aldrin, who also walked on the moon during the mission."
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' The first man to walk on the moon was Neil Armstrong, an American astronaut who was part of the Apollo 11 mission in 1969. февруари 20, 1969, Armstrong stepped out of the lunar module Eagle and onto the moon\\'s surface, famously declaring \"That\\'s one small step for man, one giant leap for mankind\" as he took his first steps. He was followed by fellow astronaut Edwin \"Buzz\" Aldrin, who also walked on the moon during the mission.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 40,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.callbacks.manager import CallbackManager\n",
|
||||
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler \n",
|
||||
"llm = Ollama(model=\"llama2\", \n",
|
||||
" callback_manager = CallbackManager([StreamingStdOutCallbackHandler()]))\n",
|
||||
"llm(\"The first man on the moon was ...\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5cb27414",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Environment\n",
|
||||
"\n",
|
||||
"Inference speed is a chllenge when running models locally (see above).\n",
|
||||
"\n",
|
||||
"To minimize latency, it is desiable to run models locally on GPU, which ships with many consumer laptops [e.g., Apple devices](https://www.apple.com/newsroom/2022/06/apple-unveils-m2-with-breakthrough-performance-and-capabilities/).\n",
|
||||
"\n",
|
||||
"And even with GPU, the available GPU memory bandwidth (as noted above) is important.\n",
|
||||
"\n",
|
||||
"### Running Apple silicon GPU\n",
|
||||
"\n",
|
||||
"`Ollama` will automatically utilize the GPU on Apple devices.\n",
|
||||
" \n",
|
||||
"Other frameworks require the user to set up the environment to utilize the Apple GPU.\n",
|
||||
"\n",
|
||||
"For example, `llama.cpp` python bindings can be configured to use the GPU via [Metal](https://developer.apple.com/metal/).\n",
|
||||
"\n",
|
||||
"Metal is a graphics and compute API created by Apple providing near-direct access to the GPU. \n",
|
||||
"\n",
|
||||
"See the [`llama.cpp`](docs/integrations/llms/llamacpp) setup [here](https://github.com/abetlen/llama-cpp-python/blob/main/docs/install/macos.md) to enable this.\n",
|
||||
"\n",
|
||||
"In particular, ensure that conda is using the correct virtual enviorment that you created (`miniforge3`).\n",
|
||||
"\n",
|
||||
"E.g., for me:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"conda activate /Users/rlm/miniforge3/envs/llama\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"With the above confirmed, then:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"CMAKE_ARGS=\"-DLLAMA_METAL=on\" FORCE_CMAKE=1 pip install -U llama-cpp-python --no-cache-dir\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c382e79a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## LLMs\n",
|
||||
"\n",
|
||||
"There are various ways to gain access to quantized model weights.\n",
|
||||
"\n",
|
||||
"1. [`HuggingFace`](https://huggingface.co/TheBloke) - Many quantized model are available for download and can be run with framework such as [`llama.cpp`](https://github.com/ggerganov/llama.cpp)\n",
|
||||
"2. [`gpt4all`](https://gpt4all.io/index.html) - The model explorer offers a leaderboard of metrics and associated quantized models available for download \n",
|
||||
"3. [`Ollama`](https://github.com/jmorganca/ollama) - Several models can be accessed directly via `pull`\n",
|
||||
"\n",
|
||||
"### Ollama\n",
|
||||
"\n",
|
||||
"With [Ollama](docs/integrations/llms/ollama), fetch a model via `ollama pull <model family>:<tag>`:\n",
|
||||
"\n",
|
||||
"* E.g., for Llama-7b: `ollama pull llama2` will download the most basic version of the model (e.g., smallest # parameters and 4 bit quantization)\n",
|
||||
"* We can also specify a particular version from the [model list](https://github.com/jmorganca/ollama), e.g., `ollama pull llama2:13b`\n",
|
||||
"* See the full set of parameters on the [API reference page](https://api.python.langchain.com/en/latest/llms/langchain.llms.ollama.Ollama.html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 42,
|
||||
"id": "8ecd2f78",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' Sure! Here\\'s the answer, broken down step by step:\\n\\nThe first man on the moon was... Neil Armstrong.\\n\\nHere\\'s how I arrived at that answer:\\n\\n1. The first manned mission to land on the moon was Apollo 11.\\n2. The mission included three astronauts: Neil Armstrong, Edwin \"Buzz\" Aldrin, and Michael Collins.\\n3. Neil Armstrong was the mission commander and the first person to set foot on the moon.\\n4. On July 20, 1969, Armstrong stepped out of the lunar module Eagle and onto the moon\\'s surface, famously declaring \"That\\'s one small step for man, one giant leap for mankind.\"\\n\\nSo, the first man on the moon was Neil Armstrong!'"
|
||||
]
|
||||
},
|
||||
"execution_count": 42,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.llms import Ollama\n",
|
||||
"llm = Ollama(model=\"llama2:13b\")\n",
|
||||
"llm(\"The first man on the moon was ... think step by step\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "07c8c0d1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Llama.cpp\n",
|
||||
"\n",
|
||||
"Llama.cpp is compatible with a [broad set of models](https://github.com/ggerganov/llama.cpp).\n",
|
||||
"\n",
|
||||
"For example, below we run inference on `llama2-13b` with 4 bit quantization downloaded from [HuggingFace](https://huggingface.co/TheBloke/Llama-2-13B-GGML/tree/main).\n",
|
||||
"\n",
|
||||
"As noted above, see the [API reference](https://api.python.langchain.com/en/latest/llms/langchain.llms.llamacpp.LlamaCpp.html?highlight=llamacpp#langchain.llms.llamacpp.LlamaCpp) for the full set of parameters. \n",
|
||||
"\n",
|
||||
"From the [llama.cpp docs](https://python.langchain.com/docs/integrations/llms/llamacpp), a few are worth commenting on:\n",
|
||||
"\n",
|
||||
"`n_gpu_layers`: number of layers to be loaded into GPU memory\n",
|
||||
"\n",
|
||||
"* Value: 1\n",
|
||||
"* Meaning: Only one layer of the model will be loaded into GPU memory (1 is often sufficient).\n",
|
||||
"\n",
|
||||
"`n_batch`: number of tokens the model should process in parallel \n",
|
||||
"* Value: n_batch\n",
|
||||
"* Meaning: It's recommended to choose a value between 1 and n_ctx (which in this case is set to 2048)\n",
|
||||
"\n",
|
||||
"`n_ctx`: Token context window .\n",
|
||||
"* Value: 2048\n",
|
||||
"* Meaning: The model will consider a window of 2048 tokens at a time\n",
|
||||
"\n",
|
||||
"`f16_kv`: whether the model should use half-precision for the key/value cache\n",
|
||||
"* Value: True\n",
|
||||
"* Meaning: The model will use half-precision, which can be more memory efficient; Metal only support True."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5eba38dc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pip install llama-cpp-python"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"id": "9d5f94b5",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"objc[10142]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libreplit-mainline-metal.dylib (0x2a0c4c208) and /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/libllama.dylib (0x2c28bc208). One of the two will be used. Which one is undefined.\n",
|
||||
"llama.cpp: loading model from /Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\n",
|
||||
"llama_model_load_internal: format = ggjt v3 (latest)\n",
|
||||
"llama_model_load_internal: n_vocab = 32000\n",
|
||||
"llama_model_load_internal: n_ctx = 2048\n",
|
||||
"llama_model_load_internal: n_embd = 5120\n",
|
||||
"llama_model_load_internal: n_mult = 256\n",
|
||||
"llama_model_load_internal: n_head = 40\n",
|
||||
"llama_model_load_internal: n_layer = 40\n",
|
||||
"llama_model_load_internal: n_rot = 128\n",
|
||||
"llama_model_load_internal: freq_base = 10000.0\n",
|
||||
"llama_model_load_internal: freq_scale = 1\n",
|
||||
"llama_model_load_internal: ftype = 2 (mostly Q4_0)\n",
|
||||
"llama_model_load_internal: n_ff = 13824\n",
|
||||
"llama_model_load_internal: model size = 13B\n",
|
||||
"llama_model_load_internal: ggml ctx size = 0.09 MB\n",
|
||||
"llama_model_load_internal: mem required = 8953.71 MB (+ 1608.00 MB per state)\n",
|
||||
"llama_new_context_with_model: kv self size = 1600.00 MB\n",
|
||||
"ggml_metal_init: allocating\n",
|
||||
"ggml_metal_init: using MPS\n",
|
||||
"ggml_metal_init: loading '/Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/ggml-metal.metal'\n",
|
||||
"ggml_metal_init: loaded kernel_add 0x47774af60\n",
|
||||
"ggml_metal_init: loaded kernel_mul 0x47774bc00\n",
|
||||
"ggml_metal_init: loaded kernel_mul_row 0x47774c230\n",
|
||||
"ggml_metal_init: loaded kernel_scale 0x47774c890\n",
|
||||
"ggml_metal_init: loaded kernel_silu 0x47774cef0\n",
|
||||
"ggml_metal_init: loaded kernel_relu 0x10e33e500\n",
|
||||
"ggml_metal_init: loaded kernel_gelu 0x47774b2f0\n",
|
||||
"ggml_metal_init: loaded kernel_soft_max 0x47771a580\n",
|
||||
"ggml_metal_init: loaded kernel_diag_mask_inf 0x47774dab0\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_f16 0x47774e110\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q4_0 0x47774e7d0\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q4_1 0x13efd7170\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q2_K 0x13efd73d0\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q3_K 0x13efd7630\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q4_K 0x13efd7890\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q5_K 0x4744c9740\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q6_K 0x4744ca6b0\n",
|
||||
"ggml_metal_init: loaded kernel_rms_norm 0x4744cb250\n",
|
||||
"ggml_metal_init: loaded kernel_norm 0x4744cb970\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x10e33f700\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x10e33fcd0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x4744cc2d0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x4744cc6f0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x4744cd6b0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x4744cde20\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x10e33ff30\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x10e340190\n",
|
||||
"ggml_metal_init: loaded kernel_rope 0x10e3403f0\n",
|
||||
"ggml_metal_init: loaded kernel_alibi_f32 0x10e340de0\n",
|
||||
"ggml_metal_init: loaded kernel_cpy_f32_f16 0x10e3416d0\n",
|
||||
"ggml_metal_init: loaded kernel_cpy_f32_f32 0x10e342080\n",
|
||||
"ggml_metal_init: loaded kernel_cpy_f16_f16 0x10e342ca0\n",
|
||||
"ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n",
|
||||
"ggml_metal_init: hasUnifiedMemory = true\n",
|
||||
"ggml_metal_init: maxTransferRate = built-in GPU\n",
|
||||
"ggml_metal_add_buffer: allocated 'data ' buffer, size = 6984.06 MB, ( 6986.19 / 21845.34)\n",
|
||||
"ggml_metal_add_buffer: allocated 'eval ' buffer, size = 1032.00 MB, ( 8018.19 / 21845.34)\n",
|
||||
"ggml_metal_add_buffer: allocated 'kv ' buffer, size = 1602.00 MB, ( 9620.19 / 21845.34)\n",
|
||||
"ggml_metal_add_buffer: allocated 'scr0 ' buffer, size = 426.00 MB, (10046.19 / 21845.34)\n",
|
||||
"ggml_metal_add_buffer: allocated 'scr1 ' buffer, size = 512.00 MB, (10558.19 / 21845.34)\n",
|
||||
"AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.llms import LlamaCpp\n",
|
||||
"llm = LlamaCpp(\n",
|
||||
" model_path=\"/Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\",\n",
|
||||
" n_gpu_layers=1,\n",
|
||||
" n_batch=512,\n",
|
||||
" n_ctx=2048,\n",
|
||||
" f16_kv=True, \n",
|
||||
" callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),\n",
|
||||
" verbose=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f56f5168",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The console log will show the the below to indicate Metal was enabled properly from steps above:\n",
|
||||
"```\n",
|
||||
"ggml_metal_init: allocating\n",
|
||||
"ggml_metal_init: using MPS\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 45,
|
||||
"id": "7890a077",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Llama.generate: prefix-match hit\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" and use logical reasoning to figure out who the first man on the moon was.\n",
|
||||
"\n",
|
||||
"Here are some clues:\n",
|
||||
"\n",
|
||||
"1. The first man on the moon was an American.\n",
|
||||
"2. He was part of the Apollo 11 mission.\n",
|
||||
"3. He stepped out of the lunar module and became the first person to set foot on the moon's surface.\n",
|
||||
"4. His last name is Armstrong.\n",
|
||||
"\n",
|
||||
"Now, let's use our reasoning skills to figure out who the first man on the moon was. Based on clue #1, we know that the first man on the moon was an American. Clue #2 tells us that he was part of the Apollo 11 mission. Clue #3 reveals that he was the first person to set foot on the moon's surface. And finally, clue #4 gives us his last name: Armstrong.\n",
|
||||
"Therefore, the first man on the moon was Neil Armstrong!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"llama_print_timings: load time = 9623.21 ms\n",
|
||||
"llama_print_timings: sample time = 143.77 ms / 203 runs ( 0.71 ms per token, 1412.01 tokens per second)\n",
|
||||
"llama_print_timings: prompt eval time = 485.94 ms / 7 tokens ( 69.42 ms per token, 14.40 tokens per second)\n",
|
||||
"llama_print_timings: eval time = 6385.16 ms / 202 runs ( 31.61 ms per token, 31.64 tokens per second)\n",
|
||||
"llama_print_timings: total time = 7279.28 ms\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\" and use logical reasoning to figure out who the first man on the moon was.\\n\\nHere are some clues:\\n\\n1. The first man on the moon was an American.\\n2. He was part of the Apollo 11 mission.\\n3. He stepped out of the lunar module and became the first person to set foot on the moon's surface.\\n4. His last name is Armstrong.\\n\\nNow, let's use our reasoning skills to figure out who the first man on the moon was. Based on clue #1, we know that the first man on the moon was an American. Clue #2 tells us that he was part of the Apollo 11 mission. Clue #3 reveals that he was the first person to set foot on the moon's surface. And finally, clue #4 gives us his last name: Armstrong.\\nTherefore, the first man on the moon was Neil Armstrong!\""
|
||||
]
|
||||
},
|
||||
"execution_count": 45,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm(\"The first man on the moon was ... Let's think step by step\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "831ddf7c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### GPT4All\n",
|
||||
"\n",
|
||||
"We can use model weights downloaded from [GPT4All](https://python.langchain.com/docs/integrations/llms/gpt4all) model explorer.\n",
|
||||
"\n",
|
||||
"Similar to what is shown above, we can run inference and use [the API reference](https://api.python.langchain.com/en/latest/llms/langchain.llms.gpt4all.GPT4All.html?highlight=gpt4all#langchain.llms.gpt4all.GPT4All) to set parameters of interest."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e27baf6e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pip install gpt4all"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 46,
|
||||
"id": "b55a2147",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Found model file at /Users/rlm/Desktop/Code/gpt4all/models/nous-hermes-13b.ggmlv3.q4_0.bin\n",
|
||||
"llama_new_context_with_model: max tensor size = 87.89 MB\n",
|
||||
"llama_new_context_with_model: max tensor size = 87.89 MB\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"llama.cpp: using Metal\n",
|
||||
"llama.cpp: loading model from /Users/rlm/Desktop/Code/gpt4all/models/nous-hermes-13b.ggmlv3.q4_0.bin\n",
|
||||
"llama_model_load_internal: format = ggjt v3 (latest)\n",
|
||||
"llama_model_load_internal: n_vocab = 32001\n",
|
||||
"llama_model_load_internal: n_ctx = 2048\n",
|
||||
"llama_model_load_internal: n_embd = 5120\n",
|
||||
"llama_model_load_internal: n_mult = 256\n",
|
||||
"llama_model_load_internal: n_head = 40\n",
|
||||
"llama_model_load_internal: n_layer = 40\n",
|
||||
"llama_model_load_internal: n_rot = 128\n",
|
||||
"llama_model_load_internal: ftype = 2 (mostly Q4_0)\n",
|
||||
"llama_model_load_internal: n_ff = 13824\n",
|
||||
"llama_model_load_internal: n_parts = 1\n",
|
||||
"llama_model_load_internal: model size = 13B\n",
|
||||
"llama_model_load_internal: ggml ctx size = 0.09 MB\n",
|
||||
"llama_model_load_internal: mem required = 9031.71 MB (+ 1608.00 MB per state)\n",
|
||||
"llama_new_context_with_model: kv self size = 1600.00 MB\n",
|
||||
"ggml_metal_init: allocating\n",
|
||||
"ggml_metal_init: using MPS\n",
|
||||
"ggml_metal_init: loading '/Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/ggml-metal.metal'\n",
|
||||
"ggml_metal_init: loaded kernel_add 0x37944d850\n",
|
||||
"ggml_metal_init: loaded kernel_mul 0x37944f350\n",
|
||||
"ggml_metal_init: loaded kernel_mul_row 0x37944fdd0\n",
|
||||
"ggml_metal_init: loaded kernel_scale 0x3794505a0\n",
|
||||
"ggml_metal_init: loaded kernel_silu 0x379450800\n",
|
||||
"ggml_metal_init: loaded kernel_relu 0x379450a60\n",
|
||||
"ggml_metal_init: loaded kernel_gelu 0x379450cc0\n",
|
||||
"ggml_metal_init: loaded kernel_soft_max 0x379450ff0\n",
|
||||
"ggml_metal_init: loaded kernel_diag_mask_inf 0x379451250\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_f16 0x3794514b0\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q4_0 0x379451710\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q4_1 0x379451970\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q2_k 0x379451bd0\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q3_k 0x379451e30\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q4_k 0x379452090\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q5_k 0x3794522f0\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q6_k 0x379452550\n",
|
||||
"ggml_metal_init: loaded kernel_rms_norm 0x3794527b0\n",
|
||||
"ggml_metal_init: loaded kernel_norm 0x379452a10\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x379452c70\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x379452ed0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x379453130\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q2_k_f32 0x379453390\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q3_k_f32 0x3794535f0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q4_k_f32 0x379453850\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q5_k_f32 0x379453ab0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q6_k_f32 0x379453d10\n",
|
||||
"ggml_metal_init: loaded kernel_rope 0x379453f70\n",
|
||||
"ggml_metal_init: loaded kernel_alibi_f32 0x3794541d0\n",
|
||||
"ggml_metal_init: loaded kernel_cpy_f32_f16 0x379454430\n",
|
||||
"ggml_metal_init: loaded kernel_cpy_f32_f32 0x379454690\n",
|
||||
"ggml_metal_init: loaded kernel_cpy_f16_f16 0x3794548f0\n",
|
||||
"ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n",
|
||||
"ggml_metal_init: hasUnifiedMemory = true\n",
|
||||
"ggml_metal_init: maxTransferRate = built-in GPU\n",
|
||||
"ggml_metal_add_buffer: allocated 'data ' buffer, size = 6984.06 MB, (17542.94 / 21845.34)\n",
|
||||
"ggml_metal_add_buffer: allocated 'eval ' buffer, size = 1024.00 MB, (18566.94 / 21845.34)\n",
|
||||
"ggml_metal_add_buffer: allocated 'kv ' buffer, size = 1602.00 MB, (20168.94 / 21845.34)\n",
|
||||
"ggml_metal_add_buffer: allocated 'scr0 ' buffer, size = 512.00 MB, (20680.94 / 21845.34)\n",
|
||||
"ggml_metal_add_buffer: allocated 'scr1 ' buffer, size = 512.00 MB, (21192.94 / 21845.34)\n",
|
||||
"ggml_metal_free: deallocating\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.llms import GPT4All\n",
|
||||
"llm = GPT4All(model=\"/Users/rlm/Desktop/Code/gpt4all/models/nous-hermes-13b.ggmlv3.q4_0.bin\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 47,
|
||||
"id": "e3d4526f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\".\\n1) The United States decides to send a manned mission to the moon.2) They choose their best astronauts and train them for this specific mission.3) They build a spacecraft that can take humans to the moon, called the Lunar Module (LM).4) They also create a larger spacecraft, called the Saturn V rocket, which will launch both the LM and the Command Service Module (CSM), which will carry the astronauts into orbit.5) The mission is planned down to the smallest detail: from the trajectory of the rockets to the exact movements of the astronauts during their moon landing.6) On July 16, 1969, the Saturn V rocket launches from Kennedy Space Center in Florida, carrying the Apollo 11 mission crew into space.7) After one and a half orbits around the Earth, the LM separates from the CSM and begins its descent to the moon's surface.8) On July 20, 1969, at 2:56 pm EDT (GMT-4), Neil Armstrong becomes the first man on the moon. He speaks these\""
|
||||
]
|
||||
},
|
||||
"execution_count": 47,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm(\"The first man on the moon was ... Let's think step by step\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6b84e543",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prompts\n",
|
||||
"\n",
|
||||
"Some LLMs will benefit from specific prompts.\n",
|
||||
"\n",
|
||||
"For example, llama2 can use [special tokens](https://twitter.com/RLanceMartin/status/1681879318493003776?s=20).\n",
|
||||
"\n",
|
||||
"We can use `ConditionalPromptSelector` to set prompt based on the model type."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 57,
|
||||
"id": "d082b10a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"llama.cpp: loading model from /Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\n",
|
||||
"llama_model_load_internal: format = ggjt v3 (latest)\n",
|
||||
"llama_model_load_internal: n_vocab = 32000\n",
|
||||
"llama_model_load_internal: n_ctx = 2048\n",
|
||||
"llama_model_load_internal: n_embd = 5120\n",
|
||||
"llama_model_load_internal: n_mult = 256\n",
|
||||
"llama_model_load_internal: n_head = 40\n",
|
||||
"llama_model_load_internal: n_layer = 40\n",
|
||||
"llama_model_load_internal: n_rot = 128\n",
|
||||
"llama_model_load_internal: freq_base = 10000.0\n",
|
||||
"llama_model_load_internal: freq_scale = 1\n",
|
||||
"llama_model_load_internal: ftype = 2 (mostly Q4_0)\n",
|
||||
"llama_model_load_internal: n_ff = 13824\n",
|
||||
"llama_model_load_internal: model size = 13B\n",
|
||||
"llama_model_load_internal: ggml ctx size = 0.09 MB\n",
|
||||
"llama_model_load_internal: mem required = 8953.71 MB (+ 1608.00 MB per state)\n",
|
||||
"llama_new_context_with_model: kv self size = 1600.00 MB\n",
|
||||
"ggml_metal_init: allocating\n",
|
||||
"ggml_metal_init: using MPS\n",
|
||||
"ggml_metal_init: loading '/Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/ggml-metal.metal'\n",
|
||||
"ggml_metal_init: loaded kernel_add 0x4744d09d0\n",
|
||||
"ggml_metal_init: loaded kernel_mul 0x3781cb3d0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_row 0x37813bb60\n",
|
||||
"ggml_metal_init: loaded kernel_scale 0x474481080\n",
|
||||
"ggml_metal_init: loaded kernel_silu 0x4744d29f0\n",
|
||||
"ggml_metal_init: loaded kernel_relu 0x3781254c0\n",
|
||||
"ggml_metal_init: loaded kernel_gelu 0x47447f280\n",
|
||||
"ggml_metal_init: loaded kernel_soft_max 0x4744cf470\n",
|
||||
"ggml_metal_init: loaded kernel_diag_mask_inf 0x4744cf6d0\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_f16 0x4744cf930\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q4_0 0x4744cfb90\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q4_1 0x4744cfdf0\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q2_K 0x4744d0050\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q3_K 0x4744ce980\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q4_K 0x4744cebe0\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q5_K 0x4744cee40\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q6_K 0x4744cf0a0\n",
|
||||
"ggml_metal_init: loaded kernel_rms_norm 0x474482450\n",
|
||||
"ggml_metal_init: loaded kernel_norm 0x4744826b0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x474482910\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x474482b70\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x474482dd0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x474483030\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x474483290\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x4744834f0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x474483750\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x4744839b0\n",
|
||||
"ggml_metal_init: loaded kernel_rope 0x474483c10\n",
|
||||
"ggml_metal_init: loaded kernel_alibi_f32 0x474483e70\n",
|
||||
"ggml_metal_init: loaded kernel_cpy_f32_f16 0x4744840d0\n",
|
||||
"ggml_metal_init: loaded kernel_cpy_f32_f32 0x474484330\n",
|
||||
"ggml_metal_init: loaded kernel_cpy_f16_f16 0x474484590\n",
|
||||
"ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n",
|
||||
"ggml_metal_init: hasUnifiedMemory = true\n",
|
||||
"ggml_metal_init: maxTransferRate = built-in GPU\n",
|
||||
"ggml_metal_add_buffer: allocated 'data ' buffer, size = 6984.06 MB, ( 6986.94 / 21845.34)\n",
|
||||
"ggml_metal_add_buffer: allocated 'eval ' buffer, size = 1032.00 MB, ( 8018.94 / 21845.34)\n",
|
||||
"ggml_metal_add_buffer: allocated 'kv ' buffer, size = 1602.00 MB, ( 9620.94 / 21845.34)\n",
|
||||
"ggml_metal_add_buffer: allocated 'scr0 ' buffer, size = 426.00 MB, (10046.94 / 21845.34)\n",
|
||||
"ggml_metal_add_buffer: allocated 'scr1 ' buffer, size = 512.00 MB, (10558.94 / 21845.34)\n",
|
||||
"AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Set our LLM\n",
|
||||
"llm = LlamaCpp(\n",
|
||||
" model_path=\"/Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\",\n",
|
||||
" n_gpu_layers=1,\n",
|
||||
" n_batch=512,\n",
|
||||
" n_ctx=2048,\n",
|
||||
" f16_kv=True, \n",
|
||||
" callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),\n",
|
||||
" verbose=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "66656084",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Set the associated prompt."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 58,
|
||||
"id": "8555f5bf",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"PromptTemplate(input_variables=['question'], output_parser=None, partial_variables={}, template='<<SYS>> \\n You are an assistant tasked with improving Google search results. \\n <</SYS>> \\n\\n [INST] Generate THREE Google search queries that are similar to this question. The output should be a numbered list of questions and each should have a question mark at the end: \\n\\n {question} [/INST]', template_format='f-string', validate_template=True)"
|
||||
]
|
||||
},
|
||||
"execution_count": 58,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain import PromptTemplate, LLMChain\n",
|
||||
"from langchain.chains.prompt_selector import ConditionalPromptSelector\n",
|
||||
"\n",
|
||||
"DEFAULT_LLAMA_SEARCH_PROMPT = PromptTemplate(\n",
|
||||
" input_variables=[\"question\"],\n",
|
||||
" template=\"\"\"<<SYS>> \\n You are an assistant tasked with improving Google search \\\n",
|
||||
"results. \\n <</SYS>> \\n\\n [INST] Generate THREE Google search queries that \\\n",
|
||||
"are similar to this question. The output should be a numbered list of questions \\\n",
|
||||
"and each should have a question mark at the end: \\n\\n {question} [/INST]\"\"\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"DEFAULT_SEARCH_PROMPT = PromptTemplate(\n",
|
||||
" input_variables=[\"question\"],\n",
|
||||
" template=\"\"\"You are an assistant tasked with improving Google search \\\n",
|
||||
"results. Generate THREE Google search queries that are similar to \\\n",
|
||||
"this question. The output should be a numbered list of questions and each \\\n",
|
||||
"should have a question mark at the end: {question}\"\"\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"QUESTION_PROMPT_SELECTOR = ConditionalPromptSelector(\n",
|
||||
" default_prompt=DEFAULT_SEARCH_PROMPT,\n",
|
||||
" conditionals=[\n",
|
||||
" (lambda llm: isinstance(llm, LlamaCpp), DEFAULT_LLAMA_SEARCH_PROMPT)\n",
|
||||
" ],\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"prompt = QUESTION_PROMPT_SELECTOR.get_prompt(llm)\n",
|
||||
"prompt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 59,
|
||||
"id": "d0aedfd2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" Sure! Here are three similar search queries with a question mark at the end:\n",
|
||||
"\n",
|
||||
"1. Which NBA team did LeBron James lead to a championship in the year he was drafted?\n",
|
||||
"2. Who won the Grammy Awards for Best New Artist and Best Female Pop Vocal Performance in the same year that Lady Gaga was born?\n",
|
||||
"3. What MLB team did Babe Ruth play for when he hit 60 home runs in a single season?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"llama_print_timings: load time = 14943.19 ms\n",
|
||||
"llama_print_timings: sample time = 72.93 ms / 101 runs ( 0.72 ms per token, 1384.87 tokens per second)\n",
|
||||
"llama_print_timings: prompt eval time = 14942.95 ms / 93 tokens ( 160.68 ms per token, 6.22 tokens per second)\n",
|
||||
"llama_print_timings: eval time = 3430.85 ms / 100 runs ( 34.31 ms per token, 29.15 tokens per second)\n",
|
||||
"llama_print_timings: total time = 18578.26 ms\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' Sure! Here are three similar search queries with a question mark at the end:\\n\\n1. Which NBA team did LeBron James lead to a championship in the year he was drafted?\\n2. Who won the Grammy Awards for Best New Artist and Best Female Pop Vocal Performance in the same year that Lady Gaga was born?\\n3. What MLB team did Babe Ruth play for when he hit 60 home runs in a single season?'"
|
||||
]
|
||||
},
|
||||
"execution_count": 59,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Chain\n",
|
||||
"llm_chain = LLMChain(prompt=prompt,llm=llm)\n",
|
||||
"question = \"What NFL team won the Super Bowl in the year that Justin Bieber was born?\"\n",
|
||||
"llm_chain.run({\"question\":question})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6ba66260",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use cases\n",
|
||||
"\n",
|
||||
"Given an `llm` created from one of the models above, you can use it for [many use cases](docs/use_cases).\n",
|
||||
"\n",
|
||||
"For example, here is a guide to [RAG](docs/use_cases/question_answering/how_to/local_retrieval_qa) with local LLMs.\n",
|
||||
"\n",
|
||||
"In general, use cases for local model can be driven by at least two factors:\n",
|
||||
"\n",
|
||||
"* `Privacy`: private data (e.g., journals, etc) that a user does not want to share \n",
|
||||
"* `Cost`: text preprocessing (extraction/tagging), summarization, and agent simulations are token-use-intensive tasks\n",
|
||||
"\n",
|
||||
"There are a few approach to support specific use-cases: \n",
|
||||
"\n",
|
||||
"* Fine-tuning (e.g., [gpt-llm-trainer](https://github.com/mshumer/gpt-llm-trainer), [Anyscale](https://www.anyscale.com/blog/fine-tuning-llama-2-a-comprehensive-case-study-for-tailoring-models-to-unique-applications)) \n",
|
||||
"* [Function-calling](https://github.com/MeetKai/functionary/tree/main) for use-cases like extraction or tagging\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
105
docs/extras/guides/pydantic_compatibility.md
Normal file
105
docs/extras/guides/pydantic_compatibility.md
Normal file
@@ -0,0 +1,105 @@
|
||||
# Pydantic Compatibility
|
||||
|
||||
- Pydantic v2 was released in June, 2023 (https://docs.pydantic.dev/2.0/blog/pydantic-v2-final/)
|
||||
- v2 contains has a number of breaking changes (https://docs.pydantic.dev/2.0/migration/)
|
||||
- Pydantic v2 and v1 are under the same package name, so both versions cannot be installed at the same time
|
||||
|
||||
## LangChain Pydantic Migration Plan
|
||||
|
||||
As of `langchain>=0.0.267`, LangChain will allow users to install either Pydantic V1 or V2.
|
||||
* Internally LangChain will continue to [use V1](https://docs.pydantic.dev/latest/migration/#continue-using-pydantic-v1-features).
|
||||
* During this time, users can pin their pydantic version to v1 to avoid breaking changes, or start a partial
|
||||
migration using pydantic v2 throughout their code, but avoiding mixing v1 and v2 code for LangChain (see below).
|
||||
|
||||
User can either pin to pydantic v1, and upgrade their code in one go once LangChain has migrated to v2 internally, or they can start a partial migration to v2, but must avoid mixing v1 and v2 code for LangChain.
|
||||
|
||||
Below are two examples of showing how to avoid mixing pydantic v1 and v2 code in
|
||||
the case of inheritance and in the case of passing objects to LangChain.
|
||||
|
||||
**Example 1: Extending via inheritance**
|
||||
|
||||
**YES**
|
||||
|
||||
```python
|
||||
from pydantic.v1 import root_validator, validator
|
||||
|
||||
class CustomTool(BaseTool): # BaseTool is v1 code
|
||||
x: int = Field(default=1)
|
||||
|
||||
def _run(*args, **kwargs):
|
||||
return "hello"
|
||||
|
||||
@validator('x') # v1 code
|
||||
@classmethod
|
||||
def validate_x(cls, x: int) -> int:
|
||||
return 1
|
||||
|
||||
|
||||
CustomTool(
|
||||
name='custom_tool',
|
||||
description="hello",
|
||||
x=1,
|
||||
)
|
||||
```
|
||||
|
||||
Mixing Pydantic v2 primitives with Pydantic v1 primitives can raise cryptic errors
|
||||
|
||||
**NO**
|
||||
|
||||
```python
|
||||
from pydantic import Field, field_validator # pydantic v2
|
||||
|
||||
class CustomTool(BaseTool): # BaseTool is v1 code
|
||||
x: int = Field(default=1)
|
||||
|
||||
def _run(*args, **kwargs):
|
||||
return "hello"
|
||||
|
||||
@field_validator('x') # v2 code
|
||||
@classmethod
|
||||
def validate_x(cls, x: int) -> int:
|
||||
return 1
|
||||
|
||||
|
||||
CustomTool(
|
||||
name='custom_tool',
|
||||
description="hello",
|
||||
x=1,
|
||||
)
|
||||
```
|
||||
|
||||
**Example 2: Passing objects to LangChain**
|
||||
|
||||
**YES**
|
||||
|
||||
```python
|
||||
from langchain.tools.base import Tool
|
||||
from pydantic.v1 import BaseModel, Field # <-- Uses v1 namespace
|
||||
|
||||
class CalculatorInput(BaseModel):
|
||||
question: str = Field()
|
||||
|
||||
Tool.from_function( # <-- tool uses v1 namespace
|
||||
func=lambda question: 'hello',
|
||||
name="Calculator",
|
||||
description="useful for when you need to answer questions about math",
|
||||
args_schema=CalculatorInput
|
||||
)
|
||||
```
|
||||
|
||||
**NO**
|
||||
|
||||
```python
|
||||
from langchain.tools.base import Tool
|
||||
from pydantic import BaseModel, Field # <-- Uses v2 namespace
|
||||
|
||||
class CalculatorInput(BaseModel):
|
||||
question: str = Field()
|
||||
|
||||
Tool.from_function( # <-- tool uses v1 namespace
|
||||
func=lambda question: 'hello',
|
||||
name="Calculator",
|
||||
description="useful for when you need to answer questions about math",
|
||||
args_schema=CalculatorInput
|
||||
)
|
||||
```
|
||||
@@ -7,12 +7,12 @@
|
||||
"source": [
|
||||
"# Context\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"[Context](https://getcontext.ai/) provides product analytics for AI chatbots.\n",
|
||||
"[Context](https://context.ai/) provides user analytics for LLM powered products and features.\n",
|
||||
"\n",
|
||||
"Context helps you understand how users are interacting with your AI chat products.\n",
|
||||
"Gain critical insights, optimise poor experiences, and minimise brand risks.\n"
|
||||
"With Context, you can start understanding your users and improving their experiences in less than 30 minutes.\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -55,7 +55,7 @@
|
||||
"\n",
|
||||
"To get your Context API token:\n",
|
||||
"\n",
|
||||
"1. Go to the settings page within your Context account (https://go.getcontext.ai/settings).\n",
|
||||
"1. Go to the settings page within your Context account (https://with.context.ai/settings).\n",
|
||||
"2. Generate a new API Token.\n",
|
||||
"3. Store this token somewhere secure."
|
||||
]
|
||||
@@ -207,7 +207,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.9.1"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
|
||||
88
docs/extras/integrations/chat/ernie.ipynb
Normal file
88
docs/extras/integrations/chat/ernie.ipynb
Normal file
@@ -0,0 +1,88 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ERNIE-Bot Chat\n",
|
||||
"\n",
|
||||
"[ERNIE-Bot](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/jlil56u11) is a large language model developed by Baidu, covering a huge amount of Chinese data.\n",
|
||||
"This notebook covers how to get started with ErnieBot chat models."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ErnieBotChat\n",
|
||||
"from langchain.schema import HumanMessage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chat = ErnieBotChat(ernie_client_id='YOUR_CLIENT_ID', ernie_client_secret='YOUR_CLIENT_SECRET')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"or you can set `client_id` and `client_secret` in your environment variables\n",
|
||||
"```bash\n",
|
||||
"export ERNIE_CLIENT_ID=YOUR_CLIENT_ID\n",
|
||||
"export ERNIE_CLIENT_SECRET=YOUR_CLIENT_SECRET\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='Hello, I am an artificial intelligence language model. My purpose is to help users answer questions or provide information. What can I do for you?', additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chat([\n",
|
||||
" HumanMessage(content='hello there, who are you?')\n",
|
||||
"])"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -5,7 +5,7 @@
|
||||
"id": "62359e08-cf80-4210-a30c-f450000e65b9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ArcGISLoader\n",
|
||||
"# ArcGIS\n",
|
||||
"\n",
|
||||
"This notebook demonstrates the use of the `langchain.document_loaders.ArcGISLoader` class.\n",
|
||||
"\n",
|
||||
@@ -39,8 +39,8 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 4.04 ms, sys: 1.63 ms, total: 5.67 ms\n",
|
||||
"Wall time: 644 ms\n"
|
||||
"CPU times: user 7.86 ms, sys: 0 ns, total: 7.86 ms\n",
|
||||
"Wall time: 802 ms\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -59,7 +59,179 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"dict_keys(['url', 'layer_description', 'item_description', 'layer_properties'])"
|
||||
"{'accessed': '2023-08-15T04:30:41.689270+00:00Z',\n",
|
||||
" 'name': 'Beach Ramps',\n",
|
||||
" 'url': 'https://maps1.vcgov.org/arcgis/rest/services/Beaches/MapServer/7',\n",
|
||||
" 'layer_description': '(Not Provided)',\n",
|
||||
" 'item_description': '(Not Provided)',\n",
|
||||
" 'layer_properties': {\n",
|
||||
" \"currentVersion\": 10.81,\n",
|
||||
" \"id\": 7,\n",
|
||||
" \"name\": \"Beach Ramps\",\n",
|
||||
" \"type\": \"Feature Layer\",\n",
|
||||
" \"description\": \"\",\n",
|
||||
" \"geometryType\": \"esriGeometryPoint\",\n",
|
||||
" \"sourceSpatialReference\": {\n",
|
||||
" \"wkid\": 2881,\n",
|
||||
" \"latestWkid\": 2881\n",
|
||||
" },\n",
|
||||
" \"copyrightText\": \"\",\n",
|
||||
" \"parentLayer\": null,\n",
|
||||
" \"subLayers\": [],\n",
|
||||
" \"minScale\": 750000,\n",
|
||||
" \"maxScale\": 0,\n",
|
||||
" \"drawingInfo\": {\n",
|
||||
" \"renderer\": {\n",
|
||||
" \"type\": \"simple\",\n",
|
||||
" \"symbol\": {\n",
|
||||
" \"type\": \"esriPMS\",\n",
|
||||
" \"url\": \"9bb2e5ca499bb68aa3ee0d4e1ecc3849\",\n",
|
||||
" \"imageData\": \"iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IB2cksfwAAAAlwSFlzAAAOxAAADsQBlSsOGwAAAJJJREFUOI3NkDEKg0AQRZ9kkSnSGBshR7DJqdJYeg7BMpcS0uQWQsqoCLExkcUJzGqT38zw2fcY1rEzbp7vjXz0EXC7gBxs1ABcG/8CYkCcDqwyLqsV+RlV0I/w7PzuJBArr1VB20H58Ls6h+xoFITkTwWpQJX7XSIBAnFwVj7MLAjJV/AC6G3QoAmK+74Lom04THTBEp/HCSc6AAAAAElFTkSuQmCC\",\n",
|
||||
" \"contentType\": \"image/png\",\n",
|
||||
" \"width\": 12,\n",
|
||||
" \"height\": 12,\n",
|
||||
" \"angle\": 0,\n",
|
||||
" \"xoffset\": 0,\n",
|
||||
" \"yoffset\": 0\n",
|
||||
" },\n",
|
||||
" \"label\": \"\",\n",
|
||||
" \"description\": \"\"\n",
|
||||
" },\n",
|
||||
" \"transparency\": 0,\n",
|
||||
" \"labelingInfo\": null\n",
|
||||
" },\n",
|
||||
" \"defaultVisibility\": true,\n",
|
||||
" \"extent\": {\n",
|
||||
" \"xmin\": -81.09480168806815,\n",
|
||||
" \"ymin\": 28.858349245353473,\n",
|
||||
" \"xmax\": -80.77512908572814,\n",
|
||||
" \"ymax\": 29.41078388840041,\n",
|
||||
" \"spatialReference\": {\n",
|
||||
" \"wkid\": 4326,\n",
|
||||
" \"latestWkid\": 4326\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" \"hasAttachments\": false,\n",
|
||||
" \"htmlPopupType\": \"esriServerHTMLPopupTypeNone\",\n",
|
||||
" \"displayField\": \"AccessName\",\n",
|
||||
" \"typeIdField\": null,\n",
|
||||
" \"subtypeFieldName\": null,\n",
|
||||
" \"subtypeField\": null,\n",
|
||||
" \"defaultSubtypeCode\": null,\n",
|
||||
" \"fields\": [\n",
|
||||
" {\n",
|
||||
" \"name\": \"OBJECTID\",\n",
|
||||
" \"type\": \"esriFieldTypeOID\",\n",
|
||||
" \"alias\": \"OBJECTID\",\n",
|
||||
" \"domain\": null\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"Shape\",\n",
|
||||
" \"type\": \"esriFieldTypeGeometry\",\n",
|
||||
" \"alias\": \"Shape\",\n",
|
||||
" \"domain\": null\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"AccessName\",\n",
|
||||
" \"type\": \"esriFieldTypeString\",\n",
|
||||
" \"alias\": \"AccessName\",\n",
|
||||
" \"length\": 40,\n",
|
||||
" \"domain\": null\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"AccessID\",\n",
|
||||
" \"type\": \"esriFieldTypeString\",\n",
|
||||
" \"alias\": \"AccessID\",\n",
|
||||
" \"length\": 50,\n",
|
||||
" \"domain\": null\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"AccessType\",\n",
|
||||
" \"type\": \"esriFieldTypeString\",\n",
|
||||
" \"alias\": \"AccessType\",\n",
|
||||
" \"length\": 25,\n",
|
||||
" \"domain\": null\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"GeneralLoc\",\n",
|
||||
" \"type\": \"esriFieldTypeString\",\n",
|
||||
" \"alias\": \"GeneralLoc\",\n",
|
||||
" \"length\": 100,\n",
|
||||
" \"domain\": null\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"MilePost\",\n",
|
||||
" \"type\": \"esriFieldTypeDouble\",\n",
|
||||
" \"alias\": \"MilePost\",\n",
|
||||
" \"domain\": null\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"City\",\n",
|
||||
" \"type\": \"esriFieldTypeString\",\n",
|
||||
" \"alias\": \"City\",\n",
|
||||
" \"length\": 50,\n",
|
||||
" \"domain\": null\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"AccessStatus\",\n",
|
||||
" \"type\": \"esriFieldTypeString\",\n",
|
||||
" \"alias\": \"AccessStatus\",\n",
|
||||
" \"length\": 50,\n",
|
||||
" \"domain\": null\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"Entry_Date_Time\",\n",
|
||||
" \"type\": \"esriFieldTypeDate\",\n",
|
||||
" \"alias\": \"Entry_Date_Time\",\n",
|
||||
" \"length\": 8,\n",
|
||||
" \"domain\": null\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"DrivingZone\",\n",
|
||||
" \"type\": \"esriFieldTypeString\",\n",
|
||||
" \"alias\": \"DrivingZone\",\n",
|
||||
" \"length\": 50,\n",
|
||||
" \"domain\": null\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"geometryField\": {\n",
|
||||
" \"name\": \"Shape\",\n",
|
||||
" \"type\": \"esriFieldTypeGeometry\",\n",
|
||||
" \"alias\": \"Shape\"\n",
|
||||
" },\n",
|
||||
" \"indexes\": null,\n",
|
||||
" \"subtypes\": [],\n",
|
||||
" \"relationships\": [],\n",
|
||||
" \"canModifyLayer\": true,\n",
|
||||
" \"canScaleSymbols\": false,\n",
|
||||
" \"hasLabels\": false,\n",
|
||||
" \"capabilities\": \"Map,Query,Data\",\n",
|
||||
" \"maxRecordCount\": 1000,\n",
|
||||
" \"supportsStatistics\": true,\n",
|
||||
" \"supportsAdvancedQueries\": true,\n",
|
||||
" \"supportedQueryFormats\": \"JSON, geoJSON\",\n",
|
||||
" \"isDataVersioned\": false,\n",
|
||||
" \"ownershipBasedAccessControlForFeatures\": {\n",
|
||||
" \"allowOthersToQuery\": true\n",
|
||||
" },\n",
|
||||
" \"useStandardizedQueries\": true,\n",
|
||||
" \"advancedQueryCapabilities\": {\n",
|
||||
" \"useStandardizedQueries\": true,\n",
|
||||
" \"supportsStatistics\": true,\n",
|
||||
" \"supportsHavingClause\": true,\n",
|
||||
" \"supportsCountDistinct\": true,\n",
|
||||
" \"supportsOrderBy\": true,\n",
|
||||
" \"supportsDistinct\": true,\n",
|
||||
" \"supportsPagination\": true,\n",
|
||||
" \"supportsTrueCurve\": true,\n",
|
||||
" \"supportsReturningQueryExtent\": true,\n",
|
||||
" \"supportsQueryWithDistance\": true,\n",
|
||||
" \"supportsSqlExpression\": true\n",
|
||||
" },\n",
|
||||
" \"supportsDatumTransformation\": true,\n",
|
||||
" \"dateFieldsTimeReference\": null,\n",
|
||||
" \"supportsCoordinatesQuantization\": true\n",
|
||||
" }}"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
@@ -68,200 +240,12 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs[0].metadata.keys()"
|
||||
"docs[0].metadata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "6b6e9107-6a80-4ef7-8149-3013faa2de76",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"KeysView({\n",
|
||||
" \"currentVersion\": 10.81,\n",
|
||||
" \"id\": 7,\n",
|
||||
" \"name\": \"Beach Ramps\",\n",
|
||||
" \"type\": \"Feature Layer\",\n",
|
||||
" \"description\": \"\",\n",
|
||||
" \"geometryType\": \"esriGeometryPoint\",\n",
|
||||
" \"sourceSpatialReference\": {\n",
|
||||
" \"wkid\": 2881,\n",
|
||||
" \"latestWkid\": 2881\n",
|
||||
" },\n",
|
||||
" \"copyrightText\": \"\",\n",
|
||||
" \"parentLayer\": null,\n",
|
||||
" \"subLayers\": [],\n",
|
||||
" \"minScale\": 750000,\n",
|
||||
" \"maxScale\": 0,\n",
|
||||
" \"drawingInfo\": {\n",
|
||||
" \"renderer\": {\n",
|
||||
" \"type\": \"simple\",\n",
|
||||
" \"symbol\": {\n",
|
||||
" \"type\": \"esriPMS\",\n",
|
||||
" \"url\": \"9bb2e5ca499bb68aa3ee0d4e1ecc3849\",\n",
|
||||
" \"imageData\": \"iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IB2cksfwAAAAlwSFlzAAAOxAAADsQBlSsOGwAAAJJJREFUOI3NkDEKg0AQRZ9kkSnSGBshR7DJqdJYeg7BMpcS0uQWQsqoCLExkcUJzGqT38zw2fcY1rEzbp7vjXz0EXC7gBxs1ABcG/8CYkCcDqwyLqsV+RlV0I/w7PzuJBArr1VB20H58Ls6h+xoFITkTwWpQJX7XSIBAnFwVj7MLAjJV/AC6G3QoAmK+74Lom04THTBEp/HCSc6AAAAAElFTkSuQmCC\",\n",
|
||||
" \"contentType\": \"image/png\",\n",
|
||||
" \"width\": 12,\n",
|
||||
" \"height\": 12,\n",
|
||||
" \"angle\": 0,\n",
|
||||
" \"xoffset\": 0,\n",
|
||||
" \"yoffset\": 0\n",
|
||||
" },\n",
|
||||
" \"label\": \"\",\n",
|
||||
" \"description\": \"\"\n",
|
||||
" },\n",
|
||||
" \"transparency\": 0,\n",
|
||||
" \"labelingInfo\": null\n",
|
||||
" },\n",
|
||||
" \"defaultVisibility\": true,\n",
|
||||
" \"extent\": {\n",
|
||||
" \"xmin\": -81.09480168806815,\n",
|
||||
" \"ymin\": 28.858349245353473,\n",
|
||||
" \"xmax\": -80.77512908572814,\n",
|
||||
" \"ymax\": 29.41078388840041,\n",
|
||||
" \"spatialReference\": {\n",
|
||||
" \"wkid\": 4326,\n",
|
||||
" \"latestWkid\": 4326\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" \"hasAttachments\": false,\n",
|
||||
" \"htmlPopupType\": \"esriServerHTMLPopupTypeNone\",\n",
|
||||
" \"displayField\": \"AccessName\",\n",
|
||||
" \"typeIdField\": null,\n",
|
||||
" \"subtypeFieldName\": null,\n",
|
||||
" \"subtypeField\": null,\n",
|
||||
" \"defaultSubtypeCode\": null,\n",
|
||||
" \"fields\": [\n",
|
||||
" {\n",
|
||||
" \"name\": \"OBJECTID\",\n",
|
||||
" \"type\": \"esriFieldTypeOID\",\n",
|
||||
" \"alias\": \"OBJECTID\",\n",
|
||||
" \"domain\": null\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"Shape\",\n",
|
||||
" \"type\": \"esriFieldTypeGeometry\",\n",
|
||||
" \"alias\": \"Shape\",\n",
|
||||
" \"domain\": null\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"AccessName\",\n",
|
||||
" \"type\": \"esriFieldTypeString\",\n",
|
||||
" \"alias\": \"AccessName\",\n",
|
||||
" \"length\": 40,\n",
|
||||
" \"domain\": null\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"AccessID\",\n",
|
||||
" \"type\": \"esriFieldTypeString\",\n",
|
||||
" \"alias\": \"AccessID\",\n",
|
||||
" \"length\": 50,\n",
|
||||
" \"domain\": null\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"AccessType\",\n",
|
||||
" \"type\": \"esriFieldTypeString\",\n",
|
||||
" \"alias\": \"AccessType\",\n",
|
||||
" \"length\": 25,\n",
|
||||
" \"domain\": null\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"GeneralLoc\",\n",
|
||||
" \"type\": \"esriFieldTypeString\",\n",
|
||||
" \"alias\": \"GeneralLoc\",\n",
|
||||
" \"length\": 100,\n",
|
||||
" \"domain\": null\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"MilePost\",\n",
|
||||
" \"type\": \"esriFieldTypeDouble\",\n",
|
||||
" \"alias\": \"MilePost\",\n",
|
||||
" \"domain\": null\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"City\",\n",
|
||||
" \"type\": \"esriFieldTypeString\",\n",
|
||||
" \"alias\": \"City\",\n",
|
||||
" \"length\": 50,\n",
|
||||
" \"domain\": null\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"AccessStatus\",\n",
|
||||
" \"type\": \"esriFieldTypeString\",\n",
|
||||
" \"alias\": \"AccessStatus\",\n",
|
||||
" \"length\": 50,\n",
|
||||
" \"domain\": null\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"Entry_Date_Time\",\n",
|
||||
" \"type\": \"esriFieldTypeDate\",\n",
|
||||
" \"alias\": \"Entry_Date_Time\",\n",
|
||||
" \"length\": 8,\n",
|
||||
" \"domain\": null\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"DrivingZone\",\n",
|
||||
" \"type\": \"esriFieldTypeString\",\n",
|
||||
" \"alias\": \"DrivingZone\",\n",
|
||||
" \"length\": 50,\n",
|
||||
" \"domain\": null\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"geometryField\": {\n",
|
||||
" \"name\": \"Shape\",\n",
|
||||
" \"type\": \"esriFieldTypeGeometry\",\n",
|
||||
" \"alias\": \"Shape\"\n",
|
||||
" },\n",
|
||||
" \"indexes\": null,\n",
|
||||
" \"subtypes\": [],\n",
|
||||
" \"relationships\": [],\n",
|
||||
" \"canModifyLayer\": true,\n",
|
||||
" \"canScaleSymbols\": false,\n",
|
||||
" \"hasLabels\": false,\n",
|
||||
" \"capabilities\": \"Map,Query,Data\",\n",
|
||||
" \"maxRecordCount\": 1000,\n",
|
||||
" \"supportsStatistics\": true,\n",
|
||||
" \"supportsAdvancedQueries\": true,\n",
|
||||
" \"supportedQueryFormats\": \"JSON, geoJSON\",\n",
|
||||
" \"isDataVersioned\": false,\n",
|
||||
" \"ownershipBasedAccessControlForFeatures\": {\n",
|
||||
" \"allowOthersToQuery\": true\n",
|
||||
" },\n",
|
||||
" \"useStandardizedQueries\": true,\n",
|
||||
" \"advancedQueryCapabilities\": {\n",
|
||||
" \"useStandardizedQueries\": true,\n",
|
||||
" \"supportsStatistics\": true,\n",
|
||||
" \"supportsHavingClause\": true,\n",
|
||||
" \"supportsCountDistinct\": true,\n",
|
||||
" \"supportsOrderBy\": true,\n",
|
||||
" \"supportsDistinct\": true,\n",
|
||||
" \"supportsPagination\": true,\n",
|
||||
" \"supportsTrueCurve\": true,\n",
|
||||
" \"supportsReturningQueryExtent\": true,\n",
|
||||
" \"supportsQueryWithDistance\": true,\n",
|
||||
" \"supportsSqlExpression\": true\n",
|
||||
" },\n",
|
||||
" \"supportsDatumTransformation\": true,\n",
|
||||
" \"dateFieldsTimeReference\": null,\n",
|
||||
" \"supportsCoordinatesQuantization\": true\n",
|
||||
"})"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs[0].metadata['layer_properties'].keys()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "1d132b7d-5a13-4d66-98e8-785ffdf87af0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -269,29 +253,29 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{\"OBJECTID\": 2, \"AccessName\": \"27TH AV\", \"AccessID\": \"NS-141\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3600 BLK S ATLANTIC AV\", \"MilePost\": 4.83, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1691397348000, \"DrivingZone\": \"BOTH\"}\n",
|
||||
"{\"OBJECTID\": 7, \"AccessName\": \"BEACHWAY AV\", \"AccessID\": \"NS-106\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1400 N ATLANTIC AV\", \"MilePost\": 1.57, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1691397348000, \"DrivingZone\": \"YES\"}\n",
|
||||
"{\"OBJECTID\": 10, \"AccessName\": \"SEABREEZE BLVD\", \"AccessID\": \"DB-051\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"500 BLK N ATLANTIC AV\", \"MilePost\": 14.24, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1691394892000, \"DrivingZone\": \"BOTH\"}\n",
|
||||
"{\"OBJECTID\": 13, \"AccessName\": \"GRANADA BLVD\", \"AccessID\": \"OB-030\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"20 BLK OCEAN SHORE BLVD\", \"MilePost\": 10.02, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"4X4 ONLY\", \"Entry_Date_Time\": 1691394952000, \"DrivingZone\": \"BOTH\"}\n",
|
||||
"{\"OBJECTID\": 16, \"AccessName\": \"INTERNATIONAL SPEEDWAY BLVD\", \"AccessID\": \"DB-059\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"300 BLK S ATLANTIC AV\", \"MilePost\": 15.27, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1691395174000, \"DrivingZone\": \"BOTH\"}\n",
|
||||
"{\"OBJECTID\": 26, \"AccessName\": \"UNIVERSITY BLVD\", \"AccessID\": \"DB-048\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"900 BLK N ATLANTIC AV\", \"MilePost\": 13.74, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1691394892000, \"DrivingZone\": \"BOTH\"}\n",
|
||||
"{\"OBJECTID\": 36, \"AccessName\": \"BEACH ST\", \"AccessID\": \"PI-097\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"4890 BLK S ATLANTIC AV\", \"MilePost\": 25.85, \"City\": \"PONCE INLET\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1691397348000, \"DrivingZone\": \"BOTH\"}\n",
|
||||
"{\"OBJECTID\": 40, \"AccessName\": \"BOTEFUHR AV\", \"AccessID\": \"DBS-067\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1900 BLK S ATLANTIC AV\", \"MilePost\": 16.68, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"4X4 ONLY\", \"Entry_Date_Time\": 1691395124000, \"DrivingZone\": \"YES\"}\n",
|
||||
"{\"OBJECTID\": 41, \"AccessName\": \"SILVER BEACH AV\", \"AccessID\": \"DB-064\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1000 BLK S ATLANTIC AV\", \"MilePost\": 15.98, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1691395174000, \"DrivingZone\": \"YES\"}\n",
|
||||
"{\"OBJECTID\": 50, \"AccessName\": \"3RD AV\", \"AccessID\": \"NS-118\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1200 BLK HILL ST\", \"MilePost\": 3.25, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1691397348000, \"DrivingZone\": \"YES\"}\n",
|
||||
"{\"OBJECTID\": 58, \"AccessName\": \"DUNLAWTON BLVD\", \"AccessID\": \"DBS-078\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3400 BLK S ATLANTIC AV\", \"MilePost\": 20.61, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1691397348000, \"DrivingZone\": \"YES\"}\n",
|
||||
"{\"OBJECTID\": 63, \"AccessName\": \"MILSAP RD\", \"AccessID\": \"OB-037\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"700 BLK S ATLANTIC AV\", \"MilePost\": 11.52, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"4X4 ONLY\", \"Entry_Date_Time\": 1691394952000, \"DrivingZone\": \"YES\"}\n",
|
||||
"{\"OBJECTID\": 68, \"AccessName\": \"EMILIA AV\", \"AccessID\": \"DBS-082\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3790 BLK S ATLANTIC AV\", \"MilePost\": 21.38, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1691397348000, \"DrivingZone\": \"BOTH\"}\n",
|
||||
"{\"OBJECTID\": 92, \"AccessName\": \"FLAGLER AV\", \"AccessID\": \"NS-110\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"500 BLK FLAGLER AV\", \"MilePost\": 2.57, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1691397348000, \"DrivingZone\": \"YES\"}\n",
|
||||
"{\"OBJECTID\": 94, \"AccessName\": \"CRAWFORD RD\", \"AccessID\": \"NS-108\", \"AccessType\": \"OPEN VEHICLE RAMP - PASS\", \"GeneralLoc\": \"800 BLK N ATLANTIC AV\", \"MilePost\": 2.19, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1691397348000, \"DrivingZone\": \"YES\"}\n",
|
||||
"{\"OBJECTID\": 122, \"AccessName\": \"HARTFORD AV\", \"AccessID\": \"DB-043\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1890 BLK N ATLANTIC AV\", \"MilePost\": 12.76, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"CLOSED - SEASONAL\", \"Entry_Date_Time\": 1691394832000, \"DrivingZone\": \"YES\"}\n",
|
||||
"{\"OBJECTID\": 125, \"AccessName\": \"WILLIAMS AV\", \"AccessID\": \"DB-042\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"2200 BLK N ATLANTIC AV\", \"MilePost\": 12.5, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"4X4 ONLY\", \"Entry_Date_Time\": 1691394952000, \"DrivingZone\": \"YES\"}\n",
|
||||
"{\"OBJECTID\": 134, \"AccessName\": \"CARDINAL DR\", \"AccessID\": \"OB-036\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"600 BLK S ATLANTIC AV\", \"MilePost\": 11.27, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"4X4 ONLY\", \"Entry_Date_Time\": 1691394952000, \"DrivingZone\": \"YES\"}\n",
|
||||
"{\"OBJECTID\": 229, \"AccessName\": \"EL PORTAL ST\", \"AccessID\": \"DBS-076\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3200 BLK S ATLANTIC AV\", \"MilePost\": 20.04, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1691397348000, \"DrivingZone\": \"YES\"}\n",
|
||||
"{\"OBJECTID\": 230, \"AccessName\": \"HARVARD DR\", \"AccessID\": \"OB-038\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"900 BLK S ATLANTIC AV\", \"MilePost\": 11.72, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"4X4 ONLY\", \"Entry_Date_Time\": 1691394952000, \"DrivingZone\": \"YES\"}\n",
|
||||
"{\"OBJECTID\": 232, \"AccessName\": \"VAN AV\", \"AccessID\": \"DBS-075\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3100 BLK S ATLANTIC AV\", \"MilePost\": 19.6, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1691397348000, \"DrivingZone\": \"YES\"}\n",
|
||||
"{\"OBJECTID\": 233, \"AccessName\": \"ROCKEFELLER DR\", \"AccessID\": \"OB-034\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"400 BLK S ATLANTIC AV\", \"MilePost\": 10.9, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"CLOSED - SEASONAL\", \"Entry_Date_Time\": 1691394832000, \"DrivingZone\": \"YES\"}\n",
|
||||
"{\"OBJECTID\": 235, \"AccessName\": \"MINERVA RD\", \"AccessID\": \"DBS-069\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"2300 BLK S ATLANTIC AV\", \"MilePost\": 17.52, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"4X4 ONLY\", \"Entry_Date_Time\": 1691395124000, \"DrivingZone\": \"YES\"}\n"
|
||||
"{\"OBJECTID\": 4, \"AccessName\": \"BEACHWAY AV\", \"AccessID\": \"NS-106\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1400 N ATLANTIC AV\", \"MilePost\": 1.57, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
|
||||
"{\"OBJECTID\": 5, \"AccessName\": \"SEABREEZE BLVD\", \"AccessID\": \"DB-051\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"500 BLK N ATLANTIC AV\", \"MilePost\": 14.24, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"BOTH\"}\n",
|
||||
"{\"OBJECTID\": 6, \"AccessName\": \"27TH AV\", \"AccessID\": \"NS-141\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3600 BLK S ATLANTIC AV\", \"MilePost\": 4.83, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"BOTH\"}\n",
|
||||
"{\"OBJECTID\": 11, \"AccessName\": \"INTERNATIONAL SPEEDWAY BLVD\", \"AccessID\": \"DB-059\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"300 BLK S ATLANTIC AV\", \"MilePost\": 15.27, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"BOTH\"}\n",
|
||||
"{\"OBJECTID\": 14, \"AccessName\": \"GRANADA BLVD\", \"AccessID\": \"OB-030\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"20 BLK OCEAN SHORE BLVD\", \"MilePost\": 10.02, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"BOTH\"}\n",
|
||||
"{\"OBJECTID\": 27, \"AccessName\": \"UNIVERSITY BLVD\", \"AccessID\": \"DB-048\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"900 BLK N ATLANTIC AV\", \"MilePost\": 13.74, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"BOTH\"}\n",
|
||||
"{\"OBJECTID\": 38, \"AccessName\": \"BEACH ST\", \"AccessID\": \"PI-097\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"4890 BLK S ATLANTIC AV\", \"MilePost\": 25.85, \"City\": \"PONCE INLET\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"BOTH\"}\n",
|
||||
"{\"OBJECTID\": 42, \"AccessName\": \"BOTEFUHR AV\", \"AccessID\": \"DBS-067\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1900 BLK S ATLANTIC AV\", \"MilePost\": 16.68, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
|
||||
"{\"OBJECTID\": 43, \"AccessName\": \"SILVER BEACH AV\", \"AccessID\": \"DB-064\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1000 BLK S ATLANTIC AV\", \"MilePost\": 15.98, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
|
||||
"{\"OBJECTID\": 45, \"AccessName\": \"MILSAP RD\", \"AccessID\": \"OB-037\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"700 BLK S ATLANTIC AV\", \"MilePost\": 11.52, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
|
||||
"{\"OBJECTID\": 56, \"AccessName\": \"3RD AV\", \"AccessID\": \"NS-118\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1200 BLK HILL ST\", \"MilePost\": 3.25, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
|
||||
"{\"OBJECTID\": 64, \"AccessName\": \"DUNLAWTON BLVD\", \"AccessID\": \"DBS-078\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3400 BLK S ATLANTIC AV\", \"MilePost\": 20.61, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
|
||||
"{\"OBJECTID\": 69, \"AccessName\": \"EMILIA AV\", \"AccessID\": \"DBS-082\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3790 BLK S ATLANTIC AV\", \"MilePost\": 21.38, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"BOTH\"}\n",
|
||||
"{\"OBJECTID\": 94, \"AccessName\": \"FLAGLER AV\", \"AccessID\": \"NS-110\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"500 BLK FLAGLER AV\", \"MilePost\": 2.57, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
|
||||
"{\"OBJECTID\": 96, \"AccessName\": \"CRAWFORD RD\", \"AccessID\": \"NS-108\", \"AccessType\": \"OPEN VEHICLE RAMP - PASS\", \"GeneralLoc\": \"800 BLK N ATLANTIC AV\", \"MilePost\": 2.19, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
|
||||
"{\"OBJECTID\": 124, \"AccessName\": \"HARTFORD AV\", \"AccessID\": \"DB-043\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1890 BLK N ATLANTIC AV\", \"MilePost\": 12.76, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
|
||||
"{\"OBJECTID\": 127, \"AccessName\": \"WILLIAMS AV\", \"AccessID\": \"DB-042\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"2200 BLK N ATLANTIC AV\", \"MilePost\": 12.5, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
|
||||
"{\"OBJECTID\": 136, \"AccessName\": \"CARDINAL DR\", \"AccessID\": \"OB-036\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"600 BLK S ATLANTIC AV\", \"MilePost\": 11.27, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
|
||||
"{\"OBJECTID\": 229, \"AccessName\": \"EL PORTAL ST\", \"AccessID\": \"DBS-076\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3200 BLK S ATLANTIC AV\", \"MilePost\": 20.04, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
|
||||
"{\"OBJECTID\": 230, \"AccessName\": \"HARVARD DR\", \"AccessID\": \"OB-038\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"900 BLK S ATLANTIC AV\", \"MilePost\": 11.72, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
|
||||
"{\"OBJECTID\": 232, \"AccessName\": \"VAN AV\", \"AccessID\": \"DBS-075\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3100 BLK S ATLANTIC AV\", \"MilePost\": 19.6, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
|
||||
"{\"OBJECTID\": 234, \"AccessName\": \"ROCKEFELLER DR\", \"AccessID\": \"OB-034\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"400 BLK S ATLANTIC AV\", \"MilePost\": 10.9, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
|
||||
"{\"OBJECTID\": 235, \"AccessName\": \"MINERVA RD\", \"AccessID\": \"DBS-069\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"2300 BLK S ATLANTIC AV\", \"MilePost\": 17.52, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -0,0 +1,878 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1f3cebbe-079a-4bfe-b1a1-07bdac882ce2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Amazon Textract \n",
|
||||
"\n",
|
||||
"Amazon Textract is a machine learning (ML) service that automatically extracts text, handwriting, and data from scanned documents. It goes beyond simple optical character recognition (OCR) to identify, understand, and extract data from forms and tables. Today, many companies manually extract data from scanned documents such as PDFs, images, tables, and forms, or through simple OCR software that requires manual configuration (which often must be updated when the form changes). To overcome these manual and expensive processes, Textract uses ML to read and process any type of document, accurately extracting text, handwriting, tables, and other data with no manual effort. You can quickly automate document processing and act on the information extracted, whether you’re automating loans processing or extracting information from invoices and receipts. Textract can extract the data in minutes instead of hours or days.\n",
|
||||
"\n",
|
||||
"This sample demonstrates the use of Amazon Textract in combination with LangChain as a DocumentLoader.\n",
|
||||
"\n",
|
||||
"Textract supports PDF, TIFF, PNG and JPEG format.\n",
|
||||
"\n",
|
||||
"Check https://docs.aws.amazon.com/textract/latest/dg/limits-document.html for supported document sizes, languages and characters."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "c049beaf-f904-4ce6-91ca-805da62084c2",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!pip install langchain boto3 openai tiktoken python-dotenv -q"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "400b25c6-befa-4730-a201-39ff112c8858",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Sample 1\n",
|
||||
"\n",
|
||||
"The first example uses a local file, which internally will be send to Amazon Textract sync API [DetectDocumentText](https://docs.aws.amazon.com/textract/latest/dg/API_DetectDocumentText.html). \n",
|
||||
"\n",
|
||||
"Local files or URL endpoints like HTTP:// are limited to one page documents for Textract.\n",
|
||||
"Multi-page documents have to reside on S3. This sample file is a jpeg."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "1becee92-e82f-42d4-9b4e-b23d77cbe88d",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import AmazonTextractPDFLoader\n",
|
||||
"loader = AmazonTextractPDFLoader(\"example_data/alejandro_rosalez_sample-small.jpeg\")\n",
|
||||
"documents = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d566dc56-c9a9-44ec-84fb-a81928f90d40",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Output from the file"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "1272ce8c-d298-4059-ac0a-780bf5f82302",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='Patient Information First Name: ALEJANDRO Last Name: ROSALEZ Date of Birth: 10/10/1982 Sex: M Marital Status: MARRIED Email Address: Address: 123 ANY STREET City: ANYTOWN State: CA Zip Code: 12345 Phone: 646-555-0111 Emergency Contact 1: First Name: CARLOS Last Name: SALAZAR Phone: 212-555-0150 Relationship to Patient: BROTHER Emergency Contact 2: First Name: JANE Last Name: DOE Phone: 650-555-0123 Relationship FRIEND to Patient: Did you feel fever or feverish lately? Yes No Are you having shortness of breath? Yes No Do you have a cough? Yes No Did you experience loss of taste or smell? Yes No Where you in contact with any confirmed COVID-19 positive patients? Yes No Did you travel in the past 14 days to any regions affected by COVID-19? Yes No Patient Information First Name: ALEJANDRO Last Name: ROSALEZ Date of Birth: 10/10/1982 Sex: M Marital Status: MARRIED Email Address: Address: 123 ANY STREET City: ANYTOWN State: CA Zip Code: 12345 Phone: 646-555-0111 Emergency Contact 1: First Name: CARLOS Last Name: SALAZAR Phone: 212-555-0150 Relationship to Patient: BROTHER Emergency Contact 2: First Name: JANE Last Name: DOE Phone: 650-555-0123 Relationship FRIEND to Patient: Did you feel fever or feverish lately? Yes No Are you having shortness of breath? Yes No Do you have a cough? Yes No Did you experience loss of taste or smell? Yes No Where you in contact with any confirmed COVID-19 positive patients? Yes No Did you travel in the past 14 days to any regions affected by COVID-19? Yes No ', metadata={'source': 'example_data/alejandro_rosalez_sample-small.jpeg', 'page': 1})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"documents"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4cf7f19c-3635-453a-9c76-4baf98b8d7f4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Sample 2\n",
|
||||
"The next sample loads a file from an HTTPS endpoint. \n",
|
||||
"It has to be single page, as Amazon Textract requires all multi-page documents to be stored on S3."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "10374bfb-b325-451f-8bd0-c686710ab68c",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import AmazonTextractPDFLoader\n",
|
||||
"loader = AmazonTextractPDFLoader(\"https://amazon-textract-public-content.s3.us-east-2.amazonaws.com/langchain/alejandro_rosalez_sample_1.jpg\")\n",
|
||||
"documents = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "16a2b6a3-7514-4c2c-a427-6847169af473",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='Patient Information First Name: ALEJANDRO Last Name: ROSALEZ Date of Birth: 10/10/1982 Sex: M Marital Status: MARRIED Email Address: Address: 123 ANY STREET City: ANYTOWN State: CA Zip Code: 12345 Phone: 646-555-0111 Emergency Contact 1: First Name: CARLOS Last Name: SALAZAR Phone: 212-555-0150 Relationship to Patient: BROTHER Emergency Contact 2: First Name: JANE Last Name: DOE Phone: 650-555-0123 Relationship FRIEND to Patient: Did you feel fever or feverish lately? Yes No Are you having shortness of breath? Yes No Do you have a cough? Yes No Did you experience loss of taste or smell? Yes No Where you in contact with any confirmed COVID-19 positive patients? Yes No Did you travel in the past 14 days to any regions affected by COVID-19? Yes No Patient Information First Name: ALEJANDRO Last Name: ROSALEZ Date of Birth: 10/10/1982 Sex: M Marital Status: MARRIED Email Address: Address: 123 ANY STREET City: ANYTOWN State: CA Zip Code: 12345 Phone: 646-555-0111 Emergency Contact 1: First Name: CARLOS Last Name: SALAZAR Phone: 212-555-0150 Relationship to Patient: BROTHER Emergency Contact 2: First Name: JANE Last Name: DOE Phone: 650-555-0123 Relationship FRIEND to Patient: Did you feel fever or feverish lately? Yes No Are you having shortness of breath? Yes No Do you have a cough? Yes No Did you experience loss of taste or smell? Yes No Where you in contact with any confirmed COVID-19 positive patients? Yes No Did you travel in the past 14 days to any regions affected by COVID-19? Yes No ', metadata={'source': 'example_data/alejandro_rosalez_sample-small.jpeg', 'page': 1})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"documents"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3a9cd8ec-e663-4dc7-9db1-d2f575253141",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Sample 3\n",
|
||||
"\n",
|
||||
"Processing a multi-page document requires the document to be on S3. The sample document resides in a bucket in us-east-2 and Textract needs to be called in that same region to be successful, so we set the region_name on the client and pass that in to the loader to ensure Textract is called from us-east-2. You could also to have your notebook running in us-east-2, setting the AWS_DEFAULT_REGION set to us-east-2 or when running in a different environment, pass in a boto3 Textract client with that region name like in the cell below."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "8185e3e6-9599-4a47-8969-d6dcef3e6404",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import boto3\n",
|
||||
"textract_client = boto3.client('textract', region_name='us-east-2')\n",
|
||||
"\n",
|
||||
"file_path = \"s3://amazon-textract-public-content/langchain/layout-parser-paper.pdf\"\n",
|
||||
"loader = AmazonTextractPDFLoader(file_path, client=textract_client)\n",
|
||||
"documents = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b8901eec-070d-4fd6-9d65-52211d332441",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now getting the number of pages to validate the response (printing out the full response would be quite long...). We expect 16 pages."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "b23c01c8-cf69-4fe2-8141-4621edb7d79c",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"16"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"len(documents)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b3e41b4d-b159-4274-89be-80d8159134ef",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using the AmazonTextractPDFLoader in an LangChain chain (e. g. OpenAI)\n",
|
||||
"\n",
|
||||
"The AmazonTextractPDFLoader can be used in a chain the same way the other loaders are used.\n",
|
||||
"Textract itself does have a [Query feature](https://docs.aws.amazon.com/textract/latest/dg/API_Query.html), which offers similar functionality to the QA chain in this sample, which is worth checking out as well."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "53c47b24-cc06-4256-9e5b-a82fc80bc55d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# You can store your OPENAI_API_KEY in a .env file as well\n",
|
||||
"# import os \n",
|
||||
"# from dotenv import load_dotenv\n",
|
||||
"\n",
|
||||
"# load_dotenv()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "a9ae004c-246c-4c7f-8458-191cd7424a9b",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Or set the OpenAI key in the environment directly\n",
|
||||
"import os \n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"your-OpenAI-API-key\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "d52b089c-10ca-45fb-8669-8a1c5fee10d5",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' The authors are Zejiang Shen, Ruochen Zhang, Melissa Dell, Benjamin Charles Germain Lee, Jacob Carlson, Weining Li, Gardner, M., Grus, J., Neumann, M., Tafjord, O., Dasigi, P., Liu, N., Peters, M., Schmitz, M., Zettlemoyer, L., Lukasz Garncarek, Powalski, R., Stanislawek, T., Topolski, B., Halama, P., Gralinski, F., Graves, A., Fernández, S., Gomez, F., Schmidhuber, J., Harley, A.W., Ufkes, A., Derpanis, K.G., He, K., Gkioxari, G., Dollár, P., Girshick, R., He, K., Zhang, X., Ren, S., Sun, J., Kay, A., Lamiroy, B., Lopresti, D., Mears, J., Jakeway, E., Ferriter, M., Adams, C., Yarasavage, N., Thomas, D., Zwaard, K., Li, M., Cui, L., Huang,'"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.chains.question_answering import load_qa_chain\n",
|
||||
"\n",
|
||||
"chain = load_qa_chain(llm=OpenAI(), chain_type=\"map_reduce\")\n",
|
||||
"query = [\"Who are the autors?\"]\n",
|
||||
"\n",
|
||||
"chain.run(input_documents=documents, question=query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1a09d18b-ab7b-468e-ae66-f92abf666b9b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"availableInstances": [
|
||||
{
|
||||
"_defaultOrder": 0,
|
||||
"_isFastLaunch": true,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 4,
|
||||
"name": "ml.t3.medium",
|
||||
"vcpuNum": 2
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 1,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 8,
|
||||
"name": "ml.t3.large",
|
||||
"vcpuNum": 2
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 2,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 16,
|
||||
"name": "ml.t3.xlarge",
|
||||
"vcpuNum": 4
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 3,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 32,
|
||||
"name": "ml.t3.2xlarge",
|
||||
"vcpuNum": 8
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 4,
|
||||
"_isFastLaunch": true,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 8,
|
||||
"name": "ml.m5.large",
|
||||
"vcpuNum": 2
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 5,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 16,
|
||||
"name": "ml.m5.xlarge",
|
||||
"vcpuNum": 4
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 6,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 32,
|
||||
"name": "ml.m5.2xlarge",
|
||||
"vcpuNum": 8
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 7,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 64,
|
||||
"name": "ml.m5.4xlarge",
|
||||
"vcpuNum": 16
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 8,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 128,
|
||||
"name": "ml.m5.8xlarge",
|
||||
"vcpuNum": 32
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 9,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 192,
|
||||
"name": "ml.m5.12xlarge",
|
||||
"vcpuNum": 48
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 10,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 256,
|
||||
"name": "ml.m5.16xlarge",
|
||||
"vcpuNum": 64
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 11,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 384,
|
||||
"name": "ml.m5.24xlarge",
|
||||
"vcpuNum": 96
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 12,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 8,
|
||||
"name": "ml.m5d.large",
|
||||
"vcpuNum": 2
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 13,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 16,
|
||||
"name": "ml.m5d.xlarge",
|
||||
"vcpuNum": 4
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 14,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 32,
|
||||
"name": "ml.m5d.2xlarge",
|
||||
"vcpuNum": 8
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 15,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 64,
|
||||
"name": "ml.m5d.4xlarge",
|
||||
"vcpuNum": 16
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 16,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 128,
|
||||
"name": "ml.m5d.8xlarge",
|
||||
"vcpuNum": 32
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 17,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 192,
|
||||
"name": "ml.m5d.12xlarge",
|
||||
"vcpuNum": 48
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 18,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 256,
|
||||
"name": "ml.m5d.16xlarge",
|
||||
"vcpuNum": 64
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 19,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 384,
|
||||
"name": "ml.m5d.24xlarge",
|
||||
"vcpuNum": 96
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 20,
|
||||
"_isFastLaunch": false,
|
||||
"category": "General purpose",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": true,
|
||||
"memoryGiB": 0,
|
||||
"name": "ml.geospatial.interactive",
|
||||
"supportedImageNames": [
|
||||
"sagemaker-geospatial-v1-0"
|
||||
],
|
||||
"vcpuNum": 0
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 21,
|
||||
"_isFastLaunch": true,
|
||||
"category": "Compute optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 4,
|
||||
"name": "ml.c5.large",
|
||||
"vcpuNum": 2
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 22,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Compute optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 8,
|
||||
"name": "ml.c5.xlarge",
|
||||
"vcpuNum": 4
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 23,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Compute optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 16,
|
||||
"name": "ml.c5.2xlarge",
|
||||
"vcpuNum": 8
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 24,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Compute optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 32,
|
||||
"name": "ml.c5.4xlarge",
|
||||
"vcpuNum": 16
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 25,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Compute optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 72,
|
||||
"name": "ml.c5.9xlarge",
|
||||
"vcpuNum": 36
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 26,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Compute optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 96,
|
||||
"name": "ml.c5.12xlarge",
|
||||
"vcpuNum": 48
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 27,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Compute optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 144,
|
||||
"name": "ml.c5.18xlarge",
|
||||
"vcpuNum": 72
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 28,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Compute optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 192,
|
||||
"name": "ml.c5.24xlarge",
|
||||
"vcpuNum": 96
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 29,
|
||||
"_isFastLaunch": true,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 1,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 16,
|
||||
"name": "ml.g4dn.xlarge",
|
||||
"vcpuNum": 4
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 30,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 1,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 32,
|
||||
"name": "ml.g4dn.2xlarge",
|
||||
"vcpuNum": 8
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 31,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 1,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 64,
|
||||
"name": "ml.g4dn.4xlarge",
|
||||
"vcpuNum": 16
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 32,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 1,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 128,
|
||||
"name": "ml.g4dn.8xlarge",
|
||||
"vcpuNum": 32
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 33,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 4,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 192,
|
||||
"name": "ml.g4dn.12xlarge",
|
||||
"vcpuNum": 48
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 34,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 1,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 256,
|
||||
"name": "ml.g4dn.16xlarge",
|
||||
"vcpuNum": 64
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 35,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 1,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 61,
|
||||
"name": "ml.p3.2xlarge",
|
||||
"vcpuNum": 8
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 36,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 4,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 244,
|
||||
"name": "ml.p3.8xlarge",
|
||||
"vcpuNum": 32
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 37,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 8,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 488,
|
||||
"name": "ml.p3.16xlarge",
|
||||
"vcpuNum": 64
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 38,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 8,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 768,
|
||||
"name": "ml.p3dn.24xlarge",
|
||||
"vcpuNum": 96
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 39,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Memory Optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 16,
|
||||
"name": "ml.r5.large",
|
||||
"vcpuNum": 2
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 40,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Memory Optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 32,
|
||||
"name": "ml.r5.xlarge",
|
||||
"vcpuNum": 4
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 41,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Memory Optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 64,
|
||||
"name": "ml.r5.2xlarge",
|
||||
"vcpuNum": 8
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 42,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Memory Optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 128,
|
||||
"name": "ml.r5.4xlarge",
|
||||
"vcpuNum": 16
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 43,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Memory Optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 256,
|
||||
"name": "ml.r5.8xlarge",
|
||||
"vcpuNum": 32
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 44,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Memory Optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 384,
|
||||
"name": "ml.r5.12xlarge",
|
||||
"vcpuNum": 48
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 45,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Memory Optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 512,
|
||||
"name": "ml.r5.16xlarge",
|
||||
"vcpuNum": 64
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 46,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Memory Optimized",
|
||||
"gpuNum": 0,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 768,
|
||||
"name": "ml.r5.24xlarge",
|
||||
"vcpuNum": 96
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 47,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 1,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 16,
|
||||
"name": "ml.g5.xlarge",
|
||||
"vcpuNum": 4
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 48,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 1,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 32,
|
||||
"name": "ml.g5.2xlarge",
|
||||
"vcpuNum": 8
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 49,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 1,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 64,
|
||||
"name": "ml.g5.4xlarge",
|
||||
"vcpuNum": 16
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 50,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 1,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 128,
|
||||
"name": "ml.g5.8xlarge",
|
||||
"vcpuNum": 32
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 51,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 1,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 256,
|
||||
"name": "ml.g5.16xlarge",
|
||||
"vcpuNum": 64
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 52,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 4,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 192,
|
||||
"name": "ml.g5.12xlarge",
|
||||
"vcpuNum": 48
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 53,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 4,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 384,
|
||||
"name": "ml.g5.24xlarge",
|
||||
"vcpuNum": 96
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 54,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 8,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 768,
|
||||
"name": "ml.g5.48xlarge",
|
||||
"vcpuNum": 192
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 55,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 8,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 1152,
|
||||
"name": "ml.p4d.24xlarge",
|
||||
"vcpuNum": 96
|
||||
},
|
||||
{
|
||||
"_defaultOrder": 56,
|
||||
"_isFastLaunch": false,
|
||||
"category": "Accelerated computing",
|
||||
"gpuNum": 8,
|
||||
"hideHardwareSpecs": false,
|
||||
"memoryGiB": 1152,
|
||||
"name": "ml.p4de.24xlarge",
|
||||
"vcpuNum": 96
|
||||
}
|
||||
],
|
||||
"instance_type": "ml.t3.medium",
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -299,7 +299,7 @@
|
||||
"id": "1cf27fc8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you need to post process the `unstructured` elements after extraction, you can pass in a list of `Element` -> `Element` functions to the `post_processors` kwarg when you instantiate the `UnstructuredFileLoader`. This applies to other Unstructured loaders as well. Below is an example. Post processors are only applied if you run the loader in `\"elements\"` mode."
|
||||
"If you need to post process the `unstructured` elements after extraction, you can pass in a list of `str` -> `str` functions to the `post_processors` kwarg when you instantiate the `UnstructuredFileLoader`. This applies to other Unstructured loaders as well. Below is an example."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -495,7 +495,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.13"
|
||||
"version": "3.8.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
167
docs/extras/integrations/llms/bittensor.ipynb
Normal file
167
docs/extras/integrations/llms/bittensor.ipynb
Normal file
@@ -0,0 +1,167 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# NIBittensorLLM\n",
|
||||
"\n",
|
||||
"NIBittensorLLM is developed by [Neural Internet](https://neuralinternet.ai/), powered by [Bittensor](https://bittensor.com/).\n",
|
||||
"\n",
|
||||
"This LLM showcases true potential of decentralized AI by giving you the best response(s) from the Bittensor protocol, which consist of various AI models such as OpenAI, LLaMA2 etc.\n",
|
||||
"\n",
|
||||
"Users can view their logs, requests, and API keys on the [Validator Endpoint Frontend](https://api.neuralinternet.ai/). However, changes to the configuration are currently prohibited; otherwise, the user's queries will be blocked.\n",
|
||||
"\n",
|
||||
"If you encounter any difficulties or have any questions, please feel free to reach out to our developer on [GitHub](https://github.com/Kunj-2206), [Discord](https://discordapp.com/users/683542109248159777) or join our discord server for latest update and queries [Neural Internet](https://discord.gg/neuralinternet).\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Different Parameter and response handling for NIBittensorLLM "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import langchain\n",
|
||||
"from langchain.llms import NIBittensorLLM\n",
|
||||
"import json\n",
|
||||
"from pprint import pprint\n",
|
||||
"\n",
|
||||
"langchain.debug = True\n",
|
||||
"\n",
|
||||
"# System parameter in NIBittensorLLM is optional but you can set whatever you want to perform with model\n",
|
||||
"llm_sys = NIBittensorLLM(\n",
|
||||
" system_prompt=\"Your task is to determine response based on user prompt.Explain me like I am technical lead of a project\"\n",
|
||||
")\n",
|
||||
"sys_resp = llm_sys(\n",
|
||||
" \"What is bittensor and What are the potential benifits of decentralized AI?\"\n",
|
||||
")\n",
|
||||
"print(f\"Response provided by LLM with system prompt set is : {sys_resp}\")\n",
|
||||
"\n",
|
||||
"# The top_responses parameter can give multiple responses based on its parameter value\n",
|
||||
"# This below code retrive top 10 miner's response all the response are in format of json\n",
|
||||
"\n",
|
||||
"# Json response structure is\n",
|
||||
"\"\"\" {\n",
|
||||
" \"choices\": [\n",
|
||||
" {\"index\": Bittensor's Metagraph index number,\n",
|
||||
" \"uid\": Unique Identifier of a miner,\n",
|
||||
" \"responder_hotkey\": Hotkey of a miner,\n",
|
||||
" \"message\":{\"role\":\"assistant\",\"content\": Contains actual response},\n",
|
||||
" \"response_ms\": Time in millisecond required to fetch response from a miner} \n",
|
||||
" ]\n",
|
||||
" } \"\"\"\n",
|
||||
"\n",
|
||||
"multi_response_llm = NIBittensorLLM(top_responses=10)\n",
|
||||
"multi_resp = multi_response_llm(\"What is Neural Network Feeding Mechanism?\")\n",
|
||||
"json_multi_resp = json.loads(multi_resp)\n",
|
||||
"pprint(json_multi_resp)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using NIBittensorLLM with LLMChain and PromptTemplate"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import langchain\n",
|
||||
"from langchain import PromptTemplate, LLMChain\n",
|
||||
"from langchain.llms import NIBittensorLLM\n",
|
||||
"\n",
|
||||
"langchain.debug = True\n",
|
||||
"\n",
|
||||
"template = \"\"\"Question: {question}\n",
|
||||
"\n",
|
||||
"Answer: Let's think step by step.\"\"\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"prompt = PromptTemplate(template=template, input_variables=[\"question\"])\n",
|
||||
"\n",
|
||||
"# System parameter in NIBittensorLLM is optional but you can set whatever you want to perform with model\n",
|
||||
"llm = NIBittensorLLM(system_prompt=\"Your task is to determine response based on user prompt.\")\n",
|
||||
"\n",
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||
"question = \"What is bittensor?\"\n",
|
||||
"\n",
|
||||
"llm_chain.run(question)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using NIBittensorLLM with Conversational Agent and Google Search Tool"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import (\n",
|
||||
" AgentType,\n",
|
||||
" initialize_agent,\n",
|
||||
" load_tools,\n",
|
||||
" ZeroShotAgent,\n",
|
||||
" Tool,\n",
|
||||
" AgentExecutor,\n",
|
||||
")\n",
|
||||
"from langchain.memory import ConversationBufferMemory\n",
|
||||
"from langchain import LLMChain, PromptTemplate\n",
|
||||
"from langchain.utilities import GoogleSearchAPIWrapper, SerpAPIWrapper\n",
|
||||
"from langchain.llms import NIBittensorLLM\n",
|
||||
"\n",
|
||||
"memory = ConversationBufferMemory(memory_key=\"chat_history\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"prefix = \"\"\"Answer prompt based on LLM if there is need to search something then use internet and observe internet result and give accurate reply of user questions also try to use authenticated sources\"\"\"\n",
|
||||
"suffix = \"\"\"Begin!\n",
|
||||
" {chat_history}\n",
|
||||
" Question: {input}\n",
|
||||
" {agent_scratchpad}\"\"\"\n",
|
||||
"\n",
|
||||
"prompt = ZeroShotAgent.create_prompt(\n",
|
||||
" tools,\n",
|
||||
" prefix=prefix,\n",
|
||||
" suffix=suffix,\n",
|
||||
" input_variables=[\"input\", \"chat_history\", \"agent_scratchpad\"],\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"llm = NIBittensorLLM(system_prompt=\"Your task is to determine response based on user prompt\")\n",
|
||||
"\n",
|
||||
"llm_chain = LLMChain(llm=llm, prompt=prompt)\n",
|
||||
"\n",
|
||||
"memory = ConversationBufferMemory(memory_key=\"chat_history\")\n",
|
||||
"\n",
|
||||
"agent = ZeroShotAgent(llm_chain=llm_chain, tools=tools, verbose=True)\n",
|
||||
"agent_chain = AgentExecutor.from_agent_and_tools(\n",
|
||||
" agent=agent, tools=tools, verbose=True, memory=memory\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"response = agent_chain.run(input=prompt)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -32,7 +32,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": null,
|
||||
"id": "d772b637-de00-4663-bd77-9bc96d798db2",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -135,7 +135,7 @@
|
||||
"id": "4c16fded-70d1-42af-8bfa-6ddda9f0bc63",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Flan, by Google"
|
||||
"### `Flan`, by `Google`"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -178,7 +178,7 @@
|
||||
"id": "1a5c97af-89bc-4e59-95c1-223742a9160b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Dolly, by Databricks\n",
|
||||
"### `Dolly`, by `Databricks`\n",
|
||||
"\n",
|
||||
"See [Databricks](https://huggingface.co/databricks) organization page for a list of available models."
|
||||
]
|
||||
@@ -225,14 +225,14 @@
|
||||
"id": "03f6ae52-b5f9-4de6-832c-551cb3fa11ae",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Camel, by Writer\n",
|
||||
"### `Camel`, by `Writer`\n",
|
||||
"\n",
|
||||
"See [Writer's](https://huggingface.co/Writer) organization page for a list of available models."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 11,
|
||||
"id": "257a091d-750b-4910-ac08-fe1c7b3fd98b",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -261,7 +261,7 @@
|
||||
"id": "2bf838eb-1083-402f-b099-b07c452418c8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### XGen, by Salesforce\n",
|
||||
"### `XGen`, by `Salesforce`\n",
|
||||
"\n",
|
||||
"See [more information](https://github.com/salesforce/xgen)."
|
||||
]
|
||||
@@ -295,7 +295,7 @@
|
||||
"id": "0aca9f9e-f333-449c-97b2-10d1dbf17e75",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Falcon, by Technology Innovation Institute (TII)\n",
|
||||
"### `Falcon`, by `Technology Innovation Institute (TII)`\n",
|
||||
"\n",
|
||||
"See [more information](https://huggingface.co/tiiuae/falcon-40b)."
|
||||
]
|
||||
@@ -323,6 +323,86 @@
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||
"print(llm_chain.run(question))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7e15849b-5561-4bb9-86ec-6412ca10196a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### `InternLM-Chat`, by `Shanghai AI Laboratory`\n",
|
||||
"\n",
|
||||
"See [more information](https://huggingface.co/internlm/internlm-7b)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "3b533461-59f8-406e-907b-000841fa60a7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"repo_id = \"internlm/internlm-chat-7b\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c71210b9-5895-41a2-889a-f430d22fa1aa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = HuggingFaceHub(\n",
|
||||
" repo_id=repo_id, model_kwargs={\"max_length\": 128, \"temperature\": 0.8}\n",
|
||||
")\n",
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||
"print(llm_chain.run(question))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4f2e5132-1713-42d7-919a-8c313744ce95",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### `Qwen`, by `Alibaba Cloud`\n",
|
||||
"\n",
|
||||
">`Tongyi Qianwen-7B` (`Qwen-7B`) is a model with a scale of 7 billion parameters in the `Tongyi Qianwen` large model series developed by `Alibaba Cloud`. `Qwen-7B` is a large language model based on Transformer, which is trained on ultra-large-scale pre-training data.\n",
|
||||
"\n",
|
||||
"See [more information on HuggingFace](https://huggingface.co/Qwen/Qwen-7B) of on [GitHub](https://github.com/QwenLM/Qwen-7B).\n",
|
||||
"\n",
|
||||
"See here a [big example for LangChain integration and Qwen](https://github.com/QwenLM/Qwen-7B/blob/main/examples/langchain_tooluse.ipynb)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "f598b1ca-77c7-40f1-a83f-c21ea9910c88",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"repo_id = \"Qwen/Qwen-7B\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2c97f4e2-d401-44fb-9da7-b60b2e2cc663",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = HuggingFaceHub(\n",
|
||||
" repo_id=repo_id, model_kwargs={\"max_length\": 128, \"temperature\": 0.5}\n",
|
||||
")\n",
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||
"print(llm_chain.run(question))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1dd67c1e-1efc-4def-bde4-2e5265725303",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -341,7 +421,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -21,19 +21,19 @@
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"To use, you should have the ``transformers`` python [package installed](https://pypi.org/project/transformers/)."
|
||||
"To use, you should have the ``transformers`` python [package installed](https://pypi.org/project/transformers/), as well as [pytorch](https://pytorch.org/get-started/locally/). You can also install `xformer` for a more memory-efficient attention implementation."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": null,
|
||||
"id": "d772b637-de00-4663-bd77-9bc96d798db2",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install transformers > /dev/null"
|
||||
"%pip install transformers --quiet"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -46,22 +46,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 6,
|
||||
"id": "165ae236-962a-4763-8052-c4836d78a5d2",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"WARNING:root:Failed to default session, using empty session: HTTPConnectionPool(host='localhost', port=8000): Max retries exceeded with url: /sessions (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x1117f9790>: Failed to establish a new connection: [Errno 61] Connection refused'))\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import HuggingFacePipeline\n",
|
||||
"from langchain.llms import HuggingFacePipeline\n",
|
||||
"\n",
|
||||
"llm = HuggingFacePipeline.from_model_id(\n",
|
||||
" model_id=\"bigscience/bloom-1b7\",\n",
|
||||
@@ -75,24 +67,18 @@
|
||||
"id": "00104b27-0c15-4a97-b198-4512337ee211",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Integrate the model in an LLMChain"
|
||||
"### Create Chain\n",
|
||||
"\n",
|
||||
"With the model loaded into memory, you can compose it with a prompt to\n",
|
||||
"form a chain."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 7,
|
||||
"id": "3acf0069",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/wfh/code/lc/lckg/.venv/lib/python3.11/site-packages/transformers/generation/utils.py:1288: UserWarning: Using `max_length`'s default (64) to control the generation length. This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.\n",
|
||||
" warnings.warn(\n",
|
||||
"WARNING:root:Failed to persist run: HTTPConnectionPool(host='localhost', port=8000): Max retries exceeded with url: /chain-runs (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x144d06910>: Failed to establish a new connection: [Errno 61] Connection refused'))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
@@ -102,27 +88,19 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain import PromptTemplate, LLMChain\n",
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"\n",
|
||||
"template = \"\"\"Question: {question}\n",
|
||||
"\n",
|
||||
"Answer: Let's think step by step.\"\"\"\n",
|
||||
"prompt = PromptTemplate(template=template, input_variables=[\"question\"])\n",
|
||||
"prompt = PromptTemplate.from_template(template)\n",
|
||||
"\n",
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||
"chain = prompt | llm\n",
|
||||
"\n",
|
||||
"question = \"What is electroencephalography?\"\n",
|
||||
"\n",
|
||||
"print(llm_chain.run(question))"
|
||||
"print(chain.invoke({\"question\": question}))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "843a3837",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -74,7 +74,7 @@
|
||||
" typical_p=0.95,\n",
|
||||
" temperature=0.01,\n",
|
||||
" repetition_penalty=1.03,\n",
|
||||
" stream=True\n",
|
||||
" streaming=True\n",
|
||||
")\n",
|
||||
"llm(\"What did foo say about bar?\", callbacks=[StreamingStdOutCallbackHandler()])"
|
||||
]
|
||||
|
||||
@@ -242,7 +242,7 @@
|
||||
"llm = LlamaCpp(\n",
|
||||
" model_path=\"/Users/rlm/Desktop/Code/llama/llama-2-7b-ggml/llama-2-7b-chat.ggmlv3.q4_0.bin\",\n",
|
||||
" temperature=0.75,\n",
|
||||
" max_length=2000,\n",
|
||||
" max_tokens=2000,\n",
|
||||
" top_p=1,\n",
|
||||
" callback_manager=callback_manager,\n",
|
||||
" verbose=True,\n",
|
||||
|
||||
@@ -19,8 +19,29 @@
|
||||
"First, follow [these instructions](https://github.com/jmorganca/ollama) to set up and run a local Ollama instance:\n",
|
||||
"\n",
|
||||
"* [Download](https://ollama.ai/download)\n",
|
||||
"* Fetch a model, e.g., `Llama-7b`: `ollama pull llama2`\n",
|
||||
"* Run `ollama run llama2`\n",
|
||||
"* Fetch a model via `ollama pull <model family>`\n",
|
||||
"* e.g., for `Llama-7b`: `ollama pull llama2` (see full list [here](https://github.com/jmorganca/ollama))\n",
|
||||
"* This will download the most basic version of the model typically (e.g., smallest # parameters and `q4_0`)\n",
|
||||
"* On Mac, it will download to \n",
|
||||
"\n",
|
||||
"`~/.ollama/models/manifests/registry.ollama.ai/library/<model family>/latest`\n",
|
||||
"\n",
|
||||
"* And we specify a particular version, e.g., for `ollama pull vicuna:13b-v1.5-16k-q4_0`\n",
|
||||
"* The file is here with the model version in place of `latest`\n",
|
||||
"\n",
|
||||
"`~/.ollama/models/manifests/registry.ollama.ai/library/vicuna/13b-v1.5-16k-q4_0`\n",
|
||||
"\n",
|
||||
"You can easily access models in a few ways:\n",
|
||||
"\n",
|
||||
"1/ if the app is running:\n",
|
||||
"* All of your local models are automatically served on `localhost:11434`\n",
|
||||
"* Select your model when setting `llm = Ollama(..., model=\"<model family>:<version>\")`\n",
|
||||
"* If you set `llm = Ollama(..., model=\"<model family\")` withoout a version it will simply look for `latest`\n",
|
||||
"\n",
|
||||
"2/ if building from source or just running the binary: \n",
|
||||
"* Then you must run `ollama serve`\n",
|
||||
"* All of your local models are automatically served on `localhost:11434`\n",
|
||||
"* Then, select as shown above\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Usage\n",
|
||||
|
||||
@@ -1,80 +1,83 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "9597802c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Nebula\n",
|
||||
"# Nebula (Symbl.ai)\n",
|
||||
"[Nebula](https://symbl.ai/nebula/) is a large language model (LLM) built by [Symbl.ai](https://symbl.ai). It is trained to perform generative tasks on human conversations. Nebula excels at modeling the nuanced details of a conversation and performing tasks on the conversation.\n",
|
||||
"\n",
|
||||
"[Nebula](https://symbl.ai/nebula/) is a fully-managed Conversation platform, on which you can build, deploy, and manage scalable AI applications.\n",
|
||||
"Nebula documentation: https://docs.symbl.ai/docs/nebula-llm\n",
|
||||
"\n",
|
||||
"This example goes over how to use LangChain to interact with the [Nebula platform](https://docs.symbl.ai/docs/nebula-llm-overview). \n",
|
||||
"\n",
|
||||
"It will send the requests to Nebula Service endpoint, which concatenates `SYMBLAI_NEBULA_SERVICE_URL` and `SYMBLAI_NEBULA_SERVICE_PATH`, with a token defined in `SYMBLAI_NEBULA_SERVICE_TOKEN`"
|
||||
]
|
||||
"This example goes over how to use LangChain to interact with the [Nebula platform](https://docs.symbl.ai/docs/nebula-llm)."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "bb8cd830db4a004e"
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f15ebe0d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Integrate with a LLMChain"
|
||||
]
|
||||
"Make sure you have API Key with you. If you don't have one please [request one](https://info.symbl.ai/Nebula_Private_Beta.html)."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "519570b6539aa18c"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5472a7cd-af26-48ca-ae9b-5f6ae73c74d2",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from langchain.llms.symblai_nebula import Nebula\n",
|
||||
"\n",
|
||||
"os.environ[\"NEBULA_SERVICE_URL\"] = NEBULA_SERVICE_URL\n",
|
||||
"os.environ[\"NEBULA_SERVICE_PATH\"] = NEBULA_SERVICE_PATH\n",
|
||||
"os.environ[\"NEBULA_SERVICE_API_KEY\"] = NEBULA_SERVICE_API_KEY"
|
||||
]
|
||||
"llm = Nebula(nebula_api_key='<your_api_key>')"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "9f47bef45880aece"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6fb585dd",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"from langchain.llms import OpenLLM\n",
|
||||
"\n",
|
||||
"llm = OpenLLM(\n",
|
||||
" conversation=\"<Drop your text conversation that you want to ask Nebula to analyze here>\",\n",
|
||||
")"
|
||||
]
|
||||
"Use a conversation transcript and instruction to construct a prompt."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "88c6a516ef51c74b"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "035dea0f",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import PromptTemplate, LLMChain\n",
|
||||
"\n",
|
||||
"template = \"Identify the {count} main objectives or goals mentioned in this context concisely in less points. Emphasize on key intents.\"\n",
|
||||
"conversation = \"\"\"Sam: Good morning, team! Let's keep this standup concise. We'll go in the usual order: what you did yesterday, what you plan to do today, and any blockers. Alex, kick us off.\n",
|
||||
"Alex: Morning! Yesterday, I wrapped up the UI for the user dashboard. The new charts and widgets are now responsive. I also had a sync with the design team to ensure the final touchups are in line with the brand guidelines. Today, I'll start integrating the frontend with the new API endpoints Rhea was working on. The only blocker is waiting for some final API documentation, but I guess Rhea can update on that.\n",
|
||||
"Rhea: Hey, all! Yep, about the API documentation - I completed the majority of the backend work for user data retrieval yesterday. The endpoints are mostly set up, but I need to do a bit more testing today. I'll finalize the API documentation by noon, so that should unblock Alex. After that, I’ll be working on optimizing the database queries for faster data fetching. No other blockers on my end.\n",
|
||||
"Sam: Great, thanks Rhea. Do reach out if you need any testing assistance or if there are any hitches with the database. Now, my update: Yesterday, I coordinated with the client to get clarity on some feature requirements. Today, I'll be updating our project roadmap and timelines based on their feedback. Additionally, I'll be sitting with the QA team in the afternoon for preliminary testing. Blocker: I might need both of you to be available for a quick call in case the client wants to discuss the changes live.\n",
|
||||
"Alex: Sounds good, Sam. Just let us know a little in advance for the call.\n",
|
||||
"Rhea: Agreed. We can make time for that.\n",
|
||||
"Sam: Perfect! Let's keep the momentum going. Reach out if there are any sudden issues or support needed. Have a productive day!\n",
|
||||
"Alex: You too.\n",
|
||||
"Rhea: Thanks, bye!\"\"\"\n",
|
||||
"\n",
|
||||
"prompt = PromptTemplate(template=template, input_variables=[\"count\"])\n",
|
||||
"instruction = \"Identify the main objectives mentioned in this conversation.\"\n",
|
||||
"\n",
|
||||
"prompt = PromptTemplate.from_template(\"{instruction}\\n{conversation}\")\n",
|
||||
"\n",
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||
"\n",
|
||||
"generated = llm_chain.run(count=\"five\")\n",
|
||||
"print(generated)"
|
||||
]
|
||||
"llm_chain.run(instruction=instruction, conversation=conversation)"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "5977ccc2d4432624"
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -13,7 +13,7 @@ pip install python-arango
|
||||
|
||||
Connect your ArangoDB Database with a Chat Model to get insights on your data.
|
||||
|
||||
See the notebook example [here](/docs/use_cases/graph/graph_arangodb_qa.html).
|
||||
See the notebook example [here](/docs/use_cases/more/graph/graph_arangodb_qa.html).
|
||||
|
||||
```python
|
||||
from arango import ArangoClient
|
||||
|
||||
37
docs/extras/integrations/providers/bittensor.mdx
Normal file
37
docs/extras/integrations/providers/bittensor.mdx
Normal file
@@ -0,0 +1,37 @@
|
||||
# NIBittensor
|
||||
|
||||
This page covers how to use the BittensorLLM inference runtime within LangChain.
|
||||
It is broken into two parts: installation and setup, and then examples of NIBittensorLLM usage.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
- Install the Python package with `pip install langchain`
|
||||
|
||||
## Wrappers
|
||||
|
||||
### LLM
|
||||
|
||||
There exists a NIBittensor LLM wrapper, which you can access with:
|
||||
|
||||
```python
|
||||
from langchain.llms import NIBittensorLLM
|
||||
```
|
||||
|
||||
It provides a unified interface for all models:
|
||||
|
||||
```python
|
||||
llm = NIBittensorLLM(system_prompt="Your task is to provide consice and accurate response based on user prompt")
|
||||
|
||||
print(llm('Write a fibonacci function in python with golder ratio'))
|
||||
```
|
||||
|
||||
Multiple responses from top miners can be accessible using the `top_responses` parameter:
|
||||
|
||||
```python
|
||||
multi_response_llm = NIBittensorLLM(top_responses=10)
|
||||
multi_resp = multi_response_llm("What is Neural Network Feeding Mechanism?")
|
||||
json_multi_resp = json.loads(multi_resp)
|
||||
|
||||
print(json_multi_resp)
|
||||
```
|
||||
|
||||
24
docs/extras/integrations/providers/dashvector.mdx
Normal file
24
docs/extras/integrations/providers/dashvector.mdx
Normal file
@@ -0,0 +1,24 @@
|
||||
# DashVector
|
||||
|
||||
> [DashVector](https://help.aliyun.com/document_detail/2510225.html) is a fully-managed vectorDB service that supports high-dimension dense and sparse vectors, real-time insertion and filtered search. It is built to scale automatically and can adapt to different application requirements.
|
||||
|
||||
This document demonstrates to leverage DashVector within the LangChain ecosystem. In particular, it shows how to install DashVector, and how to use it as a VectorStore plugin in LangChain.
|
||||
It is broken into two parts: installation and setup, and then references to specific DashVector wrappers.
|
||||
|
||||
## Installation and Setup
|
||||
Install the Python SDK:
|
||||
```bash
|
||||
pip install dashvector
|
||||
```
|
||||
|
||||
## VectorStore
|
||||
|
||||
A DashVector Collection is wrapped as a familiar VectorStore for native usage within LangChain,
|
||||
which allows it to be readily used for various scenarios, such as semantic search or example selection.
|
||||
|
||||
You may import the vectorstore by:
|
||||
```python
|
||||
from langchain.vectorstores import DashVector
|
||||
```
|
||||
|
||||
For a detailed walkthrough of the DashVector wrapper, please refer to [this notebook](/docs/integrations/vectorstores/dashvector.html)
|
||||
@@ -1,24 +1,54 @@
|
||||
# Elasticsearch
|
||||
|
||||
>[Elasticsearch](https://www.elastic.co/elasticsearch/) is a distributed, RESTful search and analytics engine.
|
||||
> It provides a distributed, multi-tenant-capable full-text search engine with an HTTP web interface and schema-free
|
||||
> [Elasticsearch](https://www.elastic.co/elasticsearch/) is a distributed, RESTful search and analytics engine.
|
||||
> It provides a distributed, multi-tenant-capable full-text search engine with an HTTP web interface and schema-free
|
||||
> JSON documents.
|
||||
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
There are two ways to get started with Elasticsearch:
|
||||
|
||||
#### Install Elasticsearch on your local machine via docker
|
||||
|
||||
Example: Run a single-node Elasticsearch instance with security disabled. This is not recommended for production use.
|
||||
|
||||
```bash
|
||||
docker run -p 9200:9200 -e "discovery.type=single-node" -e "xpack.security.enabled=false" -e "xpack.security.http.ssl.enabled=false" docker.elastic.co/elasticsearch/elasticsearch:8.9.0
|
||||
```
|
||||
|
||||
#### Deploy Elasticsearch on Elastic Cloud
|
||||
|
||||
Elastic Cloud is a managed Elasticsearch service. Signup for a [free trial](https://cloud.elastic.co/registration?storm=langchain-notebook).
|
||||
|
||||
### Install Client
|
||||
|
||||
```bash
|
||||
pip install elasticsearch
|
||||
```
|
||||
|
||||
## Retriever
|
||||
## Vector Store
|
||||
|
||||
>In information retrieval, [Okapi BM25](https://en.wikipedia.org/wiki/Okapi_BM25) (BM is an abbreviation of best matching) is a ranking function used by search engines to estimate the relevance of documents to a given search query. It is based on the probabilistic retrieval framework developed in the 1970s and 1980s by Stephen E. Robertson, Karen Spärck Jones, and others.
|
||||
|
||||
>The name of the actual ranking function is BM25. The fuller name, Okapi BM25, includes the name of the first system to use it, which was the Okapi information retrieval system, implemented at London's City University in the 1980s and 1990s. BM25 and its newer variants, e.g. BM25F (a version of BM25 that can take document structure and anchor text into account), represent TF-IDF-like retrieval functions used in document retrieval.
|
||||
|
||||
See a [usage example](/docs/integrations/retrievers/elastic_search_bm25).
|
||||
The vector store is a simple wrapper around Elasticsearch. It provides a simple interface to store and retrieve vectors.
|
||||
|
||||
```python
|
||||
from langchain.retrievers import ElasticSearchBM25Retriever
|
||||
from langchain.vectorstores import ElasticsearchStore
|
||||
|
||||
from langchain.document_loaders import TextLoader
|
||||
from langchain.text_splitter import CharacterTextSplitter
|
||||
|
||||
loader = TextLoader("./state_of_the_union.txt")
|
||||
documents = loader.load()
|
||||
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
|
||||
docs = text_splitter.split_documents(documents)
|
||||
|
||||
embeddings = OpenAIEmbeddings()
|
||||
|
||||
db = ElasticsearchStore.from_documents(
|
||||
docs, embeddings, es_url="http://localhost:9200", index_name="test-basic",
|
||||
)
|
||||
|
||||
db.client.indices.refresh(index="test-basic")
|
||||
|
||||
query = "What did the president say about Ketanji Brown Jackson"
|
||||
results = db.similarity_search(query)
|
||||
```
|
||||
|
||||
23
docs/extras/integrations/providers/epsilla.mdx
Normal file
23
docs/extras/integrations/providers/epsilla.mdx
Normal file
@@ -0,0 +1,23 @@
|
||||
# Epsilla
|
||||
|
||||
This page covers how to use [Epsilla](https://github.com/epsilla-cloud/vectordb) within LangChain.
|
||||
It is broken into two parts: installation and setup, and then references to specific Epsilla wrappers.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
- Install the Python SDK with `pip/pip3 install pyepsilla`
|
||||
|
||||
## Wrappers
|
||||
|
||||
### VectorStore
|
||||
|
||||
There exists a wrapper around Epsilla vector databases, allowing you to use it as a vectorstore,
|
||||
whether for semantic search or example selection.
|
||||
|
||||
To import this vectorstore:
|
||||
|
||||
```python
|
||||
from langchain.vectorstores import Epsilla
|
||||
```
|
||||
|
||||
For a more detailed walkthrough of the Epsilla wrapper, see [this notebook](/docs/integrations/vectorstores/epsilla.html)
|
||||
@@ -5,16 +5,14 @@ It is broken into two parts: installation and setup, and then references to spec
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
- Get an Nebula API Key and set as environment variables (`SYMBLAI_NEBULA_SERVICE_URL`, `SYMBLAI_NEBULA_SERVICE_PATH`, `SYMBLAI_NEBULA_SERVICE_TOKEN`)
|
||||
- Sign up for a FREE Symbl.ai/Nebula Account: [https://nebula.symbl.ai/playground/](https://nebula.symbl.ai/playground/)
|
||||
- Please see the [Nebula documentation](https://docs.symbl.ai/docs/nebula-llm-overview) for more details.
|
||||
- No time? Visit the [Nebula Quickstart Guide](https://docs.symbl.ai/docs/nebula-quickstart).
|
||||
|
||||
## Wrappers
|
||||
- Get an [Nebula API Key](https://info.symbl.ai/Nebula_Private_Beta.html) and set as environment variable `NEBULA_API_KEY`
|
||||
- Please see the [Nebula documentation](https://docs.symbl.ai/docs/nebula-llm) for more details.
|
||||
- No time? Visit the [Nebula Quickstart Guide](https://docs.symbl.ai/docs/nebula-quickstart).
|
||||
|
||||
### LLM
|
||||
|
||||
There exists an Nebula LLM wrapper, which you can access with
|
||||
There exists an Nebula LLM wrapper, which you can access with
|
||||
```python
|
||||
from langchain.llms import Nebula
|
||||
llm = Nebula()
|
||||
```
|
||||
|
||||
@@ -45,7 +45,7 @@
|
||||
"source": [
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"\n",
|
||||
"loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
|
||||
"loader = TextLoader(\"../../../modules/state_of_the_union.txt\")\n",
|
||||
"documents = loader.load()"
|
||||
]
|
||||
},
|
||||
@@ -136,7 +136,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence.\""
|
||||
"\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and a former top litigator in private practice, and that she will continue Justice Breyer's legacy of excellence.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
@@ -168,7 +168,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' Justice Stephen Breyer'"
|
||||
"' Ketanji Brown Jackson succeeded Justice Breyer.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
@@ -237,7 +237,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence.\""
|
||||
"\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and a former top litigator in private practice, and that she will continue Justice Breyer's legacy of excellence.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
@@ -282,7 +282,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' Justice Stephen Breyer'"
|
||||
"' Ketanji Brown Jackson succeeded Justice Breyer.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
@@ -342,7 +342,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt'})"
|
||||
"Document(page_content='Justice Breyer, thank you for your service. One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence. A former top litigator in private practice.', metadata={'source': '../../../modules/state_of_the_union.txt'})"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
@@ -396,7 +396,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"execution_count": 20,
|
||||
"id": "24ebdaec",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -404,7 +404,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence.\n"
|
||||
" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and a former top litigator in private practice, and that she will continue Justice Breyer's legacy of excellence.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -423,7 +423,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"execution_count": 21,
|
||||
"id": "e53a9d66",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -437,7 +437,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"execution_count": 22,
|
||||
"id": "bf205e35",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -456,7 +456,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"execution_count": 23,
|
||||
"id": "78155887",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -470,7 +470,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"execution_count": 24,
|
||||
"id": "e54b5fa2",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -479,10 +479,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\" The president said that he nominated Circuit Court of Appeals Judge Ketanji Brown Jackson, who he described as one of the nation's top legal minds, to continue Justice Breyer's legacy of excellence.\""
|
||||
"\" The president said that he nominated Circuit Court of Appeals Judge Ketanji Brown Jackson, who is one of the nation's top legal minds and a former top litigator in private practice.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 23,
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -503,7 +503,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"execution_count": 25,
|
||||
"id": "d1058fd2",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -515,7 +515,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"execution_count": 26,
|
||||
"id": "a6594482",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -534,7 +534,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"execution_count": 27,
|
||||
"id": "e2badd21",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -548,7 +548,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"execution_count": 28,
|
||||
"id": "edb31fe5",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -557,10 +557,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\" The president said that he nominated Circuit Court of Appeals Judge Ketanji Brown Jackson, who he described as one of the nation's top legal minds, and that she will continue Justice Breyer's legacy of excellence.\\nSOURCES: ../../../state_of_the_union.txt\""
|
||||
"\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and a former top litigator in private practice.\\nSOURCES: ../../../modules/state_of_the_union.txt\""
|
||||
]
|
||||
},
|
||||
"execution_count": 27,
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -581,7 +581,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"execution_count": 29,
|
||||
"id": "2efacec3-2690-4b05-8de3-a32fd2ac3911",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -618,7 +618,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"execution_count": 30,
|
||||
"id": "fd6d43f4-7428-44a4-81bc-26fe88a98762",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -628,7 +628,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence."
|
||||
" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and a former top litigator in private practice, and that she will continue Justice Breyer's legacy of excellence."
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -640,7 +640,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"execution_count": 31,
|
||||
"id": "5ab38978-f3e8-4fa7-808c-c79dec48379a",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -650,7 +650,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" Justice Stephen Breyer"
|
||||
" Justice Breyer"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -671,7 +671,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"execution_count": 32,
|
||||
"id": "a7ba9d8c",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -692,7 +692,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"execution_count": 33,
|
||||
"id": "a3e33c0d",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -706,7 +706,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"execution_count": 34,
|
||||
"id": "936dc62f",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -715,10 +715,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence.\""
|
||||
"\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and a former top litigator in private practice, and that she will continue Justice Breyer's legacy of excellence.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 33,
|
||||
"execution_count": 34,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
||||
@@ -97,8 +97,6 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"search_index = Vectara.from_texts(source_chunks, embedding=None)"
|
||||
]
|
||||
},
|
||||
@@ -161,7 +159,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[{'text': '\\n\\nEnvironment variables are a powerful tool for managing configuration settings in your applications. They allow you to store and access values from anywhere in your code, making it easier to keep your codebase organized and maintainable.\\n\\nHowever, there are times when you may want to use environment variables specifically for a single command. This is where shell variables come in. Shell variables are similar to environment variables, but they won\\'t be exported to spawned commands. They are defined with the following syntax:\\n\\n```sh\\nVAR_NAME=value\\n```\\n\\nFor example, if you wanted to use a shell variable instead of an environment variable in a command, you could do something like this:\\n\\n```sh\\nVAR=hello && echo $VAR && deno eval \"console.log(\\'Deno: \\' + Deno.env.get(\\'VAR\\'))\"\\n```\\n\\nThis would output the following:\\n\\n```\\nhello\\nDeno: undefined\\n```\\n\\nShell variables can be useful when you want to re-use a value, but don\\'t want it available in any spawned processes.\\n\\nAnother way to use environment variables is through pipelines. Pipelines provide a way to pipe the'}, {'text': '\\n\\nEnvironment variables are a great way to store and access sensitive information in your applications. They are also useful for configuring applications and managing different environments. In Deno, there are two ways to use environment variables: the built-in `Deno.env` and the `.env` file.\\n\\nThe `Deno.env` is a built-in feature of the Deno runtime that allows you to set and get environment variables. It has getter and setter methods that you can use to access and set environment variables. For example, you can set the `FIREBASE_API_KEY` and `FIREBASE_AUTH_DOMAIN` environment variables like this:\\n\\n```ts\\nDeno.env.set(\"FIREBASE_API_KEY\", \"examplekey123\");\\nDeno.env.set(\"FIREBASE_AUTH_DOMAIN\", \"firebasedomain.com\");\\n\\nconsole.log(Deno.env.get(\"FIREBASE_API_KEY\")); // examplekey123\\nconsole.log(Deno.env.get(\"FIREBASE_AUTH_DOMAIN\")); // firebasedomain'}, {'text': \"\\n\\nEnvironment variables are a powerful tool for managing configuration and settings in your applications. They allow you to store and access values that can be used in your code, and they can be set and changed without having to modify your code.\\n\\nIn Deno, environment variables are defined using the `export` command. For example, to set a variable called `VAR_NAME` to the value `value`, you would use the following command:\\n\\n```sh\\nexport VAR_NAME=value\\n```\\n\\nYou can then access the value of the environment variable in your code using the `Deno.env.get()` method. For example, if you wanted to log the value of the `VAR_NAME` variable, you could use the following code:\\n\\n```js\\nconsole.log(Deno.env.get('VAR_NAME'));\\n```\\n\\nYou can also set environment variables for a single command. To do this, you can list the environment variables before the command, like so:\\n\\n```\\nVAR=hello VAR2=bye deno run main.ts\\n```\\n\\nThis will set the environment variables `VAR` and `V\"}, {'text': \"\\n\\nEnvironment variables are a powerful tool for managing settings and configuration in your applications. They can be used to store information such as user preferences, application settings, and even passwords. In this blog post, we'll discuss how to make Deno scripts executable with a hashbang (shebang).\\n\\nA hashbang is a line of code that is placed at the beginning of a script. It tells the system which interpreter to use when running the script. In the case of Deno, the hashbang should be `#!/usr/bin/env -S deno run --allow-env`. This tells the system to use the Deno interpreter and to allow the script to access environment variables.\\n\\nOnce the hashbang is in place, you may need to give the script execution permissions. On Linux, this can be done with the command `sudo chmod +x hashbang.ts`. After that, you can execute the script by calling it like any other command: `./hashbang.ts`.\\n\\nIn the example program, we give the context permission to access the environment variables and print the Deno installation path. This is done by using the `Deno.env.get()` function, which returns the value of the specified environment\"}]\n"
|
||||
"[{'text': '\\n\\nWhen it comes to running Deno CLI tasks, environment variables can be a powerful tool for customizing the behavior of your tasks. With the Deno Task Definition interface, you can easily configure environment variables to be set when executing your tasks.\\n\\nThe Deno Task Definition interface is configured in a `tasks.json` within your workspace. It includes a `env` field, which allows you to specify any environment variables that should be set when executing the task. For example, if you wanted to set the `NODE_ENV` environment variable to `production` when running a Deno task, you could add the following to your `tasks.json`:\\n\\n```json\\n{\\n \"version\": \"2.0.0\",\\n \"tasks\": [\\n {\\n \"type\": \"deno\",\\n \"command\": \"run\",\\n \"args\": [\\n \"mod.ts\"\\n ],\\n \"env\": {\\n \"NODE_ENV\": \"production\"\\n },\\n \"problemMatcher\": [\\n \"$deno\"\\n ],\\n \"label\": \"deno: run\"\\n }\\n ]\\n}\\n```\\n\\nThe Deno language server and this extension also'}, {'text': '\\n\\nEnvironment variables are a great way to store and access data in your applications. They are especially useful when you need to store sensitive information such as API keys, passwords, and other credentials.\\n\\nDeno.env is a library that provides getter and setter methods for environment variables. This makes it easy to store and retrieve data from environment variables. For example, you can use the setter method to set a variable like this:\\n\\n```ts\\nDeno.env.set(\"FIREBASE_API_KEY\", \"examplekey123\");\\nDeno.env.set(\"FIREBASE_AUTH_DOMAIN\", \"firebasedomain.com\");\\n```\\n\\nAnd then you can use the getter method to retrieve the data like this:\\n\\n```ts\\nconsole.log(Deno.env.get(\"FIREBASE_API_KEY\")); // examplekey123\\nconsole.log(Deno.env.get(\"FIREBASE_AUTH_DOMAIN\")); // firebasedomain.com\\n```\\n\\nYou can also store environment variables in a `.env` file and retrieve them using `dotenv` in the standard'}, {'text': '\\n\\nEnvironment variables are a powerful tool for developers, allowing them to store and access data without hard-coding it into their applications. Deno, the secure JavaScript and TypeScript runtime, offers built-in support for environment variables with the `Deno.env` API.\\n\\nUsing `Deno.env` is simple. It has getter and setter methods that allow you to easily set and retrieve environment variables. For example, you can set the `FIREBASE_API_KEY` and `FIREBASE_AUTH_DOMAIN` environment variables like this:\\n\\n```ts\\nDeno.env.set(\"FIREBASE_API_KEY\", \"examplekey123\");\\nDeno.env.set(\"FIREBASE_AUTH_DOMAIN\", \"firebasedomain.com\");\\n```\\n\\nAnd then you can retrieve them like this:\\n\\n```ts\\nconsole.log(Deno.env.get(\"FIREBASE_API_KEY\")); // examplekey123\\nconsole.log(Deno.env.get(\"FIREBASE_AUTH_DOMAIN\")); // firebasedomain.com\\n```'}, {'text': '\\n\\nEnvironment variables are an important part of any programming language, and Deno is no exception. Environment variables are used to store information about the environment in which a program is running, such as the operating system, user preferences, and other settings. In Deno, environment variables are used to set up proxies, control the output of colors, and more.\\n\\nThe `NO_PROXY` environment variable is a de facto standard in Deno that indicates which hosts should bypass the proxy set in other environment variables. This is useful for developers who want to access certain resources without having to go through a proxy. For more information on this standard, you can check out the website no-color.org.\\n\\nThe `Deno.noColor` environment variable is another important environment variable in Deno. This variable is used to control the output of colors in the Deno terminal. By setting this variable to true, you can disable the output of colors in the terminal. This can be useful for developers who want to focus on the output of their code without being distracted by the colors.\\n\\nFinally, the `Deno.env` environment variable is used to access the environment variables set in the Deno runtime. This variable is useful for developers who want'}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
980
docs/extras/integrations/vectorstores/activeloop_deeplake.ipynb
Normal file
980
docs/extras/integrations/vectorstores/activeloop_deeplake.ipynb
Normal file
@@ -0,0 +1,980 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Activeloop's Deep Lake\n",
|
||||
"\n",
|
||||
">[Activeloop's Deep Lake](https://docs.activeloop.ai/) as a Multi-Modal Vector Store that stores embeddings and their metadata including text, jsons, images, audio, video, and more. It saves the data locally, in your cloud, or on Activeloop storage. It performs hybrid search including embeddings and their attributes.\n",
|
||||
"\n",
|
||||
"This notebook showcases basic functionality related to `Activeloop's Deep Lake`. While `Deep Lake` can store embeddings, it is capable of storing any type of data. It is a serverless data lake with version control, query engine and streaming dataloaders to deep learning frameworks. \n",
|
||||
"\n",
|
||||
"For more information, please see the Deep Lake [documentation](https://docs.activeloop.ai) or [api reference](https://docs.deeplake.ai)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install openai 'deeplake[enterprise]' tiktoken"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.vectorstores import DeepLake"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",
|
||||
"activeloop_token = getpass.getpass(\"activeloop token:\")\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"\n",
|
||||
"loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Create a dataset locally at `./deeplake/`, then run similarity search. The Deeplake+LangChain integration uses Deep Lake datasets under the hood, so `dataset` and `vector store` are used interchangeably. To create a dataset in your own cloud, or in the Deep Lake storage, [adjust the path accordingly](https://docs.activeloop.ai/storage-and-credentials/storage-options)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": []
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='./my_deeplake/', tensors=['embedding', 'id', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding embedding (42, 1536) float32 None \n",
|
||||
" id text (42, 1) str None \n",
|
||||
" metadata json (42, 1) str None \n",
|
||||
" text text (42, 1) str None \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": []
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"db = DeepLake(\n",
|
||||
" dataset_path=\"./my_deeplake/\", embedding=embeddings, overwrite=True\n",
|
||||
")\n",
|
||||
"db.add_documents(docs)\n",
|
||||
"# or shorter\n",
|
||||
"# db = DeepLake.from_documents(docs, dataset_path=\"./my_deeplake/\", embedding=embeddings, overwrite=True)\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = db.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To disable dataset summary printings all the time, you can specify verbose=False during VectorStore initialization."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Later, you can reload the dataset without recomputing embeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Deep Lake Dataset in ./my_deeplake/ already exists, loading from the storage\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"db = DeepLake(\n",
|
||||
" dataset_path=\"./my_deeplake/\", embedding=embeddings, read_only=True\n",
|
||||
")\n",
|
||||
"docs = db.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Deep Lake, for now, is single writer and multiple reader. Setting `read_only=True` helps to avoid acquiring the writer lock."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Retrieval Question/Answering"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/ubuntu/langchain_activeloop/langchain/libs/langchain/langchain/llms/openai.py:786: UserWarning: You are trying to use a chat model. This way of initializing it is no longer supported. Instead, please use: `from langchain.chat_models import ChatOpenAI`\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chains import RetrievalQA\n",
|
||||
"from langchain.llms import OpenAIChat\n",
|
||||
"\n",
|
||||
"qa = RetrievalQA.from_chain_type(\n",
|
||||
" llm=OpenAIChat(model=\"gpt-3.5-turbo\"),\n",
|
||||
" chain_type=\"stuff\",\n",
|
||||
" retriever=db.as_retriever(),\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The president said that Ketanji Brown Jackson is a former top litigator in private practice and a former federal public defender. She comes from a family of public school educators and police officers. She is a consensus builder and has received a broad range of support since being nominated.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"qa.run(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Attribute based filtering in metadata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's create another vector store containing metadata with the year the documents were created."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": []
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='./my_deeplake/', tensors=['embedding', 'id', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding embedding (4, 1536) float32 None \n",
|
||||
" id text (4, 1) str None \n",
|
||||
" metadata json (4, 1) str None \n",
|
||||
" text text (4, 1) str None \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": []
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import random\n",
|
||||
"\n",
|
||||
"for d in docs:\n",
|
||||
" d.metadata[\"year\"] = random.randint(2012, 2014)\n",
|
||||
"\n",
|
||||
"db = DeepLake.from_documents(\n",
|
||||
" docs, embeddings, dataset_path=\"./my_deeplake/\", overwrite=True\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100%|██████████| 4/4 [00:00<00:00, 2936.16it/s]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../modules/state_of_the_union.txt', 'year': 2013}),\n",
|
||||
" Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWe’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWe’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWe’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': '../../modules/state_of_the_union.txt', 'year': 2013}),\n",
|
||||
" Document(page_content='Tonight, I’m announcing a crackdown on these companies overcharging American businesses and consumers. \\n\\nAnd as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up. \\n\\nThat ends on my watch. \\n\\nMedicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect. \\n\\nWe’ll also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees. \\n\\nLet’s pass the Paycheck Fairness Act and paid leave. \\n\\nRaise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. \\n\\nLet’s increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls America’s best-kept secret: community colleges.', metadata={'source': '../../modules/state_of_the_union.txt', 'year': 2013})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"db.similarity_search(\n",
|
||||
" \"What did the president say about Ketanji Brown Jackson\",\n",
|
||||
" filter={\"metadata\": {\"year\": 2013}},\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Choosing distance function\n",
|
||||
"Distance function `L2` for Euclidean, `L1` for Nuclear, `Max` l-infinity distance, `cos` for cosine similarity, `dot` for dot product "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../modules/state_of_the_union.txt', 'year': 2013}),\n",
|
||||
" Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWe’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWe’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWe’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': '../../modules/state_of_the_union.txt', 'year': 2013}),\n",
|
||||
" Document(page_content='Tonight, I’m announcing a crackdown on these companies overcharging American businesses and consumers. \\n\\nAnd as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up. \\n\\nThat ends on my watch. \\n\\nMedicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect. \\n\\nWe’ll also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees. \\n\\nLet’s pass the Paycheck Fairness Act and paid leave. \\n\\nRaise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. \\n\\nLet’s increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls America’s best-kept secret: community colleges.', metadata={'source': '../../modules/state_of_the_union.txt', 'year': 2013}),\n",
|
||||
" Document(page_content='And for our LGBTQ+ Americans, let’s finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \\n\\nAs I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \\n\\nWhile it often appears that we never agree, that isn’t true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \\n\\nAnd soon, we’ll strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \\n\\nSo tonight I’m offering a Unity Agenda for the Nation. Four big things we can do together. \\n\\nFirst, beat the opioid epidemic.', metadata={'source': '../../modules/state_of_the_union.txt', 'year': 2012})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"db.similarity_search(\n",
|
||||
" \"What did the president say about Ketanji Brown Jackson?\", distance_metric=\"cos\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Maximal Marginal relevance\n",
|
||||
"Using maximal marginal relevance"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../modules/state_of_the_union.txt', 'year': 2013}),\n",
|
||||
" Document(page_content='Tonight, I’m announcing a crackdown on these companies overcharging American businesses and consumers. \\n\\nAnd as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up. \\n\\nThat ends on my watch. \\n\\nMedicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect. \\n\\nWe’ll also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees. \\n\\nLet’s pass the Paycheck Fairness Act and paid leave. \\n\\nRaise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. \\n\\nLet’s increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls America’s best-kept secret: community colleges.', metadata={'source': '../../modules/state_of_the_union.txt', 'year': 2013}),\n",
|
||||
" Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWe’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWe’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWe’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': '../../modules/state_of_the_union.txt', 'year': 2013}),\n",
|
||||
" Document(page_content='And for our LGBTQ+ Americans, let’s finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \\n\\nAs I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \\n\\nWhile it often appears that we never agree, that isn’t true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \\n\\nAnd soon, we’ll strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \\n\\nSo tonight I’m offering a Unity Agenda for the Nation. Four big things we can do together. \\n\\nFirst, beat the opioid epidemic.', metadata={'source': '../../modules/state_of_the_union.txt', 'year': 2012})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"db.max_marginal_relevance_search(\n",
|
||||
" \"What did the president say about Ketanji Brown Jackson?\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Delete dataset"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": []
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"db.delete_dataset()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"and if delete fails you can also force delete"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": []
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"DeepLake.force_delete_by_path(\"./my_deeplake\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Deep Lake datasets on cloud (Activeloop, AWS, GCS, etc.) or in memory\n",
|
||||
"By default, Deep Lake datasets are stored locally. To store them in memory, in the Deep Lake Managed DB, or in any object storage, you can provide the [corresponding path and credentials when creating the vector store](https://docs.activeloop.ai/storage-and-credentials/storage-options). Some paths require registration with Activeloop and creation of an API token that can be [retrieved here](https://app.activeloop.ai/)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"os.environ[\"ACTIVELOOP_TOKEN\"] = activeloop_token"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Your Deep Lake dataset has been successfully created!\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": []
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='hub://adilkhan/langchain_testing_python', tensors=['embedding', 'id', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding embedding (42, 1536) float32 None \n",
|
||||
" id text (42, 1) str None \n",
|
||||
" metadata json (42, 1) str None \n",
|
||||
" text text (42, 1) str None \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": []
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Embed and store the texts\n",
|
||||
"username = \"<USERNAME_OR_ORG>\" # your username on app.activeloop.ai\n",
|
||||
"dataset_path = f\"hub://{username}/langchain_testing_python\" # could be also ./local/path (much faster locally), s3://bucket/path/to/dataset, gcs://path/to/dataset, etc.\n",
|
||||
"\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embedding = OpenAIEmbeddings()\n",
|
||||
"db = DeepLake(dataset_path=dataset_path, embedding=embeddings, overwrite=True)\n",
|
||||
"ids = db.add_documents(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = db.similarity_search(query)\n",
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### `tensor_db` execution option "
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In order to utilize Deep Lake's Managed Tensor Database, it is necessary to specify the runtime parameter as {'tensor_db': True} during the creation of the vector store. This configuration enables the execution of queries on the Managed Tensor Database, rather than on the client side. It should be noted that this functionality is not applicable to datasets stored locally or in-memory. In the event that a vector store has already been created outside of the Managed Tensor Database, it is possible to transfer it to the Managed Tensor Database by following the prescribed steps."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Your Deep Lake dataset has been successfully created!\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"|"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='hub://adilkhan/langchain_testing', tensors=['embedding', 'id', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding embedding (42, 1536) float32 None \n",
|
||||
" id text (42, 1) str None \n",
|
||||
" metadata json (42, 1) str None \n",
|
||||
" text text (42, 1) str None \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" \r"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Embed and store the texts\n",
|
||||
"username = \"<USERNAME_OR_ORG>\" # your username on app.activeloop.ai\n",
|
||||
"dataset_path = f\"hub://{username}/langchain_testing\"\n",
|
||||
"\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embedding = OpenAIEmbeddings()\n",
|
||||
"db = DeepLake(\n",
|
||||
" dataset_path=dataset_path,\n",
|
||||
" embedding=embeddings,\n",
|
||||
" overwrite=True,\n",
|
||||
" runtime={\"tensor_db\": True},\n",
|
||||
")\n",
|
||||
"ids = db.add_documents(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### TQL Search"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Furthermore, the execution of queries is also supported within the similarity_search method, whereby the query can be specified utilizing Deep Lake's Tensor Query Language (TQL)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"search_id = db.vectorstore.dataset.id[0].numpy()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'8a6ff326-3a85-11ee-b840-13905694aaaf'"
|
||||
]
|
||||
},
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"search_id[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = db.similarity_search(\n",
|
||||
" query=None,\n",
|
||||
" tql=f\"SELECT * WHERE id == '{search_id[0]}'\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='hub://adilkhan/langchain_testing', tensors=['embedding', 'id', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding embedding (42, 1536) float32 None \n",
|
||||
" id text (42, 1) str None \n",
|
||||
" metadata json (42, 1) str None \n",
|
||||
" text text (42, 1) str None \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"db.vectorstore.summary()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Creating vector stores on AWS S3"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 82,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"s3://hub-2.0-datasets-n/langchain_test loaded successfully.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Evaluating ingest: 100%|██████████| 1/1 [00:10<00:00\n",
|
||||
"\\"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='s3://hub-2.0-datasets-n/langchain_test', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding generic (4, 1536) float32 None \n",
|
||||
" ids text (4, 1) str None \n",
|
||||
" metadata json (4, 1) str None \n",
|
||||
" text text (4, 1) str None \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" \r"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"dataset_path = f\"s3://BUCKET/langchain_test\" # could be also ./local/path (much faster locally), hub://bucket/path/to/dataset, gcs://path/to/dataset, etc.\n",
|
||||
"\n",
|
||||
"embedding = OpenAIEmbeddings()\n",
|
||||
"db = DeepLake.from_documents(\n",
|
||||
" docs,\n",
|
||||
" dataset_path=dataset_path,\n",
|
||||
" embedding=embeddings,\n",
|
||||
" overwrite=True,\n",
|
||||
" creds={\n",
|
||||
" \"aws_access_key_id\": os.environ[\"AWS_ACCESS_KEY_ID\"],\n",
|
||||
" \"aws_secret_access_key\": os.environ[\"AWS_SECRET_ACCESS_KEY\"],\n",
|
||||
" \"aws_session_token\": os.environ[\"AWS_SESSION_TOKEN\"], # Optional\n",
|
||||
" },\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Deep Lake API\n",
|
||||
"you can access the Deep Lake dataset at `db.vectorstore`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='hub://adilkhan/langchain_testing', tensors=['embedding', 'id', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding embedding (42, 1536) float32 None \n",
|
||||
" id text (42, 1) str None \n",
|
||||
" metadata json (42, 1) str None \n",
|
||||
" text text (42, 1) str None \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# get structure of the dataset\n",
|
||||
"db.vectorstore.summary()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# get embeddings numpy array\n",
|
||||
"embeds = db.vectorstore.dataset.embedding.numpy()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Transfer local dataset to cloud\n",
|
||||
"Copy already created dataset to the cloud. You can also transfer from cloud to local."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 73,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Copying dataset: 100%|██████████| 56/56 [00:38<00:00\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/davitbun/langchain_test_copy\n",
|
||||
"Your Deep Lake dataset has been successfully created!\n",
|
||||
"The dataset is private so make sure you are logged in!\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Dataset(path='hub://davitbun/langchain_test_copy', tensors=['embedding', 'ids', 'metadata', 'text'])"
|
||||
]
|
||||
},
|
||||
"execution_count": 73,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import deeplake\n",
|
||||
"\n",
|
||||
"username = \"davitbun\" # your username on app.activeloop.ai\n",
|
||||
"source = f\"hub://{username}/langchain_testing\" # could be local, s3, gcs, etc.\n",
|
||||
"destination = f\"hub://{username}/langchain_test_copy\" # could be local, s3, gcs, etc.\n",
|
||||
"\n",
|
||||
"deeplake.deepcopy(src=source, dest=destination, overwrite=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 76,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" \r"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/davitbun/langchain_test_copy\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"hub://davitbun/langchain_test_copy loaded successfully.\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Deep Lake Dataset in hub://davitbun/langchain_test_copy already exists, loading from the storage\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='hub://davitbun/langchain_test_copy', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding generic (4, 1536) float32 None \n",
|
||||
" ids text (4, 1) str None \n",
|
||||
" metadata json (4, 1) str None \n",
|
||||
" text text (4, 1) str None \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Evaluating ingest: 100%|██████████| 1/1 [00:31<00:00\n",
|
||||
"-"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='hub://davitbun/langchain_test_copy', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding generic (8, 1536) float32 None \n",
|
||||
" ids text (8, 1) str None \n",
|
||||
" metadata json (8, 1) str None \n",
|
||||
" text text (8, 1) str None \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" \r"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['ad42f3fe-e188-11ed-b66d-41c5f7b85421',\n",
|
||||
" 'ad42f3ff-e188-11ed-b66d-41c5f7b85421',\n",
|
||||
" 'ad42f400-e188-11ed-b66d-41c5f7b85421',\n",
|
||||
" 'ad42f401-e188-11ed-b66d-41c5f7b85421']"
|
||||
]
|
||||
},
|
||||
"execution_count": 76,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"db = DeepLake(dataset_path=destination, embedding=embeddings)\n",
|
||||
"db.add_documents(docs)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.9.6 ('langchain_venv': venv)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "0b0bacaffd430edc3085253ee7ee1bcda9f76a5e66b369dda8ba68baa6d14ba7"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
236
docs/extras/integrations/vectorstores/dashvector.ipynb
Normal file
236
docs/extras/integrations/vectorstores/dashvector.ipynb
Normal file
@@ -0,0 +1,236 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"# DashVector\n",
|
||||
"\n",
|
||||
"> [DashVector](https://help.aliyun.com/document_detail/2510225.html) is a fully-managed vectorDB service that supports high-dimension dense and sparse vectors, real-time insertion and filtered search. It is built to scale automatically and can adapt to different application requirements.\n",
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the `DashVector` vector database.\n",
|
||||
"\n",
|
||||
"To use DashVector, you must have an API key.\n",
|
||||
"Here are the [installation instructions](https://help.aliyun.com/document_detail/2510223.html)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## Install"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install dashvector dashscope"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"We want to use `DashScopeEmbeddings` so we also have to get the Dashscope API Key."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n",
|
||||
"is_executing": true
|
||||
},
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-11T10:37:15.091585Z",
|
||||
"start_time": "2023-08-11T10:36:51.859753Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"os.environ[\"DASHVECTOR_API_KEY\"] = getpass.getpass(\"DashVector API Key:\")\n",
|
||||
"os.environ[\"DASHSCOPE_API_KEY\"] = getpass.getpass(\"DashScope API Key:\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## Example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n",
|
||||
"is_executing": true
|
||||
},
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-11T10:42:30.243460Z",
|
||||
"start_time": "2023-08-11T10:42:27.783785Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings.dashscope import DashScopeEmbeddings\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.vectorstores import DashVector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": true,
|
||||
"name": "#%%\n"
|
||||
},
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-11T10:42:30.391580Z",
|
||||
"start_time": "2023-08-11T10:42:30.249021Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"\n",
|
||||
"loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embeddings = DashScopeEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"We can create DashVector from documents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dashvector = DashVector.from_documents(docs, embeddings)\n",
|
||||
"\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = dashvector.similarity_search(query)\n",
|
||||
"print(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"We can add texts with meta datas and ids, and search with meta filter."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
},
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-11T10:42:51.641309Z",
|
||||
"start_time": "2023-08-11T10:42:51.132109Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Document(page_content='baz', metadata={'key': 2})]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"texts = [\"foo\", \"bar\", \"baz\"]\n",
|
||||
"metadatas = [{\"key\": i} for i in range(len(texts))]\n",
|
||||
"ids = [\"0\", \"1\", \"2\"]\n",
|
||||
"\n",
|
||||
"dashvector.add_texts(texts, metadatas=metadatas, ids=ids)\n",
|
||||
"\n",
|
||||
"docs = dashvector.similarity_search(\"foo\", filter=\"key = 2\")\n",
|
||||
"print(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 1
|
||||
}
|
||||
@@ -1,719 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Activeloop's Deep Lake\n",
|
||||
"\n",
|
||||
">[Activeloop's Deep Lake](https://docs.activeloop.ai/) as a Multi-Modal Vector Store that stores embeddings and their metadata including text, jsons, images, audio, video, and more. It saves the data locally, in your cloud, or on Activeloop storage. It performs hybrid search including embeddings and their attributes.\n",
|
||||
"\n",
|
||||
"This notebook showcases basic functionality related to `Activeloop's Deep Lake`. While `Deep Lake` can store embeddings, it is capable of storing any type of data. It is a serverless data lake with version control, query engine and streaming dataloaders to deep learning frameworks. \n",
|
||||
"\n",
|
||||
"For more information, please see the Deep Lake [documentation](https://docs.activeloop.ai) or [api reference](https://docs.deeplake.ai)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install openai 'deeplake[enterprise]' tiktoken"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.vectorstores import DeepLake"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",
|
||||
"activeloop_token = getpass.getpass(\"activeloop token:\")\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"\n",
|
||||
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Create a dataset locally at `./deeplake/`, then run similarity search. The Deeplake+LangChain integration uses Deep Lake datasets under the hood, so `dataset` and `vector store` are used interchangeably. To create a dataset in your own cloud, or in the Deep Lake storage, [adjust the path accordingly](https://docs.activeloop.ai/storage-and-credentials/storage-options)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db = DeepLake(\n",
|
||||
" dataset_path=\"./my_deeplake/\", embedding_function=embeddings, overwrite=True\n",
|
||||
")\n",
|
||||
"db.add_documents(docs)\n",
|
||||
"# or shorter\n",
|
||||
"# db = DeepLake.from_documents(docs, dataset_path=\"./my_deeplake/\", embedding=embeddings, overwrite=True)\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = db.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Later, you can reload the dataset without recomputing embeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db = DeepLake(\n",
|
||||
" dataset_path=\"./my_deeplake/\", embedding_function=embeddings, read_only=True\n",
|
||||
")\n",
|
||||
"docs = db.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Deep Lake, for now, is single writer and multiple reader. Setting `read_only=True` helps to avoid acquiring the writer lock."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Retrieval Question/Answering"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import RetrievalQA\n",
|
||||
"from langchain.llms import OpenAIChat\n",
|
||||
"\n",
|
||||
"qa = RetrievalQA.from_chain_type(\n",
|
||||
" llm=OpenAIChat(model=\"gpt-3.5-turbo\"),\n",
|
||||
" chain_type=\"stuff\",\n",
|
||||
" retriever=db.as_retriever(),\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"qa.run(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Attribute based filtering in metadata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's create another vector store containing metadata with the year the documents were created."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import random\n",
|
||||
"\n",
|
||||
"for d in docs:\n",
|
||||
" d.metadata[\"year\"] = random.randint(2012, 2014)\n",
|
||||
"\n",
|
||||
"db = DeepLake.from_documents(\n",
|
||||
" docs, embeddings, dataset_path=\"./my_deeplake/\", overwrite=True\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db.similarity_search(\n",
|
||||
" \"What did the president say about Ketanji Brown Jackson\",\n",
|
||||
" filter={\"metadata\": {\"year\": 2013}},\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Choosing distance function\n",
|
||||
"Distance function `L2` for Euclidean, `L1` for Nuclear, `Max` l-infinity distance, `cos` for cosine similarity, `dot` for dot product "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db.similarity_search(\n",
|
||||
" \"What did the president say about Ketanji Brown Jackson?\", distance_metric=\"cos\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Maximal Marginal relevance\n",
|
||||
"Using maximal marginal relevance"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db.max_marginal_relevance_search(\n",
|
||||
" \"What did the president say about Ketanji Brown Jackson?\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Delete dataset"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": []
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"db.delete_dataset()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"and if delete fails you can also force delete"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": []
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"DeepLake.force_delete_by_path(\"./my_deeplake\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Deep Lake datasets on cloud (Activeloop, AWS, GCS, etc.) or in memory\n",
|
||||
"By default, Deep Lake datasets are stored locally. To store them in memory, in the Deep Lake Managed DB, or in any object storage, you can provide the [corresponding path and credentials when creating the vector store](https://docs.activeloop.ai/storage-and-credentials/storage-options). Some paths require registration with Activeloop and creation of an API token that can be [retrieved here](https://app.activeloop.ai/)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"os.environ[\"ACTIVELOOP_TOKEN\"] = activeloop_token"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Embed and store the texts\n",
|
||||
"username = \"<username>\" # your username on app.activeloop.ai\n",
|
||||
"dataset_path = f\"hub://{username}/langchain_testing_python\" # could be also ./local/path (much faster locally), s3://bucket/path/to/dataset, gcs://path/to/dataset, etc.\n",
|
||||
"\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embedding = OpenAIEmbeddings()\n",
|
||||
"db = DeepLake(dataset_path=dataset_path, embedding_function=embeddings, overwrite=True)\n",
|
||||
"db.add_documents(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = db.similarity_search(query)\n",
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### `tensor_db` execution option "
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In order to utilize Deep Lake's Managed Tensor Database, it is necessary to specify the runtime parameter as {'tensor_db': True} during the creation of the vector store. This configuration enables the execution of queries on the Managed Tensor Database, rather than on the client side. It should be noted that this functionality is not applicable to datasets stored locally or in-memory. In the event that a vector store has already been created outside of the Managed Tensor Database, it is possible to transfer it to the Managed Tensor Database by following the prescribed steps."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Embed and store the texts\n",
|
||||
"username = \"adilkhan\" # your username on app.activeloop.ai\n",
|
||||
"dataset_path = f\"hub://{username}/langchain_testing\"\n",
|
||||
"\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embedding = OpenAIEmbeddings()\n",
|
||||
"db = DeepLake(\n",
|
||||
" dataset_path=dataset_path,\n",
|
||||
" embedding_function=embeddings,\n",
|
||||
" overwrite=True,\n",
|
||||
" runtime={\"tensor_db\": True},\n",
|
||||
")\n",
|
||||
"db.add_documents(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### TQL Search"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Furthermore, the execution of queries is also supported within the similarity_search method, whereby the query can be specified utilizing Deep Lake's Tensor Query Language (TQL)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"search_id = db.vectorstore.dataset.id[0].numpy()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = db.similarity_search(\n",
|
||||
" query=None,\n",
|
||||
" tql_query=f\"SELECT * WHERE id == '{search_id[0]}'\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Creating vector stores on AWS S3"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 82,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"s3://hub-2.0-datasets-n/langchain_test loaded successfully.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Evaluating ingest: 100%|██████████| 1/1 [00:10<00:00\n",
|
||||
"\\"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='s3://hub-2.0-datasets-n/langchain_test', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding generic (4, 1536) float32 None \n",
|
||||
" ids text (4, 1) str None \n",
|
||||
" metadata json (4, 1) str None \n",
|
||||
" text text (4, 1) str None \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" \r"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"dataset_path = f\"s3://BUCKET/langchain_test\" # could be also ./local/path (much faster locally), hub://bucket/path/to/dataset, gcs://path/to/dataset, etc.\n",
|
||||
"\n",
|
||||
"embedding = OpenAIEmbeddings()\n",
|
||||
"db = DeepLake.from_documents(\n",
|
||||
" docs,\n",
|
||||
" dataset_path=dataset_path,\n",
|
||||
" embedding=embeddings,\n",
|
||||
" overwrite=True,\n",
|
||||
" creds={\n",
|
||||
" \"aws_access_key_id\": os.environ[\"AWS_ACCESS_KEY_ID\"],\n",
|
||||
" \"aws_secret_access_key\": os.environ[\"AWS_SECRET_ACCESS_KEY\"],\n",
|
||||
" \"aws_session_token\": os.environ[\"AWS_SESSION_TOKEN\"], # Optional\n",
|
||||
" },\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Deep Lake API\n",
|
||||
"you can access the Deep Lake dataset at `db.vectorstore`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='hub://adilkhan/langchain_testing', tensors=['embedding', 'id', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding embedding (42, 1536) float32 None \n",
|
||||
" id text (42, 1) str None \n",
|
||||
" metadata json (42, 1) str None \n",
|
||||
" text text (42, 1) str None \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# get structure of the dataset\n",
|
||||
"db.vectorstore.summary()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# get embeddings numpy array\n",
|
||||
"embeds = db.vectorstore.dataset.embedding.numpy()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Transfer local dataset to cloud\n",
|
||||
"Copy already created dataset to the cloud. You can also transfer from cloud to local."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 73,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Copying dataset: 100%|██████████| 56/56 [00:38<00:00\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/davitbun/langchain_test_copy\n",
|
||||
"Your Deep Lake dataset has been successfully created!\n",
|
||||
"The dataset is private so make sure you are logged in!\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Dataset(path='hub://davitbun/langchain_test_copy', tensors=['embedding', 'ids', 'metadata', 'text'])"
|
||||
]
|
||||
},
|
||||
"execution_count": 73,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import deeplake\n",
|
||||
"\n",
|
||||
"username = \"davitbun\" # your username on app.activeloop.ai\n",
|
||||
"source = f\"hub://{username}/langchain_test\" # could be local, s3, gcs, etc.\n",
|
||||
"destination = f\"hub://{username}/langchain_test_copy\" # could be local, s3, gcs, etc.\n",
|
||||
"\n",
|
||||
"deeplake.deepcopy(src=source, dest=destination, overwrite=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 76,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" \r"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/davitbun/langchain_test_copy\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"hub://davitbun/langchain_test_copy loaded successfully.\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Deep Lake Dataset in hub://davitbun/langchain_test_copy already exists, loading from the storage\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='hub://davitbun/langchain_test_copy', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding generic (4, 1536) float32 None \n",
|
||||
" ids text (4, 1) str None \n",
|
||||
" metadata json (4, 1) str None \n",
|
||||
" text text (4, 1) str None \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Evaluating ingest: 100%|██████████| 1/1 [00:31<00:00\n",
|
||||
"-"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='hub://davitbun/langchain_test_copy', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding generic (8, 1536) float32 None \n",
|
||||
" ids text (8, 1) str None \n",
|
||||
" metadata json (8, 1) str None \n",
|
||||
" text text (8, 1) str None \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" \r"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['ad42f3fe-e188-11ed-b66d-41c5f7b85421',\n",
|
||||
" 'ad42f3ff-e188-11ed-b66d-41c5f7b85421',\n",
|
||||
" 'ad42f400-e188-11ed-b66d-41c5f7b85421',\n",
|
||||
" 'ad42f401-e188-11ed-b66d-41c5f7b85421']"
|
||||
]
|
||||
},
|
||||
"execution_count": 76,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"db = DeepLake(dataset_path=destination, embedding_function=embeddings)\n",
|
||||
"db.add_documents(docs)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.9.6 ('langchain_venv': venv)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.6"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "0b0bacaffd430edc3085253ee7ee1bcda9f76a5e66b369dda8ba68baa6d14ba7"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -23,7 +23,9 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install dingodb"
|
||||
"!pip install dingodb\n",
|
||||
"or install latest:\n",
|
||||
"!pip install git+https://git@github.com/dingodb/pydingo.git"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -107,7 +109,7 @@
|
||||
"dingo_client = DingoDB(user=\"\", password=\"\", host=[\"127.0.0.1:13000\"])\n",
|
||||
"# First, check if our index already exists. If it doesn't, we create it\n",
|
||||
"if index_name not in dingo_client.get_index():\n",
|
||||
" # we create a new index\n",
|
||||
" # we create a new index, modify to your own\n",
|
||||
" dingo_client.create_index(\n",
|
||||
" index_name=index_name,\n",
|
||||
" dimension=1536,\n",
|
||||
@@ -150,7 +152,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(docs[0][1])"
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -170,9 +172,9 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vectorstore = Dingo(client, embeddings.embed_query, \"text\")\n",
|
||||
"vectorstore = Dingo(embeddings, \"text\", client=dingo_client, index_name=index_name)\n",
|
||||
"\n",
|
||||
"vectorstore.add_texts(\"More text!\")"
|
||||
"vectorstore.add_texts([\"More text!\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
160
docs/extras/integrations/vectorstores/epsilla.ipynb
Normal file
160
docs/extras/integrations/vectorstores/epsilla.ipynb
Normal file
@@ -0,0 +1,160 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Epsilla\n",
|
||||
"\n",
|
||||
">[Epsilla](https://www.epsilla.com) is an open-source vector database that leverages the advanced parallel graph traversal techniques for vector indexing. Epsilla is licensed under GPL-3.0.\n",
|
||||
"\n",
|
||||
"This notebook shows how to use the functionalities related to the `Epsilla` vector database.\n",
|
||||
"\n",
|
||||
"As a prerequisite, you need to have a running Epsilla vector database (for example, through our docker image), and install the ``pyepsilla`` package. View full docs at [docs](https://epsilla-inc.gitbook.io/epsilladb/quick-start)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip/pip3 install pyepsilla"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We want to use OpenAIEmbeddings so we have to get the OpenAI API Key. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"OpenAI API Key: ········"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||
"from langchain.vectorstores import Epsilla"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"\n",
|
||||
"loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
|
||||
"documents = loader.load()\n",
|
||||
"\n",
|
||||
"documents = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0).split_documents(documents)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Epsilla vectordb is running with default host \"localhost\" and port \"8888\". We have a custom db path, db name and collection name instead of the default ones."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from pyepsilla import vectordb\n",
|
||||
"\n",
|
||||
"client = vectordb.Client()\n",
|
||||
"vector_store = Epsilla.from_documents(\n",
|
||||
" documents,\n",
|
||||
" embeddings,\n",
|
||||
" client,\n",
|
||||
" db_path=\"/tmp/mypath\",\n",
|
||||
" db_name=\"MyDB\",\n",
|
||||
" collection_name=\"MyCollection\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = vector_store.similarity_search(query)\n",
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In state after state, new laws have been passed, not only to suppress the vote, but to subvert entire elections.\n",
|
||||
"\n",
|
||||
"We cannot let this happen.\n",
|
||||
"\n",
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections.\n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service.\n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court.\n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "langchain",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.17"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
494
docs/extras/integrations/vectorstores/zep.ipynb
Normal file
494
docs/extras/integrations/vectorstores/zep.ipynb
Normal file
@@ -0,0 +1,494 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"# Zep\n",
|
||||
"\n",
|
||||
"Zep is an open source long-term memory store for LLM applications. Zep makes it easy to add relevant documents,\n",
|
||||
"chat history memory & rich user data to your LLM app's prompts.\n",
|
||||
"\n",
|
||||
"**Note:** The `ZepVectorStore` works with `Documents` and is intended to be used as a `Retriever`.\n",
|
||||
"It offers separate functionality to Zep's `ZepMemory` class, which is designed for persisting, enriching\n",
|
||||
"and searching your user's chat history.\n",
|
||||
"\n",
|
||||
"## Why Zep's VectorStore? 🤖🚀\n",
|
||||
"Zep automatically embeds documents added to the Zep Vector Store using low-latency models local to the Zep server.\n",
|
||||
"The Zep client also offers async interfaces for all document operations. These two together with Zep's chat memory\n",
|
||||
" functionality make Zep ideal for building conversational LLM apps where latency and performance are important.\n",
|
||||
"\n",
|
||||
"## Installation\n",
|
||||
"Follow the [Zep Quickstart Guide](https://docs.getzep.com/deployment/quickstart/) to install and get started with Zep.\n",
|
||||
"\n",
|
||||
"## Usage\n",
|
||||
"\n",
|
||||
"You'll need your Zep API URL and optionally an API key to use the Zep VectorStore. \n",
|
||||
"See the [Zep docs](https://docs.getzep.com) for more information.\n",
|
||||
"\n",
|
||||
"In the examples below, we're using Zep's auto-embedding feature which automatically embed documents on the Zep server \n",
|
||||
"using low-latency embedding models.\n",
|
||||
"\n",
|
||||
"## Note\n",
|
||||
"- These examples use Zep's async interfaces. Call sync interfaces by removing the `a` prefix from the method names.\n",
|
||||
"- If you pass in an `Embeddings` instance Zep will use this to embed documents rather than auto-embed them.\n",
|
||||
"You must also set your document collection to `isAutoEmbedded === false`. \n",
|
||||
"- If you set your collection to `isAutoEmbedded === false`, you must pass in an `Embeddings` instance."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "9eb8dfa6fdb71ef5"
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Load or create a Collection from documents"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "9a3a11aab1412d98"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from uuid import uuid4\n",
|
||||
"\n",
|
||||
"from langchain.document_loaders import WebBaseLoader\n",
|
||||
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
|
||||
"from langchain.vectorstores import ZepVectorStore\n",
|
||||
"from langchain.vectorstores.zep import CollectionConfig\n",
|
||||
"\n",
|
||||
"ZEP_API_URL = \"http://localhost:8000\" # this is the API url of your Zep instance\n",
|
||||
"ZEP_API_KEY = \"<optional_key>\" # optional API Key for your Zep instance\n",
|
||||
"collection_name = f\"babbage{uuid4().hex}\" # a unique collection name. alphanum only\n",
|
||||
"\n",
|
||||
"# Collection config is needed if we're creating a new Zep Collection\n",
|
||||
"config = CollectionConfig(\n",
|
||||
" name=collection_name,\n",
|
||||
" description=\"<optional description>\",\n",
|
||||
" metadata={\"optional_metadata\": \"associated with the collection\"},\n",
|
||||
" is_auto_embedded=True, # we'll have Zep embed our documents using its low-latency embedder\n",
|
||||
" embedding_dimensions=1536 # this should match the model you've configured Zep to use.\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# load the document\n",
|
||||
"article_url = \"https://www.gutenberg.org/cache/epub/71292/pg71292.txt\"\n",
|
||||
"loader = WebBaseLoader(article_url)\n",
|
||||
"documents = loader.load()\n",
|
||||
"\n",
|
||||
"# split it into chunks\n",
|
||||
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"# Instantiate the VectorStore. Since the collection does not already exist in Zep,\n",
|
||||
"# it will be created and populated with the documents we pass in.\n",
|
||||
"vs = ZepVectorStore.from_documents(docs,\n",
|
||||
" collection_name=collection_name,\n",
|
||||
" config=config,\n",
|
||||
" api_url=ZEP_API_URL,\n",
|
||||
" api_key=ZEP_API_KEY\n",
|
||||
" )"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-13T01:07:50.672390Z",
|
||||
"start_time": "2023-08-13T01:07:48.777799Z"
|
||||
}
|
||||
},
|
||||
"id": "519418421a32e4d"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Embedding status: 0/402 documents embedded\n",
|
||||
"Embedding status: 0/402 documents embedded\n",
|
||||
"Embedding status: 402/402 documents embedded\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# wait for the collection embedding to complete\n",
|
||||
"\n",
|
||||
"async def wait_for_ready(collection_name: str) -> None:\n",
|
||||
" from zep_python import ZepClient\n",
|
||||
" import time\n",
|
||||
"\n",
|
||||
" client = ZepClient(ZEP_API_URL, ZEP_API_KEY)\n",
|
||||
"\n",
|
||||
" while True:\n",
|
||||
" c = await client.document.aget_collection(collection_name)\n",
|
||||
" print(\n",
|
||||
" \"Embedding status: \"\n",
|
||||
" f\"{c.document_embedded_count}/{c.document_count} documents embedded\"\n",
|
||||
" )\n",
|
||||
" time.sleep(1)\n",
|
||||
" if c.status == \"ready\":\n",
|
||||
" break\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"await wait_for_ready(collection_name)"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-13T01:07:53.807663Z",
|
||||
"start_time": "2023-08-13T01:07:50.671241Z"
|
||||
}
|
||||
},
|
||||
"id": "201dc57b124cb6d7"
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Simarility Search Query over the Collection"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "94ca9dfa7d0ecaa5"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tables necessary to determine the places of the planets are not less\r\n",
|
||||
"necessary than those for the sun, moon, and stars. Some notion of the\r\n",
|
||||
"number and complexity of these tables may be formed, when we state that\r\n",
|
||||
"the positions of the two principal planets, (and these the most\r\n",
|
||||
"necessary for the navigator,) Jupiter and Saturn, require each not less\r\n",
|
||||
"than one hundred and sixteen tables. Yet it is not only necessary to\r\n",
|
||||
"predict the position of these bodies, but it is likewise expedient to -> 0.8998482592744614 \n",
|
||||
"====\n",
|
||||
"\n",
|
||||
"tabulate the motions of the four satellites of Jupiter, to predict the\r\n",
|
||||
"exact times at which they enter his shadow, and at which their shadows\r\n",
|
||||
"cross his disc, as well as the times at which they are interposed\r\n",
|
||||
"between him and the Earth, and he between them and the Earth.\r\n",
|
||||
"\r\n",
|
||||
"Among the extensive classes of tables here enumerated, there are several\r\n",
|
||||
"which are in their nature permanent and unalterable, and would never\r\n",
|
||||
"require to be recomputed, if they could once be computed with perfect -> 0.8976143854195493 \n",
|
||||
"====\n",
|
||||
"\n",
|
||||
"the scheme of notation thus applied, immediately suggested the\r\n",
|
||||
"advantages which must attend it as an instrument for expressing the\r\n",
|
||||
"structure, operation, and circulation of the animal system; and we\r\n",
|
||||
"entertain no doubt of its adequacy for that purpose. Not only the\r\n",
|
||||
"mechanical connexion of the solid members of the bodies of men and\r\n",
|
||||
"animals, but likewise the structure and operation of the softer parts,\r\n",
|
||||
"including the muscles, integuments, membranes, &c. the nature, motion, -> 0.889982614061763 \n",
|
||||
"====\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# query it\n",
|
||||
"query = \"what is the structure of our solar system?\"\n",
|
||||
"docs_scores = await vs.asimilarity_search_with_relevance_scores(query, k=3)\n",
|
||||
"\n",
|
||||
"# print results\n",
|
||||
"for d, s in docs_scores:\n",
|
||||
" print(d.page_content, \" -> \", s, \"\\n====\\n\")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-13T01:07:54.195988Z",
|
||||
"start_time": "2023-08-13T01:07:53.808550Z"
|
||||
}
|
||||
},
|
||||
"id": "1998de0a96fe89c3"
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Search over Collection Re-ranked by MMR"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "e02b61a9af0b2c80"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tables necessary to determine the places of the planets are not less\r\n",
|
||||
"necessary than those for the sun, moon, and stars. Some notion of the\r\n",
|
||||
"number and complexity of these tables may be formed, when we state that\r\n",
|
||||
"the positions of the two principal planets, (and these the most\r\n",
|
||||
"necessary for the navigator,) Jupiter and Saturn, require each not less\r\n",
|
||||
"than one hundred and sixteen tables. Yet it is not only necessary to\r\n",
|
||||
"predict the position of these bodies, but it is likewise expedient to \n",
|
||||
"====\n",
|
||||
"\n",
|
||||
"the scheme of notation thus applied, immediately suggested the\r\n",
|
||||
"advantages which must attend it as an instrument for expressing the\r\n",
|
||||
"structure, operation, and circulation of the animal system; and we\r\n",
|
||||
"entertain no doubt of its adequacy for that purpose. Not only the\r\n",
|
||||
"mechanical connexion of the solid members of the bodies of men and\r\n",
|
||||
"animals, but likewise the structure and operation of the softer parts,\r\n",
|
||||
"including the muscles, integuments, membranes, &c. the nature, motion, \n",
|
||||
"====\n",
|
||||
"\n",
|
||||
"tabulate the motions of the four satellites of Jupiter, to predict the\r\n",
|
||||
"exact times at which they enter his shadow, and at which their shadows\r\n",
|
||||
"cross his disc, as well as the times at which they are interposed\r\n",
|
||||
"between him and the Earth, and he between them and the Earth.\r\n",
|
||||
"\r\n",
|
||||
"Among the extensive classes of tables here enumerated, there are several\r\n",
|
||||
"which are in their nature permanent and unalterable, and would never\r\n",
|
||||
"require to be recomputed, if they could once be computed with perfect \n",
|
||||
"====\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"what is the structure of our solar system?\"\n",
|
||||
"docs = await vs.asearch(query, search_type=\"mmr\", k=3)\n",
|
||||
"\n",
|
||||
"for d in docs:\n",
|
||||
" print(d.page_content, \"\\n====\\n\")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-13T01:07:54.394873Z",
|
||||
"start_time": "2023-08-13T01:07:54.180901Z"
|
||||
}
|
||||
},
|
||||
"id": "488112da752b1d58"
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"# Filter by Metadata\n",
|
||||
"\n",
|
||||
"Use a metadata filter to narrow down results. First, load another book: \"Adventures of Sherlock Holmes\""
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "42455e31d4ab0d68"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Embedding status: 402/1692 documents embedded\n",
|
||||
"Embedding status: 402/1692 documents embedded\n",
|
||||
"Embedding status: 552/1692 documents embedded\n",
|
||||
"Embedding status: 702/1692 documents embedded\n",
|
||||
"Embedding status: 1002/1692 documents embedded\n",
|
||||
"Embedding status: 1002/1692 documents embedded\n",
|
||||
"Embedding status: 1152/1692 documents embedded\n",
|
||||
"Embedding status: 1302/1692 documents embedded\n",
|
||||
"Embedding status: 1452/1692 documents embedded\n",
|
||||
"Embedding status: 1602/1692 documents embedded\n",
|
||||
"Embedding status: 1692/1692 documents embedded\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Let's add more content to the existing Collection\n",
|
||||
"article_url = \"https://www.gutenberg.org/files/48320/48320-0.txt\"\n",
|
||||
"loader = WebBaseLoader(article_url)\n",
|
||||
"documents = loader.load()\n",
|
||||
"\n",
|
||||
"# split it into chunks\n",
|
||||
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"await vs.aadd_documents(docs)\n",
|
||||
"\n",
|
||||
"await wait_for_ready(collection_name)"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-13T01:08:06.323569Z",
|
||||
"start_time": "2023-08-13T01:07:54.381822Z"
|
||||
}
|
||||
},
|
||||
"id": "146c8a96201c0ab9"
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### We see results from both books. Note the `source` metadata"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "5b225f3ae1e61de8"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"by that body to Mr Babbage:--'In no department of science, or of the\r\n",
|
||||
"arts, does this discovery promise to be so eminently useful as in that\r\n",
|
||||
"of astronomy, and its kindred sciences, with the various arts dependent\r\n",
|
||||
"on them. In none are computations more operose than those which\r\n",
|
||||
"astronomy in particular requires;--in none are preparatory facilities\r\n",
|
||||
"more needful;--in none is error more detrimental. The practical\r\n",
|
||||
"astronomer is interrupted in his pursuit, and diverted from his task of -> {'source': 'https://www.gutenberg.org/cache/epub/71292/pg71292.txt'} \n",
|
||||
"====\n",
|
||||
"\n",
|
||||
"possess all knowledge which is likely to be useful to him in his work,\r\n",
|
||||
"and this I have endeavored in my case to do. If I remember rightly, you\r\n",
|
||||
"on one occasion, in the early days of our friendship, defined my limits\r\n",
|
||||
"in a very precise fashion.”\r\n",
|
||||
"\r\n",
|
||||
"“Yes,” I answered, laughing. “It was a singular document. Philosophy,\r\n",
|
||||
"astronomy, and politics were marked at zero, I remember. Botany\r\n",
|
||||
"variable, geology profound as regards the mud-stains from any region -> {'source': 'https://www.gutenberg.org/files/48320/48320-0.txt'} \n",
|
||||
"====\n",
|
||||
"\n",
|
||||
"in all its relations; but above all, with Astronomy and Navigation. So\r\n",
|
||||
"important have they been considered, that in many instances large sums\r\n",
|
||||
"have been appropriated by the most enlightened nations in the production\r\n",
|
||||
"of them; and yet so numerous and insurmountable have been the\r\n",
|
||||
"difficulties attending the attainment of this end, that after all, even\r\n",
|
||||
"navigators, putting aside every other department of art and science,\r\n",
|
||||
"have, until very recently, been scantily and imperfectly supplied with -> {'source': 'https://www.gutenberg.org/cache/epub/71292/pg71292.txt'} \n",
|
||||
"====\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"Was he interested in astronomy?\"\n",
|
||||
"docs = await vs.asearch(query, search_type=\"similarity\", k=3)\n",
|
||||
"\n",
|
||||
"for d in docs:\n",
|
||||
" print(d.page_content, \" -> \", d.metadata, \"\\n====\\n\")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-13T01:08:06.504769Z",
|
||||
"start_time": "2023-08-13T01:08:06.325435Z"
|
||||
}
|
||||
},
|
||||
"id": "53700a9cd817cde4"
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### Let's try again using a filter for only the Sherlock Holmes document."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "7b81d7cae351a1ec"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"possess all knowledge which is likely to be useful to him in his work,\r\n",
|
||||
"and this I have endeavored in my case to do. If I remember rightly, you\r\n",
|
||||
"on one occasion, in the early days of our friendship, defined my limits\r\n",
|
||||
"in a very precise fashion.”\r\n",
|
||||
"\r\n",
|
||||
"“Yes,” I answered, laughing. “It was a singular document. Philosophy,\r\n",
|
||||
"astronomy, and politics were marked at zero, I remember. Botany\r\n",
|
||||
"variable, geology profound as regards the mud-stains from any region -> {'source': 'https://www.gutenberg.org/files/48320/48320-0.txt'} \n",
|
||||
"====\n",
|
||||
"\n",
|
||||
"the light shining upon his strong-set aquiline features. So he sat as I\r\n",
|
||||
"dropped off to sleep, and so he sat when a sudden ejaculation caused me\r\n",
|
||||
"to wake up, and I found the summer sun shining into the apartment. The\r\n",
|
||||
"pipe was still between his lips, the smoke still curled upward, and the\r\n",
|
||||
"room was full of a dense tobacco haze, but nothing remained of the heap\r\n",
|
||||
"of shag which I had seen upon the previous night.\r\n",
|
||||
"\r\n",
|
||||
"“Awake, Watson?” he asked.\r\n",
|
||||
"\r\n",
|
||||
"“Yes.”\r\n",
|
||||
"\r\n",
|
||||
"“Game for a morning drive?” -> {'source': 'https://www.gutenberg.org/files/48320/48320-0.txt'} \n",
|
||||
"====\n",
|
||||
"\n",
|
||||
"“I glanced at the books upon the table, and in spite of my ignorance\r\n",
|
||||
"of German I could see that two of them were treatises on science, the\r\n",
|
||||
"others being volumes of poetry. Then I walked across to the window,\r\n",
|
||||
"hoping that I might catch some glimpse of the country-side, but an oak\r\n",
|
||||
"shutter, heavily barred, was folded across it. It was a wonderfully\r\n",
|
||||
"silent house. There was an old clock ticking loudly somewhere in the\r\n",
|
||||
"passage, but otherwise everything was deadly still. A vague feeling of -> {'source': 'https://www.gutenberg.org/files/48320/48320-0.txt'} \n",
|
||||
"====\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"filter = {\n",
|
||||
" \"where\": {\"jsonpath\": \"$[*] ? (@.source == 'https://www.gutenberg.org/files/48320/48320-0.txt')\"},\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"docs = await vs.asearch(query, search_type=\"similarity\", metadata=filter, k=3)\n",
|
||||
"\n",
|
||||
"for d in docs:\n",
|
||||
" print(d.page_content, \" -> \", d.metadata, \"\\n====\\n\")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-13T01:08:06.672836Z",
|
||||
"start_time": "2023-08-13T01:08:06.505944Z"
|
||||
}
|
||||
},
|
||||
"id": "8f1bdcba03979d22"
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -15,7 +15,7 @@
|
||||
"- destination_chains: chains that the router chain can route to\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"In this notebook we will focus on the different types of routing chains. We will show these routing chains used in a `MultiPromptChain` to create a question-answering chain that selects the prompt which is most relevant for a given question, and then answers the question using that prompt."
|
||||
"In this notebook, we will focus on the different types of routing chains. We will show these routing chains used in a `MultiPromptChain` to create a question-answering chain that selects the prompt which is most relevant for a given question, and then answers the question using that prompt."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -195,7 +195,7 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new MultiPromptChain chain...\u001b[0m\n",
|
||||
"math: {'input': 'What is the first prime number greater than 40 such that one plus the prime number is divisible by 3'}\n",
|
||||
"math: {'input': 'What is the first prime number greater than 40 such that one plus the prime number is divisible by 3?'}\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"?\n",
|
||||
"\n",
|
||||
@@ -206,7 +206,7 @@
|
||||
"source": [
|
||||
"print(\n",
|
||||
" chain.run(\n",
|
||||
" \"What is the first prime number greater than 40 such that one plus the prime number is divisible by 3\"\n",
|
||||
" \"What is the first prime number greater than 40 such that one plus the prime number is divisible by 3?\"\n",
|
||||
" )\n",
|
||||
")"
|
||||
]
|
||||
@@ -231,7 +231,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(chain.run(\"What is the name of the type of cloud that rins\"))"
|
||||
"print(chain.run(\"What is the name of the type of cloud that rains?\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -342,7 +342,7 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new MultiPromptChain chain...\u001b[0m\n",
|
||||
"math: {'input': 'What is the first prime number greater than 40 such that one plus the prime number is divisible by 3'}\n",
|
||||
"math: {'input': 'What is the first prime number greater than 40 such that one plus the prime number is divisible by 3?'}\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"?\n",
|
||||
"\n",
|
||||
@@ -353,7 +353,7 @@
|
||||
"source": [
|
||||
"print(\n",
|
||||
" chain.run(\n",
|
||||
" \"What is the first prime number greater than 40 such that one plus the prime number is divisible by 3\"\n",
|
||||
" \"What is the first prime number greater than 40 such that one plus the prime number is divisible by 3?\"\n",
|
||||
" )\n",
|
||||
")"
|
||||
]
|
||||
|
||||
@@ -83,10 +83,8 @@
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for extracting information in structured formats.\n",
|
||||
"Human: Use the given format to extract information from the following input:\n",
|
||||
"Human: Sally is 13\n",
|
||||
"Human: Tips: Make sure to answer in the correct format\u001b[0m\n",
|
||||
" {'function_call': {'name': '_OutputFormatter', 'arguments': '{\\n \"output\": {\\n \"name\": \"Sally\",\\n \"age\": 13,\\n \"fav_food\": \"Unknown\"\\n }\\n}'}}\n",
|
||||
"Human: Use the given format to extract information from the following input: Sally is 13\n",
|
||||
"Human: Tip: Make sure to answer in the correct format\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@@ -105,18 +103,13 @@
|
||||
"source": [
|
||||
"# If we pass in a model explicitly, we need to make sure it supports the OpenAI function-calling API.\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-4\", temperature=0)\n",
|
||||
"\n",
|
||||
"prompt_msgs = [\n",
|
||||
" SystemMessage(\n",
|
||||
" content=\"You are a world class algorithm for extracting information in structured formats.\"\n",
|
||||
" ),\n",
|
||||
" HumanMessage(\n",
|
||||
" content=\"Use the given format to extract information from the following input:\"\n",
|
||||
" ),\n",
|
||||
" HumanMessagePromptTemplate.from_template(\"{input}\"),\n",
|
||||
" HumanMessage(content=\"Tips: Make sure to answer in the correct format\"),\n",
|
||||
"]\n",
|
||||
"prompt = ChatPromptTemplate(messages=prompt_msgs)\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\"system\", \"You are a world class algorithm for extracting information in structured formats.\"),\n",
|
||||
" (\"human\", \"Use the given format to extract information from the following input: {input}\"),\n",
|
||||
" (\"human\", \"Tip: Make sure to answer in the correct format\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chain = create_structured_output_chain(Person, llm, prompt, verbose=True)\n",
|
||||
"chain.run(\"Sally is 13\")"
|
||||
@@ -132,7 +125,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"id": "4d8ea815",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -145,10 +138,8 @@
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for extracting information in structured formats.\n",
|
||||
"Human: Use the given format to extract information from the following input:\n",
|
||||
"Human: Sally is 13, Joey just turned 12 and loves spinach. Caroline is 10 years older than Sally, so she's 23.\n",
|
||||
"Human: Tips: Make sure to answer in the correct format\u001b[0m\n",
|
||||
" {'function_call': {'name': '_OutputFormatter', 'arguments': '{\\n \"output\": {\\n \"people\": [\\n {\\n \"name\": \"Sally\",\\n \"age\": 13,\\n \"fav_food\": \"\"\\n },\\n {\\n \"name\": \"Joey\",\\n \"age\": 12,\\n \"fav_food\": \"spinach\"\\n },\\n {\\n \"name\": \"Caroline\",\\n \"age\": 23,\\n \"fav_food\": \"\"\\n }\\n ]\\n }\\n}'}}\n",
|
||||
"Human: Use the given format to extract information from the following input: Sally is 13, Joey just turned 12 and loves spinach. Caroline is 10 years older than Sally.\n",
|
||||
"Human: Tip: Make sure to answer in the correct format\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@@ -159,7 +150,7 @@
|
||||
"People(people=[Person(name='Sally', age=13, fav_food=''), Person(name='Joey', age=12, fav_food='spinach'), Person(name='Caroline', age=23, fav_food='')])"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -176,7 +167,7 @@
|
||||
"\n",
|
||||
"chain = create_structured_output_chain(People, llm, prompt, verbose=True)\n",
|
||||
"chain.run(\n",
|
||||
" \"Sally is 13, Joey just turned 12 and loves spinach. Caroline is 10 years older than Sally, so she's 23.\"\n",
|
||||
" \"Sally is 13, Joey just turned 12 and loves spinach. Caroline is 10 years older than Sally.\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -229,10 +220,8 @@
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for extracting information in structured formats.\n",
|
||||
"Human: Use the given format to extract information from the following input:\n",
|
||||
"Human: Sally is 13\n",
|
||||
"Human: Tips: Make sure to answer in the correct format\u001b[0m\n",
|
||||
" {'function_call': {'name': 'output_formatter', 'arguments': '{\\n \"name\": \"Sally\",\\n \"age\": 13\\n}'}}\n",
|
||||
"Human: Use the given format to extract information from the following input: Sally is 13\n",
|
||||
"Human: Tip: Make sure to answer in the correct format\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@@ -279,7 +268,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 9,
|
||||
"id": "17f52508",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -302,7 +291,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 10,
|
||||
"id": "a4658ad8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -314,11 +303,9 @@
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for recording entities\n",
|
||||
"Human: Make calls to the relevant function to record the entities in the following input:\n",
|
||||
"Human: Harry was a chubby brown beagle who loved chicken\n",
|
||||
"Human: Tips: Make sure to answer in the correct format\u001b[0m\n",
|
||||
" {'function_call': {'name': 'RecordDog', 'arguments': '{\\n \"name\": \"Harry\",\\n \"color\": \"brown\",\\n \"fav_food\": \"chicken\"\\n}'}}\n",
|
||||
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for recording entities.\n",
|
||||
"Human: Make calls to the relevant function to record the entities in the following input: Harry was a chubby brown beagle who loved chicken\n",
|
||||
"Human: Tip: Make sure to answer in the correct format\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@@ -329,21 +316,19 @@
|
||||
"RecordDog(name='Harry', color='brown', fav_food='chicken')"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"prompt_msgs = [\n",
|
||||
" SystemMessage(content=\"You are a world class algorithm for recording entities\"),\n",
|
||||
" HumanMessage(\n",
|
||||
" content=\"Make calls to the relevant function to record the entities in the following input:\"\n",
|
||||
" ),\n",
|
||||
" HumanMessagePromptTemplate.from_template(\"{input}\"),\n",
|
||||
" HumanMessage(content=\"Tips: Make sure to answer in the correct format\"),\n",
|
||||
"]\n",
|
||||
"prompt = ChatPromptTemplate(messages=prompt_msgs)\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\"system\", \"You are a world class algorithm for recording entities.\"),\n",
|
||||
" (\"human\", \"Make calls to the relevant function to record the entities in the following input: {input}\"),\n",
|
||||
" (\"human\", \"Tip: Make sure to answer in the correct format\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chain = create_openai_fn_chain([RecordPerson, RecordDog], llm, prompt, verbose=True)\n",
|
||||
"chain.run(\"Harry was a chubby brown beagle who loved chicken\")"
|
||||
@@ -362,7 +347,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 11,
|
||||
"id": "95ac5825",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -374,11 +359,9 @@
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for recording entities\n",
|
||||
"Human: Make calls to the relevant function to record the entities in the following input:\n",
|
||||
"Human: The most important thing to remember about Tommy, my 12 year old, is that he'll do anything for apple pie.\n",
|
||||
"Human: Tips: Make sure to answer in the correct format\u001b[0m\n",
|
||||
" {'function_call': {'name': 'record_person', 'arguments': '{\\n \"name\": \"Tommy\",\\n \"age\": 12,\\n \"fav_food\": {\\n \"food\": \"apple pie\"\\n }\\n}'}}\n",
|
||||
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for recording entities.\n",
|
||||
"Human: Make calls to the relevant function to record the entities in the following input: The most important thing to remember about Tommy, my 12 year old, is that he'll do anything for apple pie.\n",
|
||||
"Human: Tip: Make sure to answer in the correct format\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@@ -389,7 +372,7 @@
|
||||
"{'name': 'Tommy', 'age': 12, 'fav_food': {'food': 'apple pie'}}"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -434,7 +417,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 12,
|
||||
"id": "8b0d11de",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -446,11 +429,9 @@
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for recording entities\n",
|
||||
"Human: Make calls to the relevant function to record the entities in the following input:\n",
|
||||
"Human: I can't find my dog Henry anywhere, he's a small brown beagle. Could you send a message about him?\n",
|
||||
"Human: Tips: Make sure to answer in the correct format\u001b[0m\n",
|
||||
" {'function_call': {'name': 'record_dog', 'arguments': '{\\n \"name\": \"Henry\",\\n \"color\": \"brown\",\\n \"fav_food\": {\\n \"food\": null\\n }\\n}'}}\n",
|
||||
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for recording entities.\n",
|
||||
"Human: Make calls to the relevant function to record the entities in the following input: I can't find my dog Henry anywhere, he's a small brown beagle. Could you send a message about him?\n",
|
||||
"Human: Tip: Make sure to answer in the correct format\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@@ -462,7 +443,7 @@
|
||||
" 'arguments': {'name': 'Henry', 'color': 'brown', 'fav_food': {'food': None}}}"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -502,9 +483,9 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "venv",
|
||||
"display_name": "poetry-venv",
|
||||
"language": "python",
|
||||
"name": "venv"
|
||||
"name": "poetry-venv"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
@@ -516,7 +497,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -72,7 +72,7 @@
|
||||
"source": [
|
||||
"## Retrieving Full Documents\n",
|
||||
"\n",
|
||||
"In this mode, we want to retrieve the full documents. Therefor, we only specify a child splitter."
|
||||
"In this mode, we want to retrieve the full documents. Therefore, we only specify a child splitter."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -144,7 +144,7 @@
|
||||
"id": "f895d62b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's now call the vectorstore search functionality - we should see that it returns small chunks (since we're storing the small chunks"
|
||||
"Let's now call the vectorstore search functionality - we should see that it returns small chunks (since we're storing the small chunks)."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
"id": "13afcae7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# DeepLake self-querying \n",
|
||||
"# Deep Lake self-querying \n",
|
||||
"\n",
|
||||
">[DeepLake](https://www.activeloop.ai) is a multimodal database for building AI applications.\n",
|
||||
"\n",
|
||||
@@ -19,7 +19,7 @@
|
||||
"id": "68e75fb9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Creating a DeepLake vectorstore\n",
|
||||
"## Creating a Deep Lake vectorstore\n",
|
||||
"First we'll want to create a DeepLake VectorStore and seed it with some data. We've created a small demo set of documents that contain summaries of movies.\n",
|
||||
"\n",
|
||||
"NOTE: The self-query retriever requires you to have `lark` installed (`pip install lark`). We also need the `deeplake` package."
|
||||
@@ -27,26 +27,23 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "63a8af5b",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"execution_count": null,
|
||||
"id": "a798fe66",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install lark"
|
||||
"# !pip install lark"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "22431060-52c4-48a7-a97b-9f542b8b0928",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"id": "43821a8e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install 'deeplake[enterprise]'"
|
||||
"# in case if some queries fail consider installing libdeeplake manually\n",
|
||||
"# !pip install libdeeplake"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -60,7 +57,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"id": "dd01b61b-7d32-4a55-85d6-b2d2d4f18840",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -70,12 +67,13 @@
|
||||
"import os\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",
|
||||
"os.environ[\"ACTIVELOOP_TOKEN\"] = getpass.getpass(\"Activeloop token:\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"id": "cb4a5787",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -108,7 +106,7 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"-"
|
||||
"/"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -166,9 +164,9 @@
|
||||
" },\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"username_or_org = \"<USER_NAME_OR_ORG>\"\n",
|
||||
"username_or_org = \"<USERNAME_OR_ORG>\"\n",
|
||||
"vectorstore = DeepLake.from_documents(\n",
|
||||
" docs, embeddings, dataset_path=f\"hub://{username_or_org}/self_queery\"\n",
|
||||
" docs, embeddings, dataset_path=f\"hub://{username_or_org}/self_queery\", overwrite=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -185,10 +183,8 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "86e34dbf",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"id": "c90b0b40",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI\n",
|
||||
@@ -242,7 +238,7 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/adilkhansarsen/Documents/work/LangChain/langchain/langchain/chains/llm.py:275: UserWarning: The predict_and_parse method is deprecated, instead pass an output parser directly to LLMChain.\n",
|
||||
"/home/ubuntu/langchain_activeloop/langchain/libs/langchain/langchain/chains/llm.py:279: UserWarning: The predict_and_parse method is deprecated, instead pass an output parser directly to LLMChain.\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
},
|
||||
@@ -299,7 +295,9 @@
|
||||
],
|
||||
"source": [
|
||||
"# This example only specifies a filter\n",
|
||||
"retriever.get_relevant_documents(\"I want to watch a movie rated higher than 8.5\")"
|
||||
"retriever.get_relevant_documents(\"I want to watch a movie rated higher than 8.5\")\n",
|
||||
"\n",
|
||||
"# in case if this example errored out, consider installing libdeeplake manually: `pip install libdeeplake`, and then restart notebook."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -456,14 +454,6 @@
|
||||
"# This example only specifies a relevant query\n",
|
||||
"retriever.get_relevant_documents(\"what are two movies about dinosaurs\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c93f0847-cbd9-4c25-aed1-91588e856b5c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -482,7 +472,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.6"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
@@ -0,0 +1,362 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "13afcae7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Elasticsearch self-querying "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "68e75fb9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Creating a Elasticsearch vectorstore\n",
|
||||
"First we'll want to create a Elasticsearch VectorStore and seed it with some data. We've created a small demo set of documents that contain summaries of movies.\n",
|
||||
"\n",
|
||||
"NOTE: The self-query retriever requires you to have `lark` installed (`pip install lark`). We also need the `elasticsearch` package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "63a8af5b",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install lark elasticsearch"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cb4a5787",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.schema import Document\n",
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.vectorstores import ElasticsearchStore\n",
|
||||
"import os\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "bcbe04d9",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = [\n",
|
||||
" Document(\n",
|
||||
" page_content=\"A bunch of scientists bring back dinosaurs and mayhem breaks loose\",\n",
|
||||
" metadata={\"year\": 1993, \"rating\": 7.7, \"genre\": \"science fiction\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Leo DiCaprio gets lost in a dream within a dream within a dream within a ...\",\n",
|
||||
" metadata={\"year\": 2010, \"director\": \"Christopher Nolan\", \"rating\": 8.2},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea\",\n",
|
||||
" metadata={\"year\": 2006, \"director\": \"Satoshi Kon\", \"rating\": 8.6},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"A bunch of normal-sized women are supremely wholesome and some men pine after them\",\n",
|
||||
" metadata={\"year\": 2019, \"director\": \"Greta Gerwig\", \"rating\": 8.3},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Toys come alive and have a blast doing so\",\n",
|
||||
" metadata={\"year\": 1995, \"genre\": \"animated\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Three men walk into the Zone, three men walk out of the Zone\",\n",
|
||||
" metadata={\n",
|
||||
" \"year\": 1979,\n",
|
||||
" \"rating\": 9.9,\n",
|
||||
" \"director\": \"Andrei Tarkovsky\",\n",
|
||||
" \"genre\": \"science fiction\",\n",
|
||||
" \"rating\": 9.9,\n",
|
||||
" },\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"vectorstore = ElasticsearchStore.from_documents(\n",
|
||||
" docs, embeddings, index_name=\"elasticsearch-self-query-demo\", es_url=\"http://localhost:9200\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5ecaab6d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Creating our self-querying retriever\n",
|
||||
"Now we can instantiate our retriever. To do this we'll need to provide some information upfront about the metadata fields that our documents support and a short description of the document contents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "86e34dbf",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
|
||||
"from langchain.chains.query_constructor.base import AttributeInfo\n",
|
||||
"\n",
|
||||
"metadata_field_info = [\n",
|
||||
" AttributeInfo(\n",
|
||||
" name=\"genre\",\n",
|
||||
" description=\"The genre of the movie\",\n",
|
||||
" type=\"string or list[string]\",\n",
|
||||
" ),\n",
|
||||
" AttributeInfo(\n",
|
||||
" name=\"year\",\n",
|
||||
" description=\"The year the movie was released\",\n",
|
||||
" type=\"integer\",\n",
|
||||
" ),\n",
|
||||
" AttributeInfo(\n",
|
||||
" name=\"director\",\n",
|
||||
" description=\"The name of the movie director\",\n",
|
||||
" type=\"string\",\n",
|
||||
" ),\n",
|
||||
" AttributeInfo(\n",
|
||||
" name=\"rating\", description=\"A 1-10 rating for the movie\", type=\"float\"\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"document_content_description = \"Brief summary of a movie\"\n",
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"retriever = SelfQueryRetriever.from_llm(\n",
|
||||
" llm, vectorstore, document_content_description, metadata_field_info, verbose=True\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ea9df8d4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Testing it out\n",
|
||||
"And now we can try actually using our retriever!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "38a126e9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"query='dinosaur' filter=None limit=None\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose', metadata={'year': 1993, 'rating': 7.7, 'genre': 'science fiction'}),\n",
|
||||
" Document(page_content='Toys come alive and have a blast doing so', metadata={'year': 1995, 'genre': 'animated'}),\n",
|
||||
" Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'year': 1979, 'rating': 9.9, 'director': 'Andrei Tarkovsky', 'genre': 'science fiction'}),\n",
|
||||
" Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'year': 2006, 'director': 'Satoshi Kon', 'rating': 8.6})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example only specifies a relevant query\n",
|
||||
"retriever.get_relevant_documents(\"What are some movies about dinosaurs\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "b19d4da0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"query='women' filter=Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='director', value='Greta Gerwig') limit=None\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='A bunch of normal-sized women are supremely wholesome and some men pine after them', metadata={'year': 2019, 'director': 'Greta Gerwig', 'rating': 8.3})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example specifies a query and a filter\n",
|
||||
"retriever.get_relevant_documents(\"Has Greta Gerwig directed any movies about women\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "39bd1de1-b9fe-4a98-89da-58d8a7a6ae51",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Filter k\n",
|
||||
"\n",
|
||||
"We can also use the self query retriever to specify `k`: the number of documents to fetch.\n",
|
||||
"\n",
|
||||
"We can do this by passing `enable_limit=True` to the constructor."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "bff36b88-b506-4877-9c63-e5a1a8d78e64",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = SelfQueryRetriever.from_llm(\n",
|
||||
" llm,\n",
|
||||
" vectorstore,\n",
|
||||
" document_content_description,\n",
|
||||
" metadata_field_info,\n",
|
||||
" enable_limit=True,\n",
|
||||
" verbose=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "2758d229-4f97-499c-819f-888acaf8ee10",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"query='dinosaur' filter=None limit=2\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose', metadata={'year': 1993, 'rating': 7.7, 'genre': 'science fiction'}),\n",
|
||||
" Document(page_content='Toys come alive and have a blast doing so', metadata={'year': 1995, 'genre': 'animated'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example only specifies a relevant query\n",
|
||||
"retriever.get_relevant_documents(\"what are two movies about dinosaurs\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "61a10294",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Complex queries in Action!\n",
|
||||
"We've tried out some simple queries, but what about more complex ones? Let's try out a few more complex queries that utilize the full power of Elasticsearch."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "e460da93",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"query='animated toys' filter=Operation(operator=<Operator.AND: 'and'>, arguments=[Operation(operator=<Operator.OR: 'or'>, arguments=[Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='genre', value='animated'), Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='genre', value='comedy')]), Comparison(comparator=<Comparator.GTE: 'gte'>, attribute='year', value=1990)]) limit=None\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='Toys come alive and have a blast doing so', metadata={'year': 1995, 'genre': 'animated'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"retriever.get_relevant_documents(\"what animated or comedy movies have been released in the last 30 years about animated toys?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "0851fc42",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"ObjectApiResponse({'acknowledged': True})"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"vectorstore.client.indices.delete(index=\"elasticsearch-self-query-demo\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -544,7 +544,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"prompt_template = FeatureformPrompTemplate(input_variables=[\"user_id\"])"
|
||||
"prompt_template = FeatureformPromptTemplate(input_variables=[\"user_id\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -282,7 +282,7 @@
|
||||
"user_agent.reset()\n",
|
||||
"\n",
|
||||
"# Initialize chats\n",
|
||||
"assistant_msg = HumanMessage(\n",
|
||||
"user_msg = HumanMessage(\n",
|
||||
" content=(\n",
|
||||
" f\"{user_sys_msg.content}. \"\n",
|
||||
" \"Now start to give me introductions one by one. \"\n",
|
||||
@@ -290,8 +290,8 @@
|
||||
" )\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"user_msg = HumanMessage(content=f\"{assistant_sys_msg.content}\")\n",
|
||||
"user_msg = assistant_agent.step(user_msg)"
|
||||
"assistant_msg = HumanMessage(content=f\"{assistant_sys_msg.content}\")\n",
|
||||
"assistant_msg = assistant_agent.step(user_msg)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -1,3 +1,7 @@
|
||||
---
|
||||
sidebar_position: 0
|
||||
---
|
||||
|
||||
# Agent simulations
|
||||
|
||||
Agent simulations involve interacting one of more agents with each other.
|
||||
|
||||
@@ -282,7 +282,7 @@
|
||||
"user_agent.reset()\n",
|
||||
"\n",
|
||||
"# Initialize chats\n",
|
||||
"assistant_msg = HumanMessage(\n",
|
||||
"user_msg = HumanMessage(\n",
|
||||
" content=(\n",
|
||||
" f\"{user_sys_msg.content}. \"\n",
|
||||
" \"Now start to give me introductions one by one. \"\n",
|
||||
@@ -290,8 +290,8 @@
|
||||
" )\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"user_msg = HumanMessage(content=f\"{assistant_sys_msg.content}\")\n",
|
||||
"user_msg = assistant_agent.step(user_msg)"
|
||||
"assistant_msg = HumanMessage(content=f\"{assistant_sys_msg.content}\")\n",
|
||||
"assistant_msg = assistant_agent.step(user_msg)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -1,3 +1,7 @@
|
||||
---
|
||||
sidebar_position: 0
|
||||
---
|
||||
|
||||
# Agents
|
||||
|
||||
Agents can be used for a variety of tasks.
|
||||
|
||||
@@ -123,7 +123,7 @@
|
||||
"wikidata_user_agent_header = (\n",
|
||||
" None\n",
|
||||
" if not config.has_section(\"WIKIDATA\")\n",
|
||||
" else config[\"WIKIDATA\"][\"WIKIDAtA_USER_AGENT_HEADER\"]\n",
|
||||
" else config[\"WIKIDATA\"][\"WIKIDATA_USER_AGENT_HEADER\"]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -35,7 +35,7 @@
|
||||
"source": [
|
||||
"## Quickstart \n",
|
||||
"\n",
|
||||
"Many APIs already are compatible with OpenAI function calling.\n",
|
||||
"Many APIs are already compatible with OpenAI function calling.\n",
|
||||
"\n",
|
||||
"For example, [Klarna](https://www.klarna.com/international/press/klarna-brings-smoooth-shopping-to-chatgpt/) has a YAML file that describes its API and allows OpenAI to interact with it:\n",
|
||||
"\n",
|
||||
@@ -145,7 +145,7 @@
|
||||
"source": [
|
||||
"## Functions \n",
|
||||
"\n",
|
||||
"We can unpack what is hapening when we use the funtions to calls external APIs.\n",
|
||||
"We can unpack what is hapening when we use the functions to calls external APIs.\n",
|
||||
"\n",
|
||||
"Let's look at the [LangSmith trace](https://smith.langchain.com/public/76a58b85-193f-4eb7-ba40-747f0d5dd56e/r):\n",
|
||||
"\n",
|
||||
|
||||
@@ -1,3 +1,7 @@
|
||||
---
|
||||
sidebar_position: 0
|
||||
---
|
||||
|
||||
# Autonomous (long-running) agents
|
||||
|
||||
Autonomous Agents are agents that designed to be more long running.
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
"source": [
|
||||
"# Chatbots\n",
|
||||
"\n",
|
||||
"[](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/extras/use_cases/chatbots/chatbots.ipynb)\n",
|
||||
"[](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/extras/use_cases/chatbots.ipynb)\n",
|
||||
"\n",
|
||||
"## Use case\n",
|
||||
"\n",
|
||||
|
||||
@@ -130,7 +130,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Splittng\n",
|
||||
"### Splitting\n",
|
||||
"\n",
|
||||
"Split the `Document` into chunks for embedding and vector storage.\n",
|
||||
"\n",
|
||||
|
||||
2
docs/extras/use_cases/more/_category_.yml
Normal file
2
docs/extras/use_cases/more/_category_.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
label: 'More'
|
||||
position: 1
|
||||
@@ -1,3 +1,7 @@
|
||||
---
|
||||
sidebar_position: 0
|
||||
---
|
||||
|
||||
# Code writing
|
||||
|
||||
:::warning
|
||||
@@ -17,7 +17,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import PALChain\n",
|
||||
"from langchain_experimental.pal_chain import PALChain\n",
|
||||
"from langchain import OpenAI"
|
||||
]
|
||||
},
|
||||
@@ -9,7 +9,7 @@
|
||||
"source": [
|
||||
"# ArangoDB QA chain\n",
|
||||
"\n",
|
||||
"[](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/extras/use_cases/graph/graph_arangodb_qa.ipynb)\n",
|
||||
"[](https://colab.research.google.com/github/arangodb/interactive_tutorials/blob/master/notebooks/Langchain.ipynb)\n",
|
||||
"\n",
|
||||
"This notebook shows how to use LLMs to provide a natural language interface to an [ArangoDB](https://github.com/arangodb/arangodb#readme) database."
|
||||
]
|
||||
@@ -1,3 +1,7 @@
|
||||
---
|
||||
sidebar_position: 0
|
||||
---
|
||||
|
||||
# Self-checking
|
||||
|
||||
One of the main issues with using LLMs is that they can often hallucinate and make false claims. One of the surprisingly effective ways to remediate this is to use the LLM itself to check its own answers.
|
||||
@@ -1 +1,2 @@
|
||||
label: 'Multi-modal'
|
||||
position: 0
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,5 @@
|
||||
---
|
||||
sidebar_position: 0
|
||||
sidebar_position: -1
|
||||
---
|
||||
|
||||
# QA over Documents
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -11,6 +12,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -27,6 +29,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -34,6 +37,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
@@ -69,6 +73,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -78,6 +83,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -96,9 +102,52 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Document(page_content='Participants:\\n\\nJerry: Loves movies and is a bit of a klutz.\\nSamantha: Enthusiastic about food and always trying new restaurants.\\nBarry: A nature lover, but always manages to get lost.\\nJerry: Hey, guys! You won\\'t believe what happened to me at the Times Square AMC theater. I tripped over my own feet and spilled popcorn everywhere! 🍿💥\\n\\nSamantha: LOL, that\\'s so you, Jerry! Was the floor buttery enough for you to ice skate on after that? 😂\\n\\nBarry: Sounds like a regular Tuesday for you, Jerry. Meanwhile, I tried to find that new hiking trail in Central Park. You know, the one that\\'s supposed to be impossible to get lost on? Well, guess what...\\n\\nJerry: You found a hidden treasure?\\n\\nBarry: No, I got lost. AGAIN. 🧭🙄\\n\\nSamantha: Barry, you\\'d get lost in your own backyard! But speaking of treasures, I found this new sushi place in Little Tokyo. \"Samantha\\'s Sushi Symphony\" it\\'s called. Coincidence? I think not!\\n\\nJerry: Maybe they named it after your ability to eat your body weight in sushi. 🍣', metadata={}), Document(page_content='Barry: How do you even FIND all these places, Samantha?\\n\\nSamantha: Simple, I don\\'t rely on Barry\\'s navigation skills. 😉 But seriously, the wasabi there was hotter than Jerry\\'s love for Marvel movies!\\n\\nJerry: Hey, nothing wrong with a little superhero action. By the way, did you guys see the new \"Captain Crunch: Breakfast Avenger\" trailer?\\n\\nSamantha: Captain Crunch? Are you sure you didn\\'t get that from one of your Saturday morning cereal binges?\\n\\nBarry: Yeah, and did he defeat his arch-enemy, General Mills? 😆\\n\\nJerry: Ha-ha, very funny. Anyway, that sushi place sounds awesome, Samantha. Next time, let\\'s go together, and maybe Barry can guide us... if we want a city-wide tour first.\\n\\nBarry: As long as we\\'re not hiking, I\\'ll get us there... eventually. 😅\\n\\nSamantha: It\\'s a date! But Jerry, you\\'re banned from carrying any food items.\\n\\nJerry: Deal! Just promise me no wasabi challenges. I don\\'t want to end up like the time I tried Sriracha ice cream.', metadata={}), Document(page_content=\"Barry: Wait, what happened with Sriracha ice cream?\\n\\nJerry: Let's just say it was a hot situation. Literally. 🔥\\n\\nSamantha: 🤣 I still have the video!\\n\\nJerry: Samantha, if you value our friendship, that video will never see the light of day.\\n\\nSamantha: No promises, Jerry. No promises. 🤐😈\\n\\nBarry: I foresee a fun weekend ahead! 🎉\", metadata={})]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Your Deep Lake dataset has been successfully created!\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\\"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='hub://adilkhan/data', tensors=['embedding', 'id', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding embedding (3, 1536) float32 None \n",
|
||||
" id text (3, 1) str None \n",
|
||||
" metadata json (3, 1) str None \n",
|
||||
" text text (3, 1) str None \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" \r"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"with open(\"messages.txt\") as f:\n",
|
||||
" state_of_the_union = f.read()\n",
|
||||
@@ -110,7 +159,7 @@
|
||||
"\n",
|
||||
"print(texts)\n",
|
||||
"\n",
|
||||
"dataset_path = \"hub://\" + org + \"/data\"\n",
|
||||
"dataset_path = \"hub://\" + org_id + \"/data\"\n",
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"db = DeepLake.from_documents(\n",
|
||||
" texts, embeddings, dataset_path=dataset_path, overwrite=True\n",
|
||||
@@ -118,6 +167,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -126,7 +176,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -143,11 +193,12 @@
|
||||
"# dataset_path = \"hub://\" + org + \"/data\"\n",
|
||||
"# embeddings = OpenAIEmbeddings()\n",
|
||||
"# db = DeepLake.from_documents(\n",
|
||||
"# texts, embeddings, dataset_path=dataset_path, overwrite=True, runtime=\"tensor_db\"\n",
|
||||
"# texts, embeddings, dataset_path=dataset_path, overwrite=True, runtime={\"tensor_db\": True}\n",
|
||||
"# )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -162,7 +213,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)\n",
|
||||
"db = DeepLake(dataset_path=dataset_path, read_only=True, embedding=embeddings)\n",
|
||||
"\n",
|
||||
"retriever = db.as_retriever()\n",
|
||||
"retriever.search_kwargs[\"distance_metric\"] = \"cos\"\n",
|
||||
@@ -180,13 +231,6 @@
|
||||
"\n",
|
||||
"print(ans)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -205,7 +249,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
"id": "6605e7f7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Web scraping\n",
|
||||
"# Web Scraping\n",
|
||||
"\n",
|
||||
"[](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/extras/use_cases/web_scraping.ipynb)\n",
|
||||
"\n",
|
||||
|
||||
@@ -40,7 +40,7 @@ pip install 'langchain[all]'
|
||||
|
||||
## From source
|
||||
|
||||
If you want to install from source, you can do so by cloning the repo and running:
|
||||
If you want to install from source, you can do so by cloning the repo and be sure that the directory is `PATH/TO/REPO/langchain/libs/langchain` running:
|
||||
|
||||
```bash
|
||||
pip install -e .
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
## Using PyPDF
|
||||
# Using PyPDF
|
||||
|
||||
Load PDF using `pypdf` into array of documents, where each document contains the page content and metadata with `page` number.
|
||||
|
||||
@@ -389,3 +389,17 @@ data[0]
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
## Using AmazonTextractPDFParser
|
||||
|
||||
The AmazonTextractPDFLoader calls the [Amazon Textract Service](https://aws.amazon.com/textract/) to convert PDFs into a Document structure. The loader does pure OCR at the moment, with more features like layout support planned, depending on demand. Single and multi-page documents are supported with up to 3000 pages and 512 MB of size.
|
||||
|
||||
For the call to be successful an AWS account is required, similar to the [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html) requirements.
|
||||
|
||||
Besides the AWS configuration, it is very similar to the other PDF loaders, while also supporting JPEG, PNG and TIFF and non-native PDF formats.
|
||||
|
||||
```python
|
||||
from langchain.document_loaders import AmazonTextractPDFLoader
|
||||
loader = AmazonTextractPDFLoader("example_data/alejandro_rosalez_sample-small.jpeg")
|
||||
documents = loader.load()
|
||||
```
|
||||
@@ -5,34 +5,24 @@ from langchain.llms import OpenAI
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
|
||||
output_parser = CommaSeparatedListOutputParser()
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
format_instructions = output_parser.get_format_instructions()
|
||||
prompt = PromptTemplate(
|
||||
template="List five {subject}.\n{format_instructions}",
|
||||
input_variables=["subject"],
|
||||
partial_variables={"format_instructions": format_instructions}
|
||||
)
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
model = OpenAI(temperature=0)
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
_input = prompt.format(subject="ice cream flavors")
|
||||
output = model(_input)
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
output_parser.parse(output)
|
||||
```
|
||||
|
||||
The resulting output will be:
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
|
||||
@@ -13,7 +13,6 @@ from langchain.schema.messages import AIMessage, HumanMessage, SystemMessage
|
||||
from langchain.tools.base import BaseTool
|
||||
from langchain.tools.human.tool import HumanInputRun
|
||||
from langchain.vectorstores.base import VectorStoreRetriever
|
||||
from pydantic import ValidationError
|
||||
|
||||
from langchain_experimental.autonomous_agents.autogpt.output_parser import (
|
||||
AutoGPTOutputParser,
|
||||
@@ -23,6 +22,7 @@ from langchain_experimental.autonomous_agents.autogpt.prompt import AutoGPTPromp
|
||||
from langchain_experimental.autonomous_agents.autogpt.prompt_generator import (
|
||||
FINISH_NAME,
|
||||
)
|
||||
from langchain_experimental.pydantic_v1 import ValidationError
|
||||
|
||||
|
||||
class AutoGPT:
|
||||
|
||||
@@ -2,7 +2,8 @@ from typing import Any, Dict, List
|
||||
|
||||
from langchain.memory.chat_memory import BaseChatMemory, get_prompt_input_key
|
||||
from langchain.vectorstores.base import VectorStoreRetriever
|
||||
from pydantic import Field
|
||||
|
||||
from langchain_experimental.pydantic_v1 import Field
|
||||
|
||||
|
||||
class AutoGPTMemory(BaseChatMemory):
|
||||
|
||||
@@ -7,9 +7,9 @@ from langchain.prompts.chat import (
|
||||
from langchain.schema.messages import BaseMessage, HumanMessage, SystemMessage
|
||||
from langchain.tools.base import BaseTool
|
||||
from langchain.vectorstores.base import VectorStoreRetriever
|
||||
from pydantic import BaseModel
|
||||
|
||||
from langchain_experimental.autonomous_agents.autogpt.prompt_generator import get_prompt
|
||||
from langchain_experimental.pydantic_v1 import BaseModel
|
||||
|
||||
|
||||
class AutoGPTPrompt(BaseChatPromptTemplate, BaseModel):
|
||||
|
||||
@@ -6,7 +6,6 @@ from langchain.callbacks.manager import CallbackManagerForChainRun
|
||||
from langchain.chains.base import Chain
|
||||
from langchain.schema.language_model import BaseLanguageModel
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from langchain_experimental.autonomous_agents.baby_agi.task_creation import (
|
||||
TaskCreationChain,
|
||||
@@ -17,6 +16,7 @@ from langchain_experimental.autonomous_agents.baby_agi.task_execution import (
|
||||
from langchain_experimental.autonomous_agents.baby_agi.task_prioritization import (
|
||||
TaskPrioritizationChain,
|
||||
)
|
||||
from langchain_experimental.pydantic_v1 import BaseModel, Field
|
||||
|
||||
|
||||
class BabyAGI(Chain, BaseModel):
|
||||
|
||||
@@ -13,7 +13,8 @@ from langchain.prompts.chat import (
|
||||
SystemMessagePromptTemplate,
|
||||
)
|
||||
from langchain.tools.base import BaseTool
|
||||
from pydantic import BaseModel
|
||||
|
||||
from langchain_experimental.pydantic_v1 import BaseModel
|
||||
|
||||
DEMONSTRATIONS = [
|
||||
{
|
||||
|
||||
@@ -6,9 +6,15 @@ from typing import Any, Optional, Union
|
||||
import duckdb
|
||||
import pandas as pd
|
||||
from langchain.graphs.networkx_graph import NetworkxEntityGraph
|
||||
from pydantic import BaseModel, Field, PrivateAttr, root_validator, validator
|
||||
|
||||
from langchain_experimental.cpal.constants import Constant
|
||||
from langchain_experimental.pydantic_v1 import (
|
||||
BaseModel,
|
||||
Field,
|
||||
PrivateAttr,
|
||||
root_validator,
|
||||
validator,
|
||||
)
|
||||
|
||||
|
||||
class NarrativeModel(BaseModel):
|
||||
|
||||
@@ -5,9 +5,9 @@ from typing import Any, Dict, List, Optional, Tuple
|
||||
from langchain.chains import LLMChain
|
||||
from langchain.prompts import PromptTemplate
|
||||
from langchain.schema.language_model import BaseLanguageModel
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from langchain_experimental.generative_agents.memory import GenerativeAgentMemory
|
||||
from langchain_experimental.pydantic_v1 import BaseModel, Field
|
||||
|
||||
|
||||
class GenerativeAgent(BaseModel):
|
||||
|
||||
@@ -19,7 +19,8 @@ from langchain.schema.messages import (
|
||||
BaseMessage,
|
||||
SystemMessage,
|
||||
)
|
||||
from pydantic import root_validator
|
||||
|
||||
from langchain_experimental.pydantic_v1 import root_validator
|
||||
|
||||
prompt = """In addition to responding, you can use tools. \
|
||||
You have access to the following tools.
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user