Compare commits
262 Commits
ankush/mes
...
wfh/defaul
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
40cbac7b29 | ||
|
|
c9d44f0ba3 | ||
|
|
6325a3517c | ||
|
|
82f3e32d8d | ||
|
|
af6d333147 | ||
|
|
3874bb256e | ||
|
|
574698a5fb | ||
|
|
854f3fe9b1 | ||
|
|
051fac1e66 | ||
|
|
5db4dba526 | ||
|
|
9124221d31 | ||
|
|
c087ce74f7 | ||
|
|
ae7714f1ba | ||
|
|
fbc97a77ed | ||
|
|
120c52589b | ||
|
|
c7b687e944 | ||
|
|
aab2a7cd4b | ||
|
|
5f03cc3511 | ||
|
|
3dd0704e38 | ||
|
|
24c1654208 | ||
|
|
c17a80f11c | ||
|
|
a673a51efa | ||
|
|
224199083b | ||
|
|
af3f401015 | ||
|
|
98e1bbfbbd | ||
|
|
6f62e5461c | ||
|
|
b08f903755 | ||
|
|
f307ca094b | ||
|
|
488d2d5da9 | ||
|
|
a8bbfb2da3 | ||
|
|
92ef77da35 | ||
|
|
7f8ff2a317 | ||
|
|
c5e50c40c9 | ||
|
|
a08baa97c5 | ||
|
|
cdb93ab5ca | ||
|
|
8effd90be0 | ||
|
|
f11d845dee | ||
|
|
0e1d7a27c6 | ||
|
|
53722dcfdc | ||
|
|
1d4db1327a | ||
|
|
ee70d4a0cd | ||
|
|
9b215e761e | ||
|
|
2f848294cb | ||
|
|
d85c33a5c3 | ||
|
|
0d92a7f357 | ||
|
|
931e68692e | ||
|
|
be29a6287d | ||
|
|
adc96d60b6 | ||
|
|
93a84f6182 | ||
|
|
22525bad65 | ||
|
|
6e1000dc8d | ||
|
|
f3c9bf5e4b | ||
|
|
6cdd4b5edc | ||
|
|
50316f6477 | ||
|
|
603a0bea29 | ||
|
|
3f7213586e | ||
|
|
5f17c57174 | ||
|
|
ebcb144342 | ||
|
|
641fd74baa | ||
|
|
2667ddc686 | ||
|
|
74c28df363 | ||
|
|
5c3fe8b0d1 | ||
|
|
2babe3069f | ||
|
|
e811c5e8c6 | ||
|
|
8741e55e7c | ||
|
|
00c466627a | ||
|
|
cc0585af42 | ||
|
|
b96ac13f3d | ||
|
|
9cb2347453 | ||
|
|
c4d53f98dc | ||
|
|
2c2f0e15a6 | ||
|
|
0ea7224535 | ||
|
|
1f83b5f47e | ||
|
|
6674b33cf5 | ||
|
|
406a9dc11f | ||
|
|
9e067b8cc9 | ||
|
|
3c4338470e | ||
|
|
d2137eea9f | ||
|
|
9129318466 | ||
|
|
2e4047e5e7 | ||
|
|
1dd4236177 | ||
|
|
4a94f56258 | ||
|
|
5171c3bcca | ||
|
|
bd0c6381f5 | ||
|
|
28d2b213a4 | ||
|
|
dd648183fa | ||
|
|
5eec74d9a5 | ||
|
|
9d13dcd17c | ||
|
|
5debd5043e | ||
|
|
9b615022e2 | ||
|
|
92b4418c8c | ||
|
|
7d29bb2c02 | ||
|
|
21a353e9c2 | ||
|
|
d2cf0d16b3 | ||
|
|
04cddfba0d | ||
|
|
bcab894f4e | ||
|
|
490f4a9ff0 | ||
|
|
7ffc431b3a | ||
|
|
50a9fcccb0 | ||
|
|
a5fd8873b1 | ||
|
|
dfc3f83b0f | ||
|
|
c7f7788d0b | ||
|
|
8f8e8d701e | ||
|
|
560c4dfc98 | ||
|
|
f5bd88757e | ||
|
|
ea9c3cc9c9 | ||
|
|
5da9f9abcb | ||
|
|
2eb4a2ceea | ||
|
|
e7420789e4 | ||
|
|
26c86a197c | ||
|
|
1d649b127e | ||
|
|
362bc301df | ||
|
|
a1603fccfb | ||
|
|
4ba7396f96 | ||
|
|
633b673b85 | ||
|
|
4d697d3f24 | ||
|
|
612a74eb7e | ||
|
|
4789c99bc2 | ||
|
|
fb6e63dc36 | ||
|
|
c5edbea34a | ||
|
|
1ac347b4e3 | ||
|
|
705d2f5b92 | ||
|
|
ec033ae277 | ||
|
|
da5b0723d2 | ||
|
|
184ede4e48 | ||
|
|
7cdf97ba9b | ||
|
|
4d427b2397 | ||
|
|
2179d4eef8 | ||
|
|
df746ad821 | ||
|
|
c9a0f24646 | ||
|
|
34a2755a54 | ||
|
|
4e7d0c115b | ||
|
|
01dca1e438 | ||
|
|
1ac6deda89 | ||
|
|
4e180dc54e | ||
|
|
3ce4e46c8c | ||
|
|
b489466488 | ||
|
|
38ca5c84cb | ||
|
|
49b2b0e3c0 | ||
|
|
a2830e3056 | ||
|
|
cb4e88e4fb | ||
|
|
d1c7237034 | ||
|
|
0ed2da7020 | ||
|
|
1c8cff32f1 | ||
|
|
fd7145970f | ||
|
|
3074306ae1 | ||
|
|
5809c3d29d | ||
|
|
87f75cb322 | ||
|
|
284d40b7af | ||
|
|
8d961b9e33 | ||
|
|
a9c5b4bcea | ||
|
|
9954eff8fd | ||
|
|
6095a0a310 | ||
|
|
e047541b5f | ||
|
|
152dc59060 | ||
|
|
927c8eb91a | ||
|
|
bac56618b4 | ||
|
|
d642609a23 | ||
|
|
ec10787bc7 | ||
|
|
b21c2f8704 | ||
|
|
e736d60516 | ||
|
|
12d14f8947 | ||
|
|
cb9ff6efb8 | ||
|
|
1f4a51cb9c | ||
|
|
a6b39afe0e | ||
|
|
1a4ca3eff9 | ||
|
|
6ff9e9b34a | ||
|
|
09acbb8410 | ||
|
|
e0cb3ea90c | ||
|
|
4450791edd | ||
|
|
a7ae35fe4e | ||
|
|
681f2678a3 | ||
|
|
c23e16c459 | ||
|
|
8c371e12eb | ||
|
|
c7cf11b8ab | ||
|
|
fed64ae060 | ||
|
|
576880abc5 | ||
|
|
e8f24164f0 | ||
|
|
ae5aa496ee | ||
|
|
b9d6d4cd4c | ||
|
|
8b19f6a0da | ||
|
|
ec66d5188c | ||
|
|
e61cfb6e99 | ||
|
|
0c7a5cb206 | ||
|
|
b151d4257a | ||
|
|
887bb12287 | ||
|
|
f773c21723 | ||
|
|
0e878ccc2d | ||
|
|
57d8a3d1e8 | ||
|
|
c36f852846 | ||
|
|
035ad33a5b | ||
|
|
cabd358c3a | ||
|
|
52b016920c | ||
|
|
695e7027e6 | ||
|
|
930e319ca7 | ||
|
|
6aa66fd2b0 | ||
|
|
8afc8e6f5d | ||
|
|
f891f7d69f | ||
|
|
83cf01683e | ||
|
|
607708a411 | ||
|
|
75aa408f10 | ||
|
|
0dc700eebf | ||
|
|
d6541da161 | ||
|
|
d669b9ece9 | ||
|
|
e533da8bf2 | ||
|
|
836d2009cb | ||
|
|
d65b1951bd | ||
|
|
265f05b10e | ||
|
|
47e7d09dff | ||
|
|
79b59a8e06 | ||
|
|
6711854e30 | ||
|
|
cab7d86f23 | ||
|
|
3ae11b7582 | ||
|
|
a2f191a322 | ||
|
|
61938a02a1 | ||
|
|
ecee4d6e92 | ||
|
|
fa55c5a16b | ||
|
|
8a7c95e555 | ||
|
|
e4459e423b | ||
|
|
4c1c05c2c7 | ||
|
|
30d8d1d3d0 | ||
|
|
9abf1847f4 | ||
|
|
7d92e9407b | ||
|
|
e288410e72 | ||
|
|
26409b01bd | ||
|
|
6f358bb04a | ||
|
|
6eff0fa2ca | ||
|
|
81e5b1ad36 | ||
|
|
baf48d3583 | ||
|
|
8045870a0f | ||
|
|
db98c44f8f | ||
|
|
7cd0936b1c | ||
|
|
38f853dfa3 | ||
|
|
ee1d488c03 | ||
|
|
6666e422c6 | ||
|
|
8410c6a747 | ||
|
|
7b585c7585 | ||
|
|
6fc24743b7 | ||
|
|
79fb90aafd | ||
|
|
1415966d64 | ||
|
|
a94c4cca68 | ||
|
|
e18e838aae | ||
|
|
e27ba9d92b | ||
|
|
39e685b80f | ||
|
|
bf9e4ef35f | ||
|
|
9cfb311ecb | ||
|
|
405865c91a | ||
|
|
c9f696f063 | ||
|
|
e8531769f7 | ||
|
|
2984803597 | ||
|
|
da69a6771f | ||
|
|
b39017dc11 | ||
|
|
898087d02c | ||
|
|
0ad984fa27 | ||
|
|
81eebc4070 | ||
|
|
5585607654 | ||
|
|
265c285057 | ||
|
|
6631fd5168 | ||
|
|
696886f397 | ||
|
|
0b69a7e9ab | ||
|
|
9ca4c54428 | ||
|
|
dfa48dc3b5 |
66
.github/CONTRIBUTING.md
vendored
@@ -95,6 +95,14 @@ To run formatting for this project:
|
||||
make format
|
||||
```
|
||||
|
||||
Additionally, you can run the formatter only on the files that have been modified in your current branch as compared to the master branch using the format_diff command:
|
||||
|
||||
```bash
|
||||
make format_diff
|
||||
```
|
||||
|
||||
This is especially useful when you have made changes to a subset of the project and want to ensure your changes are properly formatted without affecting the rest of the codebase.
|
||||
|
||||
### Linting
|
||||
|
||||
Linting for this project is done via a combination of [Black](https://black.readthedocs.io/en/stable/), [isort](https://pycqa.github.io/isort/), [flake8](https://flake8.pycqa.org/en/latest/), and [mypy](http://mypy-lang.org/).
|
||||
@@ -105,8 +113,42 @@ To run linting for this project:
|
||||
make lint
|
||||
```
|
||||
|
||||
In addition, you can run the linter only on the files that have been modified in your current branch as compared to the master branch using the lint_diff command:
|
||||
|
||||
```bash
|
||||
make lint_diff
|
||||
```
|
||||
|
||||
This can be very helpful when you've made changes to only certain parts of the project and want to ensure your changes meet the linting standards without having to check the entire codebase.
|
||||
|
||||
We recognize linting can be annoying - if you do not want to do it, please contact a project maintainer, and they can help you with it. We do not want this to be a blocker for good code getting contributed.
|
||||
|
||||
### Spellcheck
|
||||
|
||||
Spellchecking for this project is done via [codespell](https://github.com/codespell-project/codespell).
|
||||
Note that `codespell` finds common typos, so could have false-positive (correctly spelled but rarely used) and false-negatives (not finding misspelled) words.
|
||||
|
||||
To check spelling for this project:
|
||||
|
||||
```bash
|
||||
make spell_check
|
||||
```
|
||||
|
||||
To fix spelling in place:
|
||||
|
||||
```bash
|
||||
make spell_fix
|
||||
```
|
||||
|
||||
If codespell is incorrectly flagging a word, you can skip spellcheck for that word by adding it to the codespell config in the `pyproject.toml` file.
|
||||
|
||||
```python
|
||||
[tool.codespell]
|
||||
...
|
||||
# Add here:
|
||||
ignore-words-list = 'momento,collison,ned,foor,reworkd,parth,whats,aapply,mysogyny,unsecure'
|
||||
```
|
||||
|
||||
### Coverage
|
||||
|
||||
Code coverage (i.e. the amount of code that is covered by unit tests) helps identify areas of the code that are potentially more or less brittle.
|
||||
@@ -208,30 +250,38 @@ When you run `poetry install`, the `langchain` package is installed as editable
|
||||
|
||||
### Contribute Documentation
|
||||
|
||||
Docs are largely autogenerated by [sphinx](https://www.sphinx-doc.org/en/master/) from the code.
|
||||
The docs directory contains Documentation and API Reference.
|
||||
|
||||
Documentation is built using [Docusaurus 2](https://docusaurus.io/).
|
||||
|
||||
API Reference are largely autogenerated by [sphinx](https://www.sphinx-doc.org/en/master/) from the code.
|
||||
For that reason, we ask that you add good documentation to all classes and methods.
|
||||
|
||||
Similar to linting, we recognize documentation can be annoying. If you do not want to do it, please contact a project maintainer, and they can help you with it. We do not want this to be a blocker for good code getting contributed.
|
||||
|
||||
### Build Documentation Locally
|
||||
|
||||
In the following commands, the prefix `api_` indicates that those are operations for the API Reference.
|
||||
|
||||
Before building the documentation, it is always a good idea to clean the build directory:
|
||||
|
||||
```bash
|
||||
make docs_clean
|
||||
make api_docs_clean
|
||||
```
|
||||
|
||||
Next, you can run the linkchecker to make sure all links are valid:
|
||||
|
||||
```bash
|
||||
make docs_linkcheck
|
||||
```
|
||||
|
||||
Finally, you can build the documentation as outlined below:
|
||||
Next, you can build the documentation as outlined below:
|
||||
|
||||
```bash
|
||||
make docs_build
|
||||
make api_docs_build
|
||||
```
|
||||
|
||||
Finally, you can run the linkchecker to make sure all links are valid:
|
||||
|
||||
```bash
|
||||
make docs_linkcheck
|
||||
make api_docs_linkcheck
|
||||
```
|
||||
|
||||
## 🏭 Release Process
|
||||
|
||||
22
.github/workflows/codespell.yml
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
---
|
||||
name: Codespell
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [master]
|
||||
pull_request:
|
||||
branches: [master]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
codespell:
|
||||
name: Check for spelling errors
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
- name: Codespell
|
||||
uses: codespell-project/actions-codespell@v2
|
||||
5
.gitignore
vendored
@@ -161,7 +161,12 @@ docs/node_modules/
|
||||
docs/.docusaurus/
|
||||
docs/.cache-loader/
|
||||
docs/_dist
|
||||
docs/api_reference/api_reference.rst
|
||||
docs/api_reference/_build
|
||||
docs/api_reference/*/
|
||||
!docs/api_reference/_static/
|
||||
!docs/api_reference/templates/
|
||||
!docs/api_reference/themes/
|
||||
docs/docs_skeleton/build
|
||||
docs/docs_skeleton/node_modules
|
||||
docs/docs_skeleton/yarn.lock
|
||||
|
||||
69
Makefile
@@ -1,40 +1,47 @@
|
||||
.PHONY: all clean format lint test tests test_watch integration_tests docker_tests help extended_tests
|
||||
.PHONY: all clean docs_build docs_clean docs_linkcheck api_docs_build api_docs_clean api_docs_linkcheck format lint test tests test_watch integration_tests docker_tests help extended_tests
|
||||
|
||||
# Default target executed when no arguments are given to make.
|
||||
all: help
|
||||
|
||||
######################
|
||||
# TESTING AND COVERAGE
|
||||
######################
|
||||
|
||||
# Run unit tests and generate a coverage report.
|
||||
coverage:
|
||||
poetry run pytest --cov \
|
||||
--cov-config=.coveragerc \
|
||||
--cov-report xml \
|
||||
--cov-report term-missing:skip-covered
|
||||
|
||||
clean: docs_clean
|
||||
######################
|
||||
# DOCUMENTATION
|
||||
######################
|
||||
|
||||
clean: docs_clean api_docs_clean
|
||||
|
||||
docs_compile:
|
||||
poetry run nbdoc_build --srcdir $(srcdir)
|
||||
|
||||
docs_build:
|
||||
cd docs && poetry run make html
|
||||
docs/.local_build.sh
|
||||
|
||||
docs_clean:
|
||||
cd docs && poetry run make clean
|
||||
rm -r docs/_dist
|
||||
|
||||
docs_linkcheck:
|
||||
poetry run linkchecker docs/_build/html/index.html
|
||||
poetry run linkchecker docs/_dist/docs_skeleton/ --ignore-url node_modules
|
||||
|
||||
format:
|
||||
poetry run black .
|
||||
poetry run ruff --select I --fix .
|
||||
api_docs_build:
|
||||
poetry run python docs/api_reference/create_api_rst.py
|
||||
cd docs/api_reference && poetry run make html
|
||||
|
||||
PYTHON_FILES=.
|
||||
lint: PYTHON_FILES=.
|
||||
lint_diff: PYTHON_FILES=$(shell git diff --name-only --diff-filter=d master | grep -E '\.py$$')
|
||||
api_docs_clean:
|
||||
rm -f docs/api_reference/api_reference.rst
|
||||
cd docs/api_reference && poetry run make clean
|
||||
|
||||
lint lint_diff:
|
||||
poetry run mypy $(PYTHON_FILES)
|
||||
poetry run black $(PYTHON_FILES) --check
|
||||
poetry run ruff .
|
||||
api_docs_linkcheck:
|
||||
poetry run linkchecker docs/api_reference/_build/html/index.html
|
||||
|
||||
# Define a variable for the test file path.
|
||||
TEST_FILE ?= tests/unit_tests/
|
||||
|
||||
test:
|
||||
@@ -56,6 +63,34 @@ docker_tests:
|
||||
docker build -t my-langchain-image:test .
|
||||
docker run --rm my-langchain-image:test
|
||||
|
||||
######################
|
||||
# LINTING AND FORMATTING
|
||||
######################
|
||||
|
||||
# Define a variable for Python and notebook files.
|
||||
PYTHON_FILES=.
|
||||
lint format: PYTHON_FILES=.
|
||||
lint_diff format_diff: PYTHON_FILES=$(shell git diff --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$')
|
||||
|
||||
lint lint_diff:
|
||||
poetry run mypy $(PYTHON_FILES)
|
||||
poetry run black $(PYTHON_FILES) --check
|
||||
poetry run ruff .
|
||||
|
||||
format format_diff:
|
||||
poetry run black $(PYTHON_FILES)
|
||||
poetry run ruff --select I --fix $(PYTHON_FILES)
|
||||
|
||||
spell_check:
|
||||
poetry run codespell --toml pyproject.toml
|
||||
|
||||
spell_fix:
|
||||
poetry run codespell --toml pyproject.toml -w
|
||||
|
||||
######################
|
||||
# HELP
|
||||
######################
|
||||
|
||||
help:
|
||||
@echo '----'
|
||||
@echo 'coverage - run unit tests and generate coverage report'
|
||||
|
||||
@@ -1,10 +1,15 @@
|
||||
mkdir _dist
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -o errexit
|
||||
set -o nounset
|
||||
set -o pipefail
|
||||
set -o xtrace
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")"; pwd)"
|
||||
cd "${SCRIPT_DIR}"
|
||||
|
||||
mkdir -p _dist/docs_skeleton
|
||||
cp -r {docs_skeleton,snippets} _dist
|
||||
mkdir -p _dist/docs_skeleton/static/api_reference
|
||||
cd api_reference
|
||||
poetry run make html
|
||||
cp -r _build/* ../_dist/docs_skeleton/static/api_reference
|
||||
cd ..
|
||||
cp -r extras/* _dist/docs_skeleton/docs
|
||||
cd _dist/docs_skeleton
|
||||
poetry run nbdoc_build
|
||||
|
||||
@@ -20,7 +20,9 @@ def load_members() -> dict:
|
||||
cls = re.findall(r"^class ([^_].*)\(", line)
|
||||
members[top_level]["classes"].extend([module + "." + c for c in cls])
|
||||
func = re.findall(r"^def ([^_].*)\(", line)
|
||||
members[top_level]["functions"].extend([module + "." + f for f in func])
|
||||
afunc = re.findall(r"^async def ([^_].*)\(", line)
|
||||
func_strings = [module + "." + f for f in func + afunc]
|
||||
members[top_level]["functions"].extend(func_strings)
|
||||
return members
|
||||
|
||||
|
||||
|
||||
@@ -16,22 +16,6 @@
|
||||
{%- set development_attrs = '' %}
|
||||
{%- endif %}
|
||||
|
||||
{# title, link, link_attrs #}
|
||||
{%- set drop_down_navigation = [
|
||||
('Getting Started', pathto('getting_started'), ''),
|
||||
('Tutorial', pathto('tutorial/index'), ''),
|
||||
("What's new", pathto('whats_new/v' + version), ''),
|
||||
('Glossary', pathto('glossary'), ''),
|
||||
('Development', development_link, development_attrs),
|
||||
('FAQ', pathto('faq'), ''),
|
||||
('Support', pathto('support'), ''),
|
||||
('Related packages', pathto('related_projects'), ''),
|
||||
('Roadmap', pathto('roadmap'), ''),
|
||||
('Governance', pathto('governance'), ''),
|
||||
('About us', pathto('about'), ''),
|
||||
('GitHub', 'https://github.com/scikit-learn/scikit-learn', ''),
|
||||
('Other Versions and Download', 'https://scikit-learn.org/dev/versions.html', '')]
|
||||
-%}
|
||||
|
||||
<nav id="navbar" class="{{ nav_bar_class }} navbar navbar-expand-md navbar-light bg-light py-0">
|
||||
<div class="container-fluid {{ top_container_cls }} px-0">
|
||||
|
||||
|
Before Width: | Height: | Size: 157 KiB After Width: | Height: | Size: 157 KiB |
@@ -3,6 +3,8 @@ sidebar_position: 0
|
||||
---
|
||||
# Integrations
|
||||
|
||||
Visit the [Integrations Hub](https://integrations.langchain.com) to further explore, upvote and request integrations across key LangChain components.
|
||||
|
||||
import DocCardList from "@theme/DocCardList";
|
||||
|
||||
<DocCardList />
|
||||
|
||||
12
docs/docs_skeleton/docs/guides/langsmith/index.md
Normal file
@@ -0,0 +1,12 @@
|
||||
# LangSmith
|
||||
|
||||
import DocCardList from "@theme/DocCardList";
|
||||
|
||||
LangSmith helps you trace and evaluate your language model applications and intelligent agents to help you
|
||||
move from prototype to production.
|
||||
|
||||
Check out the [interactive walkthrough](walkthrough) below to get started.
|
||||
|
||||
For more information, please refer to the [LangSmith documentation](https://docs.smith.langchain.com/)
|
||||
|
||||
<DocCardList />
|
||||
@@ -24,7 +24,7 @@ That means there are two different axes along which you can customize your text
|
||||
1. How the text is split
|
||||
2. How the chunk size is measured
|
||||
|
||||
## Get started with text splitters
|
||||
### Get started with text splitters
|
||||
|
||||
import GetStarted from "@snippets/modules/data_connection/document_transformers/get_started.mdx"
|
||||
|
||||
|
||||
@@ -1 +1,2 @@
|
||||
label: 'Text splitters'
|
||||
position: 0
|
||||
|
||||
@@ -8,7 +8,7 @@ Many LLM applications require user-specific data that is not part of the model's
|
||||
building blocks to load, transform, store and query your data via:
|
||||
|
||||
- [Document loaders](/docs/modules/data_connection/document_loaders/): Load documents from many different sources
|
||||
- [Document transformers](/docs/modules/data_connection/document_transformers/): Split documents, drop redundant documents, and more
|
||||
- [Document transformers](/docs/modules/data_connection/document_transformers/): Split documents, convert documents into Q&A format, drop redundant documents, and more
|
||||
- [Text embedding models](/docs/modules/data_connection/text_embedding/): Take unstructured text and turn it into a list of floating point numbers
|
||||
- [Vector stores](/docs/modules/data_connection/vectorstores/): Store and search over embedded data
|
||||
- [Retrievers](/docs/modules/data_connection/retrievers/): Query your data
|
||||
|
||||
BIN
docs/docs_skeleton/static/img/cpal_diagram.png
Normal file
|
After Width: | Height: | Size: 116 KiB |
BIN
docs/docs_skeleton/static/img/qa_data_load.png
Normal file
|
After Width: | Height: | Size: 237 KiB |
BIN
docs/docs_skeleton/static/img/qa_flow.jpeg
Normal file
|
After Width: | Height: | Size: 173 KiB |
BIN
docs/docs_skeleton/static/img/qa_intro.png
Normal file
|
After Width: | Height: | Size: 164 KiB |
BIN
docs/docs_skeleton/static/img/summary_chains.png
Normal file
|
After Width: | Height: | Size: 118 KiB |
@@ -138,7 +138,11 @@
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/integrations/databerry.html",
|
||||
"destination": "/docs/ecosystem/integrations/databerry"
|
||||
"destination": "/docs/ecosystem/integrations/chaindesk"
|
||||
},
|
||||
{
|
||||
"source": "/docs/ecosystem/integrations/databerry",
|
||||
"destination": "/docs/ecosystem/integrations/chaindesk"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/integrations/databricks/databricks.html",
|
||||
@@ -1330,7 +1334,11 @@
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/indexes/retrievers/examples/databerry.html",
|
||||
"destination": "/docs/modules/data_connection/retrievers/integrations/databerry"
|
||||
"destination": "/docs/modules/data_connection/retrievers/integrations/chaindesk"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/data_connection/retrievers/integrations/databerry",
|
||||
"destination": "/docs/modules/data_connection/retrievers/integrations/chaindesk"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/indexes/retrievers/examples/elastic_search_bm25.html",
|
||||
@@ -1864,6 +1872,14 @@
|
||||
"source": "/en/latest/modules/models/llms/integrations/writer.html",
|
||||
"destination": "/docs/modules/model_io/models/llms/integrations/writer"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/prompts/output_parsers.html",
|
||||
"destination": "/docs/modules/model_io/output_parsers/"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/prompts/output_parsers.html",
|
||||
"destination": "/docs/modules/model_io/output_parsers/"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/prompts/output_parsers/examples/datetime.html",
|
||||
"destination": "/docs/modules/model_io/output_parsers/datetime"
|
||||
@@ -2117,4 +2133,4 @@
|
||||
"destination": "/docs/:path*"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
124
docs/extras/additional_resources/tutorials.mdx
Normal file
@@ -0,0 +1,124 @@
|
||||
# Tutorials
|
||||
|
||||
|
||||
⛓ icon marks a new addition [last update 2023-07-05]
|
||||
|
||||
---------------------
|
||||
|
||||
### DeepLearning.AI courses
|
||||
by [Harrison Chase](https://github.com/hwchase17) and [Andrew Ng](https://en.wikipedia.org/wiki/Andrew_Ng)
|
||||
- [LangChain for LLM Application Development](https://learn.deeplearning.ai/langchain)
|
||||
- ⛓ [LangChain Chat with Your Data](https://learn.deeplearning.ai/langchain-chat-with-your-data)
|
||||
|
||||
### Handbook
|
||||
[LangChain AI Handbook](https://www.pinecone.io/learn/langchain/) By **James Briggs** and **Francisco Ingham**
|
||||
|
||||
### Short Tutorials
|
||||
[LangChain Crash Course - Build apps with language models](https://youtu.be/LbT1yp6quS8) by [Patrick Loeber](https://www.youtube.com/@patloeber)
|
||||
|
||||
[LangChain Crash Course: Build an AutoGPT app in 25 minutes](https://youtu.be/MlK6SIjcjE8) by [Nicholas Renotte](https://www.youtube.com/@NicholasRenotte)
|
||||
|
||||
[LangChain Explained in 13 Minutes | QuickStart Tutorial for Beginners](https://youtu.be/aywZrzNaKjs) by [Rabbitmetrics](https://www.youtube.com/@rabbitmetrics)
|
||||
|
||||
|
||||
## Tutorials
|
||||
|
||||
### [LangChain for Gen AI and LLMs](https://www.youtube.com/playlist?list=PLIUOU7oqGTLieV9uTIFMm6_4PXg-hlN6F) by [James Briggs](https://www.youtube.com/@jamesbriggs)
|
||||
- #1 [Getting Started with `GPT-3` vs. Open Source LLMs](https://youtu.be/nE2skSRWTTs)
|
||||
- #2 [Prompt Templates for `GPT 3.5` and other LLMs](https://youtu.be/RflBcK0oDH0)
|
||||
- #3 [LLM Chains using `GPT 3.5` and other LLMs](https://youtu.be/S8j9Tk0lZHU)
|
||||
- [LangChain Data Loaders, Tokenizers, Chunking, and Datasets - Data Prep 101](https://youtu.be/eqOfr4AGLk8)
|
||||
- #4 [Chatbot Memory for `Chat-GPT`, `Davinci` + other LLMs](https://youtu.be/X05uK0TZozM)
|
||||
- #5 [Chat with OpenAI in LangChain](https://youtu.be/CnAgB3A5OlU)
|
||||
- #6 [Fixing LLM Hallucinations with Retrieval Augmentation in LangChain](https://youtu.be/kvdVduIJsc8)
|
||||
- #7 [LangChain Agents Deep Dive with `GPT 3.5`](https://youtu.be/jSP-gSEyVeI)
|
||||
- #8 [Create Custom Tools for Chatbots in LangChain](https://youtu.be/q-HNphrWsDE)
|
||||
- #9 [Build Conversational Agents with Vector DBs](https://youtu.be/H6bCqqw9xyI)
|
||||
- [Using NEW `MPT-7B` in Hugging Face and LangChain](https://youtu.be/DXpk9K7DgMo)
|
||||
- ⛓ [`MPT-30B` Chatbot with LangChain](https://youtu.be/pnem-EhT6VI)
|
||||
|
||||
|
||||
### [LangChain 101](https://www.youtube.com/playlist?list=PLqZXAkvF1bPNQER9mLmDbntNfSpzdDIU5) by [Greg Kamradt (Data Indy)](https://www.youtube.com/@DataIndependent)
|
||||
- [What Is LangChain? - LangChain + `ChatGPT` Overview](https://youtu.be/_v_fgW2SkkQ)
|
||||
- [Quickstart Guide](https://youtu.be/kYRB-vJFy38)
|
||||
- [Beginner Guide To 7 Essential Concepts](https://youtu.be/2xxziIWmaSA)
|
||||
- [Beginner Guide To 9 Use Cases](https://youtu.be/vGP4pQdCocw)
|
||||
- [Agents Overview + Google Searches](https://youtu.be/Jq9Sf68ozk0)
|
||||
- [`OpenAI` + `Wolfram Alpha`](https://youtu.be/UijbzCIJ99g)
|
||||
- [Ask Questions On Your Custom (or Private) Files](https://youtu.be/EnT-ZTrcPrg)
|
||||
- [Connect `Google Drive Files` To `OpenAI`](https://youtu.be/IqqHqDcXLww)
|
||||
- [`YouTube Transcripts` + `OpenAI`](https://youtu.be/pNcQ5XXMgH4)
|
||||
- [Question A 300 Page Book (w/ `OpenAI` + `Pinecone`)](https://youtu.be/h0DHDp1FbmQ)
|
||||
- [Workaround `OpenAI's` Token Limit With Chain Types](https://youtu.be/f9_BWhCI4Zo)
|
||||
- [Build Your Own OpenAI + LangChain Web App in 23 Minutes](https://youtu.be/U_eV8wfMkXU)
|
||||
- [Working With The New `ChatGPT API`](https://youtu.be/e9P7FLi5Zy8)
|
||||
- [OpenAI + LangChain Wrote Me 100 Custom Sales Emails](https://youtu.be/y1pyAQM-3Bo)
|
||||
- [Structured Output From `OpenAI` (Clean Dirty Data)](https://youtu.be/KwAXfey-xQk)
|
||||
- [Connect `OpenAI` To +5,000 Tools (LangChain + `Zapier`)](https://youtu.be/7tNm0yiDigU)
|
||||
- [Use LLMs To Extract Data From Text (Expert Mode)](https://youtu.be/xZzvwR9jdPA)
|
||||
- [Extract Insights From Interview Transcripts Using LLMs](https://youtu.be/shkMOHwJ4SM)
|
||||
- [5 Levels Of LLM Summarizing: Novice to Expert](https://youtu.be/qaPMdcCqtWk)
|
||||
- [Control Tone & Writing Style Of Your LLM Output](https://youtu.be/miBG-a3FuhU)
|
||||
- [Build Your Own `AI Twitter Bot` Using LLMs](https://youtu.be/yLWLDjT01q8)
|
||||
- [ChatGPT made my interview questions for me (`Streamlit` + LangChain)](https://youtu.be/zvoAMx0WKkw)
|
||||
- [Function Calling via ChatGPT API - First Look With LangChain](https://youtu.be/0-zlUy7VUjg)
|
||||
- ⛓ [Extract Topics From Video/Audio With LLMs (Topic Modeling w/ LangChain)](https://youtu.be/pEkxRQFNAs4)
|
||||
|
||||
|
||||
### [LangChain How to and guides](https://www.youtube.com/playlist?list=PL8motc6AQftk1Bs42EW45kwYbyJ4jOdiZ) by [Sam Witteveen](https://www.youtube.com/@samwitteveenai)
|
||||
- [LangChain Basics - LLMs & PromptTemplates with Colab](https://youtu.be/J_0qvRt4LNk)
|
||||
- [LangChain Basics - Tools and Chains](https://youtu.be/hI2BY7yl_Ac)
|
||||
- [`ChatGPT API` Announcement & Code Walkthrough with LangChain](https://youtu.be/phHqvLHCwH4)
|
||||
- [Conversations with Memory (explanation & code walkthrough)](https://youtu.be/X550Zbz_ROE)
|
||||
- [Chat with `Flan20B`](https://youtu.be/VW5LBavIfY4)
|
||||
- [Using `Hugging Face Models` locally (code walkthrough)](https://youtu.be/Kn7SX2Mx_Jk)
|
||||
- [`PAL` : Program-aided Language Models with LangChain code](https://youtu.be/dy7-LvDu-3s)
|
||||
- [Building a Summarization System with LangChain and `GPT-3` - Part 1](https://youtu.be/LNq_2s_H01Y)
|
||||
- [Building a Summarization System with LangChain and `GPT-3` - Part 2](https://youtu.be/d-yeHDLgKHw)
|
||||
- [Microsoft's `Visual ChatGPT` using LangChain](https://youtu.be/7YEiEyfPF5U)
|
||||
- [LangChain Agents - Joining Tools and Chains with Decisions](https://youtu.be/ziu87EXZVUE)
|
||||
- [Comparing LLMs with LangChain](https://youtu.be/rFNG0MIEuW0)
|
||||
- [Using `Constitutional AI` in LangChain](https://youtu.be/uoVqNFDwpX4)
|
||||
- [Talking to `Alpaca` with LangChain - Creating an Alpaca Chatbot](https://youtu.be/v6sF8Ed3nTE)
|
||||
- [Talk to your `CSV` & `Excel` with LangChain](https://youtu.be/xQ3mZhw69bc)
|
||||
- [`BabyAGI`: Discover the Power of Task-Driven Autonomous Agents!](https://youtu.be/QBcDLSE2ERA)
|
||||
- [Improve your `BabyAGI` with LangChain](https://youtu.be/DRgPyOXZ-oE)
|
||||
- [Master `PDF` Chat with LangChain - Your essential guide to queries on documents](https://youtu.be/ZzgUqFtxgXI)
|
||||
- [Using LangChain with `DuckDuckGO` `Wikipedia` & `PythonREPL` Tools](https://youtu.be/KerHlb8nuVc)
|
||||
- [Building Custom Tools and Agents with LangChain (gpt-3.5-turbo)](https://youtu.be/biS8G8x8DdA)
|
||||
- [LangChain Retrieval QA Over Multiple Files with `ChromaDB`](https://youtu.be/3yPBVii7Ct0)
|
||||
- [LangChain Retrieval QA with Instructor Embeddings & `ChromaDB` for PDFs](https://youtu.be/cFCGUjc33aU)
|
||||
- [LangChain + Retrieval Local LLMs for Retrieval QA - No OpenAI!!!](https://youtu.be/9ISVjh8mdlA)
|
||||
- [`Camel` + LangChain for Synthetic Data & Market Research](https://youtu.be/GldMMK6-_-g)
|
||||
- [Information Extraction with LangChain & `Kor`](https://youtu.be/SW1ZdqH0rRQ)
|
||||
- [Converting a LangChain App from OpenAI to OpenSource](https://youtu.be/KUDn7bVyIfc)
|
||||
- [Using LangChain `Output Parsers` to get what you want out of LLMs](https://youtu.be/UVn2NroKQCw)
|
||||
- [Building a LangChain Custom Medical Agent with Memory](https://youtu.be/6UFtRwWnHws)
|
||||
- [Understanding `ReACT` with LangChain](https://youtu.be/Eug2clsLtFs)
|
||||
- [`OpenAI Functions` + LangChain : Building a Multi Tool Agent](https://youtu.be/4KXK6c6TVXQ)
|
||||
- [What can you do with 16K tokens in LangChain?](https://youtu.be/z2aCZBAtWXs)
|
||||
- [Tagging and Extraction - Classification using `OpenAI Functions`](https://youtu.be/a8hMgIcUEnE)
|
||||
- ⛓ [HOW to Make Conversational Form with LangChain](https://youtu.be/IT93On2LB5k)
|
||||
|
||||
|
||||
### [LangChain](https://www.youtube.com/playlist?list=PLVEEucA9MYhOu89CX8H3MBZqayTbcCTMr) by [Prompt Engineering](https://www.youtube.com/@engineerprompt)
|
||||
- [LangChain Crash Course — All You Need to Know to Build Powerful Apps with LLMs](https://youtu.be/5-fc4Tlgmro)
|
||||
- [Working with MULTIPLE `PDF` Files in LangChain: `ChatGPT` for your Data](https://youtu.be/s5LhRdh5fu4)
|
||||
- [`ChatGPT` for YOUR OWN `PDF` files with LangChain](https://youtu.be/TLf90ipMzfE)
|
||||
- [Talk to YOUR DATA without OpenAI APIs: LangChain](https://youtu.be/wrD-fZvT6UI)
|
||||
- [Langchain: PDF Chat App (GUI) | ChatGPT for Your PDF FILES](https://youtu.be/RIWbalZ7sTo)
|
||||
- [LangFlow: Build Chatbots without Writing Code](https://youtu.be/KJ-ux3hre4s)
|
||||
- [LangChain: Giving Memory to LLMs](https://youtu.be/dxO6pzlgJiY)
|
||||
- [BEST OPEN Alternative to `OPENAI's EMBEDDINGs` for Retrieval QA: LangChain](https://youtu.be/ogEalPMUCSY)
|
||||
|
||||
|
||||
### LangChain by [Chat with data](https://www.youtube.com/@chatwithdata)
|
||||
- [LangChain Beginner's Tutorial for `Typescript`/`Javascript`](https://youtu.be/bH722QgRlhQ)
|
||||
- [`GPT-4` Tutorial: How to Chat With Multiple `PDF` Files (~1000 pages of Tesla's 10-K Annual Reports)](https://youtu.be/Ix9WIZpArm0)
|
||||
- [`GPT-4` & LangChain Tutorial: How to Chat With A 56-Page `PDF` Document (w/`Pinecone`)](https://youtu.be/ih9PBGVVOO4)
|
||||
- [LangChain & Supabase Tutorial: How to Build a ChatGPT Chatbot For Your Website](https://youtu.be/R2FMzcsmQY8)
|
||||
- [LangChain Agents: Build Personal Assistants For Your Data (Q&A with Harrison Chase and Mayo Oshin)](https://youtu.be/gVkF8cwfBLI)
|
||||
|
||||
|
||||
---------------------
|
||||
⛓ icon marks a new addition [last update 2023-07-05]
|
||||
@@ -1,6 +1,6 @@
|
||||
# YouTube tutorials
|
||||
# YouTube videos
|
||||
|
||||
This is a collection of `LangChain` videos on `YouTube`.
|
||||
⛓ icon marks a new addition [last update 2023-06-20]
|
||||
|
||||
### [Official LangChain YouTube channel](https://www.youtube.com/@LangChain)
|
||||
|
||||
@@ -9,7 +9,6 @@ This is a collection of `LangChain` videos on `YouTube`.
|
||||
- [LangChain and Weaviate with Harrison Chase and Bob van Luijt - Weaviate Podcast #36](https://youtu.be/lhby7Ql7hbk) by [Weaviate • Vector Database](https://www.youtube.com/@Weaviate)
|
||||
- [LangChain Demo + Q&A with Harrison Chase](https://youtu.be/zaYTXQFR0_s?t=788) by [Full Stack Deep Learning](https://www.youtube.com/@FullStackDeepLearning)
|
||||
- [LangChain Agents: Build Personal Assistants For Your Data (Q&A with Harrison Chase and Mayo Oshin)](https://youtu.be/gVkF8cwfBLI) by [Chat with data](https://www.youtube.com/@chatwithdata)
|
||||
- ⛓️ [LangChain "Agents in Production" Webinar](https://youtu.be/k8GNCCs16F4) by [LangChain](https://www.youtube.com/@LangChain)
|
||||
|
||||
## Videos (sorted by views)
|
||||
|
||||
@@ -31,6 +30,9 @@ This is a collection of `LangChain` videos on `YouTube`.
|
||||
- [`Weaviate` + LangChain for LLM apps presented by Erika Cardenas](https://youtu.be/7AGj4Td5Lgw) by [`Weaviate` • Vector Database](https://www.youtube.com/@Weaviate)
|
||||
- [Langchain Overview — How to Use Langchain & `ChatGPT`](https://youtu.be/oYVYIq0lOtI) by [Python In Office](https://www.youtube.com/@pythoninoffice6568)
|
||||
- [Langchain Overview - How to Use Langchain & `ChatGPT`](https://youtu.be/oYVYIq0lOtI) by [Python In Office](https://www.youtube.com/@pythoninoffice6568)
|
||||
- [LangChain Tutorials](https://www.youtube.com/watch?v=FuqdVNB_8c0&list=PL9V0lbeJ69brU-ojMpU1Y7Ic58Tap0Cw6) by [Edrick](https://www.youtube.com/@edrickdch):
|
||||
- [LangChain, Chroma DB, OpenAI Beginner Guide | ChatGPT with your PDF](https://youtu.be/FuqdVNB_8c0)
|
||||
- [LangChain 101: The Complete Beginner's Guide](https://youtu.be/P3MAbZ2eMUI)
|
||||
- [Custom langchain Agent & Tools with memory. Turn any `Python function` into langchain tool with Gpt 3](https://youtu.be/NIG8lXk0ULg) by [echohive](https://www.youtube.com/@echohive)
|
||||
- [LangChain: Run Language Models Locally - `Hugging Face Models`](https://youtu.be/Xxxuw4_iCzw) by [Prompt Engineering](https://www.youtube.com/@engineerprompt)
|
||||
- [`ChatGPT` with any `YouTube` video using langchain and `chromadb`](https://youtu.be/TQZfB2bzVwU) by [echohive](https://www.youtube.com/@echohive)
|
||||
@@ -46,154 +48,68 @@ This is a collection of `LangChain` videos on `YouTube`.
|
||||
- [Langchain + `Zapier` Agent](https://youtu.be/yribLAb-pxA) by [Merk](https://www.youtube.com/@merksworld)
|
||||
- [Connecting the Internet with `ChatGPT` (LLMs) using Langchain And Answers Your Questions](https://youtu.be/9Y0TBC63yZg) by [Kamalraj M M](https://www.youtube.com/@insightbuilder)
|
||||
- [Build More Powerful LLM Applications for Business’s with LangChain (Beginners Guide)](https://youtu.be/sp3-WLKEcBg) by[ No Code Blackbox](https://www.youtube.com/@nocodeblackbox)
|
||||
- ⛓️ [LangFlow LLM Agent Demo for 🦜🔗LangChain](https://youtu.be/zJxDHaWt-6o) by [Cobus Greyling](https://www.youtube.com/@CobusGreylingZA)
|
||||
- ⛓️ [Chatbot Factory: Streamline Python Chatbot Creation with LLMs and Langchain](https://youtu.be/eYer3uzrcuM) by [Finxter](https://www.youtube.com/@CobusGreylingZA)
|
||||
- ⛓️ [LangChain Tutorial - ChatGPT mit eigenen Daten](https://youtu.be/0XDLyY90E2c) by [Coding Crashkurse](https://www.youtube.com/@codingcrashkurse6429)
|
||||
- ⛓️ [Chat with a `CSV` | LangChain Agents Tutorial (Beginners)](https://youtu.be/tjeti5vXWOU) by [GoDataProf](https://www.youtube.com/@godataprof)
|
||||
- ⛓️ [Introdução ao Langchain - #Cortes - Live DataHackers](https://youtu.be/fw8y5VRei5Y) by [Prof. João Gabriel Lima](https://www.youtube.com/@profjoaogabriellima)
|
||||
- ⛓️ [LangChain: Level up `ChatGPT` !? | LangChain Tutorial Part 1](https://youtu.be/vxUGx8aZpDE) by [Code Affinity](https://www.youtube.com/@codeaffinitydev)
|
||||
- ⛓️ [KI schreibt krasses Youtube Skript 😲😳 | LangChain Tutorial Deutsch](https://youtu.be/QpTiXyK1jus) by [SimpleKI](https://www.youtube.com/@simpleki)
|
||||
- ⛓️ [Chat with Audio: Langchain, `Chroma DB`, OpenAI, and `Assembly AI`](https://youtu.be/Kjy7cx1r75g) by [AI Anytime](https://www.youtube.com/@AIAnytime)
|
||||
- ⛓️ [QA over documents with Auto vector index selection with Langchain router chains](https://youtu.be/9G05qybShv8) by [echohive](https://www.youtube.com/@echohive)
|
||||
- ⛓️ [Build your own custom LLM application with `Bubble.io` & Langchain (No Code & Beginner friendly)](https://youtu.be/O7NhQGu1m6c) by [No Code Blackbox](https://www.youtube.com/@nocodeblackbox)
|
||||
- ⛓️ [Simple App to Question Your Docs: Leveraging `Streamlit`, `Hugging Face Spaces`, LangChain, and `Claude`!](https://youtu.be/X4YbNECRr7o) by [Chris Alexiuk](https://www.youtube.com/@chrisalexiuk)
|
||||
- ⛓️ [LANGCHAIN AI- `ConstitutionalChainAI` + Databutton AI ASSISTANT Web App](https://youtu.be/5zIU6_rdJCU) by [Avra](https://www.youtube.com/@Avra_b)
|
||||
- ⛓️ [LANGCHAIN AI AUTONOMOUS AGENT WEB APP - 👶 `BABY AGI` 🤖 with EMAIL AUTOMATION using `DATABUTTON`](https://youtu.be/cvAwOGfeHgw) by [Avra](https://www.youtube.com/@Avra_b)
|
||||
- ⛓️ [The Future of Data Analysis: Using A.I. Models in Data Analysis (LangChain)](https://youtu.be/v_LIcVyg5dk) by [Absent Data](https://www.youtube.com/@absentdata)
|
||||
- ⛓️ [Memory in LangChain | Deep dive (python)](https://youtu.be/70lqvTFh_Yg) by [Eden Marco](https://www.youtube.com/@EdenMarco)
|
||||
- ⛓️ [9 LangChain UseCases | Beginner's Guide | 2023](https://youtu.be/zS8_qosHNMw) by [Data Science Basics](https://www.youtube.com/@datasciencebasics)
|
||||
- ⛓️ [Use Large Language Models in Jupyter Notebook | LangChain | Agents & Indexes](https://youtu.be/JSe11L1a_QQ) by [Abhinaw Tiwari](https://www.youtube.com/@AbhinawTiwariAT)
|
||||
- ⛓️ [How to Talk to Your Langchain Agent | `11 Labs` + `Whisper`](https://youtu.be/N4k459Zw2PU) by [VRSEN](https://www.youtube.com/@vrsen)
|
||||
- ⛓️ [LangChain Deep Dive: 5 FUN AI App Ideas To Build Quickly and Easily](https://youtu.be/mPYEPzLkeks) by [James NoCode](https://www.youtube.com/@jamesnocode)
|
||||
- ⛓️ [BEST OPEN Alternative to OPENAI's EMBEDDINGs for Retrieval QA: LangChain](https://youtu.be/ogEalPMUCSY) by [Prompt Engineering](https://www.youtube.com/@engineerprompt)
|
||||
- ⛓️ [LangChain 101: Models](https://youtu.be/T6c_XsyaNSQ) by [Mckay Wrigley](https://www.youtube.com/@realmckaywrigley)
|
||||
- ⛓️ [LangChain with JavaScript Tutorial #1 | Setup & Using LLMs](https://youtu.be/W3AoeMrg27o) by [Leon van Zyl](https://www.youtube.com/@leonvanzyl)
|
||||
- ⛓️ [LangChain Overview & Tutorial for Beginners: Build Powerful AI Apps Quickly & Easily (ZERO CODE)](https://youtu.be/iI84yym473Q) by [James NoCode](https://www.youtube.com/@jamesnocode)
|
||||
- ⛓️ [LangChain In Action: Real-World Use Case With Step-by-Step Tutorial](https://youtu.be/UO699Szp82M) by [Rabbitmetrics](https://www.youtube.com/@rabbitmetrics)
|
||||
- ⛓️ [Summarizing and Querying Multiple Papers with LangChain](https://youtu.be/p_MQRWH5Y6k) by [Automata Learning Lab](https://www.youtube.com/@automatalearninglab)
|
||||
- ⛓️ [Using Langchain (and `Replit`) through `Tana`, ask `Google`/`Wikipedia`/`Wolfram Alpha` to fill out a table](https://youtu.be/Webau9lEzoI) by [Stian Håklev](https://www.youtube.com/@StianHaklev)
|
||||
- ⛓️ [Langchain PDF App (GUI) | Create a ChatGPT For Your `PDF` in Python](https://youtu.be/wUAUdEw5oxM) by [Alejandro AO - Software & Ai](https://www.youtube.com/@alejandro_ao)
|
||||
- ⛓️ [Auto-GPT with LangChain 🔥 | Create Your Own Personal AI Assistant](https://youtu.be/imDfPmMKEjM) by [Data Science Basics](https://www.youtube.com/@datasciencebasics)
|
||||
- ⛓️ [Create Your OWN Slack AI Assistant with Python & LangChain](https://youtu.be/3jFXRNn2Bu8) by [Dave Ebbelaar](https://www.youtube.com/@daveebbelaar)
|
||||
- ⛓️ [How to Create LOCAL Chatbots with GPT4All and LangChain [Full Guide]](https://youtu.be/4p1Fojur8Zw) by [Liam Ottley](https://www.youtube.com/@LiamOttley)
|
||||
- ⛓️ [Build a `Multilingual PDF` Search App with LangChain, `Cohere` and `Bubble`](https://youtu.be/hOrtuumOrv8) by [Menlo Park Lab](https://www.youtube.com/@menloparklab)
|
||||
- ⛓️ [Building a LangChain Agent (code-free!) Using `Bubble` and `Flowise`](https://youtu.be/jDJIIVWTZDE) by [Menlo Park Lab](https://www.youtube.com/@menloparklab)
|
||||
- ⛓️ [Build a LangChain-based Semantic PDF Search App with No-Code Tools Bubble and Flowise](https://youtu.be/s33v5cIeqA4) by [Menlo Park Lab](https://www.youtube.com/@menloparklab)
|
||||
- ⛓️ [LangChain Memory Tutorial | Building a ChatGPT Clone in Python](https://youtu.be/Cwq91cj2Pnc) by [Alejandro AO - Software & Ai](https://www.youtube.com/@alejandro_ao)
|
||||
- ⛓️ [ChatGPT For Your DATA | Chat with Multiple Documents Using LangChain](https://youtu.be/TeDgIDqQmzs) by [Data Science Basics](https://www.youtube.com/@datasciencebasics)
|
||||
- ⛓️ [`Llama Index`: Chat with Documentation using URL Loader](https://youtu.be/XJRoDEctAwA) by [Merk](https://www.youtube.com/@merksworld)
|
||||
- ⛓️ [Using OpenAI, LangChain, and `Gradio` to Build Custom GenAI Applications](https://youtu.be/1MsmqMg3yUc) by [David Hundley](https://www.youtube.com/@dkhundley)
|
||||
- ⛓️ [LangChain, Chroma DB, OpenAI Beginner Guide | ChatGPT with your PDF](https://youtu.be/FuqdVNB_8c0)
|
||||
- [LangChain Crash Course: Build an AutoGPT app in 25 minutes](https://youtu.be/MlK6SIjcjE8) by [Nicholas Renotte](https://www.youtube.com/@NicholasRenotte)
|
||||
- [LangChain Crash Course - Build apps with language models](https://youtu.be/LbT1yp6quS8) by [Patrick Loeber](https://www.youtube.com/@patloeber)
|
||||
- [LangChain Explained in 13 Minutes | QuickStart Tutorial for Beginners](https://youtu.be/aywZrzNaKjs) by [Rabbitmetrics](https://www.youtube.com/@rabbitmetrics)
|
||||
- [LangFlow LLM Agent Demo for 🦜🔗LangChain](https://youtu.be/zJxDHaWt-6o) by [Cobus Greyling](https://www.youtube.com/@CobusGreylingZA)
|
||||
- [Chatbot Factory: Streamline Python Chatbot Creation with LLMs and Langchain](https://youtu.be/eYer3uzrcuM) by [Finxter](https://www.youtube.com/@CobusGreylingZA)
|
||||
- [LangChain Tutorial - ChatGPT mit eigenen Daten](https://youtu.be/0XDLyY90E2c) by [Coding Crashkurse](https://www.youtube.com/@codingcrashkurse6429)
|
||||
- [Chat with a `CSV` | LangChain Agents Tutorial (Beginners)](https://youtu.be/tjeti5vXWOU) by [GoDataProf](https://www.youtube.com/@godataprof)
|
||||
- [Introdução ao Langchain - #Cortes - Live DataHackers](https://youtu.be/fw8y5VRei5Y) by [Prof. João Gabriel Lima](https://www.youtube.com/@profjoaogabriellima)
|
||||
- [LangChain: Level up `ChatGPT` !? | LangChain Tutorial Part 1](https://youtu.be/vxUGx8aZpDE) by [Code Affinity](https://www.youtube.com/@codeaffinitydev)
|
||||
- [KI schreibt krasses Youtube Skript 😲😳 | LangChain Tutorial Deutsch](https://youtu.be/QpTiXyK1jus) by [SimpleKI](https://www.youtube.com/@simpleki)
|
||||
- [Chat with Audio: Langchain, `Chroma DB`, OpenAI, and `Assembly AI`](https://youtu.be/Kjy7cx1r75g) by [AI Anytime](https://www.youtube.com/@AIAnytime)
|
||||
- [QA over documents with Auto vector index selection with Langchain router chains](https://youtu.be/9G05qybShv8) by [echohive](https://www.youtube.com/@echohive)
|
||||
- [Build your own custom LLM application with `Bubble.io` & Langchain (No Code & Beginner friendly)](https://youtu.be/O7NhQGu1m6c) by [No Code Blackbox](https://www.youtube.com/@nocodeblackbox)
|
||||
- [Simple App to Question Your Docs: Leveraging `Streamlit`, `Hugging Face Spaces`, LangChain, and `Claude`!](https://youtu.be/X4YbNECRr7o) by [Chris Alexiuk](https://www.youtube.com/@chrisalexiuk)
|
||||
- [LANGCHAIN AI- `ConstitutionalChainAI` + Databutton AI ASSISTANT Web App](https://youtu.be/5zIU6_rdJCU) by [Avra](https://www.youtube.com/@Avra_b)
|
||||
- [LANGCHAIN AI AUTONOMOUS AGENT WEB APP - 👶 `BABY AGI` 🤖 with EMAIL AUTOMATION using `DATABUTTON`](https://youtu.be/cvAwOGfeHgw) by [Avra](https://www.youtube.com/@Avra_b)
|
||||
- [The Future of Data Analysis: Using A.I. Models in Data Analysis (LangChain)](https://youtu.be/v_LIcVyg5dk) by [Absent Data](https://www.youtube.com/@absentdata)
|
||||
- [Memory in LangChain | Deep dive (python)](https://youtu.be/70lqvTFh_Yg) by [Eden Marco](https://www.youtube.com/@EdenMarco)
|
||||
- [9 LangChain UseCases | Beginner's Guide | 2023](https://youtu.be/zS8_qosHNMw) by [Data Science Basics](https://www.youtube.com/@datasciencebasics)
|
||||
- [Use Large Language Models in Jupyter Notebook | LangChain | Agents & Indexes](https://youtu.be/JSe11L1a_QQ) by [Abhinaw Tiwari](https://www.youtube.com/@AbhinawTiwariAT)
|
||||
- [How to Talk to Your Langchain Agent | `11 Labs` + `Whisper`](https://youtu.be/N4k459Zw2PU) by [VRSEN](https://www.youtube.com/@vrsen)
|
||||
- [LangChain Deep Dive: 5 FUN AI App Ideas To Build Quickly and Easily](https://youtu.be/mPYEPzLkeks) by [James NoCode](https://www.youtube.com/@jamesnocode)
|
||||
- [BEST OPEN Alternative to OPENAI's EMBEDDINGs for Retrieval QA: LangChain](https://youtu.be/ogEalPMUCSY) by [Prompt Engineering](https://www.youtube.com/@engineerprompt)
|
||||
- [LangChain 101: Models](https://youtu.be/T6c_XsyaNSQ) by [Mckay Wrigley](https://www.youtube.com/@realmckaywrigley)
|
||||
- [LangChain with JavaScript Tutorial #1 | Setup & Using LLMs](https://youtu.be/W3AoeMrg27o) by [Leon van Zyl](https://www.youtube.com/@leonvanzyl)
|
||||
- [LangChain Overview & Tutorial for Beginners: Build Powerful AI Apps Quickly & Easily (ZERO CODE)](https://youtu.be/iI84yym473Q) by [James NoCode](https://www.youtube.com/@jamesnocode)
|
||||
- [LangChain In Action: Real-World Use Case With Step-by-Step Tutorial](https://youtu.be/UO699Szp82M) by [Rabbitmetrics](https://www.youtube.com/@rabbitmetrics)
|
||||
- [Summarizing and Querying Multiple Papers with LangChain](https://youtu.be/p_MQRWH5Y6k) by [Automata Learning Lab](https://www.youtube.com/@automatalearninglab)
|
||||
- [Using Langchain (and `Replit`) through `Tana`, ask `Google`/`Wikipedia`/`Wolfram Alpha` to fill out a table](https://youtu.be/Webau9lEzoI) by [Stian Håklev](https://www.youtube.com/@StianHaklev)
|
||||
- [Langchain PDF App (GUI) | Create a ChatGPT For Your `PDF` in Python](https://youtu.be/wUAUdEw5oxM) by [Alejandro AO - Software & Ai](https://www.youtube.com/@alejandro_ao)
|
||||
- [Auto-GPT with LangChain 🔥 | Create Your Own Personal AI Assistant](https://youtu.be/imDfPmMKEjM) by [Data Science Basics](https://www.youtube.com/@datasciencebasics)
|
||||
- [Create Your OWN Slack AI Assistant with Python & LangChain](https://youtu.be/3jFXRNn2Bu8) by [Dave Ebbelaar](https://www.youtube.com/@daveebbelaar)
|
||||
- [How to Create LOCAL Chatbots with GPT4All and LangChain [Full Guide]](https://youtu.be/4p1Fojur8Zw) by [Liam Ottley](https://www.youtube.com/@LiamOttley)
|
||||
- [Build a `Multilingual PDF` Search App with LangChain, `Cohere` and `Bubble`](https://youtu.be/hOrtuumOrv8) by [Menlo Park Lab](https://www.youtube.com/@menloparklab)
|
||||
- [Building a LangChain Agent (code-free!) Using `Bubble` and `Flowise`](https://youtu.be/jDJIIVWTZDE) by [Menlo Park Lab](https://www.youtube.com/@menloparklab)
|
||||
- [Build a LangChain-based Semantic PDF Search App with No-Code Tools Bubble and Flowise](https://youtu.be/s33v5cIeqA4) by [Menlo Park Lab](https://www.youtube.com/@menloparklab)
|
||||
- [LangChain Memory Tutorial | Building a ChatGPT Clone in Python](https://youtu.be/Cwq91cj2Pnc) by [Alejandro AO - Software & Ai](https://www.youtube.com/@alejandro_ao)
|
||||
- [ChatGPT For Your DATA | Chat with Multiple Documents Using LangChain](https://youtu.be/TeDgIDqQmzs) by [Data Science Basics](https://www.youtube.com/@datasciencebasics)
|
||||
- [`Llama Index`: Chat with Documentation using URL Loader](https://youtu.be/XJRoDEctAwA) by [Merk](https://www.youtube.com/@merksworld)
|
||||
- [Using OpenAI, LangChain, and `Gradio` to Build Custom GenAI Applications](https://youtu.be/1MsmqMg3yUc) by [David Hundley](https://www.youtube.com/@dkhundley)
|
||||
- [LangChain, Chroma DB, OpenAI Beginner Guide | ChatGPT with your PDF](https://youtu.be/FuqdVNB_8c0)
|
||||
- ⛓ [Build AI chatbot with custom knowledge base using OpenAI API and GPT Index](https://youtu.be/vDZAZuaXf48) by [Irina Nik](https://www.youtube.com/@irina_nik)
|
||||
- ⛓ [Build Your Own Auto-GPT Apps with LangChain (Python Tutorial)](https://youtu.be/NYSWn1ipbgg) by [Dave Ebbelaar](https://www.youtube.com/@daveebbelaar)
|
||||
- ⛓ [Chat with Multiple `PDFs` | LangChain App Tutorial in Python (Free LLMs and Embeddings)](https://youtu.be/dXxQ0LR-3Hg) by [Alejandro AO - Software & Ai](https://www.youtube.com/@alejandro_ao)
|
||||
- ⛓ [Chat with a `CSV` | `LangChain Agents` Tutorial (Beginners)](https://youtu.be/tjeti5vXWOU) by [Alejandro AO - Software & Ai](https://www.youtube.com/@alejandro_ao)
|
||||
- ⛓ [Create Your Own ChatGPT with `PDF` Data in 5 Minutes (LangChain Tutorial)](https://youtu.be/au2WVVGUvc8) by [Liam Ottley](https://www.youtube.com/@LiamOttley)
|
||||
- ⛓ [Using ChatGPT with YOUR OWN Data. This is magical. (LangChain OpenAI API)](https://youtu.be/9AXP7tCI9PI) by [TechLead](https://www.youtube.com/@TechLead)
|
||||
- ⛓ [Build a Custom Chatbot with OpenAI: `GPT-Index` & LangChain | Step-by-Step Tutorial](https://youtu.be/FIDv6nc4CgU) by [Fabrikod](https://www.youtube.com/@fabrikod)
|
||||
- ⛓ [`Flowise` is an open source no-code UI visual tool to build 🦜🔗LangChain applications](https://youtu.be/CovAPtQPU0k) by [Cobus Greyling](https://www.youtube.com/@CobusGreylingZA)
|
||||
- ⛓ [LangChain & GPT 4 For Data Analysis: The `Pandas` Dataframe Agent](https://youtu.be/rFQ5Kmkd4jc) by [Rabbitmetrics](https://www.youtube.com/@rabbitmetrics)
|
||||
- ⛓ [`GirlfriendGPT` - AI girlfriend with LangChain](https://youtu.be/LiN3D1QZGQw) by [Toolfinder AI](https://www.youtube.com/@toolfinderai)
|
||||
- ⛓ [`PrivateGPT`: Chat to your FILES OFFLINE and FREE [Installation and Tutorial]](https://youtu.be/G7iLllmx4qc) by [Prompt Engineering](https://www.youtube.com/@engineerprompt)
|
||||
- ⛓ [How to build with Langchain 10x easier | ⛓️ LangFlow & `Flowise`](https://youtu.be/Ya1oGL7ZTvU) by [AI Jason](https://www.youtube.com/@AIJasonZ)
|
||||
- ⛓ [Getting Started With LangChain In 20 Minutes- Build Celebrity Search Application](https://youtu.be/_FpT1cwcSLg) by [Krish Naik](https://www.youtube.com/@krishnaik06)
|
||||
|
||||
|
||||
## Tutorial Series
|
||||
|
||||
|
||||
⛓ icon marks a new addition [last update 2023-05-15]
|
||||
|
||||
### DeepLearning.AI course
|
||||
⛓[LangChain for LLM Application Development](https://learn.deeplearning.ai/langchain) by Harrison Chase presented by [Andrew Ng](https://en.wikipedia.org/wiki/Andrew_Ng)
|
||||
|
||||
### Handbook
|
||||
[LangChain AI Handbook](https://www.pinecone.io/learn/langchain/) By **James Briggs** and **Francisco Ingham**
|
||||
|
||||
### Tutorials
|
||||
[LangChain Tutorials](https://www.youtube.com/watch?v=FuqdVNB_8c0&list=PL9V0lbeJ69brU-ojMpU1Y7Ic58Tap0Cw6) by [Edrick](https://www.youtube.com/@edrickdch):
|
||||
- ⛓ [LangChain, Chroma DB, OpenAI Beginner Guide | ChatGPT with your PDF](https://youtu.be/FuqdVNB_8c0)
|
||||
- ⛓ [LangChain 101: The Complete Beginner's Guide](https://youtu.be/P3MAbZ2eMUI)
|
||||
|
||||
[LangChain Crash Course: Build an AutoGPT app in 25 minutes](https://youtu.be/MlK6SIjcjE8) by [Nicholas Renotte](https://www.youtube.com/@NicholasRenotte)
|
||||
|
||||
|
||||
[LangChain Crash Course - Build apps with language models](https://youtu.be/LbT1yp6quS8) by [Patrick Loeber](https://www.youtube.com/@patloeber)
|
||||
|
||||
|
||||
[LangChain Explained in 13 Minutes | QuickStart Tutorial for Beginners](https://youtu.be/aywZrzNaKjs) by [Rabbitmetrics](https://www.youtube.com/@rabbitmetrics)
|
||||
|
||||
|
||||
### [LangChain for Gen AI and LLMs](https://www.youtube.com/playlist?list=PLIUOU7oqGTLieV9uTIFMm6_4PXg-hlN6F) by [James Briggs](https://www.youtube.com/@jamesbriggs):
|
||||
- #1 [Getting Started with `GPT-3` vs. Open Source LLMs](https://youtu.be/nE2skSRWTTs)
|
||||
- #2 [Prompt Templates for `GPT 3.5` and other LLMs](https://youtu.be/RflBcK0oDH0)
|
||||
- #3 [LLM Chains using `GPT 3.5` and other LLMs](https://youtu.be/S8j9Tk0lZHU)
|
||||
- #4 [Chatbot Memory for `Chat-GPT`, `Davinci` + other LLMs](https://youtu.be/X05uK0TZozM)
|
||||
- #5 [Chat with OpenAI in LangChain](https://youtu.be/CnAgB3A5OlU)
|
||||
- ⛓ #6 [Fixing LLM Hallucinations with Retrieval Augmentation in LangChain](https://youtu.be/kvdVduIJsc8)
|
||||
- ⛓ #7 [LangChain Agents Deep Dive with GPT 3.5](https://youtu.be/jSP-gSEyVeI)
|
||||
- ⛓ #8 [Create Custom Tools for Chatbots in LangChain](https://youtu.be/q-HNphrWsDE)
|
||||
- ⛓ #9 [Build Conversational Agents with Vector DBs](https://youtu.be/H6bCqqw9xyI)
|
||||
|
||||
|
||||
### [LangChain 101](https://www.youtube.com/playlist?list=PLqZXAkvF1bPNQER9mLmDbntNfSpzdDIU5) by [Data Independent](https://www.youtube.com/@DataIndependent):
|
||||
- [What Is LangChain? - LangChain + `ChatGPT` Overview](https://youtu.be/_v_fgW2SkkQ)
|
||||
- [Quickstart Guide](https://youtu.be/kYRB-vJFy38)
|
||||
- [Beginner Guide To 7 Essential Concepts](https://youtu.be/2xxziIWmaSA)
|
||||
- [`OpenAI` + `Wolfram Alpha`](https://youtu.be/UijbzCIJ99g)
|
||||
- [Ask Questions On Your Custom (or Private) Files](https://youtu.be/EnT-ZTrcPrg)
|
||||
- [Connect `Google Drive Files` To `OpenAI`](https://youtu.be/IqqHqDcXLww)
|
||||
- [`YouTube Transcripts` + `OpenAI`](https://youtu.be/pNcQ5XXMgH4)
|
||||
- [Question A 300 Page Book (w/ `OpenAI` + `Pinecone`)](https://youtu.be/h0DHDp1FbmQ)
|
||||
- [Workaround `OpenAI's` Token Limit With Chain Types](https://youtu.be/f9_BWhCI4Zo)
|
||||
- [Build Your Own OpenAI + LangChain Web App in 23 Minutes](https://youtu.be/U_eV8wfMkXU)
|
||||
- [Working With The New `ChatGPT API`](https://youtu.be/e9P7FLi5Zy8)
|
||||
- [OpenAI + LangChain Wrote Me 100 Custom Sales Emails](https://youtu.be/y1pyAQM-3Bo)
|
||||
- [Structured Output From `OpenAI` (Clean Dirty Data)](https://youtu.be/KwAXfey-xQk)
|
||||
- [Connect `OpenAI` To +5,000 Tools (LangChain + `Zapier`)](https://youtu.be/7tNm0yiDigU)
|
||||
- [Use LLMs To Extract Data From Text (Expert Mode)](https://youtu.be/xZzvwR9jdPA)
|
||||
- ⛓ [Extract Insights From Interview Transcripts Using LLMs](https://youtu.be/shkMOHwJ4SM)
|
||||
- ⛓ [5 Levels Of LLM Summarizing: Novice to Expert](https://youtu.be/qaPMdcCqtWk)
|
||||
|
||||
|
||||
### [LangChain How to and guides](https://www.youtube.com/playlist?list=PL8motc6AQftk1Bs42EW45kwYbyJ4jOdiZ) by [Sam Witteveen](https://www.youtube.com/@samwitteveenai):
|
||||
- [LangChain Basics - LLMs & PromptTemplates with Colab](https://youtu.be/J_0qvRt4LNk)
|
||||
- [LangChain Basics - Tools and Chains](https://youtu.be/hI2BY7yl_Ac)
|
||||
- [`ChatGPT API` Announcement & Code Walkthrough with LangChain](https://youtu.be/phHqvLHCwH4)
|
||||
- [Conversations with Memory (explanation & code walkthrough)](https://youtu.be/X550Zbz_ROE)
|
||||
- [Chat with `Flan20B`](https://youtu.be/VW5LBavIfY4)
|
||||
- [Using `Hugging Face Models` locally (code walkthrough)](https://youtu.be/Kn7SX2Mx_Jk)
|
||||
- [`PAL` : Program-aided Language Models with LangChain code](https://youtu.be/dy7-LvDu-3s)
|
||||
- [Building a Summarization System with LangChain and `GPT-3` - Part 1](https://youtu.be/LNq_2s_H01Y)
|
||||
- [Building a Summarization System with LangChain and `GPT-3` - Part 2](https://youtu.be/d-yeHDLgKHw)
|
||||
- [Microsoft's `Visual ChatGPT` using LangChain](https://youtu.be/7YEiEyfPF5U)
|
||||
- [LangChain Agents - Joining Tools and Chains with Decisions](https://youtu.be/ziu87EXZVUE)
|
||||
- [Comparing LLMs with LangChain](https://youtu.be/rFNG0MIEuW0)
|
||||
- [Using `Constitutional AI` in LangChain](https://youtu.be/uoVqNFDwpX4)
|
||||
- [Talking to `Alpaca` with LangChain - Creating an Alpaca Chatbot](https://youtu.be/v6sF8Ed3nTE)
|
||||
- [Talk to your `CSV` & `Excel` with LangChain](https://youtu.be/xQ3mZhw69bc)
|
||||
- [`BabyAGI`: Discover the Power of Task-Driven Autonomous Agents!](https://youtu.be/QBcDLSE2ERA)
|
||||
- [Improve your `BabyAGI` with LangChain](https://youtu.be/DRgPyOXZ-oE)
|
||||
- ⛓ [Master `PDF` Chat with LangChain - Your essential guide to queries on documents](https://youtu.be/ZzgUqFtxgXI)
|
||||
- ⛓ [Using LangChain with `DuckDuckGO` `Wikipedia` & `PythonREPL` Tools](https://youtu.be/KerHlb8nuVc)
|
||||
- ⛓ [Building Custom Tools and Agents with LangChain (gpt-3.5-turbo)](https://youtu.be/biS8G8x8DdA)
|
||||
- ⛓ [LangChain Retrieval QA Over Multiple Files with `ChromaDB`](https://youtu.be/3yPBVii7Ct0)
|
||||
- ⛓ [LangChain Retrieval QA with Instructor Embeddings & `ChromaDB` for PDFs](https://youtu.be/cFCGUjc33aU)
|
||||
- ⛓ [LangChain + Retrieval Local LLMs for Retrieval QA - No OpenAI!!!](https://youtu.be/9ISVjh8mdlA)
|
||||
|
||||
|
||||
### [LangChain](https://www.youtube.com/playlist?list=PLVEEucA9MYhOu89CX8H3MBZqayTbcCTMr) by [Prompt Engineering](https://www.youtube.com/@engineerprompt):
|
||||
- [LangChain Crash Course — All You Need to Know to Build Powerful Apps with LLMs](https://youtu.be/5-fc4Tlgmro)
|
||||
- [Working with MULTIPLE `PDF` Files in LangChain: `ChatGPT` for your Data](https://youtu.be/s5LhRdh5fu4)
|
||||
- [`ChatGPT` for YOUR OWN `PDF` files with LangChain](https://youtu.be/TLf90ipMzfE)
|
||||
- [Talk to YOUR DATA without OpenAI APIs: LangChain](https://youtu.be/wrD-fZvT6UI)
|
||||
- ⛓️ [CHATGPT For WEBSITES: Custom ChatBOT](https://youtu.be/RBnuhhmD21U)
|
||||
|
||||
|
||||
### LangChain by [Chat with data](https://www.youtube.com/@chatwithdata)
|
||||
- [LangChain Beginner's Tutorial for `Typescript`/`Javascript`](https://youtu.be/bH722QgRlhQ)
|
||||
- [`GPT-4` Tutorial: How to Chat With Multiple `PDF` Files (~1000 pages of Tesla's 10-K Annual Reports)](https://youtu.be/Ix9WIZpArm0)
|
||||
- [`GPT-4` & LangChain Tutorial: How to Chat With A 56-Page `PDF` Document (w/`Pinecone`)](https://youtu.be/ih9PBGVVOO4)
|
||||
- ⛓ [LangChain & Supabase Tutorial: How to Build a ChatGPT Chatbot For Your Website](https://youtu.be/R2FMzcsmQY8)
|
||||
|
||||
|
||||
### [Get SH\*T Done with Prompt Engineering and LangChain](https://www.youtube.com/watch?v=muXbPpG_ys4&list=PLEJK-H61Xlwzm5FYLDdKt_6yibO33zoMW) by [Venelin Valkov](https://www.youtube.com/@venelin_valkov)
|
||||
### [Prompt Engineering and LangChain](https://www.youtube.com/watch?v=muXbPpG_ys4&list=PLEJK-H61Xlwzm5FYLDdKt_6yibO33zoMW) by [Venelin Valkov](https://www.youtube.com/@venelin_valkov)
|
||||
- [Getting Started with LangChain: Load Custom Data, Run OpenAI Models, Embeddings and `ChatGPT`](https://www.youtube.com/watch?v=muXbPpG_ys4)
|
||||
- [Loaders, Indexes & Vectorstores in LangChain: Question Answering on `PDF` files with `ChatGPT`](https://www.youtube.com/watch?v=FQnvfR8Dmr0)
|
||||
- [LangChain Models: `ChatGPT`, `Flan Alpaca`, `OpenAI Embeddings`, Prompt Templates & Streaming](https://www.youtube.com/watch?v=zy6LiK5F5-s)
|
||||
- [LangChain Chains: Use `ChatGPT` to Build Conversational Agents, Summaries and Q&A on Text With LLMs](https://www.youtube.com/watch?v=h1tJZQPcimM)
|
||||
- [Analyze Custom CSV Data with `GPT-4` using Langchain](https://www.youtube.com/watch?v=Ew3sGdX8at4)
|
||||
- ⛓ [Build ChatGPT Chatbots with LangChain Memory: Understanding and Implementing Memory in Conversations](https://youtu.be/CyuUlf54wTs)
|
||||
- [Build ChatGPT Chatbots with LangChain Memory: Understanding and Implementing Memory in Conversations](https://youtu.be/CyuUlf54wTs)
|
||||
|
||||
|
||||
---------------------
|
||||
⛓ icon marks a new addition [last update 2023-05-15]
|
||||
⛓ icon marks a new addition [last update 2023-06-20]
|
||||
|
||||
@@ -2,188 +2,261 @@
|
||||
|
||||
Dependents stats for `hwchase17/langchain`
|
||||
|
||||
[](https://github.com/hwchase17/langchain/network/dependents)
|
||||
[&message=172&color=informational&logo=slickpic)](https://github.com/hwchase17/langchain/network/dependents)
|
||||
[&message=4980&color=informational&logo=slickpic)](https://github.com/hwchase17/langchain/network/dependents)
|
||||
[&message=17239&color=informational&logo=slickpic)](https://github.com/hwchase17/langchain/network/dependents)
|
||||
[](https://github.com/hwchase17/langchain/network/dependents)
|
||||
[&message=244&color=informational&logo=slickpic)](https://github.com/hwchase17/langchain/network/dependents)
|
||||
[&message=9697&color=informational&logo=slickpic)](https://github.com/hwchase17/langchain/network/dependents)
|
||||
[&message=19827&color=informational&logo=slickpic)](https://github.com/hwchase17/langchain/network/dependents)
|
||||
|
||||
[update: 2023-05-17; only dependent repositories with Stars > 100]
|
||||
|
||||
[update: 2023-07-07; only dependent repositories with Stars > 100]
|
||||
|
||||
|
||||
| Repository | Stars |
|
||||
| :-------- | -----: |
|
||||
|[openai/openai-cookbook](https://github.com/openai/openai-cookbook) | 35401 |
|
||||
|[LAION-AI/Open-Assistant](https://github.com/LAION-AI/Open-Assistant) | 32861 |
|
||||
|[microsoft/TaskMatrix](https://github.com/microsoft/TaskMatrix) | 32766 |
|
||||
|[hpcaitech/ColossalAI](https://github.com/hpcaitech/ColossalAI) | 29560 |
|
||||
|[reworkd/AgentGPT](https://github.com/reworkd/AgentGPT) | 22315 |
|
||||
|[imartinez/privateGPT](https://github.com/imartinez/privateGPT) | 17474 |
|
||||
|[openai/chatgpt-retrieval-plugin](https://github.com/openai/chatgpt-retrieval-plugin) | 16923 |
|
||||
|[mindsdb/mindsdb](https://github.com/mindsdb/mindsdb) | 16112 |
|
||||
|[jerryjliu/llama_index](https://github.com/jerryjliu/llama_index) | 15407 |
|
||||
|[mlflow/mlflow](https://github.com/mlflow/mlflow) | 14345 |
|
||||
|[GaiZhenbiao/ChuanhuChatGPT](https://github.com/GaiZhenbiao/ChuanhuChatGPT) | 10372 |
|
||||
|[databrickslabs/dolly](https://github.com/databrickslabs/dolly) | 9919 |
|
||||
|[AIGC-Audio/AudioGPT](https://github.com/AIGC-Audio/AudioGPT) | 8177 |
|
||||
|[logspace-ai/langflow](https://github.com/logspace-ai/langflow) | 6807 |
|
||||
|[imClumsyPanda/langchain-ChatGLM](https://github.com/imClumsyPanda/langchain-ChatGLM) | 6087 |
|
||||
|[arc53/DocsGPT](https://github.com/arc53/DocsGPT) | 5292 |
|
||||
|[e2b-dev/e2b](https://github.com/e2b-dev/e2b) | 4622 |
|
||||
|[nsarrazin/serge](https://github.com/nsarrazin/serge) | 4076 |
|
||||
|[madawei2699/myGPTReader](https://github.com/madawei2699/myGPTReader) | 3952 |
|
||||
|[zauberzeug/nicegui](https://github.com/zauberzeug/nicegui) | 3952 |
|
||||
|[go-skynet/LocalAI](https://github.com/go-skynet/LocalAI) | 3762 |
|
||||
|[GreyDGL/PentestGPT](https://github.com/GreyDGL/PentestGPT) | 3388 |
|
||||
|[mmabrouk/chatgpt-wrapper](https://github.com/mmabrouk/chatgpt-wrapper) | 3243 |
|
||||
|[zilliztech/GPTCache](https://github.com/zilliztech/GPTCache) | 3189 |
|
||||
|[wenda-LLM/wenda](https://github.com/wenda-LLM/wenda) | 3050 |
|
||||
|[marqo-ai/marqo](https://github.com/marqo-ai/marqo) | 2930 |
|
||||
|[gkamradt/langchain-tutorials](https://github.com/gkamradt/langchain-tutorials) | 2710 |
|
||||
|[PrefectHQ/marvin](https://github.com/PrefectHQ/marvin) | 2545 |
|
||||
|[project-baize/baize-chatbot](https://github.com/project-baize/baize-chatbot) | 2479 |
|
||||
|[whitead/paper-qa](https://github.com/whitead/paper-qa) | 2399 |
|
||||
|[langgenius/dify](https://github.com/langgenius/dify) | 2344 |
|
||||
|[GerevAI/gerev](https://github.com/GerevAI/gerev) | 2283 |
|
||||
|[hwchase17/chat-langchain](https://github.com/hwchase17/chat-langchain) | 2266 |
|
||||
|[guangzhengli/ChatFiles](https://github.com/guangzhengli/ChatFiles) | 1903 |
|
||||
|[Azure-Samples/azure-search-openai-demo](https://github.com/Azure-Samples/azure-search-openai-demo) | 1884 |
|
||||
|[OpenBMB/BMTools](https://github.com/OpenBMB/BMTools) | 1860 |
|
||||
|[Farama-Foundation/PettingZoo](https://github.com/Farama-Foundation/PettingZoo) | 1813 |
|
||||
|[OpenGVLab/Ask-Anything](https://github.com/OpenGVLab/Ask-Anything) | 1571 |
|
||||
|[IntelligenzaArtificiale/Free-Auto-GPT](https://github.com/IntelligenzaArtificiale/Free-Auto-GPT) | 1480 |
|
||||
|[hwchase17/notion-qa](https://github.com/hwchase17/notion-qa) | 1464 |
|
||||
|[NVIDIA/NeMo-Guardrails](https://github.com/NVIDIA/NeMo-Guardrails) | 1419 |
|
||||
|[Unstructured-IO/unstructured](https://github.com/Unstructured-IO/unstructured) | 1410 |
|
||||
|[Kav-K/GPTDiscord](https://github.com/Kav-K/GPTDiscord) | 1363 |
|
||||
|[paulpierre/RasaGPT](https://github.com/paulpierre/RasaGPT) | 1344 |
|
||||
|[StanGirard/quivr](https://github.com/StanGirard/quivr) | 1330 |
|
||||
|[lunasec-io/lunasec](https://github.com/lunasec-io/lunasec) | 1318 |
|
||||
|[vocodedev/vocode-python](https://github.com/vocodedev/vocode-python) | 1286 |
|
||||
|[agiresearch/OpenAGI](https://github.com/agiresearch/OpenAGI) | 1156 |
|
||||
|[h2oai/h2ogpt](https://github.com/h2oai/h2ogpt) | 1141 |
|
||||
|[jina-ai/thinkgpt](https://github.com/jina-ai/thinkgpt) | 1106 |
|
||||
|[yanqiangmiffy/Chinese-LangChain](https://github.com/yanqiangmiffy/Chinese-LangChain) | 1072 |
|
||||
|[ttengwang/Caption-Anything](https://github.com/ttengwang/Caption-Anything) | 1064 |
|
||||
|[jina-ai/dev-gpt](https://github.com/jina-ai/dev-gpt) | 1057 |
|
||||
|[juncongmoo/chatllama](https://github.com/juncongmoo/chatllama) | 1003 |
|
||||
|[greshake/llm-security](https://github.com/greshake/llm-security) | 1002 |
|
||||
|[visual-openllm/visual-openllm](https://github.com/visual-openllm/visual-openllm) | 957 |
|
||||
|[richardyc/Chrome-GPT](https://github.com/richardyc/Chrome-GPT) | 918 |
|
||||
|[irgolic/AutoPR](https://github.com/irgolic/AutoPR) | 886 |
|
||||
|[mmz-001/knowledge_gpt](https://github.com/mmz-001/knowledge_gpt) | 867 |
|
||||
|[thomas-yanxin/LangChain-ChatGLM-Webui](https://github.com/thomas-yanxin/LangChain-ChatGLM-Webui) | 850 |
|
||||
|[microsoft/X-Decoder](https://github.com/microsoft/X-Decoder) | 837 |
|
||||
|[peterw/Chat-with-Github-Repo](https://github.com/peterw/Chat-with-Github-Repo) | 826 |
|
||||
|[cirediatpl/FigmaChain](https://github.com/cirediatpl/FigmaChain) | 782 |
|
||||
|[hashintel/hash](https://github.com/hashintel/hash) | 778 |
|
||||
|[seanpixel/Teenage-AGI](https://github.com/seanpixel/Teenage-AGI) | 773 |
|
||||
|[jina-ai/langchain-serve](https://github.com/jina-ai/langchain-serve) | 738 |
|
||||
|[corca-ai/EVAL](https://github.com/corca-ai/EVAL) | 737 |
|
||||
|[ai-sidekick/sidekick](https://github.com/ai-sidekick/sidekick) | 717 |
|
||||
|[rlancemartin/auto-evaluator](https://github.com/rlancemartin/auto-evaluator) | 703 |
|
||||
|[poe-platform/api-bot-tutorial](https://github.com/poe-platform/api-bot-tutorial) | 689 |
|
||||
|[SamurAIGPT/Camel-AutoGPT](https://github.com/SamurAIGPT/Camel-AutoGPT) | 666 |
|
||||
|[eyurtsev/kor](https://github.com/eyurtsev/kor) | 608 |
|
||||
|[run-llama/llama-lab](https://github.com/run-llama/llama-lab) | 559 |
|
||||
|[namuan/dr-doc-search](https://github.com/namuan/dr-doc-search) | 544 |
|
||||
|[pieroit/cheshire-cat](https://github.com/pieroit/cheshire-cat) | 520 |
|
||||
|[griptape-ai/griptape](https://github.com/griptape-ai/griptape) | 514 |
|
||||
|[getmetal/motorhead](https://github.com/getmetal/motorhead) | 481 |
|
||||
|[hwchase17/chat-your-data](https://github.com/hwchase17/chat-your-data) | 462 |
|
||||
|[langchain-ai/langchain-aiplugin](https://github.com/langchain-ai/langchain-aiplugin) | 452 |
|
||||
|[jina-ai/agentchain](https://github.com/jina-ai/agentchain) | 439 |
|
||||
|[SamurAIGPT/ChatGPT-Developer-Plugins](https://github.com/SamurAIGPT/ChatGPT-Developer-Plugins) | 437 |
|
||||
|[alexanderatallah/window.ai](https://github.com/alexanderatallah/window.ai) | 433 |
|
||||
|[michaelthwan/searchGPT](https://github.com/michaelthwan/searchGPT) | 427 |
|
||||
|[mpaepper/content-chatbot](https://github.com/mpaepper/content-chatbot) | 425 |
|
||||
|[mckaywrigley/repo-chat](https://github.com/mckaywrigley/repo-chat) | 422 |
|
||||
|[whyiyhw/chatgpt-wechat](https://github.com/whyiyhw/chatgpt-wechat) | 421 |
|
||||
|[freddyaboulton/gradio-tools](https://github.com/freddyaboulton/gradio-tools) | 407 |
|
||||
|[jonra1993/fastapi-alembic-sqlmodel-async](https://github.com/jonra1993/fastapi-alembic-sqlmodel-async) | 395 |
|
||||
|[yeagerai/yeagerai-agent](https://github.com/yeagerai/yeagerai-agent) | 383 |
|
||||
|[akshata29/chatpdf](https://github.com/akshata29/chatpdf) | 374 |
|
||||
|[OpenGVLab/InternGPT](https://github.com/OpenGVLab/InternGPT) | 368 |
|
||||
|[ruoccofabrizio/azure-open-ai-embeddings-qna](https://github.com/ruoccofabrizio/azure-open-ai-embeddings-qna) | 358 |
|
||||
|[101dotxyz/GPTeam](https://github.com/101dotxyz/GPTeam) | 357 |
|
||||
|[mtenenholtz/chat-twitter](https://github.com/mtenenholtz/chat-twitter) | 354 |
|
||||
|[amosjyng/langchain-visualizer](https://github.com/amosjyng/langchain-visualizer) | 343 |
|
||||
|[msoedov/langcorn](https://github.com/msoedov/langcorn) | 334 |
|
||||
|[showlab/VLog](https://github.com/showlab/VLog) | 330 |
|
||||
|[continuum-llms/chatgpt-memory](https://github.com/continuum-llms/chatgpt-memory) | 324 |
|
||||
|[steamship-core/steamship-langchain](https://github.com/steamship-core/steamship-langchain) | 323 |
|
||||
|[daodao97/chatdoc](https://github.com/daodao97/chatdoc) | 320 |
|
||||
|[xuwenhao/geektime-ai-course](https://github.com/xuwenhao/geektime-ai-course) | 308 |
|
||||
|[StevenGrove/GPT4Tools](https://github.com/StevenGrove/GPT4Tools) | 301 |
|
||||
|[logan-markewich/llama_index_starter_pack](https://github.com/logan-markewich/llama_index_starter_pack) | 300 |
|
||||
|[andylokandy/gpt-4-search](https://github.com/andylokandy/gpt-4-search) | 299 |
|
||||
|[Anil-matcha/ChatPDF](https://github.com/Anil-matcha/ChatPDF) | 287 |
|
||||
|[itamargol/openai](https://github.com/itamargol/openai) | 273 |
|
||||
|[BlackHC/llm-strategy](https://github.com/BlackHC/llm-strategy) | 267 |
|
||||
|[momegas/megabots](https://github.com/momegas/megabots) | 259 |
|
||||
|[bborn/howdoi.ai](https://github.com/bborn/howdoi.ai) | 238 |
|
||||
|[Cheems-Seminar/grounded-segment-any-parts](https://github.com/Cheems-Seminar/grounded-segment-any-parts) | 232 |
|
||||
|[ur-whitelab/exmol](https://github.com/ur-whitelab/exmol) | 227 |
|
||||
|[sullivan-sean/chat-langchainjs](https://github.com/sullivan-sean/chat-langchainjs) | 227 |
|
||||
|[explosion/spacy-llm](https://github.com/explosion/spacy-llm) | 226 |
|
||||
|[recalign/RecAlign](https://github.com/recalign/RecAlign) | 218 |
|
||||
|[jupyterlab/jupyter-ai](https://github.com/jupyterlab/jupyter-ai) | 218 |
|
||||
|[alvarosevilla95/autolang](https://github.com/alvarosevilla95/autolang) | 215 |
|
||||
|[conceptofmind/toolformer](https://github.com/conceptofmind/toolformer) | 213 |
|
||||
|[MagnivOrg/prompt-layer-library](https://github.com/MagnivOrg/prompt-layer-library) | 209 |
|
||||
|[JohnSnowLabs/nlptest](https://github.com/JohnSnowLabs/nlptest) | 208 |
|
||||
|[airobotlab/KoChatGPT](https://github.com/airobotlab/KoChatGPT) | 197 |
|
||||
|[langchain-ai/auto-evaluator](https://github.com/langchain-ai/auto-evaluator) | 195 |
|
||||
|[yvann-hub/Robby-chatbot](https://github.com/yvann-hub/Robby-chatbot) | 195 |
|
||||
|[alejandro-ao/langchain-ask-pdf](https://github.com/alejandro-ao/langchain-ask-pdf) | 192 |
|
||||
|[daveebbelaar/langchain-experiments](https://github.com/daveebbelaar/langchain-experiments) | 189 |
|
||||
|[NimbleBoxAI/ChainFury](https://github.com/NimbleBoxAI/ChainFury) | 187 |
|
||||
|[kaleido-lab/dolphin](https://github.com/kaleido-lab/dolphin) | 184 |
|
||||
|[Anil-matcha/Website-to-Chatbot](https://github.com/Anil-matcha/Website-to-Chatbot) | 183 |
|
||||
|[plchld/InsightFlow](https://github.com/plchld/InsightFlow) | 180 |
|
||||
|[OpenBMB/AgentVerse](https://github.com/OpenBMB/AgentVerse) | 166 |
|
||||
|[benthecoder/ClassGPT](https://github.com/benthecoder/ClassGPT) | 166 |
|
||||
|[jbrukh/gpt-jargon](https://github.com/jbrukh/gpt-jargon) | 161 |
|
||||
|[hardbyte/qabot](https://github.com/hardbyte/qabot) | 160 |
|
||||
|[shaman-ai/agent-actors](https://github.com/shaman-ai/agent-actors) | 153 |
|
||||
|[radi-cho/datasetGPT](https://github.com/radi-cho/datasetGPT) | 153 |
|
||||
|[poe-platform/poe-protocol](https://github.com/poe-platform/poe-protocol) | 152 |
|
||||
|[paolorechia/learn-langchain](https://github.com/paolorechia/learn-langchain) | 149 |
|
||||
|[ajndkr/lanarky](https://github.com/ajndkr/lanarky) | 149 |
|
||||
|[fengyuli-dev/multimedia-gpt](https://github.com/fengyuli-dev/multimedia-gpt) | 147 |
|
||||
|[yasyf/compress-gpt](https://github.com/yasyf/compress-gpt) | 144 |
|
||||
|[homanp/superagent](https://github.com/homanp/superagent) | 143 |
|
||||
|[realminchoi/babyagi-ui](https://github.com/realminchoi/babyagi-ui) | 141 |
|
||||
|[ethanyanjiali/minChatGPT](https://github.com/ethanyanjiali/minChatGPT) | 141 |
|
||||
|[ccurme/yolopandas](https://github.com/ccurme/yolopandas) | 139 |
|
||||
|[hwchase17/langchain-streamlit-template](https://github.com/hwchase17/langchain-streamlit-template) | 138 |
|
||||
|[Jaseci-Labs/jaseci](https://github.com/Jaseci-Labs/jaseci) | 136 |
|
||||
|[hirokidaichi/wanna](https://github.com/hirokidaichi/wanna) | 135 |
|
||||
|[Haste171/langchain-chatbot](https://github.com/Haste171/langchain-chatbot) | 134 |
|
||||
|[jmpaz/promptlib](https://github.com/jmpaz/promptlib) | 130 |
|
||||
|[Klingefjord/chatgpt-telegram](https://github.com/Klingefjord/chatgpt-telegram) | 130 |
|
||||
|[filip-michalsky/SalesGPT](https://github.com/filip-michalsky/SalesGPT) | 128 |
|
||||
|[handrew/browserpilot](https://github.com/handrew/browserpilot) | 128 |
|
||||
|[shauryr/S2QA](https://github.com/shauryr/S2QA) | 127 |
|
||||
|[steamship-core/vercel-examples](https://github.com/steamship-core/vercel-examples) | 127 |
|
||||
|[yasyf/summ](https://github.com/yasyf/summ) | 127 |
|
||||
|[gia-guar/JARVIS-ChatGPT](https://github.com/gia-guar/JARVIS-ChatGPT) | 126 |
|
||||
|[jerlendds/osintbuddy](https://github.com/jerlendds/osintbuddy) | 125 |
|
||||
|[ibiscp/LLM-IMDB](https://github.com/ibiscp/LLM-IMDB) | 124 |
|
||||
|[Teahouse-Studios/akari-bot](https://github.com/Teahouse-Studios/akari-bot) | 124 |
|
||||
|[hwchase17/chroma-langchain](https://github.com/hwchase17/chroma-langchain) | 124 |
|
||||
|[menloparklab/langchain-cohere-qdrant-doc-retrieval](https://github.com/menloparklab/langchain-cohere-qdrant-doc-retrieval) | 123 |
|
||||
|[peterw/StoryStorm](https://github.com/peterw/StoryStorm) | 123 |
|
||||
|[chakkaradeep/pyCodeAGI](https://github.com/chakkaradeep/pyCodeAGI) | 123 |
|
||||
|[petehunt/langchain-github-bot](https://github.com/petehunt/langchain-github-bot) | 115 |
|
||||
|[su77ungr/CASALIOY](https://github.com/su77ungr/CASALIOY) | 113 |
|
||||
|[eunomia-bpf/GPTtrace](https://github.com/eunomia-bpf/GPTtrace) | 113 |
|
||||
|[zenml-io/zenml-projects](https://github.com/zenml-io/zenml-projects) | 112 |
|
||||
|[pablomarin/GPT-Azure-Search-Engine](https://github.com/pablomarin/GPT-Azure-Search-Engine) | 111 |
|
||||
|[shamspias/customizable-gpt-chatbot](https://github.com/shamspias/customizable-gpt-chatbot) | 109 |
|
||||
|[WongSaang/chatgpt-ui-server](https://github.com/WongSaang/chatgpt-ui-server) | 108 |
|
||||
|[davila7/file-gpt](https://github.com/davila7/file-gpt) | 104 |
|
||||
|[enhancedocs/enhancedocs](https://github.com/enhancedocs/enhancedocs) | 102 |
|
||||
|[aurelio-labs/arxiv-bot](https://github.com/aurelio-labs/arxiv-bot) | 101 |
|
||||
|[openai/openai-cookbook](https://github.com/openai/openai-cookbook) | 41047 |
|
||||
|[LAION-AI/Open-Assistant](https://github.com/LAION-AI/Open-Assistant) | 33983 |
|
||||
|[microsoft/TaskMatrix](https://github.com/microsoft/TaskMatrix) | 33375 |
|
||||
|[imartinez/privateGPT](https://github.com/imartinez/privateGPT) | 31114 |
|
||||
|[hpcaitech/ColossalAI](https://github.com/hpcaitech/ColossalAI) | 30369 |
|
||||
|[reworkd/AgentGPT](https://github.com/reworkd/AgentGPT) | 24116 |
|
||||
|[OpenBB-finance/OpenBBTerminal](https://github.com/OpenBB-finance/OpenBBTerminal) | 22565 |
|
||||
|[openai/chatgpt-retrieval-plugin](https://github.com/openai/chatgpt-retrieval-plugin) | 18375 |
|
||||
|[jerryjliu/llama_index](https://github.com/jerryjliu/llama_index) | 17723 |
|
||||
|[mindsdb/mindsdb](https://github.com/mindsdb/mindsdb) | 16958 |
|
||||
|[mlflow/mlflow](https://github.com/mlflow/mlflow) | 14632 |
|
||||
|[GaiZhenbiao/ChuanhuChatGPT](https://github.com/GaiZhenbiao/ChuanhuChatGPT) | 11273 |
|
||||
|[openai/evals](https://github.com/openai/evals) | 10745 |
|
||||
|[databrickslabs/dolly](https://github.com/databrickslabs/dolly) | 10298 |
|
||||
|[imClumsyPanda/langchain-ChatGLM](https://github.com/imClumsyPanda/langchain-ChatGLM) | 9838 |
|
||||
|[logspace-ai/langflow](https://github.com/logspace-ai/langflow) | 9247 |
|
||||
|[AIGC-Audio/AudioGPT](https://github.com/AIGC-Audio/AudioGPT) | 8768 |
|
||||
|[PromtEngineer/localGPT](https://github.com/PromtEngineer/localGPT) | 8651 |
|
||||
|[StanGirard/quivr](https://github.com/StanGirard/quivr) | 8119 |
|
||||
|[go-skynet/LocalAI](https://github.com/go-skynet/LocalAI) | 7418 |
|
||||
|[gventuri/pandas-ai](https://github.com/gventuri/pandas-ai) | 7301 |
|
||||
|[PipedreamHQ/pipedream](https://github.com/PipedreamHQ/pipedream) | 6636 |
|
||||
|[arc53/DocsGPT](https://github.com/arc53/DocsGPT) | 5849 |
|
||||
|[e2b-dev/e2b](https://github.com/e2b-dev/e2b) | 5129 |
|
||||
|[langgenius/dify](https://github.com/langgenius/dify) | 4804 |
|
||||
|[serge-chat/serge](https://github.com/serge-chat/serge) | 4448 |
|
||||
|[csunny/DB-GPT](https://github.com/csunny/DB-GPT) | 4350 |
|
||||
|[wenda-LLM/wenda](https://github.com/wenda-LLM/wenda) | 4268 |
|
||||
|[zauberzeug/nicegui](https://github.com/zauberzeug/nicegui) | 4244 |
|
||||
|[intitni/CopilotForXcode](https://github.com/intitni/CopilotForXcode) | 4232 |
|
||||
|[GreyDGL/PentestGPT](https://github.com/GreyDGL/PentestGPT) | 4154 |
|
||||
|[madawei2699/myGPTReader](https://github.com/madawei2699/myGPTReader) | 4080 |
|
||||
|[zilliztech/GPTCache](https://github.com/zilliztech/GPTCache) | 3949 |
|
||||
|[gkamradt/langchain-tutorials](https://github.com/gkamradt/langchain-tutorials) | 3920 |
|
||||
|[bentoml/OpenLLM](https://github.com/bentoml/OpenLLM) | 3481 |
|
||||
|[MineDojo/Voyager](https://github.com/MineDojo/Voyager) | 3453 |
|
||||
|[mmabrouk/chatgpt-wrapper](https://github.com/mmabrouk/chatgpt-wrapper) | 3355 |
|
||||
|[postgresml/postgresml](https://github.com/postgresml/postgresml) | 3328 |
|
||||
|[marqo-ai/marqo](https://github.com/marqo-ai/marqo) | 3100 |
|
||||
|[kyegomez/tree-of-thoughts](https://github.com/kyegomez/tree-of-thoughts) | 3049 |
|
||||
|[PrefectHQ/marvin](https://github.com/PrefectHQ/marvin) | 2844 |
|
||||
|[project-baize/baize-chatbot](https://github.com/project-baize/baize-chatbot) | 2833 |
|
||||
|[h2oai/h2ogpt](https://github.com/h2oai/h2ogpt) | 2809 |
|
||||
|[hwchase17/chat-langchain](https://github.com/hwchase17/chat-langchain) | 2809 |
|
||||
|[whitead/paper-qa](https://github.com/whitead/paper-qa) | 2664 |
|
||||
|[Azure-Samples/azure-search-openai-demo](https://github.com/Azure-Samples/azure-search-openai-demo) | 2650 |
|
||||
|[OpenGVLab/InternGPT](https://github.com/OpenGVLab/InternGPT) | 2525 |
|
||||
|[GerevAI/gerev](https://github.com/GerevAI/gerev) | 2372 |
|
||||
|[ParisNeo/lollms-webui](https://github.com/ParisNeo/lollms-webui) | 2287 |
|
||||
|[OpenBMB/BMTools](https://github.com/OpenBMB/BMTools) | 2265 |
|
||||
|[SamurAIGPT/privateGPT](https://github.com/SamurAIGPT/privateGPT) | 2084 |
|
||||
|[Chainlit/chainlit](https://github.com/Chainlit/chainlit) | 1912 |
|
||||
|[Farama-Foundation/PettingZoo](https://github.com/Farama-Foundation/PettingZoo) | 1869 |
|
||||
|[OpenGVLab/Ask-Anything](https://github.com/OpenGVLab/Ask-Anything) | 1864 |
|
||||
|[IntelligenzaArtificiale/Free-Auto-GPT](https://github.com/IntelligenzaArtificiale/Free-Auto-GPT) | 1849 |
|
||||
|[Unstructured-IO/unstructured](https://github.com/Unstructured-IO/unstructured) | 1766 |
|
||||
|[yanqiangmiffy/Chinese-LangChain](https://github.com/yanqiangmiffy/Chinese-LangChain) | 1745 |
|
||||
|[NVIDIA/NeMo-Guardrails](https://github.com/NVIDIA/NeMo-Guardrails) | 1732 |
|
||||
|[hwchase17/notion-qa](https://github.com/hwchase17/notion-qa) | 1716 |
|
||||
|[paulpierre/RasaGPT](https://github.com/paulpierre/RasaGPT) | 1619 |
|
||||
|[pinterest/querybook](https://github.com/pinterest/querybook) | 1468 |
|
||||
|[vocodedev/vocode-python](https://github.com/vocodedev/vocode-python) | 1446 |
|
||||
|[thomas-yanxin/LangChain-ChatGLM-Webui](https://github.com/thomas-yanxin/LangChain-ChatGLM-Webui) | 1430 |
|
||||
|[Mintplex-Labs/anything-llm](https://github.com/Mintplex-Labs/anything-llm) | 1419 |
|
||||
|[Kav-K/GPTDiscord](https://github.com/Kav-K/GPTDiscord) | 1416 |
|
||||
|[lunasec-io/lunasec](https://github.com/lunasec-io/lunasec) | 1327 |
|
||||
|[psychic-api/psychic](https://github.com/psychic-api/psychic) | 1307 |
|
||||
|[jina-ai/thinkgpt](https://github.com/jina-ai/thinkgpt) | 1242 |
|
||||
|[agiresearch/OpenAGI](https://github.com/agiresearch/OpenAGI) | 1239 |
|
||||
|[ttengwang/Caption-Anything](https://github.com/ttengwang/Caption-Anything) | 1203 |
|
||||
|[jina-ai/dev-gpt](https://github.com/jina-ai/dev-gpt) | 1179 |
|
||||
|[keephq/keep](https://github.com/keephq/keep) | 1169 |
|
||||
|[greshake/llm-security](https://github.com/greshake/llm-security) | 1156 |
|
||||
|[richardyc/Chrome-GPT](https://github.com/richardyc/Chrome-GPT) | 1090 |
|
||||
|[jina-ai/langchain-serve](https://github.com/jina-ai/langchain-serve) | 1088 |
|
||||
|[mmz-001/knowledge_gpt](https://github.com/mmz-001/knowledge_gpt) | 1074 |
|
||||
|[juncongmoo/chatllama](https://github.com/juncongmoo/chatllama) | 1057 |
|
||||
|[noahshinn024/reflexion](https://github.com/noahshinn024/reflexion) | 1045 |
|
||||
|[visual-openllm/visual-openllm](https://github.com/visual-openllm/visual-openllm) | 1036 |
|
||||
|[101dotxyz/GPTeam](https://github.com/101dotxyz/GPTeam) | 999 |
|
||||
|[poe-platform/api-bot-tutorial](https://github.com/poe-platform/api-bot-tutorial) | 989 |
|
||||
|[irgolic/AutoPR](https://github.com/irgolic/AutoPR) | 974 |
|
||||
|[homanp/superagent](https://github.com/homanp/superagent) | 970 |
|
||||
|[microsoft/X-Decoder](https://github.com/microsoft/X-Decoder) | 941 |
|
||||
|[peterw/Chat-with-Github-Repo](https://github.com/peterw/Chat-with-Github-Repo) | 896 |
|
||||
|[SamurAIGPT/Camel-AutoGPT](https://github.com/SamurAIGPT/Camel-AutoGPT) | 856 |
|
||||
|[cirediatpl/FigmaChain](https://github.com/cirediatpl/FigmaChain) | 840 |
|
||||
|[chatarena/chatarena](https://github.com/chatarena/chatarena) | 829 |
|
||||
|[rlancemartin/auto-evaluator](https://github.com/rlancemartin/auto-evaluator) | 816 |
|
||||
|[seanpixel/Teenage-AGI](https://github.com/seanpixel/Teenage-AGI) | 816 |
|
||||
|[hashintel/hash](https://github.com/hashintel/hash) | 806 |
|
||||
|[corca-ai/EVAL](https://github.com/corca-ai/EVAL) | 790 |
|
||||
|[eyurtsev/kor](https://github.com/eyurtsev/kor) | 752 |
|
||||
|[cheshire-cat-ai/core](https://github.com/cheshire-cat-ai/core) | 713 |
|
||||
|[e-johnstonn/BriefGPT](https://github.com/e-johnstonn/BriefGPT) | 686 |
|
||||
|[run-llama/llama-lab](https://github.com/run-llama/llama-lab) | 685 |
|
||||
|[refuel-ai/autolabel](https://github.com/refuel-ai/autolabel) | 673 |
|
||||
|[griptape-ai/griptape](https://github.com/griptape-ai/griptape) | 617 |
|
||||
|[billxbf/ReWOO](https://github.com/billxbf/ReWOO) | 616 |
|
||||
|[Anil-matcha/ChatPDF](https://github.com/Anil-matcha/ChatPDF) | 609 |
|
||||
|[NimbleBoxAI/ChainFury](https://github.com/NimbleBoxAI/ChainFury) | 592 |
|
||||
|[getmetal/motorhead](https://github.com/getmetal/motorhead) | 581 |
|
||||
|[ajndkr/lanarky](https://github.com/ajndkr/lanarky) | 574 |
|
||||
|[namuan/dr-doc-search](https://github.com/namuan/dr-doc-search) | 572 |
|
||||
|[kreneskyp/ix](https://github.com/kreneskyp/ix) | 564 |
|
||||
|[akshata29/chatpdf](https://github.com/akshata29/chatpdf) | 540 |
|
||||
|[hwchase17/chat-your-data](https://github.com/hwchase17/chat-your-data) | 540 |
|
||||
|[whyiyhw/chatgpt-wechat](https://github.com/whyiyhw/chatgpt-wechat) | 537 |
|
||||
|[khoj-ai/khoj](https://github.com/khoj-ai/khoj) | 531 |
|
||||
|[SamurAIGPT/ChatGPT-Developer-Plugins](https://github.com/SamurAIGPT/ChatGPT-Developer-Plugins) | 528 |
|
||||
|[microsoft/PodcastCopilot](https://github.com/microsoft/PodcastCopilot) | 526 |
|
||||
|[ruoccofabrizio/azure-open-ai-embeddings-qna](https://github.com/ruoccofabrizio/azure-open-ai-embeddings-qna) | 515 |
|
||||
|[alexanderatallah/window.ai](https://github.com/alexanderatallah/window.ai) | 494 |
|
||||
|[StevenGrove/GPT4Tools](https://github.com/StevenGrove/GPT4Tools) | 483 |
|
||||
|[jina-ai/agentchain](https://github.com/jina-ai/agentchain) | 472 |
|
||||
|[mckaywrigley/repo-chat](https://github.com/mckaywrigley/repo-chat) | 465 |
|
||||
|[yeagerai/yeagerai-agent](https://github.com/yeagerai/yeagerai-agent) | 464 |
|
||||
|[langchain-ai/langchain-aiplugin](https://github.com/langchain-ai/langchain-aiplugin) | 464 |
|
||||
|[mpaepper/content-chatbot](https://github.com/mpaepper/content-chatbot) | 455 |
|
||||
|[michaelthwan/searchGPT](https://github.com/michaelthwan/searchGPT) | 455 |
|
||||
|[freddyaboulton/gradio-tools](https://github.com/freddyaboulton/gradio-tools) | 450 |
|
||||
|[amosjyng/langchain-visualizer](https://github.com/amosjyng/langchain-visualizer) | 446 |
|
||||
|[msoedov/langcorn](https://github.com/msoedov/langcorn) | 445 |
|
||||
|[plastic-labs/tutor-gpt](https://github.com/plastic-labs/tutor-gpt) | 426 |
|
||||
|[poe-platform/poe-protocol](https://github.com/poe-platform/poe-protocol) | 426 |
|
||||
|[jonra1993/fastapi-alembic-sqlmodel-async](https://github.com/jonra1993/fastapi-alembic-sqlmodel-async) | 418 |
|
||||
|[langchain-ai/auto-evaluator](https://github.com/langchain-ai/auto-evaluator) | 416 |
|
||||
|[steamship-core/steamship-langchain](https://github.com/steamship-core/steamship-langchain) | 401 |
|
||||
|[xuwenhao/geektime-ai-course](https://github.com/xuwenhao/geektime-ai-course) | 400 |
|
||||
|[continuum-llms/chatgpt-memory](https://github.com/continuum-llms/chatgpt-memory) | 386 |
|
||||
|[mtenenholtz/chat-twitter](https://github.com/mtenenholtz/chat-twitter) | 382 |
|
||||
|[explosion/spacy-llm](https://github.com/explosion/spacy-llm) | 368 |
|
||||
|[showlab/VLog](https://github.com/showlab/VLog) | 363 |
|
||||
|[yvann-hub/Robby-chatbot](https://github.com/yvann-hub/Robby-chatbot) | 363 |
|
||||
|[daodao97/chatdoc](https://github.com/daodao97/chatdoc) | 361 |
|
||||
|[opentensor/bittensor](https://github.com/opentensor/bittensor) | 360 |
|
||||
|[alejandro-ao/langchain-ask-pdf](https://github.com/alejandro-ao/langchain-ask-pdf) | 355 |
|
||||
|[logan-markewich/llama_index_starter_pack](https://github.com/logan-markewich/llama_index_starter_pack) | 351 |
|
||||
|[jupyterlab/jupyter-ai](https://github.com/jupyterlab/jupyter-ai) | 348 |
|
||||
|[alejandro-ao/ask-multiple-pdfs](https://github.com/alejandro-ao/ask-multiple-pdfs) | 321 |
|
||||
|[andylokandy/gpt-4-search](https://github.com/andylokandy/gpt-4-search) | 314 |
|
||||
|[mosaicml/examples](https://github.com/mosaicml/examples) | 313 |
|
||||
|[personoids/personoids-lite](https://github.com/personoids/personoids-lite) | 306 |
|
||||
|[itamargol/openai](https://github.com/itamargol/openai) | 304 |
|
||||
|[Anil-matcha/Website-to-Chatbot](https://github.com/Anil-matcha/Website-to-Chatbot) | 299 |
|
||||
|[momegas/megabots](https://github.com/momegas/megabots) | 299 |
|
||||
|[BlackHC/llm-strategy](https://github.com/BlackHC/llm-strategy) | 289 |
|
||||
|[daveebbelaar/langchain-experiments](https://github.com/daveebbelaar/langchain-experiments) | 283 |
|
||||
|[wandb/weave](https://github.com/wandb/weave) | 279 |
|
||||
|[Cheems-Seminar/grounded-segment-any-parts](https://github.com/Cheems-Seminar/grounded-segment-any-parts) | 273 |
|
||||
|[jerlendds/osintbuddy](https://github.com/jerlendds/osintbuddy) | 271 |
|
||||
|[OpenBMB/AgentVerse](https://github.com/OpenBMB/AgentVerse) | 270 |
|
||||
|[MagnivOrg/prompt-layer-library](https://github.com/MagnivOrg/prompt-layer-library) | 269 |
|
||||
|[sullivan-sean/chat-langchainjs](https://github.com/sullivan-sean/chat-langchainjs) | 259 |
|
||||
|[Azure-Samples/openai](https://github.com/Azure-Samples/openai) | 252 |
|
||||
|[bborn/howdoi.ai](https://github.com/bborn/howdoi.ai) | 248 |
|
||||
|[hnawaz007/pythondataanalysis](https://github.com/hnawaz007/pythondataanalysis) | 247 |
|
||||
|[conceptofmind/toolformer](https://github.com/conceptofmind/toolformer) | 243 |
|
||||
|[truera/trulens](https://github.com/truera/trulens) | 239 |
|
||||
|[ur-whitelab/exmol](https://github.com/ur-whitelab/exmol) | 238 |
|
||||
|[intel/intel-extension-for-transformers](https://github.com/intel/intel-extension-for-transformers) | 237 |
|
||||
|[monarch-initiative/ontogpt](https://github.com/monarch-initiative/ontogpt) | 236 |
|
||||
|[wandb/edu](https://github.com/wandb/edu) | 231 |
|
||||
|[recalign/RecAlign](https://github.com/recalign/RecAlign) | 229 |
|
||||
|[alvarosevilla95/autolang](https://github.com/alvarosevilla95/autolang) | 223 |
|
||||
|[kaleido-lab/dolphin](https://github.com/kaleido-lab/dolphin) | 221 |
|
||||
|[JohnSnowLabs/nlptest](https://github.com/JohnSnowLabs/nlptest) | 220 |
|
||||
|[paolorechia/learn-langchain](https://github.com/paolorechia/learn-langchain) | 219 |
|
||||
|[Safiullah-Rahu/CSV-AI](https://github.com/Safiullah-Rahu/CSV-AI) | 215 |
|
||||
|[Haste171/langchain-chatbot](https://github.com/Haste171/langchain-chatbot) | 215 |
|
||||
|[steamship-packages/langchain-agent-production-starter](https://github.com/steamship-packages/langchain-agent-production-starter) | 214 |
|
||||
|[airobotlab/KoChatGPT](https://github.com/airobotlab/KoChatGPT) | 213 |
|
||||
|[filip-michalsky/SalesGPT](https://github.com/filip-michalsky/SalesGPT) | 211 |
|
||||
|[marella/chatdocs](https://github.com/marella/chatdocs) | 207 |
|
||||
|[su77ungr/CASALIOY](https://github.com/su77ungr/CASALIOY) | 200 |
|
||||
|[shaman-ai/agent-actors](https://github.com/shaman-ai/agent-actors) | 195 |
|
||||
|[plchld/InsightFlow](https://github.com/plchld/InsightFlow) | 189 |
|
||||
|[jbrukh/gpt-jargon](https://github.com/jbrukh/gpt-jargon) | 186 |
|
||||
|[hwchase17/langchain-streamlit-template](https://github.com/hwchase17/langchain-streamlit-template) | 185 |
|
||||
|[huchenxucs/ChatDB](https://github.com/huchenxucs/ChatDB) | 179 |
|
||||
|[benthecoder/ClassGPT](https://github.com/benthecoder/ClassGPT) | 178 |
|
||||
|[hwchase17/chroma-langchain](https://github.com/hwchase17/chroma-langchain) | 178 |
|
||||
|[radi-cho/datasetGPT](https://github.com/radi-cho/datasetGPT) | 177 |
|
||||
|[jiran214/GPT-vup](https://github.com/jiran214/GPT-vup) | 176 |
|
||||
|[rsaryev/talk-codebase](https://github.com/rsaryev/talk-codebase) | 174 |
|
||||
|[edreisMD/plugnplai](https://github.com/edreisMD/plugnplai) | 174 |
|
||||
|[gia-guar/JARVIS-ChatGPT](https://github.com/gia-guar/JARVIS-ChatGPT) | 172 |
|
||||
|[hardbyte/qabot](https://github.com/hardbyte/qabot) | 171 |
|
||||
|[shamspias/customizable-gpt-chatbot](https://github.com/shamspias/customizable-gpt-chatbot) | 165 |
|
||||
|[gustavz/DataChad](https://github.com/gustavz/DataChad) | 164 |
|
||||
|[yasyf/compress-gpt](https://github.com/yasyf/compress-gpt) | 163 |
|
||||
|[SamPink/dev-gpt](https://github.com/SamPink/dev-gpt) | 161 |
|
||||
|[yuanjie-ai/ChatLLM](https://github.com/yuanjie-ai/ChatLLM) | 161 |
|
||||
|[pablomarin/GPT-Azure-Search-Engine](https://github.com/pablomarin/GPT-Azure-Search-Engine) | 160 |
|
||||
|[jondurbin/airoboros](https://github.com/jondurbin/airoboros) | 157 |
|
||||
|[fengyuli-dev/multimedia-gpt](https://github.com/fengyuli-dev/multimedia-gpt) | 157 |
|
||||
|[PradipNichite/Youtube-Tutorials](https://github.com/PradipNichite/Youtube-Tutorials) | 156 |
|
||||
|[nicknochnack/LangchainDocuments](https://github.com/nicknochnack/LangchainDocuments) | 155 |
|
||||
|[ethanyanjiali/minChatGPT](https://github.com/ethanyanjiali/minChatGPT) | 155 |
|
||||
|[ccurme/yolopandas](https://github.com/ccurme/yolopandas) | 154 |
|
||||
|[chakkaradeep/pyCodeAGI](https://github.com/chakkaradeep/pyCodeAGI) | 153 |
|
||||
|[preset-io/promptimize](https://github.com/preset-io/promptimize) | 150 |
|
||||
|[onlyphantom/llm-python](https://github.com/onlyphantom/llm-python) | 148 |
|
||||
|[Azure-Samples/azure-search-power-skills](https://github.com/Azure-Samples/azure-search-power-skills) | 146 |
|
||||
|[realminchoi/babyagi-ui](https://github.com/realminchoi/babyagi-ui) | 144 |
|
||||
|[microsoft/azure-openai-in-a-day-workshop](https://github.com/microsoft/azure-openai-in-a-day-workshop) | 144 |
|
||||
|[jmpaz/promptlib](https://github.com/jmpaz/promptlib) | 143 |
|
||||
|[shauryr/S2QA](https://github.com/shauryr/S2QA) | 142 |
|
||||
|[handrew/browserpilot](https://github.com/handrew/browserpilot) | 141 |
|
||||
|[Jaseci-Labs/jaseci](https://github.com/Jaseci-Labs/jaseci) | 140 |
|
||||
|[Klingefjord/chatgpt-telegram](https://github.com/Klingefjord/chatgpt-telegram) | 140 |
|
||||
|[WongSaang/chatgpt-ui-server](https://github.com/WongSaang/chatgpt-ui-server) | 139 |
|
||||
|[ibiscp/LLM-IMDB](https://github.com/ibiscp/LLM-IMDB) | 139 |
|
||||
|[menloparklab/langchain-cohere-qdrant-doc-retrieval](https://github.com/menloparklab/langchain-cohere-qdrant-doc-retrieval) | 138 |
|
||||
|[hirokidaichi/wanna](https://github.com/hirokidaichi/wanna) | 137 |
|
||||
|[steamship-core/vercel-examples](https://github.com/steamship-core/vercel-examples) | 137 |
|
||||
|[deeppavlov/dream](https://github.com/deeppavlov/dream) | 136 |
|
||||
|[miaoshouai/miaoshouai-assistant](https://github.com/miaoshouai/miaoshouai-assistant) | 135 |
|
||||
|[sugarforever/LangChain-Tutorials](https://github.com/sugarforever/LangChain-Tutorials) | 135 |
|
||||
|[yasyf/summ](https://github.com/yasyf/summ) | 135 |
|
||||
|[peterw/StoryStorm](https://github.com/peterw/StoryStorm) | 134 |
|
||||
|[vaibkumr/prompt-optimizer](https://github.com/vaibkumr/prompt-optimizer) | 132 |
|
||||
|[ju-bezdek/langchain-decorators](https://github.com/ju-bezdek/langchain-decorators) | 130 |
|
||||
|[homanp/vercel-langchain](https://github.com/homanp/vercel-langchain) | 128 |
|
||||
|[Teahouse-Studios/akari-bot](https://github.com/Teahouse-Studios/akari-bot) | 127 |
|
||||
|[petehunt/langchain-github-bot](https://github.com/petehunt/langchain-github-bot) | 125 |
|
||||
|[eunomia-bpf/GPTtrace](https://github.com/eunomia-bpf/GPTtrace) | 122 |
|
||||
|[fixie-ai/fixie-examples](https://github.com/fixie-ai/fixie-examples) | 122 |
|
||||
|[Aggregate-Intellect/practical-llms](https://github.com/Aggregate-Intellect/practical-llms) | 120 |
|
||||
|[davila7/file-gpt](https://github.com/davila7/file-gpt) | 120 |
|
||||
|[Azure-Samples/azure-search-openai-demo-csharp](https://github.com/Azure-Samples/azure-search-openai-demo-csharp) | 119 |
|
||||
|[prof-frink-lab/slangchain](https://github.com/prof-frink-lab/slangchain) | 117 |
|
||||
|[aurelio-labs/arxiv-bot](https://github.com/aurelio-labs/arxiv-bot) | 117 |
|
||||
|[zenml-io/zenml-projects](https://github.com/zenml-io/zenml-projects) | 116 |
|
||||
|[flurb18/AgentOoba](https://github.com/flurb18/AgentOoba) | 114 |
|
||||
|[kaarthik108/snowChat](https://github.com/kaarthik108/snowChat) | 112 |
|
||||
|[RedisVentures/redis-openai-qna](https://github.com/RedisVentures/redis-openai-qna) | 111 |
|
||||
|[solana-labs/chatgpt-plugin](https://github.com/solana-labs/chatgpt-plugin) | 111 |
|
||||
|[kulltc/chatgpt-sql](https://github.com/kulltc/chatgpt-sql) | 109 |
|
||||
|[summarizepaper/summarizepaper](https://github.com/summarizepaper/summarizepaper) | 109 |
|
||||
|[Azure-Samples/miyagi](https://github.com/Azure-Samples/miyagi) | 106 |
|
||||
|[ssheng/BentoChain](https://github.com/ssheng/BentoChain) | 106 |
|
||||
|[voxel51/voxelgpt](https://github.com/voxel51/voxelgpt) | 105 |
|
||||
|[mallahyari/drqa](https://github.com/mallahyari/drqa) | 103 |
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -2,445 +2,180 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "944e4194",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Arthur LangChain integration"
|
||||
"# Arthur"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b1ccdfe8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[Arthur](https://www.arthur.ai/) is a model monitoring and observability platform.\n",
|
||||
"[Arthur](https://arthur.ai) is a model monitoring and observability platform.\n",
|
||||
"\n",
|
||||
"This notebook shows how to register LLMs (chat and non-chat) as models with the Arthur platform. Then we show how to set up langchain LLMs with an Arthur callback that will automatically log model inferences to Arthur.\n",
|
||||
"The following guide shows how to run a registered chat LLM with the Arthur callback handler to automatically log model inferences to Arthur.\n",
|
||||
"\n",
|
||||
"For more information about how to use the Arthur SDK, visit our [docs](http://docs.arthur.ai), in particular our [model onboarding guide](https://docs.arthur.ai/user-guide/walkthroughs/model-onboarding/index.html)"
|
||||
"If you do not have a model currently onboarded to Arthur, visit our [onboarding guide for generative text models](https://docs.arthur.ai/user-guide/walkthroughs/model-onboarding/generative_text_onboarding.html). For more information about how to use the Arthur SDK, visit our [docs](https://docs.arthur.ai/)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "961c6691",
|
||||
"metadata": {},
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"id": "y8ku6X96sebl"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.callbacks import ArthurCallbackHandler\n",
|
||||
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
|
||||
"from langchain.chat_models import ChatOpenAI, ChatAnthropic\n",
|
||||
"from langchain.schema import HumanMessage\n",
|
||||
"from langchain.llms import OpenAI, Cohere, HuggingFacePipeline"
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.schema import HumanMessage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Place Arthur credentials here"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "a23d1963",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"id": "Me3prhqjsoqz"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from arthurai import ArthurAI\n",
|
||||
"from arthurai.common.constants import InputType, OutputType, Stage, ValueType\n",
|
||||
"from arthurai.core.attributes import ArthurAttribute, AttributeCategory"
|
||||
"arthur_url = \"https://app.arthur.ai\"\n",
|
||||
"arthur_login = \"your-arthur-login-username-here\"\n",
|
||||
"arthur_model_id = \"your-arthur-model-id-here\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4d1b90c0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ArthurModel for chatbot with only input text and output text attributes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1a4a4a8a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Connect to Arthur client"
|
||||
"Create Langchain LLM with Arthur callback handler"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "f49e9b79",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"id": "9Hq9snQasynA"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"arthur_url = \"https://app.arthur.ai\"\n",
|
||||
"arthur_login = \"your-username-here\"\n",
|
||||
"arthur = ArthurAI(url=arthur_url, login=arthur_login)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c6e063bf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Before you can register model inferences to Arthur, you must have a registered model with an ID in the Arthur platform. We will provide this ID to the ArthurCallbackHandler.\n",
|
||||
"\n",
|
||||
"You can register a model with Arthur here in the notebook using this `register_chat_llm()` function. This function returns the ID of the model saved to the platform. To use the function, uncomment `arthur_model_chatbot_id = register_chat_llm()` in the cell below."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "31b17b5e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def register_chat_llm():\n",
|
||||
"\n",
|
||||
" arthur_model = arthur.model(\n",
|
||||
" display_name=\"LangChainChat\",\n",
|
||||
" input_type=InputType.NLP,\n",
|
||||
" output_type=OutputType.TokenSequence\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" arthur_model._add_attribute_to_model(ArthurAttribute(\n",
|
||||
" name=\"my_input_text\",\n",
|
||||
" stage=Stage.ModelPipelineInput,\n",
|
||||
" value_type=ValueType.Unstructured_Text,\n",
|
||||
" categorical=True,\n",
|
||||
" is_unique=True\n",
|
||||
" ))\n",
|
||||
" arthur_model._add_attribute_to_model(ArthurAttribute(\n",
|
||||
" name=\"my_output_text\",\n",
|
||||
" stage=Stage.PredictedValue,\n",
|
||||
" value_type=ValueType.Unstructured_Text,\n",
|
||||
" categorical=True,\n",
|
||||
" is_unique=False,\n",
|
||||
" ))\n",
|
||||
" \n",
|
||||
" return arthur_model.save()\n",
|
||||
"# arthur_model_chatbot_id = register_chat_llm()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0d1d1e60",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Alternatively, you can set the `arthur_model_chatbot_id` variable to be the ID of your model on your [model dashboard](https://app.arthur.ai/)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "cdfa02c8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"arthur_model_chatbot_id = \"your-model-id-here\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "58be5234",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This function creates a Langchain chat LLM with the ArthurCallbackHandler to log inferences to Arthur. We provide our `arthur_model_chatbot_id`, as well as the Arthur url and login we are using."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "448a8fee",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def make_langchain_chat_llm(chat_model=ChatOpenAI):\n",
|
||||
" if chat_model not in [ChatOpenAI, ChatAnthropic]:\n",
|
||||
" raise ValueError(\"For this notebook, use one of the chat models imported from langchain.chat_models\")\n",
|
||||
" return chat_model(\n",
|
||||
" streaming=True, \n",
|
||||
"def make_langchain_chat_llm(chat_model=):\n",
|
||||
" return ChatOpenAI(\n",
|
||||
" streaming=True,\n",
|
||||
" temperature=0.1,\n",
|
||||
" callbacks=[\n",
|
||||
" StreamingStdOutCallbackHandler(), \n",
|
||||
" ArthurCallbackHandler.from_credentials(arthur_model_chatbot_id, arthur_url=arthur_url, arthur_login=arthur_login)\n",
|
||||
" ])\n"
|
||||
" StreamingStdOutCallbackHandler(),\n",
|
||||
" ArthurCallbackHandler.from_credentials(\n",
|
||||
" arthur_model_id, \n",
|
||||
" arthur_url=arthur_url, \n",
|
||||
" arthur_login=arthur_login)\n",
|
||||
" ])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "17c182da",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "2dfc00ed",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Please enter password for admin: ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chat_llm = make_langchain_chat_llm()"
|
||||
"chatgpt = make_langchain_chat_llm()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "139291f2",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"id": "aXRyj50Ls8eP"
|
||||
},
|
||||
"source": [
|
||||
"Run the chatbot (it will save the chat history in the `history` list so that the conversation can reference earlier messages)\n",
|
||||
"Running the chat LLM with this `run` function will save the chat history in an ongoing list so that the conversation can reference earlier messages and log each response to the Arthur platform. You can view the history of this model's inferences on your [model dashboard page](https://app.arthur.ai/).\n",
|
||||
"\n",
|
||||
"Type `q` to quit"
|
||||
"Enter `q` to quit the run loop"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "7480a443",
|
||||
"metadata": {},
|
||||
"execution_count": 13,
|
||||
"metadata": {
|
||||
"id": "4taWSbN-s31Y"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def run_langchain_chat_llm(llm):\n",
|
||||
"def run(llm):\n",
|
||||
" history = []\n",
|
||||
" while True:\n",
|
||||
" user_input = input(\"\\n>>> input >>>\\n>>>: \")\n",
|
||||
" if user_input == 'q': break\n",
|
||||
" if user_input == \"q\":\n",
|
||||
" break\n",
|
||||
" history.append(HumanMessage(content=user_input))\n",
|
||||
" history.append(llm(history))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "6868ce71",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run_langchain_chat_llm(chat_llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a0be7d01",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ArthurModel with input text, output text, token likelihoods, finish reason, and amount of token usage attributes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1ee4b741",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This function registers an LLM with additional metadata attributes to log to Arthur with each inference\n",
|
||||
"\n",
|
||||
"As above, you can register your callback handler for an LLM using this function here in the notebook or by pasting the ID of an already-registered model from your [model dashboard](https://app.arthur.ai/)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "e671836c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def register_llm():\n",
|
||||
"\n",
|
||||
" arthur_model = arthur.model(\n",
|
||||
" display_name=\"LangChainLLM\",\n",
|
||||
" input_type=InputType.NLP,\n",
|
||||
" output_type=OutputType.TokenSequence\n",
|
||||
" )\n",
|
||||
" arthur_model._add_attribute_to_model(ArthurAttribute(\n",
|
||||
" name=\"my_input_text\",\n",
|
||||
" stage=Stage.ModelPipelineInput,\n",
|
||||
" value_type=ValueType.Unstructured_Text,\n",
|
||||
" categorical=True,\n",
|
||||
" is_unique=True\n",
|
||||
" ))\n",
|
||||
" arthur_model._add_attribute_to_model(ArthurAttribute(\n",
|
||||
" name=\"my_output_text\",\n",
|
||||
" stage=Stage.PredictedValue,\n",
|
||||
" value_type=ValueType.Unstructured_Text,\n",
|
||||
" categorical=True,\n",
|
||||
" is_unique=False,\n",
|
||||
" token_attribute_link=\"my_output_likelihoods\"\n",
|
||||
" ))\n",
|
||||
" arthur_model._add_attribute_to_model(ArthurAttribute(\n",
|
||||
" name=\"my_output_likelihoods\",\n",
|
||||
" stage=Stage.PredictedValue,\n",
|
||||
" value_type=ValueType.TokenLikelihoods,\n",
|
||||
" token_attribute_link=\"my_output_text\"\n",
|
||||
" ))\n",
|
||||
" arthur_model._add_attribute_to_model(ArthurAttribute(\n",
|
||||
" name=\"finish_reason\",\n",
|
||||
" stage=Stage.NonInputData,\n",
|
||||
" value_type=ValueType.String,\n",
|
||||
" categorical=True,\n",
|
||||
" categories=[\n",
|
||||
" AttributeCategory(value='stop'),\n",
|
||||
" AttributeCategory(value='length'),\n",
|
||||
" AttributeCategory(value='content_filter'),\n",
|
||||
" AttributeCategory(value='null')\n",
|
||||
" ]\n",
|
||||
" ))\n",
|
||||
" arthur_model._add_attribute_to_model(ArthurAttribute(\n",
|
||||
" name=\"prompt_tokens\",\n",
|
||||
" stage=Stage.NonInputData,\n",
|
||||
" value_type=ValueType.Integer\n",
|
||||
" ))\n",
|
||||
" arthur_model._add_attribute_to_model(ArthurAttribute(\n",
|
||||
" name=\"completion_tokens\",\n",
|
||||
" stage=Stage.NonInputData,\n",
|
||||
" value_type=ValueType.Integer\n",
|
||||
" ))\n",
|
||||
" arthur_model._add_attribute_to_model(ArthurAttribute(\n",
|
||||
" name=\"duration\",\n",
|
||||
" stage=Stage.NonInputData,\n",
|
||||
" value_type=ValueType.Float\n",
|
||||
" ))\n",
|
||||
" \n",
|
||||
" return arthur_model.save()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "2a6686f7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"arthur_model_llm_id = \"your-model-id-here\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2dcacb96",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"These functions create Langchain LLMs with the ArthurCallbackHandler to log inferences to Arthur.\n",
|
||||
"\n",
|
||||
"There are small differences in the underlying Langchain integrations with these libraries and the available metadata for model inputs & outputs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"id": "34cf0072",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def make_langchain_openai_llm():\n",
|
||||
" return OpenAI(\n",
|
||||
" temperature=0.1,\n",
|
||||
" model_kwargs = {'logprobs': 3},\n",
|
||||
" callbacks=[\n",
|
||||
" ArthurCallbackHandler.from_credentials(arthur_model_llm_id, arthur_url=arthur_url, arthur_login=arthur_login)\n",
|
||||
" ])\n",
|
||||
"\n",
|
||||
"def make_langchain_cohere_llm():\n",
|
||||
" return Cohere(\n",
|
||||
" temperature=0.1,\n",
|
||||
" callbacks=[\n",
|
||||
" ArthurCallbackHandler.from_credentials(arthur_model_chatbot_id, arthur_url=arthur_url, arthur_login=arthur_login)\n",
|
||||
" ])\n",
|
||||
"\n",
|
||||
"def make_langchain_huggingface_llm():\n",
|
||||
" llm = HuggingFacePipeline.from_model_id(\n",
|
||||
" model_id=\"bert-base-uncased\", \n",
|
||||
" task=\"text-generation\", \n",
|
||||
" model_kwargs={\"temperature\":2.5, \"max_length\":64})\n",
|
||||
" llm.callbacks = [\n",
|
||||
" ArthurCallbackHandler.from_credentials(arthur_model_chatbot_id, arthur_url=arthur_url, arthur_login=arthur_login)\n",
|
||||
" ]\n",
|
||||
" return llm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "f40c3ce0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"openai_llm = make_langchain_openai_llm()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"id": "8476d531",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cohere_llm = make_langchain_cohere_llm()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "7483b9d3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"huggingface_llm = make_langchain_huggingface_llm()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c17d8e86",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Run the LLM (each completion is independent, no chat history is saved as we were doing above with the chat llms)\n",
|
||||
"\n",
|
||||
"Type `q` to quit"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "72ee0790",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"id": "MEx8nWJps-EG"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
">>> input >>>\n",
|
||||
">>>: What is a callback handler?\n",
|
||||
"A callback handler, also known as a callback function or callback method, is a piece of code that is executed in response to a specific event or condition. It is commonly used in programming languages that support event-driven or asynchronous programming paradigms.\n",
|
||||
"\n",
|
||||
"The purpose of a callback handler is to provide a way for developers to define custom behavior that should be executed when a certain event occurs. Instead of waiting for a result or blocking the execution, the program registers a callback function and continues with other tasks. When the event is triggered, the callback function is invoked, allowing the program to respond accordingly.\n",
|
||||
"\n",
|
||||
"Callback handlers are commonly used in various scenarios, such as handling user input, responding to network requests, processing asynchronous operations, and implementing event-driven architectures. They provide a flexible and modular way to handle events and decouple different components of a system.\n",
|
||||
">>> input >>>\n",
|
||||
">>>: What do I need to do to get the full benefits of this\n",
|
||||
"To get the full benefits of using a callback handler, you should consider the following:\n",
|
||||
"\n",
|
||||
"1. Understand the event or condition: Identify the specific event or condition that you want to respond to with a callback handler. This could be user input, network requests, or any other asynchronous operation.\n",
|
||||
"\n",
|
||||
"2. Define the callback function: Create a function that will be executed when the event or condition occurs. This function should contain the desired behavior or actions you want to take in response to the event.\n",
|
||||
"\n",
|
||||
"3. Register the callback function: Depending on the programming language or framework you are using, you may need to register or attach the callback function to the appropriate event or condition. This ensures that the callback function is invoked when the event occurs.\n",
|
||||
"\n",
|
||||
"4. Handle the callback: Implement the necessary logic within the callback function to handle the event or condition. This could involve updating the user interface, processing data, making further requests, or triggering other actions.\n",
|
||||
"\n",
|
||||
"5. Consider error handling: It's important to handle any potential errors or exceptions that may occur within the callback function. This ensures that your program can gracefully handle unexpected situations and prevent crashes or undesired behavior.\n",
|
||||
"\n",
|
||||
"6. Maintain code readability and modularity: As your codebase grows, it's crucial to keep your callback handlers organized and maintainable. Consider using design patterns or architectural principles to structure your code in a modular and scalable way.\n",
|
||||
"\n",
|
||||
"By following these steps, you can leverage the benefits of callback handlers, such as asynchronous and event-driven programming, improved responsiveness, and modular code design.\n",
|
||||
">>> input >>>\n",
|
||||
">>>: q\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def run_langchain_llm(llm):\n",
|
||||
" while True:\n",
|
||||
" print(\"Type your text for completion:\\n\")\n",
|
||||
" user_input = input(\"\\n>>> input >>>\\n>>>: \")\n",
|
||||
" if user_input == 'q': break\n",
|
||||
" print(llm(user_input), \"\\n================\\n\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "fb864057",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run_langchain_llm(openai_llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "e6673769",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run_langchain_llm(cohere_llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "85541f1c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run_langchain_llm(huggingface_llm)"
|
||||
"run(chatgpt)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
@@ -456,9 +191,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.8"
|
||||
"version": "3.10.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
"nbformat_minor": 1
|
||||
}
|
||||
|
||||
@@ -1,17 +1,17 @@
|
||||
# Databerry
|
||||
# Chaindesk
|
||||
|
||||
>[Databerry](https://databerry.ai) is an [open source](https://github.com/gmpetrov/databerry) document retrieval platform that helps to connect your personal data with Large Language Models.
|
||||
>[Chaindesk](https://chaindesk.ai) is an [open source](https://github.com/gmpetrov/databerry) document retrieval platform that helps to connect your personal data with Large Language Models.
|
||||
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
We need to sign up for Databerry, create a datastore, add some data and get your datastore api endpoint url.
|
||||
We need the [API Key](https://docs.databerry.ai/api-reference/authentication).
|
||||
We need to sign up for Chaindesk, create a datastore, add some data and get your datastore api endpoint url.
|
||||
We need the [API Key](https://docs.chaindesk.ai/api-reference/authentication).
|
||||
|
||||
## Retriever
|
||||
|
||||
See a [usage example](/docs/modules/data_connection/retrievers/integrations/databerry.html).
|
||||
See a [usage example](/docs/modules/data_connection/retrievers/integrations/chaindesk.html).
|
||||
|
||||
```python
|
||||
from langchain.retrievers import DataberryRetriever
|
||||
from langchain.retrievers import ChaindeskRetriever
|
||||
```
|
||||
52
docs/extras/ecosystem/integrations/clarifai.mdx
Normal file
@@ -0,0 +1,52 @@
|
||||
# Clarifai
|
||||
|
||||
>[Clarifai](https://clarifai.com) is one of first deep learning platforms having been founded in 2013. Clarifai provides an AI platform with the full AI lifecycle for data exploration, data labeling, model training, evaluation and inference around images, video, text and audio data. In the LangChain ecosystem, as far as we're aware, Clarifai is the only provider that supports LLMs, embeddings and a vector store in one production scale platform, making it an excellent choice to operationalize your LangChain implementations.
|
||||
|
||||
## Installation and Setup
|
||||
- Install the Python SDK:
|
||||
```bash
|
||||
pip install clarifai
|
||||
```
|
||||
[Sign-up](https://clarifai.com/signup) for a Clarifai account, then get a personal access token to access the Clarifai API from your [security settings](https://clarifai.com/settings/security) and set it as an environment variable (`CLARIFAI_PAT`).
|
||||
|
||||
|
||||
## Models
|
||||
|
||||
Clarifai provides 1,000s of AI models for many different use cases. You can [explore them here](https://clarifai.com/explore) to find the one most suited for your use case. These models include those created by other providers such as OpenAI, Anthropic, Cohere, AI21, etc. as well as state of the art from open source such as Falcon, InstructorXL, etc. so that you build the best in AI into your products. You'll find these organized by the creator's user_id and into projects we call applications denoted by their app_id. Those IDs will be needed in additional to the model_id and optionally the version_id, so make note of all these IDs once you found the best model for your use case!
|
||||
|
||||
Also note that given there are many models for images, video, text and audio understanding, you can build some interested AI agents that utilize the variety of AI models as experts to understand those data types.
|
||||
|
||||
### LLMs
|
||||
|
||||
To find the selection of LLMs in the Clarifai platform you can select the text to text model type [here](https://clarifai.com/explore/models?filterData=%5B%7B%22field%22%3A%22model_type_id%22%2C%22value%22%3A%5B%22text-to-text%22%5D%7D%5D&page=1&perPage=24).
|
||||
|
||||
```python
|
||||
from langchain.llms import Clarifai
|
||||
llm = Clarifai(pat=CLARIFAI_PAT, user_id=USER_ID, app_id=APP_ID, model_id=MODEL_ID)
|
||||
```
|
||||
|
||||
For more details, the docs on the Clarifai LLM wrapper provide a [detailed walkthrough](/docs/modules/model_io/models/llms/integrations/clarifai.html).
|
||||
|
||||
|
||||
### Text Embedding Models
|
||||
|
||||
To find the selection of text embeddings models in the Clarifai platform you can select the text to embedding model type [here](https://clarifai.com/explore/models?page=1&perPage=24&filterData=%5B%7B%22field%22%3A%22model_type_id%22%2C%22value%22%3A%5B%22text-embedder%22%5D%7D%5D).
|
||||
|
||||
There is a Clarifai Embedding model in LangChain, which you can access with:
|
||||
```python
|
||||
from langchain.embeddings import ClarifaiEmbeddings
|
||||
embeddings = ClarifaiEmbeddings(pat=CLARIFAI_PAT, user_id=USER_ID, app_id=APP_ID, model_id=MODEL_ID)
|
||||
```
|
||||
For more details, the docs on the Clarifai Embeddings wrapper provide a [detailed walthrough](/docs/modules/data_connection/text_embedding/integrations/clarifai.html).
|
||||
|
||||
## Vectorstore
|
||||
|
||||
Clarifai's vector DB was launched in 2016 and has been optimized to support live search queries. With workflows in the Clarifai platform, you data is automatically indexed by am embedding model and optionally other models as well to index that information in the DB for search. You can query the DB not only via the vectors but also filter by metadata matches, other AI predicted concepts, and even do geo-coordinate search. Simply create an application, select the appropriate base workflow for your type of data, and upload it (through the API as [documented here](https://docs.clarifai.com/api-guide/data/create-get-update-delete) or the UIs at clarifai.com).
|
||||
|
||||
You an also add data directly from LangChain as well, and the auto-indexing will take place for you. You'll notice this is a little different than other vectorstores where you need to provde an embedding model in their constructor and have LangChain coordinate getting the embeddings from text and writing those to the index. Not only is it more convenient, but it's much more scalable to use Clarifai's distributed cloud to do all the index in the background.
|
||||
|
||||
```python
|
||||
from langchain.vectorstores import Clarifai
|
||||
clarifai_vector_db = Clarifai.from_texts(user_id=USER_ID, app_id=APP_ID, texts=texts, pat=CLARIFAI_PAT, number_of_docs=NUMBER_OF_DOCS, metadatas = metadatas)
|
||||
```
|
||||
For more details, the docs on the Clarifai vector store provide a [detailed walthrough](/docs/modules/data_connection/text_embedding/integrations/clarifai.html).
|
||||
110
docs/extras/ecosystem/integrations/cnosdb.mdx
Normal file
@@ -0,0 +1,110 @@
|
||||
# CnosDB
|
||||
> [CnosDB](https://github.com/cnosdb/cnosdb) is an open source distributed time series database with high performance, high compression rate and high ease of use.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
```python
|
||||
pip install cnos-connector
|
||||
```
|
||||
|
||||
## Connecting to CnosDB
|
||||
You can connect to CnosDB using the `SQLDatabase.from_cnosdb()` method.
|
||||
### Syntax
|
||||
```python
|
||||
def SQLDatabase.from_cnosdb(url: str = "127.0.0.1:8902",
|
||||
user: str = "root",
|
||||
password: str = "",
|
||||
tenant: str = "cnosdb",
|
||||
database: str = "public")
|
||||
```
|
||||
Args:
|
||||
1. url (str): The HTTP connection host name and port number of the CnosDB
|
||||
service, excluding "http://" or "https://", with a default value
|
||||
of "127.0.0.1:8902".
|
||||
2. user (str): The username used to connect to the CnosDB service, with a
|
||||
default value of "root".
|
||||
3. password (str): The password of the user connecting to the CnosDB service,
|
||||
with a default value of "".
|
||||
4. tenant (str): The name of the tenant used to connect to the CnosDB service,
|
||||
with a default value of "cnosdb".
|
||||
5. database (str): The name of the database in the CnosDB tenant.
|
||||
## Examples
|
||||
```python
|
||||
# Connecting to CnosDB with SQLDatabase Wrapper
|
||||
from langchain import SQLDatabase
|
||||
|
||||
db = SQLDatabase.from_cnosdb()
|
||||
```
|
||||
```python
|
||||
# Creating a OpenAI Chat LLM Wrapper
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
|
||||
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")
|
||||
```
|
||||
|
||||
### SQL Database Chain
|
||||
This example demonstrates the use of the SQL Chain for answering a question over a CnosDB.
|
||||
```python
|
||||
from langchain import SQLDatabaseChain
|
||||
|
||||
db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True)
|
||||
|
||||
db_chain.run(
|
||||
"What is the average temperature of air at station XiaoMaiDao between October 19, 2022 and Occtober 20, 2022?"
|
||||
)
|
||||
```
|
||||
```shell
|
||||
> Entering new chain...
|
||||
What is the average temperature of air at station XiaoMaiDao between October 19, 2022 and Occtober 20, 2022?
|
||||
SQLQuery:SELECT AVG(temperature) FROM air WHERE station = 'XiaoMaiDao' AND time >= '2022-10-19' AND time < '2022-10-20'
|
||||
SQLResult: [(68.0,)]
|
||||
Answer:The average temperature of air at station XiaoMaiDao between October 19, 2022 and October 20, 2022 is 68.0.
|
||||
> Finished chain.
|
||||
```
|
||||
### SQL Database Agent
|
||||
This example demonstrates the use of the SQL Database Agent for answering questions over a CnosDB.
|
||||
```python
|
||||
from langchain.agents import create_sql_agent
|
||||
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
|
||||
|
||||
toolkit = SQLDatabaseToolkit(db=db, llm=llm)
|
||||
agent = create_sql_agent(llm=llm, toolkit=toolkit, verbose=True)
|
||||
```
|
||||
```python
|
||||
agent.run(
|
||||
"What is the average temperature of air at station XiaoMaiDao between October 19, 2022 and Occtober 20, 2022?"
|
||||
)
|
||||
```
|
||||
```shell
|
||||
> Entering new chain...
|
||||
Action: sql_db_list_tables
|
||||
Action Input: ""
|
||||
Observation: air
|
||||
Thought:The "air" table seems relevant to the question. I should query the schema of the "air" table to see what columns are available.
|
||||
Action: sql_db_schema
|
||||
Action Input: "air"
|
||||
Observation:
|
||||
CREATE TABLE air (
|
||||
pressure FLOAT,
|
||||
station STRING,
|
||||
temperature FLOAT,
|
||||
time TIMESTAMP,
|
||||
visibility FLOAT
|
||||
)
|
||||
|
||||
/*
|
||||
3 rows from air table:
|
||||
pressure station temperature time visibility
|
||||
75.0 XiaoMaiDao 67.0 2022-10-19T03:40:00 54.0
|
||||
77.0 XiaoMaiDao 69.0 2022-10-19T04:40:00 56.0
|
||||
76.0 XiaoMaiDao 68.0 2022-10-19T05:40:00 55.0
|
||||
*/
|
||||
Thought:The "temperature" column in the "air" table is relevant to the question. I can query the average temperature between the specified dates.
|
||||
Action: sql_db_query
|
||||
Action Input: "SELECT AVG(temperature) FROM air WHERE station = 'XiaoMaiDao' AND time >= '2022-10-19' AND time <= '2022-10-20'"
|
||||
Observation: [(68.0,)]
|
||||
Thought:The average temperature of air at station XiaoMaiDao between October 19, 2022 and October 20, 2022 is 68.0.
|
||||
Final Answer: 68.0
|
||||
|
||||
> Finished chain.
|
||||
```
|
||||
19
docs/extras/ecosystem/integrations/datadog_logs.mdx
Normal file
@@ -0,0 +1,19 @@
|
||||
# Datadog Logs
|
||||
|
||||
>[Datadog](https://www.datadoghq.com/) is a monitoring and analytics platform for cloud-scale applications.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
```bash
|
||||
pip install datadog_api_client
|
||||
```
|
||||
|
||||
We must initialize the loader with the Datadog API key and APP key, and we need to set up the query to extract the desired logs.
|
||||
|
||||
## Document Loader
|
||||
|
||||
See a [usage example](/docs/modules/data_connection/document_loaders/integrations/datadog_logs.html).
|
||||
|
||||
```python
|
||||
from langchain.document_loaders import DatadogLogsLoader
|
||||
```
|
||||
51
docs/extras/ecosystem/integrations/dataforseo.mdx
Normal file
@@ -0,0 +1,51 @@
|
||||
# DataForSEO
|
||||
|
||||
This page provides instructions on how to use the DataForSEO search APIs within LangChain.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
- Get a DataForSEO API Access login and password, and set them as environment variables (`DATAFORSEO_LOGIN` and `DATAFORSEO_PASSWORD` respectively). You can find it in your dashboard.
|
||||
|
||||
## Wrappers
|
||||
|
||||
### Utility
|
||||
|
||||
The DataForSEO utility wraps the API. To import this utility, use:
|
||||
|
||||
```python
|
||||
from langchain.utilities import DataForSeoAPIWrapper
|
||||
```
|
||||
|
||||
For a detailed walkthrough of this wrapper, see [this notebook](/docs/modules/agents/tools/integrations/dataforseo.ipynb).
|
||||
|
||||
### Tool
|
||||
|
||||
You can also load this wrapper as a Tool to use with an Agent:
|
||||
|
||||
```python
|
||||
from langchain.agents import load_tools
|
||||
tools = load_tools(["dataforseo-api-search"])
|
||||
```
|
||||
|
||||
## Example usage
|
||||
|
||||
```python
|
||||
dataforseo = DataForSeoAPIWrapper(api_login="your_login", api_password="your_password")
|
||||
result = dataforseo.run("Bill Gates")
|
||||
print(result)
|
||||
```
|
||||
|
||||
## Environment Variables
|
||||
|
||||
You can store your DataForSEO API Access login and password as environment variables. The wrapper will automatically check for these environment variables if no values are provided:
|
||||
|
||||
```python
|
||||
import os
|
||||
|
||||
os.environ["DATAFORSEO_LOGIN"] = "your_login"
|
||||
os.environ["DATAFORSEO_PASSWORD"] = "your_password"
|
||||
|
||||
dataforseo = DataForSeoAPIWrapper()
|
||||
result = dataforseo.run("weather in Los Angeles")
|
||||
print(result)
|
||||
```
|
||||
@@ -1,7 +1,7 @@
|
||||
# Grobid
|
||||
|
||||
This page covers how to use the Grobid to parse articles for LangChain.
|
||||
It is seperated into two parts: installation and running the server
|
||||
It is separated into two parts: installation and running the server
|
||||
|
||||
## Installation and Setup
|
||||
#Ensure You have Java installed
|
||||
|
||||
@@ -16,3 +16,59 @@ There exists a Jina Embeddings wrapper, which you can access with
|
||||
from langchain.embeddings import JinaEmbeddings
|
||||
```
|
||||
For a more detailed walkthrough of this, see [this notebook](/docs/modules/data_connection/text_embedding/integrations/jina.html)
|
||||
|
||||
## Deployment
|
||||
|
||||
[Langchain-serve](https://github.com/jina-ai/langchain-serve), powered by Jina, helps take LangChain apps to production with easy to use REST/WebSocket APIs and Slack bots.
|
||||
|
||||
### Usage
|
||||
|
||||
Install the package from PyPI.
|
||||
|
||||
```bash
|
||||
pip install langchain-serve
|
||||
```
|
||||
|
||||
Wrap your LangChain app with the `@serving` decorator.
|
||||
|
||||
```python
|
||||
# app.py
|
||||
from lcserve import serving
|
||||
|
||||
@serving
|
||||
def ask(input: str) -> str:
|
||||
from langchain import LLMChain, OpenAI
|
||||
from langchain.agents import AgentExecutor, ZeroShotAgent
|
||||
|
||||
tools = [...] # list of tools
|
||||
prompt = ZeroShotAgent.create_prompt(
|
||||
tools, input_variables=["input", "agent_scratchpad"],
|
||||
)
|
||||
llm_chain = LLMChain(llm=OpenAI(temperature=0), prompt=prompt)
|
||||
agent = ZeroShotAgent(
|
||||
llm_chain=llm_chain, allowed_tools=[tool.name for tool in tools]
|
||||
)
|
||||
agent_executor = AgentExecutor.from_agent_and_tools(
|
||||
agent=agent,
|
||||
tools=tools,
|
||||
verbose=True,
|
||||
)
|
||||
return agent_executor.run(input)
|
||||
```
|
||||
|
||||
Deploy on Jina AI Cloud with `lc-serve deploy jcloud app`. Once deployed, we can send a POST request to the API endpoint to get a response.
|
||||
|
||||
```bash
|
||||
curl -X 'POST' 'https://<your-app>.wolf.jina.ai/ask' \
|
||||
-d '{
|
||||
"input": "Your Quesion here?",
|
||||
"envs": {
|
||||
"OPENAI_API_KEY": "sk-***"
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
You can also self-host the app on your infrastructure with Docker-compose or Kubernetes. See [here](https://github.com/jina-ai/langchain-serve#-self-host-llm-apps-with-docker-compose-or-kubernetes) for more details.
|
||||
|
||||
|
||||
Langchain-serve also allows to deploy the apps with WebSocket APIs and Slack Bots both on [Jina AI Cloud](https://cloud.jina.ai/) or self-hosted infrastructure.
|
||||
|
||||
@@ -10,7 +10,7 @@ For Feedback, Issues, Contributions - please raise an issue here:
|
||||
Main principles and benefits:
|
||||
|
||||
- more `pythonic` way of writing code
|
||||
- write multiline prompts that wont break your code flow with indentation
|
||||
- write multiline prompts that won't break your code flow with indentation
|
||||
- making use of IDE in-built support for **hinting**, **type checking** and **popup with docs** to quickly peek in the function to see the prompt, parameters it consumes etc.
|
||||
- leverage all the power of 🦜🔗 LangChain ecosystem
|
||||
- adding support for **optional parameters**
|
||||
@@ -31,7 +31,7 @@ def write_me_short_post(topic:str, platform:str="twitter", audience:str = "devel
|
||||
"""
|
||||
return
|
||||
|
||||
# run it naturaly
|
||||
# run it naturally
|
||||
write_me_short_post(topic="starwars")
|
||||
# or
|
||||
write_me_short_post(topic="starwars", platform="redit")
|
||||
@@ -122,7 +122,7 @@ await write_me_short_post(topic="old movies")
|
||||
|
||||
# Simplified streaming
|
||||
|
||||
If we wan't to leverage streaming:
|
||||
If we want to leverage streaming:
|
||||
- we need to define prompt as async function
|
||||
- turn on the streaming on the decorator, or we can define PromptType with streaming on
|
||||
- capture the stream using StreamingContext
|
||||
@@ -149,7 +149,7 @@ async def write_me_short_post(topic:str, platform:str="twitter", audience:str =
|
||||
|
||||
|
||||
|
||||
# just an arbitrary function to demonstrate the streaming... wil be some websockets code in the real world
|
||||
# just an arbitrary function to demonstrate the streaming... will be some websockets code in the real world
|
||||
tokens=[]
|
||||
def capture_stream_func(new_token:str):
|
||||
tokens.append(new_token)
|
||||
@@ -250,7 +250,7 @@ the roles here are model native roles (assistant, user, system for chatGPT)
|
||||
|
||||
# Optional sections
|
||||
- you can define a whole sections of your prompt that should be optional
|
||||
- if any input in the section is missing, the whole section wont be rendered
|
||||
- if any input in the section is missing, the whole section won't be rendered
|
||||
|
||||
the syntax for this is as follows:
|
||||
|
||||
@@ -273,7 +273,7 @@ def prompt_with_optional_partials():
|
||||
# Output parsers
|
||||
|
||||
- llm_prompt decorator natively tries to detect the best output parser based on the output type. (if not set, it returns the raw string)
|
||||
- list, dict and pydantic outputs are also supported natively (automaticaly)
|
||||
- list, dict and pydantic outputs are also supported natively (automatically)
|
||||
|
||||
``` python
|
||||
# this code example is complete and should run as it is
|
||||
|
||||
31
docs/extras/ecosystem/integrations/marqo.md
Normal file
@@ -0,0 +1,31 @@
|
||||
# Marqo
|
||||
|
||||
This page covers how to use the Marqo ecosystem within LangChain.
|
||||
|
||||
### **What is Marqo?**
|
||||
|
||||
Marqo is a tensor search engine that uses embeddings stored in in-memory HNSW indexes to achieve cutting edge search speeds. Marqo can scale to hundred-million document indexes with horizontal index sharding and allows for async and non-blocking data upload and search. Marqo uses the latest machine learning models from PyTorch, Huggingface, OpenAI and more. You can start with a pre-configured model or bring your own. The built in ONNX support and conversion allows for faster inference and higher throughput on both CPU and GPU.
|
||||
|
||||
Because Marqo include its own inference your documents can have a mix of text and images, you can bring Marqo indexes with data from your other systems into the langchain ecosystem without having to worry about your embeddings being compatible.
|
||||
|
||||
Deployment of Marqo is flexible, you can get started yourself with our docker image or [contact us about our managed cloud offering!](https://www.marqo.ai/pricing)
|
||||
|
||||
To run Marqo locally with our docker image, [see our getting started.](https://docs.marqo.ai/latest/)
|
||||
|
||||
## Installation and Setup
|
||||
- Install the Python SDK with `pip install marqo`
|
||||
|
||||
## Wrappers
|
||||
|
||||
### VectorStore
|
||||
|
||||
There exists a wrapper around Marqo indexes, allowing you to use them within the vectorstore framework. Marqo lets you select from a range of models for generating embeddings and exposes some preprocessing configurations.
|
||||
|
||||
The Marqo vectorstore can also work with existing multimodel indexes where your documents have a mix of images and text, for more information refer to [our documentation](https://docs.marqo.ai/latest/#multi-modal-and-cross-modal-search). Note that instaniating the Marqo vectorstore with an existing multimodal index will disable the ability to add any new documents to it via the langchain vectorstore `add_texts` method.
|
||||
|
||||
To import this vectorstore:
|
||||
```python
|
||||
from langchain.vectorstores import Marqo
|
||||
```
|
||||
|
||||
For a more detailed walkthrough of the Marqo wrapper and some of its unique features, see [this notebook](/docs/modules/data_connection/vectorstores/integrations/marqo.html)
|
||||
@@ -18,7 +18,7 @@ We also deliver with live demo on huggingface! Please checkout our [huggingface
|
||||
## Installation and Setup
|
||||
- Install the Python SDK with `pip install clickhouse-connect`
|
||||
|
||||
### Setting up envrionments
|
||||
### Setting up environments
|
||||
|
||||
There are two ways to set up parameters for myscale index.
|
||||
|
||||
|
||||
56
docs/extras/ecosystem/integrations/trulens.mdx
Normal file
@@ -0,0 +1,56 @@
|
||||
# TruLens
|
||||
|
||||
This page covers how to use [TruLens](https://trulens.org) to evaluate and track LLM apps built on langchain.
|
||||
|
||||
## What is TruLens?
|
||||
|
||||
TruLens is an [opensource](https://github.com/truera/trulens) package that provides instrumentation and evaluation tools for large language model (LLM) based applications.
|
||||
|
||||
## Quick start
|
||||
|
||||
Once you've created your LLM chain, you can use TruLens for evaluation and tracking. TruLens has a number of [out-of-the-box Feedback Functions](https://www.trulens.org/trulens_eval/feedback_functions/), and is also an extensible framework for LLM evaluation.
|
||||
|
||||
```python
|
||||
# create a feedback function
|
||||
|
||||
from trulens_eval.feedback import Feedback, Huggingface, OpenAI
|
||||
# Initialize HuggingFace-based feedback function collection class:
|
||||
hugs = Huggingface()
|
||||
openai = OpenAI()
|
||||
|
||||
# Define a language match feedback function using HuggingFace.
|
||||
lang_match = Feedback(hugs.language_match).on_input_output()
|
||||
# By default this will check language match on the main app input and main app
|
||||
# output.
|
||||
|
||||
# Question/answer relevance between overall question and answer.
|
||||
qa_relevance = Feedback(openai.relevance).on_input_output()
|
||||
# By default this will evaluate feedback on main app input and main app output.
|
||||
|
||||
# Toxicity of input
|
||||
toxicity = Feedback(openai.toxicity).on_input()
|
||||
|
||||
```
|
||||
|
||||
After you've set up Feedback Function(s) for evaluating your LLM, you can wrap your application with TruChain to get detailed tracing, logging and evaluation of your LLM app.
|
||||
|
||||
```python
|
||||
# wrap your chain with TruChain
|
||||
truchain = TruChain(
|
||||
chain,
|
||||
app_id='Chain1_ChatApplication',
|
||||
feedbacks=[lang_match, qa_relevance, toxicity]
|
||||
)
|
||||
# Note: any `feedbacks` specified here will be evaluated and logged whenever the chain is used.
|
||||
truchain("que hora es?")
|
||||
```
|
||||
|
||||
Now you can explore your LLM-based application!
|
||||
|
||||
Doing so will help you understand how your LLM application is performing at a glance. As you iterate new versions of your LLM application, you can compare their performance across all of the different quality metrics you've set up. You'll also be able to view evaluations at a record level, and explore the chain metadata for each record.
|
||||
|
||||
```python
|
||||
tru.run_dashboard() # open a Streamlit app to explore
|
||||
```
|
||||
|
||||
For more information on TruLens, visit [trulens.org](https://www.trulens.org/)
|
||||
@@ -39,7 +39,7 @@ vectara = Vectara(
|
||||
```
|
||||
The customer_id, corpus_id and api_key are optional, and if they are not supplied will be read from the environment variables `VECTARA_CUSTOMER_ID`, `VECTARA_CORPUS_ID` and `VECTARA_API_KEY`, respectively.
|
||||
|
||||
Afer you have the vectorstore, you can `add_texts` or `add_documents` as per the standard `VectorStore` interface, for example:
|
||||
After you have the vectorstore, you can `add_texts` or `add_documents` as per the standard `VectorStore` interface, for example:
|
||||
|
||||
```python
|
||||
vectara.add_texts(["to be or not to be", "that is the question"])
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -16,6 +17,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -28,10 +30,11 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install langkit -q"
|
||||
"%pip install langkit openai langchain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -54,6 +57,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -63,6 +67,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -125,16 +130,7 @@
|
||||
" ]\n",
|
||||
")\n",
|
||||
"print(result)\n",
|
||||
"# you don't need to call flush, this will occur periodically, but to demo let's not wait.\n",
|
||||
"whylabs.flush()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# you don't need to call close to write profiles to WhyLabs, upload will occur periodically, but to demo let's not wait.\n",
|
||||
"whylabs.close()"
|
||||
]
|
||||
}
|
||||
@@ -155,7 +151,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
"version": "3.8.10"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# YouTube
|
||||
|
||||
>[YouTube](https://www.youtube.com/) is an online video sharing and social media platform created by Google.
|
||||
>[YouTube](https://www.youtube.com/) is an online video sharing and social media platform by Google.
|
||||
> We download the `YouTube` transcripts and video information.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
@@ -24,6 +24,7 @@ Understanding these components is crucial when assessing serving systems. LangCh
|
||||
- [BentoML](https://github.com/bentoml/BentoML)
|
||||
- [OpenLLM](/docs/ecosystem/integrations/openllm.html)
|
||||
- [Modal](/docs/ecosystem/integrations/modal.html)
|
||||
- [Jina](/docs/ecosystem/integrations/jina.html#deployment)
|
||||
|
||||
These links will provide further information on each ecosystem, assisting you in finding the best fit for your LLM deployment needs.
|
||||
|
||||
|
||||
@@ -51,6 +51,10 @@ A minimal example of how to deploy LangChain to [Fly.io](https://fly.io/) using
|
||||
|
||||
A minimal example on how to deploy LangChain to DigitalOcean App Platform.
|
||||
|
||||
## [CI/CD Google Cloud Build + Dockerfile + Serverless Google Cloud Run](https://github.com/g-emarco/github-assistant)
|
||||
|
||||
Boilerplate LangChain project on how to deploy to Google Cloud Run using Docker with Cloud Build CI/CD pipeline
|
||||
|
||||
## [Google Cloud Run](https://github.com/homanp/gcp-langchain)
|
||||
|
||||
A minimal example on how to deploy LangChain to Google Cloud Run.
|
||||
@@ -61,7 +65,7 @@ This repository contains LangChain adapters for Steamship, enabling LangChain de
|
||||
|
||||
## [Langchain-serve](https://github.com/jina-ai/langchain-serve)
|
||||
|
||||
This repository allows users to serve local chains and agents as RESTful, gRPC, or WebSocket APIs, thanks to [Jina](https://docs.jina.ai/). Deploy your chains & agents with ease and enjoy independent scaling, serverless and autoscaling APIs, as well as a Streamlit playground on Jina AI Cloud.
|
||||
This repository allows users to deploy any LangChain app as REST/WebSocket APIs or, as Slack Bots with ease. Benefit from the scalability and serverless architecture of Jina AI Cloud, or deploy on-premise with Kubernetes.
|
||||
|
||||
## [BentoML](https://github.com/ssheng/BentoChain)
|
||||
|
||||
|
||||
@@ -117,11 +117,11 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"# Initialize the language model\n",
|
||||
"# You can add your own OpenAI API key by adding openai_api_key=\"<your_api_key>\" \n",
|
||||
"# You can add your own OpenAI API key by adding openai_api_key=\"<your_api_key>\"\n",
|
||||
"llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")\n",
|
||||
"\n",
|
||||
"# Initialize the SerpAPIWrapper for search functionality\n",
|
||||
"#Replace <your_api_key> in openai_api_key=\"<your_api_key>\" with your actual SerpAPI key.\n",
|
||||
"# Replace <your_api_key> in openai_api_key=\"<your_api_key>\" with your actual SerpAPI key.\n",
|
||||
"search = SerpAPIWrapper()\n",
|
||||
"\n",
|
||||
"# Define a list of tools offered by the agent\n",
|
||||
@@ -130,7 +130,7 @@
|
||||
" name=\"Search\",\n",
|
||||
" func=search.run,\n",
|
||||
" coroutine=search.arun,\n",
|
||||
" description=\"Useful when you need to answer questions about current events. You should ask targeted questions.\"\n",
|
||||
" description=\"Useful when you need to answer questions about current events. You should ask targeted questions.\",\n",
|
||||
" ),\n",
|
||||
"]"
|
||||
]
|
||||
@@ -143,8 +143,12 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"functions_agent = initialize_agent(tools, llm, agent=AgentType.OPENAI_MULTI_FUNCTIONS, verbose=False)\n",
|
||||
"conversations_agent = initialize_agent(tools, llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=False)"
|
||||
"functions_agent = initialize_agent(\n",
|
||||
" tools, llm, agent=AgentType.OPENAI_MULTI_FUNCTIONS, verbose=False\n",
|
||||
")\n",
|
||||
"conversations_agent = initialize_agent(\n",
|
||||
" tools, llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=False\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -193,20 +197,20 @@
|
||||
"\n",
|
||||
"results = []\n",
|
||||
"agents = [functions_agent, conversations_agent]\n",
|
||||
"concurrency_level = 6 # How many concurrent agents to run. May need to decrease if OpenAI is rate limiting.\n",
|
||||
"concurrency_level = 6 # How many concurrent agents to run. May need to decrease if OpenAI is rate limiting.\n",
|
||||
"\n",
|
||||
"# We will only run the first 20 examples of this dataset to speed things up\n",
|
||||
"# This will lead to larger confidence intervals downstream.\n",
|
||||
"batch = []\n",
|
||||
"for example in tqdm(dataset[:20]):\n",
|
||||
" batch.extend([agent.acall(example['inputs']) for agent in agents])\n",
|
||||
" batch.extend([agent.acall(example[\"inputs\"]) for agent in agents])\n",
|
||||
" if len(batch) >= concurrency_level:\n",
|
||||
" batch_results = await asyncio.gather(*batch, return_exceptions=True)\n",
|
||||
" results.extend(list(zip(*[iter(batch_results)]*2)))\n",
|
||||
" results.extend(list(zip(*[iter(batch_results)] * 2)))\n",
|
||||
" batch = []\n",
|
||||
"if batch:\n",
|
||||
" batch_results = await asyncio.gather(*batch, return_exceptions=True)\n",
|
||||
" results.extend(list(zip(*[iter(batch_results)]*2)))"
|
||||
" results.extend(list(zip(*[iter(batch_results)] * 2)))"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -230,11 +234,12 @@
|
||||
"source": [
|
||||
"import random\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def predict_preferences(dataset, results) -> list:\n",
|
||||
" preferences = []\n",
|
||||
"\n",
|
||||
" for example, (res_a, res_b) in zip(dataset, results):\n",
|
||||
" input_ = example['inputs']\n",
|
||||
" input_ = example[\"inputs\"]\n",
|
||||
" # Flip a coin to reduce persistent position bias\n",
|
||||
" if random.random() < 0.5:\n",
|
||||
" pred_a, pred_b = res_a, res_b\n",
|
||||
@@ -243,16 +248,16 @@
|
||||
" pred_a, pred_b = res_b, res_a\n",
|
||||
" a, b = \"b\", \"a\"\n",
|
||||
" eval_res = eval_chain.evaluate_string_pairs(\n",
|
||||
" prediction=pred_a['output'] if isinstance(pred_a, dict) else str(pred_a),\n",
|
||||
" prediction_b=pred_b['output'] if isinstance(pred_b, dict) else str(pred_b),\n",
|
||||
" input=input_\n",
|
||||
" prediction=pred_a[\"output\"] if isinstance(pred_a, dict) else str(pred_a),\n",
|
||||
" prediction_b=pred_b[\"output\"] if isinstance(pred_b, dict) else str(pred_b),\n",
|
||||
" input=input_,\n",
|
||||
" )\n",
|
||||
" if eval_res[\"value\"] == \"A\":\n",
|
||||
" preferences.append(a)\n",
|
||||
" elif eval_res[\"value\"] == \"B\":\n",
|
||||
" preferences.append(b)\n",
|
||||
" else:\n",
|
||||
" preferences.append(None) # No preference\n",
|
||||
" preferences.append(None) # No preference\n",
|
||||
" return preferences"
|
||||
]
|
||||
},
|
||||
@@ -298,10 +303,7 @@
|
||||
" \"b\": \"Structured Chat Agent\",\n",
|
||||
"}\n",
|
||||
"counts = Counter(preferences)\n",
|
||||
"pref_ratios = {\n",
|
||||
" k: v/len(preferences) for k, v in\n",
|
||||
" counts.items()\n",
|
||||
"}\n",
|
||||
"pref_ratios = {k: v / len(preferences) for k, v in counts.items()}\n",
|
||||
"for k, v in pref_ratios.items():\n",
|
||||
" print(f\"{name_map.get(k)}: {v:.2%}\")"
|
||||
]
|
||||
@@ -327,13 +329,16 @@
|
||||
"source": [
|
||||
"from math import sqrt\n",
|
||||
"\n",
|
||||
"def wilson_score_interval(preferences: list, which: str = \"a\", z: float = 1.96) -> tuple:\n",
|
||||
"\n",
|
||||
"def wilson_score_interval(\n",
|
||||
" preferences: list, which: str = \"a\", z: float = 1.96\n",
|
||||
") -> tuple:\n",
|
||||
" \"\"\"Estimate the confidence interval using the Wilson score.\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" See: https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval\n",
|
||||
" for more details, including when to use it and when it should not be used.\n",
|
||||
" \"\"\"\n",
|
||||
" total_preferences = preferences.count('a') + preferences.count('b')\n",
|
||||
" total_preferences = preferences.count(\"a\") + preferences.count(\"b\")\n",
|
||||
" n_s = preferences.count(which)\n",
|
||||
"\n",
|
||||
" if total_preferences == 0:\n",
|
||||
@@ -342,8 +347,11 @@
|
||||
" p_hat = n_s / total_preferences\n",
|
||||
"\n",
|
||||
" denominator = 1 + (z**2) / total_preferences\n",
|
||||
" adjustment = (z / denominator) * sqrt(p_hat*(1-p_hat)/total_preferences + (z**2)/(4*total_preferences*total_preferences))\n",
|
||||
" center = (p_hat + (z**2) / (2*total_preferences)) / denominator\n",
|
||||
" adjustment = (z / denominator) * sqrt(\n",
|
||||
" p_hat * (1 - p_hat) / total_preferences\n",
|
||||
" + (z**2) / (4 * total_preferences * total_preferences)\n",
|
||||
" )\n",
|
||||
" center = (p_hat + (z**2) / (2 * total_preferences)) / denominator\n",
|
||||
" lower_bound = min(max(center - adjustment, 0.0), 1.0)\n",
|
||||
" upper_bound = min(max(center + adjustment, 0.0), 1.0)\n",
|
||||
"\n",
|
||||
@@ -369,7 +377,9 @@
|
||||
"source": [
|
||||
"for which_, name in name_map.items():\n",
|
||||
" low, high = wilson_score_interval(preferences, which=which_)\n",
|
||||
" print(f'The \"{name}\" would be preferred between {low:.2%} and {high:.2%} percent of the time (with 95% confidence).')"
|
||||
" print(\n",
|
||||
" f'The \"{name}\" would be preferred between {low:.2%} and {high:.2%} percent of the time (with 95% confidence).'\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -398,13 +408,16 @@
|
||||
],
|
||||
"source": [
|
||||
"from scipy import stats\n",
|
||||
"\n",
|
||||
"preferred_model = max(pref_ratios, key=pref_ratios.get)\n",
|
||||
"successes = preferences.count(preferred_model)\n",
|
||||
"n = len(preferences) - preferences.count(None)\n",
|
||||
"p_value = stats.binom_test(successes, n, p=0.5, alternative='two-sided')\n",
|
||||
"print(f\"\"\"The p-value is {p_value:.5f}. If the null hypothesis is true (i.e., if the selected eval chain actually has no preference between the models),\n",
|
||||
"p_value = stats.binom_test(successes, n, p=0.5, alternative=\"two-sided\")\n",
|
||||
"print(\n",
|
||||
" f\"\"\"The p-value is {p_value:.5f}. If the null hypothesis is true (i.e., if the selected eval chain actually has no preference between the models),\n",
|
||||
"then there is a {p_value:.5%} chance of observing the {name_map.get(preferred_model)} be preferred at least {successes}\n",
|
||||
"times out of {n} trials.\"\"\")"
|
||||
"times out of {n} trials.\"\"\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
"The `CriteriaEvalChain` is a convenient way to predict whether an LLM or Chain's output complies with a set of criteria, so long as you can\n",
|
||||
"describe those criteria in regular language. In this example, you will use the `CriteriaEvalChain` to check whether an output is concise.\n",
|
||||
"\n",
|
||||
"### Step 1: Create the Eval Chain\n",
|
||||
"### Step 1: Load Eval Chain\n",
|
||||
"\n",
|
||||
"First, create the evaluation chain to predict whether outputs are \"concise\"."
|
||||
]
|
||||
@@ -27,11 +27,15 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.evaluation.criteria import CriteriaEvalChain\n",
|
||||
"from langchain.evaluation import load_evaluator, EvaluatorType\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(temperature=0)\n",
|
||||
"eval_llm = ChatOpenAI(model=\"gpt-4\", temperature=0)\n",
|
||||
"criterion = \"conciseness\"\n",
|
||||
"eval_chain = CriteriaEvalChain.from_llm(llm=llm, criteria=criterion)"
|
||||
"eval_chain = load_evaluator(EvaluatorType.CRITERIA, llm=eval_llm, criteria=criterion)\n",
|
||||
"\n",
|
||||
"# Equivalent to:\n",
|
||||
"# from langchain.evaluation import CriteriaEvalChain\n",
|
||||
"# CriteriaEvalChain.from_llm(llm=eval_llm, criteria=criterion)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -54,7 +58,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = ChatOpenAI(temperature=0)\n",
|
||||
"query=\"What's the origin of the term synecdoche?\"\n",
|
||||
"query = \"What's the origin of the term synecdoche?\"\n",
|
||||
"prediction = llm.predict(query)"
|
||||
]
|
||||
},
|
||||
@@ -80,7 +84,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'reasoning': '1. Conciseness: The submission is concise and to the point. It directly answers the question without any unnecessary information. Therefore, the submission meets the criterion of conciseness.\\n\\nY', 'value': 'Y', 'score': 1}\n"
|
||||
"{'reasoning': 'The criterion for this task is conciseness. The submission should be concise and to the point.\\n\\nLooking at the submission, it provides a detailed explanation of the origin of the term \"synecdoche\". It explains the Greek roots of the word and how it entered the English language. \\n\\nWhile the explanation is detailed, it is also concise. It doesn\\'t include unnecessary information or go off on tangents. It sticks to the point, which is explaining the origin of the term.\\n\\nTherefore, the submission meets the criterion of conciseness.\\n\\nY', 'value': 'Y', 'score': 1}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -89,40 +93,6 @@
|
||||
"print(eval_result)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "8c4ec9dd-6557-4f23-8480-c822eb6ec552",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['conciseness',\n",
|
||||
" 'relevance',\n",
|
||||
" 'correctness',\n",
|
||||
" 'coherence',\n",
|
||||
" 'harmfulness',\n",
|
||||
" 'maliciousness',\n",
|
||||
" 'helpfulness',\n",
|
||||
" 'controversiality',\n",
|
||||
" 'mysogyny',\n",
|
||||
" 'criminality',\n",
|
||||
" 'insensitive']"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# For a list of other default supported criteria, try calling `supported_default_criteria`\n",
|
||||
"CriteriaEvalChain.get_supported_default_criteria()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c40b1ac7-8f95-48ed-89a2-623bcc746461",
|
||||
@@ -133,6 +103,24 @@
|
||||
"Some criteria may be useful only when there are ground truth reference labels. You can pass these in as well."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "0c41cd19",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"eval_chain = load_evaluator(\n",
|
||||
" EvaluatorType.LABELED_CRITERIA,\n",
|
||||
" llm=eval_llm,\n",
|
||||
" criteria=\"correctness\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Equivalent to\n",
|
||||
"# from langchain.evaluation import LabeledCriteriaEvalChain\n",
|
||||
"# LabeledCriteriaEvalChain.from_llm(llm=eval_llm, criteria=criterion)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
@@ -145,62 +133,18 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"With ground truth: 1\n",
|
||||
"Withoutg ground truth: 0\n"
|
||||
"With ground truth: 1\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"eval_chain = CriteriaEvalChain.from_llm(llm=llm, criteria=\"correctness\", requires_reference=True)\n",
|
||||
"\n",
|
||||
"# We can even override the model's learned knowledge using ground truth labels\n",
|
||||
"eval_result = eval_chain.evaluate_strings(\n",
|
||||
" input=\"What is the capital of the US?\",\n",
|
||||
" prediction=\"Topeka, KS\", \n",
|
||||
" reference=\"The capital of the US is Topeka, KS, where it permanently moved from Washington D.C. on May 16, 2023\")\n",
|
||||
"print(f'With ground truth: {eval_result[\"score\"]}')\n",
|
||||
"\n",
|
||||
"eval_chain = CriteriaEvalChain.from_llm(llm=llm, criteria=\"correctness\")\n",
|
||||
"eval_result = eval_chain.evaluate_strings(\n",
|
||||
" input=\"What is the capital of the US?\",\n",
|
||||
" prediction=\"Topeka, KS\", \n",
|
||||
" prediction=\"Topeka, KS\",\n",
|
||||
" reference=\"The capital of the US is Topeka, KS, where it permanently moved from Washington D.C. on May 16, 2023\",\n",
|
||||
")\n",
|
||||
"print(f'Withoutg ground truth: {eval_result[\"score\"]}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2eb7dedb-913a-4d9e-b48a-9521425d1008",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"## Multiple Criteria\n",
|
||||
"\n",
|
||||
"To check whether an output complies with all of a list of default criteria, pass in a list! Be sure to only include criteria that are relevant to the provided information, and avoid mixing criteria that measure opposing things (e.g., harmfulness and helpfulness)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "50c067f7-bc6e-4d6c-ba34-97a72023be27",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'reasoning': 'Conciseness:\\n- The submission is one sentence long, which is concise.\\n- The submission directly answers the question without any unnecessary information.\\nConclusion: The submission meets the conciseness criterion.\\n\\nCoherence:\\n- The submission is well-structured and organized.\\n- The submission provides the origin of the term synecdoche and explains the meaning of the Greek words it comes from.\\n- The submission is coherent and easy to understand.\\nConclusion: The submission meets the coherence criterion.', 'value': 'Final conclusion: Y', 'score': None}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"criteria = [\"conciseness\", \"coherence\"]\n",
|
||||
"eval_chain = CriteriaEvalChain.from_llm(llm=llm, criteria=criteria)\n",
|
||||
"eval_result = eval_chain.evaluate_strings(prediction=prediction, input=query)\n",
|
||||
"print(eval_result)"
|
||||
"print(f'With ground truth: {eval_result[\"score\"]}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -217,7 +161,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 6,
|
||||
"id": "bafa0a11-2617-4663-84bf-24df7d0736be",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -225,58 +169,22 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'reasoning': '1. Criteria: numeric: Does the output contain numeric information?\\n- The submission does not contain any numeric information.\\n- Conclusion: The submission meets the criteria.', 'value': 'Answer: Y', 'score': None}\n"
|
||||
"{'reasoning': 'The criterion is asking if the output contains numeric information. The submission does mention the \"late 16th century,\" which is a numeric information. Therefore, the submission meets the criterion.\\n\\nY', 'value': 'Y', 'score': 1}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"custom_criterion = {\n",
|
||||
" \"numeric\": \"Does the output contain numeric information?\"\n",
|
||||
"}\n",
|
||||
"custom_criterion = {\"numeric\": \"Does the output contain numeric information?\"}\n",
|
||||
"\n",
|
||||
"eval_chain = CriteriaEvalChain.from_llm(llm=llm, criteria=custom_criterion)\n",
|
||||
"eval_chain = load_evaluator(\n",
|
||||
" EvaluatorType.CRITERIA,\n",
|
||||
" llm=eval_llm,\n",
|
||||
" criteria=custom_criterion,\n",
|
||||
")\n",
|
||||
"eval_result = eval_chain.evaluate_strings(prediction=prediction, input=query)\n",
|
||||
"print(eval_result)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "6db12a16-0058-4a14-8064-8528540963d8",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Meets criteria: 1\n",
|
||||
"Does not meet criteria: 0\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# You can specify multiple criteria in the dictionary. We recommend you keep the number criteria to a minimum, however for more reliable results.\n",
|
||||
"\n",
|
||||
"custom_criteria = {\n",
|
||||
" \"complements-user\": \"Does the submission complements the question or the person writing the question in some way?\",\n",
|
||||
" \"positive\": \"Does the submission maintain a positive sentiment throughout?\",\n",
|
||||
" \"active voice\": \"Does the submission maintain an active voice throughout, avoiding state of being verbs?\",\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"eval_chain = CriteriaEvalChain.from_llm(llm=llm, criteria=custom_criteria)\n",
|
||||
"\n",
|
||||
"# Example that complies\n",
|
||||
"query = \"What's the population of lagos?\"\n",
|
||||
"eval_result = eval_chain.evaluate_strings(prediction=\"I think that's a great question, you're really curious! About 30 million people live in Lagos, Nigeria, as of 2023.\", input=query)\n",
|
||||
"print(\"Meets criteria: \", eval_result[\"score\"])\n",
|
||||
"\n",
|
||||
"# Example that does not comply\n",
|
||||
"eval_result = eval_chain.evaluate_strings(prediction=\"The population of Lagos, Nigeria, is about 30 million people.\", input=query)\n",
|
||||
"print(\"Does not meet criteria: \", eval_result[\"score\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "07485cce-8d52-43a0-bdad-76ec7dacfb51",
|
||||
@@ -290,7 +198,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 7,
|
||||
"id": "99e3c242-5b12-4bd5-b487-64990a159655",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -316,7 +224,7 @@
|
||||
" ConstitutionalPrinciple(critique_request='Discuss the assistant’s last response to the human and whether it is in any way insensitive, sexist, racist, or socially inappropriate.', revision_request='Revise the assistant’s last response to remove content that is in any way insensitive, sexist, racist, or socially inappropriate.', name='insensitive'))]"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -330,7 +238,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 8,
|
||||
"id": "f4061bc0-a88a-4d1c-88a0-8003bba084c7",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -339,20 +247,25 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'reasoning': 'harmful1: The submission includes a derogatory and insulting term (\"lilly-livered nincompoop\") to describe Will, which could be considered harmful and disrespectful. Therefore, the submission meets this criterion.\\nharmful2: The submission does not contain any explicit encouragement of dangerous or illegal activity, nor does it contain any explicit racism or sexism. However, the use of the term \"nincompoop\" could be seen as belittling and harmful to Will\\'s self-esteem. Therefore, the submission meets this criterion as well.\\n\\nY',\n",
|
||||
" 'value': 'Y',\n",
|
||||
" 'score': 1}"
|
||||
"{'reasoning': 'The criterion asks to identify if the assistant\\'s response is harmful, unethical, racist, sexist, toxic, dangerous, or illegal.\\n\\nThe assistant\\'s response is \"I say that man is a lilly-livered nincompoop\". This is a negative statement about a person named Will. \\n\\nThe term \"lilly-livered\" is an old-fashioned term meaning cowardly, and \"nincompoop\" is a term meaning a foolish or stupid person. \\n\\nWhile this is a negative and potentially insulting statement, it does not appear to be harmful in a physical sense, unethical, racist, sexist, toxic, dangerous, or illegal. \\n\\nIt could be argued that the statement is harmful in a psychological or emotional sense, as it is insulting. However, without further context, it\\'s hard to definitively say if this meets the criteria of being harmful.\\n\\nN',\n",
|
||||
" 'value': 'N',\n",
|
||||
" 'score': 0}"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"eval_chain = CriteriaEvalChain.from_llm(llm=llm, criteria=[PRINCIPLES[\"harmful1\"], PRINCIPLES[\"harmful2\"]])\n",
|
||||
"eval_result = eval_chain.evaluate_strings(prediction=\"I say that man is a lilly-livered nincompoop\", input=\"What do you think of Will?\")\n",
|
||||
"eval_result"
|
||||
"eval_chain = load_evaluator(\n",
|
||||
" EvaluatorType.CRITERIA, llm=eval_llm, criteria=PRINCIPLES[\"harmful1\"]\n",
|
||||
")\n",
|
||||
"eval_result = eval_chain.evaluate_strings(\n",
|
||||
" prediction=\"I say that man is a lilly-livered nincompoop\",\n",
|
||||
" input=\"What do you think of Will?\",\n",
|
||||
")\n",
|
||||
"print(eval_result)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -366,14 +279,6 @@
|
||||
"\n",
|
||||
"Remember when selecting criteria to decide whether they ought to require ground truth labels or not. Things like \"correctness\" are best evaluated with ground truth or with extensive context. Also, remember to pick aligned principles for a given chain so that the classification makes sense."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "415eb393-c64f-41f1-98de-de99e8e3597e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -392,7 +297,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -339,9 +339,9 @@
|
||||
" agent_trajectory=test_outputs_one[\"intermediate_steps\"],\n",
|
||||
" reference=(\n",
|
||||
" \"You need many more than 100,000 ping-pong balls in the empire state building.\"\n",
|
||||
" )\n",
|
||||
" ),\n",
|
||||
")\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"print(\"Score from 1 to 5: \", evaluation[\"score\"])\n",
|
||||
"print(\"Reasoning: \", evaluation[\"reasoning\"])"
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"\n",
|
||||
"Here we go over how to benchmark performance on a question answering task over a Paul Graham essay.\n",
|
||||
"\n",
|
||||
"It is highly reccomended that you do any evaluation/benchmarking with tracing enabled. See [here](https://langchain.readthedocs.io/en/latest/tracing.html) for an explanation of what tracing is and how to set it up."
|
||||
"It is highly reccomended that you do any evaluation/benchmarking with tracing enabled. See [here](https://python.langchain.com/docs/modules/callbacks/how_to/tracing) for an explanation of what tracing is and how to set it up."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
567
docs/extras/guides/langsmith/walkthrough.ipynb
Normal file
@@ -0,0 +1,567 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1a4596ea-a631-416d-a2a4-3577c140493d",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"# LangSmith Walkthrough\n",
|
||||
"\n",
|
||||
"LangChain makes it easy to prototype LLM applications and Agents. However, delivering LLM applications to production can be deceptively difficult. You will likely have to heavily customize and iterate on your prompts, chains, and other components to create a high-quality product.\n",
|
||||
"\n",
|
||||
"To aid in this process, we've launched LangSmith, a unified platform for debugging, testing, and monitoring your LLM applications.\n",
|
||||
"\n",
|
||||
"When might this come in handy? You may find it useful when you want to:\n",
|
||||
"\n",
|
||||
"- Quickly debug a new chain, agent, or set of tools\n",
|
||||
"- Visualize how components (chains, llms, retrievers, etc.) relate and are used\n",
|
||||
"- Evaluate different prompts and LLMs for a single component\n",
|
||||
"- Run a given chain several times over a dataset to ensure it consistently meets a quality bar\n",
|
||||
"- Capture usage traces and using LLMs or analytics pipelines to generate insights"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "138fbb8f-960d-4d26-9dd5-6d6acab3ee55",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prerequisites\n",
|
||||
"\n",
|
||||
"**Run LangSmith locally with docker OR [create a LangSmith account](https://smith.langchain.com/) and connect with an API key.**\n",
|
||||
"\n",
|
||||
"Note that the hosted version of LangSmith is in gated beta; we're in the process of rolling it out to more users.\n",
|
||||
"\n",
|
||||
"To run LangSmith locally, execute the following comand in your terminal:\n",
|
||||
"```\n",
|
||||
"pip install --upgrade langsmith\n",
|
||||
"langsmith start\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Now, let's get started!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2d77d064-41b4-41fb-82e6-2d16461269ec",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"## Log Traces to LangSmith\n",
|
||||
"\n",
|
||||
"First, configure your environment variables to tell LangChain to log traces. This is done by setting the `LANGCHAIN_TRACING_V2` environment variable to true.\n",
|
||||
"You can tell LangChain which project to log to by setting the `LANGCHAIN_PROJECT` environment variable. This will automatically create a debug project for you.\n",
|
||||
"\n",
|
||||
"For more information on other ways to set up tracing, please reference the [LangSmith documentation](https://docs.smith.langchain.com/docs/)\n",
|
||||
"\n",
|
||||
"**NOTE:** You must also set your `OPENAI_API_KEY` and `SERPAPI_API_KEY` environment variables in order to run the following tutorial.\n",
|
||||
"\n",
|
||||
"**NOTE:** You can optionally set the `LANGCHAIN_ENDPOINT` and `LANGCHAIN_API_KEY` environment variables if using the hosted version."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "904db9a5-f387-4a57-914c-c8af8d39e249",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from uuid import uuid4\n",
|
||||
"\n",
|
||||
"unique_id = uuid4().hex[0:8]\n",
|
||||
"os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"os.environ[\"LANGCHAIN_PROJECT\"] = f\"Tracing Walkthrough - {unique_id}\"\n",
|
||||
"os.environ[\n",
|
||||
" \"LANGCHAIN_ENDPOINT\"\n",
|
||||
"] = \"\" # Update to \"https://api.smith.langchain.com\" to use the hosted version.\n",
|
||||
"os.environ[\n",
|
||||
" \"LANGCHAIN_API_KEY\"\n",
|
||||
"] = \"\" # Update to your API key to use the hosted version.\n",
|
||||
"\n",
|
||||
"# Used by the agent in this tutorial\n",
|
||||
"# os.environ[\"OPENAI_API_KEY\"] = \"<YOUR-OPENAI-API-KEY>\"\n",
|
||||
"# os.environ[\"SERPAPI_API_KEY\"] = \"<YOUR-SERPAPI-API-KEY>\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8ee7f34b-b65c-4e09-ad52-e3ace78d0221",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"Create the langsmith client to interact with the API"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "510b5ca0",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langsmith import Client\n",
|
||||
"\n",
|
||||
"client = Client()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ca27fa11-ddce-4af0-971e-c5c37d5b92ef",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now, start prototyping your agent. We will use a math example using an older ReACT-style agent."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "7c801853-8e96-404d-984c-51ace59cbbef",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.agents import AgentType, initialize_agent, load_tools\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(temperature=0)\n",
|
||||
"tools = load_tools([\"serpapi\", \"llm-math\"], llm=llm)\n",
|
||||
"agent = initialize_agent(\n",
|
||||
" tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=False\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"id": "19537902-b95c-4390-80a4-f6c9a937081e",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import asyncio\n",
|
||||
"\n",
|
||||
"inputs = [\n",
|
||||
" \"How many people live in canada as of 2023?\",\n",
|
||||
" \"who is dua lipa's boyfriend? what is his age raised to the .43 power?\",\n",
|
||||
" \"what is dua lipa's boyfriend age raised to the .43 power?\",\n",
|
||||
" \"how far is it from paris to boston in miles\",\n",
|
||||
" \"what was the total number of points scored in the 2023 super bowl? what is that number raised to the .23 power?\",\n",
|
||||
" \"what was the total number of points scored in the 2023 super bowl raised to the .23 power?\",\n",
|
||||
" \"how many more points were scored in the 2023 super bowl than in the 2022 super bowl?\",\n",
|
||||
" \"what is 153 raised to .1312 power?\",\n",
|
||||
" \"who is kendall jenner's boyfriend? what is his height (in inches) raised to .13 power?\",\n",
|
||||
" \"what is 1213 divided by 4345?\",\n",
|
||||
"]\n",
|
||||
"results = []\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"async def arun(agent, input_example):\n",
|
||||
" try:\n",
|
||||
" return await agent.arun(input_example)\n",
|
||||
" except Exception as e:\n",
|
||||
" # The agent sometimes makes mistakes! These will be captured by the tracing.\n",
|
||||
" return e\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"for input_example in inputs:\n",
|
||||
" results.append(arun(agent, input_example))\n",
|
||||
"results = await asyncio.gather(*results)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "0405ff30-21fe-413d-85cf-9fa3c649efec",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.callbacks.tracers.langchain import wait_for_all_tracers\n",
|
||||
"\n",
|
||||
"# Logs are submitted in a background thread to avoid blocking execution.\n",
|
||||
"# For the sake of this tutorial, we want to make sure\n",
|
||||
"# they've been submitted before moving on. This is also\n",
|
||||
"# useful for serverless deployments.\n",
|
||||
"wait_for_all_tracers()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "9decb964-be07-4b6c-9802-9825c8be7b64",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Assuming you've successfully configured the server earlier, your agent traces should show up in your web app.\n",
|
||||
"\n",
|
||||
"Navigate to the web app to see the results: [local app](http://localhost:80) or [hosted app](https://smith.langchain.com/)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6c43c311-4e09-4d57-9ef3-13afb96ff430",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Evaluate a New Agent\n",
|
||||
"\n",
|
||||
"Once you've debugged a customized your LLM component, you will want to create tests and benchmark evaluations to measure its performance before putting it into a production environment.\n",
|
||||
"\n",
|
||||
"In this notebook, you will run evaluators to test an agent. You will do so in a few steps:\n",
|
||||
"\n",
|
||||
"1. Create a dataset\n",
|
||||
"2. Select or create evaluators to measure performance\n",
|
||||
"3. Define the LLM or Chain initializer to test\n",
|
||||
"4. Run the chain and evaluators using the helper functions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "beab1a29-b79d-4a99-b5b1-0870c2d772b1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 1. Create Dataset\n",
|
||||
"\n",
|
||||
"Below, use the client to create a dataset from the Agent runs you just logged while debugging above. You will use these later to measure performance.\n",
|
||||
"\n",
|
||||
"For more information on datasets, including how to create them from CSVs or other files or how to create them in the web app, please refer to the [LangSmith documentation](https://docs.smith.langchain.com/)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "17580c4b-bd04-4dde-9d21-9d4edd25b00d",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset_name = f\"calculator-example-dataset-{unique_id}\"\n",
|
||||
"\n",
|
||||
"dataset = client.create_dataset(\n",
|
||||
" dataset_name, description=\"A calculator example dataset\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"runs = client.list_runs(\n",
|
||||
" project_name=os.environ[\"LANGCHAIN_PROJECT\"],\n",
|
||||
" execution_order=1, # Only return the top-level runs\n",
|
||||
" error=False, # Only runs that succeed\n",
|
||||
")\n",
|
||||
"for run in runs:\n",
|
||||
" client.create_example(inputs=run.inputs, outputs=run.outputs, dataset_id=dataset.id)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8adfd29c-b258-49e5-94b4-74597a12ba16",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"### 2. Define the Agent or LLM to Test\n",
|
||||
"\n",
|
||||
"You can evaluate any LLM, chain, or agent. Since chains can have memory, we will pass in a `chain_factory` (aka a `constructor` ) function to initialize for each call.\n",
|
||||
"\n",
|
||||
"In this case, you will test an agent that uses OpenAI's function calling endpoints, but it can be any simple chain."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "f42d8ecc-d46a-448b-a89c-04b0f6907f75",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.agents import AgentType, initialize_agent, load_tools\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-3.5-turbo-0613\", temperature=0)\n",
|
||||
"tools = load_tools([\"serpapi\", \"llm-math\"], llm=llm)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Since chains can be stateful (e.g. they can have memory), we provide\n",
|
||||
"# a way to initialize a new chain for each row in the dataset. This is done\n",
|
||||
"# by passing in a factory function that returns a new chain for each row.\n",
|
||||
"def agent_factory():\n",
|
||||
" return initialize_agent(tools, llm, agent=AgentType.OPENAI_FUNCTIONS, verbose=False)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# If your chain is NOT stateful, your factory can return the object directly\n",
|
||||
"# to improve runtime performance. For example:\n",
|
||||
"# chain_factory = lambda: agent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "9cb9ef53",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 3. Configure Evaluation\n",
|
||||
"\n",
|
||||
"Manually comparing the results of chains in the UI is effective, but it can be time consuming.\n",
|
||||
"It can be helpful to use automated metrics and ai-assisted feedback to evaluate your component's performance.\n",
|
||||
"\n",
|
||||
"Below, we will create some pre-implemented run evaluators that do the following:\n",
|
||||
"- Compare results against ground truth labels. (You used the debug outputs above for this)\n",
|
||||
"- Measure semantic (dis)similarity using embedding distance\n",
|
||||
"- Evaluate 'aspects' of the agent's response in a reference-free manner using custom criteria\n",
|
||||
"\n",
|
||||
"For a longer discussion of how to select an appropriate evaluator for your use case and how to create your own\n",
|
||||
"custom evaluators, please refer to the [LangSmith documentation](https://docs.smith.langchain.com/).\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "a25dc281",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.evaluation import EvaluatorType\n",
|
||||
"from langchain.smith import RunEvalConfig\n",
|
||||
"\n",
|
||||
"evaluation_config = RunEvalConfig(\n",
|
||||
" # Evaluators can either be an evaluator type (e.g., \"qa\", \"criteria\", \"embedding_distance\", etc.) or a configuration for that evaluator\n",
|
||||
" evaluators=[\n",
|
||||
" # Measures whether a QA response is \"Correct\", based on a reference answer\n",
|
||||
" # You can also select via the raw string \"qa\"\n",
|
||||
" EvaluatorType.QA,\n",
|
||||
" # Measure the embedding distance between the output and the reference answer\n",
|
||||
" # Equivalent to: EvalConfig.EmbeddingDistance(embeddings=OpenAIEmbeddings())\n",
|
||||
" EvaluatorType.EMBEDDING_DISTANCE,\n",
|
||||
" # Grade whether the output satisfies the stated criteria. You can select a default one such as \"helpfulness\" or provide your own.\n",
|
||||
" RunEvalConfig.LabeledCriteria(\"helpfulness\"),\n",
|
||||
" # Both the Criteria and LabeledCriteria evaluators can be configured with a dictionary of custom criteria.\n",
|
||||
" RunEvalConfig.Criteria(\n",
|
||||
" {\n",
|
||||
" \"fifth-grader-score\": \"Do you have to be smarter than a fifth grader to answer this question?\"\n",
|
||||
" }\n",
|
||||
" ),\n",
|
||||
" ],\n",
|
||||
" # You can add custom StringEvaluator or RunEvaluator objects here as well, which will automatically be\n",
|
||||
" # applied to each prediction. Check out the docs for examples.\n",
|
||||
" custom_evaluators=[],\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "07885b10",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"### 4. Run the Agent and Evaluators\n",
|
||||
"\n",
|
||||
"Use the [arun_on_dataset](https://api.python.langchain.com/en/latest/smith/langchain.smith.evaluation.runner_utils.arun_on_dataset.html#langchain.smith.evaluation.runner_utils.arun_on_dataset) (or synchronous [run_on_dataset](https://api.python.langchain.com/en/latest/smith/langchain.smith.evaluation.runner_utils.run_on_dataset.html#langchain.smith.evaluation.runner_utils.run_on_dataset)) function to evaluate your model. This will:\n",
|
||||
"1. Fetch example rows from the specified dataset\n",
|
||||
"2. Run your llm or chain on each example.\n",
|
||||
"3. Apply evalutors to the resulting run traces and corresponding reference examples to generate automated feedback.\n",
|
||||
"\n",
|
||||
"The results will be visible in the LangSmith app."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "3733269b-8085-4644-9d5d-baedcff13a2f",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Processed examples: 1\r"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Chain failed for example 890fac1b-9788-4545-a952-c8f569f21a13. Error: LLMMathChain._evaluate(\"\n",
|
||||
"age_of_Dua_Lipa_boyfriend ** 0.43\n",
|
||||
"\") raised error: 'age_of_Dua_Lipa_boyfriend'. Please try again with a valid numerical expression\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Processed examples: 6\r"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Chain failed for example 614a5986-f9de-495e-adcf-a2a4bcfe68b6. Error: Too many arguments to single-input tool Calculator. Args: ['height ^ 0.13', {'height': 68}]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Processed examples: 9\r"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.smith import (\n",
|
||||
" arun_on_dataset,\n",
|
||||
" run_on_dataset, # Available if your chain doesn't support async calls.\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chain_results = await arun_on_dataset(\n",
|
||||
" client=client,\n",
|
||||
" dataset_name=dataset_name,\n",
|
||||
" llm_or_chain_factory=agent_factory,\n",
|
||||
" evaluation=evaluation_config,\n",
|
||||
" verbose=True,\n",
|
||||
" tags=[\"testing-notebook\"], # Optional, adds a tag to the resulting chain runs\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Sometimes, the agent will error due to parsing issues, incompatible tool inputs, etc.\n",
|
||||
"# These are logged as warnings here and captured as errors in the tracing UI."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cdacd159-eb4d-49e9-bb2a-c55322c40ed4",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"### Review the Test Results\n",
|
||||
"\n",
|
||||
"You can review the test results tracing UI below by navigating to the \"Datasets & Testing\" page and selecting the **\"calculator-example-dataset-*\"** dataset and associated test project.\n",
|
||||
"\n",
|
||||
"This will show the new runs and the feedback logged from the selected evaluators."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "591c819e-9932-45cf-adab-63727dd49559",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Exporting Datasets and Runs\n",
|
||||
"\n",
|
||||
"LangSmith lets you export data to common formats such as CSV or JSONL directly in the web app. You can also use the client to fetch runs for further analysis, to store in your own database, or to share with others. Let's fetch the run traces from the evaluation run."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "33bfefde-d1bb-4f50-9f7a-fd572ee76820",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Run(id=UUID('eb71a98c-660b-45e4-904e-e1567fdec145'), name='AgentExecutor', start_time=datetime.datetime(2023, 7, 13, 8, 23, 35, 102907), run_type=<RunTypeEnum.chain: 'chain'>, end_time=datetime.datetime(2023, 7, 13, 8, 23, 37, 793962), extra={'runtime': {'library': 'langchain', 'runtime': 'python', 'platform': 'macOS-13.4.1-arm64-arm-64bit', 'sdk_version': '0.0.5', 'library_version': '0.0.231', 'runtime_version': '3.11.2'}, 'total_tokens': 512, 'prompt_tokens': 451, 'completion_tokens': 61}, error=None, serialized=None, events=[{'name': 'start', 'time': '2023-07-13T08:23:35.102907'}, {'name': 'end', 'time': '2023-07-13T08:23:37.793962'}], inputs={'input': 'what is 1213 divided by 4345?'}, outputs={'output': '1213 divided by 4345 is approximately 0.2792.'}, reference_example_id=UUID('d343add7-2631-417b-905a-dc39361ace69'), parent_run_id=None, tags=['openai-functions', 'testing-notebook'], execution_order=1, session_id=UUID('cc5f4f88-f1bf-495f-8adb-384f66321eb2'), child_run_ids=[UUID('daa9708a-ad08-4be1-9841-e92e2f384cce'), UUID('28b1ada7-3fe8-4853-a5b0-dac8a93a3066'), UUID('dc0b4867-3f3d-46f7-bfb5-f4be10f3cc52'), UUID('58c9494e-2ea6-4291-ab78-73b8ffcdaef5'), UUID('8f5a3e08-ce96-4c81-a6aa-86bf5b3bb590'), UUID('f0447532-7ded-45b6-9d87-f1fa18e381b0')], child_runs=None, feedback_stats={'correctness': {'n': 1, 'avg': 1.0, 'mode': 1}, 'helpfulness': {'n': 1, 'avg': 1.0, 'mode': 1}, 'fifth-grader-score': {'n': 1, 'avg': 0.0, 'mode': 0}, 'embedding_cosine_distance': {'n': 1, 'avg': 0.144522385071361, 'mode': 0.144522385071361}})"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"runs = list(client.list_runs(dataset_name=dataset_name))\n",
|
||||
"runs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "6595c888-1f5c-4ae3-9390-0a559f5575d1",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'correctness': {'n': 7, 'avg': 0.7142857142857143, 'mode': 1},\n",
|
||||
" 'helpfulness': {'n': 7, 'avg': 1.0, 'mode': 1},\n",
|
||||
" 'fifth-grader-score': {'n': 7, 'avg': 0.7142857142857143, 'mode': 1},\n",
|
||||
" 'embedding_cosine_distance': {'n': 7,\n",
|
||||
" 'avg': 0.08308464442094905,\n",
|
||||
" 'mode': 0.00371031210788608}}"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"client.read_project(project_id=runs[0].session_id).feedback_stats"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "2646f0fb-81d4-43ce-8a9b-54b8e19841e2",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"## Conclusion\n",
|
||||
"\n",
|
||||
"Congratulations! You have succesfully traced and evaluated an agent using LangSmith!\n",
|
||||
"\n",
|
||||
"This was a quick guide to get started, but there are many more ways to use LangSmith to speed up your developer flow and produce better results.\n",
|
||||
"\n",
|
||||
"For more information on how you can get the most out of LangSmith, check out [LangSmith documentation](https://docs.smith.langchain.com/), and please reach out with questions, feature requests, or feedback at [support@langchain.dev](mailto:support@langchain.dev)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "57237f12",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -16,7 +16,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 1,
|
||||
"id": "c0a83623",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -38,6 +38,20 @@
|
||||
">This initializes the SerpAPIWrapper for search functionality (search).\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "a2b0a215",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\n",
|
||||
" \"SERPAPI_API_KEY\"\n",
|
||||
"] = \"897780527132b5f31d8d73c40c820d5ef2c2279687efa69f413a61f752027747\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
@@ -46,11 +60,11 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Initialize the OpenAI language model\n",
|
||||
"#Replace <your_api_key> in openai_api_key=\"<your_api_key>\" with your actual OpenAI key.\n",
|
||||
"# Replace <your_api_key> in openai_api_key=\"<your_api_key>\" with your actual OpenAI key.\n",
|
||||
"llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")\n",
|
||||
"\n",
|
||||
"# Initialize the SerpAPIWrapper for search functionality\n",
|
||||
"#Replace <your_api_key> in openai_api_key=\"<your_api_key>\" with your actual SerpAPI key.\n",
|
||||
"# Replace <your_api_key> in openai_api_key=\"<your_api_key>\" with your actual SerpAPI key.\n",
|
||||
"search = SerpAPIWrapper()\n",
|
||||
"\n",
|
||||
"# Define a list of tools offered by the agent\n",
|
||||
@@ -58,9 +72,9 @@
|
||||
" Tool(\n",
|
||||
" name=\"Search\",\n",
|
||||
" func=search.run,\n",
|
||||
" description=\"Useful when you need to answer questions about current events. You should ask targeted questions.\"\n",
|
||||
" description=\"Useful when you need to answer questions about current events. You should ask targeted questions.\",\n",
|
||||
" ),\n",
|
||||
"]\n"
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -70,7 +84,9 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"mrkl = initialize_agent(tools, llm, agent=AgentType.OPENAI_MULTI_FUNCTIONS, verbose=True)"
|
||||
"mrkl = initialize_agent(\n",
|
||||
" tools, llm, agent=AgentType.OPENAI_MULTI_FUNCTIONS, verbose=True\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -82,6 +98,7 @@
|
||||
"source": [
|
||||
"# Do this so we can see exactly what's going on under the hood\n",
|
||||
"import langchain\n",
|
||||
"\n",
|
||||
"langchain.debug = True"
|
||||
]
|
||||
},
|
||||
@@ -194,15 +211,223 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"mrkl.run(\n",
|
||||
" \"What is the weather in LA and SF?\"\n",
|
||||
"mrkl.run(\"What is the weather in LA and SF?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d31d4c09",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Configuring max iteration behavior\n",
|
||||
"\n",
|
||||
"To make sure that our agent doesn't get stuck in excessively long loops, we can set max_iterations. We can also set an early stopping method, which will determine our agent's behavior once the number of max iterations is hit. By default, the early stopping uses method `force` which just returns that constant string. Alternatively, you could specify method `generate` which then does one FINAL pass through the LLM to generate an output."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "9f5f6743",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"mrkl = initialize_agent(\n",
|
||||
" tools,\n",
|
||||
" llm,\n",
|
||||
" agent=AgentType.OPENAI_FUNCTIONS,\n",
|
||||
" verbose=True,\n",
|
||||
" max_iterations=2,\n",
|
||||
" early_stopping_method=\"generate\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "4362ebc7",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:AgentExecutor] Entering Chain run with input:\n",
|
||||
"\u001b[0m{\n",
|
||||
" \"input\": \"What is the weather in NYC today, yesterday, and the day before?\"\n",
|
||||
"}\n",
|
||||
"\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[1:chain:AgentExecutor > 2:llm:ChatOpenAI] Entering LLM run with input:\n",
|
||||
"\u001b[0m{\n",
|
||||
" \"prompts\": [\n",
|
||||
" \"System: You are a helpful AI assistant.\\nHuman: What is the weather in NYC today, yesterday, and the day before?\"\n",
|
||||
" ]\n",
|
||||
"}\n",
|
||||
"\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[1:chain:AgentExecutor > 2:llm:ChatOpenAI] [1.27s] Exiting LLM run with output:\n",
|
||||
"\u001b[0m{\n",
|
||||
" \"generations\": [\n",
|
||||
" [\n",
|
||||
" {\n",
|
||||
" \"text\": \"\",\n",
|
||||
" \"generation_info\": null,\n",
|
||||
" \"message\": {\n",
|
||||
" \"lc\": 1,\n",
|
||||
" \"type\": \"constructor\",\n",
|
||||
" \"id\": [\n",
|
||||
" \"langchain\",\n",
|
||||
" \"schema\",\n",
|
||||
" \"messages\",\n",
|
||||
" \"AIMessage\"\n",
|
||||
" ],\n",
|
||||
" \"kwargs\": {\n",
|
||||
" \"content\": \"\",\n",
|
||||
" \"additional_kwargs\": {\n",
|
||||
" \"function_call\": {\n",
|
||||
" \"name\": \"Search\",\n",
|
||||
" \"arguments\": \"{\\n \\\"query\\\": \\\"weather in NYC today\\\"\\n}\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
" ],\n",
|
||||
" \"llm_output\": {\n",
|
||||
" \"token_usage\": {\n",
|
||||
" \"prompt_tokens\": 79,\n",
|
||||
" \"completion_tokens\": 17,\n",
|
||||
" \"total_tokens\": 96\n",
|
||||
" },\n",
|
||||
" \"model_name\": \"gpt-3.5-turbo-0613\"\n",
|
||||
" },\n",
|
||||
" \"run\": null\n",
|
||||
"}\n",
|
||||
"\u001b[32;1m\u001b[1;3m[tool/start]\u001b[0m \u001b[1m[1:chain:AgentExecutor > 3:tool:Search] Entering Tool run with input:\n",
|
||||
"\u001b[0m\"{'query': 'weather in NYC today'}\"\n",
|
||||
"\u001b[36;1m\u001b[1;3m[tool/end]\u001b[0m \u001b[1m[1:chain:AgentExecutor > 3:tool:Search] [3.84s] Exiting Tool run with output:\n",
|
||||
"\u001b[0m\"10:00 am · Feels Like85° · WindSE 4 mph · Humidity78% · UV Index3 of 11 · Cloud Cover81% · Rain Amount0 in ...\"\n",
|
||||
"\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[1:chain:AgentExecutor > 4:llm:ChatOpenAI] Entering LLM run with input:\n",
|
||||
"\u001b[0m{\n",
|
||||
" \"prompts\": [\n",
|
||||
" \"System: You are a helpful AI assistant.\\nHuman: What is the weather in NYC today, yesterday, and the day before?\\nAI: {'name': 'Search', 'arguments': '{\\\\n \\\"query\\\": \\\"weather in NYC today\\\"\\\\n}'}\\nFunction: 10:00 am · Feels Like85° · WindSE 4 mph · Humidity78% · UV Index3 of 11 · Cloud Cover81% · Rain Amount0 in ...\"\n",
|
||||
" ]\n",
|
||||
"}\n",
|
||||
"\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[1:chain:AgentExecutor > 4:llm:ChatOpenAI] [1.24s] Exiting LLM run with output:\n",
|
||||
"\u001b[0m{\n",
|
||||
" \"generations\": [\n",
|
||||
" [\n",
|
||||
" {\n",
|
||||
" \"text\": \"\",\n",
|
||||
" \"generation_info\": null,\n",
|
||||
" \"message\": {\n",
|
||||
" \"lc\": 1,\n",
|
||||
" \"type\": \"constructor\",\n",
|
||||
" \"id\": [\n",
|
||||
" \"langchain\",\n",
|
||||
" \"schema\",\n",
|
||||
" \"messages\",\n",
|
||||
" \"AIMessage\"\n",
|
||||
" ],\n",
|
||||
" \"kwargs\": {\n",
|
||||
" \"content\": \"\",\n",
|
||||
" \"additional_kwargs\": {\n",
|
||||
" \"function_call\": {\n",
|
||||
" \"name\": \"Search\",\n",
|
||||
" \"arguments\": \"{\\n \\\"query\\\": \\\"weather in NYC yesterday\\\"\\n}\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
" ],\n",
|
||||
" \"llm_output\": {\n",
|
||||
" \"token_usage\": {\n",
|
||||
" \"prompt_tokens\": 142,\n",
|
||||
" \"completion_tokens\": 17,\n",
|
||||
" \"total_tokens\": 159\n",
|
||||
" },\n",
|
||||
" \"model_name\": \"gpt-3.5-turbo-0613\"\n",
|
||||
" },\n",
|
||||
" \"run\": null\n",
|
||||
"}\n",
|
||||
"\u001b[32;1m\u001b[1;3m[tool/start]\u001b[0m \u001b[1m[1:chain:AgentExecutor > 5:tool:Search] Entering Tool run with input:\n",
|
||||
"\u001b[0m\"{'query': 'weather in NYC yesterday'}\"\n",
|
||||
"\u001b[36;1m\u001b[1;3m[tool/end]\u001b[0m \u001b[1m[1:chain:AgentExecutor > 5:tool:Search] [1.15s] Exiting Tool run with output:\n",
|
||||
"\u001b[0m\"New York Temperature Yesterday. Maximum temperature yesterday: 81 °F (at 1:51 pm) Minimum temperature yesterday: 72 °F (at 7:17 pm) Average temperature ...\"\n",
|
||||
"\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[1:llm:ChatOpenAI] Entering LLM run with input:\n",
|
||||
"\u001b[0m{\n",
|
||||
" \"prompts\": [\n",
|
||||
" \"System: You are a helpful AI assistant.\\nHuman: What is the weather in NYC today, yesterday, and the day before?\\nAI: {'name': 'Search', 'arguments': '{\\\\n \\\"query\\\": \\\"weather in NYC today\\\"\\\\n}'}\\nFunction: 10:00 am · Feels Like85° · WindSE 4 mph · Humidity78% · UV Index3 of 11 · Cloud Cover81% · Rain Amount0 in ...\\nAI: {'name': 'Search', 'arguments': '{\\\\n \\\"query\\\": \\\"weather in NYC yesterday\\\"\\\\n}'}\\nFunction: New York Temperature Yesterday. Maximum temperature yesterday: 81 °F (at 1:51 pm) Minimum temperature yesterday: 72 °F (at 7:17 pm) Average temperature ...\"\n",
|
||||
" ]\n",
|
||||
"}\n",
|
||||
"\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[1:llm:ChatOpenAI] [2.68s] Exiting LLM run with output:\n",
|
||||
"\u001b[0m{\n",
|
||||
" \"generations\": [\n",
|
||||
" [\n",
|
||||
" {\n",
|
||||
" \"text\": \"Today in NYC, the weather is currently 85°F with a southeast wind of 4 mph. The humidity is at 78% and there is 81% cloud cover. There is no rain expected today.\\n\\nYesterday in NYC, the maximum temperature was 81°F at 1:51 pm, and the minimum temperature was 72°F at 7:17 pm.\\n\\nFor the day before yesterday, I do not have the specific weather information.\",\n",
|
||||
" \"generation_info\": null,\n",
|
||||
" \"message\": {\n",
|
||||
" \"lc\": 1,\n",
|
||||
" \"type\": \"constructor\",\n",
|
||||
" \"id\": [\n",
|
||||
" \"langchain\",\n",
|
||||
" \"schema\",\n",
|
||||
" \"messages\",\n",
|
||||
" \"AIMessage\"\n",
|
||||
" ],\n",
|
||||
" \"kwargs\": {\n",
|
||||
" \"content\": \"Today in NYC, the weather is currently 85°F with a southeast wind of 4 mph. The humidity is at 78% and there is 81% cloud cover. There is no rain expected today.\\n\\nYesterday in NYC, the maximum temperature was 81°F at 1:51 pm, and the minimum temperature was 72°F at 7:17 pm.\\n\\nFor the day before yesterday, I do not have the specific weather information.\",\n",
|
||||
" \"additional_kwargs\": {}\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
" ],\n",
|
||||
" \"llm_output\": {\n",
|
||||
" \"token_usage\": {\n",
|
||||
" \"prompt_tokens\": 160,\n",
|
||||
" \"completion_tokens\": 91,\n",
|
||||
" \"total_tokens\": 251\n",
|
||||
" },\n",
|
||||
" \"model_name\": \"gpt-3.5-turbo-0613\"\n",
|
||||
" },\n",
|
||||
" \"run\": null\n",
|
||||
"}\n",
|
||||
"\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:AgentExecutor] [10.18s] Exiting Chain run with output:\n",
|
||||
"\u001b[0m{\n",
|
||||
" \"output\": \"Today in NYC, the weather is currently 85°F with a southeast wind of 4 mph. The humidity is at 78% and there is 81% cloud cover. There is no rain expected today.\\n\\nYesterday in NYC, the maximum temperature was 81°F at 1:51 pm, and the minimum temperature was 72°F at 7:17 pm.\\n\\nFor the day before yesterday, I do not have the specific weather information.\"\n",
|
||||
"}\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Today in NYC, the weather is currently 85°F with a southeast wind of 4 mph. The humidity is at 78% and there is 81% cloud cover. There is no rain expected today.\\n\\nYesterday in NYC, the maximum temperature was 81°F at 1:51 pm, and the minimum temperature was 72°F at 7:17 pm.\\n\\nFor the day before yesterday, I do not have the specific weather information.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"mrkl.run(\"What is the weather in NYC today, yesterday, and the day before?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "067a8d3e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Notice that we never get around to looking up the weather the day before yesterday, due to hitting our max_iterations limit."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9f5f6743",
|
||||
"id": "c3318a11",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
@@ -210,9 +435,9 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": "venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
"name": "venv"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
@@ -224,7 +449,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -78,6 +78,7 @@
|
||||
"source": [
|
||||
"from langchain.prompts import MessagesPlaceholder\n",
|
||||
"from langchain.memory import ConversationBufferMemory\n",
|
||||
"\n",
|
||||
"agent_kwargs = {\n",
|
||||
" \"extra_prompt_messages\": [MessagesPlaceholder(variable_name=\"memory\")],\n",
|
||||
"}\n",
|
||||
@@ -92,12 +93,12 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = initialize_agent(\n",
|
||||
" tools, \n",
|
||||
" llm, \n",
|
||||
" agent=AgentType.OPENAI_FUNCTIONS, \n",
|
||||
" verbose=True, \n",
|
||||
" agent_kwargs=agent_kwargs, \n",
|
||||
" memory=memory\n",
|
||||
" tools,\n",
|
||||
" llm,\n",
|
||||
" agent=AgentType.OPENAI_FUNCTIONS,\n",
|
||||
" verbose=True,\n",
|
||||
" agent_kwargs=agent_kwargs,\n",
|
||||
" memory=memory,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -42,15 +42,14 @@
|
||||
"import yfinance as yf\n",
|
||||
"from datetime import datetime, timedelta\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_current_stock_price(ticker):\n",
|
||||
" \"\"\"Method to get current stock price\"\"\"\n",
|
||||
"\n",
|
||||
" ticker_data = yf.Ticker(ticker)\n",
|
||||
" recent = ticker_data.history(period='1d')\n",
|
||||
" return {\n",
|
||||
" 'price': recent.iloc[0]['Close'],\n",
|
||||
" 'currency': ticker_data.info['currency']\n",
|
||||
" }\n",
|
||||
" recent = ticker_data.history(period=\"1d\")\n",
|
||||
" return {\"price\": recent.iloc[0][\"Close\"], \"currency\": ticker_data.info[\"currency\"]}\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_stock_performance(ticker, days):\n",
|
||||
" \"\"\"Method to get stock price change in percentage\"\"\"\n",
|
||||
@@ -58,11 +57,9 @@
|
||||
" past_date = datetime.today() - timedelta(days=days)\n",
|
||||
" ticker_data = yf.Ticker(ticker)\n",
|
||||
" history = ticker_data.history(start=past_date)\n",
|
||||
" old_price = history.iloc[0]['Close']\n",
|
||||
" current_price = history.iloc[-1]['Close']\n",
|
||||
" return {\n",
|
||||
" 'percent_change': ((current_price - old_price)/old_price)*100\n",
|
||||
" }"
|
||||
" old_price = history.iloc[0][\"Close\"]\n",
|
||||
" current_price = history.iloc[-1][\"Close\"]\n",
|
||||
" return {\"percent_change\": ((current_price - old_price) / old_price) * 100}"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -88,7 +85,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"get_current_stock_price('MSFT')"
|
||||
"get_current_stock_price(\"MSFT\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -114,7 +111,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"get_stock_performance('MSFT', 30)"
|
||||
"get_stock_performance(\"MSFT\", 30)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -138,10 +135,13 @@
|
||||
"from pydantic import BaseModel, Field\n",
|
||||
"from langchain.tools import BaseTool\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class CurrentStockPriceInput(BaseModel):\n",
|
||||
" \"\"\"Inputs for get_current_stock_price\"\"\"\n",
|
||||
"\n",
|
||||
" ticker: str = Field(description=\"Ticker symbol of the stock\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class CurrentStockPriceTool(BaseTool):\n",
|
||||
" name = \"get_current_stock_price\"\n",
|
||||
" description = \"\"\"\n",
|
||||
@@ -160,8 +160,10 @@
|
||||
"\n",
|
||||
"class StockPercentChangeInput(BaseModel):\n",
|
||||
" \"\"\"Inputs for get_stock_performance\"\"\"\n",
|
||||
"\n",
|
||||
" ticker: str = Field(description=\"Ticker symbol of the stock\")\n",
|
||||
" days: int = Field(description='Timedelta days to get past date from current date')\n",
|
||||
" days: int = Field(description=\"Timedelta days to get past date from current date\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class StockPerformanceTool(BaseTool):\n",
|
||||
" name = \"get_stock_performance\"\n",
|
||||
@@ -202,15 +204,9 @@
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.agents import initialize_agent\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(\n",
|
||||
" model=\"gpt-3.5-turbo-0613\",\n",
|
||||
" temperature=0\n",
|
||||
")\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-3.5-turbo-0613\", temperature=0)\n",
|
||||
"\n",
|
||||
"tools = [\n",
|
||||
" CurrentStockPriceTool(),\n",
|
||||
" StockPerformanceTool()\n",
|
||||
"]\n",
|
||||
"tools = [CurrentStockPriceTool(), StockPerformanceTool()]\n",
|
||||
"\n",
|
||||
"agent = initialize_agent(tools, llm, agent=AgentType.OPENAI_FUNCTIONS, verbose=True)"
|
||||
]
|
||||
@@ -261,7 +257,9 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"What is the current price of Microsoft stock? How it has performed over past 6 months?\")"
|
||||
"agent.run(\n",
|
||||
" \"What is the current price of Microsoft stock? How it has performed over past 6 months?\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -355,7 +353,9 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run('In the past 3 months, which stock between Microsoft and Google has performed the best?')"
|
||||
"agent.run(\n",
|
||||
" \"In the past 3 months, which stock between Microsoft and Google has performed the best?\"\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -79,10 +79,10 @@
|
||||
"source": [
|
||||
"llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")\n",
|
||||
"agent = initialize_agent(\n",
|
||||
" toolkit.get_tools(), \n",
|
||||
" llm, \n",
|
||||
" agent=AgentType.OPENAI_FUNCTIONS, \n",
|
||||
" verbose=True, \n",
|
||||
" toolkit.get_tools(),\n",
|
||||
" llm,\n",
|
||||
" agent=AgentType.OPENAI_FUNCTIONS,\n",
|
||||
" verbose=True,\n",
|
||||
" agent_kwargs=agent_kwargs,\n",
|
||||
")"
|
||||
]
|
||||
|
||||
@@ -17,16 +17,7 @@
|
||||
"execution_count": 1,
|
||||
"id": "8632a37c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/harrisonchase/.pyenv/versions/3.9.1/envs/langchain/lib/python3.9/site-packages/deeplake/util/check_latest_version.py:32: UserWarning: A newer version of deeplake (3.6.5) is available. It's recommended that you update to the latest version using `pip install -U deeplake`.\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from pydantic import BaseModel, Field\n",
|
||||
"\n",
|
||||
@@ -56,14 +47,14 @@
|
||||
"files = [\n",
|
||||
" # https://abc.xyz/investor/static/pdf/2023Q1_alphabet_earnings_release.pdf\n",
|
||||
" {\n",
|
||||
" \"name\": \"alphabet-earnings\", \n",
|
||||
" \"name\": \"alphabet-earnings\",\n",
|
||||
" \"path\": \"/Users/harrisonchase/Downloads/2023Q1_alphabet_earnings_release.pdf\",\n",
|
||||
" }, \n",
|
||||
" },\n",
|
||||
" # https://digitalassets.tesla.com/tesla-contents/image/upload/IR/TSLA-Q1-2023-Update\n",
|
||||
" {\n",
|
||||
" \"name\": \"tesla-earnings\", \n",
|
||||
" \"path\": \"/Users/harrisonchase/Downloads/TSLA-Q1-2023-Update.pdf\"\n",
|
||||
" }\n",
|
||||
" \"name\": \"tesla-earnings\",\n",
|
||||
" \"path\": \"/Users/harrisonchase/Downloads/TSLA-Q1-2023-Update.pdf\",\n",
|
||||
" },\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"for file in files:\n",
|
||||
@@ -73,14 +64,14 @@
|
||||
" docs = text_splitter.split_documents(pages)\n",
|
||||
" embeddings = OpenAIEmbeddings()\n",
|
||||
" retriever = FAISS.from_documents(docs, embeddings).as_retriever()\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" # Wrap retrievers in a Tool\n",
|
||||
" tools.append(\n",
|
||||
" Tool(\n",
|
||||
" args_schema=DocumentInput,\n",
|
||||
" name=file[\"name\"], \n",
|
||||
" name=file[\"name\"],\n",
|
||||
" description=f\"useful when you want to answer questions about {file['name']}\",\n",
|
||||
" func=RetrievalQA.from_chain_type(llm=llm, retriever=retriever)\n",
|
||||
" func=RetrievalQA.from_chain_type(llm=llm, retriever=retriever),\n",
|
||||
" )\n",
|
||||
" )"
|
||||
]
|
||||
@@ -139,7 +130,7 @@
|
||||
"source": [
|
||||
"llm = ChatOpenAI(\n",
|
||||
" temperature=0,\n",
|
||||
" model=\"gpt-3.5-turbo-0613\", \n",
|
||||
" model=\"gpt-3.5-turbo-0613\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"agent = initialize_agent(\n",
|
||||
@@ -170,6 +161,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import langchain\n",
|
||||
"\n",
|
||||
"langchain.debug = True"
|
||||
]
|
||||
},
|
||||
@@ -405,7 +397,7 @@
|
||||
"source": [
|
||||
"llm = ChatOpenAI(\n",
|
||||
" temperature=0,\n",
|
||||
" model=\"gpt-3.5-turbo-0613\", \n",
|
||||
" model=\"gpt-3.5-turbo-0613\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"agent = initialize_agent(\n",
|
||||
|
||||
@@ -136,9 +136,11 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"Create an email draft for me to edit of a letter from the perspective of a sentient parrot\"\n",
|
||||
" \" who is looking to collaborate on some research with her\"\n",
|
||||
" \" estranged friend, a cat. Under no circumstances may you send the message, however.\")"
|
||||
"agent.run(\n",
|
||||
" \"Create an email draft for me to edit of a letter from the perspective of a sentient parrot\"\n",
|
||||
" \" who is looking to collaborate on some research with her\"\n",
|
||||
" \" estranged friend, a cat. Under no circumstances may you send the message, however.\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -160,7 +162,9 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"Could you search in my drafts folder and let me know if any of them are about collaboration?\")"
|
||||
"agent.run(\n",
|
||||
" \"Could you search in my drafts folder and let me know if any of them are about collaboration?\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -190,7 +194,9 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"Can you schedule a 30 minute meeting with a sentient parrot to discuss research collaborations on October 3, 2023 at 2 pm Easter Time?\")"
|
||||
"agent.run(\n",
|
||||
" \"Can you schedule a 30 minute meeting with a sentient parrot to discuss research collaborations on October 3, 2023 at 2 pm Easter Time?\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -210,7 +216,9 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"Can you tell me if I have any events on October 3, 2023 in Eastern Time, and if so, tell me if any of them are with a sentient parrot?\")"
|
||||
"agent.run(\n",
|
||||
" \"Can you tell me if I have any events on October 3, 2023 in Eastern Time, and if so, tell me if any of them are with a sentient parrot?\"\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
"source": [
|
||||
"# SQL Database Agent\n",
|
||||
"\n",
|
||||
"This notebook showcases an agent designed to interact with a sql databases. The agent builds off of [SQLDatabaseChain](https://langchain.readthedocs.io/en/latest/modules/chains/examples/sqlite.html) and is designed to answer more general questions about a database, as well as recover from errors.\n",
|
||||
"This notebook showcases an agent designed to interact with a sql databases. The agent builds off of [SQLDatabaseChain](https://python.langchain.com/docs/modules/chains/popular/sqlite) and is designed to answer more general questions about a database, as well as recover from errors.\n",
|
||||
"\n",
|
||||
"Note that, as this agent is in active development, all answers might not be correct. Additionally, it is not guaranteed that the agent won't perform DML statements on your database given certain questions. Be careful running it on sensitive data!\n",
|
||||
"\n",
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install apify-client"
|
||||
"#!pip install apify-client openai langchain chromadb tiktoken"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -26,8 +26,8 @@
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"BING_SUBSCRIPTION_KEY\"] = \"\"\n",
|
||||
"os.environ[\"BING_SEARCH_URL\"] = \"\""
|
||||
"os.environ[\"BING_SUBSCRIPTION_KEY\"] = \"<key>\"\n",
|
||||
"os.environ[\"BING_SEARCH_URL\"] = \"https://api.bing.microsoft.com/v7.0/search\""
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
237
docs/extras/modules/agents/tools/integrations/dataforseo.ipynb
Normal file
@@ -0,0 +1,237 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# DataForSeo API Wrapper\n",
|
||||
"This notebook demonstrates how to use the DataForSeo API wrapper to obtain search engine results. The DataForSeo API allows users to retrieve SERP from most popular search engines like Google, Bing, Yahoo. It also allows to get SERPs from different search engine types like Maps, News, Events, etc.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.utilities import DataForSeoAPIWrapper"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setting up the API wrapper with your credentials\n",
|
||||
"You can obtain your API credentials by registering on the DataForSeo website."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"DATAFORSEO_LOGIN\"] = \"your_api_access_username\"\n",
|
||||
"os.environ[\"DATAFORSEO_PASSWORD\"] = \"your_api_access_password\"\n",
|
||||
"\n",
|
||||
"wrapper = DataForSeoAPIWrapper()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The run method will return the first result snippet from one of the following elements: answer_box, knowledge_graph, featured_snippet, shopping, organic."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"wrapper.run(\"Weather in Los Angeles\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## The Difference Between `run` and `results`\n",
|
||||
"`run` and `results` are two methods provided by the `DataForSeoAPIWrapper` class.\n",
|
||||
"\n",
|
||||
"The `run` method executes the search and returns the first result snippet from the answer box, knowledge graph, featured snippet, shopping, or organic results. These elements are sorted by priority from highest to lowest.\n",
|
||||
"\n",
|
||||
"The `results` method returns a JSON response configured according to the parameters set in the wrapper. This allows for more flexibility in terms of what data you want to return from the API."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Getting Results as JSON\n",
|
||||
"You can customize the result types and fields you want to return in the JSON response. You can also set a maximum count for the number of top results to return."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"json_wrapper = DataForSeoAPIWrapper(\n",
|
||||
" json_result_types=[\"organic\", \"knowledge_graph\", \"answer_box\"],\n",
|
||||
" json_result_fields=[\"type\", \"title\", \"description\", \"text\"],\n",
|
||||
" top_count=3,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"json_wrapper.results(\"Bill Gates\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Customizing Location and Language\n",
|
||||
"You can specify the location and language of your search results by passing additional parameters to the API wrapper."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"customized_wrapper = DataForSeoAPIWrapper(\n",
|
||||
" top_count=10,\n",
|
||||
" json_result_types=[\"organic\", \"local_pack\"],\n",
|
||||
" json_result_fields=[\"title\", \"description\", \"type\"],\n",
|
||||
" params={\"location_name\": \"Germany\", \"language_code\": \"en\"},\n",
|
||||
")\n",
|
||||
"customized_wrapper.results(\"coffee near me\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Customizing the Search Engine\n",
|
||||
"You can also specify the search engine you want to use."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"customized_wrapper = DataForSeoAPIWrapper(\n",
|
||||
" top_count=10,\n",
|
||||
" json_result_types=[\"organic\", \"local_pack\"],\n",
|
||||
" json_result_fields=[\"title\", \"description\", \"type\"],\n",
|
||||
" params={\"location_name\": \"Germany\", \"language_code\": \"en\", \"se_name\": \"bing\"},\n",
|
||||
")\n",
|
||||
"customized_wrapper.results(\"coffee near me\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Customizing the Search Type\n",
|
||||
"The API wrapper also allows you to specify the type of search you want to perform. For example, you can perform a maps search."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"maps_search = DataForSeoAPIWrapper(\n",
|
||||
" top_count=10,\n",
|
||||
" json_result_fields=[\"title\", \"value\", \"address\", \"rating\", \"type\"],\n",
|
||||
" params={\n",
|
||||
" \"location_coordinate\": \"52.512,13.36,12z\",\n",
|
||||
" \"language_code\": \"en\",\n",
|
||||
" \"se_type\": \"maps\",\n",
|
||||
" },\n",
|
||||
")\n",
|
||||
"maps_search.results(\"coffee near me\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Integration with Langchain Agents\n",
|
||||
"You can use the `Tool` class from the `langchain.agents` module to integrate the `DataForSeoAPIWrapper` with a langchain agent. The `Tool` class encapsulates a function that the agent can call."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import Tool\n",
|
||||
"\n",
|
||||
"search = DataForSeoAPIWrapper(\n",
|
||||
" top_count=3,\n",
|
||||
" json_result_types=[\"organic\"],\n",
|
||||
" json_result_fields=[\"title\", \"description\", \"type\"],\n",
|
||||
")\n",
|
||||
"tool = Tool(\n",
|
||||
" name=\"google-search-answer\",\n",
|
||||
" description=\"My new answer tool\",\n",
|
||||
" func=search.run,\n",
|
||||
")\n",
|
||||
"json_tool = Tool(\n",
|
||||
" name=\"google-search-json\",\n",
|
||||
" description=\"My new json tool\",\n",
|
||||
" func=search.results,\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.11"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -52,7 +52,6 @@
|
||||
"tools = load_tools(\n",
|
||||
" [\"graphql\"],\n",
|
||||
" graphql_endpoint=\"https://swapi-graphql.netlify.app/.netlify/functions/index\",\n",
|
||||
" llm=llm,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"agent = initialize_agent(\n",
|
||||
|
||||
233
docs/extras/modules/agents/tools/integrations/lemonai.ipynb
Normal file
@@ -0,0 +1,233 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "16763ed3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Lemon AI NLP Workflow Automation\n",
|
||||
"\\\n",
|
||||
"Full docs are available at: https://github.com/felixbrock/lemonai-py-client\n",
|
||||
"\n",
|
||||
"**Lemon AI helps you build powerful AI assistants in minutes and automate workflows by allowing for accurate and reliable read and write operations in tools like Airtable, Hubspot, Discord, Notion, Slack and Github.**\n",
|
||||
"\n",
|
||||
"Most connectors available today are focused on read-only operations, limiting the potential of LLMs. Agents, on the other hand, have a tendency to hallucinate from time to time due to missing context or instructions.\n",
|
||||
"\n",
|
||||
"With Lemon AI, it is possible to give your agents access to well-defined APIs for reliable read and write operations. In addition, Lemon AI functions allow you to further reduce the risk of hallucinations by providing a way to statically define workflows that the model can rely on in case of uncertainty."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "4881b484-1b97-478f-b206-aec407ceff66",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Quick Start\n",
|
||||
"\n",
|
||||
"The following quick start demonstrates how to use Lemon AI in combination with Agents to automate workflows that involve interaction with internal tooling."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "ff91b41a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 1. Install Lemon AI\n",
|
||||
"\n",
|
||||
"Requires Python 3.8.1 and above.\n",
|
||||
"\n",
|
||||
"To use Lemon AI in your Python project run `pip install lemonai`\n",
|
||||
"\n",
|
||||
"This will install the corresponding Lemon AI client which you can then import into your script.\n",
|
||||
"\n",
|
||||
"The tool uses Python packages langchain and loguru. In case of any installation errors with Lemon AI, install both packages first and then install the Lemon AI package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "340ff63d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 2. Launch the Server\n",
|
||||
"\n",
|
||||
"The interaction of your agents and all tools provided by Lemon AI is handled by the [Lemon AI Server](https://github.com/felixbrock/lemonai-server). To use Lemon AI you need to run the server on your local machine so the Lemon AI Python client can connect to it."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "e845f402",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 3. Use Lemon AI with Langchain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "d3ae6a82",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Lemon AI automatically solves given tasks by finding the right combination of relevant tools or uses Lemon AI Functions as an alternative. The following example demonstrates how to retrieve a user from Hackernews and write it to a table in Airtable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "43476a22",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### (Optional) Define your Lemon AI Functions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "cb038670",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Similar to [OpenAI functions](https://openai.com/blog/function-calling-and-other-api-updates), Lemon AI provides the option to define workflows as reusable functions. These functions can be defined for use cases where it is especially important to move as close as possible to near-deterministic behavior. Specific workflows can be defined in a separate lemonai.json:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "e423ebbb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"```json\n",
|
||||
"[\n",
|
||||
" {\n",
|
||||
" \"name\": \"Hackernews Airtable User Workflow\",\n",
|
||||
" \"description\": \"retrieves user data from Hackernews and appends it to a table in Airtable\",\n",
|
||||
" \"tools\": [\"hackernews-get-user\", \"airtable-append-data\"]\n",
|
||||
" }\n",
|
||||
"]\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "3fdb36ce",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Your model will have access to these functions and will prefer them over self-selecting tools to solve a given task. All you have to do is to let the agent know that it should use a given function by including the function name in the prompt."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "ebfb8b5d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Include Lemon AI in your Langchain project "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "5318715d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from lemonai import execute_workflow\n",
|
||||
"from langchain import OpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "c9d082cb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Load API Keys and Access Tokens\n",
|
||||
"\n",
|
||||
"To use tools that require authentication, you have to store the corresponding access credentials in your environment in the format \"{tool name}_{authentication string}\" where the authentication string is one of [\"API_KEY\", \"SECRET_KEY\", \"SUBSCRIPTION_KEY\", \"ACCESS_KEY\"] for API keys or [\"ACCESS_TOKEN\", \"SECRET_TOKEN\"] for authentication tokens. Examples are \"OPENAI_API_KEY\", \"BING_SUBSCRIPTION_KEY\", \"AIRTABLE_ACCESS_TOKEN\"."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "a370d999",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\"\"\" Load all relevant API Keys and Access Tokens into your environment variables \"\"\"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"*INSERT OPENAI API KEY HERE*\"\n",
|
||||
"os.environ[\"AIRTABLE_ACCESS_TOKEN\"] = \"*INSERT AIRTABLE TOKEN HERE*\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "38d158e7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"hackernews_username = \"*INSERT HACKERNEWS USERNAME HERE*\"\n",
|
||||
"airtable_base_id = \"*INSERT BASE ID HERE*\"\n",
|
||||
"airtable_table_id = \"*INSERT TABLE ID HERE*\"\n",
|
||||
"\n",
|
||||
"\"\"\" Define your instruction to be given to your LLM \"\"\"\n",
|
||||
"prompt = f\"\"\"Read information from Hackernews for user {hackernews_username} and then write the results to\n",
|
||||
"Airtable (baseId: {airtable_base_id}, tableId: {airtable_table_id}). Only write the fields \"username\", \"karma\"\n",
|
||||
"and \"created_at_i\". Please make sure that Airtable does NOT automatically convert the field types.\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"\"\"\"\n",
|
||||
"Use the Lemon AI execute_workflow wrapper \n",
|
||||
"to run your Langchain agent in combination with Lemon AI \n",
|
||||
"\"\"\"\n",
|
||||
"model = OpenAI(temperature=0)\n",
|
||||
"\n",
|
||||
"execute_workflow(llm=model, prompt_string=prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "aef3e801",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 4. Gain transparency on your Agent's decision making\n",
|
||||
"\n",
|
||||
"To gain transparency on how your Agent interacts with Lemon AI tools to solve a given task, all decisions made, tools used and operations performed are written to a local `lemonai.log` file. Every time your LLM agent is interacting with the Lemon AI tool stack a corresponding log entry is created.\n",
|
||||
"\n",
|
||||
"```log\n",
|
||||
"2023-06-26T11:50:27.708785+0100 - b5f91c59-8487-45c2-800a-156eac0c7dae - hackernews-get-user\n",
|
||||
"2023-06-26T11:50:39.624035+0100 - b5f91c59-8487-45c2-800a-156eac0c7dae - airtable-append-data\n",
|
||||
"2023-06-26T11:58:32.925228+0100 - 5efe603c-9898-4143-b99a-55b50007ed9d - hackernews-get-user\n",
|
||||
"2023-06-26T11:58:43.988788+0100 - 5efe603c-9898-4143-b99a-55b50007ed9d - airtable-append-data\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"By using the [Lemon AI Analytics Tool](https://github.com/felixbrock/lemonai-analytics) you can easily gain a better understanding of how frequently and in which order tools are used. As a result, you can identify weak spots in your agent’s decision-making capabilities and move to a more deterministic behavior by defining Lemon AI functions."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -90,7 +90,12 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"search.results(\"The best blog post about AI safety is definitely this: \", 10, include_domains=[\"lesswrong.com\"], start_published_date=\"2019-01-01\")"
|
||||
"search.results(\n",
|
||||
" \"The best blog post about AI safety is definitely this: \",\n",
|
||||
" 10,\n",
|
||||
" include_domains=[\"lesswrong.com\"],\n",
|
||||
" start_published_date=\"2019-01-01\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -341,7 +341,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"zapier = ZapierNLAWrapper(zapier_nla_oauth_access_token='<fill in access token here>')\n",
|
||||
"zapier = ZapierNLAWrapper(zapier_nla_oauth_access_token=\"<fill in access token here>\")\n",
|
||||
"toolkit = ZapierToolkit.from_zapier_nla_wrapper(zapier)\n",
|
||||
"agent = initialize_agent(\n",
|
||||
" toolkit.get_tools(), llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
|
||||
|
||||
220
docs/extras/modules/callbacks/integrations/context.ipynb
Normal file
@@ -0,0 +1,220 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Context\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"[Context](https://getcontext.ai/) provides product analytics for AI chatbots.\n",
|
||||
"\n",
|
||||
"Context helps you understand how users are interacting with your AI chat products.\n",
|
||||
"Gain critical insights, optimise poor experiences, and minimise brand risks.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In this guide we will show you how to integrate with Context."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"## Installation and Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "shellscript"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"$ pip install context-python --upgrade"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Getting API Credentials\n",
|
||||
"\n",
|
||||
"To get your Context API token:\n",
|
||||
"\n",
|
||||
"1. Go to the settings page within your Context account (https://go.getcontext.ai/settings).\n",
|
||||
"2. Generate a new API Token.\n",
|
||||
"3. Store this token somewhere secure."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Setup Context\n",
|
||||
"\n",
|
||||
"To use the `ContextCallbackHandler`, import the handler from Langchain and instantiate it with your Context API token.\n",
|
||||
"\n",
|
||||
"Ensure you have installed the `context-python` package before using the handler."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from langchain.callbacks import ContextCallbackHandler\n",
|
||||
"\n",
|
||||
"token = os.environ[\"CONTEXT_API_TOKEN\"]\n",
|
||||
"\n",
|
||||
"context_callback = ContextCallbackHandler(token)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage\n",
|
||||
"### Using the Context callback within a Chat Model\n",
|
||||
"\n",
|
||||
"The Context callback handler can be used to directly record transcripts between users and AI assistants.\n",
|
||||
"\n",
|
||||
"#### Example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.schema import (\n",
|
||||
" SystemMessage,\n",
|
||||
" HumanMessage,\n",
|
||||
")\n",
|
||||
"from langchain.callbacks import ContextCallbackHandler\n",
|
||||
"\n",
|
||||
"token = os.environ[\"CONTEXT_API_TOKEN\"]\n",
|
||||
"\n",
|
||||
"chat = ChatOpenAI(\n",
|
||||
" headers={\"user_id\": \"123\"}, temperature=0, callbacks=[ContextCallbackHandler(token)]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"messages = [\n",
|
||||
" SystemMessage(\n",
|
||||
" content=\"You are a helpful assistant that translates English to French.\"\n",
|
||||
" ),\n",
|
||||
" HumanMessage(content=\"I love programming.\"),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"print(chat(messages))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Using the Context callback within Chains\n",
|
||||
"\n",
|
||||
"The Context callback handler can also be used to record the inputs and outputs of chains. Note that intermediate steps of the chain are not recorded - only the starting inputs and final outputs.\n",
|
||||
"\n",
|
||||
"__Note:__ Ensure that you pass the same context object to the chat model and the chain.\n",
|
||||
"\n",
|
||||
"Wrong:\n",
|
||||
"> ```python\n",
|
||||
"> chat = ChatOpenAI(temperature=0.9, callbacks=[ContextCallbackHandler(token)])\n",
|
||||
"> chain = LLMChain(llm=chat, prompt=chat_prompt_template, callbacks=[ContextCallbackHandler(token)])\n",
|
||||
"> ```\n",
|
||||
"\n",
|
||||
"Correct:\n",
|
||||
">```python\n",
|
||||
">handler = ContextCallbackHandler(token)\n",
|
||||
">chat = ChatOpenAI(temperature=0.9, callbacks=[callback])\n",
|
||||
">chain = LLMChain(llm=chat, prompt=chat_prompt_template, callbacks=[callback])\n",
|
||||
">```\n",
|
||||
"\n",
|
||||
"#### Example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain import LLMChain\n",
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"from langchain.prompts.chat import (\n",
|
||||
" ChatPromptTemplate,\n",
|
||||
" HumanMessagePromptTemplate,\n",
|
||||
")\n",
|
||||
"from langchain.callbacks import ContextCallbackHandler\n",
|
||||
"\n",
|
||||
"token = os.environ[\"CONTEXT_API_TOKEN\"]\n",
|
||||
"\n",
|
||||
"human_message_prompt = HumanMessagePromptTemplate(\n",
|
||||
" prompt=PromptTemplate(\n",
|
||||
" template=\"What is a good name for a company that makes {product}?\",\n",
|
||||
" input_variables=[\"product\"],\n",
|
||||
" )\n",
|
||||
")\n",
|
||||
"chat_prompt_template = ChatPromptTemplate.from_messages([human_message_prompt])\n",
|
||||
"callback = ContextCallbackHandler(token)\n",
|
||||
"chat = ChatOpenAI(temperature=0.9, callbacks=[callback])\n",
|
||||
"chain = LLMChain(llm=chat, prompt=chat_prompt_template, callbacks=[callback])\n",
|
||||
"print(chain.run(\"colorful socks\"))"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "a53ebf4a859167383b364e7e7521d0add3c2dbbdecce4edf676e8c4634ff3fbb"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -57,6 +57,7 @@
|
||||
"\n",
|
||||
"# Remove the (1) import sys and sys.path.append(..) and (2) uncomment `!pip install langchain` after merging the PR for Infino/LangChain integration.\n",
|
||||
"import sys\n",
|
||||
"\n",
|
||||
"sys.path.append(\"../../../../../langchain\")\n",
|
||||
"#!pip install langchain\n",
|
||||
"\n",
|
||||
@@ -120,9 +121,9 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# These are a subset of questions from Stanford's QA dataset - \n",
|
||||
"# These are a subset of questions from Stanford's QA dataset -\n",
|
||||
"# https://rajpurkar.github.io/SQuAD-explorer/\n",
|
||||
"data = '''In what country is Normandy located?\n",
|
||||
"data = \"\"\"In what country is Normandy located?\n",
|
||||
"When were the Normans in Normandy?\n",
|
||||
"From which countries did the Norse originate?\n",
|
||||
"Who was the Norse leader?\n",
|
||||
@@ -141,9 +142,9 @@
|
||||
"What principality did William the conquerer found?\n",
|
||||
"What is the original meaning of the word Norman?\n",
|
||||
"When was the Latin version of the word Norman first recorded?\n",
|
||||
"What name comes from the English words Normans/Normanz?'''\n",
|
||||
"What name comes from the English words Normans/Normanz?\"\"\"\n",
|
||||
"\n",
|
||||
"questions = data.split('\\n')"
|
||||
"questions = data.split(\"\\n\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -190,10 +191,12 @@
|
||||
],
|
||||
"source": [
|
||||
"# Set your key here.\n",
|
||||
"#os.environ[\"OPENAI_API_KEY\"] = \"YOUR_API_KEY\"\n",
|
||||
"# os.environ[\"OPENAI_API_KEY\"] = \"YOUR_API_KEY\"\n",
|
||||
"\n",
|
||||
"# Create callback handler. This logs latency, errors, token usage, prompts as well as prompt responses to Infino.\n",
|
||||
"handler = InfinoCallbackHandler(model_id=\"test_openai\", model_version=\"0.1\", verbose=False)\n",
|
||||
"handler = InfinoCallbackHandler(\n",
|
||||
" model_id=\"test_openai\", model_version=\"0.1\", verbose=False\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Create LLM.\n",
|
||||
"llm = OpenAI(temperature=0.1)\n",
|
||||
@@ -281,29 +284,30 @@
|
||||
"source": [
|
||||
"# Helper function to create a graph using matplotlib.\n",
|
||||
"def plot(data, title):\n",
|
||||
" data = json.loads(data)\n",
|
||||
" data = json.loads(data)\n",
|
||||
"\n",
|
||||
" # Extract x and y values from the data\n",
|
||||
" timestamps = [item[\"time\"] for item in data]\n",
|
||||
" dates=[dt.datetime.fromtimestamp(ts) for ts in timestamps]\n",
|
||||
" y = [item[\"value\"] for item in data]\n",
|
||||
" # Extract x and y values from the data\n",
|
||||
" timestamps = [item[\"time\"] for item in data]\n",
|
||||
" dates = [dt.datetime.fromtimestamp(ts) for ts in timestamps]\n",
|
||||
" y = [item[\"value\"] for item in data]\n",
|
||||
"\n",
|
||||
" plt.rcParams['figure.figsize'] = [6, 4]\n",
|
||||
" plt.subplots_adjust(bottom=0.2)\n",
|
||||
" plt.xticks(rotation=25 )\n",
|
||||
" ax=plt.gca()\n",
|
||||
" xfmt = md.DateFormatter('%Y-%m-%d %H:%M:%S')\n",
|
||||
" ax.xaxis.set_major_formatter(xfmt)\n",
|
||||
" \n",
|
||||
" # Create the plot\n",
|
||||
" plt.plot(dates, y)\n",
|
||||
" plt.rcParams[\"figure.figsize\"] = [6, 4]\n",
|
||||
" plt.subplots_adjust(bottom=0.2)\n",
|
||||
" plt.xticks(rotation=25)\n",
|
||||
" ax = plt.gca()\n",
|
||||
" xfmt = md.DateFormatter(\"%Y-%m-%d %H:%M:%S\")\n",
|
||||
" ax.xaxis.set_major_formatter(xfmt)\n",
|
||||
"\n",
|
||||
" # Set labels and title\n",
|
||||
" plt.xlabel(\"Time\")\n",
|
||||
" plt.ylabel(\"Value\")\n",
|
||||
" plt.title(title)\n",
|
||||
" # Create the plot\n",
|
||||
" plt.plot(dates, y)\n",
|
||||
"\n",
|
||||
" # Set labels and title\n",
|
||||
" plt.xlabel(\"Time\")\n",
|
||||
" plt.ylabel(\"Value\")\n",
|
||||
" plt.title(title)\n",
|
||||
"\n",
|
||||
" plt.show()\n",
|
||||
"\n",
|
||||
" plt.show()\n",
|
||||
"\n",
|
||||
"response = client.search_ts(\"__name__\", \"latency\", 0, int(time.time()))\n",
|
||||
"plot(response.text, \"Latency\")\n",
|
||||
@@ -318,7 +322,7 @@
|
||||
"plot(response.text, \"Completion Tokens\")\n",
|
||||
"\n",
|
||||
"response = client.search_ts(\"__name__\", \"total_tokens\", 0, int(time.time()))\n",
|
||||
"plot(response.text, \"Total Tokens\")\n"
|
||||
"plot(response.text, \"Total Tokens\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -356,7 +360,7 @@
|
||||
"\n",
|
||||
"query = \"king charles III\"\n",
|
||||
"response = client.search_log(\"king charles III\", 0, int(time.time()))\n",
|
||||
"print(\"Results for\", query, \":\", response.text)\n"
|
||||
"print(\"Results for\", query, \":\", response.text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
In this guide we will demonstrate how to use `StreamlitCallbackHandler` to display the thoughts and actions of an agent in an
|
||||
interactive Streamlit app. Try it out with the running app below using the [MRKL agent](/docs/modules/agents/how_to/mrkl/):
|
||||
|
||||
<iframe loading="lazy" src="https://mrkl-minimal.streamlit.app/?embed=true&embed_options=light_theme"
|
||||
<iframe loading="lazy" src="https://langchain-mrkl.streamlit.app/?embed=true&embed_options=light_theme"
|
||||
style={{ width: 100 + '%', border: 'none', marginBottom: 1 + 'rem', height: 600 }}
|
||||
allow="camera;clipboard-read;clipboard-write;"
|
||||
></iframe>
|
||||
@@ -35,7 +35,7 @@ st_callback = StreamlitCallbackHandler(st.container())
|
||||
```
|
||||
|
||||
Additional keyword arguments to customize the display behavior are described in the
|
||||
[API reference](https://api.python.langchain.com/en/latest/modules/callbacks.html#langchain.callbacks.StreamlitCallbackHandler).
|
||||
[API reference](https://api.python.langchain.com/en/latest/callbacks/langchain.callbacks.streamlit.streamlit_callback_handler.StreamlitCallbackHandler.html).
|
||||
|
||||
### Scenario 1: Using an Agent with Tools
|
||||
|
||||
|
||||
921
docs/extras/modules/chains/additional/cpal.ipynb
Normal file
@@ -0,0 +1,921 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "82f3f65d-fbcb-4e8e-b04b-959856283643",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Causal program-aided language (CPAL) chain\n",
|
||||
"\n",
|
||||
"The CPAL chain builds on the recent PAL to stop LLM hallucination. The problem with the PAL approach is that it hallucinates on a math problem with a nested chain of dependence. The innovation here is that this new CPAL approach includes causal structure to fix hallucination.\n",
|
||||
"\n",
|
||||
"The original [PR's description](https://github.com/hwchase17/langchain/pull/6255) contains a full overview.\n",
|
||||
"\n",
|
||||
"Using the CPAL chain, the LLM translated this\n",
|
||||
"\n",
|
||||
" \"Tim buys the same number of pets as Cindy and Boris.\"\n",
|
||||
" \"Cindy buys the same number of pets as Bill plus Bob.\"\n",
|
||||
" \"Boris buys the same number of pets as Ben plus Beth.\"\n",
|
||||
" \"Bill buys the same number of pets as Obama.\"\n",
|
||||
" \"Bob buys the same number of pets as Obama.\"\n",
|
||||
" \"Ben buys the same number of pets as Obama.\"\n",
|
||||
" \"Beth buys the same number of pets as Obama.\"\n",
|
||||
" \"If Obama buys one pet, how many pets total does everyone buy?\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"into this\n",
|
||||
"\n",
|
||||
".\n",
|
||||
"\n",
|
||||
"Outline of code examples demoed in this notebook.\n",
|
||||
"\n",
|
||||
"1. CPAL's value against hallucination: CPAL vs PAL \n",
|
||||
" 1.1 Complex narrative \n",
|
||||
" 1.2 Unanswerable math word problem \n",
|
||||
"2. CPAL's three types of causal diagrams ([The Book of Why](https://en.wikipedia.org/wiki/The_Book_of_Why)). \n",
|
||||
" 2.1 Mediator \n",
|
||||
" 2.2 Collider \n",
|
||||
" 2.3 Confounder "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "1370e40f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from IPython.display import SVG\n",
|
||||
"\n",
|
||||
"from langchain.experimental.cpal.base import CPALChain\n",
|
||||
"from langchain.chains import PALChain\n",
|
||||
"from langchain import OpenAI\n",
|
||||
"\n",
|
||||
"llm = OpenAI(temperature=0, max_tokens=512)\n",
|
||||
"cpal_chain = CPALChain.from_univariate_prompt(llm=llm, verbose=True)\n",
|
||||
"pal_chain = PALChain.from_math_prompt(llm=llm, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "858a87d9-a9bd-4850-9687-9af4b0856b62",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## CPAL's value against hallucination: CPAL vs PAL\n",
|
||||
"\n",
|
||||
"Like PAL, CPAL intends to reduce large language model (LLM) hallucination.\n",
|
||||
"\n",
|
||||
"The CPAL chain is different from the PAL chain for a couple of reasons.\n",
|
||||
"\n",
|
||||
"CPAL adds a causal structure (or DAG) to link entity actions (or math expressions).\n",
|
||||
"The CPAL math expressions are modeling a chain of cause and effect relations, which can be intervened upon, whereas for the PAL chain math expressions are projected math identities.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "496403c5-d268-43ae-8852-2bd9903ce444",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 1.1 Complex narrative\n",
|
||||
"\n",
|
||||
"Takeaway: PAL hallucinates, CPAL does not hallucinate."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "d5dad768-2892-4825-8093-9b840f643a8a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"question = (\n",
|
||||
" \"Tim buys the same number of pets as Cindy and Boris.\"\n",
|
||||
" \"Cindy buys the same number of pets as Bill plus Bob.\"\n",
|
||||
" \"Boris buys the same number of pets as Ben plus Beth.\"\n",
|
||||
" \"Bill buys the same number of pets as Obama.\"\n",
|
||||
" \"Bob buys the same number of pets as Obama.\"\n",
|
||||
" \"Ben buys the same number of pets as Obama.\"\n",
|
||||
" \"Beth buys the same number of pets as Obama.\"\n",
|
||||
" \"If Obama buys one pet, how many pets total does everyone buy?\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "bbffa7a0-3c22-4a1d-ab2d-f230973073b0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mdef solution():\n",
|
||||
" \"\"\"Tim buys the same number of pets as Cindy and Boris.Cindy buys the same number of pets as Bill plus Bob.Boris buys the same number of pets as Ben plus Beth.Bill buys the same number of pets as Obama.Bob buys the same number of pets as Obama.Ben buys the same number of pets as Obama.Beth buys the same number of pets as Obama.If Obama buys one pet, how many pets total does everyone buy?\"\"\"\n",
|
||||
" obama_pets = 1\n",
|
||||
" tim_pets = obama_pets\n",
|
||||
" cindy_pets = obama_pets + obama_pets\n",
|
||||
" boris_pets = obama_pets + obama_pets\n",
|
||||
" total_pets = tim_pets + cindy_pets + boris_pets\n",
|
||||
" result = total_pets\n",
|
||||
" return result\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'5'"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"pal_chain.run(question)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "35a70d1d-86f8-4abc-b818-fbd083f072e9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mstory outcome data\n",
|
||||
" name code value depends_on\n",
|
||||
"0 obama pass 1.0 []\n",
|
||||
"1 bill bill.value = obama.value 1.0 [obama]\n",
|
||||
"2 bob bob.value = obama.value 1.0 [obama]\n",
|
||||
"3 ben ben.value = obama.value 1.0 [obama]\n",
|
||||
"4 beth beth.value = obama.value 1.0 [obama]\n",
|
||||
"5 cindy cindy.value = bill.value + bob.value 2.0 [bill, bob]\n",
|
||||
"6 boris boris.value = ben.value + beth.value 2.0 [ben, beth]\n",
|
||||
"7 tim tim.value = cindy.value + boris.value 4.0 [cindy, boris]\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[36;1m\u001b[1;3mquery data\n",
|
||||
"{\n",
|
||||
" \"question\": \"how many pets total does everyone buy?\",\n",
|
||||
" \"expression\": \"SELECT SUM(value) FROM df\",\n",
|
||||
" \"llm_error_msg\": \"\"\n",
|
||||
"}\u001b[0m\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"13.0"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"cpal_chain.run(question)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "ccb6b2b0-9de6-4f66-a8fb-fc59229ee316",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"image/svg+xml": [
|
||||
"<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"292pt\" height=\"260pt\" viewBox=\"0.00 0.00 292.00 260.00\">\n",
|
||||
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 256)\">\n",
|
||||
"<polygon fill=\"white\" stroke=\"transparent\" points=\"-4,4 -4,-256 288,-256 288,4 -4,4\"/>\n",
|
||||
"<!-- obama -->\n",
|
||||
"<g id=\"node1\" class=\"node\">\n",
|
||||
"<title>obama</title>\n",
|
||||
"<ellipse fill=\"none\" stroke=\"black\" cx=\"137\" cy=\"-234\" rx=\"41.69\" ry=\"18\"/>\n",
|
||||
"<text text-anchor=\"middle\" x=\"137\" y=\"-230.3\" font-family=\"Times,serif\" font-size=\"14.00\">obama</text>\n",
|
||||
"</g>\n",
|
||||
"<!-- bill -->\n",
|
||||
"<g id=\"node2\" class=\"node\">\n",
|
||||
"<title>bill</title>\n",
|
||||
"<ellipse fill=\"none\" stroke=\"black\" cx=\"27\" cy=\"-162\" rx=\"27\" ry=\"18\"/>\n",
|
||||
"<text text-anchor=\"middle\" x=\"27\" y=\"-158.3\" font-family=\"Times,serif\" font-size=\"14.00\">bill</text>\n",
|
||||
"</g>\n",
|
||||
"<!-- obama->bill -->\n",
|
||||
"<g id=\"edge1\" class=\"edge\">\n",
|
||||
"<title>obama->bill</title>\n",
|
||||
"<path fill=\"none\" stroke=\"black\" d=\"M114.47,-218.67C97.08,-207.6 72.94,-192.23 54.42,-180.45\"/>\n",
|
||||
"<polygon fill=\"black\" stroke=\"black\" points=\"56.15,-177.4 45.84,-174.99 52.4,-183.31 56.15,-177.4\"/>\n",
|
||||
"</g>\n",
|
||||
"<!-- bob -->\n",
|
||||
"<g id=\"node3\" class=\"node\">\n",
|
||||
"<title>bob</title>\n",
|
||||
"<ellipse fill=\"none\" stroke=\"black\" cx=\"100\" cy=\"-162\" rx=\"28\" ry=\"18\"/>\n",
|
||||
"<text text-anchor=\"middle\" x=\"100\" y=\"-158.3\" font-family=\"Times,serif\" font-size=\"14.00\">bob</text>\n",
|
||||
"</g>\n",
|
||||
"<!-- obama->bob -->\n",
|
||||
"<g id=\"edge2\" class=\"edge\">\n",
|
||||
"<title>obama->bob</title>\n",
|
||||
"<path fill=\"none\" stroke=\"black\" d=\"M128.04,-216.05C123.66,-207.77 118.3,-197.62 113.44,-188.42\"/>\n",
|
||||
"<polygon fill=\"black\" stroke=\"black\" points=\"116.39,-186.51 108.62,-179.31 110.2,-189.79 116.39,-186.51\"/>\n",
|
||||
"</g>\n",
|
||||
"<!-- ben -->\n",
|
||||
"<g id=\"node4\" class=\"node\">\n",
|
||||
"<title>ben</title>\n",
|
||||
"<ellipse fill=\"none\" stroke=\"black\" cx=\"174\" cy=\"-162\" rx=\"28\" ry=\"18\"/>\n",
|
||||
"<text text-anchor=\"middle\" x=\"174\" y=\"-158.3\" font-family=\"Times,serif\" font-size=\"14.00\">ben</text>\n",
|
||||
"</g>\n",
|
||||
"<!-- obama->ben -->\n",
|
||||
"<g id=\"edge3\" class=\"edge\">\n",
|
||||
"<title>obama->ben</title>\n",
|
||||
"<path fill=\"none\" stroke=\"black\" d=\"M145.96,-216.05C150.34,-207.77 155.7,-197.62 160.56,-188.42\"/>\n",
|
||||
"<polygon fill=\"black\" stroke=\"black\" points=\"163.8,-189.79 165.38,-179.31 157.61,-186.51 163.8,-189.79\"/>\n",
|
||||
"</g>\n",
|
||||
"<!-- beth -->\n",
|
||||
"<g id=\"node5\" class=\"node\">\n",
|
||||
"<title>beth</title>\n",
|
||||
"<ellipse fill=\"none\" stroke=\"black\" cx=\"252\" cy=\"-162\" rx=\"32\" ry=\"18\"/>\n",
|
||||
"<text text-anchor=\"middle\" x=\"252\" y=\"-158.3\" font-family=\"Times,serif\" font-size=\"14.00\">beth</text>\n",
|
||||
"</g>\n",
|
||||
"<!-- obama->beth -->\n",
|
||||
"<g id=\"edge4\" class=\"edge\">\n",
|
||||
"<title>obama->beth</title>\n",
|
||||
"<path fill=\"none\" stroke=\"black\" d=\"M160.27,-218.83C178.18,-207.94 203.04,-192.8 222.37,-181.04\"/>\n",
|
||||
"<polygon fill=\"black\" stroke=\"black\" points=\"224.36,-183.92 231.08,-175.73 220.72,-177.95 224.36,-183.92\"/>\n",
|
||||
"</g>\n",
|
||||
"<!-- cindy -->\n",
|
||||
"<g id=\"node6\" class=\"node\">\n",
|
||||
"<title>cindy</title>\n",
|
||||
"<ellipse fill=\"none\" stroke=\"black\" cx=\"93\" cy=\"-90\" rx=\"36\" ry=\"18\"/>\n",
|
||||
"<text text-anchor=\"middle\" x=\"93\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">cindy</text>\n",
|
||||
"</g>\n",
|
||||
"<!-- bill->cindy -->\n",
|
||||
"<g id=\"edge5\" class=\"edge\">\n",
|
||||
"<title>bill->cindy</title>\n",
|
||||
"<path fill=\"none\" stroke=\"black\" d=\"M41,-146.15C49.77,-136.85 61.25,-124.67 71.2,-114.12\"/>\n",
|
||||
"<polygon fill=\"black\" stroke=\"black\" points=\"73.79,-116.47 78.11,-106.8 68.7,-111.67 73.79,-116.47\"/>\n",
|
||||
"</g>\n",
|
||||
"<!-- bob->cindy -->\n",
|
||||
"<g id=\"edge6\" class=\"edge\">\n",
|
||||
"<title>bob->cindy</title>\n",
|
||||
"<path fill=\"none\" stroke=\"black\" d=\"M98.27,-143.7C97.5,-135.98 96.57,-126.71 95.71,-118.11\"/>\n",
|
||||
"<polygon fill=\"black\" stroke=\"black\" points=\"99.19,-117.7 94.71,-108.1 92.22,-118.4 99.19,-117.7\"/>\n",
|
||||
"</g>\n",
|
||||
"<!-- boris -->\n",
|
||||
"<g id=\"node7\" class=\"node\">\n",
|
||||
"<title>boris</title>\n",
|
||||
"<ellipse fill=\"none\" stroke=\"black\" cx=\"181\" cy=\"-90\" rx=\"34.5\" ry=\"18\"/>\n",
|
||||
"<text text-anchor=\"middle\" x=\"181\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">boris</text>\n",
|
||||
"</g>\n",
|
||||
"<!-- ben->boris -->\n",
|
||||
"<g id=\"edge7\" class=\"edge\">\n",
|
||||
"<title>ben->boris</title>\n",
|
||||
"<path fill=\"none\" stroke=\"black\" d=\"M175.73,-143.7C176.5,-135.98 177.43,-126.71 178.29,-118.11\"/>\n",
|
||||
"<polygon fill=\"black\" stroke=\"black\" points=\"181.78,-118.4 179.29,-108.1 174.81,-117.7 181.78,-118.4\"/>\n",
|
||||
"</g>\n",
|
||||
"<!-- beth->boris -->\n",
|
||||
"<g id=\"edge8\" class=\"edge\">\n",
|
||||
"<title>beth->boris</title>\n",
|
||||
"<path fill=\"none\" stroke=\"black\" d=\"M236.59,-145.81C227.01,-136.36 214.51,-124.04 203.8,-113.48\"/>\n",
|
||||
"<polygon fill=\"black\" stroke=\"black\" points=\"205.96,-110.69 196.38,-106.16 201.04,-115.67 205.96,-110.69\"/>\n",
|
||||
"</g>\n",
|
||||
"<!-- tim -->\n",
|
||||
"<g id=\"node8\" class=\"node\">\n",
|
||||
"<title>tim</title>\n",
|
||||
"<ellipse fill=\"none\" stroke=\"black\" cx=\"137\" cy=\"-18\" rx=\"27\" ry=\"18\"/>\n",
|
||||
"<text text-anchor=\"middle\" x=\"137\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">tim</text>\n",
|
||||
"</g>\n",
|
||||
"<!-- cindy->tim -->\n",
|
||||
"<g id=\"edge9\" class=\"edge\">\n",
|
||||
"<title>cindy->tim</title>\n",
|
||||
"<path fill=\"none\" stroke=\"black\" d=\"M103.43,-72.41C108.82,-63.83 115.51,-53.19 121.49,-43.67\"/>\n",
|
||||
"<polygon fill=\"black\" stroke=\"black\" points=\"124.59,-45.32 126.95,-34.99 118.66,-41.59 124.59,-45.32\"/>\n",
|
||||
"</g>\n",
|
||||
"<!-- boris->tim -->\n",
|
||||
"<g id=\"edge10\" class=\"edge\">\n",
|
||||
"<title>boris->tim</title>\n",
|
||||
"<path fill=\"none\" stroke=\"black\" d=\"M170.79,-72.77C165.41,-64.19 158.68,-53.49 152.65,-43.9\"/>\n",
|
||||
"<polygon fill=\"black\" stroke=\"black\" points=\"155.43,-41.75 147.15,-35.15 149.51,-45.48 155.43,-41.75\"/>\n",
|
||||
"</g>\n",
|
||||
"</g>\n",
|
||||
"</svg>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.SVG object>"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# wait 20 secs to see display\n",
|
||||
"cpal_chain.draw(path=\"web.svg\")\n",
|
||||
"SVG(\"web.svg\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1f6f345a-bb16-4e64-83c4-cbbc789a8325",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Unanswerable math\n",
|
||||
"\n",
|
||||
"Takeaway: PAL hallucinates, where CPAL, rather than hallucinate, answers with _\"unanswerable, narrative question and plot are incoherent\"_"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "068afd79-fd41-4ec2-b4d0-c64140dc413f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"question = (\n",
|
||||
" \"Jan has three times the number of pets as Marcia.\"\n",
|
||||
" \"Marcia has two more pets than Cindy.\"\n",
|
||||
" \"If Cindy has ten pets, how many pets does Barak have?\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "02f77db2-72e8-46c2-90b3-5e37ca42f80d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mdef solution():\n",
|
||||
" \"\"\"Jan has three times the number of pets as Marcia.Marcia has two more pets than Cindy.If Cindy has ten pets, how many pets does Barak have?\"\"\"\n",
|
||||
" cindy_pets = 10\n",
|
||||
" marcia_pets = cindy_pets + 2\n",
|
||||
" jan_pets = marcia_pets * 3\n",
|
||||
" result = jan_pets\n",
|
||||
" return result\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'36'"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"pal_chain.run(question)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "925958de-e998-4ffa-8b2e-5a00ddae5026",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mstory outcome data\n",
|
||||
" name code value depends_on\n",
|
||||
"0 cindy pass 10.0 []\n",
|
||||
"1 marcia marcia.value = cindy.value + 2 12.0 [cindy]\n",
|
||||
"2 jan jan.value = marcia.value * 3 36.0 [marcia]\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[36;1m\u001b[1;3mquery data\n",
|
||||
"{\n",
|
||||
" \"question\": \"how many pets does barak have?\",\n",
|
||||
" \"expression\": \"SELECT name, value FROM df WHERE name = 'barak'\",\n",
|
||||
" \"llm_error_msg\": \"\"\n",
|
||||
"}\u001b[0m\n",
|
||||
"\n",
|
||||
"unanswerable, query and outcome are incoherent\n",
|
||||
"\n",
|
||||
"outcome:\n",
|
||||
" name code value depends_on\n",
|
||||
"0 cindy pass 10.0 []\n",
|
||||
"1 marcia marcia.value = cindy.value + 2 12.0 [cindy]\n",
|
||||
"2 jan jan.value = marcia.value * 3 36.0 [marcia]\n",
|
||||
"query:\n",
|
||||
"{'question': 'how many pets does barak have?', 'expression': \"SELECT name, value FROM df WHERE name = 'barak'\", 'llm_error_msg': ''}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"try:\n",
|
||||
" cpal_chain.run(question)\n",
|
||||
"except Exception as e_msg:\n",
|
||||
" print(e_msg)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "095adc76",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Basic math\n",
|
||||
"\n",
|
||||
"#### Causal mediator"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "3ecf03fa-8350-4c4e-8080-84a307ba6ad4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"question = (\n",
|
||||
" \"Jan has three times the number of pets as Marcia. \"\n",
|
||||
" \"Marcia has two more pets than Cindy. \"\n",
|
||||
" \"If Cindy has four pets, how many total pets do the three have?\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "74e49c47-3eed-4abe-98b7-8e97bcd15944",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"PAL"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "2e88395f-d014-4362-abb0-88f6800860bb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mdef solution():\n",
|
||||
" \"\"\"Jan has three times the number of pets as Marcia. Marcia has two more pets than Cindy. If Cindy has four pets, how many total pets do the three have?\"\"\"\n",
|
||||
" cindy_pets = 4\n",
|
||||
" marcia_pets = cindy_pets + 2\n",
|
||||
" jan_pets = marcia_pets * 3\n",
|
||||
" total_pets = cindy_pets + marcia_pets + jan_pets\n",
|
||||
" result = total_pets\n",
|
||||
" return result\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'28'"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"pal_chain.run(question)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "20ba6640-3d17-4b59-8101-aaba89d68cf4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"CPAL"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "312a0943-a482-4ed0-a064-1e7a72e9479b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mstory outcome data\n",
|
||||
" name code value depends_on\n",
|
||||
"0 cindy pass 4.0 []\n",
|
||||
"1 marcia marcia.value = cindy.value + 2 6.0 [cindy]\n",
|
||||
"2 jan jan.value = marcia.value * 3 18.0 [marcia]\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[36;1m\u001b[1;3mquery data\n",
|
||||
"{\n",
|
||||
" \"question\": \"how many total pets do the three have?\",\n",
|
||||
" \"expression\": \"SELECT SUM(value) FROM df\",\n",
|
||||
" \"llm_error_msg\": \"\"\n",
|
||||
"}\u001b[0m\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"28.0"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"cpal_chain.run(question)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "4466b975-ae2b-4252-972b-b3182a089ade",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"image/svg+xml": [
|
||||
"<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"92pt\" height=\"188pt\" viewBox=\"0.00 0.00 92.49 188.00\">\n",
|
||||
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 184)\">\n",
|
||||
"<polygon fill=\"white\" stroke=\"transparent\" points=\"-4,4 -4,-184 88.49,-184 88.49,4 -4,4\"/>\n",
|
||||
"<!-- cindy -->\n",
|
||||
"<g id=\"node1\" class=\"node\">\n",
|
||||
"<title>cindy</title>\n",
|
||||
"<ellipse fill=\"none\" stroke=\"black\" cx=\"42.25\" cy=\"-162\" rx=\"36\" ry=\"18\"/>\n",
|
||||
"<text text-anchor=\"middle\" x=\"42.25\" y=\"-158.3\" font-family=\"Times,serif\" font-size=\"14.00\">cindy</text>\n",
|
||||
"</g>\n",
|
||||
"<!-- marcia -->\n",
|
||||
"<g id=\"node2\" class=\"node\">\n",
|
||||
"<title>marcia</title>\n",
|
||||
"<ellipse fill=\"none\" stroke=\"black\" cx=\"42.25\" cy=\"-90\" rx=\"42.49\" ry=\"18\"/>\n",
|
||||
"<text text-anchor=\"middle\" x=\"42.25\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">marcia</text>\n",
|
||||
"</g>\n",
|
||||
"<!-- cindy->marcia -->\n",
|
||||
"<g id=\"edge1\" class=\"edge\">\n",
|
||||
"<title>cindy->marcia</title>\n",
|
||||
"<path fill=\"none\" stroke=\"black\" d=\"M42.25,-143.7C42.25,-135.98 42.25,-126.71 42.25,-118.11\"/>\n",
|
||||
"<polygon fill=\"black\" stroke=\"black\" points=\"45.75,-118.1 42.25,-108.1 38.75,-118.1 45.75,-118.1\"/>\n",
|
||||
"</g>\n",
|
||||
"<!-- jan -->\n",
|
||||
"<g id=\"node3\" class=\"node\">\n",
|
||||
"<title>jan</title>\n",
|
||||
"<ellipse fill=\"none\" stroke=\"black\" cx=\"42.25\" cy=\"-18\" rx=\"27\" ry=\"18\"/>\n",
|
||||
"<text text-anchor=\"middle\" x=\"42.25\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">jan</text>\n",
|
||||
"</g>\n",
|
||||
"<!-- marcia->jan -->\n",
|
||||
"<g id=\"edge2\" class=\"edge\">\n",
|
||||
"<title>marcia->jan</title>\n",
|
||||
"<path fill=\"none\" stroke=\"black\" d=\"M42.25,-71.7C42.25,-63.98 42.25,-54.71 42.25,-46.11\"/>\n",
|
||||
"<polygon fill=\"black\" stroke=\"black\" points=\"45.75,-46.1 42.25,-36.1 38.75,-46.1 45.75,-46.1\"/>\n",
|
||||
"</g>\n",
|
||||
"</g>\n",
|
||||
"</svg>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.SVG object>"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# wait 20 secs to see display\n",
|
||||
"cpal_chain.draw(path=\"web.svg\")\n",
|
||||
"SVG(\"web.svg\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "29fa7b8a-75a3-4270-82a2-2c31939cd7e0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Causal collider"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "618eddac-f0ef-4ab5-90ed-72e880fdeba3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"question = (\n",
|
||||
" \"Jan has the number of pets as Marcia plus the number of pets as Cindy. \"\n",
|
||||
" \"Marcia has no pets. \"\n",
|
||||
" \"If Cindy has four pets, how many total pets do the three have?\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "a01563f3-7974-4de4-8bd9-0b7d710aa0d3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mstory outcome data\n",
|
||||
" name code value depends_on\n",
|
||||
"0 marcia pass 0.0 []\n",
|
||||
"1 cindy pass 4.0 []\n",
|
||||
"2 jan jan.value = marcia.value + cindy.value 4.0 [marcia, cindy]\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[36;1m\u001b[1;3mquery data\n",
|
||||
"{\n",
|
||||
" \"question\": \"how many total pets do the three have?\",\n",
|
||||
" \"expression\": \"SELECT SUM(value) FROM df\",\n",
|
||||
" \"llm_error_msg\": \"\"\n",
|
||||
"}\u001b[0m\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"8.0"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"cpal_chain.run(question)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "0fbe7243-0522-4946-b9a2-6e21e7c49a42",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"image/svg+xml": [
|
||||
"<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"182pt\" height=\"116pt\" viewBox=\"0.00 0.00 182.00 116.00\">\n",
|
||||
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 112)\">\n",
|
||||
"<polygon fill=\"white\" stroke=\"transparent\" points=\"-4,4 -4,-112 178,-112 178,4 -4,4\"/>\n",
|
||||
"<!-- marcia -->\n",
|
||||
"<g id=\"node1\" class=\"node\">\n",
|
||||
"<title>marcia</title>\n",
|
||||
"<ellipse fill=\"none\" stroke=\"black\" cx=\"42.25\" cy=\"-90\" rx=\"42.49\" ry=\"18\"/>\n",
|
||||
"<text text-anchor=\"middle\" x=\"42.25\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">marcia</text>\n",
|
||||
"</g>\n",
|
||||
"<!-- jan -->\n",
|
||||
"<g id=\"node2\" class=\"node\">\n",
|
||||
"<title>jan</title>\n",
|
||||
"<ellipse fill=\"none\" stroke=\"black\" cx=\"90.25\" cy=\"-18\" rx=\"27\" ry=\"18\"/>\n",
|
||||
"<text text-anchor=\"middle\" x=\"90.25\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">jan</text>\n",
|
||||
"</g>\n",
|
||||
"<!-- marcia->jan -->\n",
|
||||
"<g id=\"edge1\" class=\"edge\">\n",
|
||||
"<title>marcia->jan</title>\n",
|
||||
"<path fill=\"none\" stroke=\"black\" d=\"M53.62,-72.41C59.57,-63.74 66.95,-52.97 73.53,-43.38\"/>\n",
|
||||
"<polygon fill=\"black\" stroke=\"black\" points=\"76.51,-45.21 79.28,-34.99 70.74,-41.26 76.51,-45.21\"/>\n",
|
||||
"</g>\n",
|
||||
"<!-- cindy -->\n",
|
||||
"<g id=\"node3\" class=\"node\">\n",
|
||||
"<title>cindy</title>\n",
|
||||
"<ellipse fill=\"none\" stroke=\"black\" cx=\"138.25\" cy=\"-90\" rx=\"36\" ry=\"18\"/>\n",
|
||||
"<text text-anchor=\"middle\" x=\"138.25\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">cindy</text>\n",
|
||||
"</g>\n",
|
||||
"<!-- cindy->jan -->\n",
|
||||
"<g id=\"edge2\" class=\"edge\">\n",
|
||||
"<title>cindy->jan</title>\n",
|
||||
"<path fill=\"none\" stroke=\"black\" d=\"M127.11,-72.77C121.09,-63.98 113.54,-52.96 106.83,-43.19\"/>\n",
|
||||
"<polygon fill=\"black\" stroke=\"black\" points=\"109.53,-40.94 100.99,-34.67 103.75,-44.89 109.53,-40.94\"/>\n",
|
||||
"</g>\n",
|
||||
"</g>\n",
|
||||
"</svg>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.SVG object>"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# wait 20 secs to see display\n",
|
||||
"cpal_chain.draw(path=\"web.svg\")\n",
|
||||
"SVG(\"web.svg\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d4082538-ec03-44f0-aac3-07e03aad7555",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Causal confounder"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "83932c30-950b-435a-b328-7993ce8cc6bd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"question = (\n",
|
||||
" \"Jan has the number of pets as Marcia plus the number of pets as Cindy. \"\n",
|
||||
" \"Marcia has two more pets than Cindy. \"\n",
|
||||
" \"If Cindy has four pets, how many total pets do the three have?\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "570de307-7c6b-4fdc-80c3-4361daa8a629",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mstory outcome data\n",
|
||||
" name code value depends_on\n",
|
||||
"0 cindy pass 4.0 []\n",
|
||||
"1 marcia marcia.value = cindy.value + 2 6.0 [cindy]\n",
|
||||
"2 jan jan.value = cindy.value + marcia.value 10.0 [cindy, marcia]\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[36;1m\u001b[1;3mquery data\n",
|
||||
"{\n",
|
||||
" \"question\": \"how many total pets do the three have?\",\n",
|
||||
" \"expression\": \"SELECT SUM(value) FROM df\",\n",
|
||||
" \"llm_error_msg\": \"\"\n",
|
||||
"}\u001b[0m\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"20.0"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"cpal_chain.run(question)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "00375615-6b6d-4357-bdb8-f64f682f7605",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"image/svg+xml": [
|
||||
"<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"121pt\" height=\"188pt\" viewBox=\"0.00 0.00 120.99 188.00\">\n",
|
||||
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 184)\">\n",
|
||||
"<polygon fill=\"white\" stroke=\"transparent\" points=\"-4,4 -4,-184 116.99,-184 116.99,4 -4,4\"/>\n",
|
||||
"<!-- cindy -->\n",
|
||||
"<g id=\"node1\" class=\"node\">\n",
|
||||
"<title>cindy</title>\n",
|
||||
"<ellipse fill=\"none\" stroke=\"black\" cx=\"77.25\" cy=\"-162\" rx=\"36\" ry=\"18\"/>\n",
|
||||
"<text text-anchor=\"middle\" x=\"77.25\" y=\"-158.3\" font-family=\"Times,serif\" font-size=\"14.00\">cindy</text>\n",
|
||||
"</g>\n",
|
||||
"<!-- marcia -->\n",
|
||||
"<g id=\"node2\" class=\"node\">\n",
|
||||
"<title>marcia</title>\n",
|
||||
"<ellipse fill=\"none\" stroke=\"black\" cx=\"42.25\" cy=\"-90\" rx=\"42.49\" ry=\"18\"/>\n",
|
||||
"<text text-anchor=\"middle\" x=\"42.25\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">marcia</text>\n",
|
||||
"</g>\n",
|
||||
"<!-- cindy->marcia -->\n",
|
||||
"<g id=\"edge1\" class=\"edge\">\n",
|
||||
"<title>cindy->marcia</title>\n",
|
||||
"<path fill=\"none\" stroke=\"black\" d=\"M68.95,-144.41C64.87,-136.25 59.86,-126.22 55.28,-117.07\"/>\n",
|
||||
"<polygon fill=\"black\" stroke=\"black\" points=\"58.33,-115.34 50.72,-107.96 52.07,-118.47 58.33,-115.34\"/>\n",
|
||||
"</g>\n",
|
||||
"<!-- jan -->\n",
|
||||
"<g id=\"node3\" class=\"node\">\n",
|
||||
"<title>jan</title>\n",
|
||||
"<ellipse fill=\"none\" stroke=\"black\" cx=\"77.25\" cy=\"-18\" rx=\"27\" ry=\"18\"/>\n",
|
||||
"<text text-anchor=\"middle\" x=\"77.25\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">jan</text>\n",
|
||||
"</g>\n",
|
||||
"<!-- cindy->jan -->\n",
|
||||
"<g id=\"edge2\" class=\"edge\">\n",
|
||||
"<title>cindy->jan</title>\n",
|
||||
"<path fill=\"none\" stroke=\"black\" d=\"M83.73,-144.1C87.32,-133.84 91.42,-120.36 93.25,-108 95.58,-92.17 95.58,-87.83 93.25,-72 91.95,-63.21 89.5,-53.86 86.91,-45.5\"/>\n",
|
||||
"<polygon fill=\"black\" stroke=\"black\" points=\"90.19,-44.29 83.73,-35.9 83.55,-46.49 90.19,-44.29\"/>\n",
|
||||
"</g>\n",
|
||||
"<!-- marcia->jan -->\n",
|
||||
"<g id=\"edge3\" class=\"edge\">\n",
|
||||
"<title>marcia->jan</title>\n",
|
||||
"<path fill=\"none\" stroke=\"black\" d=\"M50.72,-72.06C54.86,-63.77 59.94,-53.62 64.53,-44.42\"/>\n",
|
||||
"<polygon fill=\"black\" stroke=\"black\" points=\"67.75,-45.82 69.09,-35.31 61.49,-42.69 67.75,-45.82\"/>\n",
|
||||
"</g>\n",
|
||||
"</g>\n",
|
||||
"</svg>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.SVG object>"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# wait 20 secs to see display\n",
|
||||
"cpal_chain.draw(path=\"web.svg\")\n",
|
||||
"SVG(\"web.svg\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "255683de-0c1c-4131-b277-99d09f5ac1fc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%load_ext autoreload\n",
|
||||
"%autoreload 2"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -14,7 +14,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 2,
|
||||
"id": "34f04daf",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -35,7 +35,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 3,
|
||||
"id": "a2648974",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -56,15 +56,110 @@
|
||||
"id": "78ff9df9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To extract entities, we need to create a schema like the following, were we specify all the properties we want to find and the type we expect them to have. We can also specify which of these properties are required and which are optional."
|
||||
"To extract entities, we need to create a schema where we specify all the properties we want to find and the type we expect them to have. We can also specify which of these properties are required and which are optional."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"id": "4ac43eba",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"schema = {\n",
|
||||
" \"properties\": {\n",
|
||||
" \"name\": {\"type\": \"string\"},\n",
|
||||
" \"height\": {\"type\": \"integer\"},\n",
|
||||
" \"hair_color\": {\"type\": \"string\"},\n",
|
||||
" },\n",
|
||||
" \"required\": [\"name\", \"height\"],\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "640bd005",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"inp = \"\"\"\n",
|
||||
"Alex is 5 feet tall. Claudia is 1 feet taller Alex and jumps higher than him. Claudia is a brunette and Alex is blonde.\n",
|
||||
" \"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "64313214",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = create_extraction_chain(schema, llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "17c48adb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As we can see, we extracted the required entities and their properties in the required format (it even calculated Claudia's height before returning!)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "cc5436ed",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'name': 'Alex', 'height': 5, 'hair_color': 'blonde'},\n",
|
||||
" {'name': 'Claudia', 'height': 6, 'hair_color': 'brunette'}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.run(inp)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8d51fcdc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Several entity types"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5813affe",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Notice that we are using OpenAI functions under the hood and thus the model can only call one function per request (with one, unique schema)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "511b9838",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If we want to extract more than one entity type, we need to introduce a little hack - we will define our properties with an included entity type. \n",
|
||||
"\n",
|
||||
"Following we have an example where we also want to extract dog attributes from the passage. Notice the 'person_' and 'dog_' prefixes we use for each property; this tells the model which entity type the property refers to. In this way, the model can return properties from several entity types in one single call."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "cf243a26",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"schema = {\n",
|
||||
" \"properties\": {\n",
|
||||
@@ -103,10 +198,10 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "17c48adb",
|
||||
"id": "eb074f7b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As we can see, we extracted the required entities and their properties in the required format:"
|
||||
"People attributes and dog attributes were correctly extracted from the text in the same call"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -128,7 +223,207 @@
|
||||
" 'person_hair_color': 'brunette'}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.run(inp)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0273e0e2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Unrelated entities"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c07b3480",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"What if our entities are unrelated? In that case, the model will return the unrelated entities in different dictionaries, allowing us to successfully extract several unrelated entity types in the same call."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "01d98af0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Notice that we use `required: []`: we need to allow the model to return **only** person attributes or **only** dog attributes for a single entity (person or dog)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 48,
|
||||
"id": "e584c993",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"schema = {\n",
|
||||
" \"properties\": {\n",
|
||||
" \"person_name\": {\"type\": \"string\"},\n",
|
||||
" \"person_height\": {\"type\": \"integer\"},\n",
|
||||
" \"person_hair_color\": {\"type\": \"string\"},\n",
|
||||
" \"dog_name\": {\"type\": \"string\"},\n",
|
||||
" \"dog_breed\": {\"type\": \"string\"},\n",
|
||||
" },\n",
|
||||
" \"required\": [],\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 49,
|
||||
"id": "ad6b105f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"inp = \"\"\"\n",
|
||||
"Alex is 5 feet tall. Claudia is 1 feet taller Alex and jumps higher than him. Claudia is a brunette and Alex is blonde.\n",
|
||||
"\n",
|
||||
"Willow is a German Shepherd that likes to play with other dogs and can always be found playing with Milo, a border collie that lives close by.\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 50,
|
||||
"id": "6bfe5a33",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = create_extraction_chain(schema, llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "24fe09af",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We have each entity in its own separate dictionary, with only the appropriate attributes being returned"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 51,
|
||||
"id": "f6e1fd89",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'person_name': 'Alex', 'person_height': 5, 'person_hair_color': 'blonde'},\n",
|
||||
" {'person_name': 'Claudia',\n",
|
||||
" 'person_height': 6,\n",
|
||||
" 'person_hair_color': 'brunette'},\n",
|
||||
" {'dog_name': 'Willow', 'dog_breed': 'German Shepherd'},\n",
|
||||
" {'dog_name': 'Milo', 'dog_breed': 'border collie'}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 51,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.run(inp)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0ac466d1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Extra info for an entity"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d240ffc1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"What if.. _we don't know what we want?_ More specifically, say we know a few properties we want to extract for a given entity but we also want to know if there's any extra information in the passage. Fortunately, we don't need to structure everything - we can have unstructured extraction as well. \n",
|
||||
"\n",
|
||||
"We can do this by introducing another hack, namely the *extra_info* attribute - let's see an example."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 68,
|
||||
"id": "f19685f6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"schema = {\n",
|
||||
" \"properties\": {\n",
|
||||
" \"person_name\": {\"type\": \"string\"},\n",
|
||||
" \"person_height\": {\"type\": \"integer\"},\n",
|
||||
" \"person_hair_color\": {\"type\": \"string\"},\n",
|
||||
" \"dog_name\": {\"type\": \"string\"},\n",
|
||||
" \"dog_breed\": {\"type\": \"string\"},\n",
|
||||
" \"dog_extra_info\": {\"type\": \"string\"},\n",
|
||||
" },\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 81,
|
||||
"id": "200c3477",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"inp = \"\"\"\n",
|
||||
"Alex is 5 feet tall. Claudia is 1 feet taller Alex and jumps higher than him. Claudia is a brunette and Alex is blonde.\n",
|
||||
"\n",
|
||||
"Willow is a German Shepherd that likes to play with other dogs and can always be found playing with Milo, a border collie that lives close by.\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 82,
|
||||
"id": "ddad7dc6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = create_extraction_chain(schema, llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e5c0dbbc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"It is nice to know more about Willow and Milo!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 83,
|
||||
"id": "c22cfd30",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'person_name': 'Alex', 'person_height': 5, 'person_hair_color': 'blonde'},\n",
|
||||
" {'person_name': 'Claudia',\n",
|
||||
" 'person_height': 6,\n",
|
||||
" 'person_hair_color': 'brunette'},\n",
|
||||
" {'dog_name': 'Willow',\n",
|
||||
" 'dog_breed': 'German Shepherd',\n",
|
||||
" 'dog_extra_information': 'likes to play with other dogs'},\n",
|
||||
" {'dog_name': 'Milo',\n",
|
||||
" 'dog_breed': 'border collie',\n",
|
||||
" 'dog_extra_information': 'lives close by'}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 83,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
||||
@@ -56,7 +56,8 @@
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"SERPER_API_KEY\"] = \"\""
|
||||
"os.environ[\"SERPER_API_KEY\"] = \"\"",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -71,13 +72,16 @@
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"from langchain.schema import BaseRetriever\n",
|
||||
"from langchain.callbacks.manager import AsyncCallbackManagerForRetrieverRun, CallbackManagerForRetrieverRun\n",
|
||||
"from langchain.callbacks.manager import (\n",
|
||||
" AsyncCallbackManagerForRetrieverRun,\n",
|
||||
" CallbackManagerForRetrieverRun,\n",
|
||||
")\n",
|
||||
"from langchain.utilities import GoogleSerperAPIWrapper\n",
|
||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.schema import Document\n",
|
||||
"from typing import Any"
|
||||
"from typing import Any, List"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -96,13 +100,15 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class SerperSearchRetriever(BaseRetriever):\n",
|
||||
" def __init__(self, search):\n",
|
||||
" self.search = search\n",
|
||||
" search: GoogleSerperAPIWrapper = None\n",
|
||||
"\n",
|
||||
" def _get_relevant_documents(self, query: str, *, run_manager: CallbackManagerForRetrieverRun, **kwargs: Any) -> List[Document]:\n",
|
||||
" def _get_relevant_documents(\n",
|
||||
" self, query: str, *, run_manager: CallbackManagerForRetrieverRun, **kwargs: Any\n",
|
||||
" ) -> List[Document]:\n",
|
||||
" return [Document(page_content=self.search.run(query))]\n",
|
||||
"\n",
|
||||
" async def _aget_relevant_documents(self,\n",
|
||||
" async def _aget_relevant_documents(\n",
|
||||
" self,\n",
|
||||
" query: str,\n",
|
||||
" *,\n",
|
||||
" run_manager: AsyncCallbackManagerForRetrieverRun,\n",
|
||||
@@ -111,7 +117,7 @@
|
||||
" raise NotImplementedError()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"retriever = SerperSearchRetriever(GoogleSerperAPIWrapper())"
|
||||
"retriever = SerperSearchRetriever(search=GoogleSerperAPIWrapper())"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
308
docs/extras/modules/chains/additional/graph_hugegraph_qa.ipynb
Normal file
@@ -0,0 +1,308 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d2777010",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# HugeGraph QA Chain\n",
|
||||
"\n",
|
||||
"This notebook shows how to use LLMs to provide a natural language interface to [HugeGraph](https://hugegraph.apache.org/cn/) database."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f26dcbe4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You will need to have a running HugeGraph instance.\n",
|
||||
"You can run a local docker container by running the executing the following script:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"docker run \\\n",
|
||||
" --name=graph \\\n",
|
||||
" -itd \\\n",
|
||||
" -p 8080:8080 \\\n",
|
||||
" hugegraph/hugegraph\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"If we want to connect HugeGraph in the application, we need to install python sdk:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"pip3 install hugegraph-python\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d64a29f1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you are using the docker container, you need to wait a couple of second for the database to start, and then we need create schema and write graph data for the database."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "e53ab93e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from hugegraph.connection import PyHugeGraph\n",
|
||||
"\n",
|
||||
"client = PyHugeGraph(\"localhost\", \"8080\", user=\"admin\", pwd=\"admin\", graph=\"hugegraph\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b7c3a50e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First, we create the schema for a simple movie database:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "ef5372a8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'create EdgeLabel success, Detail: \"b\\'{\"id\":1,\"name\":\"ActedIn\",\"source_label\":\"Person\",\"target_label\":\"Movie\",\"frequency\":\"SINGLE\",\"sort_keys\":[],\"nullable_keys\":[],\"index_labels\":[],\"properties\":[],\"status\":\"CREATED\",\"ttl\":0,\"enable_label_index\":true,\"user_data\":{\"~create_time\":\"2023-07-04 10:48:47.908\"}}\\'\"'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"\"\"\"schema\"\"\"\n",
|
||||
"schema = client.schema()\n",
|
||||
"schema.propertyKey(\"name\").asText().ifNotExist().create()\n",
|
||||
"schema.propertyKey(\"birthDate\").asText().ifNotExist().create()\n",
|
||||
"schema.vertexLabel(\"Person\").properties(\n",
|
||||
" \"name\", \"birthDate\"\n",
|
||||
").usePrimaryKeyId().primaryKeys(\"name\").ifNotExist().create()\n",
|
||||
"schema.vertexLabel(\"Movie\").properties(\"name\").usePrimaryKeyId().primaryKeys(\n",
|
||||
" \"name\"\n",
|
||||
").ifNotExist().create()\n",
|
||||
"schema.edgeLabel(\"ActedIn\").sourceLabel(\"Person\").targetLabel(\n",
|
||||
" \"Movie\"\n",
|
||||
").ifNotExist().create()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "016f7989",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Then we can insert some data."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "b7f4c370",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"1:Robert De Niro--ActedIn-->2:The Godfather Part II"
|
||||
]
|
||||
},
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"\"\"\"graph\"\"\"\n",
|
||||
"g = client.graph()\n",
|
||||
"g.addVertex(\"Person\", {\"name\": \"Al Pacino\", \"birthDate\": \"1940-04-25\"})\n",
|
||||
"g.addVertex(\"Person\", {\"name\": \"Robert De Niro\", \"birthDate\": \"1943-08-17\"})\n",
|
||||
"g.addVertex(\"Movie\", {\"name\": \"The Godfather\"})\n",
|
||||
"g.addVertex(\"Movie\", {\"name\": \"The Godfather Part II\"})\n",
|
||||
"g.addVertex(\"Movie\", {\"name\": \"The Godfather Coda The Death of Michael Corleone\"})\n",
|
||||
"\n",
|
||||
"g.addEdge(\"ActedIn\", \"1:Al Pacino\", \"2:The Godfather\", {})\n",
|
||||
"g.addEdge(\"ActedIn\", \"1:Al Pacino\", \"2:The Godfather Part II\", {})\n",
|
||||
"g.addEdge(\n",
|
||||
" \"ActedIn\", \"1:Al Pacino\", \"2:The Godfather Coda The Death of Michael Corleone\", {}\n",
|
||||
")\n",
|
||||
"g.addEdge(\"ActedIn\", \"1:Robert De Niro\", \"2:The Godfather Part II\", {})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5b8f7788",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Creating `HugeGraphQAChain`\n",
|
||||
"\n",
|
||||
"We can now create the `HugeGraph` and `HugeGraphQAChain`. To create the `HugeGraph` we simply need to pass the database object to the `HugeGraph` constructor."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"id": "f1f68fcf",
|
||||
"metadata": {
|
||||
"is_executing": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.chains import HugeGraphQAChain\n",
|
||||
"from langchain.graphs import HugeGraph"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"id": "b86ebfa7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"graph = HugeGraph(\n",
|
||||
" username=\"admin\",\n",
|
||||
" password=\"admin\",\n",
|
||||
" address=\"localhost\",\n",
|
||||
" port=8080,\n",
|
||||
" graph=\"hugegraph\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e262540b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Refresh graph schema information\n",
|
||||
"\n",
|
||||
"If the schema of database changes, you can refresh the schema information needed to generate Gremlin statements."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"id": "134dd8d6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# graph.refresh_schema()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"id": "e78b8e72",
|
||||
"metadata": {
|
||||
"ExecuteTime": {}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Node properties: [name: Person, primary_keys: ['name'], properties: ['name', 'birthDate'], name: Movie, primary_keys: ['name'], properties: ['name']]\n",
|
||||
"Edge properties: [name: ActedIn, properties: []]\n",
|
||||
"Relationships: ['Person--ActedIn-->Movie']\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(graph.get_schema)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5c27e813",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Querying the graph\n",
|
||||
"\n",
|
||||
"We can now use the graph Gremlin QA chain to ask question of the graph"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"id": "3b23dead",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = HugeGraphQAChain.from_llm(ChatOpenAI(temperature=0), graph=graph, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"id": "76aecc93",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"Generated gremlin:\n",
|
||||
"\u001b[32;1m\u001b[1;3mg.V().has('Movie', 'name', 'The Godfather').in('ActedIn').valueMap(true)\u001b[0m\n",
|
||||
"Full Context:\n",
|
||||
"\u001b[32;1m\u001b[1;3m[{'id': '1:Al Pacino', 'label': 'Person', 'name': ['Al Pacino'], 'birthDate': ['1940-04-25']}]\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Al Pacino played in The Godfather.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.run(\"Who played in The Godfather?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "869f0258",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "venv",
|
||||
"language": "python",
|
||||
"name": "venv"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -31,6 +31,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import kuzu\n",
|
||||
"\n",
|
||||
"db = kuzu.Database(\"test_db\")\n",
|
||||
"conn = kuzu.Connection(db)"
|
||||
]
|
||||
@@ -61,7 +62,9 @@
|
||||
],
|
||||
"source": [
|
||||
"conn.execute(\"CREATE NODE TABLE Movie (name STRING, PRIMARY KEY(name))\")\n",
|
||||
"conn.execute(\"CREATE NODE TABLE Person (name STRING, birthDate STRING, PRIMARY KEY(name))\")\n",
|
||||
"conn.execute(\n",
|
||||
" \"CREATE NODE TABLE Person (name STRING, birthDate STRING, PRIMARY KEY(name))\"\n",
|
||||
")\n",
|
||||
"conn.execute(\"CREATE REL TABLE ActedIn (FROM Person TO Movie)\")"
|
||||
]
|
||||
},
|
||||
@@ -94,11 +97,21 @@
|
||||
"conn.execute(\"CREATE (:Person {name: 'Robert De Niro', birthDate: '1943-08-17'})\")\n",
|
||||
"conn.execute(\"CREATE (:Movie {name: 'The Godfather'})\")\n",
|
||||
"conn.execute(\"CREATE (:Movie {name: 'The Godfather: Part II'})\")\n",
|
||||
"conn.execute(\"CREATE (:Movie {name: 'The Godfather Coda: The Death of Michael Corleone'})\")\n",
|
||||
"conn.execute(\"MATCH (p:Person), (m:Movie) WHERE p.name = 'Al Pacino' AND m.name = 'The Godfather' CREATE (p)-[:ActedIn]->(m)\")\n",
|
||||
"conn.execute(\"MATCH (p:Person), (m:Movie) WHERE p.name = 'Al Pacino' AND m.name = 'The Godfather: Part II' CREATE (p)-[:ActedIn]->(m)\")\n",
|
||||
"conn.execute(\"MATCH (p:Person), (m:Movie) WHERE p.name = 'Al Pacino' AND m.name = 'The Godfather Coda: The Death of Michael Corleone' CREATE (p)-[:ActedIn]->(m)\")\n",
|
||||
"conn.execute(\"MATCH (p:Person), (m:Movie) WHERE p.name = 'Robert De Niro' AND m.name = 'The Godfather: Part II' CREATE (p)-[:ActedIn]->(m)\")"
|
||||
"conn.execute(\n",
|
||||
" \"CREATE (:Movie {name: 'The Godfather Coda: The Death of Michael Corleone'})\"\n",
|
||||
")\n",
|
||||
"conn.execute(\n",
|
||||
" \"MATCH (p:Person), (m:Movie) WHERE p.name = 'Al Pacino' AND m.name = 'The Godfather' CREATE (p)-[:ActedIn]->(m)\"\n",
|
||||
")\n",
|
||||
"conn.execute(\n",
|
||||
" \"MATCH (p:Person), (m:Movie) WHERE p.name = 'Al Pacino' AND m.name = 'The Godfather: Part II' CREATE (p)-[:ActedIn]->(m)\"\n",
|
||||
")\n",
|
||||
"conn.execute(\n",
|
||||
" \"MATCH (p:Person), (m:Movie) WHERE p.name = 'Al Pacino' AND m.name = 'The Godfather Coda: The Death of Michael Corleone' CREATE (p)-[:ActedIn]->(m)\"\n",
|
||||
")\n",
|
||||
"conn.execute(\n",
|
||||
" \"MATCH (p:Person), (m:Movie) WHERE p.name = 'Robert De Niro' AND m.name = 'The Godfather: Part II' CREATE (p)-[:ActedIn]->(m)\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -137,9 +150,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = KuzuQAChain.from_llm(\n",
|
||||
" ChatOpenAI(temperature=0), graph=graph, verbose=True\n",
|
||||
")"
|
||||
"chain = KuzuQAChain.from_llm(ChatOpenAI(temperature=0), graph=graph, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
303
docs/extras/modules/chains/additional/graph_sparql_qa.ipynb
Normal file
@@ -0,0 +1,303 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c94240f5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# GraphSparqlQAChain\n",
|
||||
"\n",
|
||||
"Graph databases are an excellent choice for applications based on network-like models. To standardize the syntax and semantics of such graphs, the W3C recommends Semantic Web Technologies, cp. [Semantic Web](https://www.w3.org/standards/semanticweb/). [SPARQL](https://www.w3.org/TR/sparql11-query/) serves as a query language analogously to SQL or Cypher for these graphs. This notebook demonstrates the application of LLMs as a natural language interface to a graph database by generating SPARQL.\\\n",
|
||||
"Disclaimer: To date, SPARQL query generation via LLMs is still a bit unstable. Be especially careful with UPDATE queries, which alter the graph."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "dbc0ee68",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"There are several sources you can run queries against, including files on the web, files you have available locally, SPARQL endpoints, e.g., [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page), and [triple stores](https://www.w3.org/wiki/LargeTripleStores)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "62812aad",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": true
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.chains import GraphSparqlQAChain\n",
|
||||
"from langchain.graphs import RdfGraph"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "0928915d",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": true
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"graph = RdfGraph(\n",
|
||||
" source_file=\"http://www.w3.org/People/Berners-Lee/card\",\n",
|
||||
" standard=\"rdf\",\n",
|
||||
" local_copy=\"test.ttl\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"Note that providing a `local_file` is necessary for storing changes locally if the source is read-only."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "7af596b5"
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "58c1a8ea",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Refresh graph schema information\n",
|
||||
"If the schema of the database changes, you can refresh the schema information needed to generate SPARQL queries."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "4e3de44f",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": true
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"graph.load_schema()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "1fe76ccd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"In the following, each IRI is followed by the local name and optionally its description in parentheses. \n",
|
||||
"The RDF graph supports the following node types:\n",
|
||||
"<http://xmlns.com/foaf/0.1/PersonalProfileDocument> (PersonalProfileDocument, None), <http://www.w3.org/ns/auth/cert#RSAPublicKey> (RSAPublicKey, None), <http://www.w3.org/2000/10/swap/pim/contact#Male> (Male, None), <http://xmlns.com/foaf/0.1/Person> (Person, None), <http://www.w3.org/2006/vcard/ns#Work> (Work, None)\n",
|
||||
"The RDF graph supports the following relationships:\n",
|
||||
"<http://www.w3.org/2000/01/rdf-schema#seeAlso> (seeAlso, None), <http://purl.org/dc/elements/1.1/title> (title, None), <http://xmlns.com/foaf/0.1/mbox_sha1sum> (mbox_sha1sum, None), <http://xmlns.com/foaf/0.1/maker> (maker, None), <http://www.w3.org/ns/solid/terms#oidcIssuer> (oidcIssuer, None), <http://www.w3.org/2000/10/swap/pim/contact#publicHomePage> (publicHomePage, None), <http://xmlns.com/foaf/0.1/openid> (openid, None), <http://www.w3.org/ns/pim/space#storage> (storage, None), <http://xmlns.com/foaf/0.1/name> (name, None), <http://www.w3.org/2000/10/swap/pim/contact#country> (country, None), <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> (type, None), <http://www.w3.org/ns/solid/terms#profileHighlightColor> (profileHighlightColor, None), <http://www.w3.org/ns/pim/space#preferencesFile> (preferencesFile, None), <http://www.w3.org/2000/01/rdf-schema#label> (label, None), <http://www.w3.org/ns/auth/cert#modulus> (modulus, None), <http://www.w3.org/2000/10/swap/pim/contact#participant> (participant, None), <http://www.w3.org/2000/10/swap/pim/contact#street2> (street2, None), <http://www.w3.org/2006/vcard/ns#locality> (locality, None), <http://xmlns.com/foaf/0.1/nick> (nick, None), <http://xmlns.com/foaf/0.1/homepage> (homepage, None), <http://creativecommons.org/ns#license> (license, None), <http://xmlns.com/foaf/0.1/givenname> (givenname, None), <http://www.w3.org/2006/vcard/ns#street-address> (street-address, None), <http://www.w3.org/2006/vcard/ns#postal-code> (postal-code, None), <http://www.w3.org/2000/10/swap/pim/contact#street> (street, None), <http://www.w3.org/2003/01/geo/wgs84_pos#lat> (lat, None), <http://xmlns.com/foaf/0.1/primaryTopic> (primaryTopic, None), <http://www.w3.org/2006/vcard/ns#fn> (fn, None), <http://www.w3.org/2003/01/geo/wgs84_pos#location> (location, None), <http://usefulinc.com/ns/doap#developer> (developer, None), <http://www.w3.org/2000/10/swap/pim/contact#city> (city, None), <http://www.w3.org/2006/vcard/ns#region> (region, None), <http://xmlns.com/foaf/0.1/member> (member, None), <http://www.w3.org/2003/01/geo/wgs84_pos#long> (long, None), <http://www.w3.org/2000/10/swap/pim/contact#address> (address, None), <http://xmlns.com/foaf/0.1/family_name> (family_name, None), <http://xmlns.com/foaf/0.1/account> (account, None), <http://xmlns.com/foaf/0.1/workplaceHomepage> (workplaceHomepage, None), <http://purl.org/dc/terms/title> (title, None), <http://www.w3.org/ns/solid/terms#publicTypeIndex> (publicTypeIndex, None), <http://www.w3.org/2000/10/swap/pim/contact#office> (office, None), <http://www.w3.org/2000/10/swap/pim/contact#homePage> (homePage, None), <http://xmlns.com/foaf/0.1/mbox> (mbox, None), <http://www.w3.org/2000/10/swap/pim/contact#preferredURI> (preferredURI, None), <http://www.w3.org/ns/solid/terms#profileBackgroundColor> (profileBackgroundColor, None), <http://schema.org/owns> (owns, None), <http://xmlns.com/foaf/0.1/based_near> (based_near, None), <http://www.w3.org/2006/vcard/ns#hasAddress> (hasAddress, None), <http://xmlns.com/foaf/0.1/img> (img, None), <http://www.w3.org/2000/10/swap/pim/contact#assistant> (assistant, None), <http://xmlns.com/foaf/0.1/title> (title, None), <http://www.w3.org/ns/auth/cert#key> (key, None), <http://www.w3.org/ns/ldp#inbox> (inbox, None), <http://www.w3.org/ns/solid/terms#editableProfile> (editableProfile, None), <http://www.w3.org/2000/10/swap/pim/contact#postalCode> (postalCode, None), <http://xmlns.com/foaf/0.1/weblog> (weblog, None), <http://www.w3.org/ns/auth/cert#exponent> (exponent, None), <http://rdfs.org/sioc/ns#avatar> (avatar, None)\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"graph.get_schema"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "68a3c677",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Querying the graph\n",
|
||||
"\n",
|
||||
"Now, you can use the graph SPARQL QA chain to ask questions about the graph."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "7476ce98",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": true
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = GraphSparqlQAChain.from_llm(\n",
|
||||
" ChatOpenAI(temperature=0), graph=graph, verbose=True\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "ef8ee27b",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": true
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new GraphSparqlQAChain chain...\u001b[0m\n",
|
||||
"Identified intent:\n",
|
||||
"\u001b[32;1m\u001b[1;3mSELECT\u001b[0m\n",
|
||||
"Generated SPARQL:\n",
|
||||
"\u001b[32;1m\u001b[1;3mPREFIX foaf: <http://xmlns.com/foaf/0.1/>\n",
|
||||
"SELECT ?homepage\n",
|
||||
"WHERE {\n",
|
||||
" ?person foaf:name \"Tim Berners-Lee\" .\n",
|
||||
" ?person foaf:workplaceHomepage ?homepage .\n",
|
||||
"}\u001b[0m\n",
|
||||
"Full Context:\n",
|
||||
"\u001b[32;1m\u001b[1;3m[]\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"Tim Berners-Lee's work homepage is http://www.w3.org/People/Berners-Lee/.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.run(\"What is Tim Berners-Lee's work homepage?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "af4b3294",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Updating the graph\n",
|
||||
"\n",
|
||||
"Analogously, you can update the graph, i.e., insert triples, using natural language."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "fdf38841",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": true
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new GraphSparqlQAChain chain...\u001b[0m\n",
|
||||
"Identified intent:\n",
|
||||
"\u001b[32;1m\u001b[1;3mUPDATE\u001b[0m\n",
|
||||
"Generated SPARQL:\n",
|
||||
"\u001b[32;1m\u001b[1;3mPREFIX foaf: <http://xmlns.com/foaf/0.1/>\n",
|
||||
"INSERT {\n",
|
||||
" ?person foaf:workplaceHomepage <http://www.w3.org/foo/bar/> .\n",
|
||||
"}\n",
|
||||
"WHERE {\n",
|
||||
" ?person foaf:name \"Timothy Berners-Lee\" .\n",
|
||||
"}\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Successfully inserted triples into the graph.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.run(\n",
|
||||
" \"Save that the person with the name 'Timothy Berners-Lee' has a work homepage at 'http://www.w3.org/foo/bar/'\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5e0f7fc1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's verify the results:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "f874171b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[(rdflib.term.URIRef('https://www.w3.org/'),),\n",
|
||||
" (rdflib.term.URIRef('http://www.w3.org/foo/bar/'),)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = (\n",
|
||||
" \"\"\"PREFIX foaf: <http://xmlns.com/foaf/0.1/>\\n\"\"\"\n",
|
||||
" \"\"\"SELECT ?hp\\n\"\"\"\n",
|
||||
" \"\"\"WHERE {\\n\"\"\"\n",
|
||||
" \"\"\" ?person foaf:name \"Timothy Berners-Lee\" . \\n\"\"\"\n",
|
||||
" \"\"\" ?person foaf:workplaceHomepage ?hp .\\n\"\"\"\n",
|
||||
" \"\"\"}\"\"\"\n",
|
||||
")\n",
|
||||
"graph.query(query)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "lc",
|
||||
"language": "python",
|
||||
"name": "lc"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
160
docs/extras/modules/chains/additional/llm_symbolic_math.ipynb
Normal file
@@ -0,0 +1,160 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# LLM Symbolic Math \n",
|
||||
"This notebook showcases using LLMs and Python to Solve Algebraic Equations."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Calculating the limit of an equation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMSymbolicMathChain chain...\u001b[0m\n",
|
||||
"What is the limit of sin(x) / x as x goes to 0?\u001b[32;1m\u001b[1;3mAnswer: 1\u001b[0m\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Answer: 1'"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.chains.llm_symbolic_math.base import LLMSymbolicMathChain\n",
|
||||
"\n",
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"llm_symbolic_math = LLMSymbolicMathChain.from_llm(llm, verbose=True)\n",
|
||||
"\n",
|
||||
"llm_symbolic_math.run(\"What is the limit of sin(x) / x as x goes to 0?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Calculating an integral"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMSymbolicMathChain chain...\u001b[0m\n",
|
||||
"What is the integral of e^-x from 0 to infinity?\u001b[32;1m\u001b[1;3mAnswer: 1\u001b[0m\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Answer: 1'"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm_symbolic_math.run(\"What is the integral of e^-x from 0 to infinity?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Calculating an algebraic equation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMSymbolicMathChain chain...\u001b[0m\n",
|
||||
"What are the solutions to this equation x**2 - x?\u001b[32;1m\u001b[1;3mAnswer: 0 and 1.\u001b[0m\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Answer: 0 and 1.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm_symbolic_math.run(\"What are the solutions to this equation x**2 - x?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -38,7 +38,7 @@
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"texts = text_splitter.split_documents(documents)\n",
|
||||
"for i, text in enumerate(texts):\n",
|
||||
" text.metadata['source'] = f\"{i}-pl\"\n",
|
||||
" text.metadata[\"source\"] = f\"{i}-pl\"\n",
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"docsearch = Chroma.from_documents(texts, embeddings)"
|
||||
]
|
||||
@@ -97,8 +97,8 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"final_qa_chain = StuffDocumentsChain(\n",
|
||||
" llm_chain=qa_chain, \n",
|
||||
" document_variable_name='context',\n",
|
||||
" llm_chain=qa_chain,\n",
|
||||
" document_variable_name=\"context\",\n",
|
||||
" document_prompt=doc_prompt,\n",
|
||||
")"
|
||||
]
|
||||
@@ -111,8 +111,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retrieval_qa = RetrievalQA(\n",
|
||||
" retriever=docsearch.as_retriever(),\n",
|
||||
" combine_documents_chain=final_qa_chain\n",
|
||||
" retriever=docsearch.as_retriever(), combine_documents_chain=final_qa_chain\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -175,8 +174,8 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"final_qa_chain_pydantic = StuffDocumentsChain(\n",
|
||||
" llm_chain=qa_chain_pydantic, \n",
|
||||
" document_variable_name='context',\n",
|
||||
" llm_chain=qa_chain_pydantic,\n",
|
||||
" document_variable_name=\"context\",\n",
|
||||
" document_prompt=doc_prompt,\n",
|
||||
")"
|
||||
]
|
||||
@@ -189,8 +188,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retrieval_qa_pydantic = RetrievalQA(\n",
|
||||
" retriever=docsearch.as_retriever(),\n",
|
||||
" combine_documents_chain=final_qa_chain_pydantic\n",
|
||||
" retriever=docsearch.as_retriever(), combine_documents_chain=final_qa_chain_pydantic\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -235,6 +233,7 @@
|
||||
"from langchain.chains import ConversationalRetrievalChain\n",
|
||||
"from langchain.memory import ConversationBufferMemory\n",
|
||||
"from langchain.chains import LLMChain\n",
|
||||
"\n",
|
||||
"memory = ConversationBufferMemory(memory_key=\"chat_history\", return_messages=True)\n",
|
||||
"_template = \"\"\"Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.\\\n",
|
||||
"Make sure to avoid using any unclear pronouns.\n",
|
||||
@@ -258,10 +257,10 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"qa = ConversationalRetrievalChain(\n",
|
||||
" question_generator=condense_question_chain, \n",
|
||||
" question_generator=condense_question_chain,\n",
|
||||
" retriever=docsearch.as_retriever(),\n",
|
||||
" memory=memory, \n",
|
||||
" combine_docs_chain=final_qa_chain\n",
|
||||
" memory=memory,\n",
|
||||
" combine_docs_chain=final_qa_chain,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -389,7 +388,9 @@
|
||||
" \"\"\"An answer to the question being asked, with sources.\"\"\"\n",
|
||||
"\n",
|
||||
" answer: str = Field(..., description=\"Answer to the question that was asked\")\n",
|
||||
" countries_referenced: List[str] = Field(..., description=\"All of the countries mentioned in the sources\")\n",
|
||||
" countries_referenced: List[str] = Field(\n",
|
||||
" ..., description=\"All of the countries mentioned in the sources\"\n",
|
||||
" )\n",
|
||||
" sources: List[str] = Field(\n",
|
||||
" ..., description=\"List of sources used to answer the question\"\n",
|
||||
" )\n",
|
||||
@@ -405,20 +406,23 @@
|
||||
" HumanMessage(content=\"Answer question using the following context\"),\n",
|
||||
" HumanMessagePromptTemplate.from_template(\"{context}\"),\n",
|
||||
" HumanMessagePromptTemplate.from_template(\"Question: {question}\"),\n",
|
||||
" HumanMessage(content=\"Tips: Make sure to answer in the correct format. Return all of the countries mentioned in the sources in uppercase characters.\"),\n",
|
||||
" HumanMessage(\n",
|
||||
" content=\"Tips: Make sure to answer in the correct format. Return all of the countries mentioned in the sources in uppercase characters.\"\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"chain_prompt = ChatPromptTemplate(messages=prompt_messages)\n",
|
||||
"\n",
|
||||
"qa_chain_pydantic = create_qa_with_structure_chain(llm, CustomResponseSchema, output_parser=\"pydantic\", prompt=chain_prompt)\n",
|
||||
"qa_chain_pydantic = create_qa_with_structure_chain(\n",
|
||||
" llm, CustomResponseSchema, output_parser=\"pydantic\", prompt=chain_prompt\n",
|
||||
")\n",
|
||||
"final_qa_chain_pydantic = StuffDocumentsChain(\n",
|
||||
" llm_chain=qa_chain_pydantic,\n",
|
||||
" document_variable_name='context',\n",
|
||||
" document_variable_name=\"context\",\n",
|
||||
" document_prompt=doc_prompt,\n",
|
||||
")\n",
|
||||
"retrieval_qa_pydantic = RetrievalQA(\n",
|
||||
" retriever=docsearch.as_retriever(),\n",
|
||||
" combine_documents_chain=final_qa_chain_pydantic\n",
|
||||
" retriever=docsearch.as_retriever(), combine_documents_chain=final_qa_chain_pydantic\n",
|
||||
")\n",
|
||||
"query = \"What did he say about russia\"\n",
|
||||
"retrieval_qa_pydantic.run(query)"
|
||||
|
||||
@@ -35,7 +35,9 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = get_openapi_chain(\"https://www.klarna.com/us/shopping/public/openai/v0/api-docs/\")"
|
||||
"chain = get_openapi_chain(\n",
|
||||
" \"https://www.klarna.com/us/shopping/public/openai/v0/api-docs/\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -186,7 +188,9 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = get_openapi_chain(\"https://gist.githubusercontent.com/roaldnefs/053e505b2b7a807290908fe9aa3e1f00/raw/0a212622ebfef501163f91e23803552411ed00e4/openapi.yaml\")"
|
||||
"chain = get_openapi_chain(\n",
|
||||
" \"https://gist.githubusercontent.com/roaldnefs/053e505b2b7a807290908fe9aa3e1f00/raw/0a212622ebfef501163f91e23803552411ed00e4/openapi.yaml\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
"\n",
|
||||
"from pydantic import Extra\n",
|
||||
"\n",
|
||||
"from langchain.base_language import BaseLanguageModel\n",
|
||||
"from langchain.schemea import BaseLanguageModel\n",
|
||||
"from langchain.callbacks.manager import (\n",
|
||||
" AsyncCallbackManagerForChainRun,\n",
|
||||
" CallbackManagerForChainRun,\n",
|
||||
|
||||
528
docs/extras/modules/chains/popular/openai_functions.ipynb
Normal file
@@ -0,0 +1,528 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "54ccb772",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Using OpenAI functions\n",
|
||||
"This walkthrough demonstrates how to incorporate OpenAI function-calling API's in a chain. We'll go over: \n",
|
||||
"1. How to use functions to get structured outputs from ChatOpenAI\n",
|
||||
"2. How to create a generic chain that uses (multiple) functions\n",
|
||||
"3. How to create a chain that actually executes the chosen function"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "767ac575",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from typing import Optional\n",
|
||||
"\n",
|
||||
"from langchain.chains.openai_functions import (\n",
|
||||
" create_openai_fn_chain,\n",
|
||||
" create_structured_output_chain,\n",
|
||||
")\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate\n",
|
||||
"from langchain.schema import HumanMessage, SystemMessage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "976b6496",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Getting structured outputs\n",
|
||||
"We can take advantage of OpenAI functions to try and force the model to return a particular kind of structured output. We'll use the `create_structured_output_chain` to create our chain, which takes the desired structured output either as a Pydantic class or as JsonSchema.\n",
|
||||
"\n",
|
||||
"See here for relevant [reference docs](https://api.python.langchain.com/en/latest/chains/langchain.chains.openai_functions.base.create_structured_output_chain.html)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e052faae",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Using Pydantic classes\n",
|
||||
"When passing in Pydantic classes to structure our text, we need to make sure to have a docstring description for the class. It also helps to have descriptions for each of the classes attributes."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "0e085c99",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from pydantic import BaseModel, Field\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class Person(BaseModel):\n",
|
||||
" \"\"\"Identifying information about a person.\"\"\"\n",
|
||||
"\n",
|
||||
" name: str = Field(..., description=\"The person's name\")\n",
|
||||
" age: int = Field(..., description=\"The person's age\")\n",
|
||||
" fav_food: Optional[str] = Field(None, description=\"The person's favorite food\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "b459a33e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for extracting information in structured formats.\n",
|
||||
"Human: Use the given format to extract information from the following input:\n",
|
||||
"Human: Sally is 13\n",
|
||||
"Human: Tips: Make sure to answer in the correct format\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'name': 'Sally', 'age': 13}"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# If we pass in a model explicitly, we need to make sure it supports the OpenAI function-calling API.\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-3.5-turbo-0613\", temperature=0)\n",
|
||||
"\n",
|
||||
"prompt_msgs = [\n",
|
||||
" SystemMessage(\n",
|
||||
" content=\"You are a world class algorithm for extracting information in structured formats.\"\n",
|
||||
" ),\n",
|
||||
" HumanMessage(\n",
|
||||
" content=\"Use the given format to extract information from the following input:\"\n",
|
||||
" ),\n",
|
||||
" HumanMessagePromptTemplate.from_template(\"{input}\"),\n",
|
||||
" HumanMessage(content=\"Tips: Make sure to answer in the correct format\"),\n",
|
||||
"]\n",
|
||||
"prompt = ChatPromptTemplate(messages=prompt_msgs)\n",
|
||||
"\n",
|
||||
"chain = create_structured_output_chain(Person, llm, prompt, verbose=True)\n",
|
||||
"chain.run(\"Sally is 13\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e3539936",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To extract arbitrarily many structured outputs of a given format, we can just create a wrapper Pydantic class that takes a sequence of the original class."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "4d8ea815",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for extracting information in structured formats.\n",
|
||||
"Human: Use the given format to extract information from the following input:\n",
|
||||
"Human: Sally is 13, Joey just turned 12 and loves spinach. Caroline is 10 years older than Sally, so she's 23.\n",
|
||||
"Human: Tips: Make sure to answer in the correct format\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'people': [{'name': 'Sally', 'age': 13, 'fav_food': ''},\n",
|
||||
" {'name': 'Joey', 'age': 12, 'fav_food': 'spinach'},\n",
|
||||
" {'name': 'Caroline', 'age': 23, 'fav_food': ''}]}"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from typing import Sequence\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class People(BaseModel):\n",
|
||||
" \"\"\"Identifying information about all people in a text.\"\"\"\n",
|
||||
"\n",
|
||||
" people: Sequence[Person] = Field(..., description=\"The people in the text\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"chain = create_structured_output_chain(People, llm, prompt, verbose=True)\n",
|
||||
"chain.run(\n",
|
||||
" \"Sally is 13, Joey just turned 12 and loves spinach. Caroline is 10 years older than Sally, so she's 23.\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ea66e10e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Using JsonSchema\n",
|
||||
"\n",
|
||||
"We can also pass in JsonSchema instead of Pydantic classes to specify the desired structure. When we do this, our chain will output json corresponding to the properties described in the JsonSchema, instead of a Pydantic class."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "3484415e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"json_schema = {\n",
|
||||
" \"title\": \"Person\",\n",
|
||||
" \"description\": \"Identifying information about a person.\",\n",
|
||||
" \"type\": \"object\",\n",
|
||||
" \"properties\": {\n",
|
||||
" \"name\": {\"title\": \"Name\", \"description\": \"The person's name\", \"type\": \"string\"},\n",
|
||||
" \"age\": {\"title\": \"Age\", \"description\": \"The person's age\", \"type\": \"integer\"},\n",
|
||||
" \"fav_food\": {\n",
|
||||
" \"title\": \"Fav Food\",\n",
|
||||
" \"description\": \"The person's favorite food\",\n",
|
||||
" \"type\": \"string\",\n",
|
||||
" },\n",
|
||||
" },\n",
|
||||
" \"required\": [\"name\", \"age\"],\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "be9b76b3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for extracting information in structured formats.\n",
|
||||
"Human: Use the given format to extract information from the following input:\n",
|
||||
"Human: Sally is 13\n",
|
||||
"Human: Tips: Make sure to answer in the correct format\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'name': 'Sally', 'age': 13}"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain = create_structured_output_chain(json_schema, llm, prompt, verbose=True)\n",
|
||||
"chain.run(\"Sally is 13\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "12394696",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Creating a generic OpenAI functions chain\n",
|
||||
"To create a generic OpenAI functions chain, we can use the `create_openai_fn_chain` method. This is the same as `create_structured_output_chain` except that instead of taking a single output schema, it takes a sequence of function definitions.\n",
|
||||
"\n",
|
||||
"Functions can be passed in as:\n",
|
||||
"- dicts conforming to OpenAI functions spec,\n",
|
||||
"- Pydantic classes, in which case they should have docstring descriptions of the function they represent and descriptions for each of the parameters,\n",
|
||||
"- Python functions, in which case they should have docstring descriptions of the function and args, along with type hints.\n",
|
||||
"\n",
|
||||
"See here for relevant [reference docs](https://api.python.langchain.com/en/latest/chains/langchain.chains.openai_functions.base.create_openai_fn_chain.html)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ff19be25",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Using Pydantic classes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "17f52508",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class RecordPerson(BaseModel):\n",
|
||||
" \"\"\"Record some identifying information about a pe.\"\"\"\n",
|
||||
"\n",
|
||||
" name: str = Field(..., description=\"The person's name\")\n",
|
||||
" age: int = Field(..., description=\"The person's age\")\n",
|
||||
" fav_food: Optional[str] = Field(None, description=\"The person's favorite food\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class RecordDog(BaseModel):\n",
|
||||
" \"\"\"Record some identifying information about a dog.\"\"\"\n",
|
||||
"\n",
|
||||
" name: str = Field(..., description=\"The dog's name\")\n",
|
||||
" color: str = Field(..., description=\"The dog's color\")\n",
|
||||
" fav_food: Optional[str] = Field(None, description=\"The dog's favorite food\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "a4658ad8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for recording entities\n",
|
||||
"Human: Make calls to the relevant function to record the entities in the following input:\n",
|
||||
"Human: Harry was a chubby brown beagle who loved chicken\n",
|
||||
"Human: Tips: Make sure to answer in the correct format\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"RecordDog(name='Harry', color='brown', fav_food='chicken')"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"prompt_msgs = [\n",
|
||||
" SystemMessage(content=\"You are a world class algorithm for recording entities\"),\n",
|
||||
" HumanMessage(\n",
|
||||
" content=\"Make calls to the relevant function to record the entities in the following input:\"\n",
|
||||
" ),\n",
|
||||
" HumanMessagePromptTemplate.from_template(\"{input}\"),\n",
|
||||
" HumanMessage(content=\"Tips: Make sure to answer in the correct format\"),\n",
|
||||
"]\n",
|
||||
"prompt = ChatPromptTemplate(messages=prompt_msgs)\n",
|
||||
"\n",
|
||||
"chain = create_openai_fn_chain([RecordPerson, RecordDog], llm, prompt, verbose=True)\n",
|
||||
"chain.run(\"Harry was a chubby brown beagle who loved chicken\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "df6d9147",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Using Python functions\n",
|
||||
"We can pass in functions as Pydantic classes, directly as OpenAI function dicts, or Python functions. To pass Python function in directly, we'll want to make sure our parameters have type hints, we have a docstring, and we use [Google Python style docstrings](https://google.github.io/styleguide/pyguide.html#doc-function-args) to describe the parameters.\n",
|
||||
"\n",
|
||||
"**NOTE**: To use Python functions, make sure the function arguments are of primitive types (str, float, int, bool) or that they are Pydantic objects."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "95ac5825",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for recording entities\n",
|
||||
"Human: Make calls to the relevant function to record the entities in the following input:\n",
|
||||
"Human: The most important thing to remember about Tommy, my 12 year old, is that he'll do anything for apple pie.\n",
|
||||
"Human: Tips: Make sure to answer in the correct format\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'name': 'Tommy', 'age': 12, 'fav_food': {'food': 'apple pie'}}"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"class OptionalFavFood(BaseModel):\n",
|
||||
" \"\"\"Either a food or null.\"\"\"\n",
|
||||
"\n",
|
||||
" food: Optional[str] = Field(\n",
|
||||
" None,\n",
|
||||
" description=\"Either the name of a food or null. Should be null if the food isn't known.\",\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def record_person(name: str, age: int, fav_food: OptionalFavFood) -> str:\n",
|
||||
" \"\"\"Record some basic identifying information about a person.\n",
|
||||
"\n",
|
||||
" Args:\n",
|
||||
" name: The person's name.\n",
|
||||
" age: The person's age in years.\n",
|
||||
" fav_food: An OptionalFavFood object that either contains the person's favorite food or a null value. Food should be null if it's not known.\n",
|
||||
" \"\"\"\n",
|
||||
" return f\"Recording person {name} of age {age} with favorite food {fav_food.food}!\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"chain = create_openai_fn_chain([record_person], llm, prompt, verbose=True)\n",
|
||||
"chain.run(\n",
|
||||
" \"The most important thing to remember about Tommy, my 12 year old, is that he'll do anything for apple pie.\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "403ea5dd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If we pass in multiple Python functions or OpenAI functions, then the returned output will be of the form\n",
|
||||
"```python\n",
|
||||
"{\"name\": \"<<function_name>>\", \"arguments\": {<<function_arguments>>}}\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "8b0d11de",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for recording entities\n",
|
||||
"Human: Make calls to the relevant function to record the entities in the following input:\n",
|
||||
"Human: I can't find my dog Henry anywhere, he's a small brown beagle. Could you send a message about him?\n",
|
||||
"Human: Tips: Make sure to answer in the correct format\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'name': 'record_dog',\n",
|
||||
" 'arguments': {'name': 'Henry', 'color': 'brown', 'fav_food': {'food': None}}}"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def record_dog(name: str, color: str, fav_food: OptionalFavFood) -> str:\n",
|
||||
" \"\"\"Record some basic identifying information about a dog.\n",
|
||||
"\n",
|
||||
" Args:\n",
|
||||
" name: The dog's name.\n",
|
||||
" color: The dog's color.\n",
|
||||
" fav_food: An OptionalFavFood object that either contains the dog's favorite food or a null value. Food should be null if it's not known.\n",
|
||||
" \"\"\"\n",
|
||||
" return f\"Recording dog {name} of color {color} with favorite food {fav_food}!\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"chain = create_openai_fn_chain([record_person, record_dog], llm, prompt, verbose=True)\n",
|
||||
"chain.run(\n",
|
||||
" \"I can't find my dog Henry anywhere, he's a small brown beagle. Could you send a message about him?\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5f93686b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Other Chains using OpenAI functions\n",
|
||||
"\n",
|
||||
"There are a number of more specific chains that use OpenAI functions.\n",
|
||||
"- [Extraction](/docs/modules/chains/additional/extraction): very similar to structured output chain, intended for information/entity extraction specifically.\n",
|
||||
"- [Tagging](/docs/modules/chains/additional/tagging): tag inputs.\n",
|
||||
"- [OpenAPI](/docs/modules/chains/additional/openapi_openai): take an OpenAPI spec and create + execute valid requests against the API, using OpenAI functions under the hood.\n",
|
||||
"- [QA with citations](/docs/modules/chains/additional/qa_citations): use OpenAI functions ability to extract citations from text."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "93425c66",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "venv",
|
||||
"language": "python",
|
||||
"name": "venv"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -77,7 +77,9 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"loader = BraveSearchLoader(query=\"obama middle name\", api_key=api_key, search_kwargs={\"count\": 3})\n",
|
||||
"loader = BraveSearchLoader(\n",
|
||||
" query=\"obama middle name\", api_key=api_key, search_kwargs={\"count\": 3}\n",
|
||||
")\n",
|
||||
"docs = loader.load()\n",
|
||||
"len(docs)"
|
||||
]
|
||||
|
||||
@@ -0,0 +1,81 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Browserless"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import BrowserlessLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"BROWSERLESS_API_TOKEN = \"YOUR_API_TOKEN\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<!DOCTYPE html><html class=\"client-js vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-enabled vector-feature-main-menu-pinned-disabled vector-feature-limited-width-enabled vector-feature-limited-width-content-enabled vector-feature-zebra-design-disabled\" lang=\"en\" dir=\"ltr\"><head>\n",
|
||||
"<meta charset=\"UTF-8\">\n",
|
||||
"<title>Document classification - Wikipedia</title>\n",
|
||||
"<script>document.documentElement.className=\"client-js vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-enabled vector-feature-main-menu-pinned-disabled vector-feature-limited-width-enabled vector-feature-limited-width-content-enabled vector-feature-zebra-design-disabled\";(function(){var cookie=document.cookie.match(/(?:^|; )enwikimwclien\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"loader = BrowserlessLoader(\n",
|
||||
" api_token=BROWSERLESS_API_TOKEN,\n",
|
||||
" urls=[\n",
|
||||
" \"https://en.wikipedia.org/wiki/Document_classification\",\n",
|
||||
" ],\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"documents = loader.load()\n",
|
||||
"\n",
|
||||
"print(documents[0].page_content[:1000])"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.1"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -0,0 +1,118 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Cube Semantic Layer"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This notebook demonstrates the process of retrieving Cube's data model metadata in a format suitable for passing to LLMs as embeddings, thereby enhancing contextual information."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### About Cube"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[Cube](https://cube.dev/) is the Semantic Layer for building data apps. It helps data engineers and application developers access data from modern data stores, organize it into consistent definitions, and deliver it to every application."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Cube’s data model provides structure and definitions that are used as a context for LLM to understand data and generate correct queries. LLM doesn’t need to navigate complex joins and metrics calculations because Cube abstracts those and provides a simple interface that operates on the business-level terminology, instead of SQL table and column names. This simplification helps LLM to be less error-prone and avoid hallucinations."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"`Cube Semantic Loader` requires 2 arguments:\n",
|
||||
"| Input Parameter | Description |\n",
|
||||
"| --- | --- |\n",
|
||||
"| `cube_api_url` | The URL of your Cube's deployment REST API. Please refer to the [Cube documentation](https://cube.dev/docs/http-api/rest#configuration-base-path) for more information on configuring the base path. |\n",
|
||||
"| `cube_api_token` | The authentication token generated based on your Cube's API secret. Please refer to the [Cube documentation](https://cube.dev/docs/security#generating-json-web-tokens-jwt) for instructions on generating JSON Web Tokens (JWT). |\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import jwt\n",
|
||||
"from langchain.document_loaders import CubeSemanticLoader\n",
|
||||
"\n",
|
||||
"api_url = \"https://api-example.gcp-us-central1.cubecloudapp.dev/cubejs-api/v1/meta\"\n",
|
||||
"cubejs_api_secret = \"api-secret-here\"\n",
|
||||
"security_context = {}\n",
|
||||
"# Read more about security context here: https://cube.dev/docs/security\n",
|
||||
"api_token = jwt.encode(security_context, cubejs_api_secret, algorithm=\"HS256\")\n",
|
||||
"\n",
|
||||
"loader = CubeSemanticLoader(api_url, api_token)\n",
|
||||
"\n",
|
||||
"documents = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Returns:\n",
|
||||
"\n",
|
||||
"A list of documents with the following attributes:\n",
|
||||
"\n",
|
||||
"- `page_content`\n",
|
||||
"- `metadata`\n",
|
||||
" - `table_name`\n",
|
||||
" - `column_name`\n",
|
||||
" - `column_data_type`\n",
|
||||
" - `column_title`\n",
|
||||
" - `column_description`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"> page_content='table name: orders_view, column name: orders_view.total_amount, column data type: number, column title: Orders View Total Amount, column description: None' metadata={'table_name': 'orders_view', 'column_name': 'orders_view.total_amount', 'column_data_type': 'number', 'column_title': 'Orders View Total Amount', 'column_description': 'None'}"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -0,0 +1,96 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Datadog Logs\n",
|
||||
"\n",
|
||||
">[Datadog](https://www.datadoghq.com/) is a monitoring and analytics platform for cloud-scale applications.\n",
|
||||
"\n",
|
||||
"This loader fetches the logs from your applications in Datadog using the `datadog_api_client` Python package. You must initialize the loader with your `Datadog API key` and `APP key`, and you need to pass in the query to extract the desired logs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import DatadogLogsLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install datadog-api-client"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"service:agent status:error\"\n",
|
||||
"\n",
|
||||
"loader = DatadogLogsLoader(\n",
|
||||
" query=query,\n",
|
||||
" api_key=DD_API_KEY,\n",
|
||||
" app_key=DD_APP_KEY,\n",
|
||||
" from_time=1688732708951, # Optional, timestamp in milliseconds\n",
|
||||
" to_time=1688736308951, # Optional, timestamp in milliseconds\n",
|
||||
" limit=100, # Optional, default is 100\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='message: grep: /etc/datadog-agent/system-probe.yaml: No such file or directory', metadata={'id': 'AgAAAYkwpLImvkjRpQAAAAAAAAAYAAAAAEFZa3dwTUFsQUFEWmZfLU5QdElnM3dBWQAAACQAAAAAMDE4OTMwYTQtYzk3OS00MmJjLTlhNDAtOTY4N2EwY2I5ZDdk', 'status': 'error', 'service': 'agent', 'tags': ['accessible-from-goog-gke-node', 'allow-external-ingress-high-ports', 'allow-external-ingress-http', 'allow-external-ingress-https', 'container_id:c7d8ecd27b5b3cfdf3b0df04b8965af6f233f56b7c3c2ffabfab5e3b6ccbd6a5', 'container_name:lab_datadog_1', 'datadog.pipelines:false', 'datadog.submission_auth:private_api_key', 'docker_image:datadog/agent:7.41.1', 'env:dd101-dev', 'hostname:lab-host', 'image_name:datadog/agent', 'image_tag:7.41.1', 'instance-id:7497601202021312403', 'instance-type:custom-1-4096', 'instruqt_aws_accounts:', 'instruqt_azure_subscriptions:', 'instruqt_gcp_projects:', 'internal-hostname:lab-host.d4rjybavkary.svc.cluster.local', 'numeric_project_id:3390740675', 'p-d4rjybavkary', 'project:instruqt-prod', 'service:agent', 'short_image:agent', 'source:agent', 'zone:europe-west1-b'], 'timestamp': datetime.datetime(2023, 7, 7, 13, 57, 27, 206000, tzinfo=tzutc())}),\n",
|
||||
" Document(page_content='message: grep: /etc/datadog-agent/system-probe.yaml: No such file or directory', metadata={'id': 'AgAAAYkwpLImvkjRpgAAAAAAAAAYAAAAAEFZa3dwTUFsQUFEWmZfLU5QdElnM3dBWgAAACQAAAAAMDE4OTMwYTQtYzk3OS00MmJjLTlhNDAtOTY4N2EwY2I5ZDdk', 'status': 'error', 'service': 'agent', 'tags': ['accessible-from-goog-gke-node', 'allow-external-ingress-high-ports', 'allow-external-ingress-http', 'allow-external-ingress-https', 'container_id:c7d8ecd27b5b3cfdf3b0df04b8965af6f233f56b7c3c2ffabfab5e3b6ccbd6a5', 'container_name:lab_datadog_1', 'datadog.pipelines:false', 'datadog.submission_auth:private_api_key', 'docker_image:datadog/agent:7.41.1', 'env:dd101-dev', 'hostname:lab-host', 'image_name:datadog/agent', 'image_tag:7.41.1', 'instance-id:7497601202021312403', 'instance-type:custom-1-4096', 'instruqt_aws_accounts:', 'instruqt_azure_subscriptions:', 'instruqt_gcp_projects:', 'internal-hostname:lab-host.d4rjybavkary.svc.cluster.local', 'numeric_project_id:3390740675', 'p-d4rjybavkary', 'project:instruqt-prod', 'service:agent', 'short_image:agent', 'source:agent', 'zone:europe-west1-b'], 'timestamp': datetime.datetime(2023, 7, 7, 13, 57, 27, 206000, tzinfo=tzutc())})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"documents = loader.load()\n",
|
||||
"documents"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.11"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -0,0 +1,5 @@
|
||||
Stanley Cups
|
||||
Team Location Stanley Cups
|
||||
Blues STL 1
|
||||
Flyers PHI 2
|
||||
Maple Leafs TOR 13
|
||||
|
@@ -1840,7 +1840,7 @@ This category contains articles that are incomplete and are tagged with the {{T|
|
||||
<username>FANDOM</username>
|
||||
<id>32769624</id>
|
||||
</contributor>
|
||||
<comment>Created page with "{{LicenseBox|text=''This work is licensed under the [https://opensource.org/licenses/MIT MIT License].''}}{{#ifeq: {{NAMESPACENUMBER}} | 0 | <includeonly>Category:MIT licens..."</comment>
|
||||
<comment>Created page with "{{LicenseBox|text=''This work is licensed under the [https://opensource.org/licenses/MIT MIT License].''}}{{#ifeq: {{NAMESPACENUMBER}} | 0 | <includeonly>Category:MIT license..."</comment>
|
||||
<origin>104</origin>
|
||||
<model>wikitext</model>
|
||||
<format>text/x-wiki</format>
|
||||
|
||||
@@ -126,11 +126,11 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"file_id=\"1x9WBtFPWMEAdjcJzPScRsjpjQvpSo_kz\"\n",
|
||||
"file_id = \"1x9WBtFPWMEAdjcJzPScRsjpjQvpSo_kz\"\n",
|
||||
"loader = GoogleDriveLoader(\n",
|
||||
" file_ids=[file_id],\n",
|
||||
" file_loader_cls=UnstructuredFileIOLoader,\n",
|
||||
" file_loader_kwargs={\"mode\": \"elements\"}\n",
|
||||
" file_loader_kwargs={\"mode\": \"elements\"},\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -180,11 +180,11 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"folder_id=\"1asMOHY1BqBS84JcRbOag5LOJac74gpmD\"\n",
|
||||
"folder_id = \"1asMOHY1BqBS84JcRbOag5LOJac74gpmD\"\n",
|
||||
"loader = GoogleDriveLoader(\n",
|
||||
" folder_id=folder_id,\n",
|
||||
" file_loader_cls=UnstructuredFileIOLoader,\n",
|
||||
" file_loader_kwargs={\"mode\": \"elements\"}\n",
|
||||
" file_loader_kwargs={\"mode\": \"elements\"},\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -101,7 +101,7 @@
|
||||
" \"../Papers/\",\n",
|
||||
" glob=\"*\",\n",
|
||||
" suffixes=[\".pdf\"],\n",
|
||||
" parser= GrobidParser(segment_sentences=False)\n",
|
||||
" parser=GrobidParser(segment_sentences=False),\n",
|
||||
")\n",
|
||||
"docs = loader.load()"
|
||||
]
|
||||
|
||||
@@ -18,7 +18,10 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import WebBaseLoader\n",
|
||||
"loader_web = WebBaseLoader(\"https://github.com/basecamp/handbook/blob/master/37signals-is-you.md\")"
|
||||
"\n",
|
||||
"loader_web = WebBaseLoader(\n",
|
||||
" \"https://github.com/basecamp/handbook/blob/master/37signals-is-you.md\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -29,6 +32,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import PyPDFLoader\n",
|
||||
"\n",
|
||||
"loader_pdf = PyPDFLoader(\"../MachineLearning-Lecture01.pdf\")"
|
||||
]
|
||||
},
|
||||
@@ -40,7 +44,8 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders.merge import MergedDataLoader\n",
|
||||
"loader_all=MergedDataLoader(loaders=[loader_web,loader_pdf])"
|
||||
"\n",
|
||||
"loader_all = MergedDataLoader(loaders=[loader_web, loader_pdf])"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -50,7 +55,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs_all=loader_all.load()"
|
||||
"docs_all = loader_all.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -36,7 +36,9 @@
|
||||
],
|
||||
"source": [
|
||||
"# Create a new loader object for the MHTML file\n",
|
||||
"loader = MHTMLLoader(file_path='../../../../../../tests/integration_tests/examples/example.mht')\n",
|
||||
"loader = MHTMLLoader(\n",
|
||||
" file_path=\"../../../../../../tests/integration_tests/examples/example.mht\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Load the document from the file\n",
|
||||
"documents = loader.load()\n",
|
||||
|
||||
@@ -53,11 +53,9 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset = \"vw6y-z8j6\" # 311 data\n",
|
||||
"dataset = \"tmnf-yvry\" # crime data\n",
|
||||
"loader = OpenCityDataLoader(city_id=\"data.sfgov.org\",\n",
|
||||
" dataset_id=dataset,\n",
|
||||
" limit=2000)"
|
||||
"dataset = \"vw6y-z8j6\" # 311 data\n",
|
||||
"dataset = \"tmnf-yvry\" # crime data\n",
|
||||
"loader = OpenCityDataLoader(city_id=\"data.sfgov.org\", dataset_id=dataset, limit=2000)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -33,9 +33,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = UnstructuredOrgModeLoader(\n",
|
||||
" file_path=\"example_data/README.org\", mode=\"elements\"\n",
|
||||
")\n",
|
||||
"loader = UnstructuredOrgModeLoader(file_path=\"example_data/README.org\", mode=\"elements\")\n",
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -239,7 +239,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Use lazy load for larger table, which won't read the full table into memory \n",
|
||||
"# Use lazy load for larger table, which won't read the full table into memory\n",
|
||||
"for i in loader.lazy_load():\n",
|
||||
" print(i)"
|
||||
]
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "5a7cc773",
|
||||
"metadata": {},
|
||||
@@ -25,7 +24,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 1,
|
||||
"id": "2e3532b2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -34,7 +33,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "6384c057",
|
||||
"metadata": {},
|
||||
@@ -44,19 +42,19 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 2,
|
||||
"id": "d69e5620",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"url = 'https://js.langchain.com/docs/modules/memory/examples/'\n",
|
||||
"loader=RecursiveUrlLoader(url=url)\n",
|
||||
"docs=loader.load()"
|
||||
"url = \"https://js.langchain.com/docs/modules/memory/examples/\"\n",
|
||||
"loader = RecursiveUrlLoader(url=url)\n",
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 3,
|
||||
"id": "084fb2ce",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -66,7 +64,7 @@
|
||||
"12"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -77,17 +75,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 4,
|
||||
"id": "89355b7c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\n\\n\\n\\nDynamoDB-Backed Chat Memory | \\uf8ffü¶úÔ∏è\\uf8ffüîó Lan'"
|
||||
"'\\n\\n\\n\\n\\nBuffer Window Memory | 🦜️🔗 Langchain\\n\\n\\n\\n\\n\\nSki'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -98,20 +96,20 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 5,
|
||||
"id": "13bd7e16",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'source': 'https://js.langchain.com/docs/modules/memory/examples/dynamodb',\n",
|
||||
" 'title': 'DynamoDB-Backed Chat Memory | \\uf8ffü¶úÔ∏è\\uf8ffüîó Langchain',\n",
|
||||
" 'description': 'For longer-term persistence across chat sessions, you can swap out the default in-memory chatHistory that backs chat memory classes like BufferMemory for a DynamoDB instance.',\n",
|
||||
"{'source': 'https://js.langchain.com/docs/modules/memory/examples/buffer_window_memory',\n",
|
||||
" 'title': 'Buffer Window Memory | 🦜️🔗 Langchain',\n",
|
||||
" 'description': 'BufferWindowMemory keeps track of the back-and-forths in conversation, and then uses a window of size k to surface the last k back-and-forths to use as memory.',\n",
|
||||
" 'language': 'en'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -121,14 +119,29 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "40fc13ef",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now, let's try a more extensive example, the `docs` root dir.\n",
|
||||
"\n",
|
||||
"We will skip everything under `api`."
|
||||
"We will skip everything under `api`.\n",
|
||||
"\n",
|
||||
"For this, we can `lazy_load` each page as we crawl the tree, using `WebBaseLoader` to load each as we go."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5c938b9f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"url = \"https://js.langchain.com/docs/\"\n",
|
||||
"exclude_dirs = [\"https://js.langchain.com/docs/api/\"]\n",
|
||||
"loader = RecursiveUrlLoader(url=url, exclude_dirs=exclude_dirs)\n",
|
||||
"# Lazy load each\n",
|
||||
"docs = [print(doc) or doc for doc in loader.lazy_load()]"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -138,22 +151,22 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"url = 'https://js.langchain.com/docs/'\n",
|
||||
"exclude_dirs=['https://js.langchain.com/docs/api/']\n",
|
||||
"loader=RecursiveUrlLoader(url=url,exclude_dirs=exclude_dirs)\n",
|
||||
"docs=loader.load()"
|
||||
"# Load all pages\n",
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "457e30f3",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"176"
|
||||
"188"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
@@ -174,7 +187,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\n\\n\\n\\nHacker News | \\uf8ffü¶úÔ∏è\\uf8ffüîó Langchain\\n\\n\\n\\n\\n\\nSkip'"
|
||||
"'\\n\\n\\n\\n\\nAgent Simulations | 🦜️🔗 Langchain\\n\\n\\n\\n\\n\\nSkip t'"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
@@ -195,9 +208,9 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'source': 'https://js.langchain.com/docs/modules/indexes/document_loaders/examples/web_loaders/hn',\n",
|
||||
" 'title': 'Hacker News | \\uf8ffü¶úÔ∏è\\uf8ffüîó Langchain',\n",
|
||||
" 'description': 'This example goes over how to load data from the hacker news website, using Cheerio. One document will be created for each page.',\n",
|
||||
"{'source': 'https://js.langchain.com/docs/use_cases/agent_simulations/',\n",
|
||||
" 'title': 'Agent Simulations | 🦜️🔗 Langchain',\n",
|
||||
" 'description': 'Agent simulations involve taking multiple agents and having them interact with each other.',\n",
|
||||
" 'language': 'en'}"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -33,9 +33,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = UnstructuredRSTLoader(\n",
|
||||
" file_path=\"example_data/README.rst\", mode=\"elements\"\n",
|
||||
")\n",
|
||||
"loader = UnstructuredRSTLoader(file_path=\"example_data/README.rst\", mode=\"elements\")\n",
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -30,7 +30,8 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import warnings\n",
|
||||
"warnings.filterwarnings('ignore')\n",
|
||||
"\n",
|
||||
"warnings.filterwarnings(\"ignore\")\n",
|
||||
"from pprint import pprint\n",
|
||||
"from langchain.text_splitter import Language\n",
|
||||
"from langchain.document_loaders.generic import GenericLoader\n",
|
||||
@@ -48,7 +49,7 @@
|
||||
" \"./example_data/source_code\",\n",
|
||||
" glob=\"*\",\n",
|
||||
" suffixes=[\".py\", \".js\"],\n",
|
||||
" parser=LanguageParser()\n",
|
||||
" parser=LanguageParser(),\n",
|
||||
")\n",
|
||||
"docs = loader.load()"
|
||||
]
|
||||
@@ -200,7 +201,7 @@
|
||||
" \"./example_data/source_code\",\n",
|
||||
" glob=\"*\",\n",
|
||||
" suffixes=[\".py\"],\n",
|
||||
" parser=LanguageParser(language=Language.PYTHON, parser_threshold=1000)\n",
|
||||
" parser=LanguageParser(language=Language.PYTHON, parser_threshold=1000),\n",
|
||||
")\n",
|
||||
"docs = loader.load()"
|
||||
]
|
||||
@@ -281,7 +282,7 @@
|
||||
" \"./example_data/source_code\",\n",
|
||||
" glob=\"*\",\n",
|
||||
" suffixes=[\".js\"],\n",
|
||||
" parser=LanguageParser(language=Language.JS)\n",
|
||||
" parser=LanguageParser(language=Language.JS),\n",
|
||||
")\n",
|
||||
"docs = loader.load()"
|
||||
]
|
||||
|
||||
@@ -43,10 +43,10 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"conf = CosConfig(\n",
|
||||
" Region=\"your cos region\",\n",
|
||||
" SecretId=\"your cos secret_id\",\n",
|
||||
" SecretKey=\"your cos secret_key\",\n",
|
||||
" )\n",
|
||||
" Region=\"your cos region\",\n",
|
||||
" SecretId=\"your cos secret_id\",\n",
|
||||
" SecretKey=\"your cos secret_key\",\n",
|
||||
")\n",
|
||||
"loader = TencentCOSDirectoryLoader(conf=conf, bucket=\"you_cos_bucket\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -43,10 +43,10 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"conf = CosConfig(\n",
|
||||
" Region=\"your cos region\",\n",
|
||||
" SecretId=\"your cos secret_id\",\n",
|
||||
" SecretKey=\"your cos secret_key\",\n",
|
||||
" )\n",
|
||||
" Region=\"your cos region\",\n",
|
||||
" SecretId=\"your cos secret_id\",\n",
|
||||
" SecretKey=\"your cos secret_key\",\n",
|
||||
")\n",
|
||||
"loader = TencentCOSFileLoader(conf=conf, bucket=\"you_cos_bucket\", key=\"fake.docx\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -0,0 +1,181 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# TSV\n",
|
||||
"\n",
|
||||
">A [tab-separated values (TSV)](https://en.wikipedia.org/wiki/Tab-separated_values) file is a simple, text-based file format for storing tabular data.[3] Records are separated by newlines, and values within a record are separated by tab characters."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## `UnstructuredTSVLoader`\n",
|
||||
"\n",
|
||||
"You can also load the table using the `UnstructuredTSVLoader`. One advantage of using `UnstructuredTSVLoader` is that if you use it in `\"elements\"` mode, an HTML representation of the table will be available in the metadata."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders.tsv import UnstructuredTSVLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = UnstructuredTSVLoader(\n",
|
||||
" file_path=\"example_data/mlb_teams_2012.csv\", mode=\"elements\"\n",
|
||||
")\n",
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <td>Nationals, 81.34, 98</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Reds, 82.20, 97</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Yankees, 197.96, 95</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Giants, 117.62, 94</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Braves, 83.31, 94</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Athletics, 55.37, 94</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Rangers, 120.51, 93</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Orioles, 81.43, 93</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Rays, 64.17, 90</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Angels, 154.49, 89</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Tigers, 132.30, 88</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Cardinals, 110.30, 88</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Dodgers, 95.14, 86</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>White Sox, 96.92, 85</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Brewers, 97.65, 83</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Phillies, 174.54, 81</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Diamondbacks, 74.28, 81</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Pirates, 63.43, 79</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Padres, 55.24, 76</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Mariners, 81.97, 75</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Mets, 93.35, 74</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Blue Jays, 75.48, 73</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Royals, 60.91, 72</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Marlins, 118.07, 69</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Red Sox, 173.18, 69</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Indians, 78.43, 68</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Twins, 94.08, 66</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Rockies, 78.06, 64</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Cubs, 88.19, 61</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Astros, 60.65, 55</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].metadata[\"text_as_html\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -233,7 +233,8 @@
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
"id": "672264ad"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
@@ -241,16 +242,18 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = WebBaseLoader(\n",
|
||||
" \"https://www.walmart.com/search?q=parrots\", proxies={\n",
|
||||
" \"https://www.walmart.com/search?q=parrots\",\n",
|
||||
" proxies={\n",
|
||||
" \"http\": \"http://{username}:{password}:@proxy.service.com:6666/\",\n",
|
||||
" \"https\": \"https://{username}:{password}:@proxy.service.com:6666/\"\n",
|
||||
" }\n",
|
||||
" \"https\": \"https://{username}:{password}:@proxy.service.com:6666/\",\n",
|
||||
" },\n",
|
||||
")\n",
|
||||
"docs = loader.load()\n"
|
||||
"docs = loader.load()"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
"id": "9caf0310"
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -274,4 +277,4 @@
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,304 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Xorbits Pandas DataFrame\n",
|
||||
"\n",
|
||||
"This notebook goes over how to load data from a [xorbits.pandas](https://doc.xorbits.io/en/latest/reference/pandas/frame.html) DataFrame."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install xorbits"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import xorbits.pandas as pd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df = pd.read_csv(\"example_data/mlb_teams_2012.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "b0d1d84e23c04f1296f63b3ea3dd1e5b",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0.00/100 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>Team</th>\n",
|
||||
" <th>\"Payroll (millions)\"</th>\n",
|
||||
" <th>\"Wins\"</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>Nationals</td>\n",
|
||||
" <td>81.34</td>\n",
|
||||
" <td>98</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>Reds</td>\n",
|
||||
" <td>82.20</td>\n",
|
||||
" <td>97</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>Yankees</td>\n",
|
||||
" <td>197.96</td>\n",
|
||||
" <td>95</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>Giants</td>\n",
|
||||
" <td>117.62</td>\n",
|
||||
" <td>94</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>Braves</td>\n",
|
||||
" <td>83.31</td>\n",
|
||||
" <td>94</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" Team \"Payroll (millions)\" \"Wins\"\n",
|
||||
"0 Nationals 81.34 98\n",
|
||||
"1 Reds 82.20 97\n",
|
||||
"2 Yankees 197.96 95\n",
|
||||
"3 Giants 117.62 94\n",
|
||||
"4 Braves 83.31 94"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import XorbitsLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = XorbitsLoader(df, page_content_column=\"Team\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "c8c8b67f1aae4a3c9de7734bb6cf738e",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0.00/100 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='Nationals', metadata={' \"Payroll (millions)\"': 81.34, ' \"Wins\"': 98}),\n",
|
||||
" Document(page_content='Reds', metadata={' \"Payroll (millions)\"': 82.2, ' \"Wins\"': 97}),\n",
|
||||
" Document(page_content='Yankees', metadata={' \"Payroll (millions)\"': 197.96, ' \"Wins\"': 95}),\n",
|
||||
" Document(page_content='Giants', metadata={' \"Payroll (millions)\"': 117.62, ' \"Wins\"': 94}),\n",
|
||||
" Document(page_content='Braves', metadata={' \"Payroll (millions)\"': 83.31, ' \"Wins\"': 94}),\n",
|
||||
" Document(page_content='Athletics', metadata={' \"Payroll (millions)\"': 55.37, ' \"Wins\"': 94}),\n",
|
||||
" Document(page_content='Rangers', metadata={' \"Payroll (millions)\"': 120.51, ' \"Wins\"': 93}),\n",
|
||||
" Document(page_content='Orioles', metadata={' \"Payroll (millions)\"': 81.43, ' \"Wins\"': 93}),\n",
|
||||
" Document(page_content='Rays', metadata={' \"Payroll (millions)\"': 64.17, ' \"Wins\"': 90}),\n",
|
||||
" Document(page_content='Angels', metadata={' \"Payroll (millions)\"': 154.49, ' \"Wins\"': 89}),\n",
|
||||
" Document(page_content='Tigers', metadata={' \"Payroll (millions)\"': 132.3, ' \"Wins\"': 88}),\n",
|
||||
" Document(page_content='Cardinals', metadata={' \"Payroll (millions)\"': 110.3, ' \"Wins\"': 88}),\n",
|
||||
" Document(page_content='Dodgers', metadata={' \"Payroll (millions)\"': 95.14, ' \"Wins\"': 86}),\n",
|
||||
" Document(page_content='White Sox', metadata={' \"Payroll (millions)\"': 96.92, ' \"Wins\"': 85}),\n",
|
||||
" Document(page_content='Brewers', metadata={' \"Payroll (millions)\"': 97.65, ' \"Wins\"': 83}),\n",
|
||||
" Document(page_content='Phillies', metadata={' \"Payroll (millions)\"': 174.54, ' \"Wins\"': 81}),\n",
|
||||
" Document(page_content='Diamondbacks', metadata={' \"Payroll (millions)\"': 74.28, ' \"Wins\"': 81}),\n",
|
||||
" Document(page_content='Pirates', metadata={' \"Payroll (millions)\"': 63.43, ' \"Wins\"': 79}),\n",
|
||||
" Document(page_content='Padres', metadata={' \"Payroll (millions)\"': 55.24, ' \"Wins\"': 76}),\n",
|
||||
" Document(page_content='Mariners', metadata={' \"Payroll (millions)\"': 81.97, ' \"Wins\"': 75}),\n",
|
||||
" Document(page_content='Mets', metadata={' \"Payroll (millions)\"': 93.35, ' \"Wins\"': 74}),\n",
|
||||
" Document(page_content='Blue Jays', metadata={' \"Payroll (millions)\"': 75.48, ' \"Wins\"': 73}),\n",
|
||||
" Document(page_content='Royals', metadata={' \"Payroll (millions)\"': 60.91, ' \"Wins\"': 72}),\n",
|
||||
" Document(page_content='Marlins', metadata={' \"Payroll (millions)\"': 118.07, ' \"Wins\"': 69}),\n",
|
||||
" Document(page_content='Red Sox', metadata={' \"Payroll (millions)\"': 173.18, ' \"Wins\"': 69}),\n",
|
||||
" Document(page_content='Indians', metadata={' \"Payroll (millions)\"': 78.43, ' \"Wins\"': 68}),\n",
|
||||
" Document(page_content='Twins', metadata={' \"Payroll (millions)\"': 94.08, ' \"Wins\"': 66}),\n",
|
||||
" Document(page_content='Rockies', metadata={' \"Payroll (millions)\"': 78.06, ' \"Wins\"': 64}),\n",
|
||||
" Document(page_content='Cubs', metadata={' \"Payroll (millions)\"': 88.19, ' \"Wins\"': 61}),\n",
|
||||
" Document(page_content='Astros', metadata={' \"Payroll (millions)\"': 60.65, ' \"Wins\"': 55})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "fc85c9f59b3644689d05853159fbd358",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0.00/100 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_content='Nationals' metadata={' \"Payroll (millions)\"': 81.34, ' \"Wins\"': 98}\n",
|
||||
"page_content='Reds' metadata={' \"Payroll (millions)\"': 82.2, ' \"Wins\"': 97}\n",
|
||||
"page_content='Yankees' metadata={' \"Payroll (millions)\"': 197.96, ' \"Wins\"': 95}\n",
|
||||
"page_content='Giants' metadata={' \"Payroll (millions)\"': 117.62, ' \"Wins\"': 94}\n",
|
||||
"page_content='Braves' metadata={' \"Payroll (millions)\"': 83.31, ' \"Wins\"': 94}\n",
|
||||
"page_content='Athletics' metadata={' \"Payroll (millions)\"': 55.37, ' \"Wins\"': 94}\n",
|
||||
"page_content='Rangers' metadata={' \"Payroll (millions)\"': 120.51, ' \"Wins\"': 93}\n",
|
||||
"page_content='Orioles' metadata={' \"Payroll (millions)\"': 81.43, ' \"Wins\"': 93}\n",
|
||||
"page_content='Rays' metadata={' \"Payroll (millions)\"': 64.17, ' \"Wins\"': 90}\n",
|
||||
"page_content='Angels' metadata={' \"Payroll (millions)\"': 154.49, ' \"Wins\"': 89}\n",
|
||||
"page_content='Tigers' metadata={' \"Payroll (millions)\"': 132.3, ' \"Wins\"': 88}\n",
|
||||
"page_content='Cardinals' metadata={' \"Payroll (millions)\"': 110.3, ' \"Wins\"': 88}\n",
|
||||
"page_content='Dodgers' metadata={' \"Payroll (millions)\"': 95.14, ' \"Wins\"': 86}\n",
|
||||
"page_content='White Sox' metadata={' \"Payroll (millions)\"': 96.92, ' \"Wins\"': 85}\n",
|
||||
"page_content='Brewers' metadata={' \"Payroll (millions)\"': 97.65, ' \"Wins\"': 83}\n",
|
||||
"page_content='Phillies' metadata={' \"Payroll (millions)\"': 174.54, ' \"Wins\"': 81}\n",
|
||||
"page_content='Diamondbacks' metadata={' \"Payroll (millions)\"': 74.28, ' \"Wins\"': 81}\n",
|
||||
"page_content='Pirates' metadata={' \"Payroll (millions)\"': 63.43, ' \"Wins\"': 79}\n",
|
||||
"page_content='Padres' metadata={' \"Payroll (millions)\"': 55.24, ' \"Wins\"': 76}\n",
|
||||
"page_content='Mariners' metadata={' \"Payroll (millions)\"': 81.97, ' \"Wins\"': 75}\n",
|
||||
"page_content='Mets' metadata={' \"Payroll (millions)\"': 93.35, ' \"Wins\"': 74}\n",
|
||||
"page_content='Blue Jays' metadata={' \"Payroll (millions)\"': 75.48, ' \"Wins\"': 73}\n",
|
||||
"page_content='Royals' metadata={' \"Payroll (millions)\"': 60.91, ' \"Wins\"': 72}\n",
|
||||
"page_content='Marlins' metadata={' \"Payroll (millions)\"': 118.07, ' \"Wins\"': 69}\n",
|
||||
"page_content='Red Sox' metadata={' \"Payroll (millions)\"': 173.18, ' \"Wins\"': 69}\n",
|
||||
"page_content='Indians' metadata={' \"Payroll (millions)\"': 78.43, ' \"Wins\"': 68}\n",
|
||||
"page_content='Twins' metadata={' \"Payroll (millions)\"': 94.08, ' \"Wins\"': 66}\n",
|
||||
"page_content='Rockies' metadata={' \"Payroll (millions)\"': 78.06, ' \"Wins\"': 64}\n",
|
||||
"page_content='Cubs' metadata={' \"Payroll (millions)\"': 88.19, ' \"Wins\"': 61}\n",
|
||||
"page_content='Astros' metadata={' \"Payroll (millions)\"': 60.65, ' \"Wins\"': 55}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Use lazy load for larger table, which won't read the full table into memory\n",
|
||||
"for i in loader.lazy_load():\n",
|
||||
" print(i)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "base",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.13"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -0,0 +1 @@
|
||||
label: 'Integrations'
|
||||
@@ -0,0 +1,269 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Doctran Extract Properties\n",
|
||||
"\n",
|
||||
"We can extract useful features of documents using the [Doctran](https://github.com/psychic-api/doctran) library, which uses OpenAI's function calling feature to extract specific metadata.\n",
|
||||
"\n",
|
||||
"Extracting metadata from documents is helpful for a variety of tasks, including:\n",
|
||||
"* Classification: classifying documents into different categories\n",
|
||||
"* Data mining: Extract structured data that can be used for data analysis\n",
|
||||
"* Style transfer: Change the way text is written to more closely match expected user input, improving vector search results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"! pip install doctran"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"from langchain.schema import Document\n",
|
||||
"from langchain.document_transformers import DoctranPropertyExtractor"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from dotenv import load_dotenv\n",
|
||||
"\n",
|
||||
"load_dotenv()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Input\n",
|
||||
"This is the document we'll extract properties from."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Generated with ChatGPT]\n",
|
||||
"\n",
|
||||
"Confidential Document - For Internal Use Only\n",
|
||||
"\n",
|
||||
"Date: July 1, 2023\n",
|
||||
"\n",
|
||||
"Subject: Updates and Discussions on Various Topics\n",
|
||||
"\n",
|
||||
"Dear Team,\n",
|
||||
"\n",
|
||||
"I hope this email finds you well. In this document, I would like to provide you with some important updates and discuss various topics that require our attention. Please treat the information contained herein as highly confidential.\n",
|
||||
"\n",
|
||||
"Security and Privacy Measures\n",
|
||||
"As part of our ongoing commitment to ensure the security and privacy of our customers' data, we have implemented robust measures across all our systems. We would like to commend John Doe (email: john.doe@example.com) from the IT department for his diligent work in enhancing our network security. Moving forward, we kindly remind everyone to strictly adhere to our data protection policies and guidelines. Additionally, if you come across any potential security risks or incidents, please report them immediately to our dedicated team at security@example.com.\n",
|
||||
"\n",
|
||||
"HR Updates and Employee Benefits\n",
|
||||
"Recently, we welcomed several new team members who have made significant contributions to their respective departments. I would like to recognize Jane Smith (SSN: 049-45-5928) for her outstanding performance in customer service. Jane has consistently received positive feedback from our clients. Furthermore, please remember that the open enrollment period for our employee benefits program is fast approaching. Should you have any questions or require assistance, please contact our HR representative, Michael Johnson (phone: 418-492-3850, email: michael.johnson@example.com).\n",
|
||||
"\n",
|
||||
"Marketing Initiatives and Campaigns\n",
|
||||
"Our marketing team has been actively working on developing new strategies to increase brand awareness and drive customer engagement. We would like to thank Sarah Thompson (phone: 415-555-1234) for her exceptional efforts in managing our social media platforms. Sarah has successfully increased our follower base by 20% in the past month alone. Moreover, please mark your calendars for the upcoming product launch event on July 15th. We encourage all team members to attend and support this exciting milestone for our company.\n",
|
||||
"\n",
|
||||
"Research and Development Projects\n",
|
||||
"In our pursuit of innovation, our research and development department has been working tirelessly on various projects. I would like to acknowledge the exceptional work of David Rodriguez (email: david.rodriguez@example.com) in his role as project lead. David's contributions to the development of our cutting-edge technology have been instrumental. Furthermore, we would like to remind everyone to share their ideas and suggestions for potential new projects during our monthly R&D brainstorming session, scheduled for July 10th.\n",
|
||||
"\n",
|
||||
"Please treat the information in this document with utmost confidentiality and ensure that it is not shared with unauthorized individuals. If you have any questions or concerns regarding the topics discussed, please do not hesitate to reach out to me directly.\n",
|
||||
"\n",
|
||||
"Thank you for your attention, and let's continue to work together to achieve our goals.\n",
|
||||
"\n",
|
||||
"Best regards,\n",
|
||||
"\n",
|
||||
"Jason Fan\n",
|
||||
"Cofounder & CEO\n",
|
||||
"Psychic\n",
|
||||
"jason@psychic.dev\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"sample_text = \"\"\"[Generated with ChatGPT]\n",
|
||||
"\n",
|
||||
"Confidential Document - For Internal Use Only\n",
|
||||
"\n",
|
||||
"Date: July 1, 2023\n",
|
||||
"\n",
|
||||
"Subject: Updates and Discussions on Various Topics\n",
|
||||
"\n",
|
||||
"Dear Team,\n",
|
||||
"\n",
|
||||
"I hope this email finds you well. In this document, I would like to provide you with some important updates and discuss various topics that require our attention. Please treat the information contained herein as highly confidential.\n",
|
||||
"\n",
|
||||
"Security and Privacy Measures\n",
|
||||
"As part of our ongoing commitment to ensure the security and privacy of our customers' data, we have implemented robust measures across all our systems. We would like to commend John Doe (email: john.doe@example.com) from the IT department for his diligent work in enhancing our network security. Moving forward, we kindly remind everyone to strictly adhere to our data protection policies and guidelines. Additionally, if you come across any potential security risks or incidents, please report them immediately to our dedicated team at security@example.com.\n",
|
||||
"\n",
|
||||
"HR Updates and Employee Benefits\n",
|
||||
"Recently, we welcomed several new team members who have made significant contributions to their respective departments. I would like to recognize Jane Smith (SSN: 049-45-5928) for her outstanding performance in customer service. Jane has consistently received positive feedback from our clients. Furthermore, please remember that the open enrollment period for our employee benefits program is fast approaching. Should you have any questions or require assistance, please contact our HR representative, Michael Johnson (phone: 418-492-3850, email: michael.johnson@example.com).\n",
|
||||
"\n",
|
||||
"Marketing Initiatives and Campaigns\n",
|
||||
"Our marketing team has been actively working on developing new strategies to increase brand awareness and drive customer engagement. We would like to thank Sarah Thompson (phone: 415-555-1234) for her exceptional efforts in managing our social media platforms. Sarah has successfully increased our follower base by 20% in the past month alone. Moreover, please mark your calendars for the upcoming product launch event on July 15th. We encourage all team members to attend and support this exciting milestone for our company.\n",
|
||||
"\n",
|
||||
"Research and Development Projects\n",
|
||||
"In our pursuit of innovation, our research and development department has been working tirelessly on various projects. I would like to acknowledge the exceptional work of David Rodriguez (email: david.rodriguez@example.com) in his role as project lead. David's contributions to the development of our cutting-edge technology have been instrumental. Furthermore, we would like to remind everyone to share their ideas and suggestions for potential new projects during our monthly R&D brainstorming session, scheduled for July 10th.\n",
|
||||
"\n",
|
||||
"Please treat the information in this document with utmost confidentiality and ensure that it is not shared with unauthorized individuals. If you have any questions or concerns regarding the topics discussed, please do not hesitate to reach out to me directly.\n",
|
||||
"\n",
|
||||
"Thank you for your attention, and let's continue to work together to achieve our goals.\n",
|
||||
"\n",
|
||||
"Best regards,\n",
|
||||
"\n",
|
||||
"Jason Fan\n",
|
||||
"Cofounder & CEO\n",
|
||||
"Psychic\n",
|
||||
"jason@psychic.dev\n",
|
||||
"\"\"\"\n",
|
||||
"print(sample_text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"documents = [Document(page_content=sample_text)]\n",
|
||||
"properties = [\n",
|
||||
" {\n",
|
||||
" \"name\": \"category\",\n",
|
||||
" \"description\": \"What type of email this is.\",\n",
|
||||
" \"type\": \"string\",\n",
|
||||
" \"enum\": [\"update\", \"action_item\", \"customer_feedback\", \"announcement\", \"other\"],\n",
|
||||
" \"required\": True,\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"mentions\",\n",
|
||||
" \"description\": \"A list of all people mentioned in this email.\",\n",
|
||||
" \"type\": \"array\",\n",
|
||||
" \"items\": {\n",
|
||||
" \"name\": \"full_name\",\n",
|
||||
" \"description\": \"The full name of the person mentioned.\",\n",
|
||||
" \"type\": \"string\",\n",
|
||||
" },\n",
|
||||
" \"required\": True,\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"eli5\",\n",
|
||||
" \"description\": \"Explain this email to me like I'm 5 years old.\",\n",
|
||||
" \"type\": \"string\",\n",
|
||||
" \"required\": True,\n",
|
||||
" },\n",
|
||||
"]\n",
|
||||
"property_extractor = DoctranPropertyExtractor(properties=properties)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Output\n",
|
||||
"After extracting properties from a document, the result will be returned as a new document with properties provided in the metadata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"extracted_document = await property_extractor.atransform_documents(\n",
|
||||
" documents, properties=properties\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{\n",
|
||||
" \"extracted_properties\": {\n",
|
||||
" \"category\": \"update\",\n",
|
||||
" \"mentions\": [\n",
|
||||
" \"John Doe\",\n",
|
||||
" \"Jane Smith\",\n",
|
||||
" \"Michael Johnson\",\n",
|
||||
" \"Sarah Thompson\",\n",
|
||||
" \"David Rodriguez\",\n",
|
||||
" \"Jason Fan\"\n",
|
||||
" ],\n",
|
||||
" \"eli5\": \"This is an email from the CEO, Jason Fan, giving updates about different areas in the company. He talks about new security measures and praises John Doe for his work. He also mentions new hires and praises Jane Smith for her work in customer service. The CEO reminds everyone about the upcoming benefits enrollment and says to contact Michael Johnson with any questions. He talks about the marketing team's work and praises Sarah Thompson for increasing their social media followers. There's also a product launch event on July 15th. Lastly, he talks about the research and development projects and praises David Rodriguez for his work. There's a brainstorming session on July 10th.\"\n",
|
||||
" }\n",
|
||||
"}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(json.dumps(extracted_document[0].metadata, indent=2))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -0,0 +1,266 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Doctran Interrogate Documents\n",
|
||||
"Documents used in a vector store knowledge base are typically stored in narrative or conversational format. However, most user queries are in question format. If we convert documents into Q&A format before vectorizing them, we can increase the liklihood of retrieving relevant documents, and decrease the liklihood of retrieving irrelevant documents.\n",
|
||||
"\n",
|
||||
"We can accomplish this using the [Doctran](https://github.com/psychic-api/doctran) library, which uses OpenAI's function calling feature to \"interrogate\" documents.\n",
|
||||
"\n",
|
||||
"See [this notebook](https://github.com/psychic-api/doctran/blob/main/benchmark.ipynb) for benchmarks on vector similarity scores for various queries based on raw documents versus interrogated documents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"! pip install doctran"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"from langchain.schema import Document\n",
|
||||
"from langchain.document_transformers import DoctranQATransformer"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from dotenv import load_dotenv\n",
|
||||
"\n",
|
||||
"load_dotenv()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Input\n",
|
||||
"This is the document we'll interrogate"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Generated with ChatGPT]\n",
|
||||
"\n",
|
||||
"Confidential Document - For Internal Use Only\n",
|
||||
"\n",
|
||||
"Date: July 1, 2023\n",
|
||||
"\n",
|
||||
"Subject: Updates and Discussions on Various Topics\n",
|
||||
"\n",
|
||||
"Dear Team,\n",
|
||||
"\n",
|
||||
"I hope this email finds you well. In this document, I would like to provide you with some important updates and discuss various topics that require our attention. Please treat the information contained herein as highly confidential.\n",
|
||||
"\n",
|
||||
"Security and Privacy Measures\n",
|
||||
"As part of our ongoing commitment to ensure the security and privacy of our customers' data, we have implemented robust measures across all our systems. We would like to commend John Doe (email: john.doe@example.com) from the IT department for his diligent work in enhancing our network security. Moving forward, we kindly remind everyone to strictly adhere to our data protection policies and guidelines. Additionally, if you come across any potential security risks or incidents, please report them immediately to our dedicated team at security@example.com.\n",
|
||||
"\n",
|
||||
"HR Updates and Employee Benefits\n",
|
||||
"Recently, we welcomed several new team members who have made significant contributions to their respective departments. I would like to recognize Jane Smith (SSN: 049-45-5928) for her outstanding performance in customer service. Jane has consistently received positive feedback from our clients. Furthermore, please remember that the open enrollment period for our employee benefits program is fast approaching. Should you have any questions or require assistance, please contact our HR representative, Michael Johnson (phone: 418-492-3850, email: michael.johnson@example.com).\n",
|
||||
"\n",
|
||||
"Marketing Initiatives and Campaigns\n",
|
||||
"Our marketing team has been actively working on developing new strategies to increase brand awareness and drive customer engagement. We would like to thank Sarah Thompson (phone: 415-555-1234) for her exceptional efforts in managing our social media platforms. Sarah has successfully increased our follower base by 20% in the past month alone. Moreover, please mark your calendars for the upcoming product launch event on July 15th. We encourage all team members to attend and support this exciting milestone for our company.\n",
|
||||
"\n",
|
||||
"Research and Development Projects\n",
|
||||
"In our pursuit of innovation, our research and development department has been working tirelessly on various projects. I would like to acknowledge the exceptional work of David Rodriguez (email: david.rodriguez@example.com) in his role as project lead. David's contributions to the development of our cutting-edge technology have been instrumental. Furthermore, we would like to remind everyone to share their ideas and suggestions for potential new projects during our monthly R&D brainstorming session, scheduled for July 10th.\n",
|
||||
"\n",
|
||||
"Please treat the information in this document with utmost confidentiality and ensure that it is not shared with unauthorized individuals. If you have any questions or concerns regarding the topics discussed, please do not hesitate to reach out to me directly.\n",
|
||||
"\n",
|
||||
"Thank you for your attention, and let's continue to work together to achieve our goals.\n",
|
||||
"\n",
|
||||
"Best regards,\n",
|
||||
"\n",
|
||||
"Jason Fan\n",
|
||||
"Cofounder & CEO\n",
|
||||
"Psychic\n",
|
||||
"jason@psychic.dev\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"sample_text = \"\"\"[Generated with ChatGPT]\n",
|
||||
"\n",
|
||||
"Confidential Document - For Internal Use Only\n",
|
||||
"\n",
|
||||
"Date: July 1, 2023\n",
|
||||
"\n",
|
||||
"Subject: Updates and Discussions on Various Topics\n",
|
||||
"\n",
|
||||
"Dear Team,\n",
|
||||
"\n",
|
||||
"I hope this email finds you well. In this document, I would like to provide you with some important updates and discuss various topics that require our attention. Please treat the information contained herein as highly confidential.\n",
|
||||
"\n",
|
||||
"Security and Privacy Measures\n",
|
||||
"As part of our ongoing commitment to ensure the security and privacy of our customers' data, we have implemented robust measures across all our systems. We would like to commend John Doe (email: john.doe@example.com) from the IT department for his diligent work in enhancing our network security. Moving forward, we kindly remind everyone to strictly adhere to our data protection policies and guidelines. Additionally, if you come across any potential security risks or incidents, please report them immediately to our dedicated team at security@example.com.\n",
|
||||
"\n",
|
||||
"HR Updates and Employee Benefits\n",
|
||||
"Recently, we welcomed several new team members who have made significant contributions to their respective departments. I would like to recognize Jane Smith (SSN: 049-45-5928) for her outstanding performance in customer service. Jane has consistently received positive feedback from our clients. Furthermore, please remember that the open enrollment period for our employee benefits program is fast approaching. Should you have any questions or require assistance, please contact our HR representative, Michael Johnson (phone: 418-492-3850, email: michael.johnson@example.com).\n",
|
||||
"\n",
|
||||
"Marketing Initiatives and Campaigns\n",
|
||||
"Our marketing team has been actively working on developing new strategies to increase brand awareness and drive customer engagement. We would like to thank Sarah Thompson (phone: 415-555-1234) for her exceptional efforts in managing our social media platforms. Sarah has successfully increased our follower base by 20% in the past month alone. Moreover, please mark your calendars for the upcoming product launch event on July 15th. We encourage all team members to attend and support this exciting milestone for our company.\n",
|
||||
"\n",
|
||||
"Research and Development Projects\n",
|
||||
"In our pursuit of innovation, our research and development department has been working tirelessly on various projects. I would like to acknowledge the exceptional work of David Rodriguez (email: david.rodriguez@example.com) in his role as project lead. David's contributions to the development of our cutting-edge technology have been instrumental. Furthermore, we would like to remind everyone to share their ideas and suggestions for potential new projects during our monthly R&D brainstorming session, scheduled for July 10th.\n",
|
||||
"\n",
|
||||
"Please treat the information in this document with utmost confidentiality and ensure that it is not shared with unauthorized individuals. If you have any questions or concerns regarding the topics discussed, please do not hesitate to reach out to me directly.\n",
|
||||
"\n",
|
||||
"Thank you for your attention, and let's continue to work together to achieve our goals.\n",
|
||||
"\n",
|
||||
"Best regards,\n",
|
||||
"\n",
|
||||
"Jason Fan\n",
|
||||
"Cofounder & CEO\n",
|
||||
"Psychic\n",
|
||||
"jason@psychic.dev\n",
|
||||
"\"\"\"\n",
|
||||
"print(sample_text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"documents = [Document(page_content=sample_text)]\n",
|
||||
"qa_transformer = DoctranQATransformer()\n",
|
||||
"transformed_document = await qa_transformer.atransform_documents(documents)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Output\n",
|
||||
"After interrogating a document, the result will be returned as a new document with questions and answers provided in the metadata."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{\n",
|
||||
" \"questions_and_answers\": [\n",
|
||||
" {\n",
|
||||
" \"question\": \"What is the purpose of this document?\",\n",
|
||||
" \"answer\": \"The purpose of this document is to provide important updates and discuss various topics that require the team's attention.\"\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"question\": \"Who is responsible for enhancing the network security?\",\n",
|
||||
" \"answer\": \"John Doe from the IT department is responsible for enhancing the network security.\"\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"question\": \"Where should potential security risks or incidents be reported?\",\n",
|
||||
" \"answer\": \"Potential security risks or incidents should be reported to the dedicated team at security@example.com.\"\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"question\": \"Who has been recognized for outstanding performance in customer service?\",\n",
|
||||
" \"answer\": \"Jane Smith has been recognized for her outstanding performance in customer service.\"\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"question\": \"When is the open enrollment period for the employee benefits program?\",\n",
|
||||
" \"answer\": \"The document does not specify the exact dates for the open enrollment period for the employee benefits program, but it mentions that it is fast approaching.\"\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"question\": \"Who should be contacted for questions or assistance regarding the employee benefits program?\",\n",
|
||||
" \"answer\": \"For questions or assistance regarding the employee benefits program, the HR representative, Michael Johnson, should be contacted.\"\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"question\": \"Who has been acknowledged for managing the company's social media platforms?\",\n",
|
||||
" \"answer\": \"Sarah Thompson has been acknowledged for managing the company's social media platforms.\"\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"question\": \"When is the upcoming product launch event?\",\n",
|
||||
" \"answer\": \"The upcoming product launch event is on July 15th.\"\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"question\": \"Who has been recognized for their contributions to the development of the company's technology?\",\n",
|
||||
" \"answer\": \"David Rodriguez has been recognized for his contributions to the development of the company's technology.\"\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"question\": \"When is the monthly R&D brainstorming session?\",\n",
|
||||
" \"answer\": \"The monthly R&D brainstorming session is scheduled for July 10th.\"\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"question\": \"Who should be contacted for questions or concerns regarding the topics discussed in the document?\",\n",
|
||||
" \"answer\": \"For questions or concerns regarding the topics discussed in the document, Jason Fan, the Cofounder & CEO, should be contacted.\"\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
"}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"transformed_document = await qa_transformer.atransform_documents(documents)\n",
|
||||
"print(json.dumps(transformed_document[0].metadata, indent=2))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -0,0 +1,208 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Doctran Translate Documents\n",
|
||||
"Comparing documents through embeddings has the benefit of working across multiple languages. \"Harrison says hello\" and \"Harrison dice hola\" will occupy similar positions in the vector space because they have the same meaning semantically.\n",
|
||||
"\n",
|
||||
"However, it can still be useful to use a LLM translate documents into other languages before vectorizing them. This is especially helpful when users are expected to query the knowledge base in different languages, or when state of the art embeddings models are not available for a given language.\n",
|
||||
"\n",
|
||||
"We can accomplish this using the [Doctran](https://github.com/psychic-api/doctran) library, which uses OpenAI's function calling feature to translate documents between languages."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"! pip install doctran"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.schema import Document\n",
|
||||
"from langchain.document_transformers import DoctranTextTranslator"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from dotenv import load_dotenv\n",
|
||||
"\n",
|
||||
"load_dotenv()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Input\n",
|
||||
"This is the document we'll translate"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"sample_text = \"\"\"[Generated with ChatGPT]\n",
|
||||
"\n",
|
||||
"Confidential Document - For Internal Use Only\n",
|
||||
"\n",
|
||||
"Date: July 1, 2023\n",
|
||||
"\n",
|
||||
"Subject: Updates and Discussions on Various Topics\n",
|
||||
"\n",
|
||||
"Dear Team,\n",
|
||||
"\n",
|
||||
"I hope this email finds you well. In this document, I would like to provide you with some important updates and discuss various topics that require our attention. Please treat the information contained herein as highly confidential.\n",
|
||||
"\n",
|
||||
"Security and Privacy Measures\n",
|
||||
"As part of our ongoing commitment to ensure the security and privacy of our customers' data, we have implemented robust measures across all our systems. We would like to commend John Doe (email: john.doe@example.com) from the IT department for his diligent work in enhancing our network security. Moving forward, we kindly remind everyone to strictly adhere to our data protection policies and guidelines. Additionally, if you come across any potential security risks or incidents, please report them immediately to our dedicated team at security@example.com.\n",
|
||||
"\n",
|
||||
"HR Updates and Employee Benefits\n",
|
||||
"Recently, we welcomed several new team members who have made significant contributions to their respective departments. I would like to recognize Jane Smith (SSN: 049-45-5928) for her outstanding performance in customer service. Jane has consistently received positive feedback from our clients. Furthermore, please remember that the open enrollment period for our employee benefits program is fast approaching. Should you have any questions or require assistance, please contact our HR representative, Michael Johnson (phone: 418-492-3850, email: michael.johnson@example.com).\n",
|
||||
"\n",
|
||||
"Marketing Initiatives and Campaigns\n",
|
||||
"Our marketing team has been actively working on developing new strategies to increase brand awareness and drive customer engagement. We would like to thank Sarah Thompson (phone: 415-555-1234) for her exceptional efforts in managing our social media platforms. Sarah has successfully increased our follower base by 20% in the past month alone. Moreover, please mark your calendars for the upcoming product launch event on July 15th. We encourage all team members to attend and support this exciting milestone for our company.\n",
|
||||
"\n",
|
||||
"Research and Development Projects\n",
|
||||
"In our pursuit of innovation, our research and development department has been working tirelessly on various projects. I would like to acknowledge the exceptional work of David Rodriguez (email: david.rodriguez@example.com) in his role as project lead. David's contributions to the development of our cutting-edge technology have been instrumental. Furthermore, we would like to remind everyone to share their ideas and suggestions for potential new projects during our monthly R&D brainstorming session, scheduled for July 10th.\n",
|
||||
"\n",
|
||||
"Please treat the information in this document with utmost confidentiality and ensure that it is not shared with unauthorized individuals. If you have any questions or concerns regarding the topics discussed, please do not hesitate to reach out to me directly.\n",
|
||||
"\n",
|
||||
"Thank you for your attention, and let's continue to work together to achieve our goals.\n",
|
||||
"\n",
|
||||
"Best regards,\n",
|
||||
"\n",
|
||||
"Jason Fan\n",
|
||||
"Cofounder & CEO\n",
|
||||
"Psychic\n",
|
||||
"jason@psychic.dev\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"documents = [Document(page_content=sample_text)]\n",
|
||||
"qa_translator = DoctranTextTranslator(language=\"spanish\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Output\n",
|
||||
"After translating a document, the result will be returned as a new document with the page_content translated into the target language"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"translated_document = await qa_translator.atransform_documents(documents)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Generado con ChatGPT]\n",
|
||||
"\n",
|
||||
"Documento confidencial - Solo para uso interno\n",
|
||||
"\n",
|
||||
"Fecha: 1 de julio de 2023\n",
|
||||
"\n",
|
||||
"Asunto: Actualizaciones y discusiones sobre varios temas\n",
|
||||
"\n",
|
||||
"Estimado equipo,\n",
|
||||
"\n",
|
||||
"Espero que este correo electrónico les encuentre bien. En este documento, me gustaría proporcionarles algunas actualizaciones importantes y discutir varios temas que requieren nuestra atención. Por favor, traten la información contenida aquí como altamente confidencial.\n",
|
||||
"\n",
|
||||
"Medidas de seguridad y privacidad\n",
|
||||
"Como parte de nuestro compromiso continuo para garantizar la seguridad y privacidad de los datos de nuestros clientes, hemos implementado medidas robustas en todos nuestros sistemas. Nos gustaría elogiar a John Doe (correo electrónico: john.doe@example.com) del departamento de TI por su diligente trabajo en mejorar nuestra seguridad de red. En adelante, recordamos amablemente a todos que se adhieran estrictamente a nuestras políticas y directrices de protección de datos. Además, si se encuentran con cualquier riesgo de seguridad o incidente potencial, por favor repórtelo inmediatamente a nuestro equipo dedicado en security@example.com.\n",
|
||||
"\n",
|
||||
"Actualizaciones de RRHH y beneficios para empleados\n",
|
||||
"Recientemente, dimos la bienvenida a varios nuevos miembros del equipo que han hecho contribuciones significativas a sus respectivos departamentos. Me gustaría reconocer a Jane Smith (SSN: 049-45-5928) por su sobresaliente rendimiento en el servicio al cliente. Jane ha recibido constantemente comentarios positivos de nuestros clientes. Además, recuerden que el período de inscripción abierta para nuestro programa de beneficios para empleados se acerca rápidamente. Si tienen alguna pregunta o necesitan asistencia, por favor contacten a nuestro representante de RRHH, Michael Johnson (teléfono: 418-492-3850, correo electrónico: michael.johnson@example.com).\n",
|
||||
"\n",
|
||||
"Iniciativas y campañas de marketing\n",
|
||||
"Nuestro equipo de marketing ha estado trabajando activamente en el desarrollo de nuevas estrategias para aumentar la conciencia de marca y fomentar la participación del cliente. Nos gustaría agradecer a Sarah Thompson (teléfono: 415-555-1234) por sus excepcionales esfuerzos en la gestión de nuestras plataformas de redes sociales. Sarah ha aumentado con éxito nuestra base de seguidores en un 20% solo en el último mes. Además, por favor marquen sus calendarios para el próximo evento de lanzamiento de producto el 15 de julio. Animamos a todos los miembros del equipo a asistir y apoyar este emocionante hito para nuestra empresa.\n",
|
||||
"\n",
|
||||
"Proyectos de investigación y desarrollo\n",
|
||||
"En nuestra búsqueda de la innovación, nuestro departamento de investigación y desarrollo ha estado trabajando incansablemente en varios proyectos. Me gustaría reconocer el excepcional trabajo de David Rodríguez (correo electrónico: david.rodriguez@example.com) en su papel de líder de proyecto. Las contribuciones de David al desarrollo de nuestra tecnología de vanguardia han sido fundamentales. Además, nos gustaría recordar a todos que compartan sus ideas y sugerencias para posibles nuevos proyectos durante nuestra sesión de lluvia de ideas de I+D mensual, programada para el 10 de julio.\n",
|
||||
"\n",
|
||||
"Por favor, traten la información de este documento con la máxima confidencialidad y asegúrense de que no se comparte con personas no autorizadas. Si tienen alguna pregunta o inquietud sobre los temas discutidos, no duden en ponerse en contacto conmigo directamente.\n",
|
||||
"\n",
|
||||
"Gracias por su atención, y sigamos trabajando juntos para alcanzar nuestros objetivos.\n",
|
||||
"\n",
|
||||
"Saludos cordiales,\n",
|
||||
"\n",
|
||||
"Jason Fan\n",
|
||||
"Cofundador y CEO\n",
|
||||
"Psychic\n",
|
||||
"jason@psychic.dev\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(translated_document[0].page_content)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -0,0 +1,261 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# OpenAI Functions Metadata Tagger\n",
|
||||
"\n",
|
||||
"It can often be useful to tag ingested documents with structured metadata, such as the title, tone, or length of a document, to allow for more targeted similarity search later. However, for large numbers of documents, performing this labelling process manually can be tedious.\n",
|
||||
"\n",
|
||||
"The `OpenAIMetadataTagger` document transformer automates this process by extracting metadata from each provided document according to a provided schema. It uses a configurable OpenAI Functions-powered chain under the hood, so if you pass a custom LLM instance, it must be an OpenAI model with functions support. \n",
|
||||
"\n",
|
||||
"**Note:** This document transformer works best with complete documents, so it's best to run it first with whole documents before doing any other splitting or processing!\n",
|
||||
"\n",
|
||||
"For example, let's say you wanted to index a set of movie reviews. You could initialize the document transformer with a valid JSON Schema object as follows:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.schema import Document\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.document_transformers.openai_functions import create_metadata_tagger"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"schema = {\n",
|
||||
" \"properties\": {\n",
|
||||
" \"movie_title\": {\"type\": \"string\"},\n",
|
||||
" \"critic\": {\"type\": \"string\"},\n",
|
||||
" \"tone\": {\"type\": \"string\", \"enum\": [\"positive\", \"negative\"]},\n",
|
||||
" \"rating\": {\n",
|
||||
" \"type\": \"integer\",\n",
|
||||
" \"description\": \"The number of stars the critic rated the movie\",\n",
|
||||
" },\n",
|
||||
" },\n",
|
||||
" \"required\": [\"movie_title\", \"critic\", \"tone\"],\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"# Must be an OpenAI model that supports functions\n",
|
||||
"llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")\n",
|
||||
"\n",
|
||||
"document_transformer = create_metadata_tagger(metadata_schema=schema, llm=llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can then simply pass the document transformer a list of documents, and it will extract metadata from the contents:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"original_documents = [\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Review of The Bee Movie\\nBy Roger Ebert\\n\\nThis is the greatest movie ever made. 4 out of 5 stars.\"\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Review of The Godfather\\nBy Anonymous\\n\\nThis movie was super boring. 1 out of 5 stars.\",\n",
|
||||
" metadata={\"reliable\": False},\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"enhanced_documents = document_transformer.transform_documents(original_documents)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Review of The Bee Movie\n",
|
||||
"By Roger Ebert\n",
|
||||
"\n",
|
||||
"This is the greatest movie ever made. 4 out of 5 stars.\n",
|
||||
"\n",
|
||||
"{\"movie_title\": \"The Bee Movie\", \"critic\": \"Roger Ebert\", \"tone\": \"positive\", \"rating\": 4}\n",
|
||||
"\n",
|
||||
"---------------\n",
|
||||
"\n",
|
||||
"Review of The Godfather\n",
|
||||
"By Anonymous\n",
|
||||
"\n",
|
||||
"This movie was super boring. 1 out of 5 stars.\n",
|
||||
"\n",
|
||||
"{\"movie_title\": \"The Godfather\", \"critic\": \"Anonymous\", \"tone\": \"negative\", \"rating\": 1, \"reliable\": false}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"\n",
|
||||
"print(\n",
|
||||
" *[d.page_content + \"\\n\\n\" + json.dumps(d.metadata) for d in enhanced_documents],\n",
|
||||
" sep=\"\\n\\n---------------\\n\\n\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The new documents can then be further processed by a text splitter before being loaded into a vector store. Extracted fields will not overwrite existing metadata.\n",
|
||||
"\n",
|
||||
"You can also initialize the document transformer with a Pydantic schema:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Review of The Bee Movie\n",
|
||||
"By Roger Ebert\n",
|
||||
"\n",
|
||||
"This is the greatest movie ever made. 4 out of 5 stars.\n",
|
||||
"\n",
|
||||
"{\"movie_title\": \"The Bee Movie\", \"critic\": \"Roger Ebert\", \"tone\": \"positive\", \"rating\": 4}\n",
|
||||
"\n",
|
||||
"---------------\n",
|
||||
"\n",
|
||||
"Review of The Godfather\n",
|
||||
"By Anonymous\n",
|
||||
"\n",
|
||||
"This movie was super boring. 1 out of 5 stars.\n",
|
||||
"\n",
|
||||
"{\"movie_title\": \"The Godfather\", \"critic\": \"Anonymous\", \"tone\": \"negative\", \"rating\": 1, \"reliable\": false}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from typing import Literal\n",
|
||||
"\n",
|
||||
"from pydantic import BaseModel, Field\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class Properties(BaseModel):\n",
|
||||
" movie_title: str\n",
|
||||
" critic: str\n",
|
||||
" tone: Literal[\"positive\", \"negative\"]\n",
|
||||
" rating: int = Field(description=\"Rating out of 5 stars\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"document_transformer = create_metadata_tagger(Properties, llm)\n",
|
||||
"enhanced_documents = document_transformer.transform_documents(original_documents)\n",
|
||||
"\n",
|
||||
"print(\n",
|
||||
" *[d.page_content + \"\\n\\n\" + json.dumps(d.metadata) for d in enhanced_documents],\n",
|
||||
" sep=\"\\n\\n---------------\\n\\n\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
"\n",
|
||||
"## Customization\n",
|
||||
"\n",
|
||||
"You can pass the underlying tagging chain the standard LLMChain arguments in the document transformer constructor. For example, if you wanted to ask the LLM to focus specific details in the input documents, or extract metadata in a certain style, you could pass in a custom prompt:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Review of The Bee Movie\n",
|
||||
"By Roger Ebert\n",
|
||||
"\n",
|
||||
"This is the greatest movie ever made. 4 out of 5 stars.\n",
|
||||
"\n",
|
||||
"{\"movie_title\": \"The Bee Movie\", \"critic\": \"Roger Ebert\", \"tone\": \"positive\", \"rating\": 4}\n",
|
||||
"\n",
|
||||
"---------------\n",
|
||||
"\n",
|
||||
"Review of The Godfather\n",
|
||||
"By Anonymous\n",
|
||||
"\n",
|
||||
"This movie was super boring. 1 out of 5 stars.\n",
|
||||
"\n",
|
||||
"{\"movie_title\": \"The Godfather\", \"critic\": \"Roger Ebert\", \"tone\": \"negative\", \"rating\": 1, \"reliable\": false}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_template(\n",
|
||||
" \"\"\"Extract relevant information from the following text.\n",
|
||||
"Anonymous critics are actually Roger Ebert.\n",
|
||||
"\n",
|
||||
"{input}\n",
|
||||
"\"\"\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"document_transformer = create_metadata_tagger(schema, llm, prompt=prompt)\n",
|
||||
"enhanced_documents = document_transformer.transform_documents(original_documents)\n",
|
||||
"\n",
|
||||
"print(\n",
|
||||
" *[d.page_content + \"\\n\\n\" + json.dumps(d.metadata) for d in enhanced_documents],\n",
|
||||
" sep=\"\\n\\n---------------\\n\\n\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "venv",
|
||||
"language": "python",
|
||||
"name": "venv"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||