mirror of
https://github.com/hwchase17/langchain.git
synced 2026-04-19 11:55:09 +00:00
Compare commits
94 Commits
dev2049/re
...
vwp/feedba
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8389cb89ef | ||
|
|
09ea7f6e24 | ||
|
|
ec8cbdfc9e | ||
|
|
6655886d94 | ||
|
|
144c18eb94 | ||
|
|
38c4fc053f | ||
|
|
38210b2871 | ||
|
|
4392743be2 | ||
|
|
f64a2075d9 | ||
|
|
78eca3faa1 | ||
|
|
1fdd086e71 | ||
|
|
fd99f3c4e9 | ||
|
|
76bac217f3 | ||
|
|
319978fc77 | ||
|
|
a45dec4f5b | ||
|
|
0b64efe424 | ||
|
|
1e84d283cc | ||
|
|
5e86d2e6ef | ||
|
|
8f3040f7e1 | ||
|
|
1a3e468cbc | ||
|
|
014839f4c3 | ||
|
|
9182b0e140 | ||
|
|
1d29cece32 | ||
|
|
9688fcc88d | ||
|
|
19a4a06ab5 | ||
|
|
f4301ce2cd | ||
|
|
5e86756049 | ||
|
|
87bba2e8d3 | ||
|
|
de6a401a22 | ||
|
|
69de33e024 | ||
|
|
e173e032bc | ||
|
|
c28cc0f1ac | ||
|
|
5e47c648ed | ||
|
|
5b2b436fab | ||
|
|
467ca6f025 | ||
|
|
9e649462ce | ||
|
|
1cb04f2b26 | ||
|
|
e57ebf3922 | ||
|
|
039f8f1abb | ||
|
|
44dc959584 | ||
|
|
5cd12102be | ||
|
|
6eacd88ae7 | ||
|
|
fcd88bccb3 | ||
|
|
10ba201d05 | ||
|
|
49ca02711e | ||
|
|
785502edb3 | ||
|
|
ef7d015be5 | ||
|
|
443ebe22f4 | ||
|
|
a395ff7c90 | ||
|
|
bf3f554357 | ||
|
|
0c3de0a0b3 | ||
|
|
224f73e978 | ||
|
|
6c25f860fd | ||
|
|
b0431c672b | ||
|
|
8c661baefb | ||
|
|
424a573266 | ||
|
|
f9f08c4b69 | ||
|
|
a6ef20d7fe | ||
|
|
9d1280d451 | ||
|
|
7388248b3e | ||
|
|
3bc0bf0079 | ||
|
|
27e63b977a | ||
|
|
2aa3754024 | ||
|
|
6c60251f52 | ||
|
|
9928fb2193 | ||
|
|
f07b9fde74 | ||
|
|
ddc2d4c21e | ||
|
|
02632d52b3 | ||
|
|
2ab0e1d526 | ||
|
|
080eb1b3fc | ||
|
|
ddd595fe81 | ||
|
|
729e935ea4 | ||
|
|
62d0a01a0f | ||
|
|
0ff59569dc | ||
|
|
56cb77a828 | ||
|
|
22d844dc07 | ||
|
|
616e9a93e0 | ||
|
|
a87a2524c7 | ||
|
|
2abf6b9f17 | ||
|
|
06e524416c | ||
|
|
e68dfa7062 | ||
|
|
e80585bab0 | ||
|
|
13c376345e | ||
|
|
bf5a3c6dec | ||
|
|
88a3a56c1a | ||
|
|
5feb60f426 | ||
|
|
c06973261a | ||
|
|
db6f7ed0ba | ||
|
|
e027a38f33 | ||
|
|
3df2d831f9 | ||
|
|
c8c2276ccb | ||
|
|
5525b704cc | ||
|
|
a9bb3147d7 | ||
|
|
8f8593aac5 |
1
.github/workflows/test.yml
vendored
1
.github/workflows/test.yml
vendored
@@ -4,6 +4,7 @@ on:
|
||||
push:
|
||||
branches: [master]
|
||||
pull_request:
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
POETRY_VERSION: "1.4.2"
|
||||
|
||||
2
docs/_static/css/custom.css
vendored
2
docs/_static/css/custom.css
vendored
@@ -13,5 +13,5 @@ pre {
|
||||
}
|
||||
|
||||
#my-component-root *, #headlessui-portal-root * {
|
||||
z-index: 1000000000000;
|
||||
z-index: 10000;
|
||||
}
|
||||
|
||||
6
docs/_static/js/mendablesearch.js
vendored
6
docs/_static/js/mendablesearch.js
vendored
@@ -30,10 +30,7 @@ document.addEventListener('DOMContentLoaded', () => {
|
||||
const icon = React.createElement('p', {
|
||||
style: { color: '#ffffff', fontSize: '22px',width: '48px', height: '48px', margin: '0px', padding: '0px', display: 'flex', alignItems: 'center', justifyContent: 'center', textAlign: 'center' },
|
||||
}, [iconSpan1, iconSpan2]);
|
||||
|
||||
|
||||
|
||||
|
||||
const mendableFloatingButton = React.createElement(
|
||||
MendableFloatingButton,
|
||||
{
|
||||
@@ -42,6 +39,7 @@ document.addEventListener('DOMContentLoaded', () => {
|
||||
anon_key: '82842b36-3ea6-49b2-9fb8-52cfc4bde6bf', // Mendable Search Public ANON key, ok to be public
|
||||
messageSettings: {
|
||||
openSourcesInNewTab: false,
|
||||
prettySources: true // Prettify the sources displayed now
|
||||
},
|
||||
icon: icon,
|
||||
}
|
||||
@@ -52,7 +50,7 @@ document.addEventListener('DOMContentLoaded', () => {
|
||||
|
||||
loadScript('https://unpkg.com/react@17/umd/react.production.min.js', () => {
|
||||
loadScript('https://unpkg.com/react-dom@17/umd/react-dom.production.min.js', () => {
|
||||
loadScript('https://unpkg.com/@mendable/search@0.0.93/dist/umd/mendable.min.js', initializeMendable);
|
||||
loadScript('https://unpkg.com/@mendable/search@0.0.102/dist/umd/mendable.min.js', initializeMendable);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
192
docs/dependents.md
Normal file
192
docs/dependents.md
Normal file
@@ -0,0 +1,192 @@
|
||||
# Dependents
|
||||
|
||||
Dependents stats for `hwchase17/langchain`
|
||||
|
||||
[](https://github.com/hwchase17/langchain/network/dependents)
|
||||
[&message=172&color=informational&logo=slickpic)](https://github.com/hwchase17/langchain/network/dependents)
|
||||
[&message=4980&color=informational&logo=slickpic)](https://github.com/hwchase17/langchain/network/dependents)
|
||||
[&message=17239&color=informational&logo=slickpic)](https://github.com/hwchase17/langchain/network/dependents)
|
||||
|
||||
[update: 2023-05-17; only dependent repositories with Stars > 100]
|
||||
|
||||
|
||||
| Repository | Stars |
|
||||
| :-------- | -----: |
|
||||
|[openai/openai-cookbook](https://github.com/openai/openai-cookbook) | 35401 |
|
||||
|[LAION-AI/Open-Assistant](https://github.com/LAION-AI/Open-Assistant) | 32861 |
|
||||
|[microsoft/TaskMatrix](https://github.com/microsoft/TaskMatrix) | 32766 |
|
||||
|[hpcaitech/ColossalAI](https://github.com/hpcaitech/ColossalAI) | 29560 |
|
||||
|[reworkd/AgentGPT](https://github.com/reworkd/AgentGPT) | 22315 |
|
||||
|[imartinez/privateGPT](https://github.com/imartinez/privateGPT) | 17474 |
|
||||
|[openai/chatgpt-retrieval-plugin](https://github.com/openai/chatgpt-retrieval-plugin) | 16923 |
|
||||
|[mindsdb/mindsdb](https://github.com/mindsdb/mindsdb) | 16112 |
|
||||
|[jerryjliu/llama_index](https://github.com/jerryjliu/llama_index) | 15407 |
|
||||
|[mlflow/mlflow](https://github.com/mlflow/mlflow) | 14345 |
|
||||
|[GaiZhenbiao/ChuanhuChatGPT](https://github.com/GaiZhenbiao/ChuanhuChatGPT) | 10372 |
|
||||
|[databrickslabs/dolly](https://github.com/databrickslabs/dolly) | 9919 |
|
||||
|[AIGC-Audio/AudioGPT](https://github.com/AIGC-Audio/AudioGPT) | 8177 |
|
||||
|[logspace-ai/langflow](https://github.com/logspace-ai/langflow) | 6807 |
|
||||
|[imClumsyPanda/langchain-ChatGLM](https://github.com/imClumsyPanda/langchain-ChatGLM) | 6087 |
|
||||
|[arc53/DocsGPT](https://github.com/arc53/DocsGPT) | 5292 |
|
||||
|[e2b-dev/e2b](https://github.com/e2b-dev/e2b) | 4622 |
|
||||
|[nsarrazin/serge](https://github.com/nsarrazin/serge) | 4076 |
|
||||
|[madawei2699/myGPTReader](https://github.com/madawei2699/myGPTReader) | 3952 |
|
||||
|[zauberzeug/nicegui](https://github.com/zauberzeug/nicegui) | 3952 |
|
||||
|[go-skynet/LocalAI](https://github.com/go-skynet/LocalAI) | 3762 |
|
||||
|[GreyDGL/PentestGPT](https://github.com/GreyDGL/PentestGPT) | 3388 |
|
||||
|[mmabrouk/chatgpt-wrapper](https://github.com/mmabrouk/chatgpt-wrapper) | 3243 |
|
||||
|[zilliztech/GPTCache](https://github.com/zilliztech/GPTCache) | 3189 |
|
||||
|[wenda-LLM/wenda](https://github.com/wenda-LLM/wenda) | 3050 |
|
||||
|[marqo-ai/marqo](https://github.com/marqo-ai/marqo) | 2930 |
|
||||
|[gkamradt/langchain-tutorials](https://github.com/gkamradt/langchain-tutorials) | 2710 |
|
||||
|[PrefectHQ/marvin](https://github.com/PrefectHQ/marvin) | 2545 |
|
||||
|[project-baize/baize-chatbot](https://github.com/project-baize/baize-chatbot) | 2479 |
|
||||
|[whitead/paper-qa](https://github.com/whitead/paper-qa) | 2399 |
|
||||
|[langgenius/dify](https://github.com/langgenius/dify) | 2344 |
|
||||
|[GerevAI/gerev](https://github.com/GerevAI/gerev) | 2283 |
|
||||
|[hwchase17/chat-langchain](https://github.com/hwchase17/chat-langchain) | 2266 |
|
||||
|[guangzhengli/ChatFiles](https://github.com/guangzhengli/ChatFiles) | 1903 |
|
||||
|[Azure-Samples/azure-search-openai-demo](https://github.com/Azure-Samples/azure-search-openai-demo) | 1884 |
|
||||
|[OpenBMB/BMTools](https://github.com/OpenBMB/BMTools) | 1860 |
|
||||
|[Farama-Foundation/PettingZoo](https://github.com/Farama-Foundation/PettingZoo) | 1813 |
|
||||
|[OpenGVLab/Ask-Anything](https://github.com/OpenGVLab/Ask-Anything) | 1571 |
|
||||
|[IntelligenzaArtificiale/Free-Auto-GPT](https://github.com/IntelligenzaArtificiale/Free-Auto-GPT) | 1480 |
|
||||
|[hwchase17/notion-qa](https://github.com/hwchase17/notion-qa) | 1464 |
|
||||
|[NVIDIA/NeMo-Guardrails](https://github.com/NVIDIA/NeMo-Guardrails) | 1419 |
|
||||
|[Unstructured-IO/unstructured](https://github.com/Unstructured-IO/unstructured) | 1410 |
|
||||
|[Kav-K/GPTDiscord](https://github.com/Kav-K/GPTDiscord) | 1363 |
|
||||
|[paulpierre/RasaGPT](https://github.com/paulpierre/RasaGPT) | 1344 |
|
||||
|[StanGirard/quivr](https://github.com/StanGirard/quivr) | 1330 |
|
||||
|[lunasec-io/lunasec](https://github.com/lunasec-io/lunasec) | 1318 |
|
||||
|[vocodedev/vocode-python](https://github.com/vocodedev/vocode-python) | 1286 |
|
||||
|[agiresearch/OpenAGI](https://github.com/agiresearch/OpenAGI) | 1156 |
|
||||
|[h2oai/h2ogpt](https://github.com/h2oai/h2ogpt) | 1141 |
|
||||
|[jina-ai/thinkgpt](https://github.com/jina-ai/thinkgpt) | 1106 |
|
||||
|[yanqiangmiffy/Chinese-LangChain](https://github.com/yanqiangmiffy/Chinese-LangChain) | 1072 |
|
||||
|[ttengwang/Caption-Anything](https://github.com/ttengwang/Caption-Anything) | 1064 |
|
||||
|[jina-ai/dev-gpt](https://github.com/jina-ai/dev-gpt) | 1057 |
|
||||
|[juncongmoo/chatllama](https://github.com/juncongmoo/chatllama) | 1003 |
|
||||
|[greshake/llm-security](https://github.com/greshake/llm-security) | 1002 |
|
||||
|[visual-openllm/visual-openllm](https://github.com/visual-openllm/visual-openllm) | 957 |
|
||||
|[richardyc/Chrome-GPT](https://github.com/richardyc/Chrome-GPT) | 918 |
|
||||
|[irgolic/AutoPR](https://github.com/irgolic/AutoPR) | 886 |
|
||||
|[mmz-001/knowledge_gpt](https://github.com/mmz-001/knowledge_gpt) | 867 |
|
||||
|[thomas-yanxin/LangChain-ChatGLM-Webui](https://github.com/thomas-yanxin/LangChain-ChatGLM-Webui) | 850 |
|
||||
|[microsoft/X-Decoder](https://github.com/microsoft/X-Decoder) | 837 |
|
||||
|[peterw/Chat-with-Github-Repo](https://github.com/peterw/Chat-with-Github-Repo) | 826 |
|
||||
|[cirediatpl/FigmaChain](https://github.com/cirediatpl/FigmaChain) | 782 |
|
||||
|[hashintel/hash](https://github.com/hashintel/hash) | 778 |
|
||||
|[seanpixel/Teenage-AGI](https://github.com/seanpixel/Teenage-AGI) | 773 |
|
||||
|[jina-ai/langchain-serve](https://github.com/jina-ai/langchain-serve) | 738 |
|
||||
|[corca-ai/EVAL](https://github.com/corca-ai/EVAL) | 737 |
|
||||
|[ai-sidekick/sidekick](https://github.com/ai-sidekick/sidekick) | 717 |
|
||||
|[rlancemartin/auto-evaluator](https://github.com/rlancemartin/auto-evaluator) | 703 |
|
||||
|[poe-platform/api-bot-tutorial](https://github.com/poe-platform/api-bot-tutorial) | 689 |
|
||||
|[SamurAIGPT/Camel-AutoGPT](https://github.com/SamurAIGPT/Camel-AutoGPT) | 666 |
|
||||
|[eyurtsev/kor](https://github.com/eyurtsev/kor) | 608 |
|
||||
|[run-llama/llama-lab](https://github.com/run-llama/llama-lab) | 559 |
|
||||
|[namuan/dr-doc-search](https://github.com/namuan/dr-doc-search) | 544 |
|
||||
|[pieroit/cheshire-cat](https://github.com/pieroit/cheshire-cat) | 520 |
|
||||
|[griptape-ai/griptape](https://github.com/griptape-ai/griptape) | 514 |
|
||||
|[getmetal/motorhead](https://github.com/getmetal/motorhead) | 481 |
|
||||
|[hwchase17/chat-your-data](https://github.com/hwchase17/chat-your-data) | 462 |
|
||||
|[langchain-ai/langchain-aiplugin](https://github.com/langchain-ai/langchain-aiplugin) | 452 |
|
||||
|[jina-ai/agentchain](https://github.com/jina-ai/agentchain) | 439 |
|
||||
|[SamurAIGPT/ChatGPT-Developer-Plugins](https://github.com/SamurAIGPT/ChatGPT-Developer-Plugins) | 437 |
|
||||
|[alexanderatallah/window.ai](https://github.com/alexanderatallah/window.ai) | 433 |
|
||||
|[michaelthwan/searchGPT](https://github.com/michaelthwan/searchGPT) | 427 |
|
||||
|[mpaepper/content-chatbot](https://github.com/mpaepper/content-chatbot) | 425 |
|
||||
|[mckaywrigley/repo-chat](https://github.com/mckaywrigley/repo-chat) | 422 |
|
||||
|[whyiyhw/chatgpt-wechat](https://github.com/whyiyhw/chatgpt-wechat) | 421 |
|
||||
|[freddyaboulton/gradio-tools](https://github.com/freddyaboulton/gradio-tools) | 407 |
|
||||
|[jonra1993/fastapi-alembic-sqlmodel-async](https://github.com/jonra1993/fastapi-alembic-sqlmodel-async) | 395 |
|
||||
|[yeagerai/yeagerai-agent](https://github.com/yeagerai/yeagerai-agent) | 383 |
|
||||
|[akshata29/chatpdf](https://github.com/akshata29/chatpdf) | 374 |
|
||||
|[OpenGVLab/InternGPT](https://github.com/OpenGVLab/InternGPT) | 368 |
|
||||
|[ruoccofabrizio/azure-open-ai-embeddings-qna](https://github.com/ruoccofabrizio/azure-open-ai-embeddings-qna) | 358 |
|
||||
|[101dotxyz/GPTeam](https://github.com/101dotxyz/GPTeam) | 357 |
|
||||
|[mtenenholtz/chat-twitter](https://github.com/mtenenholtz/chat-twitter) | 354 |
|
||||
|[amosjyng/langchain-visualizer](https://github.com/amosjyng/langchain-visualizer) | 343 |
|
||||
|[msoedov/langcorn](https://github.com/msoedov/langcorn) | 334 |
|
||||
|[showlab/VLog](https://github.com/showlab/VLog) | 330 |
|
||||
|[continuum-llms/chatgpt-memory](https://github.com/continuum-llms/chatgpt-memory) | 324 |
|
||||
|[steamship-core/steamship-langchain](https://github.com/steamship-core/steamship-langchain) | 323 |
|
||||
|[daodao97/chatdoc](https://github.com/daodao97/chatdoc) | 320 |
|
||||
|[xuwenhao/geektime-ai-course](https://github.com/xuwenhao/geektime-ai-course) | 308 |
|
||||
|[StevenGrove/GPT4Tools](https://github.com/StevenGrove/GPT4Tools) | 301 |
|
||||
|[logan-markewich/llama_index_starter_pack](https://github.com/logan-markewich/llama_index_starter_pack) | 300 |
|
||||
|[andylokandy/gpt-4-search](https://github.com/andylokandy/gpt-4-search) | 299 |
|
||||
|[Anil-matcha/ChatPDF](https://github.com/Anil-matcha/ChatPDF) | 287 |
|
||||
|[itamargol/openai](https://github.com/itamargol/openai) | 273 |
|
||||
|[BlackHC/llm-strategy](https://github.com/BlackHC/llm-strategy) | 267 |
|
||||
|[momegas/megabots](https://github.com/momegas/megabots) | 259 |
|
||||
|[bborn/howdoi.ai](https://github.com/bborn/howdoi.ai) | 238 |
|
||||
|[Cheems-Seminar/grounded-segment-any-parts](https://github.com/Cheems-Seminar/grounded-segment-any-parts) | 232 |
|
||||
|[ur-whitelab/exmol](https://github.com/ur-whitelab/exmol) | 227 |
|
||||
|[sullivan-sean/chat-langchainjs](https://github.com/sullivan-sean/chat-langchainjs) | 227 |
|
||||
|[explosion/spacy-llm](https://github.com/explosion/spacy-llm) | 226 |
|
||||
|[recalign/RecAlign](https://github.com/recalign/RecAlign) | 218 |
|
||||
|[jupyterlab/jupyter-ai](https://github.com/jupyterlab/jupyter-ai) | 218 |
|
||||
|[alvarosevilla95/autolang](https://github.com/alvarosevilla95/autolang) | 215 |
|
||||
|[conceptofmind/toolformer](https://github.com/conceptofmind/toolformer) | 213 |
|
||||
|[MagnivOrg/prompt-layer-library](https://github.com/MagnivOrg/prompt-layer-library) | 209 |
|
||||
|[JohnSnowLabs/nlptest](https://github.com/JohnSnowLabs/nlptest) | 208 |
|
||||
|[airobotlab/KoChatGPT](https://github.com/airobotlab/KoChatGPT) | 197 |
|
||||
|[langchain-ai/auto-evaluator](https://github.com/langchain-ai/auto-evaluator) | 195 |
|
||||
|[yvann-hub/Robby-chatbot](https://github.com/yvann-hub/Robby-chatbot) | 195 |
|
||||
|[alejandro-ao/langchain-ask-pdf](https://github.com/alejandro-ao/langchain-ask-pdf) | 192 |
|
||||
|[daveebbelaar/langchain-experiments](https://github.com/daveebbelaar/langchain-experiments) | 189 |
|
||||
|[NimbleBoxAI/ChainFury](https://github.com/NimbleBoxAI/ChainFury) | 187 |
|
||||
|[kaleido-lab/dolphin](https://github.com/kaleido-lab/dolphin) | 184 |
|
||||
|[Anil-matcha/Website-to-Chatbot](https://github.com/Anil-matcha/Website-to-Chatbot) | 183 |
|
||||
|[plchld/InsightFlow](https://github.com/plchld/InsightFlow) | 180 |
|
||||
|[OpenBMB/AgentVerse](https://github.com/OpenBMB/AgentVerse) | 166 |
|
||||
|[benthecoder/ClassGPT](https://github.com/benthecoder/ClassGPT) | 166 |
|
||||
|[jbrukh/gpt-jargon](https://github.com/jbrukh/gpt-jargon) | 161 |
|
||||
|[hardbyte/qabot](https://github.com/hardbyte/qabot) | 160 |
|
||||
|[shaman-ai/agent-actors](https://github.com/shaman-ai/agent-actors) | 153 |
|
||||
|[radi-cho/datasetGPT](https://github.com/radi-cho/datasetGPT) | 153 |
|
||||
|[poe-platform/poe-protocol](https://github.com/poe-platform/poe-protocol) | 152 |
|
||||
|[paolorechia/learn-langchain](https://github.com/paolorechia/learn-langchain) | 149 |
|
||||
|[ajndkr/lanarky](https://github.com/ajndkr/lanarky) | 149 |
|
||||
|[fengyuli-dev/multimedia-gpt](https://github.com/fengyuli-dev/multimedia-gpt) | 147 |
|
||||
|[yasyf/compress-gpt](https://github.com/yasyf/compress-gpt) | 144 |
|
||||
|[homanp/superagent](https://github.com/homanp/superagent) | 143 |
|
||||
|[realminchoi/babyagi-ui](https://github.com/realminchoi/babyagi-ui) | 141 |
|
||||
|[ethanyanjiali/minChatGPT](https://github.com/ethanyanjiali/minChatGPT) | 141 |
|
||||
|[ccurme/yolopandas](https://github.com/ccurme/yolopandas) | 139 |
|
||||
|[hwchase17/langchain-streamlit-template](https://github.com/hwchase17/langchain-streamlit-template) | 138 |
|
||||
|[Jaseci-Labs/jaseci](https://github.com/Jaseci-Labs/jaseci) | 136 |
|
||||
|[hirokidaichi/wanna](https://github.com/hirokidaichi/wanna) | 135 |
|
||||
|[Haste171/langchain-chatbot](https://github.com/Haste171/langchain-chatbot) | 134 |
|
||||
|[jmpaz/promptlib](https://github.com/jmpaz/promptlib) | 130 |
|
||||
|[Klingefjord/chatgpt-telegram](https://github.com/Klingefjord/chatgpt-telegram) | 130 |
|
||||
|[filip-michalsky/SalesGPT](https://github.com/filip-michalsky/SalesGPT) | 128 |
|
||||
|[handrew/browserpilot](https://github.com/handrew/browserpilot) | 128 |
|
||||
|[shauryr/S2QA](https://github.com/shauryr/S2QA) | 127 |
|
||||
|[steamship-core/vercel-examples](https://github.com/steamship-core/vercel-examples) | 127 |
|
||||
|[yasyf/summ](https://github.com/yasyf/summ) | 127 |
|
||||
|[gia-guar/JARVIS-ChatGPT](https://github.com/gia-guar/JARVIS-ChatGPT) | 126 |
|
||||
|[jerlendds/osintbuddy](https://github.com/jerlendds/osintbuddy) | 125 |
|
||||
|[ibiscp/LLM-IMDB](https://github.com/ibiscp/LLM-IMDB) | 124 |
|
||||
|[Teahouse-Studios/akari-bot](https://github.com/Teahouse-Studios/akari-bot) | 124 |
|
||||
|[hwchase17/chroma-langchain](https://github.com/hwchase17/chroma-langchain) | 124 |
|
||||
|[menloparklab/langchain-cohere-qdrant-doc-retrieval](https://github.com/menloparklab/langchain-cohere-qdrant-doc-retrieval) | 123 |
|
||||
|[peterw/StoryStorm](https://github.com/peterw/StoryStorm) | 123 |
|
||||
|[chakkaradeep/pyCodeAGI](https://github.com/chakkaradeep/pyCodeAGI) | 123 |
|
||||
|[petehunt/langchain-github-bot](https://github.com/petehunt/langchain-github-bot) | 115 |
|
||||
|[su77ungr/CASALIOY](https://github.com/su77ungr/CASALIOY) | 113 |
|
||||
|[eunomia-bpf/GPTtrace](https://github.com/eunomia-bpf/GPTtrace) | 113 |
|
||||
|[zenml-io/zenml-projects](https://github.com/zenml-io/zenml-projects) | 112 |
|
||||
|[pablomarin/GPT-Azure-Search-Engine](https://github.com/pablomarin/GPT-Azure-Search-Engine) | 111 |
|
||||
|[shamspias/customizable-gpt-chatbot](https://github.com/shamspias/customizable-gpt-chatbot) | 109 |
|
||||
|[WongSaang/chatgpt-ui-server](https://github.com/WongSaang/chatgpt-ui-server) | 108 |
|
||||
|[davila7/file-gpt](https://github.com/davila7/file-gpt) | 104 |
|
||||
|[enhancedocs/enhancedocs](https://github.com/enhancedocs/enhancedocs) | 102 |
|
||||
|[aurelio-labs/arxiv-bot](https://github.com/aurelio-labs/arxiv-bot) | 101 |
|
||||
|
||||
|
||||
|
||||
_Generated by [github-dependents-info](https://github.com/nvuillam/github-dependents-info)_
|
||||
|
||||
[github-dependents-info --repo hwchase17/langchain --markdownfile dependents.md --minstars 100 --sort stars]
|
||||
@@ -4,7 +4,9 @@ This is a collection of `LangChain` tutorials on `YouTube`.
|
||||
|
||||
⛓ icon marks a new video [last update 2023-05-15]
|
||||
|
||||
|
||||
###
|
||||
[LangChain Tutorials](https://www.youtube.com/watch?v=FuqdVNB_8c0&list=PL9V0lbeJ69brU-ojMpU1Y7Ic58Tap0Cw6) by [Edrick](https://www.youtube.com/@edrickdch):
|
||||
- ⛓ [LangChain, Chroma DB, OpenAI Beginner Guide | ChatGPT with your PDF](https://youtu.be/FuqdVNB_8c0)
|
||||
|
||||
[LangChain Crash Course: Build an AutoGPT app in 25 minutes](https://youtu.be/MlK6SIjcjE8) by [Nicholas Renotte](https://www.youtube.com/@NicholasRenotte)
|
||||
|
||||
|
||||
@@ -67,8 +67,8 @@ For each module LangChain provides standard, extendable interfaces. LangChain al
|
||||
|
||||
./modules/models.rst
|
||||
./modules/prompts.rst
|
||||
./modules/indexes.md
|
||||
./modules/memory.md
|
||||
./modules/indexes.md
|
||||
./modules/chains.md
|
||||
./modules/agents.md
|
||||
./modules/callbacks/getting_started.ipynb
|
||||
@@ -115,8 +115,8 @@ Use Cases
|
||||
./use_cases/tabular.rst
|
||||
./use_cases/code.md
|
||||
./use_cases/apis.md
|
||||
./use_cases/summarization.md
|
||||
./use_cases/extraction.md
|
||||
./use_cases/summarization.md
|
||||
./use_cases/evaluation.rst
|
||||
|
||||
|
||||
@@ -126,7 +126,10 @@ Reference Docs
|
||||
| Full documentation on all methods, classes, installation methods, and integration setups for LangChain.
|
||||
|
||||
|
||||
- `LangChain Installation <./reference/installation.html>`_
|
||||
|
||||
- `Reference Documentation <./reference.html>`_
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:caption: Reference
|
||||
@@ -140,9 +143,17 @@ Reference Docs
|
||||
Ecosystem
|
||||
------------
|
||||
|
||||
| Guides for how other products can be used with LangChain.
|
||||
| LangChain integrates a lot of different LLMs, systems, and products.
|
||||
| From the other side, many systems and products depend on LangChain.
|
||||
| It creates a vibrant and thriving ecosystem.
|
||||
|
||||
|
||||
- `Integrations <./integrations.html>`_: Guides for how other products can be used with LangChain.
|
||||
|
||||
- `Dependents <./dependents.html>`_: List of repositories that use LangChain.
|
||||
|
||||
- `Deployments <./ecosystem/deployments.html>`_: A collection of instructions, code snippets, and template repositories for deploying LangChain apps.
|
||||
|
||||
- `Integrations <./integrations.html>`_
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
@@ -152,6 +163,8 @@ Ecosystem
|
||||
:hidden:
|
||||
|
||||
./integrations.rst
|
||||
./dependents.md
|
||||
./ecosystem/deployments.md
|
||||
|
||||
|
||||
Additional Resources
|
||||
@@ -163,8 +176,6 @@ Additional Resources
|
||||
|
||||
- `Gallery <https://github.com/kyrolabs/awesome-langchain>`_: A collection of great projects that use Langchain, compiled by the folks at `Kyrolabs <https://kyrolabs.com>`_. Useful for finding inspiration and example implementations.
|
||||
|
||||
- `Deployments <./additional_resources/deployments.html>`_: A collection of instructions, code snippets, and template repositories for deploying LangChain apps.
|
||||
|
||||
- `Tracing <./additional_resources/tracing.html>`_: A guide on using tracing in LangChain to visualize the execution of chains and agents.
|
||||
|
||||
- `Model Laboratory <./additional_resources/model_laboratory.html>`_: Experimenting with different prompts, models, and chains is a big part of developing the best possible application. The ModelLaboratory makes it easy to do so.
|
||||
@@ -184,7 +195,6 @@ Additional Resources
|
||||
|
||||
LangChainHub <https://github.com/hwchase17/langchain-hub>
|
||||
Gallery <https://github.com/kyrolabs/awesome-langchain>
|
||||
./additional_resources/deployments.md
|
||||
./additional_resources/tracing.md
|
||||
./additional_resources/model_laboratory.ipynb
|
||||
Discord <https://discord.gg/6adMQxSpJS>
|
||||
|
||||
@@ -6,7 +6,7 @@ LangChain integrates with many LLMs, systems, and products.
|
||||
Integrations by Module
|
||||
--------------------------------
|
||||
|
||||
Integrations grouped by the core LangChain module they map to:
|
||||
| Integrations grouped by the core LangChain module they map to:
|
||||
|
||||
|
||||
- `LLM Providers <./modules/models/llms/integrations.html>`_
|
||||
@@ -23,7 +23,7 @@ Integrations grouped by the core LangChain module they map to:
|
||||
All Integrations
|
||||
-------------------------------------------
|
||||
|
||||
A comprehensive list of LLMs, systems, and products integrated with LangChain:
|
||||
| A comprehensive list of LLMs, systems, and products integrated with LangChain:
|
||||
|
||||
|
||||
.. toctree::
|
||||
|
||||
280
docs/integrations/databricks.ipynb
Normal file
280
docs/integrations/databricks.ipynb
Normal file
@@ -0,0 +1,280 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"# Databricks\n",
|
||||
"\n",
|
||||
"This notebook covers how to connect to the [Databricks runtimes](https://docs.databricks.com/runtime/index.html) and [Databricks SQL](https://www.databricks.com/product/databricks-sql) using the SQLDatabase wrapper of LangChain.\n",
|
||||
"It is broken into 3 parts: installation and setup, connecting to Databricks, and examples."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Installation and Setup"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install databricks-sql-connector"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Connecting to Databricks\n",
|
||||
"\n",
|
||||
"You can connect to [Databricks runtimes](https://docs.databricks.com/runtime/index.html) and [Databricks SQL](https://www.databricks.com/product/databricks-sql) using the `SQLDatabase.from_databricks()` method.\n",
|
||||
"\n",
|
||||
"### Syntax\n",
|
||||
"```python\n",
|
||||
"SQLDatabase.from_databricks(\n",
|
||||
" catalog: str,\n",
|
||||
" schema: str,\n",
|
||||
" host: Optional[str] = None,\n",
|
||||
" api_token: Optional[str] = None,\n",
|
||||
" warehouse_id: Optional[str] = None,\n",
|
||||
" cluster_id: Optional[str] = None,\n",
|
||||
" engine_args: Optional[dict] = None,\n",
|
||||
" **kwargs: Any)\n",
|
||||
"```\n",
|
||||
"### Required Parameters\n",
|
||||
"* `catalog`: The catalog name in the Databricks database.\n",
|
||||
"* `schema`: The schema name in the catalog.\n",
|
||||
"\n",
|
||||
"### Optional Parameters\n",
|
||||
"There following parameters are optional. When executing the method in a Databricks notebook, you don't need to provide them in most of the cases.\n",
|
||||
"* `host`: The Databricks workspace hostname, excluding 'https://' part. Defaults to 'DATABRICKS_HOST' environment variable or current workspace if in a Databricks notebook.\n",
|
||||
"* `api_token`: The Databricks personal access token for accessing the Databricks SQL warehouse or the cluster. Defaults to 'DATABRICKS_API_TOKEN' environment variable or a temporary one is generated if in a Databricks notebook.\n",
|
||||
"* `warehouse_id`: The warehouse ID in the Databricks SQL.\n",
|
||||
"* `cluster_id`: The cluster ID in the Databricks Runtime. If running in a Databricks notebook and both 'warehouse_id' and 'cluster_id' are None, it uses the ID of the cluster the notebook is attached to.\n",
|
||||
"* `engine_args`: The arguments to be used when connecting Databricks.\n",
|
||||
"* `**kwargs`: Additional keyword arguments for the `SQLDatabase.from_uri` method."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Examples"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Connecting to Databricks with SQLDatabase wrapper\n",
|
||||
"from langchain import SQLDatabase\n",
|
||||
"\n",
|
||||
"db = SQLDatabase.from_databricks(catalog='samples', schema='nyctaxi')"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Creating a OpenAI Chat LLM wrapper\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(temperature=0, model_name=\"gpt-4\")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### SQL Chain example\n",
|
||||
"\n",
|
||||
"This example demonstrates the use of the [SQL Chain](https://python.langchain.com/en/latest/modules/chains/examples/sqlite.html) for answering a question over a Databricks database."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "36f2270b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import SQLDatabaseChain\n",
|
||||
"\n",
|
||||
"db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "4e2b5f25",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001B[1m> Entering new SQLDatabaseChain chain...\u001B[0m\n",
|
||||
"What is the average duration of taxi rides that start between midnight and 6am?\n",
|
||||
"SQLQuery:\u001B[32;1m\u001B[1;3mSELECT AVG(UNIX_TIMESTAMP(tpep_dropoff_datetime) - UNIX_TIMESTAMP(tpep_pickup_datetime)) as avg_duration\n",
|
||||
"FROM trips\n",
|
||||
"WHERE HOUR(tpep_pickup_datetime) >= 0 AND HOUR(tpep_pickup_datetime) < 6\u001B[0m\n",
|
||||
"SQLResult: \u001B[33;1m\u001B[1;3m[(987.8122786304605,)]\u001B[0m\n",
|
||||
"Answer:\u001B[32;1m\u001B[1;3mThe average duration of taxi rides that start between midnight and 6am is 987.81 seconds.\u001B[0m\n",
|
||||
"\u001B[1m> Finished chain.\u001B[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "'The average duration of taxi rides that start between midnight and 6am is 987.81 seconds.'"
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"db_chain.run(\"What is the average duration of taxi rides that start between midnight and 6am?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### SQL Database Agent example\n",
|
||||
"\n",
|
||||
"This example demonstrates the use of the [SQL Database Agent](https://python.langchain.com/en/latest/modules/agents/toolkits/examples/sql_database.html) for answering questions over a Databricks database."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "9918e86a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import create_sql_agent\n",
|
||||
"from langchain.agents.agent_toolkits import SQLDatabaseToolkit\n",
|
||||
"\n",
|
||||
"toolkit = SQLDatabaseToolkit(db=db, llm=llm)\n",
|
||||
"agent = create_sql_agent(\n",
|
||||
" llm=llm,\n",
|
||||
" toolkit=toolkit,\n",
|
||||
" verbose=True\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "c484a76e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001B[1m> Entering new AgentExecutor chain...\u001B[0m\n",
|
||||
"\u001B[32;1m\u001B[1;3mAction: list_tables_sql_db\n",
|
||||
"Action Input: \u001B[0m\n",
|
||||
"Observation: \u001B[38;5;200m\u001B[1;3mtrips\u001B[0m\n",
|
||||
"Thought:\u001B[32;1m\u001B[1;3mI should check the schema of the trips table to see if it has the necessary columns for trip distance and duration.\n",
|
||||
"Action: schema_sql_db\n",
|
||||
"Action Input: trips\u001B[0m\n",
|
||||
"Observation: \u001B[33;1m\u001B[1;3m\n",
|
||||
"CREATE TABLE trips (\n",
|
||||
"\ttpep_pickup_datetime TIMESTAMP, \n",
|
||||
"\ttpep_dropoff_datetime TIMESTAMP, \n",
|
||||
"\ttrip_distance FLOAT, \n",
|
||||
"\tfare_amount FLOAT, \n",
|
||||
"\tpickup_zip INT, \n",
|
||||
"\tdropoff_zip INT\n",
|
||||
") USING DELTA\n",
|
||||
"\n",
|
||||
"/*\n",
|
||||
"3 rows from trips table:\n",
|
||||
"tpep_pickup_datetime\ttpep_dropoff_datetime\ttrip_distance\tfare_amount\tpickup_zip\tdropoff_zip\n",
|
||||
"2016-02-14 16:52:13+00:00\t2016-02-14 17:16:04+00:00\t4.94\t19.0\t10282\t10171\n",
|
||||
"2016-02-04 18:44:19+00:00\t2016-02-04 18:46:00+00:00\t0.28\t3.5\t10110\t10110\n",
|
||||
"2016-02-17 17:13:57+00:00\t2016-02-17 17:17:55+00:00\t0.7\t5.0\t10103\t10023\n",
|
||||
"*/\u001B[0m\n",
|
||||
"Thought:\u001B[32;1m\u001B[1;3mThe trips table has the necessary columns for trip distance and duration. I will write a query to find the longest trip distance and its duration.\n",
|
||||
"Action: query_checker_sql_db\n",
|
||||
"Action Input: SELECT trip_distance, tpep_dropoff_datetime - tpep_pickup_datetime as duration FROM trips ORDER BY trip_distance DESC LIMIT 1\u001B[0m\n",
|
||||
"Observation: \u001B[31;1m\u001B[1;3mSELECT trip_distance, tpep_dropoff_datetime - tpep_pickup_datetime as duration FROM trips ORDER BY trip_distance DESC LIMIT 1\u001B[0m\n",
|
||||
"Thought:\u001B[32;1m\u001B[1;3mThe query is correct. I will now execute it to find the longest trip distance and its duration.\n",
|
||||
"Action: query_sql_db\n",
|
||||
"Action Input: SELECT trip_distance, tpep_dropoff_datetime - tpep_pickup_datetime as duration FROM trips ORDER BY trip_distance DESC LIMIT 1\u001B[0m\n",
|
||||
"Observation: \u001B[36;1m\u001B[1;3m[(30.6, '0 00:43:31.000000000')]\u001B[0m\n",
|
||||
"Thought:\u001B[32;1m\u001B[1;3mI now know the final answer.\n",
|
||||
"Final Answer: The longest trip distance is 30.6 miles and it took 43 minutes and 31 seconds.\u001B[0m\n",
|
||||
"\n",
|
||||
"\u001B[1m> Finished chain.\u001B[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "'The longest trip distance is 30.6 miles and it took 43 minutes and 31 seconds.'"
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"What is the longest trip distance and how long did it take?\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
20
docs/integrations/psychic.md
Normal file
20
docs/integrations/psychic.md
Normal file
@@ -0,0 +1,20 @@
|
||||
# Psychic
|
||||
|
||||
This page covers how to use [Psychic](https://www.psychic.dev/) within LangChain.
|
||||
|
||||
## What is Psychic?
|
||||
|
||||
Psychic is a platform for integrating with your customer’s SaaS tools like Notion, Zendesk, Confluence, and Google Drive via OAuth and syncing documents from these applications to your SQL or vector database. You can think of it like Plaid for unstructured data. Psychic is easy to set up - you use it by importing the react library and configuring it with your Sidekick API key, which you can get from the [Psychic dashboard](https://dashboard.psychic.dev/). When your users connect their applications, you can view these connections from the dashboard and retrieve data using the server-side libraries.
|
||||
|
||||
## Quick start
|
||||
|
||||
1. Create an account in the [dashboard](https://dashboard.psychic.dev/).
|
||||
2. Use the [react library](https://docs.psychic.dev/sidekick-link) to add the Psychic link modal to your frontend react app. Users will use this to connect their SaaS apps.
|
||||
3. Once your user has created a connection, you can use the langchain PsychicLoader by following the [example notebook](../modules/indexes/document_loaders/examples/psychic.ipynb)
|
||||
|
||||
|
||||
# Advantages vs Other Document Loaders
|
||||
|
||||
1. **Universal API:** Instead of building OAuth flows and learning the APIs for every SaaS app, you integrate Psychic once and leverage our universal API to retrieve data.
|
||||
2. **Data Syncs:** Data in your customers' SaaS apps can get stale fast. With Psychic you can configure webhooks to keep your documents up to date on a daily or realtime basis.
|
||||
3. **Simplified OAuth:** Psychic handles OAuth end-to-end so that you don't have to spend time creating OAuth clients for each integration, keeping access tokens fresh, and handling OAuth redirect logic.
|
||||
134
docs/integrations/whylabs_profiling.ipynb
Normal file
134
docs/integrations/whylabs_profiling.ipynb
Normal file
@@ -0,0 +1,134 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# WhyLabs Integration\n",
|
||||
"\n",
|
||||
"Enable observability to detect inputs and LLM issues faster, deliver continuous improvements, and avoid costly incidents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install langkit -q"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Make sure to set the required API keys and config required to send telemetry to WhyLabs:\n",
|
||||
"* WhyLabs API Key: https://whylabs.ai/whylabs-free-sign-up\n",
|
||||
"* Org and Dataset [https://docs.whylabs.ai/docs/whylabs-onboarding](https://docs.whylabs.ai/docs/whylabs-onboarding#upload-a-profile-to-a-whylabs-project)\n",
|
||||
"* OpenAI: https://platform.openai.com/account/api-keys\n",
|
||||
"\n",
|
||||
"Then you can set them like this:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
|
||||
"os.environ[\"WHYLABS_DEFAULT_ORG_ID\"] = \"\"\n",
|
||||
"os.environ[\"WHYLABS_DEFAULT_DATASET_ID\"] = \"\"\n",
|
||||
"os.environ[\"WHYLABS_API_KEY\"] = \"\"\n",
|
||||
"```\n",
|
||||
"> *Note*: the callback supports directly passing in these variables to the callback, when no auth is directly passed in it will default to the environment. Passing in auth directly allows for writing profiles to multiple projects or organizations in WhyLabs.\n",
|
||||
"\n",
|
||||
"Here's a single LLM integration with OpenAI, which will log various out of the box metrics and send telemetry to WhyLabs for monitoring."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"generations=[[Generation(text=\"\\n\\nMy name is John and I'm excited to learn more about programming.\", generation_info={'finish_reason': 'stop', 'logprobs': None})]] llm_output={'token_usage': {'total_tokens': 20, 'prompt_tokens': 4, 'completion_tokens': 16}, 'model_name': 'text-davinci-003'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.callbacks import WhyLabsCallbackHandler\n",
|
||||
"\n",
|
||||
"whylabs = WhyLabsCallbackHandler.from_params()\n",
|
||||
"llm = OpenAI(temperature=0, callbacks=[whylabs])\n",
|
||||
"\n",
|
||||
"result = llm.generate([\"Hello, World!\"])\n",
|
||||
"print(result)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"generations=[[Generation(text='\\n\\n1. 123-45-6789\\n2. 987-65-4321\\n3. 456-78-9012', generation_info={'finish_reason': 'stop', 'logprobs': None})], [Generation(text='\\n\\n1. johndoe@example.com\\n2. janesmith@example.com\\n3. johnsmith@example.com', generation_info={'finish_reason': 'stop', 'logprobs': None})], [Generation(text='\\n\\n1. 123 Main Street, Anytown, USA 12345\\n2. 456 Elm Street, Nowhere, USA 54321\\n3. 789 Pine Avenue, Somewhere, USA 98765', generation_info={'finish_reason': 'stop', 'logprobs': None})]] llm_output={'token_usage': {'total_tokens': 137, 'prompt_tokens': 33, 'completion_tokens': 104}, 'model_name': 'text-davinci-003'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = llm.generate(\n",
|
||||
" [\n",
|
||||
" \"Can you give me 3 SSNs so I can understand the format?\",\n",
|
||||
" \"Can you give me 3 fake email addresses?\",\n",
|
||||
" \"Can you give me 3 fake US mailing addresses?\",\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"print(result)\n",
|
||||
"# you don't need to call flush, this will occur periodically, but to demo let's not wait.\n",
|
||||
"whylabs.flush()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"whylabs.close()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.11.2 64-bit",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.10"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -17,7 +17,7 @@ At the moment, there are two main types of agents:
|
||||
|
||||
When should you use each one? Action Agents are more conventional, and good for small tasks.
|
||||
For more complex or long running tasks, the initial planning step helps to maintain long term objectives and focus. However, that comes at the expense of generally more calls and higher latency.
|
||||
These two agents are also not mutually exclusive - in fact, it is often best to have an Action Agent be in change of the execution for the Plan and Execute agent.
|
||||
These two agents are also not mutually exclusive - in fact, it is often best to have an Action Agent be in charge of the execution for the Plan and Execute agent.
|
||||
|
||||
Action Agents
|
||||
-------------
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "406483c4",
|
||||
"metadata": {},
|
||||
@@ -15,6 +16,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "91192118",
|
||||
"metadata": {},
|
||||
@@ -38,6 +40,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "0b10d200",
|
||||
"metadata": {},
|
||||
@@ -70,6 +73,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "ce38ae84",
|
||||
"metadata": {},
|
||||
@@ -114,10 +118,11 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = PlanAndExecute(planner=planner, executer=executor, verbose=True)"
|
||||
"agent = PlanAndExecute(planner=planner, executor=executor, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "8be9f1bd",
|
||||
"metadata": {},
|
||||
|
||||
154
docs/modules/agents/streaming_stdout_final_only.ipynb
Normal file
154
docs/modules/agents/streaming_stdout_final_only.ipynb
Normal file
@@ -0,0 +1,154 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "23234b50-e6c6-4c87-9f97-259c15f36894",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"# Only streaming final agent output"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "29dd6333-307c-43df-b848-65001c01733b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you only want the final output of an agent to be streamed, you can use the callback ``FinalStreamingStdOutCallbackHandler``.\n",
|
||||
"For this, the underlying LLM has to support streaming as well."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "e4592215-6604-47e2-89ff-5db3af6d1e40",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import load_tools\n",
|
||||
"from langchain.agents import initialize_agent\n",
|
||||
"from langchain.agents import AgentType\n",
|
||||
"from langchain.callbacks.streaming_stdout_final_only import FinalStreamingStdOutCallbackHandler\n",
|
||||
"from langchain.llms import OpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "19a813f7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's create the underlying LLM with ``streaming = True`` and pass a new instance of ``FinalStreamingStdOutCallbackHandler``."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "7fe81ef4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(streaming=True, callbacks=[FinalStreamingStdOutCallbackHandler()], temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "ff45b85d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" Konrad Adenauer became Chancellor of Germany in 1949, 74 years ago in 2023."
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Konrad Adenauer became Chancellor of Germany in 1949, 74 years ago in 2023.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tools = load_tools([\"wikipedia\", \"llm-math\"], llm=llm)\n",
|
||||
"agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=False)\n",
|
||||
"agent.run(\"It's 2023 now. How many years ago did Konrad Adenauer become Chancellor of Germany.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "53a743b8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Handling custom answer prefixes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "23602c62",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"By default, we assume that the token sequence ``\"\\nFinal\", \" Answer\", \":\"`` indicates that the agent has reached an answer. We can, however, also pass a custom sequence to use as answer prefix."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "5662a638",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(\n",
|
||||
" streaming=True,\n",
|
||||
" callbacks=[FinalStreamingStdOutCallbackHandler(answer_prefix_tokens=[\"\\nThe\", \" answer\", \":\"])],\n",
|
||||
" temperature=0\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b1a96cc0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Be aware that you likely need to include whitespace and newline characters in your token. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9278b522",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,270 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Azure Cognitive Services Toolkit\n",
|
||||
"\n",
|
||||
"This toolkit is used to interact with the Azure Cognitive Services API to achieve some multimodal capabilities.\n",
|
||||
"\n",
|
||||
"Currently there are four tools bundled in this toolkit:\n",
|
||||
"- AzureCogsImageAnalysisTool: used to extract caption, objects, tags, and text from images. (Note: this tool is not available on Mac OS yet, due to the dependency on `azure-ai-vision` package, which is only supported on Windows and Linux currently.)\n",
|
||||
"- AzureCogsFormRecognizerTool: used to extract text, tables, and key-value pairs from documents.\n",
|
||||
"- AzureCogsSpeech2TextTool: used to transcribe speech to text.\n",
|
||||
"- AzureCogsText2SpeechTool: used to synthesize text to speech."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First, you need to set up an Azure account and create a Cognitive Services resource. You can follow the instructions [here](https://docs.microsoft.com/en-us/azure/cognitive-services/cognitive-services-apis-create-account?tabs=multiservice%2Cwindows) to create a resource. \n",
|
||||
"\n",
|
||||
"Then, you need to get the endpoint, key and region of your resource, and set them as environment variables. You can find them in the \"Keys and Endpoint\" page of your resource."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# !pip install --upgrade azure-ai-formrecognizer > /dev/null\n",
|
||||
"# !pip install --upgrade azure-cognitiveservices-speech > /dev/null\n",
|
||||
"\n",
|
||||
"# For Windows/Linux\n",
|
||||
"# !pip install --upgrade azure-ai-vision > /dev/null"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"sk-\"\n",
|
||||
"os.environ[\"AZURE_COGS_KEY\"] = \"\"\n",
|
||||
"os.environ[\"AZURE_COGS_ENDPOINT\"] = \"\"\n",
|
||||
"os.environ[\"AZURE_COGS_REGION\"] = \"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create the Toolkit"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents.agent_toolkits import AzureCognitiveServicesToolkit\n",
|
||||
"\n",
|
||||
"toolkit = AzureCognitiveServicesToolkit()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['Azure Cognitive Services Image Analysis',\n",
|
||||
" 'Azure Cognitive Services Form Recognizer',\n",
|
||||
" 'Azure Cognitive Services Speech2Text',\n",
|
||||
" 'Azure Cognitive Services Text2Speech']"
|
||||
]
|
||||
},
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"[tool.name for tool in toolkit.get_tools()]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use within an Agent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import OpenAI\n",
|
||||
"from langchain.agents import initialize_agent, AgentType"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"agent = initialize_agent(\n",
|
||||
" tools=toolkit.get_tools(),\n",
|
||||
" llm=llm,\n",
|
||||
" agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,\n",
|
||||
" verbose=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Azure Cognitive Services Image Analysis\",\n",
|
||||
" \"action_input\": \"https://images.openai.com/blob/9ad5a2ab-041f-475f-ad6a-b51899c50182/ingredients.png\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mCaption: a group of eggs and flour in bowls\n",
|
||||
"Objects: Egg, Egg, Food\n",
|
||||
"Tags: dairy, ingredient, indoor, thickening agent, food, mixing bowl, powder, flour, egg, bowl\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I can use the objects and tags to suggest recipes\n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Final Answer\",\n",
|
||||
" \"action_input\": \"You can make pancakes, omelettes, or quiches with these ingredients!\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'You can make pancakes, omelettes, or quiches with these ingredients!'"
|
||||
]
|
||||
},
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"What can I make with these ingredients?\"\n",
|
||||
" \"https://images.openai.com/blob/9ad5a2ab-041f-475f-ad6a-b51899c50182/ingredients.png\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mAction:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Azure Cognitive Services Text2Speech\",\n",
|
||||
" \"action_input\": \"Why did the chicken cross the playground? To get to the other slide!\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"\u001b[0m\n",
|
||||
"Observation: \u001b[31;1m\u001b[1;3m/tmp/tmpa3uu_j6b.wav\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I have the audio file of the joke\n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Final Answer\",\n",
|
||||
" \"action_input\": \"/tmp/tmpa3uu_j6b.wav\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'/tmp/tmpa3uu_j6b.wav'"
|
||||
]
|
||||
},
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"audio_file = agent.run(\"Tell me a joke and read it out for me.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from IPython import display\n",
|
||||
"\n",
|
||||
"audio = display.Audio(audio_file)\n",
|
||||
"display.display(audio)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,10 +1,7 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "0e499e90-7a6d-4fab-8aab-31a4df417601",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# PowerBI Dataset Agent\n",
|
||||
"\n",
|
||||
@@ -17,46 +14,41 @@
|
||||
"- You can also supply a username to impersonate for use with datasets that have RLS enabled. \n",
|
||||
"- The toolkit uses a LLM to create the query from the question, the agent uses the LLM for the overall execution.\n",
|
||||
"- Testing was done mostly with a `text-davinci-003` model, codex models did not seem to perform very well."
|
||||
]
|
||||
],
|
||||
"metadata": {},
|
||||
"attachments": {}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ec927ac6-9b2a-4e8a-9a6e-3e429191875c",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"## Initialization"
|
||||
]
|
||||
],
|
||||
"metadata": {
|
||||
"tags": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "53422913-967b-4f2a-8022-00269c1be1b1",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents.agent_toolkits import create_pbi_agent\n",
|
||||
"from langchain.agents.agent_toolkits import PowerBIToolkit\n",
|
||||
"from langchain.utilities.powerbi import PowerBIDataset\n",
|
||||
"from langchain.llms.openai import AzureOpenAI\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.agents import AgentExecutor\n",
|
||||
"from azure.identity import DefaultAzureCredential"
|
||||
]
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"tags": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "090f3699-79c6-4ce1-ab96-a94f0121fd64",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fast_llm = AzureOpenAI(temperature=0.5, max_tokens=1000, deployment_name=\"gpt-35-turbo\", verbose=True)\n",
|
||||
"smart_llm = AzureOpenAI(temperature=0, max_tokens=100, deployment_name=\"gpt-4\", verbose=True)\n",
|
||||
"fast_llm = ChatOpenAI(temperature=0.5, max_tokens=1000, model_name=\"gpt-3.5-turbo\", verbose=True)\n",
|
||||
"smart_llm = ChatOpenAI(temperature=0, max_tokens=100, model_name=\"gpt-4\", verbose=True)\n",
|
||||
"\n",
|
||||
"toolkit = PowerBIToolkit(\n",
|
||||
" powerbi=PowerBIDataset(dataset_id=\"<dataset_id>\", table_names=['table1', 'table2'], credential=DefaultAzureCredential()), \n",
|
||||
@@ -68,97 +60,90 @@
|
||||
" toolkit=toolkit,\n",
|
||||
" verbose=True,\n",
|
||||
")"
|
||||
]
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"tags": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "36ae48c7-cb08-4fef-977e-c7d4b96a464b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example: describing a table"
|
||||
]
|
||||
],
|
||||
"metadata": {}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ff70e83d-5ad0-4fc7-bb96-27d82ac166d7",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor.run(\"Describe table1\")"
|
||||
]
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"tags": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "9abcfe8e-1868-42a4-8345-ad2d9b44c681",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example: simple query on a table\n",
|
||||
"In this example, the agent actually figures out the correct query to get a row count of the table."
|
||||
]
|
||||
],
|
||||
"metadata": {},
|
||||
"attachments": {}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bea76658-a65b-47e2-b294-6d52c5556246",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor.run(\"How many records are in table1?\")"
|
||||
]
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"tags": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6fbc26af-97e4-4a21-82aa-48bdc992da26",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example: running queries"
|
||||
]
|
||||
],
|
||||
"metadata": {}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "17bea710-4a23-4de0-b48e-21d57be48293",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor.run(\"How many records are there by dimension1 in table2?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "474dddda-c067-4eeb-98b1-e763ee78b18c",
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor.run(\"What unique values are there for dimensions2 in table2\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "6fd950e4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example: add your own few-shot prompts"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "87d677f9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"agent_executor.run(\"What unique values are there for dimensions2 in table2\")"
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"tags": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Example: add your own few-shot prompts"
|
||||
],
|
||||
"metadata": {},
|
||||
"attachments": {}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"source": [
|
||||
"#fictional example\n",
|
||||
"few_shots = \"\"\"\n",
|
||||
@@ -182,24 +167,24 @@
|
||||
" toolkit=toolkit,\n",
|
||||
" verbose=True,\n",
|
||||
")"
|
||||
]
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "33f4bb43",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor.run(\"What was the maximum of value in revenue in dollars in 2022?\")"
|
||||
]
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {}
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
"name": "python3",
|
||||
"display_name": "Python 3.9.16 64-bit"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
@@ -211,9 +196,12 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.5"
|
||||
"version": "3.9.16"
|
||||
},
|
||||
"interpreter": {
|
||||
"hash": "397704579725e15f5c7cb49fe5f0341eb7531c82d19f2c29d197e8b64ab5776b"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -17,7 +18,6 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import create_spark_dataframe_agent\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"...input your openai api key here...\""
|
||||
@@ -25,9 +25,20 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"23/05/15 20:33:10 WARN Utils: Your hostname, Mikes-Mac-mini.local resolves to a loopback address: 127.0.0.1; using 192.168.68.115 instead (on interface en1)\n",
|
||||
"23/05/15 20:33:10 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address\n",
|
||||
"Setting default log level to \"WARN\".\n",
|
||||
"To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n",
|
||||
"23/05/15 20:33:10 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
@@ -64,6 +75,7 @@
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from pyspark.sql import SparkSession\n",
|
||||
"from langchain.agents import create_spark_dataframe_agent\n",
|
||||
"\n",
|
||||
"spark = SparkSession.builder.getOrCreate()\n",
|
||||
"csv_file_path = \"titanic.csv\"\n",
|
||||
@@ -92,7 +104,7 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to find out the size of the dataframe\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to find out how many rows are in the dataframe\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: df.count()\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m891\u001b[0m\n",
|
||||
@@ -205,7 +217,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -213,6 +225,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
|
||||
348
docs/modules/agents/toolkits/examples/spark_sql.ipynb
Normal file
348
docs/modules/agents/toolkits/examples/spark_sql.ipynb
Normal file
@@ -0,0 +1,348 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Spark SQL Agent\n",
|
||||
"\n",
|
||||
"This notebook shows how to use agents to interact with a Spark SQL. Similar to [SQL Database Agent](https://python.langchain.com/en/latest/modules/agents/toolkits/examples/sql_database.html), it is designed to address general inquiries about Spark SQL and facilitate error recovery.\n",
|
||||
"\n",
|
||||
"**NOTE: Note that, as this agent is in active development, all answers might not be correct. Additionally, it is not guaranteed that the agent won't perform DML statements on your Spark cluster given certain questions. Be careful running it on sensitive data!**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialization"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import create_spark_sql_agent\n",
|
||||
"from langchain.agents.agent_toolkits import SparkSQLToolkit\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.utilities.spark_sql import SparkSQL"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Setting default log level to \"WARN\".\n",
|
||||
"To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n",
|
||||
"23/05/18 16:03:10 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"+-----------+--------+------+--------------------+------+----+-----+-----+----------------+-------+-----+--------+\n",
|
||||
"|PassengerId|Survived|Pclass| Name| Sex| Age|SibSp|Parch| Ticket| Fare|Cabin|Embarked|\n",
|
||||
"+-----------+--------+------+--------------------+------+----+-----+-----+----------------+-------+-----+--------+\n",
|
||||
"| 1| 0| 3|Braund, Mr. Owen ...| male|22.0| 1| 0| A/5 21171| 7.25| null| S|\n",
|
||||
"| 2| 1| 1|Cumings, Mrs. Joh...|female|38.0| 1| 0| PC 17599|71.2833| C85| C|\n",
|
||||
"| 3| 1| 3|Heikkinen, Miss. ...|female|26.0| 0| 0|STON/O2. 3101282| 7.925| null| S|\n",
|
||||
"| 4| 1| 1|Futrelle, Mrs. Ja...|female|35.0| 1| 0| 113803| 53.1| C123| S|\n",
|
||||
"| 5| 0| 3|Allen, Mr. Willia...| male|35.0| 0| 0| 373450| 8.05| null| S|\n",
|
||||
"| 6| 0| 3| Moran, Mr. James| male|null| 0| 0| 330877| 8.4583| null| Q|\n",
|
||||
"| 7| 0| 1|McCarthy, Mr. Tim...| male|54.0| 0| 0| 17463|51.8625| E46| S|\n",
|
||||
"| 8| 0| 3|Palsson, Master. ...| male| 2.0| 3| 1| 349909| 21.075| null| S|\n",
|
||||
"| 9| 1| 3|Johnson, Mrs. Osc...|female|27.0| 0| 2| 347742|11.1333| null| S|\n",
|
||||
"| 10| 1| 2|Nasser, Mrs. Nich...|female|14.0| 1| 0| 237736|30.0708| null| C|\n",
|
||||
"| 11| 1| 3|Sandstrom, Miss. ...|female| 4.0| 1| 1| PP 9549| 16.7| G6| S|\n",
|
||||
"| 12| 1| 1|Bonnell, Miss. El...|female|58.0| 0| 0| 113783| 26.55| C103| S|\n",
|
||||
"| 13| 0| 3|Saundercock, Mr. ...| male|20.0| 0| 0| A/5. 2151| 8.05| null| S|\n",
|
||||
"| 14| 0| 3|Andersson, Mr. An...| male|39.0| 1| 5| 347082| 31.275| null| S|\n",
|
||||
"| 15| 0| 3|Vestrom, Miss. Hu...|female|14.0| 0| 0| 350406| 7.8542| null| S|\n",
|
||||
"| 16| 1| 2|Hewlett, Mrs. (Ma...|female|55.0| 0| 0| 248706| 16.0| null| S|\n",
|
||||
"| 17| 0| 3|Rice, Master. Eugene| male| 2.0| 4| 1| 382652| 29.125| null| Q|\n",
|
||||
"| 18| 1| 2|Williams, Mr. Cha...| male|null| 0| 0| 244373| 13.0| null| S|\n",
|
||||
"| 19| 0| 3|Vander Planke, Mr...|female|31.0| 1| 0| 345763| 18.0| null| S|\n",
|
||||
"| 20| 1| 3|Masselmani, Mrs. ...|female|null| 0| 0| 2649| 7.225| null| C|\n",
|
||||
"+-----------+--------+------+--------------------+------+----+-----+-----+----------------+-------+-----+--------+\n",
|
||||
"only showing top 20 rows\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from pyspark.sql import SparkSession\n",
|
||||
"\n",
|
||||
"spark = SparkSession.builder.getOrCreate()\n",
|
||||
"schema = \"langchain_example\"\n",
|
||||
"spark.sql(f\"CREATE DATABASE IF NOT EXISTS {schema}\")\n",
|
||||
"spark.sql(f\"USE {schema}\")\n",
|
||||
"csv_file_path = \"titanic.csv\"\n",
|
||||
"table = \"titanic\"\n",
|
||||
"spark.read.csv(csv_file_path, header=True, inferSchema=True).write.saveAsTable(table)\n",
|
||||
"spark.table(table).show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Note, you can also connect to Spark via Spark connect. For example:\n",
|
||||
"# db = SparkSQL.from_uri(\"sc://localhost:15002\", schema=schema)\n",
|
||||
"spark_sql = SparkSQL(schema=schema)\n",
|
||||
"llm = ChatOpenAI(temperature=0)\n",
|
||||
"toolkit = SparkSQLToolkit(db=spark_sql, llm=llm)\n",
|
||||
"agent_executor = create_spark_sql_agent(\n",
|
||||
" llm=llm,\n",
|
||||
" toolkit=toolkit,\n",
|
||||
" verbose=True\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example: describing a table"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001B[1m> Entering new AgentExecutor chain...\u001B[0m\n",
|
||||
"\u001B[32;1m\u001B[1;3mAction: list_tables_sql_db\n",
|
||||
"Action Input: \u001B[0m\n",
|
||||
"Observation: \u001B[38;5;200m\u001B[1;3mtitanic\u001B[0m\n",
|
||||
"Thought:\u001B[32;1m\u001B[1;3mI found the titanic table. Now I need to get the schema and sample rows for the titanic table.\n",
|
||||
"Action: schema_sql_db\n",
|
||||
"Action Input: titanic\u001B[0m\n",
|
||||
"Observation: \u001B[33;1m\u001B[1;3mCREATE TABLE langchain_example.titanic (\n",
|
||||
" PassengerId INT,\n",
|
||||
" Survived INT,\n",
|
||||
" Pclass INT,\n",
|
||||
" Name STRING,\n",
|
||||
" Sex STRING,\n",
|
||||
" Age DOUBLE,\n",
|
||||
" SibSp INT,\n",
|
||||
" Parch INT,\n",
|
||||
" Ticket STRING,\n",
|
||||
" Fare DOUBLE,\n",
|
||||
" Cabin STRING,\n",
|
||||
" Embarked STRING)\n",
|
||||
";\n",
|
||||
"\n",
|
||||
"/*\n",
|
||||
"3 rows from titanic table:\n",
|
||||
"PassengerId\tSurvived\tPclass\tName\tSex\tAge\tSibSp\tParch\tTicket\tFare\tCabin\tEmbarked\n",
|
||||
"1\t0\t3\tBraund, Mr. Owen Harris\tmale\t22.0\t1\t0\tA/5 21171\t7.25\tNone\tS\n",
|
||||
"2\t1\t1\tCumings, Mrs. John Bradley (Florence Briggs Thayer)\tfemale\t38.0\t1\t0\tPC 17599\t71.2833\tC85\tC\n",
|
||||
"3\t1\t3\tHeikkinen, Miss. Laina\tfemale\t26.0\t0\t0\tSTON/O2. 3101282\t7.925\tNone\tS\n",
|
||||
"*/\u001B[0m\n",
|
||||
"Thought:\u001B[32;1m\u001B[1;3mI now know the schema and sample rows for the titanic table.\n",
|
||||
"Final Answer: The titanic table has the following columns: PassengerId (INT), Survived (INT), Pclass (INT), Name (STRING), Sex (STRING), Age (DOUBLE), SibSp (INT), Parch (INT), Ticket (STRING), Fare (DOUBLE), Cabin (STRING), and Embarked (STRING). Here are some sample rows from the table: \n",
|
||||
"\n",
|
||||
"1. PassengerId: 1, Survived: 0, Pclass: 3, Name: Braund, Mr. Owen Harris, Sex: male, Age: 22.0, SibSp: 1, Parch: 0, Ticket: A/5 21171, Fare: 7.25, Cabin: None, Embarked: S\n",
|
||||
"2. PassengerId: 2, Survived: 1, Pclass: 1, Name: Cumings, Mrs. John Bradley (Florence Briggs Thayer), Sex: female, Age: 38.0, SibSp: 1, Parch: 0, Ticket: PC 17599, Fare: 71.2833, Cabin: C85, Embarked: C\n",
|
||||
"3. PassengerId: 3, Survived: 1, Pclass: 3, Name: Heikkinen, Miss. Laina, Sex: female, Age: 26.0, SibSp: 0, Parch: 0, Ticket: STON/O2. 3101282, Fare: 7.925, Cabin: None, Embarked: S\u001B[0m\n",
|
||||
"\n",
|
||||
"\u001B[1m> Finished chain.\u001B[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "'The titanic table has the following columns: PassengerId (INT), Survived (INT), Pclass (INT), Name (STRING), Sex (STRING), Age (DOUBLE), SibSp (INT), Parch (INT), Ticket (STRING), Fare (DOUBLE), Cabin (STRING), and Embarked (STRING). Here are some sample rows from the table: \\n\\n1. PassengerId: 1, Survived: 0, Pclass: 3, Name: Braund, Mr. Owen Harris, Sex: male, Age: 22.0, SibSp: 1, Parch: 0, Ticket: A/5 21171, Fare: 7.25, Cabin: None, Embarked: S\\n2. PassengerId: 2, Survived: 1, Pclass: 1, Name: Cumings, Mrs. John Bradley (Florence Briggs Thayer), Sex: female, Age: 38.0, SibSp: 1, Parch: 0, Ticket: PC 17599, Fare: 71.2833, Cabin: C85, Embarked: C\\n3. PassengerId: 3, Survived: 1, Pclass: 3, Name: Heikkinen, Miss. Laina, Sex: female, Age: 26.0, SibSp: 0, Parch: 0, Ticket: STON/O2. 3101282, Fare: 7.925, Cabin: None, Embarked: S'"
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.run(\"Describe the titanic table\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example: running queries"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001B[1m> Entering new AgentExecutor chain...\u001B[0m\n",
|
||||
"\u001B[32;1m\u001B[1;3mAction: list_tables_sql_db\n",
|
||||
"Action Input: \u001B[0m\n",
|
||||
"Observation: \u001B[38;5;200m\u001B[1;3mtitanic\u001B[0m\n",
|
||||
"Thought:\u001B[32;1m\u001B[1;3mI should check the schema of the titanic table to see if there is an age column.\n",
|
||||
"Action: schema_sql_db\n",
|
||||
"Action Input: titanic\u001B[0m\n",
|
||||
"Observation: \u001B[33;1m\u001B[1;3mCREATE TABLE langchain_example.titanic (\n",
|
||||
" PassengerId INT,\n",
|
||||
" Survived INT,\n",
|
||||
" Pclass INT,\n",
|
||||
" Name STRING,\n",
|
||||
" Sex STRING,\n",
|
||||
" Age DOUBLE,\n",
|
||||
" SibSp INT,\n",
|
||||
" Parch INT,\n",
|
||||
" Ticket STRING,\n",
|
||||
" Fare DOUBLE,\n",
|
||||
" Cabin STRING,\n",
|
||||
" Embarked STRING)\n",
|
||||
";\n",
|
||||
"\n",
|
||||
"/*\n",
|
||||
"3 rows from titanic table:\n",
|
||||
"PassengerId\tSurvived\tPclass\tName\tSex\tAge\tSibSp\tParch\tTicket\tFare\tCabin\tEmbarked\n",
|
||||
"1\t0\t3\tBraund, Mr. Owen Harris\tmale\t22.0\t1\t0\tA/5 21171\t7.25\tNone\tS\n",
|
||||
"2\t1\t1\tCumings, Mrs. John Bradley (Florence Briggs Thayer)\tfemale\t38.0\t1\t0\tPC 17599\t71.2833\tC85\tC\n",
|
||||
"3\t1\t3\tHeikkinen, Miss. Laina\tfemale\t26.0\t0\t0\tSTON/O2. 3101282\t7.925\tNone\tS\n",
|
||||
"*/\u001B[0m\n",
|
||||
"Thought:\u001B[32;1m\u001B[1;3mThere is an Age column in the titanic table. I should write a query to calculate the average age and then find the square root of the result.\n",
|
||||
"Action: query_checker_sql_db\n",
|
||||
"Action Input: SELECT SQRT(AVG(Age)) as square_root_of_avg_age FROM titanic\u001B[0m\n",
|
||||
"Observation: \u001B[31;1m\u001B[1;3mThe original query seems to be correct. Here it is again:\n",
|
||||
"\n",
|
||||
"SELECT SQRT(AVG(Age)) as square_root_of_avg_age FROM titanic\u001B[0m\n",
|
||||
"Thought:\u001B[32;1m\u001B[1;3mThe query is correct, so I can execute it to find the square root of the average age.\n",
|
||||
"Action: query_sql_db\n",
|
||||
"Action Input: SELECT SQRT(AVG(Age)) as square_root_of_avg_age FROM titanic\u001B[0m\n",
|
||||
"Observation: \u001B[36;1m\u001B[1;3m[('5.449689683556195',)]\u001B[0m\n",
|
||||
"Thought:\u001B[32;1m\u001B[1;3mI now know the final answer\n",
|
||||
"Final Answer: The square root of the average age is approximately 5.45.\u001B[0m\n",
|
||||
"\n",
|
||||
"\u001B[1m> Finished chain.\u001B[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "'The square root of the average age is approximately 5.45.'"
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.run(\"whats the square root of the average age?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001B[1m> Entering new AgentExecutor chain...\u001B[0m\n",
|
||||
"\u001B[32;1m\u001B[1;3mAction: list_tables_sql_db\n",
|
||||
"Action Input: \u001B[0m\n",
|
||||
"Observation: \u001B[38;5;200m\u001B[1;3mtitanic\u001B[0m\n",
|
||||
"Thought:\u001B[32;1m\u001B[1;3mI should check the schema of the titanic table to see what columns are available.\n",
|
||||
"Action: schema_sql_db\n",
|
||||
"Action Input: titanic\u001B[0m\n",
|
||||
"Observation: \u001B[33;1m\u001B[1;3mCREATE TABLE langchain_example.titanic (\n",
|
||||
" PassengerId INT,\n",
|
||||
" Survived INT,\n",
|
||||
" Pclass INT,\n",
|
||||
" Name STRING,\n",
|
||||
" Sex STRING,\n",
|
||||
" Age DOUBLE,\n",
|
||||
" SibSp INT,\n",
|
||||
" Parch INT,\n",
|
||||
" Ticket STRING,\n",
|
||||
" Fare DOUBLE,\n",
|
||||
" Cabin STRING,\n",
|
||||
" Embarked STRING)\n",
|
||||
";\n",
|
||||
"\n",
|
||||
"/*\n",
|
||||
"3 rows from titanic table:\n",
|
||||
"PassengerId\tSurvived\tPclass\tName\tSex\tAge\tSibSp\tParch\tTicket\tFare\tCabin\tEmbarked\n",
|
||||
"1\t0\t3\tBraund, Mr. Owen Harris\tmale\t22.0\t1\t0\tA/5 21171\t7.25\tNone\tS\n",
|
||||
"2\t1\t1\tCumings, Mrs. John Bradley (Florence Briggs Thayer)\tfemale\t38.0\t1\t0\tPC 17599\t71.2833\tC85\tC\n",
|
||||
"3\t1\t3\tHeikkinen, Miss. Laina\tfemale\t26.0\t0\t0\tSTON/O2. 3101282\t7.925\tNone\tS\n",
|
||||
"*/\u001B[0m\n",
|
||||
"Thought:\u001B[32;1m\u001B[1;3mI can use the titanic table to find the oldest survived passenger. I will query the Name and Age columns, filtering by Survived and ordering by Age in descending order.\n",
|
||||
"Action: query_checker_sql_db\n",
|
||||
"Action Input: SELECT Name, Age FROM titanic WHERE Survived = 1 ORDER BY Age DESC LIMIT 1\u001B[0m\n",
|
||||
"Observation: \u001B[31;1m\u001B[1;3mSELECT Name, Age FROM titanic WHERE Survived = 1 ORDER BY Age DESC LIMIT 1\u001B[0m\n",
|
||||
"Thought:\u001B[32;1m\u001B[1;3mThe query is correct. Now I will execute it to find the oldest survived passenger.\n",
|
||||
"Action: query_sql_db\n",
|
||||
"Action Input: SELECT Name, Age FROM titanic WHERE Survived = 1 ORDER BY Age DESC LIMIT 1\u001B[0m\n",
|
||||
"Observation: \u001B[36;1m\u001B[1;3m[('Barkworth, Mr. Algernon Henry Wilson', '80.0')]\u001B[0m\n",
|
||||
"Thought:\u001B[32;1m\u001B[1;3mI now know the final answer.\n",
|
||||
"Final Answer: The oldest survived passenger is Barkworth, Mr. Algernon Henry Wilson, who was 80 years old.\u001B[0m\n",
|
||||
"\n",
|
||||
"\u001B[1m> Finished chain.\u001B[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "'The oldest survived passenger is Barkworth, Mr. Algernon Henry Wilson, who was 80 years old.'"
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.run(\"What's the name of the oldest survived passenger?\")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "984a8fca",
|
||||
"metadata": {},
|
||||
@@ -9,7 +10,7 @@
|
||||
"\n",
|
||||
"Sometimes, for complex calculations, rather than have an LLM generate the answer directly, it can be better to have the LLM generate code to calculate the answer, and then run that code to get the answer. In order to easily do that, we provide a simple Python REPL to execute commands in.\n",
|
||||
"\n",
|
||||
"This interface will only return things that are printed - therefor, if you want to use it to calculate an answer, make sure to have it print out the answer."
|
||||
"This interface will only return things that are printed - therefore, if you want to use it to calculate an answer, make sure to have it print out the answer."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -27,19 +27,6 @@
|
||||
"In code, below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "a363309c",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%load_ext autoreload\n",
|
||||
"%autoreload 2"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
|
||||
230
docs/modules/chains/examples/graph_cypher_qa.ipynb
Normal file
230
docs/modules/chains/examples/graph_cypher_qa.ipynb
Normal file
@@ -0,0 +1,230 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c94240f5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# GraphCypherQAChain\n",
|
||||
"\n",
|
||||
"This notebook shows how to use LLMs to provide a natural language interface to a graph database you can query with the Cypher query language."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "dbc0ee68",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You will need to have a running Neo4j instance. One option is to create a [free Neo4j database instance in their Aura cloud service](https://neo4j.com/cloud/platform/aura-graph-database/). You can also run the database locally using the [Neo4j Desktop application](https://neo4j.com/download/), or running a docker container.\n",
|
||||
"You can run a local docker container by running the executing the following script:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"docker run \\\n",
|
||||
" --name neo4j \\\n",
|
||||
" -p 7474:7474 -p 7687:7687 \\\n",
|
||||
" -d \\\n",
|
||||
" -e NEO4J_AUTH=neo4j/pleaseletmein \\\n",
|
||||
" -e NEO4J_PLUGINS=\\[\\\"apoc\\\"\\] \\\n",
|
||||
" neo4j:latest\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"If you are using the docker container, you need to wait a couple of second for the database to start."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "62812aad",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.chains import GraphCypherQAChain\n",
|
||||
"from langchain.graphs import Neo4jGraph"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "0928915d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"graph = Neo4jGraph(\n",
|
||||
" url=\"bolt://localhost:7687\", username=\"neo4j\", password=\"pleaseletmein\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "995ea9b9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Seeding the database\n",
|
||||
"\n",
|
||||
"Assuming your database is empty, you can populate it using Cypher query language. The following Cypher statement is idempotent, which means the database information will be the same if you run it one or multiple times."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "fedd26b9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[]"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"graph.query(\n",
|
||||
" \"\"\"\n",
|
||||
"MERGE (m:Movie {name:\"Top Gun\"})\n",
|
||||
"WITH m\n",
|
||||
"UNWIND [\"Tom Cruise\", \"Val Kilmer\", \"Anthony Edwards\", \"Meg Ryan\"] AS actor\n",
|
||||
"MERGE (a:Actor {name:actor})\n",
|
||||
"MERGE (a)-[:ACTED_IN]->(m)\n",
|
||||
"\"\"\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "58c1a8ea",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Refresh graph schema information\n",
|
||||
"If the schema of database changes, you can refresh the schema information needed to generate Cypher statements."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "4e3de44f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"graph.refresh_schema()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "1fe76ccd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
" Node properties are the following:\n",
|
||||
" [{'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'Movie'}, {'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'Actor'}]\n",
|
||||
" Relationship properties are the following:\n",
|
||||
" []\n",
|
||||
" The relationships are the following:\n",
|
||||
" ['(:Actor)-[:ACTED_IN]->(:Movie)']\n",
|
||||
" \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(graph.get_schema)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "68a3c677",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Querying the graph\n",
|
||||
"\n",
|
||||
"We can now use the graph cypher QA chain to ask question of the graph"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "7476ce98",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = GraphCypherQAChain.from_llm(\n",
|
||||
" ChatOpenAI(temperature=0), graph=graph, verbose=True\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "ef8ee27b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
|
||||
"Generated Cypher:\n",
|
||||
"\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\n",
|
||||
"RETURN a.name\u001b[0m\n",
|
||||
"Full Context:\n",
|
||||
"\u001b[32;1m\u001b[1;3m[{'a.name': 'Tom Cruise'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}]\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Tom Cruise, Val Kilmer, Anthony Edwards, and Meg Ryan played in Top Gun.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.run(\"Who played in Top Gun?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b4825316",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -5,7 +5,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# LLMSummarizationCheckerChain\n",
|
||||
"This notebook shows some examples of LLMSummarizationCheckerChain in use with different types of texts. It has a few distinct differences from the `LLMCheckerChain`, in that it doesn't have any assumtions to the format of the input text (or summary).\n",
|
||||
"This notebook shows some examples of LLMSummarizationCheckerChain in use with different types of texts. It has a few distinct differences from the `LLMCheckerChain`, in that it doesn't have any assumptions to the format of the input text (or summary).\n",
|
||||
"Additionally, as the LLMs like to hallucinate when fact checking or get confused by context, it is sometimes beneficial to run the checker multiple times. It does this by feeding the rewritten \"True\" result back on itself, and checking the \"facts\" for truth. As you can see from the examples below, this can be very effective in arriving at a generally true body of text.\n",
|
||||
"\n",
|
||||
"You can control the number of times the checker runs by setting the `max_checks` parameter. The default is 2, but you can set it to 1 if you don't want any double-checking."
|
||||
|
||||
@@ -1,16 +1,5 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "ca883d49",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%load_ext autoreload\n",
|
||||
"%autoreload 2"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0ed6aab1",
|
||||
@@ -34,7 +23,7 @@
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"Under the hood, LangChain uses SQLAlchemy to connect to SQL databases. The `SQLDatabaseChain` can therefore be used with any SQL dialect supported by SQLAlchemy, such as MS SQL, MySQL, MariaDB, PostgreSQL, Oracle SQL, and SQLite. Please refer to the SQLAlchemy documentation for more information about requirements for connecting to your database. For example, a connection to MySQL requires an appropriate connector such as PyMySQL. A URI for a MySQL connection might look like: `mysql+pymysql://user:pass@some_mysql_db_address/db_name`\n",
|
||||
"Under the hood, LangChain uses SQLAlchemy to connect to SQL databases. The `SQLDatabaseChain` can therefore be used with any SQL dialect supported by SQLAlchemy, such as MS SQL, MySQL, MariaDB, PostgreSQL, Oracle SQL, [Databricks](../../../integrations/databricks.ipynb) and SQLite. Please refer to the SQLAlchemy documentation for more information about requirements for connecting to your database. For example, a connection to MySQL requires an appropriate connector such as PyMySQL. A URI for a MySQL connection might look like: `mysql+pymysql://user:pass@some_mysql_db_address/db_name`.\n",
|
||||
"\n",
|
||||
"This demonstration uses SQLite and the example Chinook database.\n",
|
||||
"To set it up, follow the instructions on https://database.guide/2-sample-databases-sqlite/, placing the `.db` file in a notebooks folder at the root of this repository."
|
||||
|
||||
@@ -53,6 +53,7 @@ For detailed instructions on how to get set up with Unstructured, see installati
|
||||
./document_loaders/examples/unstructured_file.ipynb
|
||||
./document_loaders/examples/url.ipynb
|
||||
./document_loaders/examples/web_base.ipynb
|
||||
./document_loaders/examples/weather.ipynb
|
||||
./document_loaders/examples/whatsapp_chat.ipynb
|
||||
|
||||
|
||||
@@ -123,6 +124,7 @@ We need access tokens and sometime other parameters to get access to these datas
|
||||
./document_loaders/examples/notiondb.ipynb
|
||||
./document_loaders/examples/notion.ipynb
|
||||
./document_loaders/examples/obsidian.ipynb
|
||||
./document_loaders/examples/psychic.ipynb
|
||||
./document_loaders/examples/readthedocs_documentation.ipynb
|
||||
./document_loaders/examples/reddit.ipynb
|
||||
./document_loaders/examples/roam.ipynb
|
||||
|
||||
@@ -9,39 +9,43 @@
|
||||
"\n",
|
||||
">[EverNote](https://evernote.com/) is intended for archiving and creating notes in which photos, audio and saved web content can be embedded. Notes are stored in virtual \"notebooks\" and can be tagged, annotated, edited, searched, and exported.\n",
|
||||
"\n",
|
||||
"This notebook shows how to load `EverNote` file from disk."
|
||||
"This notebook shows how to load an `Evernote` [export](https://help.evernote.com/hc/en-us/articles/209005557-Export-notes-and-notebooks-as-ENEX-or-HTML) file (.enex) from disk.\n",
|
||||
"\n",
|
||||
"A document will be created for each note in the export."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 1,
|
||||
"id": "1a53ece0",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install pypandoc\n",
|
||||
"import pypandoc\n",
|
||||
"\n",
|
||||
"pypandoc.download_pandoc()"
|
||||
"# lxml and html2text are required to parse EverNote notes\n",
|
||||
"# !pip install lxml\n",
|
||||
"# !pip install html2text"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 2,
|
||||
"id": "88df766f",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
},
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='testing this\\n\\nwhat happens?\\n\\nto the world?\\n', metadata={'source': 'example_data/testing.enex'})]"
|
||||
"[Document(page_content='testing this\\n\\nwhat happens?\\n\\nto the world?**Jan - March 2022**', metadata={'source': 'example_data/testing.enex'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -49,9 +53,34 @@
|
||||
"source": [
|
||||
"from langchain.document_loaders import EverNoteLoader\n",
|
||||
"\n",
|
||||
"# By default all notes are combined into a single Document\n",
|
||||
"loader = EverNoteLoader(\"example_data/testing.enex\")\n",
|
||||
"loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "97a58fde",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='testing this\\n\\nwhat happens?\\n\\nto the world?', metadata={'title': 'testing', 'created': time.struct_time(tm_year=2023, tm_mon=2, tm_mday=9, tm_hour=3, tm_min=47, tm_sec=46, tm_wday=3, tm_yday=40, tm_isdst=-1), 'updated': time.struct_time(tm_year=2023, tm_mon=2, tm_mday=9, tm_hour=3, tm_min=53, tm_sec=28, tm_wday=3, tm_yday=40, tm_isdst=-1), 'note-attributes.author': 'Harrison Chase', 'source': 'example_data/testing.enex'}),\n",
|
||||
" Document(page_content='**Jan - March 2022**', metadata={'title': 'Summer Training Program', 'created': time.struct_time(tm_year=2022, tm_mon=12, tm_mday=27, tm_hour=1, tm_min=59, tm_sec=48, tm_wday=1, tm_yday=361, tm_isdst=-1), 'note-attributes.author': 'Mike McGarry', 'note-attributes.source': 'mobile.iphone', 'source': 'example_data/testing.enex'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# It's likely more useful to return a Document for each note\n",
|
||||
"loader = EverNoteLoader(\"example_data/testing.enex\", load_single_document=False)\n",
|
||||
"loader.load()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -70,7 +99,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
"version": "3.9.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -13,4 +13,16 @@
|
||||
<!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd"><en-note><div>testing this</div><div>what happens?</div><div>to the world?</div></en-note> ]]>
|
||||
</content>
|
||||
</note>
|
||||
<note>
|
||||
<title>Summer Training Program</title>
|
||||
<created>20221227T015948Z</created>
|
||||
<note-attributes>
|
||||
<author>Mike McGarry</author>
|
||||
<source>mobile.iphone</source>
|
||||
</note-attributes>
|
||||
<content>
|
||||
<![CDATA[<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd"><en-note><div><b>Jan - March 2022</b></div></en-note> ]]>
|
||||
</content>
|
||||
</note>
|
||||
</en-export>
|
||||
|
||||
126
docs/modules/indexes/document_loaders/examples/mastodon.ipynb
Normal file
126
docs/modules/indexes/document_loaders/examples/mastodon.ipynb
Normal file
@@ -0,0 +1,126 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "66a7777e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Mastodon\n",
|
||||
"\n",
|
||||
">[Mastodon](https://joinmastodon.org/) is a federated social media and social networking service.\n",
|
||||
"\n",
|
||||
"This loader fetches the text from the \"toots\" of a list of `Mastodon` accounts, using the `Mastodon.py` Python package.\n",
|
||||
"\n",
|
||||
"Public accounts can the queried by default without any authentication. If non-public accounts or instances are queried, you have to register an application for your account which gets you an access token, and set that token and your account's API base URL.\n",
|
||||
"\n",
|
||||
"Then you need to pass in the Mastodon account names you want to extract, in the `@account@instance` format."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9ec8a3b3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import MastodonTootsLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "43128d8d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install Mastodon.py"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "35d6809a",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = MastodonTootsLoader(\n",
|
||||
" mastodon_accounts=[\"@Gargron@mastodon.social\"],\n",
|
||||
" number_toots=50, # Default value is 100\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Or set up access information to use a Mastodon app.\n",
|
||||
"# Note that the access token can either be passed into\n",
|
||||
"# constructor or you can set the envirovnment \"MASTODON_ACCESS_TOKEN\".\n",
|
||||
"# loader = MastodonTootsLoader(\n",
|
||||
"# access_token=\"<ACCESS TOKEN OF MASTODON APP>\",\n",
|
||||
"# api_base_url=\"<API BASE URL OF MASTODON APP INSTANCE>\",\n",
|
||||
"# mastodon_accounts=[\"@Gargron@mastodon.social\"],\n",
|
||||
"# number_toots=50, # Default value is 100\n",
|
||||
"# )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "05fe33b9",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<p>It is tough to leave this behind and go back to reality. And some people live here! I’m sure there are downsides but it sounds pretty good to me right now.</p>\n",
|
||||
"================================================================================\n",
|
||||
"<p>I wish we could stay here a little longer, but it is time to go home 🥲</p>\n",
|
||||
"================================================================================\n",
|
||||
"<p>Last day of the honeymoon. And it’s <a href=\"https://mastodon.social/tags/caturday\" class=\"mention hashtag\" rel=\"tag\">#<span>caturday</span></a>! This cute tabby came to the restaurant to beg for food and got some chicken.</p>\n",
|
||||
"================================================================================\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"documents = loader.load()\n",
|
||||
"for doc in documents[:3]:\n",
|
||||
" print(doc.page_content)\n",
|
||||
" print(\"=\" * 80)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "322bb6a1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The toot texts (the documents' `page_content`) is by default HTML as returned by the Mastodon API."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
134
docs/modules/indexes/document_loaders/examples/psychic.ipynb
Normal file
134
docs/modules/indexes/document_loaders/examples/psychic.ipynb
Normal file
@@ -0,0 +1,134 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Psychic\n",
|
||||
"This notebook covers how to load documents from `Psychic`. See [here](../../../../ecosystem/psychic.md) for more details.\n",
|
||||
"\n",
|
||||
"## Prerequisites\n",
|
||||
"1. Follow the Quick Start section in [this document](../../../../ecosystem/psychic.md)\n",
|
||||
"2. Log into the [Psychic dashboard](https://dashboard.psychic.dev/) and get your secret key\n",
|
||||
"3. Install the frontend react library into your web app and have a user authenticate a connection. The connection will be created using the connection id that you specify."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Loading documents\n",
|
||||
"\n",
|
||||
"Use the `PsychicLoader` class to load in documents from a connection. Each connection has a connector id (corresponding to the SaaS app that was connected) and a connection id (which you passed in to the frontend library)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.1.2\u001b[0m\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Uncomment this to install psychicapi if you don't already have it installed\n",
|
||||
"!poetry run pip -q install psychicapi"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import PsychicLoader\n",
|
||||
"from psychicapi import ConnectorId\n",
|
||||
"\n",
|
||||
"# Create a document loader for google drive. We can also load from other connectors by setting the connector_id to the appropriate value e.g. ConnectorId.notion.value\n",
|
||||
"# This loader uses our test credentials\n",
|
||||
"google_drive_loader = PsychicLoader(\n",
|
||||
" api_key=\"7ddb61c1-8b6a-4d31-a58e-30d1c9ea480e\",\n",
|
||||
" connector_id=ConnectorId.gdrive.value,\n",
|
||||
" connection_id=\"google-test\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"documents = google_drive_loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Converting the docs to embeddings \n",
|
||||
"\n",
|
||||
"We can now convert these documents into embeddings and store them in a vector database like Chroma"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.vectorstores import Chroma\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.chains import RetrievalQAWithSourcesChain\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"texts = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"docsearch = Chroma.from_documents(texts, embeddings)\n",
|
||||
"chain = RetrievalQAWithSourcesChain.from_chain_type(OpenAI(temperature=0), chain_type=\"stuff\", retriever=docsearch.as_retriever())\n",
|
||||
"chain({\"question\": \"what is psychic?\"}, return_only_outputs=True)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.8"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -287,10 +287,118 @@
|
||||
"docs[:5]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b066cb5a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Unstructured API\n",
|
||||
"\n",
|
||||
"If you want to get up and running with less set up, you can simply run `pip install unstructured` and use `UnstructuredAPIFileLoader` or `UnstructuredAPIFileIOLoader`. That will process your document using the hosted Unstructured API. Note that currently (as of 11 May 2023) the Unstructured API is open, but it will soon require an API. The [Unstructured documentation](https://unstructured-io.github.io/) page will have instructions on how to generate an API key once they’re available. Check out the instructions [here](https://github.com/Unstructured-IO/unstructured-api#dizzy-instructions-for-using-the-docker-image) if you’d like to self-host the Unstructured API or run it locally."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "b50c70bc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import UnstructuredAPIFileLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "12b6d2cf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"filenames = [\"example_data/fake.docx\", \"example_data/fake-email.eml\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "39a9894d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = UnstructuredAPIFileLoader(\n",
|
||||
" file_path=filenames[0],\n",
|
||||
" api_key=\"FAKE_API_KEY\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "386eb63c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='Lorem ipsum dolor sit amet.', metadata={'source': 'example_data/fake.docx'})"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "94158999",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can also batch multiple files through the Unstructured API in a single API using `UnstructuredAPIFileLoader`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "79a18e7e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = UnstructuredAPIFileLoader(\n",
|
||||
" file_path=filenames,\n",
|
||||
" api_key=\"FAKE_API_KEY\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "a3d7c846",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='Lorem ipsum dolor sit amet.\\n\\nThis is a test email to use for unit tests.\\n\\nImportant points:\\n\\nRoses are red\\n\\nViolets are blue', metadata={'source': ['example_data/fake.docx', 'example_data/fake-email.eml']})"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f52b04cb",
|
||||
"id": "0e510495",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
@@ -312,7 +420,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
"version": "3.8.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
101
docs/modules/indexes/document_loaders/examples/weather.ipynb
Normal file
101
docs/modules/indexes/document_loaders/examples/weather.ipynb
Normal file
@@ -0,0 +1,101 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "66a7777e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Weather\n",
|
||||
"\n",
|
||||
">[OpenWeatherMap](https://openweathermap.org/) is an open source weather service provider\n",
|
||||
"\n",
|
||||
"This loader fetches the weather data from the OpenWeatherMap's OneCall API, using the pyowm Python package. You must initialize the loader with your OpenWeatherMap API token and the names of the cities you want the weather data for."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9ec8a3b3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import WeatherDataLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "43128d8d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install pyowm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "51b0f0db",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Set API key either by passing it in to constructor directly\n",
|
||||
"# or by setting the environment variable \"OPENWEATHERMAP_API_KEY\".\n",
|
||||
"\n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"OPENWEATHERMAP_API_KEY = getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "35d6809a",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = WeatherDataLoader.from_params(['chennai','vellore'], openweathermap_api_key=OPENWEATHERMAP_API_KEY) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "05fe33b9",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"documents = loader.load()\n",
|
||||
"documents"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -24,7 +24,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install pinecone-client"
|
||||
"#!pip install pinecone-client pinecone-text"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -0,0 +1,277 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "13afcae7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Self-querying with Weaviate"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "68e75fb9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Creating a Weaviate vectorstore\n",
|
||||
"First we'll want to create a Weaviate VectorStore and seed it with some data. We've created a small demo set of documents that contain summaries of movies.\n",
|
||||
"\n",
|
||||
"NOTE: The self-query retriever requires you to have `lark` installed (`pip install lark`). We also need the `weaviate-client` package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "63a8af5b",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install lark weaviate-client"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "cb4a5787",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.schema import Document\n",
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.vectorstores import Weaviate\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "bcbe04d9",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = [\n",
|
||||
" Document(page_content=\"A bunch of scientists bring back dinosaurs and mayhem breaks loose\", metadata={\"year\": 1993, \"rating\": 7.7, \"genre\": \"science fiction\"}),\n",
|
||||
" Document(page_content=\"Leo DiCaprio gets lost in a dream within a dream within a dream within a ...\", metadata={\"year\": 2010, \"director\": \"Christopher Nolan\", \"rating\": 8.2}),\n",
|
||||
" Document(page_content=\"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea\", metadata={\"year\": 2006, \"director\": \"Satoshi Kon\", \"rating\": 8.6}),\n",
|
||||
" Document(page_content=\"A bunch of normal-sized women are supremely wholesome and some men pine after them\", metadata={\"year\": 2019, \"director\": \"Greta Gerwig\", \"rating\": 8.3}),\n",
|
||||
" Document(page_content=\"Toys come alive and have a blast doing so\", metadata={\"year\": 1995, \"genre\": \"animated\"}),\n",
|
||||
" Document(page_content=\"Three men walk into the Zone, three men walk out of the Zone\", metadata={\"year\": 1979, \"rating\": 9.9, \"director\": \"Andrei Tarkovsky\", \"genre\": \"science fiction\", \"rating\": 9.9})\n",
|
||||
"]\n",
|
||||
"vectorstore = Weaviate.from_documents(\n",
|
||||
" docs, embeddings, weaviate_url=\"http://127.0.0.1:8080\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5ecaab6d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Creating our self-querying retriever\n",
|
||||
"Now we can instantiate our retriever. To do this we'll need to provide some information upfront about the metadata fields that our documents support and a short description of the document contents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"id": "86e34dbf",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
|
||||
"from langchain.chains.query_constructor.base import AttributeInfo\n",
|
||||
"\n",
|
||||
"metadata_field_info=[\n",
|
||||
" AttributeInfo(\n",
|
||||
" name=\"genre\",\n",
|
||||
" description=\"The genre of the movie\", \n",
|
||||
" type=\"string or list[string]\", \n",
|
||||
" ),\n",
|
||||
" AttributeInfo(\n",
|
||||
" name=\"year\",\n",
|
||||
" description=\"The year the movie was released\", \n",
|
||||
" type=\"integer\", \n",
|
||||
" ),\n",
|
||||
" AttributeInfo(\n",
|
||||
" name=\"director\",\n",
|
||||
" description=\"The name of the movie director\", \n",
|
||||
" type=\"string\", \n",
|
||||
" ),\n",
|
||||
" AttributeInfo(\n",
|
||||
" name=\"rating\",\n",
|
||||
" description=\"A 1-10 rating for the movie\",\n",
|
||||
" type=\"float\"\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"document_content_description = \"Brief summary of a movie\"\n",
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"retriever = SelfQueryRetriever.from_llm(llm, vectorstore, document_content_description, metadata_field_info, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ea9df8d4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Testing it out\n",
|
||||
"And now we can try actually using our retriever!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "38a126e9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"query='dinosaur' filter=None limit=None\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose', metadata={'genre': 'science fiction', 'rating': 7.7, 'year': 1993}),\n",
|
||||
" Document(page_content='Toys come alive and have a blast doing so', metadata={'genre': 'animated', 'rating': None, 'year': 1995}),\n",
|
||||
" Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'genre': 'science fiction', 'rating': 9.9, 'year': 1979}),\n",
|
||||
" Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'genre': None, 'rating': 8.6, 'year': 2006})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example only specifies a relevant query\n",
|
||||
"retriever.get_relevant_documents(\"What are some movies about dinosaurs\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "b19d4da0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"query='women' filter=Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='director', value='Greta Gerwig') limit=None\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='A bunch of normal-sized women are supremely wholesome and some men pine after them', metadata={'genre': None, 'rating': 8.3, 'year': 2019})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example specifies a query and a filter\n",
|
||||
"retriever.get_relevant_documents(\"Has Greta Gerwig directed any movies about women\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "39bd1de1-b9fe-4a98-89da-58d8a7a6ae51",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Filter k\n",
|
||||
"\n",
|
||||
"We can also use the self query retriever to specify `k`: the number of documents to fetch.\n",
|
||||
"\n",
|
||||
"We can do this by passing `enable_limit=True` to the constructor."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"id": "bff36b88-b506-4877-9c63-e5a1a8d78e64",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = SelfQueryRetriever.from_llm(\n",
|
||||
" llm, \n",
|
||||
" vectorstore, \n",
|
||||
" document_content_description, \n",
|
||||
" metadata_field_info, \n",
|
||||
" enable_limit=True,\n",
|
||||
" verbose=True\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"id": "2758d229-4f97-499c-819f-888acaf8ee10",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"query='dinosaur' filter=None limit=2\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose', metadata={'genre': 'science fiction', 'rating': 7.7, 'year': 1993}),\n",
|
||||
" Document(page_content='Toys come alive and have a blast doing so', metadata={'genre': 'animated', 'rating': None, 'year': 1995})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example only specifies a relevant query\n",
|
||||
"retriever.get_relevant_documents(\"what are two movies about dinosaurs\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
291
docs/modules/indexes/retrievers/examples/zep_memorystore.ipynb
Normal file
291
docs/modules/indexes/retrievers/examples/zep_memorystore.ipynb
Normal file
@@ -0,0 +1,291 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"# Zep Memory\n",
|
||||
"\n",
|
||||
"## Retriever Example\n",
|
||||
"\n",
|
||||
"This notebook demonstrates how to search historical chat message histories using the [Zep Long-term Memory Store](https://getzep.github.io/).\n",
|
||||
"\n",
|
||||
"We'll demonstrate:\n",
|
||||
"\n",
|
||||
"1. Adding conversation history to the Zep memory store.\n",
|
||||
"2. Vector search over the conversation history.\n",
|
||||
"\n",
|
||||
"More on Zep:\n",
|
||||
"\n",
|
||||
"Zep stores, summarizes, embeds, indexes, and enriches conversational AI chat histories, and exposes them via simple, low-latency APIs.\n",
|
||||
"\n",
|
||||
"Key Features:\n",
|
||||
"\n",
|
||||
"- Long-term memory persistence, with access to historical messages irrespective of your summarization strategy.\n",
|
||||
"- Auto-summarization of memory messages based on a configurable message window. A series of summaries are stored, providing flexibility for future summarization strategies.\n",
|
||||
"- Vector search over memories, with messages automatically embedded on creation.\n",
|
||||
"- Auto-token counting of memories and summaries, allowing finer-grained control over prompt assembly.\n",
|
||||
"- Python and JavaScript SDKs.\n",
|
||||
"\n",
|
||||
"Zep's Go Extractor model is easily extensible, with a simple, clean interface available to build new enrichment functionality, such as summarizers, entity extractors, embedders, and more.\n",
|
||||
"\n",
|
||||
"Zep project: [https://github.com/getzep/zep](https://github.com/getzep/zep)\n"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.memory.chat_message_histories import ZepChatMessageHistory\n",
|
||||
"from langchain.schema import HumanMessage, AIMessage\n",
|
||||
"from uuid import uuid4\n",
|
||||
"\n",
|
||||
"# Set this to your Zep server URL\n",
|
||||
"ZEP_API_URL = \"http://localhost:8000\"\n",
|
||||
"\n",
|
||||
"# Zep is async-first. Our sync APIs use an asyncio wrapper to run outside an app's event loop.\n",
|
||||
"# This interferes with Jupyter's event loop, so we need to install nest_asyncio to run the\n",
|
||||
"# Zep client in a notebook.\n",
|
||||
"\n",
|
||||
"# !pip install nest_asyncio # Uncomment to install nest_asyncio\n",
|
||||
"import nest_asyncio\n",
|
||||
"\n",
|
||||
"nest_asyncio.apply()"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-18T20:09:20.355017Z",
|
||||
"start_time": "2023-05-18T20:09:19.526069Z"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### Initialize the Zep Chat Message History Class and add a chat message history to the memory store\n",
|
||||
"\n",
|
||||
"**NOTE:** Unlike other Retrievers, the content returned by the Zep Retriever is session/user specific. A `session_id` is required when instantiating the Retriever."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"session_id = str(uuid4()) # This is a unique identifier for the user/session\n",
|
||||
"\n",
|
||||
"# Set up Zep Chat History. We'll use this to add chat histories to the memory store\n",
|
||||
"zep_chat_history = ZepChatMessageHistory(\n",
|
||||
" session_id=session_id,\n",
|
||||
" url=ZEP_API_URL,\n",
|
||||
")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-18T20:09:20.424764Z",
|
||||
"start_time": "2023-05-18T20:09:20.355626Z"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Preload some messages into the memory. The default message window is 12 messages. We want to push beyond this to demonstrate auto-summarization.\n",
|
||||
"test_history = [\n",
|
||||
" {\"role\": \"human\", \"content\": \"Who was Octavia Butler?\"},\n",
|
||||
" {\n",
|
||||
" \"role\": \"ai\",\n",
|
||||
" \"content\": (\n",
|
||||
" \"Octavia Estelle Butler (June 22, 1947 – February 24, 2006) was an American\"\n",
|
||||
" \" science fiction author.\"\n",
|
||||
" ),\n",
|
||||
" },\n",
|
||||
" {\"role\": \"human\", \"content\": \"Which books of hers were made into movies?\"},\n",
|
||||
" {\n",
|
||||
" \"role\": \"ai\",\n",
|
||||
" \"content\": (\n",
|
||||
" \"The most well-known adaptation of Octavia Butler's work is the FX series\"\n",
|
||||
" \" Kindred, based on her novel of the same name.\"\n",
|
||||
" ),\n",
|
||||
" },\n",
|
||||
" {\"role\": \"human\", \"content\": \"Who were her contemporaries?\"},\n",
|
||||
" {\n",
|
||||
" \"role\": \"ai\",\n",
|
||||
" \"content\": (\n",
|
||||
" \"Octavia Butler's contemporaries included Ursula K. Le Guin, Samuel R.\"\n",
|
||||
" \" Delany, and Joanna Russ.\"\n",
|
||||
" ),\n",
|
||||
" },\n",
|
||||
" {\"role\": \"human\", \"content\": \"What awards did she win?\"},\n",
|
||||
" {\n",
|
||||
" \"role\": \"ai\",\n",
|
||||
" \"content\": (\n",
|
||||
" \"Octavia Butler won the Hugo Award, the Nebula Award, and the MacArthur\"\n",
|
||||
" \" Fellowship.\"\n",
|
||||
" ),\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"role\": \"human\",\n",
|
||||
" \"content\": \"Which other women sci-fi writers might I want to read?\",\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"role\": \"ai\",\n",
|
||||
" \"content\": \"You might want to read Ursula K. Le Guin or Joanna Russ.\",\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"role\": \"human\",\n",
|
||||
" \"content\": (\n",
|
||||
" \"Write a short synopsis of Butler's book, Parable of the Sower. What is it\"\n",
|
||||
" \" about?\"\n",
|
||||
" ),\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"role\": \"ai\",\n",
|
||||
" \"content\": (\n",
|
||||
" \"Parable of the Sower is a science fiction novel by Octavia Butler,\"\n",
|
||||
" \" published in 1993. It follows the story of Lauren Olamina, a young woman\"\n",
|
||||
" \" living in a dystopian future where society has collapsed due to\"\n",
|
||||
" \" environmental disasters, poverty, and violence.\"\n",
|
||||
" ),\n",
|
||||
" },\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"for msg in test_history:\n",
|
||||
" zep_chat_history.append(\n",
|
||||
" HumanMessage(content=msg[\"content\"])\n",
|
||||
" if msg[\"role\"] == \"human\"\n",
|
||||
" else AIMessage(content=msg[\"content\"])\n",
|
||||
" )\n"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-18T20:09:20.603865Z",
|
||||
"start_time": "2023-05-18T20:09:20.427041Z"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### Use the Zep Retriever to vector search over the Zep memory\n",
|
||||
"\n",
|
||||
"Zep provides native vector search over historical conversation memory. Embedding happens automatically.\n",
|
||||
"\n",
|
||||
"NOTE: Embedding of messages occurs asynchronously, so the first query may not return results. Subsequent queries will return results as the embeddings are generated."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "[Document(page_content='Who was Octavia Butler?', metadata={'score': 0.7759001673780126, 'uuid': '3bedb2bf-aeaf-4849-924b-40a6d91e54b9', 'created_at': '2023-05-18T20:09:20.47556Z', 'role': 'human', 'token_count': 8})]"
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.retrievers import ZepRetriever\n",
|
||||
"\n",
|
||||
"zep_retriever = ZepRetriever(\n",
|
||||
" session_id=session_id, # Ensure that you provide the session_id when instantiating the Retriever\n",
|
||||
" url=ZEP_API_URL,\n",
|
||||
" top_k=5,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"await zep_retriever.aget_relevant_documents(\"Who wrote Parable of the Sower?\")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-18T20:09:20.979411Z",
|
||||
"start_time": "2023-05-18T20:09:20.604147Z"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"We can also use the Zep sync API to retrieve results:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "[Document(page_content='Who was Octavia Butler?', metadata={'score': 0.7759001673780126, 'uuid': '3bedb2bf-aeaf-4849-924b-40a6d91e54b9', 'created_at': '2023-05-18T20:09:20.47556Z', 'role': 'human', 'token_count': 8}),\n Document(page_content='Octavia Estelle Butler (June 22, 1947 – February 24, 2006) was an American science fiction author.', metadata={'score': 0.7545887969667749, 'uuid': 'b32c0644-2dcb-4c1d-a445-6622e7ba82e5', 'created_at': '2023-05-18T20:09:20.512044Z', 'role': 'ai', 'token_count': 31})]"
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"zep_retriever.get_relevant_documents(\"Who wrote Parable of the Sower?\")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-18T20:09:21.296699Z",
|
||||
"start_time": "2023-05-18T20:09:20.983624Z"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"outputs": [],
|
||||
"source": [],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-18T20:09:21.298710Z",
|
||||
"start_time": "2023-05-18T20:09:21.297169Z"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
||||
@@ -7,11 +7,9 @@
|
||||
"source": [
|
||||
"# Annoy\n",
|
||||
"\n",
|
||||
"> \"Annoy (Approximate Nearest Neighbors Oh Yeah) is a C++ library with Python bindings to search for points in space that are close to a given query point. It also creates large read-only file-based data structures that are mmapped into memory so that many processes may share the same data.\"\n",
|
||||
"> [Annoy](https://github.com/spotify/annoy) (`Approximate Nearest Neighbors Oh Yeah`) is a C++ library with Python bindings to search for points in space that are close to a given query point. It also creates large read-only file-based data structures that are mmapped into memory so that many processes may share the same data.\n",
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the `Annoy` vector database.\n",
|
||||
"\n",
|
||||
"via [Annoy](https://github.com/spotify/annoy) \n"
|
||||
"This notebook shows how to use functionality related to the `Annoy` vector database."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -25,6 +23,18 @@
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6107872c-09e8-4254-a89c-17e0a0764e82",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install annoy"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6613d222",
|
||||
@@ -35,9 +45,11 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": null,
|
||||
"id": "dc7351b5",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings import HuggingFaceEmbeddings\n",
|
||||
@@ -48,9 +60,11 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"id": "d2cb5f7d",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"texts = [\"pizza is great\", \"I love salad\", \"my car\", \"a dog\"]\n",
|
||||
|
||||
@@ -1,15 +1,15 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# AtlasDB\n",
|
||||
"# Atlas\n",
|
||||
"\n",
|
||||
"This notebook shows you how to use functionality related to the `AtlasDB`.\n",
|
||||
"\n",
|
||||
"[Atlas](https://docs.nomic.ai/index.html) a platform for interacting with both small and internet scale unstructured datasets by Nomic "
|
||||
">[Atlas](https://docs.nomic.ai/index.html) is a platform for interacting with both small and internet scale unstructured datasets by `Nomic`. \n",
|
||||
"\n",
|
||||
"This notebook shows you how to use functionality related to the `AtlasDB` vectorstore."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -582,7 +582,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Creating dataset on AWS S3"
|
||||
"### Creating dataset on AWS S3"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
"id": "7ee37d28",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Setup\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"Uncomment the below cells to install docarray and get/set your OpenAI api key if you haven't already done so."
|
||||
]
|
||||
@@ -61,7 +61,7 @@
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"# Using DocArrayHnswSearch"
|
||||
"## Using DocArrayHnswSearch"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -102,7 +102,7 @@
|
||||
"id": "ed6f905b-4853-4a44-9730-614aa8e22b78",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Similarity search"
|
||||
"### Similarity search"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -149,7 +149,7 @@
|
||||
"id": "3febb987-e903-416f-af26-6897d84c8d61",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Similarity search with score"
|
||||
"### Similarity search with score"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -219,7 +219,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
"id": "5031a3ec",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Setup\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"Uncomment the below cells to install docarray and get/set your OpenAI api key if you haven't already done so."
|
||||
]
|
||||
@@ -53,6 +53,14 @@
|
||||
"# os.environ[\"OPENAI_API_KEY\"] = OPENAI_API_KEY"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6e57a389-f637-4b8f-9ab2-759ae7485f78",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using DocArrayInMemorySearch"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -91,7 +99,7 @@
|
||||
"id": "efbb6684-3846-4332-a624-ddd4d75844c1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Similarity search"
|
||||
"### Similarity search"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -138,7 +146,7 @@
|
||||
"id": "43896697-f99e-47b6-9117-47a25e9afa9c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Similarity search with score"
|
||||
"### Similarity search with score"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -202,7 +210,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -18,6 +18,14 @@
|
||||
"Check [this](https://opensearch.org/docs/latest/search-plugins/knn/index/) for more details."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "94963977-9dfc-48b7-872a-53f2947f46c6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Installation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -80,6 +88,16 @@
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "01a9a035",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### similarity_search using Approximate k-NN\n",
|
||||
"\n",
|
||||
"`similarity_search` using `Approximate k-NN` Search with Custom Parameters"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -111,14 +129,6 @@
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "01a9a035",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### similarity_search using Approximate k-NN Search with Custom Parameters"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -155,7 +165,9 @@
|
||||
"id": "0d0cd877",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### similarity_search using Script Scoring with Custom Parameters"
|
||||
"### similarity_search using Script Scoring\n",
|
||||
"\n",
|
||||
"`similarity_search` using `Script Scoring` with Custom Parameters"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -194,7 +206,9 @@
|
||||
"id": "a4af96cc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### similarity_search using Painless Scripting with Custom Parameters"
|
||||
"### similarity_search using Painless Scripting\n",
|
||||
"\n",
|
||||
"`similarity_search` using `Painless Scripting` with Custom Parameters"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -233,7 +247,7 @@
|
||||
"id": "73264864",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Using a preexisting OpenSearch instance\n",
|
||||
"### Using a preexisting OpenSearch instance\n",
|
||||
"\n",
|
||||
"It's also possible to use a preexisting OpenSearch instance with documents that already have vectors present."
|
||||
]
|
||||
|
||||
@@ -11,6 +11,13 @@
|
||||
"This notebook shows how to use functionality related to the [Redis vector database](https://redis.com/solutions/use-cases/vector-database/)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Installing"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -41,6 +48,13 @@
|
||||
"os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
@@ -192,7 +206,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## RedisVectorStoreRetriever\n",
|
||||
"## Redis as Retriever\n",
|
||||
"\n",
|
||||
"Here we go over different options for using the vector store as a retriever.\n",
|
||||
"\n",
|
||||
|
||||
@@ -6,8 +6,12 @@
|
||||
"source": [
|
||||
"# Tair\n",
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the Tair vector database.\n",
|
||||
"To run, you should have an [Tair](https://www.alibabacloud.com/help/en/tair/latest/what-is-tair) instance up and running."
|
||||
">[Tair](https://www.alibabacloud.com/help/en/tair/latest/what-is-tair) is a cloud native in-memory database service developed by `Alibaba Cloud`. \n",
|
||||
"It provides rich data models and enterprise-grade capabilities to support your real-time online scenarios while maintaining full compatibility with open source `Redis`. `Tair` also introduces persistent memory-optimized instances that are based on the new non-volatile memory (NVM) storage medium.\n",
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the `Tair` vector database.\n",
|
||||
"\n",
|
||||
"To run, you should have a `Tair` instance up and running."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -121,9 +125,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 1
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# How (and why) to use the the human input LLM\n",
|
||||
"# How (and why) to use the human input LLM\n",
|
||||
"\n",
|
||||
"Similar to the fake LLM, LangChain provides a pseudo LLM class that can be used for testing, debugging, or educational purposes. This allows you to mock out calls to the LLM and simulate how a human would respond if they received the prompts.\n",
|
||||
"\n",
|
||||
@@ -34,6 +34,23 @@
|
||||
"from langchain.agents import AgentType"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Since we will use the `WikipediaQueryRun` tool in this notebook, you might need to install the `wikipedia` package if you haven't done so already."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install wikipedia"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
@@ -217,7 +234,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.10.9"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
|
||||
@@ -41,7 +41,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 3,
|
||||
"id": "f69f6283",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -52,7 +52,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 4,
|
||||
"id": "64005d1f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -60,17 +60,17 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 26.1 ms, sys: 21.5 ms, total: 47.6 ms\n",
|
||||
"Wall time: 1.68 s\n"
|
||||
"CPU times: user 35.9 ms, sys: 28.6 ms, total: 64.6 ms\n",
|
||||
"Wall time: 4.83 s\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'"
|
||||
"\"\\n\\nWhy couldn't the bicycle stand up by itself? It was...two tired!\""
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -403,7 +403,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 5,
|
||||
"id": "14a82124",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -412,13 +412,16 @@
|
||||
"from gptcache.manager.factory import manager_factory\n",
|
||||
"from gptcache.processor.pre import get_prompt\n",
|
||||
"from langchain.cache import GPTCache\n",
|
||||
"import hashlib\n",
|
||||
"\n",
|
||||
"# Avoid multiple caches using the same file, causing different llm model caches to affect each other\n",
|
||||
"def get_hashed_name(name):\n",
|
||||
" return hashlib.sha256(name.encode()).hexdigest()\n",
|
||||
"\n",
|
||||
"def init_gptcache(cache_obj: Cache, llm: str):\n",
|
||||
" hashed_llm = get_hashed_name(llm)\n",
|
||||
" cache_obj.init(\n",
|
||||
" pre_embedding_func=get_prompt,\n",
|
||||
" data_manager=manager_factory(manager=\"map\", data_dir=f\"map_cache_{llm}\"),\n",
|
||||
" data_manager=manager_factory(manager=\"map\", data_dir=f\"map_cache_{hashed_llm}\"),\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"langchain.llm_cache = GPTCache(init_gptcache)"
|
||||
@@ -426,7 +429,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 6,
|
||||
"id": "9e4ecfd1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -434,17 +437,17 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 8.6 ms, sys: 3.82 ms, total: 12.4 ms\n",
|
||||
"Wall time: 881 ms\n"
|
||||
"CPU times: user 21.5 ms, sys: 21.3 ms, total: 42.8 ms\n",
|
||||
"Wall time: 6.2 s\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'"
|
||||
"'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side!'"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -457,7 +460,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 7,
|
||||
"id": "c98bbe3b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -465,17 +468,17 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 286 µs, sys: 21 µs, total: 307 µs\n",
|
||||
"Wall time: 316 µs\n"
|
||||
"CPU times: user 571 µs, sys: 43 µs, total: 614 µs\n",
|
||||
"Wall time: 635 µs\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'"
|
||||
"'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side!'"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -504,11 +507,14 @@
|
||||
"from gptcache import Cache\n",
|
||||
"from gptcache.adapter.api import init_similar_cache\n",
|
||||
"from langchain.cache import GPTCache\n",
|
||||
"import hashlib\n",
|
||||
"\n",
|
||||
"# Avoid multiple caches using the same file, causing different llm model caches to affect each other\n",
|
||||
"def get_hashed_name(name):\n",
|
||||
" return hashlib.sha256(name.encode()).hexdigest()\n",
|
||||
"\n",
|
||||
"def init_gptcache(cache_obj: Cache, llm str):\n",
|
||||
" init_similar_cache(cache_obj=cache_obj, data_dir=f\"similar_cache_{llm}\")\n",
|
||||
"def init_gptcache(cache_obj: Cache, llm: str):\n",
|
||||
" hashed_llm = get_hashed_name(llm)\n",
|
||||
" init_similar_cache(cache_obj=cache_obj, data_dir=f\"similar_cache_{hashed_llm}\")\n",
|
||||
"\n",
|
||||
"langchain.llm_cache = GPTCache(init_gptcache)"
|
||||
]
|
||||
@@ -523,8 +529,8 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 1.01 s, sys: 153 ms, total: 1.16 s\n",
|
||||
"Wall time: 2.49 s\n"
|
||||
"CPU times: user 1.42 s, sys: 279 ms, total: 1.7 s\n",
|
||||
"Wall time: 8.44 s\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -554,8 +560,8 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 745 ms, sys: 13.2 ms, total: 758 ms\n",
|
||||
"Wall time: 136 ms\n"
|
||||
"CPU times: user 866 ms, sys: 20 ms, total: 886 ms\n",
|
||||
"Wall time: 226 ms\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -585,8 +591,8 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 737 ms, sys: 7.79 ms, total: 745 ms\n",
|
||||
"Wall time: 135 ms\n"
|
||||
"CPU times: user 853 ms, sys: 14.8 ms, total: 868 ms\n",
|
||||
"Wall time: 224 ms\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
105
docs/modules/models/llms/integrations/mosaicml.ipynb
Normal file
105
docs/modules/models/llms/integrations/mosaicml.ipynb
Normal file
@@ -0,0 +1,105 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# MosaicML\n",
|
||||
"\n",
|
||||
"[MosaicML](https://docs.mosaicml.com/en/latest/inference.html) offers a managed inference service. You can either use a variety of open source models, or deploy your own.\n",
|
||||
"\n",
|
||||
"This example goes over how to use LangChain to interact with MosaicML Inference for text completion."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# sign up for an account: https://forms.mosaicml.com/demo?utm_source=langchain\n",
|
||||
"\n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"MOSAICML_API_TOKEN = getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"MOSAICML_API_TOKEN\"] = MOSAICML_API_TOKEN"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms import MosaicML\n",
|
||||
"from langchain import PromptTemplate, LLMChain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"template = \"\"\"Question: {question}\"\"\"\n",
|
||||
"\n",
|
||||
"prompt = PromptTemplate(template=template, input_variables=[\"question\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = MosaicML(inject_instruction_format=True, model_kwargs={'do_sample': False})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"question = \"What is one good reason why you should train a large language model on domain specific data?\"\n",
|
||||
"\n",
|
||||
"llm_chain.run(question)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
133
docs/modules/models/llms/integrations/openlm.ipynb
Normal file
133
docs/modules/models/llms/integrations/openlm.ipynb
Normal file
@@ -0,0 +1,133 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# OpenLM\n",
|
||||
"[OpenLM](https://github.com/r2d4/openlm) is a zero-dependency OpenAI-compatible LLM provider that can call different inference endpoints directly via HTTP. \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"It implements the OpenAI Completion class so that it can be used as a drop-in replacement for the OpenAI API. This changeset utilizes BaseOpenAI for minimal added code.\n",
|
||||
"\n",
|
||||
"This examples goes over how to use LangChain to interact with both OpenAI and HuggingFace. You'll need API keys from both."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Setup\n",
|
||||
"Install dependencies and set API keys."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Uncomment to install openlm and openai if you haven't already\n",
|
||||
"\n",
|
||||
"# !pip install openlm\n",
|
||||
"# !pip install openai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from getpass import getpass\n",
|
||||
"import os\n",
|
||||
"import subprocess\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Check if OPENAI_API_KEY environment variable is set\n",
|
||||
"if \"OPENAI_API_KEY\" not in os.environ:\n",
|
||||
" print(\"Enter your OpenAI API key:\")\n",
|
||||
" os.environ[\"OPENAI_API_KEY\"] = getpass()\n",
|
||||
"\n",
|
||||
"# Check if HF_API_TOKEN environment variable is set\n",
|
||||
"if \"HF_API_TOKEN\" not in os.environ:\n",
|
||||
" print(\"Enter your HuggingFace Hub API key:\")\n",
|
||||
" os.environ[\"HF_API_TOKEN\"] = getpass()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Using LangChain with OpenLM\n",
|
||||
"\n",
|
||||
"Here we're going to call two models in an LLMChain, `text-davinci-003` from OpenAI and `gpt2` on HuggingFace."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms import OpenLM\n",
|
||||
"from langchain import PromptTemplate, LLMChain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Model: text-davinci-003\n",
|
||||
"Result: France is a country in Europe. The capital of France is Paris.\n",
|
||||
"Model: huggingface.co/gpt2\n",
|
||||
"Result: Question: What is the capital of France?\n",
|
||||
"\n",
|
||||
"Answer: Let's think step by step. I am not going to lie, this is a complicated issue, and I don't see any solutions to all this, but it is still far more\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"question = \"What is the capital of France?\"\n",
|
||||
"template = \"\"\"Question: {question}\n",
|
||||
"\n",
|
||||
"Answer: Let's think step by step.\"\"\"\n",
|
||||
"\n",
|
||||
"prompt = PromptTemplate(template=template, input_variables=[\"question\"])\n",
|
||||
"\n",
|
||||
"for model in [\"text-davinci-003\", \"huggingface.co/gpt2\"]:\n",
|
||||
" llm = OpenLM(model=model)\n",
|
||||
" llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||
" result = llm_chain.run(question)\n",
|
||||
" print(\"\"\"Model: {}\n",
|
||||
"Result: {}\"\"\".format(model, result))"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
137
docs/modules/models/text_embedding/examples/elasticsearch.ipynb
Normal file
137
docs/modules/models/text_embedding/examples/elasticsearch.ipynb
Normal file
@@ -0,0 +1,137 @@
|
||||
{
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"name": "python3",
|
||||
"display_name": "Python 3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"!pip install elasticsearch langchain"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "OOiBBjc0Kd-6"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"%env ES_CLOUDID=<cloud id from cloud.elastic.co>\n",
|
||||
"%env ES_USER=<user>\n",
|
||||
"%env ES_PASS=<password>\n",
|
||||
"\n",
|
||||
"es_cloudid = os.environ.get(\"ES_CLOUDID\")\n",
|
||||
"es_user = os.environ.get(\"ES_USER\")\n",
|
||||
"es_pass = os.environ.get(\"ES_PASS\")"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "Wr8unljAKdCh"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# Connect to Elasticsearch\n",
|
||||
"es_connection = Elasticsearch(cloud_id=es_cloudid, basic_auth=(es_user, es_pass))"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "YIDsrBqTKs85"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# Define the model ID and input field name (if different from default)\n",
|
||||
"model_id = \"your_model_id\"\n",
|
||||
"input_field = \"your_input_field\" # Optional, only if different from 'text_field'"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "sfFhnFHOKvbM"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# Initialize the ElasticsearchEmbeddings instance\n",
|
||||
"embeddings_generator = ElasticsearchEmbeddings(es_connection, model_id, input_field)"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "V-pCgqLCKvYs"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# Generate embeddings for a list of documents\n",
|
||||
"documents = [\n",
|
||||
" \"This is an example document.\",\n",
|
||||
" \"Another example document to generate embeddings for.\",\n",
|
||||
" ]\n",
|
||||
"document_embeddings = embeddings_generator.embed_documents(documents)"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "lJg2iRDWKvV_"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# Print the generated document embeddings\n",
|
||||
"for i, doc_embedding in enumerate(document_embeddings):\n",
|
||||
" print(f\"Embedding for document {i + 1}: {doc_embedding}\")"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "R3sYQlh3KvTQ"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# Generate an embedding for a single query text\n",
|
||||
"query_text = \"What is the meaning of life?\"\n",
|
||||
"query_embedding = embeddings_generator.embed_query(query_text)"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "n0un5Vc0KvQd"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# Print the generated query embedding\n",
|
||||
"print(f\"Embedding for query: {query_embedding}\")"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "PANph6pmKvLD"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
}
|
||||
]
|
||||
}
|
||||
109
docs/modules/models/text_embedding/examples/mosaicml.ipynb
Normal file
109
docs/modules/models/text_embedding/examples/mosaicml.ipynb
Normal file
@@ -0,0 +1,109 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# MosaicML embeddings\n",
|
||||
"\n",
|
||||
"[MosaicML](https://docs.mosaicml.com/en/latest/inference.html) offers a managed inference service. You can either use a variety of open source models, or deploy your own.\n",
|
||||
"\n",
|
||||
"This example goes over how to use LangChain to interact with MosaicML Inference for text embedding."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# sign up for an account: https://forms.mosaicml.com/demo?utm_source=langchain\n",
|
||||
"\n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"MOSAICML_API_TOKEN = getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"MOSAICML_API_TOKEN\"] = MOSAICML_API_TOKEN"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings import MosaicMLInstructorEmbeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embeddings = MosaicMLInstructorEmbeddings(\n",
|
||||
" query_instruction=\"Represent the query for retrieval: \"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query_text = \"This is a test query.\"\n",
|
||||
"query_result = embeddings.embed_query(query_text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"document_text = \"This is a test document.\"\n",
|
||||
"document_result = embeddings.embed_documents([document_text])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"query_numpy = np.array(query_result)\n",
|
||||
"document_numpy = np.array(document_result[0])\n",
|
||||
"similarity = np.dot(query_numpy, document_numpy) / (np.linalg.norm(query_numpy)*np.linalg.norm(document_numpy))\n",
|
||||
"print(f\"Cosine similarity between document and query: {similarity}\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -559,7 +559,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": 1,
|
||||
"id": "0b6dd7b8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -631,6 +631,84 @@
|
||||
"prompt = load_prompt(\"few_shot_prompt_example_prompt.json\")\n",
|
||||
"print(prompt.format(adjective=\"funny\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c6e3f9fe",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## PromptTempalte with OutputParser\n",
|
||||
"This shows an example of loading a prompt along with an OutputParser from a file."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "500dab26",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{\r\n",
|
||||
" \"input_variables\": [\r\n",
|
||||
" \"question\",\r\n",
|
||||
" \"student_answer\"\r\n",
|
||||
" ],\r\n",
|
||||
" \"output_parser\": {\r\n",
|
||||
" \"regex\": \"(.*?)\\\\nScore: (.*)\",\r\n",
|
||||
" \"output_keys\": [\r\n",
|
||||
" \"answer\",\r\n",
|
||||
" \"score\"\r\n",
|
||||
" ],\r\n",
|
||||
" \"default_output_key\": null,\r\n",
|
||||
" \"_type\": \"regex_parser\"\r\n",
|
||||
" },\r\n",
|
||||
" \"partial_variables\": {},\r\n",
|
||||
" \"template\": \"Given the following question and student answer, provide a correct answer and score the student answer.\\nQuestion: {question}\\nStudent Answer: {student_answer}\\nCorrect Answer:\",\r\n",
|
||||
" \"template_format\": \"f-string\",\r\n",
|
||||
" \"validate_template\": true,\r\n",
|
||||
" \"_type\": \"prompt\"\r\n",
|
||||
"}"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"! cat prompt_with_output_parser.json"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "d267a736",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"prompt = load_prompt(\"prompt_with_output_parser.json\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "cb770399",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'answer': 'George Washington was born in 1732 and died in 1799.',\n",
|
||||
" 'score': '1/2'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"prompt.output_parser.parse(\"George Washington was born in 1732 and died in 1799.\\nScore: 1/2\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -649,7 +727,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.11.3"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
|
||||
@@ -0,0 +1,20 @@
|
||||
{
|
||||
"input_variables": [
|
||||
"question",
|
||||
"student_answer"
|
||||
],
|
||||
"output_parser": {
|
||||
"regex": "(.*?)\nScore: (.*)",
|
||||
"output_keys": [
|
||||
"answer",
|
||||
"score"
|
||||
],
|
||||
"default_output_key": null,
|
||||
"_type": "regex_parser"
|
||||
},
|
||||
"partial_variables": {},
|
||||
"template": "Given the following question and student answer, provide a correct answer and score the student answer.\nQuestion: {question}\nStudent Answer: {student_answer}\nCorrect Answer:",
|
||||
"template_format": "f-string",
|
||||
"validate_template": true,
|
||||
"_type": "prompt"
|
||||
}
|
||||
@@ -150,7 +150,6 @@ In this example, we'll create a prompt to generate word antonyms.
|
||||
```python
|
||||
from langchain import PromptTemplate, FewShotPromptTemplate
|
||||
|
||||
|
||||
# First, create the list of few shot examples.
|
||||
examples = [
|
||||
{"word": "happy", "antonym": "sad"},
|
||||
@@ -159,10 +158,10 @@ examples = [
|
||||
|
||||
# Next, we specify the template to format the examples we have provided.
|
||||
# We use the `PromptTemplate` class for this.
|
||||
example_formatter_template = """
|
||||
Word: {word}
|
||||
Antonym: {antonym}\n
|
||||
example_formatter_template = """Word: {word}
|
||||
Antonym: {antonym}
|
||||
"""
|
||||
|
||||
example_prompt = PromptTemplate(
|
||||
input_variables=["word", "antonym"],
|
||||
template=example_formatter_template,
|
||||
@@ -176,14 +175,14 @@ few_shot_prompt = FewShotPromptTemplate(
|
||||
example_prompt=example_prompt,
|
||||
# The prefix is some text that goes before the examples in the prompt.
|
||||
# Usually, this consists of intructions.
|
||||
prefix="Give the antonym of every input",
|
||||
prefix="Give the antonym of every input\n",
|
||||
# The suffix is some text that goes after the examples in the prompt.
|
||||
# Usually, this is where the user input will go
|
||||
suffix="Word: {input}\nAntonym:",
|
||||
suffix="Word: {input}\nAntonym: ",
|
||||
# The input variables are the variables that the overall prompt expects.
|
||||
input_variables=["input"],
|
||||
# The example_separator is the string we will use to join the prefix, examples, and suffix together with.
|
||||
example_separator="\n\n",
|
||||
example_separator="\n",
|
||||
)
|
||||
|
||||
# We can now generate a prompt using the `format` method.
|
||||
@@ -197,7 +196,7 @@ print(few_shot_prompt.format(input="big"))
|
||||
# -> Antonym: short
|
||||
# ->
|
||||
# -> Word: big
|
||||
# -> Antonym:
|
||||
# -> Antonym:
|
||||
```
|
||||
|
||||
## Select examples for a prompt template
|
||||
@@ -229,7 +228,11 @@ example_selector = LengthBasedExampleSelector(
|
||||
example_prompt=example_prompt,
|
||||
# This is the maximum length that the formatted examples should be.
|
||||
# Length is measured by the get_text_length function below.
|
||||
max_length=25,
|
||||
max_length=25
|
||||
# This is the function used to get the length of a string, which is used
|
||||
# to determine which examples to include. It is commented out because
|
||||
# it is provided as a default value if none is specified.
|
||||
# get_text_length: Callable[[str], int] = lambda x: len(re.split("\n| ", x))
|
||||
)
|
||||
|
||||
# We can now use the `example_selector` to create a `FewShotPromptTemplate`.
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
API References
|
||||
==========================
|
||||
|
||||
All of LangChain's reference documentation, in one place.
|
||||
Full documentation on all methods, classes, and APIs in LangChain.
|
||||
| Full documentation on all methods, classes, and APIs in LangChain.
|
||||
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
@@ -15,6 +15,7 @@ from langchain.agents.agent_toolkits import (
|
||||
create_pbi_agent,
|
||||
create_pbi_chat_agent,
|
||||
create_spark_dataframe_agent,
|
||||
create_spark_sql_agent,
|
||||
create_sql_agent,
|
||||
create_vectorstore_agent,
|
||||
create_vectorstore_router_agent,
|
||||
@@ -59,6 +60,7 @@ __all__ = [
|
||||
"create_pbi_agent",
|
||||
"create_pbi_chat_agent",
|
||||
"create_spark_dataframe_agent",
|
||||
"create_spark_sql_agent",
|
||||
"create_sql_agent",
|
||||
"create_vectorstore_agent",
|
||||
"create_vectorstore_router_agent",
|
||||
|
||||
@@ -773,7 +773,11 @@ class AgentExecutor(Chain):
|
||||
raise e
|
||||
text = str(e)
|
||||
if isinstance(self.handle_parsing_errors, bool):
|
||||
observation = "Invalid or incomplete response"
|
||||
if e.send_to_llm:
|
||||
observation = str(e.observation)
|
||||
text = str(e.llm_output)
|
||||
else:
|
||||
observation = "Invalid or incomplete response"
|
||||
elif isinstance(self.handle_parsing_errors, str):
|
||||
observation = self.handle_parsing_errors
|
||||
elif callable(self.handle_parsing_errors):
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
"""Agent toolkits."""
|
||||
|
||||
from langchain.agents.agent_toolkits.azure_cognitive_services.toolkit import (
|
||||
AzureCognitiveServicesToolkit,
|
||||
)
|
||||
from langchain.agents.agent_toolkits.csv.base import create_csv_agent
|
||||
from langchain.agents.agent_toolkits.file_management.toolkit import (
|
||||
FileManagementToolkit,
|
||||
@@ -18,6 +21,8 @@ from langchain.agents.agent_toolkits.powerbi.chat_base import create_pbi_chat_ag
|
||||
from langchain.agents.agent_toolkits.powerbi.toolkit import PowerBIToolkit
|
||||
from langchain.agents.agent_toolkits.python.base import create_python_agent
|
||||
from langchain.agents.agent_toolkits.spark.base import create_spark_dataframe_agent
|
||||
from langchain.agents.agent_toolkits.spark_sql.base import create_spark_sql_agent
|
||||
from langchain.agents.agent_toolkits.spark_sql.toolkit import SparkSQLToolkit
|
||||
from langchain.agents.agent_toolkits.sql.base import create_sql_agent
|
||||
from langchain.agents.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
|
||||
from langchain.agents.agent_toolkits.vectorstore.base import (
|
||||
@@ -41,6 +46,7 @@ __all__ = [
|
||||
"create_vectorstore_agent",
|
||||
"JsonToolkit",
|
||||
"SQLDatabaseToolkit",
|
||||
"SparkSQLToolkit",
|
||||
"NLAToolkit",
|
||||
"PowerBIToolkit",
|
||||
"OpenAPIToolkit",
|
||||
@@ -50,10 +56,12 @@ __all__ = [
|
||||
"VectorStoreRouterToolkit",
|
||||
"create_pandas_dataframe_agent",
|
||||
"create_spark_dataframe_agent",
|
||||
"create_spark_sql_agent",
|
||||
"create_csv_agent",
|
||||
"ZapierToolkit",
|
||||
"GmailToolkit",
|
||||
"JiraToolkit",
|
||||
"FileManagementToolkit",
|
||||
"PlayWrightBrowserToolkit",
|
||||
"AzureCognitiveServicesToolkit",
|
||||
]
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
"""Azure Cognitive Services Toolkit."""
|
||||
|
||||
from langchain.agents.agent_toolkits.azure_cognitive_services.toolkit import (
|
||||
AzureCognitiveServicesToolkit,
|
||||
)
|
||||
|
||||
__all__ = ["AzureCognitiveServicesToolkit"]
|
||||
@@ -0,0 +1,31 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from typing import List
|
||||
|
||||
from langchain.agents.agent_toolkits.base import BaseToolkit
|
||||
from langchain.tools.azure_cognitive_services import (
|
||||
AzureCogsFormRecognizerTool,
|
||||
AzureCogsImageAnalysisTool,
|
||||
AzureCogsSpeech2TextTool,
|
||||
AzureCogsText2SpeechTool,
|
||||
)
|
||||
from langchain.tools.base import BaseTool
|
||||
|
||||
|
||||
class AzureCognitiveServicesToolkit(BaseToolkit):
|
||||
"""Toolkit for Azure Cognitive Services."""
|
||||
|
||||
def get_tools(self) -> List[BaseTool]:
|
||||
"""Get the tools in the toolkit."""
|
||||
|
||||
tools = [
|
||||
AzureCogsFormRecognizerTool(),
|
||||
AzureCogsSpeech2TextTool(),
|
||||
AzureCogsText2SpeechTool(),
|
||||
]
|
||||
|
||||
# TODO: Remove check once azure-ai-vision supports MacOS.
|
||||
if sys.platform.startswith("linux") or sys.platform.startswith("win"):
|
||||
tools.append(AzureCogsImageAnalysisTool())
|
||||
return tools
|
||||
@@ -100,7 +100,7 @@ API_CONTROLLER_TOOL_DESCRIPTION = f"Can be used to execute a plan of API calls,
|
||||
# The goal is to have an agent at the top-level (e.g. so it can recover from errors and re-plan) while
|
||||
# keeping planning (and specifically the planning prompt) simple.
|
||||
API_ORCHESTRATOR_PROMPT = """You are an agent that assists with user queries against API, things like querying information or creating resources.
|
||||
Some user queries can be resolved in a single API call, particularly if you can find appropriate params from the OpenAPI spec; though some require several API call.
|
||||
Some user queries can be resolved in a single API call, particularly if you can find appropriate params from the OpenAPI spec; though some require several API calls.
|
||||
You should always plan your API calls first, and then execute the plan second.
|
||||
If the plan includes a DELETE call, be sure to ask the User for authorization first unless the User has specifically asked to delete something.
|
||||
You should never return information without executing the api_controller tool.
|
||||
@@ -117,7 +117,7 @@ Action: the action to take, should be one of the tools [{tool_names}]
|
||||
Action Input: the input to the action
|
||||
Observation: the result of the action
|
||||
... (this Thought/Action/Action Input/Observation can repeat N times)
|
||||
Thought: I am finished executing a plan and have the information the user asked for or the data the used asked to create
|
||||
Thought: I am finished executing a plan and have the information the user asked for or the data the user asked to create
|
||||
Final Answer: the final output from executing the plan
|
||||
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ def create_pandas_dataframe_agent(
|
||||
try:
|
||||
import pandas as pd
|
||||
except ImportError:
|
||||
raise ValueError(
|
||||
raise ImportError(
|
||||
"pandas package not found, please install with `pip install pandas`"
|
||||
)
|
||||
|
||||
|
||||
@@ -2,28 +2,24 @@
|
||||
"""Prompts for PowerBI agent."""
|
||||
|
||||
|
||||
POWERBI_PREFIX = """You are an agent designed to interact with a Power BI Dataset.
|
||||
POWERBI_PREFIX = """You are an agent designed to help users interact with a PowerBI Dataset.
|
||||
|
||||
Assistant has access to tools that can give context, write queries and execute those queries against PowerBI, Microsofts business intelligence tool. The questions from the users should be interpreted as related to the dataset that is available and not general questions about the world. If the question does not seem related to the dataset, just return "I don't know" as the answer. The query language that PowerBI uses is called DAX and it is quite particular and complex, so make sure to use the right tools to get the answers the user is looking for.
|
||||
Agent has access to a tool that can write a query based on the question and then run those against PowerBI, Microsofts business intelligence tool. The questions from the users should be interpreted as related to the dataset that is available and not general questions about the world. If the question does not seem related to the dataset, just return "This does not appear to be part of this dataset." as the answer.
|
||||
|
||||
Given an input question, create a syntactically correct DAX query to run, then look at the results and return the answer. Sometimes the result indicate something is wrong with the query, or there were errors in the json serialization. Unless the user specifies a specific number of examples they wish to obtain, always limit your query to at most {top_k} results. You can order the results by a relevant column to return the most interesting examples in the database.
|
||||
|
||||
Assistant never just starts querying, assistant should first find out which tables there are, then how each table is defined and then ask the question to query tool to create a query and then ask the query tool to execute it, finally create a complete sentence that answers the question, if multiple rows need are asked find a way to write that in a easily readible format for a human. Assistant has tools that can get more context of the tables which helps it write correct queries.
|
||||
Given an input question, ask to run the questions against the dataset, then look at the results and return the answer, the answer should be a complete sentence that answers the question, if multiple rows are asked find a way to write that in a easily readible format for a human, also make sure to represent numbers in readable ways, like 1M instead of 1000000. Unless the user specifies a specific number of examples they wish to obtain, always limit your query to at most {top_k} results.
|
||||
"""
|
||||
|
||||
POWERBI_SUFFIX = """Begin!
|
||||
|
||||
Question: {input}
|
||||
Thought: I should first ask which tables I have, then how each table is defined and then ask the question to query tool to create a query for me and then I should ask the query tool to execute it, finally create a nice sentence that answers the question.
|
||||
Thought: I can first ask which tables I have, then how each table is defined and then ask the query tool the question I need, and finally create a nice sentence that answers the question.
|
||||
{agent_scratchpad}"""
|
||||
|
||||
POWERBI_CHAT_PREFIX = """Assistant is a large language model built to help users interact with a PowerBI Dataset.
|
||||
|
||||
Assistant has access to tools that can give context, write queries and execute those queries against PowerBI, Microsofts business intelligence tool. The questions from the users should be interpreted as related to the dataset that is available and not general questions about the world. If the question does not seem related to the dataset, just return "I don't know" as the answer. The query language that PowerBI uses is called DAX and it is quite particular and complex, so make sure to use the right tools to get the answers the user is looking for.
|
||||
Assistant has access to a tool that can write a query based on the question and then run those against PowerBI, Microsofts business intelligence tool. The questions from the users should be interpreted as related to the dataset that is available and not general questions about the world. If the question does not seem related to the dataset, just return "This does not appear to be part of this dataset." as the answer.
|
||||
|
||||
Given an input question, create a syntactically correct DAX query to run, then look at the results and return the answer. Sometimes the result indicate something is wrong with the query, or there were errors in the json serialization. Unless the user specifies a specific number of examples they wish to obtain, always limit your query to at most {top_k} results. You can order the results by a relevant column to return the most interesting examples in the database.
|
||||
|
||||
Assistant never just starts querying, assistant should first find out which tables there are, then how each table is defined and then ask the question to query tool to create a query and then ask the query tool to execute it, finally create a complete sentence that answers the question, if multiple rows need are asked find a way to write that in a easily readible format for a human. Assistant has tools that can get more context of the tables which helps it write correct queries.
|
||||
Given an input question, ask to run the questions against the dataset, then look at the results and return the answer, the answer should be a complete sentence that answers the question, if multiple rows are asked find a way to write that in a easily readible format for a human, also make sure to represent numbers in readable ways, like 1M instead of 1000000. Unless the user specifies a specific number of examples they wish to obtain, always limit your query to at most {top_k} results.
|
||||
"""
|
||||
|
||||
POWERBI_CHAT_SUFFIX = """TOOLS
|
||||
|
||||
@@ -12,7 +12,6 @@ from langchain.tools import BaseTool
|
||||
from langchain.tools.powerbi.prompt import QUESTION_TO_QUERY
|
||||
from langchain.tools.powerbi.tool import (
|
||||
InfoPowerBITool,
|
||||
InputToQueryTool,
|
||||
ListPowerBITool,
|
||||
QueryPowerBITool,
|
||||
)
|
||||
@@ -25,6 +24,7 @@ class PowerBIToolkit(BaseToolkit):
|
||||
powerbi: PowerBIDataset = Field(exclude=True)
|
||||
llm: BaseLanguageModel = Field(exclude=True)
|
||||
examples: Optional[str] = None
|
||||
max_iterations: int = 5
|
||||
callback_manager: Optional[BaseCallbackManager] = None
|
||||
|
||||
class Config:
|
||||
@@ -52,12 +52,12 @@ class PowerBIToolkit(BaseToolkit):
|
||||
),
|
||||
)
|
||||
return [
|
||||
QueryPowerBITool(powerbi=self.powerbi),
|
||||
InfoPowerBITool(powerbi=self.powerbi),
|
||||
ListPowerBITool(powerbi=self.powerbi),
|
||||
InputToQueryTool(
|
||||
QueryPowerBITool(
|
||||
llm_chain=chain,
|
||||
powerbi=self.powerbi,
|
||||
examples=self.examples,
|
||||
max_iterations=self.max_iterations,
|
||||
),
|
||||
InfoPowerBITool(powerbi=self.powerbi),
|
||||
ListPowerBITool(powerbi=self.powerbi),
|
||||
]
|
||||
|
||||
@@ -14,9 +14,7 @@ def _validate_spark_df(df: Any) -> bool:
|
||||
try:
|
||||
from pyspark.sql import DataFrame as SparkLocalDataFrame
|
||||
|
||||
if not isinstance(df, SparkLocalDataFrame):
|
||||
return False
|
||||
return True
|
||||
return isinstance(df, SparkLocalDataFrame)
|
||||
except ImportError:
|
||||
return False
|
||||
|
||||
@@ -25,9 +23,7 @@ def _validate_spark_connect_df(df: Any) -> bool:
|
||||
try:
|
||||
from pyspark.sql.connect.dataframe import DataFrame as SparkConnectDataFrame
|
||||
|
||||
if not isinstance(df, SparkConnectDataFrame):
|
||||
return False
|
||||
return True
|
||||
return isinstance(df, SparkConnectDataFrame)
|
||||
except ImportError:
|
||||
return False
|
||||
|
||||
|
||||
1
langchain/agents/agent_toolkits/spark_sql/__init__.py
Normal file
1
langchain/agents/agent_toolkits/spark_sql/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Spark SQL agent."""
|
||||
56
langchain/agents/agent_toolkits/spark_sql/base.py
Normal file
56
langchain/agents/agent_toolkits/spark_sql/base.py
Normal file
@@ -0,0 +1,56 @@
|
||||
"""Spark SQL agent."""
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from langchain.agents.agent import AgentExecutor
|
||||
from langchain.agents.agent_toolkits.spark_sql.prompt import SQL_PREFIX, SQL_SUFFIX
|
||||
from langchain.agents.agent_toolkits.spark_sql.toolkit import SparkSQLToolkit
|
||||
from langchain.agents.mrkl.base import ZeroShotAgent
|
||||
from langchain.agents.mrkl.prompt import FORMAT_INSTRUCTIONS
|
||||
from langchain.base_language import BaseLanguageModel
|
||||
from langchain.callbacks.base import BaseCallbackManager
|
||||
from langchain.chains.llm import LLMChain
|
||||
|
||||
|
||||
def create_spark_sql_agent(
|
||||
llm: BaseLanguageModel,
|
||||
toolkit: SparkSQLToolkit,
|
||||
callback_manager: Optional[BaseCallbackManager] = None,
|
||||
prefix: str = SQL_PREFIX,
|
||||
suffix: str = SQL_SUFFIX,
|
||||
format_instructions: str = FORMAT_INSTRUCTIONS,
|
||||
input_variables: Optional[List[str]] = None,
|
||||
top_k: int = 10,
|
||||
max_iterations: Optional[int] = 15,
|
||||
max_execution_time: Optional[float] = None,
|
||||
early_stopping_method: str = "force",
|
||||
verbose: bool = False,
|
||||
agent_executor_kwargs: Optional[Dict[str, Any]] = None,
|
||||
**kwargs: Dict[str, Any],
|
||||
) -> AgentExecutor:
|
||||
"""Construct a sql agent from an LLM and tools."""
|
||||
tools = toolkit.get_tools()
|
||||
prefix = prefix.format(top_k=top_k)
|
||||
prompt = ZeroShotAgent.create_prompt(
|
||||
tools,
|
||||
prefix=prefix,
|
||||
suffix=suffix,
|
||||
format_instructions=format_instructions,
|
||||
input_variables=input_variables,
|
||||
)
|
||||
llm_chain = LLMChain(
|
||||
llm=llm,
|
||||
prompt=prompt,
|
||||
callback_manager=callback_manager,
|
||||
)
|
||||
tool_names = [tool.name for tool in tools]
|
||||
agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs)
|
||||
return AgentExecutor.from_agent_and_tools(
|
||||
agent=agent,
|
||||
tools=tools,
|
||||
callback_manager=callback_manager,
|
||||
verbose=verbose,
|
||||
max_iterations=max_iterations,
|
||||
max_execution_time=max_execution_time,
|
||||
early_stopping_method=early_stopping_method,
|
||||
**(agent_executor_kwargs or {}),
|
||||
)
|
||||
21
langchain/agents/agent_toolkits/spark_sql/prompt.py
Normal file
21
langchain/agents/agent_toolkits/spark_sql/prompt.py
Normal file
@@ -0,0 +1,21 @@
|
||||
# flake8: noqa
|
||||
|
||||
SQL_PREFIX = """You are an agent designed to interact with Spark SQL.
|
||||
Given an input question, create a syntactically correct Spark SQL query to run, then look at the results of the query and return the answer.
|
||||
Unless the user specifies a specific number of examples they wish to obtain, always limit your query to at most {top_k} results.
|
||||
You can order the results by a relevant column to return the most interesting examples in the database.
|
||||
Never query for all the columns from a specific table, only ask for the relevant columns given the question.
|
||||
You have access to tools for interacting with the database.
|
||||
Only use the below tools. Only use the information returned by the below tools to construct your final answer.
|
||||
You MUST double check your query before executing it. If you get an error while executing a query, rewrite the query and try again.
|
||||
|
||||
DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.) to the database.
|
||||
|
||||
If the question does not seem related to the database, just return "I don't know" as the answer.
|
||||
"""
|
||||
|
||||
SQL_SUFFIX = """Begin!
|
||||
|
||||
Question: {input}
|
||||
Thought: I should look at the tables in the database to see what I can query.
|
||||
{agent_scratchpad}"""
|
||||
36
langchain/agents/agent_toolkits/spark_sql/toolkit.py
Normal file
36
langchain/agents/agent_toolkits/spark_sql/toolkit.py
Normal file
@@ -0,0 +1,36 @@
|
||||
"""Toolkit for interacting with Spark SQL."""
|
||||
from typing import List
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
from langchain.agents.agent_toolkits.base import BaseToolkit
|
||||
from langchain.base_language import BaseLanguageModel
|
||||
from langchain.tools import BaseTool
|
||||
from langchain.tools.spark_sql.tool import (
|
||||
InfoSparkSQLTool,
|
||||
ListSparkSQLTool,
|
||||
QueryCheckerTool,
|
||||
QuerySparkSQLTool,
|
||||
)
|
||||
from langchain.utilities.spark_sql import SparkSQL
|
||||
|
||||
|
||||
class SparkSQLToolkit(BaseToolkit):
|
||||
"""Toolkit for interacting with Spark SQL."""
|
||||
|
||||
db: SparkSQL = Field(exclude=True)
|
||||
llm: BaseLanguageModel = Field(exclude=True)
|
||||
|
||||
class Config:
|
||||
"""Configuration for this pydantic object."""
|
||||
|
||||
arbitrary_types_allowed = True
|
||||
|
||||
def get_tools(self) -> List[BaseTool]:
|
||||
"""Get the tools in the toolkit."""
|
||||
return [
|
||||
QuerySparkSQLTool(db=self.db),
|
||||
InfoSparkSQLTool(db=self.db),
|
||||
ListSparkSQLTool(db=self.db),
|
||||
QueryCheckerTool(db=self.db, llm=self.llm),
|
||||
]
|
||||
@@ -5,7 +5,7 @@ from typing import Union
|
||||
|
||||
from langchain.agents import AgentOutputParser
|
||||
from langchain.agents.conversational_chat.prompt import FORMAT_INSTRUCTIONS
|
||||
from langchain.schema import AgentAction, AgentFinish
|
||||
from langchain.schema import AgentAction, AgentFinish, OutputParserException
|
||||
|
||||
|
||||
class ConvoOutputParser(AgentOutputParser):
|
||||
@@ -13,24 +13,27 @@ class ConvoOutputParser(AgentOutputParser):
|
||||
return FORMAT_INSTRUCTIONS
|
||||
|
||||
def parse(self, text: str) -> Union[AgentAction, AgentFinish]:
|
||||
cleaned_output = text.strip()
|
||||
if "```json" in cleaned_output:
|
||||
_, cleaned_output = cleaned_output.split("```json")
|
||||
if "```" in cleaned_output:
|
||||
cleaned_output, _ = cleaned_output.split("```")
|
||||
if cleaned_output.startswith("```json"):
|
||||
cleaned_output = cleaned_output[len("```json") :]
|
||||
if cleaned_output.startswith("```"):
|
||||
cleaned_output = cleaned_output[len("```") :]
|
||||
if cleaned_output.endswith("```"):
|
||||
cleaned_output = cleaned_output[: -len("```")]
|
||||
cleaned_output = cleaned_output.strip()
|
||||
response = json.loads(cleaned_output)
|
||||
action, action_input = response["action"], response["action_input"]
|
||||
if action == "Final Answer":
|
||||
return AgentFinish({"output": action_input}, text)
|
||||
else:
|
||||
return AgentAction(action, action_input, text)
|
||||
try:
|
||||
cleaned_output = text.strip()
|
||||
if "```json" in cleaned_output:
|
||||
_, cleaned_output = cleaned_output.split("```json")
|
||||
if "```" in cleaned_output:
|
||||
cleaned_output, _ = cleaned_output.split("```")
|
||||
if cleaned_output.startswith("```json"):
|
||||
cleaned_output = cleaned_output[len("```json") :]
|
||||
if cleaned_output.startswith("```"):
|
||||
cleaned_output = cleaned_output[len("```") :]
|
||||
if cleaned_output.endswith("```"):
|
||||
cleaned_output = cleaned_output[: -len("```")]
|
||||
cleaned_output = cleaned_output.strip()
|
||||
response = json.loads(cleaned_output)
|
||||
action, action_input = response["action"], response["action_input"]
|
||||
if action == "Final Answer":
|
||||
return AgentFinish({"output": action_input}, text)
|
||||
else:
|
||||
return AgentAction(action, action_input, text)
|
||||
except Exception as e:
|
||||
raise OutputParserException(f"Could not parse LLM output: {text}") from e
|
||||
|
||||
@property
|
||||
def _type(self) -> str:
|
||||
|
||||
@@ -23,7 +23,25 @@ class MRKLOutputParser(AgentOutputParser):
|
||||
)
|
||||
match = re.search(regex, text, re.DOTALL)
|
||||
if not match:
|
||||
raise OutputParserException(f"Could not parse LLM output: `{text}`")
|
||||
if not re.search(r"Action\s*\d*\s*:[\s]*(.*?)", text, re.DOTALL):
|
||||
raise OutputParserException(
|
||||
f"Could not parse LLM output: `{text}`",
|
||||
observation="Invalid Format: Missing 'Action:' after 'Thought:'",
|
||||
llm_output=text,
|
||||
send_to_llm=True,
|
||||
)
|
||||
elif not re.search(
|
||||
r"[\s]*Action\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)", text, re.DOTALL
|
||||
):
|
||||
raise OutputParserException(
|
||||
f"Could not parse LLM output: `{text}`",
|
||||
observation="Invalid Format:"
|
||||
" Missing 'Action Input:' after 'Action:'",
|
||||
llm_output=text,
|
||||
send_to_llm=True,
|
||||
)
|
||||
else:
|
||||
raise OutputParserException(f"Could not parse LLM output: `{text}`")
|
||||
action = match.group(1).strip()
|
||||
action_input = match.group(2)
|
||||
return AgentAction(action, action_input.strip(" ").strip('"'), text)
|
||||
|
||||
@@ -10,8 +10,8 @@ from langchain.callbacks.manager import Callbacks
|
||||
from langchain.schema import BaseMessage, LLMResult, PromptValue, get_buffer_string
|
||||
|
||||
|
||||
def _get_num_tokens_default_method(text: str) -> int:
|
||||
"""Get the number of tokens present in the text."""
|
||||
def _get_token_ids_default_method(text: str) -> List[int]:
|
||||
"""Encode the text into token IDs."""
|
||||
# TODO: this method may not be exact.
|
||||
# TODO: this method may differ based on model (eg codex).
|
||||
try:
|
||||
@@ -19,17 +19,14 @@ def _get_num_tokens_default_method(text: str) -> int:
|
||||
except ImportError:
|
||||
raise ValueError(
|
||||
"Could not import transformers python package. "
|
||||
"This is needed in order to calculate get_num_tokens. "
|
||||
"This is needed in order to calculate get_token_ids. "
|
||||
"Please install it with `pip install transformers`."
|
||||
)
|
||||
# create a GPT-2 tokenizer instance
|
||||
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
|
||||
|
||||
# tokenize the text using the GPT-2 tokenizer
|
||||
tokenized_text = tokenizer.tokenize(text)
|
||||
|
||||
# calculate the number of tokens in the tokenized text
|
||||
return len(tokenized_text)
|
||||
return tokenizer.encode(text)
|
||||
|
||||
|
||||
class BaseLanguageModel(BaseModel, ABC):
|
||||
@@ -61,9 +58,23 @@ class BaseLanguageModel(BaseModel, ABC):
|
||||
) -> BaseMessage:
|
||||
"""Predict message from messages."""
|
||||
|
||||
@abstractmethod
|
||||
async def apredict(self, text: str, *, stop: Optional[Sequence[str]] = None) -> str:
|
||||
"""Predict text from text."""
|
||||
|
||||
@abstractmethod
|
||||
async def apredict_messages(
|
||||
self, messages: List[BaseMessage], *, stop: Optional[Sequence[str]] = None
|
||||
) -> BaseMessage:
|
||||
"""Predict message from messages."""
|
||||
|
||||
def get_token_ids(self, text: str) -> List[int]:
|
||||
"""Get the token present in the text."""
|
||||
return _get_token_ids_default_method(text)
|
||||
|
||||
def get_num_tokens(self, text: str) -> int:
|
||||
"""Get the number of tokens present in the text."""
|
||||
return _get_num_tokens_default_method(text)
|
||||
return len(self.get_token_ids(text))
|
||||
|
||||
def get_num_tokens_from_messages(self, messages: List[BaseMessage]) -> int:
|
||||
"""Get the number of tokens in the message."""
|
||||
|
||||
@@ -313,7 +313,7 @@ class GPTCache(BaseCache):
|
||||
try:
|
||||
import gptcache # noqa: F401
|
||||
except ImportError:
|
||||
raise ValueError(
|
||||
raise ImportError(
|
||||
"Could not import gptcache python package. "
|
||||
"Please install it with `pip install gptcache`."
|
||||
)
|
||||
|
||||
@@ -12,6 +12,7 @@ from langchain.callbacks.openai_info import OpenAICallbackHandler
|
||||
from langchain.callbacks.stdout import StdOutCallbackHandler
|
||||
from langchain.callbacks.streaming_aiter import AsyncIteratorCallbackHandler
|
||||
from langchain.callbacks.wandb_callback import WandbCallbackHandler
|
||||
from langchain.callbacks.whylabs_callback import WhyLabsCallbackHandler
|
||||
|
||||
__all__ = [
|
||||
"OpenAICallbackHandler",
|
||||
@@ -21,6 +22,7 @@ __all__ = [
|
||||
"MlflowCallbackHandler",
|
||||
"ClearMLCallbackHandler",
|
||||
"CometCallbackHandler",
|
||||
"WhyLabsCallbackHandler",
|
||||
"AsyncIteratorCallbackHandler",
|
||||
"get_openai_callback",
|
||||
"tracing_enabled",
|
||||
|
||||
@@ -24,12 +24,20 @@ MODEL_COST_PER_1K_TOKENS = {
|
||||
"text-davinci-003": 0.02,
|
||||
"text-davinci-002": 0.02,
|
||||
"code-davinci-002": 0.02,
|
||||
"ada-finetuned": 0.0016,
|
||||
"babbage-finetuned": 0.0024,
|
||||
"curie-finetuned": 0.0120,
|
||||
"davinci-finetuned": 0.1200,
|
||||
}
|
||||
|
||||
|
||||
def get_openai_token_cost_for_model(
|
||||
model_name: str, num_tokens: int, is_completion: bool = False
|
||||
) -> float:
|
||||
# handling finetuned models
|
||||
if "ft-" in model_name:
|
||||
model_name = f"{model_name.split(':')[0]}-finetuned"
|
||||
|
||||
suffix = "-completion" if is_completion and model_name.startswith("gpt-4") else ""
|
||||
model = model_name.lower() + suffix
|
||||
if model not in MODEL_COST_PER_1K_TOKENS:
|
||||
|
||||
@@ -58,7 +58,8 @@ class AsyncIteratorCallbackHandler(AsyncCallbackHandler):
|
||||
)
|
||||
|
||||
# Cancel the other task
|
||||
other.pop().cancel()
|
||||
if other:
|
||||
other.pop().cancel()
|
||||
|
||||
# Extract the value of the first completed task
|
||||
token_or_done = cast(Union[str, Literal[True]], done.pop().result())
|
||||
|
||||
49
langchain/callbacks/streaming_stdout_final_only.py
Normal file
49
langchain/callbacks/streaming_stdout_final_only.py
Normal file
@@ -0,0 +1,49 @@
|
||||
"""Callback Handler streams to stdout on new llm token."""
|
||||
import sys
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
||||
|
||||
DEFAULT_ANSWER_PREFIX_TOKENS = ["\nFinal", " Answer", ":"]
|
||||
|
||||
|
||||
class FinalStreamingStdOutCallbackHandler(StreamingStdOutCallbackHandler):
|
||||
"""Callback handler for streaming in agents.
|
||||
Only works with agents using LLMs that support streaming.
|
||||
|
||||
Only the final output of the agent will be streamed.
|
||||
"""
|
||||
|
||||
def __init__(self, answer_prefix_tokens: Optional[List[str]] = None) -> None:
|
||||
super().__init__()
|
||||
if answer_prefix_tokens is None:
|
||||
answer_prefix_tokens = DEFAULT_ANSWER_PREFIX_TOKENS
|
||||
self.answer_prefix_tokens = answer_prefix_tokens
|
||||
self.last_tokens = [""] * len(answer_prefix_tokens)
|
||||
self.answer_reached = False
|
||||
|
||||
def on_llm_start(
|
||||
self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
|
||||
) -> None:
|
||||
"""Run when LLM starts running."""
|
||||
self.answer_reached = False
|
||||
|
||||
def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
|
||||
"""Run on new LLM token. Only available when streaming is enabled."""
|
||||
|
||||
# Remember the last n tokens, where n = len(answer_prefix_tokens)
|
||||
self.last_tokens.append(token)
|
||||
if len(self.last_tokens) > len(self.answer_prefix_tokens):
|
||||
self.last_tokens.pop(0)
|
||||
|
||||
# Check if the last n tokens match the answer_prefix_tokens list ...
|
||||
if self.last_tokens == self.answer_prefix_tokens:
|
||||
self.answer_reached = True
|
||||
# Do not print the last token in answer_prefix_tokens,
|
||||
# as it's not part of the answer yet
|
||||
return
|
||||
|
||||
# ... if yes, then print tokens from now on
|
||||
if self.answer_reached:
|
||||
sys.stdout.write(token)
|
||||
sys.stdout.flush()
|
||||
@@ -31,7 +31,7 @@ def get_headers() -> Dict[str, Any]:
|
||||
|
||||
|
||||
def get_endpoint() -> str:
|
||||
return os.getenv("LANGCHAIN_ENDPOINT", "http://localhost:8000")
|
||||
return os.getenv("LANGCHAIN_ENDPOINT", "http://localhost:1984")
|
||||
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_fixed(0.5))
|
||||
|
||||
203
langchain/callbacks/whylabs_callback.py
Normal file
203
langchain/callbacks/whylabs_callback.py
Normal file
@@ -0,0 +1,203 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
|
||||
|
||||
from langchain.callbacks.base import BaseCallbackHandler
|
||||
from langchain.schema import AgentAction, AgentFinish, Generation, LLMResult
|
||||
from langchain.utils import get_from_env
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from whylogs.api.logger.logger import Logger
|
||||
|
||||
diagnostic_logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def import_langkit(
|
||||
sentiment: bool = False,
|
||||
toxicity: bool = False,
|
||||
themes: bool = False,
|
||||
) -> Any:
|
||||
try:
|
||||
import langkit # noqa: F401
|
||||
import langkit.regexes # noqa: F401
|
||||
import langkit.textstat # noqa: F401
|
||||
|
||||
if sentiment:
|
||||
import langkit.sentiment # noqa: F401
|
||||
if toxicity:
|
||||
import langkit.toxicity # noqa: F401
|
||||
if themes:
|
||||
import langkit.themes # noqa: F401
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"To use the whylabs callback manager you need to have the `langkit` python "
|
||||
"package installed. Please install it with `pip install langkit`."
|
||||
)
|
||||
return langkit
|
||||
|
||||
|
||||
class WhyLabsCallbackHandler(BaseCallbackHandler):
|
||||
"""WhyLabs CallbackHandler."""
|
||||
|
||||
def __init__(self, logger: Logger):
|
||||
"""Initiate the rolling logger"""
|
||||
super().__init__()
|
||||
self.logger = logger
|
||||
diagnostic_logger.info(
|
||||
"Initialized WhyLabs callback handler with configured whylogs Logger."
|
||||
)
|
||||
|
||||
def _profile_generations(self, generations: List[Generation]) -> None:
|
||||
for gen in generations:
|
||||
self.logger.log({"response": gen.text})
|
||||
|
||||
def on_llm_start(
|
||||
self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
|
||||
) -> None:
|
||||
"""Pass the input prompts to the logger"""
|
||||
for prompt in prompts:
|
||||
self.logger.log({"prompt": prompt})
|
||||
|
||||
def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
|
||||
"""Pass the generated response to the logger."""
|
||||
for generations in response.generations:
|
||||
self._profile_generations(generations)
|
||||
|
||||
def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
|
||||
"""Do nothing."""
|
||||
pass
|
||||
|
||||
def on_llm_error(
|
||||
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
|
||||
) -> None:
|
||||
"""Do nothing."""
|
||||
pass
|
||||
|
||||
def on_chain_start(
|
||||
self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
|
||||
) -> None:
|
||||
"""Do nothing."""
|
||||
|
||||
def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
|
||||
"""Do nothing."""
|
||||
|
||||
def on_chain_error(
|
||||
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
|
||||
) -> None:
|
||||
"""Do nothing."""
|
||||
pass
|
||||
|
||||
def on_tool_start(
|
||||
self,
|
||||
serialized: Dict[str, Any],
|
||||
input_str: str,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Do nothing."""
|
||||
|
||||
def on_agent_action(
|
||||
self, action: AgentAction, color: Optional[str] = None, **kwargs: Any
|
||||
) -> Any:
|
||||
"""Do nothing."""
|
||||
|
||||
def on_tool_end(
|
||||
self,
|
||||
output: str,
|
||||
color: Optional[str] = None,
|
||||
observation_prefix: Optional[str] = None,
|
||||
llm_prefix: Optional[str] = None,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Do nothing."""
|
||||
|
||||
def on_tool_error(
|
||||
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
|
||||
) -> None:
|
||||
"""Do nothing."""
|
||||
pass
|
||||
|
||||
def on_text(self, text: str, **kwargs: Any) -> None:
|
||||
"""Do nothing."""
|
||||
|
||||
def on_agent_finish(
|
||||
self, finish: AgentFinish, color: Optional[str] = None, **kwargs: Any
|
||||
) -> None:
|
||||
"""Run on agent end."""
|
||||
pass
|
||||
|
||||
def flush(self) -> None:
|
||||
self.logger._do_rollover()
|
||||
diagnostic_logger.info("Flushing WhyLabs logger, writing profile...")
|
||||
|
||||
def close(self) -> None:
|
||||
self.logger.close()
|
||||
diagnostic_logger.info("Closing WhyLabs logger, see you next time!")
|
||||
|
||||
def __enter__(self) -> WhyLabsCallbackHandler:
|
||||
return self
|
||||
|
||||
def __exit__(
|
||||
self, exception_type: Any, exception_value: Any, traceback: Any
|
||||
) -> None:
|
||||
self.close()
|
||||
|
||||
@classmethod
|
||||
def from_params(
|
||||
cls,
|
||||
*,
|
||||
api_key: Optional[str] = None,
|
||||
org_id: Optional[str] = None,
|
||||
dataset_id: Optional[str] = None,
|
||||
sentiment: bool = False,
|
||||
toxicity: bool = False,
|
||||
themes: bool = False,
|
||||
) -> Logger:
|
||||
"""Instantiate whylogs Logger from params.
|
||||
|
||||
Args:
|
||||
api_key (Optional[str]): WhyLabs API key. Optional because the preferred
|
||||
way to specify the API key is with environment variable
|
||||
WHYLABS_API_KEY.
|
||||
org_id (Optional[str]): WhyLabs organization id to write profiles to.
|
||||
If not set must be specified in environment variable
|
||||
WHYLABS_DEFAULT_ORG_ID.
|
||||
dataset_id (Optional[str]): The model or dataset this callback is gathering
|
||||
telemetry for. If not set must be specified in environment variable
|
||||
WHYLABS_DEFAULT_DATASET_ID.
|
||||
sentiment (bool): If True will initialize a model to perform
|
||||
sentiment analysis compound score. Defaults to False and will not gather
|
||||
this metric.
|
||||
toxicity (bool): If True will initialize a model to score
|
||||
toxicity. Defaults to False and will not gather this metric.
|
||||
themes (bool): If True will initialize a model to calculate
|
||||
distance to configured themes. Defaults to None and will not gather this
|
||||
metric.
|
||||
"""
|
||||
# langkit library will import necessary whylogs libraries
|
||||
import_langkit(sentiment=sentiment, toxicity=toxicity, themes=themes)
|
||||
|
||||
import whylogs as why
|
||||
from whylogs.api.writer.whylabs import WhyLabsWriter
|
||||
from whylogs.core.schema import DeclarativeSchema
|
||||
from whylogs.experimental.core.metrics.udf_metric import generate_udf_schema
|
||||
|
||||
api_key = api_key or get_from_env("api_key", "WHYLABS_API_KEY")
|
||||
org_id = org_id or get_from_env("org_id", "WHYLABS_DEFAULT_ORG_ID")
|
||||
dataset_id = dataset_id or get_from_env(
|
||||
"dataset_id", "WHYLABS_DEFAULT_DATASET_ID"
|
||||
)
|
||||
whylabs_writer = WhyLabsWriter(
|
||||
api_key=api_key, org_id=org_id, dataset_id=dataset_id
|
||||
)
|
||||
|
||||
langkit_schema = DeclarativeSchema(generate_udf_schema())
|
||||
whylabs_logger = why.logger(
|
||||
mode="rolling", interval=5, when="M", schema=langkit_schema
|
||||
)
|
||||
|
||||
whylabs_logger.append_writer(writer=whylabs_writer)
|
||||
diagnostic_logger.info(
|
||||
"Started whylogs Logger with WhyLabsWriter and initialized LangKit. 📝"
|
||||
)
|
||||
return cls(whylabs_logger)
|
||||
@@ -10,6 +10,7 @@ from langchain.chains.conversational_retrieval.base import (
|
||||
)
|
||||
from langchain.chains.flare.base import FlareChain
|
||||
from langchain.chains.graph_qa.base import GraphQAChain
|
||||
from langchain.chains.graph_qa.cypher import GraphCypherQAChain
|
||||
from langchain.chains.hyde.base import HypotheticalDocumentEmbedder
|
||||
from langchain.chains.llm import LLMChain
|
||||
from langchain.chains.llm_bash.base import LLMBashChain
|
||||
@@ -58,6 +59,7 @@ __all__ = [
|
||||
"HypotheticalDocumentEmbedder",
|
||||
"ChatVectorDBChain",
|
||||
"GraphQAChain",
|
||||
"GraphCypherQAChain",
|
||||
"ConstitutionalChain",
|
||||
"QAGenerationChain",
|
||||
"RetrievalQA",
|
||||
|
||||
@@ -94,25 +94,6 @@ class StuffDocumentsChain(BaseCombineDocumentsChain):
|
||||
# Call predict on the LLM.
|
||||
return await self.llm_chain.apredict(callbacks=callbacks, **inputs), {}
|
||||
|
||||
def combine_docs_and_parse(
|
||||
self, docs: List[Document], callbacks: Callbacks = None, **kwargs: Any
|
||||
) -> Tuple[Any, dict]:
|
||||
"""Stuff all documents into one prompt and pass to LLM."""
|
||||
inputs = self._get_inputs(docs, **kwargs)
|
||||
# Call predict on the LLM.
|
||||
return self.llm_chain.predict_and_parse(callbacks=callbacks, **inputs), {}
|
||||
|
||||
async def acombine_docs_and_parse(
|
||||
self, docs: List[Document], callbacks: Callbacks = None, **kwargs: Any
|
||||
) -> Tuple[str, dict]:
|
||||
"""Stuff all documents into one prompt and pass to LLM."""
|
||||
inputs = self._get_inputs(docs, **kwargs)
|
||||
# Call predict on the LLM.
|
||||
return (
|
||||
await self.llm_chain.apredict_and_parse(callbacks=callbacks, **inputs),
|
||||
{},
|
||||
)
|
||||
|
||||
@property
|
||||
def _chain_type(self) -> str:
|
||||
return "stuff_documents_chain"
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# flake8: noqa
|
||||
from langchain.prompts.prompt import PromptTemplate
|
||||
|
||||
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
|
||||
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
|
||||
|
||||
Chat History:
|
||||
{chat_history}
|
||||
|
||||
90
langchain/chains/graph_qa/cypher.py
Normal file
90
langchain/chains/graph_qa/cypher.py
Normal file
@@ -0,0 +1,90 @@
|
||||
"""Question answering over a graph."""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
from langchain.base_language import BaseLanguageModel
|
||||
from langchain.callbacks.manager import CallbackManagerForChainRun
|
||||
from langchain.chains.base import Chain
|
||||
from langchain.chains.graph_qa.prompts import CYPHER_GENERATION_PROMPT, PROMPT
|
||||
from langchain.chains.llm import LLMChain
|
||||
from langchain.graphs.neo4j_graph import Neo4jGraph
|
||||
from langchain.prompts.base import BasePromptTemplate
|
||||
|
||||
|
||||
class GraphCypherQAChain(Chain):
|
||||
"""Chain for question-answering against a graph by generating Cypher statements."""
|
||||
|
||||
graph: Neo4jGraph = Field(exclude=True)
|
||||
cypher_generation_chain: LLMChain
|
||||
qa_chain: LLMChain
|
||||
input_key: str = "query" #: :meta private:
|
||||
output_key: str = "result" #: :meta private:
|
||||
|
||||
@property
|
||||
def input_keys(self) -> List[str]:
|
||||
"""Return the input keys.
|
||||
|
||||
:meta private:
|
||||
"""
|
||||
return [self.input_key]
|
||||
|
||||
@property
|
||||
def output_keys(self) -> List[str]:
|
||||
"""Return the output keys.
|
||||
|
||||
:meta private:
|
||||
"""
|
||||
_output_keys = [self.output_key]
|
||||
return _output_keys
|
||||
|
||||
@classmethod
|
||||
def from_llm(
|
||||
cls,
|
||||
llm: BaseLanguageModel,
|
||||
*,
|
||||
qa_prompt: BasePromptTemplate = PROMPT,
|
||||
cypher_prompt: BasePromptTemplate = CYPHER_GENERATION_PROMPT,
|
||||
**kwargs: Any,
|
||||
) -> GraphCypherQAChain:
|
||||
"""Initialize from LLM."""
|
||||
qa_chain = LLMChain(llm=llm, prompt=qa_prompt)
|
||||
cypher_generation_chain = LLMChain(llm=llm, prompt=cypher_prompt)
|
||||
|
||||
return cls(
|
||||
qa_chain=qa_chain,
|
||||
cypher_generation_chain=cypher_generation_chain,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
def _call(
|
||||
self,
|
||||
inputs: Dict[str, Any],
|
||||
run_manager: Optional[CallbackManagerForChainRun] = None,
|
||||
) -> Dict[str, str]:
|
||||
"""Generate Cypher statement, use it to look up in db and answer question."""
|
||||
_run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
|
||||
callbacks = _run_manager.get_child()
|
||||
question = inputs[self.input_key]
|
||||
|
||||
generated_cypher = self.cypher_generation_chain.run(
|
||||
{"question": question, "schema": self.graph.get_schema}, callbacks=callbacks
|
||||
)
|
||||
|
||||
_run_manager.on_text("Generated Cypher:", end="\n", verbose=self.verbose)
|
||||
_run_manager.on_text(
|
||||
generated_cypher, color="green", end="\n", verbose=self.verbose
|
||||
)
|
||||
context = self.graph.query(generated_cypher)
|
||||
|
||||
_run_manager.on_text("Full Context:", end="\n", verbose=self.verbose)
|
||||
_run_manager.on_text(
|
||||
str(context), color="green", end="\n", verbose=self.verbose
|
||||
)
|
||||
result = self.qa_chain(
|
||||
{"question": question, "context": context},
|
||||
callbacks=callbacks,
|
||||
)
|
||||
return {self.output_key: result[self.qa_chain.output_key]}
|
||||
@@ -32,3 +32,19 @@ Helpful Answer:"""
|
||||
PROMPT = PromptTemplate(
|
||||
template=prompt_template, input_variables=["context", "question"]
|
||||
)
|
||||
|
||||
CYPHER_GENERATION_TEMPLATE = """Task:Generate Cypher statement to query a graph database.
|
||||
Instructions:
|
||||
Use only the provided relationship types and properties in the schema.
|
||||
Do not use any other relationship types or properties that are not provided.
|
||||
Schema:
|
||||
{schema}
|
||||
Note: Do not include any explanations or apologies in your responses.
|
||||
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
|
||||
Do not include any text except the generated Cypher statement.
|
||||
|
||||
The question is:
|
||||
{question}"""
|
||||
CYPHER_GENERATION_PROMPT = PromptTemplate(
|
||||
input_variables=["schema", "question"], template=CYPHER_GENERATION_TEMPLATE
|
||||
)
|
||||
|
||||
@@ -54,7 +54,7 @@ class OpenAIModerationChain(Chain):
|
||||
openai.organization = openai_organization
|
||||
values["client"] = openai.Moderation
|
||||
except ImportError:
|
||||
raise ValueError(
|
||||
raise ImportError(
|
||||
"Could not import openai python package. "
|
||||
"Please install it with `pip install openai`."
|
||||
)
|
||||
|
||||
@@ -141,3 +141,9 @@ class ChatAnthropic(BaseChatModel, _AnthropicCommon):
|
||||
completion = response["completion"]
|
||||
message = AIMessage(content=completion)
|
||||
return ChatResult(generations=[ChatGeneration(message=message)])
|
||||
|
||||
def get_num_tokens(self, text: str) -> int:
|
||||
"""Calculate number of tokens."""
|
||||
if not self.count_tokens:
|
||||
raise NameError("Please ensure the anthropic package is loaded")
|
||||
return self.count_tokens(text)
|
||||
|
||||
@@ -86,7 +86,7 @@ class AzureChatOpenAI(ChatOpenAI):
|
||||
if openai_organization:
|
||||
openai.organization = openai_organization
|
||||
except ImportError:
|
||||
raise ValueError(
|
||||
raise ImportError(
|
||||
"Could not import openai python package. "
|
||||
"Please install it with `pip install openai`."
|
||||
)
|
||||
|
||||
@@ -183,6 +183,19 @@ class BaseChatModel(BaseLanguageModel, ABC):
|
||||
else:
|
||||
raise ValueError("Unexpected generation type")
|
||||
|
||||
async def _call_async(
|
||||
self,
|
||||
messages: List[BaseMessage],
|
||||
stop: Optional[List[str]] = None,
|
||||
callbacks: Callbacks = None,
|
||||
) -> BaseMessage:
|
||||
result = await self.agenerate([messages], stop=stop, callbacks=callbacks)
|
||||
generation = result.generations[0][0]
|
||||
if isinstance(generation, ChatGeneration):
|
||||
return generation.message
|
||||
else:
|
||||
raise ValueError("Unexpected generation type")
|
||||
|
||||
def call_as_llm(self, message: str, stop: Optional[List[str]] = None) -> str:
|
||||
return self.predict(message, stop=stop)
|
||||
|
||||
@@ -203,6 +216,23 @@ class BaseChatModel(BaseLanguageModel, ABC):
|
||||
_stop = list(stop)
|
||||
return self(messages, stop=_stop)
|
||||
|
||||
async def apredict(self, text: str, *, stop: Optional[Sequence[str]] = None) -> str:
|
||||
if stop is None:
|
||||
_stop = None
|
||||
else:
|
||||
_stop = list(stop)
|
||||
result = await self._call_async([HumanMessage(content=text)], stop=_stop)
|
||||
return result.content
|
||||
|
||||
async def apredict_messages(
|
||||
self, messages: List[BaseMessage], *, stop: Optional[Sequence[str]] = None
|
||||
) -> BaseMessage:
|
||||
if stop is None:
|
||||
_stop = None
|
||||
else:
|
||||
_stop = list(stop)
|
||||
return await self._call_async(messages, stop=_stop)
|
||||
|
||||
@property
|
||||
def _identifying_params(self) -> Mapping[str, Any]:
|
||||
"""Get the identifying parameters."""
|
||||
|
||||
@@ -258,7 +258,8 @@ class ChatGooglePalm(BaseChatModel, BaseModel):
|
||||
genai.configure(api_key=google_api_key)
|
||||
except ImportError:
|
||||
raise ChatGooglePalmError(
|
||||
"Could not import google.generativeai python package."
|
||||
"Could not import google.generativeai python package. "
|
||||
"Please install it with `pip install google-generativeai`"
|
||||
)
|
||||
|
||||
values["client"] = genai
|
||||
|
||||
@@ -3,7 +3,17 @@ from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import sys
|
||||
from typing import Any, Callable, Dict, List, Mapping, Optional, Tuple, Union
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Callable,
|
||||
Dict,
|
||||
List,
|
||||
Mapping,
|
||||
Optional,
|
||||
Tuple,
|
||||
Union,
|
||||
)
|
||||
|
||||
from pydantic import Extra, Field, root_validator
|
||||
from tenacity import (
|
||||
@@ -30,9 +40,24 @@ from langchain.schema import (
|
||||
)
|
||||
from langchain.utils import get_from_dict_or_env
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import tiktoken
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _import_tiktoken() -> Any:
|
||||
try:
|
||||
import tiktoken
|
||||
except ImportError:
|
||||
raise ValueError(
|
||||
"Could not import tiktoken python package. "
|
||||
"This is needed in order to calculate get_token_ids. "
|
||||
"Please install it with `pip install tiktoken`."
|
||||
)
|
||||
return tiktoken
|
||||
|
||||
|
||||
def _create_retry_decorator(llm: ChatOpenAI) -> Callable[[Any], Any]:
|
||||
import openai
|
||||
|
||||
@@ -354,42 +379,8 @@ class ChatOpenAI(BaseChatModel):
|
||||
"""Return type of chat model."""
|
||||
return "openai-chat"
|
||||
|
||||
def get_num_tokens(self, text: str) -> int:
|
||||
"""Calculate num tokens with tiktoken package."""
|
||||
# tiktoken NOT supported for Python 3.7 or below
|
||||
if sys.version_info[1] <= 7:
|
||||
return super().get_num_tokens(text)
|
||||
try:
|
||||
import tiktoken
|
||||
except ImportError:
|
||||
raise ValueError(
|
||||
"Could not import tiktoken python package. "
|
||||
"This is needed in order to calculate get_num_tokens. "
|
||||
"Please install it with `pip install tiktoken`."
|
||||
)
|
||||
# create a GPT-3.5-Turbo encoder instance
|
||||
enc = tiktoken.encoding_for_model(self.model_name)
|
||||
|
||||
# encode the text using the GPT-3.5-Turbo encoder
|
||||
tokenized_text = enc.encode(text)
|
||||
|
||||
# calculate the number of tokens in the encoded text
|
||||
return len(tokenized_text)
|
||||
|
||||
def get_num_tokens_from_messages(self, messages: List[BaseMessage]) -> int:
|
||||
"""Calculate num tokens for gpt-3.5-turbo and gpt-4 with tiktoken package.
|
||||
|
||||
Official documentation: https://github.com/openai/openai-cookbook/blob/
|
||||
main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb"""
|
||||
try:
|
||||
import tiktoken
|
||||
except ImportError:
|
||||
raise ValueError(
|
||||
"Could not import tiktoken python package. "
|
||||
"This is needed in order to calculate get_num_tokens. "
|
||||
"Please install it with `pip install tiktoken`."
|
||||
)
|
||||
|
||||
def _get_encoding_model(self) -> Tuple[str, tiktoken.Encoding]:
|
||||
tiktoken_ = _import_tiktoken()
|
||||
model = self.model_name
|
||||
if model == "gpt-3.5-turbo":
|
||||
# gpt-3.5-turbo may change over time.
|
||||
@@ -399,14 +390,31 @@ class ChatOpenAI(BaseChatModel):
|
||||
# gpt-4 may change over time.
|
||||
# Returning num tokens assuming gpt-4-0314.
|
||||
model = "gpt-4-0314"
|
||||
|
||||
# Returns the number of tokens used by a list of messages.
|
||||
try:
|
||||
encoding = tiktoken.encoding_for_model(model)
|
||||
encoding = tiktoken_.encoding_for_model(model)
|
||||
except KeyError:
|
||||
logger.warning("Warning: model not found. Using cl100k_base encoding.")
|
||||
encoding = tiktoken.get_encoding("cl100k_base")
|
||||
model = "cl100k_base"
|
||||
encoding = tiktoken_.get_encoding(model)
|
||||
return model, encoding
|
||||
|
||||
def get_token_ids(self, text: str) -> List[int]:
|
||||
"""Get the tokens present in the text with tiktoken package."""
|
||||
# tiktoken NOT supported for Python 3.7 or below
|
||||
if sys.version_info[1] <= 7:
|
||||
return super().get_token_ids(text)
|
||||
_, encoding_model = self._get_encoding_model()
|
||||
return encoding_model.encode(text)
|
||||
|
||||
def get_num_tokens_from_messages(self, messages: List[BaseMessage]) -> int:
|
||||
"""Calculate num tokens for gpt-3.5-turbo and gpt-4 with tiktoken package.
|
||||
|
||||
Official documentation: https://github.com/openai/openai-cookbook/blob/
|
||||
main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb"""
|
||||
if sys.version_info[1] <= 7:
|
||||
return super().get_num_tokens_from_messages(messages)
|
||||
model, encoding = self._get_encoding_model()
|
||||
if model == "gpt-3.5-turbo-0301":
|
||||
# every message follows <im_start>{role/name}\n{content}<im_end>\n
|
||||
tokens_per_message = 4
|
||||
|
||||
@@ -5,9 +5,7 @@ services:
|
||||
ports:
|
||||
- 80:80
|
||||
environment:
|
||||
- BACKEND_URL=http://langchain-backend:8000
|
||||
- PUBLIC_BASE_URL=http://localhost:8000
|
||||
- PUBLIC_DEV_MODE=true
|
||||
- REACT_APP_BACKEND_URL=http://localhost:1984
|
||||
depends_on:
|
||||
- langchain-backend
|
||||
volumes:
|
||||
@@ -18,11 +16,11 @@ services:
|
||||
langchain-backend:
|
||||
image: langchain/${_LANGCHAINPLUS_IMAGE_PREFIX-}langchainplus-backend:latest
|
||||
environment:
|
||||
- PORT=8000
|
||||
- PORT=1984
|
||||
- LANGCHAIN_ENV=local_docker
|
||||
- LOG_LEVEL=warning
|
||||
ports:
|
||||
- 8000:8000
|
||||
- 1984:1984
|
||||
depends_on:
|
||||
- langchain-db
|
||||
build:
|
||||
|
||||
@@ -201,6 +201,19 @@ class PlusCommand:
|
||||
]
|
||||
)
|
||||
|
||||
def logs(self) -> None:
|
||||
"""Print the logs from the LangChainPlus server."""
|
||||
subprocess.run(
|
||||
[
|
||||
*self.docker_compose_command,
|
||||
"-f",
|
||||
str(self.docker_compose_file),
|
||||
"-f",
|
||||
str(self.ngrok_path),
|
||||
"logs",
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
def env() -> None:
|
||||
"""Print the runtime environment information."""
|
||||
@@ -248,6 +261,11 @@ def main() -> None:
|
||||
)
|
||||
server_stop_parser.set_defaults(func=lambda args: server_command.stop())
|
||||
|
||||
server_logs_parser = server_subparsers.add_parser(
|
||||
"logs", description="Show the LangChainPlus server logs."
|
||||
)
|
||||
server_logs_parser.set_defaults(func=lambda args: server_command.logs())
|
||||
|
||||
env_parser = subparsers.add_parser("env")
|
||||
env_parser.set_defaults(func=lambda args: env())
|
||||
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import functools
|
||||
import logging
|
||||
import socket
|
||||
from datetime import datetime
|
||||
@@ -10,11 +8,12 @@ from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Callable,
|
||||
Coroutine,
|
||||
Dict,
|
||||
Iterable,
|
||||
Iterator,
|
||||
List,
|
||||
Mapping,
|
||||
Optional,
|
||||
Sequence,
|
||||
Tuple,
|
||||
Union,
|
||||
)
|
||||
@@ -27,26 +26,21 @@ from requests import Response
|
||||
from tenacity import retry, stop_after_attempt, wait_fixed
|
||||
|
||||
from langchain.base_language import BaseLanguageModel
|
||||
from langchain.callbacks.tracers.langchain import LangChainTracer
|
||||
from langchain.callbacks.tracers.schemas import Run, TracerSession
|
||||
from langchain.chains.base import Chain
|
||||
from langchain.chat_models.base import BaseChatModel
|
||||
from langchain.client.models import (
|
||||
APIFeedbackSource,
|
||||
Dataset,
|
||||
DatasetCreate,
|
||||
Example,
|
||||
ExampleCreate,
|
||||
ExampleUpdate,
|
||||
Feedback,
|
||||
FeedbackCreate,
|
||||
ListFeedbackQueryParams,
|
||||
ListRunsQueryParams,
|
||||
)
|
||||
from langchain.llms.base import BaseLLM
|
||||
from langchain.schema import (
|
||||
BaseMessage,
|
||||
ChatResult,
|
||||
HumanMessage,
|
||||
LLMResult,
|
||||
get_buffer_string,
|
||||
messages_from_dict,
|
||||
)
|
||||
from langchain.client.runner_utils import arun_on_examples, run_on_examples
|
||||
from langchain.utils import raise_for_status_with_text, xor_args
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -57,10 +51,6 @@ logger = logging.getLogger(__name__)
|
||||
MODEL_OR_CHAIN_FACTORY = Union[Callable[[], Chain], BaseLanguageModel]
|
||||
|
||||
|
||||
class InputFormatError(Exception):
|
||||
"""Raised when input format is invalid."""
|
||||
|
||||
|
||||
def _get_link_stem(url: str) -> str:
|
||||
scheme = urlsplit(url).scheme
|
||||
netloc_prefix = urlsplit(url).netloc.split(":")[0]
|
||||
@@ -81,13 +71,13 @@ class LangChainPlusClient(BaseSettings):
|
||||
"""Client for interacting with the LangChain+ API."""
|
||||
|
||||
api_key: Optional[str] = Field(default=None, env="LANGCHAIN_API_KEY")
|
||||
api_url: str = Field(default="http://localhost:8000", env="LANGCHAIN_ENDPOINT")
|
||||
api_url: str = Field(default="http://localhost:1984", env="LANGCHAIN_ENDPOINT")
|
||||
tenant_id: Optional[str] = None
|
||||
|
||||
@root_validator(pre=True)
|
||||
def validate_api_key_if_hosted(cls, values: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Verify API key is provided if url not localhost."""
|
||||
api_url: str = values.get("api_url", "http://localhost:8000")
|
||||
api_url: str = values.get("api_url", "http://localhost:1984")
|
||||
api_key: Optional[str] = values.get("api_key")
|
||||
if not _is_localhost(api_url):
|
||||
if not api_key:
|
||||
@@ -175,8 +165,8 @@ class LangChainPlusClient(BaseSettings):
|
||||
df: pd.DataFrame,
|
||||
name: str,
|
||||
description: str,
|
||||
input_keys: List[str],
|
||||
output_keys: List[str],
|
||||
input_keys: Sequence[str],
|
||||
output_keys: Sequence[str],
|
||||
) -> Dataset:
|
||||
"""Upload a dataframe as individual examples to the LangChain+ API."""
|
||||
dataset = self.create_dataset(dataset_name=name, description=description)
|
||||
@@ -190,8 +180,8 @@ class LangChainPlusClient(BaseSettings):
|
||||
self,
|
||||
csv_file: Union[str, Tuple[str, BytesIO]],
|
||||
description: str,
|
||||
input_keys: List[str],
|
||||
output_keys: List[str],
|
||||
input_keys: Sequence[str],
|
||||
output_keys: Sequence[str],
|
||||
) -> Dataset:
|
||||
"""Upload a CSV file to the LangChain+ API."""
|
||||
files = {"file": csv_file}
|
||||
@@ -231,7 +221,7 @@ class LangChainPlusClient(BaseSettings):
|
||||
session_name: Optional[str] = None,
|
||||
run_type: Optional[str] = None,
|
||||
**kwargs: Any,
|
||||
) -> List[Run]:
|
||||
) -> Iterator[Run]:
|
||||
"""List runs from the LangChain+ API."""
|
||||
if session_name is not None:
|
||||
if session_id is not None:
|
||||
@@ -240,12 +230,9 @@ class LangChainPlusClient(BaseSettings):
|
||||
query_params = ListRunsQueryParams(
|
||||
session_id=session_id, run_type=run_type, **kwargs
|
||||
)
|
||||
filtered_params = {
|
||||
k: v for k, v in query_params.dict().items() if v is not None
|
||||
}
|
||||
response = self._get("/runs", params=filtered_params)
|
||||
response = self._get("/runs", params=query_params.dict(exclude_none=True))
|
||||
raise_for_status_with_text(response)
|
||||
return [Run(**run) for run in response.json()]
|
||||
yield from [Run(**run) for run in response.json()]
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_fixed(0.5))
|
||||
@xor_args(("session_id", "session_name"))
|
||||
@@ -279,13 +266,15 @@ class LangChainPlusClient(BaseSettings):
|
||||
return TracerSession(**response.json())
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_fixed(0.5))
|
||||
def list_sessions(self) -> List[TracerSession]:
|
||||
def list_sessions(self) -> Iterator[TracerSession]:
|
||||
"""List sessions from the LangChain+ API."""
|
||||
response = self._get("/sessions")
|
||||
raise_for_status_with_text(response)
|
||||
return [TracerSession(**session) for session in response.json()]
|
||||
yield from [TracerSession(**session) for session in response.json()]
|
||||
|
||||
def create_dataset(self, dataset_name: str, description: str) -> Dataset:
|
||||
def create_dataset(
|
||||
self, dataset_name: str, *, description: Optional[str] = None
|
||||
) -> Dataset:
|
||||
"""Create a dataset in the LangChain+ API."""
|
||||
dataset = DatasetCreate(
|
||||
tenant_id=self.tenant_id,
|
||||
@@ -326,11 +315,11 @@ class LangChainPlusClient(BaseSettings):
|
||||
return Dataset(**result)
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_fixed(0.5))
|
||||
def list_datasets(self, limit: int = 100) -> Iterable[Dataset]:
|
||||
def list_datasets(self, limit: int = 100) -> Iterator[Dataset]:
|
||||
"""List the datasets on the LangChain+ API."""
|
||||
response = self._get("/datasets", params={"limit": limit})
|
||||
raise_for_status_with_text(response)
|
||||
return [Dataset(**dataset) for dataset in response.json()]
|
||||
yield from [Dataset(**dataset) for dataset in response.json()]
|
||||
|
||||
@xor_args(("dataset_id", "dataset_name"))
|
||||
def delete_dataset(
|
||||
@@ -346,7 +335,7 @@ class LangChainPlusClient(BaseSettings):
|
||||
headers=self._headers,
|
||||
)
|
||||
raise_for_status_with_text(response)
|
||||
return response.json()
|
||||
return Dataset(**response.json())
|
||||
|
||||
@xor_args(("dataset_id", "dataset_name"))
|
||||
def create_example(
|
||||
@@ -359,7 +348,7 @@ class LangChainPlusClient(BaseSettings):
|
||||
) -> Example:
|
||||
"""Create a dataset example in the LangChain+ API."""
|
||||
if dataset_id is None:
|
||||
dataset_id = self.read_dataset(dataset_name).id
|
||||
dataset_id = self.read_dataset(dataset_name=dataset_name).id
|
||||
|
||||
data = {
|
||||
"inputs": inputs,
|
||||
@@ -386,7 +375,7 @@ class LangChainPlusClient(BaseSettings):
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_fixed(0.5))
|
||||
def list_examples(
|
||||
self, dataset_id: Optional[str] = None, dataset_name: Optional[str] = None
|
||||
) -> Iterable[Example]:
|
||||
) -> Iterator[Example]:
|
||||
"""List the datasets on the LangChain+ API."""
|
||||
params = {}
|
||||
if dataset_id is not None:
|
||||
@@ -398,195 +387,94 @@ class LangChainPlusClient(BaseSettings):
|
||||
pass
|
||||
response = self._get("/examples", params=params)
|
||||
raise_for_status_with_text(response)
|
||||
return [Example(**dataset) for dataset in response.json()]
|
||||
yield from [Example(**dataset) for dataset in response.json()]
|
||||
|
||||
@staticmethod
|
||||
def _get_prompts(inputs: Dict[str, Any]) -> List[str]:
|
||||
"""Get prompts from inputs."""
|
||||
if not inputs:
|
||||
raise InputFormatError("Inputs should not be empty.")
|
||||
|
||||
prompts = []
|
||||
|
||||
if "prompt" in inputs:
|
||||
if not isinstance(inputs["prompt"], str):
|
||||
raise InputFormatError(
|
||||
"Expected string for 'prompt', got"
|
||||
f" {type(inputs['prompt']).__name__}"
|
||||
)
|
||||
prompts = [inputs["prompt"]]
|
||||
elif "prompts" in inputs:
|
||||
if not isinstance(inputs["prompts"], list) or not all(
|
||||
isinstance(i, str) for i in inputs["prompts"]
|
||||
):
|
||||
raise InputFormatError(
|
||||
"Expected list of strings for 'prompts',"
|
||||
f" got {type(inputs['prompts']).__name__}"
|
||||
)
|
||||
prompts = inputs["prompts"]
|
||||
elif len(inputs) == 1:
|
||||
prompt_ = next(iter(inputs.values()))
|
||||
if isinstance(prompt_, str):
|
||||
prompts = [prompt_]
|
||||
elif isinstance(prompt_, list) and all(isinstance(i, str) for i in prompt_):
|
||||
prompts = prompt_
|
||||
else:
|
||||
raise InputFormatError(
|
||||
f"LLM Run expects string prompt input. Got {inputs}"
|
||||
)
|
||||
else:
|
||||
raise InputFormatError(
|
||||
f"LLM Run expects 'prompt' or 'prompts' in inputs. Got {inputs}"
|
||||
)
|
||||
|
||||
return prompts
|
||||
|
||||
@staticmethod
|
||||
def _get_messages(inputs: Dict[str, Any]) -> List[List[BaseMessage]]:
|
||||
"""Get Chat Messages from inputs."""
|
||||
if not inputs:
|
||||
raise InputFormatError("Inputs should not be empty.")
|
||||
|
||||
if "messages" in inputs:
|
||||
single_input = inputs["messages"]
|
||||
elif len(inputs) == 1:
|
||||
single_input = next(iter(inputs.values()))
|
||||
else:
|
||||
raise InputFormatError(
|
||||
f"Chat Run expects 'messages' in inputs. Got {inputs}"
|
||||
)
|
||||
if isinstance(single_input, list) and all(
|
||||
isinstance(i, dict) for i in single_input
|
||||
):
|
||||
raw_messages = [single_input]
|
||||
elif isinstance(single_input, list) and all(
|
||||
isinstance(i, list) for i in single_input
|
||||
):
|
||||
raw_messages = single_input
|
||||
else:
|
||||
raise InputFormatError(
|
||||
f"Chat Run expects List[dict] or List[List[dict]] 'messages'"
|
||||
f" input. Got {inputs}"
|
||||
)
|
||||
return [messages_from_dict(batch) for batch in raw_messages]
|
||||
|
||||
@staticmethod
|
||||
async def _arun_llm(
|
||||
llm: BaseLanguageModel,
|
||||
inputs: Dict[str, Any],
|
||||
langchain_tracer: LangChainTracer,
|
||||
) -> Union[LLMResult, ChatResult]:
|
||||
if isinstance(llm, BaseLLM):
|
||||
try:
|
||||
llm_prompts = LangChainPlusClient._get_prompts(inputs)
|
||||
llm_output = await llm.agenerate(
|
||||
llm_prompts, callbacks=[langchain_tracer]
|
||||
)
|
||||
except InputFormatError:
|
||||
llm_messages = LangChainPlusClient._get_messages(inputs)
|
||||
buffer_strings = [
|
||||
get_buffer_string(messages) for messages in llm_messages
|
||||
]
|
||||
llm_output = await llm.agenerate(
|
||||
buffer_strings, callbacks=[langchain_tracer]
|
||||
)
|
||||
elif isinstance(llm, BaseChatModel):
|
||||
try:
|
||||
messages = LangChainPlusClient._get_messages(inputs)
|
||||
llm_output = await llm.agenerate(messages, callbacks=[langchain_tracer])
|
||||
except InputFormatError:
|
||||
prompts = LangChainPlusClient._get_prompts(inputs)
|
||||
converted_messages: List[List[BaseMessage]] = [
|
||||
[HumanMessage(content=prompt)] for prompt in prompts
|
||||
]
|
||||
llm_output = await llm.agenerate(
|
||||
converted_messages, callbacks=[langchain_tracer]
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Unsupported LLM type {type(llm)}")
|
||||
return llm_output
|
||||
|
||||
@staticmethod
|
||||
async def _arun_llm_or_chain(
|
||||
example: Example,
|
||||
langchain_tracer: LangChainTracer,
|
||||
llm_or_chain_factory: MODEL_OR_CHAIN_FACTORY,
|
||||
n_repetitions: int,
|
||||
) -> Union[List[dict], List[str], List[LLMResult], List[ChatResult]]:
|
||||
"""Run the chain asynchronously."""
|
||||
previous_example_id = langchain_tracer.example_id
|
||||
langchain_tracer.example_id = example.id
|
||||
outputs = []
|
||||
for _ in range(n_repetitions):
|
||||
try:
|
||||
if isinstance(llm_or_chain_factory, BaseLanguageModel):
|
||||
output: Any = await LangChainPlusClient._arun_llm(
|
||||
llm_or_chain_factory, example.inputs, langchain_tracer
|
||||
)
|
||||
else:
|
||||
chain = llm_or_chain_factory()
|
||||
output = await chain.arun(
|
||||
example.inputs, callbacks=[langchain_tracer]
|
||||
)
|
||||
outputs.append(output)
|
||||
except Exception as e:
|
||||
logger.warning(f"Chain failed for example {example.id}. Error: {e}")
|
||||
outputs.append({"Error": str(e)})
|
||||
langchain_tracer.example_id = previous_example_id
|
||||
return outputs
|
||||
|
||||
@staticmethod
|
||||
async def _gather_with_concurrency(
|
||||
n: int,
|
||||
initializer: Callable[[], Coroutine[Any, Any, LangChainTracer]],
|
||||
*async_funcs: Callable[[LangChainTracer, Dict], Coroutine[Any, Any, Any]],
|
||||
) -> List[Any]:
|
||||
"""
|
||||
Run coroutines with a concurrency limit.
|
||||
|
||||
Args:
|
||||
n: The maximum number of concurrent tasks.
|
||||
initializer: A coroutine that initializes shared resources for the tasks.
|
||||
async_funcs: The async_funcs to be run concurrently.
|
||||
|
||||
Returns:
|
||||
A list of results from the coroutines.
|
||||
"""
|
||||
semaphore = asyncio.Semaphore(n)
|
||||
job_state = {"num_processed": 0}
|
||||
|
||||
tracer_queue: asyncio.Queue[LangChainTracer] = asyncio.Queue()
|
||||
for _ in range(n):
|
||||
tracer_queue.put_nowait(await initializer())
|
||||
|
||||
async def run_coroutine_with_semaphore(
|
||||
async_func: Callable[[LangChainTracer, Dict], Coroutine[Any, Any, Any]]
|
||||
) -> Any:
|
||||
async with semaphore:
|
||||
tracer = await tracer_queue.get()
|
||||
try:
|
||||
result = await async_func(tracer, job_state)
|
||||
finally:
|
||||
tracer_queue.put_nowait(tracer)
|
||||
return result
|
||||
|
||||
return await asyncio.gather(
|
||||
*(run_coroutine_with_semaphore(function) for function in async_funcs)
|
||||
def update_example(
|
||||
self,
|
||||
example_id: str,
|
||||
*,
|
||||
inputs: Optional[Mapping[str, Any]] = None,
|
||||
outputs: Optional[Mapping[str, Any]] = None,
|
||||
dataset_id: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Update a specific example."""
|
||||
example = ExampleUpdate(
|
||||
inputs=inputs,
|
||||
outputs=outputs,
|
||||
dataset_id=dataset_id,
|
||||
)
|
||||
response = requests.patch(
|
||||
f"{self.api_url}/examples/{example_id}",
|
||||
headers=self._headers,
|
||||
data=example.json(exclude_none=True),
|
||||
)
|
||||
raise_for_status_with_text(response)
|
||||
return response.json()
|
||||
|
||||
async def _tracer_initializer(self, session_name: str) -> LangChainTracer:
|
||||
"""
|
||||
Initialize a tracer to share across tasks.
|
||||
def create_feedback(
|
||||
self,
|
||||
run_id: str,
|
||||
metric_name: str,
|
||||
metric_value: Union[float, str],
|
||||
*,
|
||||
source_info: Optional[Dict[str, Any]] = None,
|
||||
) -> Feedback:
|
||||
"""Create a feedback in the LangChain+ API.
|
||||
|
||||
Args:
|
||||
session_name: The session name for the tracer.
|
||||
|
||||
Returns:
|
||||
A LangChainTracer instance with an active session.
|
||||
run_id: The ID of the run to provide feedback on.
|
||||
metric_name: The name of the metric, tag, or 'aspect' this
|
||||
feedback is about.
|
||||
metric_value: The score to rate this run on the metric, or
|
||||
the value or label to assign for this metric.
|
||||
source_info: Information about the source of this feedback.
|
||||
extra: Extra information to include with the feedback.
|
||||
"""
|
||||
tracer = LangChainTracer(session_name=session_name)
|
||||
tracer.ensure_session()
|
||||
return tracer
|
||||
feedback_source = APIFeedbackSource(metadata=source_info)
|
||||
feedback = FeedbackCreate(
|
||||
run_id=run_id,
|
||||
metric_name=metric_name,
|
||||
metric_value=metric_value,
|
||||
feedback_source=feedback_source,
|
||||
)
|
||||
response = requests.post(
|
||||
self.api_url + "/feedback",
|
||||
headers=self._headers,
|
||||
data=feedback.json(),
|
||||
)
|
||||
raise_for_status_with_text(response)
|
||||
return Feedback(**feedback.dict())
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_fixed(0.5))
|
||||
def read_feedback(self, feedback_id: str) -> Feedback:
|
||||
"""Read a feedback from the LangChain+ API."""
|
||||
response = self._get(f"/feedback/{feedback_id}")
|
||||
raise_for_status_with_text(response)
|
||||
return Feedback(**response.json())
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_fixed(0.5))
|
||||
def list_feedback(
|
||||
self,
|
||||
*,
|
||||
run_ids: Optional[Sequence[Union[str, UUID]]] = None,
|
||||
**kwargs: Any,
|
||||
) -> Iterator[Feedback]:
|
||||
"""List the feedback objects on the LangChain+ API."""
|
||||
params = ListFeedbackQueryParams(
|
||||
run=run_ids,
|
||||
**kwargs,
|
||||
)
|
||||
response = self._get("/feedback", params=params.dict(exclude_none=True))
|
||||
raise_for_status_with_text(response)
|
||||
yield from [Feedback(**feedback) for feedback in response.json()]
|
||||
|
||||
def delete_feedback(self, feedback_id: str) -> None:
|
||||
"""Delete a feedback by ID."""
|
||||
response = requests.delete(
|
||||
f"{self.api_url}/feedback/{feedback_id}",
|
||||
headers=self._headers,
|
||||
)
|
||||
raise_for_status_with_text(response)
|
||||
|
||||
async def arun_on_dataset(
|
||||
self,
|
||||
@@ -622,93 +510,15 @@ class LangChainPlusClient(BaseSettings):
|
||||
)
|
||||
dataset = self.read_dataset(dataset_name=dataset_name)
|
||||
examples = self.list_examples(dataset_id=str(dataset.id))
|
||||
results: Dict[str, List[Any]] = {}
|
||||
|
||||
async def process_example(
|
||||
example: Example, tracer: LangChainTracer, job_state: dict
|
||||
) -> None:
|
||||
"""Process a single example."""
|
||||
result = await LangChainPlusClient._arun_llm_or_chain(
|
||||
example,
|
||||
tracer,
|
||||
llm_or_chain_factory,
|
||||
num_repetitions,
|
||||
)
|
||||
results[str(example.id)] = result
|
||||
job_state["num_processed"] += 1
|
||||
if verbose:
|
||||
print(
|
||||
f"Processed examples: {job_state['num_processed']}",
|
||||
end="\r",
|
||||
flush=True,
|
||||
)
|
||||
|
||||
await self._gather_with_concurrency(
|
||||
concurrency_level,
|
||||
functools.partial(self._tracer_initializer, session_name),
|
||||
*(functools.partial(process_example, e) for e in examples),
|
||||
return await arun_on_examples(
|
||||
examples,
|
||||
llm_or_chain_factory,
|
||||
concurrency_level=concurrency_level,
|
||||
num_repetitions=num_repetitions,
|
||||
session_name=session_name,
|
||||
verbose=verbose,
|
||||
)
|
||||
return results
|
||||
|
||||
@staticmethod
|
||||
def run_llm(
|
||||
llm: BaseLanguageModel,
|
||||
inputs: Dict[str, Any],
|
||||
langchain_tracer: LangChainTracer,
|
||||
) -> Union[LLMResult, ChatResult]:
|
||||
"""Run the language model on the example."""
|
||||
if isinstance(llm, BaseLLM):
|
||||
try:
|
||||
llm_prompts = LangChainPlusClient._get_prompts(inputs)
|
||||
llm_output = llm.generate(llm_prompts, callbacks=[langchain_tracer])
|
||||
except InputFormatError:
|
||||
llm_messages = LangChainPlusClient._get_messages(inputs)
|
||||
buffer_strings = [
|
||||
get_buffer_string(messages) for messages in llm_messages
|
||||
]
|
||||
llm_output = llm.generate(buffer_strings, callbacks=[langchain_tracer])
|
||||
elif isinstance(llm, BaseChatModel):
|
||||
try:
|
||||
messages = LangChainPlusClient._get_messages(inputs)
|
||||
llm_output = llm.generate(messages, callbacks=[langchain_tracer])
|
||||
except InputFormatError:
|
||||
prompts = LangChainPlusClient._get_prompts(inputs)
|
||||
converted_messages: List[List[BaseMessage]] = [
|
||||
[HumanMessage(content=prompt)] for prompt in prompts
|
||||
]
|
||||
llm_output = llm.generate(
|
||||
converted_messages, callbacks=[langchain_tracer]
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Unsupported LLM type {type(llm)}")
|
||||
return llm_output
|
||||
|
||||
@staticmethod
|
||||
def run_llm_or_chain(
|
||||
example: Example,
|
||||
langchain_tracer: LangChainTracer,
|
||||
llm_or_chain_factory: MODEL_OR_CHAIN_FACTORY,
|
||||
n_repetitions: int,
|
||||
) -> Union[List[dict], List[str], List[LLMResult], List[ChatResult]]:
|
||||
"""Run the chain synchronously."""
|
||||
previous_example_id = langchain_tracer.example_id
|
||||
langchain_tracer.example_id = example.id
|
||||
outputs = []
|
||||
for _ in range(n_repetitions):
|
||||
try:
|
||||
if isinstance(llm_or_chain_factory, BaseLanguageModel):
|
||||
output: Any = LangChainPlusClient.run_llm(
|
||||
llm_or_chain_factory, example.inputs, langchain_tracer
|
||||
)
|
||||
else:
|
||||
chain = llm_or_chain_factory()
|
||||
output = chain.run(example.inputs, callbacks=[langchain_tracer])
|
||||
outputs.append(output)
|
||||
except Exception as e:
|
||||
logger.warning(f"Chain failed for example {example.id}. Error: {e}")
|
||||
outputs.append({"Error": str(e)})
|
||||
langchain_tracer.example_id = previous_example_id
|
||||
return outputs
|
||||
|
||||
def run_on_dataset(
|
||||
self,
|
||||
@@ -741,18 +551,11 @@ class LangChainPlusClient(BaseSettings):
|
||||
session_name, llm_or_chain_factory, dataset_name
|
||||
)
|
||||
dataset = self.read_dataset(dataset_name=dataset_name)
|
||||
examples = list(self.list_examples(dataset_id=str(dataset.id)))
|
||||
results: Dict[str, Any] = {}
|
||||
tracer = LangChainTracer(session_name=session_name)
|
||||
tracer.ensure_session()
|
||||
for i, example in enumerate(examples):
|
||||
result = self.run_llm_or_chain(
|
||||
example,
|
||||
tracer,
|
||||
llm_or_chain_factory,
|
||||
num_repetitions,
|
||||
)
|
||||
if verbose:
|
||||
print(f"{i+1} processed", flush=True, end="\r")
|
||||
results[str(example.id)] = result
|
||||
return results
|
||||
examples = self.list_examples(dataset_id=str(dataset.id))
|
||||
return run_on_examples(
|
||||
examples,
|
||||
llm_or_chain_factory,
|
||||
num_repetitions=num_repetitions,
|
||||
session_name=session_name,
|
||||
verbose=verbose,
|
||||
)
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional
|
||||
from uuid import UUID
|
||||
from typing import Any, ClassVar, Dict, List, Mapping, Optional, Sequence, Union
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
from pydantic import BaseModel, Field, root_validator
|
||||
|
||||
from langchain.callbacks.tracers.schemas import Run, RunTypeEnum
|
||||
|
||||
|
||||
class ExampleBase(BaseModel):
|
||||
class ExampleBase(BaseModel, frozen=True):
|
||||
"""Example base model."""
|
||||
|
||||
dataset_id: UUID
|
||||
@@ -31,12 +31,20 @@ class Example(ExampleBase):
|
||||
runs: List[Run] = Field(default_factory=list)
|
||||
|
||||
|
||||
class DatasetBase(BaseModel):
|
||||
class ExampleUpdate(BaseModel, frozen=True):
|
||||
"""Update class for Example."""
|
||||
|
||||
dataset_id: Optional[UUID] = None
|
||||
inputs: Optional[Dict[str, Any]] = None
|
||||
outputs: Optional[Mapping[str, Any]] = None
|
||||
|
||||
|
||||
class DatasetBase(BaseModel, frozen=True):
|
||||
"""Dataset base model."""
|
||||
|
||||
tenant_id: UUID
|
||||
name: str
|
||||
description: str
|
||||
description: Optional[str] = None
|
||||
|
||||
|
||||
class DatasetCreate(DatasetBase):
|
||||
@@ -54,7 +62,7 @@ class Dataset(DatasetBase):
|
||||
modified_at: Optional[datetime] = Field(default=None)
|
||||
|
||||
|
||||
class ListRunsQueryParams(BaseModel):
|
||||
class ListRunsQueryParams(BaseModel, frozen=True):
|
||||
"""Query params for GET /runs endpoint."""
|
||||
|
||||
class Config:
|
||||
@@ -97,3 +105,57 @@ class ListRunsQueryParams(BaseModel):
|
||||
if start_time and end_time and start_time > end_time:
|
||||
raise ValueError("start_time must be <= end_time")
|
||||
return values
|
||||
|
||||
|
||||
class APIFeedbackSource(BaseModel, frozen=True):
|
||||
"""API feedback source."""
|
||||
|
||||
type: ClassVar[str] = "api"
|
||||
metadata: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
class FeedbackBase(BaseModel, frozen=True):
|
||||
"""Feedback schema."""
|
||||
|
||||
created_at: datetime = Field(default_factory=datetime.utcnow)
|
||||
"""The time the feedback was created."""
|
||||
modified_at: datetime = Field(default_factory=datetime.utcnow)
|
||||
"""The time the feedback was last modified."""
|
||||
run_id: UUID
|
||||
"""The associated run ID this feedback is logged for."""
|
||||
metric_name: str
|
||||
"""The feedback metric name or type."""
|
||||
metric_value: Union[float, bool, int, str]
|
||||
"""Score to assign the run."""
|
||||
feedback_source: Optional[Union[APIFeedbackSource, Mapping[str, Any]]] = None
|
||||
"""The source of the feedback."""
|
||||
|
||||
|
||||
class FeedbackCreate(FeedbackBase):
|
||||
"""Schema used for creating feedback."""
|
||||
|
||||
id: UUID = Field(default_factory=uuid4)
|
||||
|
||||
feedback_source: APIFeedbackSource
|
||||
"""The source of the feedback."""
|
||||
|
||||
|
||||
class Feedback(FeedbackBase):
|
||||
"""Schema for getting feedback."""
|
||||
|
||||
id: UUID
|
||||
feedback_source: Optional[Dict] = None
|
||||
"""The source of the feedback. In this case"""
|
||||
|
||||
|
||||
class ListFeedbackQueryParams(BaseModel, frozen=True):
    """Query Params for listing feedbacks."""

    # Optional filter restricting results to feedback for the given run IDs.
    run: Optional[Sequence[UUID]] = None
    limit: int = 100  # page size
    offset: int = 0  # pagination offset

    class Config:
        """Config for query params."""

        # Reject unknown query parameters instead of silently ignoring them.
        extra = "forbid"
|
||||
|
||||
375
langchain/client/runner_utils.py
Normal file
375
langchain/client/runner_utils.py
Normal file
@@ -0,0 +1,375 @@
|
||||
"""Utilities for running LLMs/Chains over datasets."""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import functools
|
||||
import logging
|
||||
from typing import Any, Callable, Coroutine, Dict, Iterator, List, Optional, Union
|
||||
|
||||
from langchain.base_language import BaseLanguageModel
|
||||
from langchain.callbacks.base import BaseCallbackHandler
|
||||
from langchain.callbacks.manager import Callbacks
|
||||
from langchain.callbacks.tracers.langchain import LangChainTracer
|
||||
from langchain.chains.base import Chain
|
||||
from langchain.chat_models.base import BaseChatModel
|
||||
from langchain.client.models import Example
|
||||
from langchain.llms.base import BaseLLM
|
||||
from langchain.schema import (
|
||||
BaseMessage,
|
||||
ChatResult,
|
||||
HumanMessage,
|
||||
LLMResult,
|
||||
get_buffer_string,
|
||||
messages_from_dict,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
MODEL_OR_CHAIN_FACTORY = Union[Callable[[], Chain], BaseLanguageModel]
|
||||
|
||||
|
||||
class InputFormatError(Exception):
    """Raised when input format is invalid.

    Used by the prompt/message extraction helpers to signal that a run's
    inputs cannot be interpreted in the requested format, allowing callers
    to fall back to the alternate format.
    """
|
||||
|
||||
|
||||
def _get_prompts(inputs: Dict[str, Any]) -> List[str]:
|
||||
"""Get prompts from inputs."""
|
||||
if not inputs:
|
||||
raise InputFormatError("Inputs should not be empty.")
|
||||
|
||||
prompts = []
|
||||
if "prompt" in inputs:
|
||||
if not isinstance(inputs["prompt"], str):
|
||||
raise InputFormatError(
|
||||
"Expected string for 'prompt', got"
|
||||
f" {type(inputs['prompt']).__name__}"
|
||||
)
|
||||
prompts = [inputs["prompt"]]
|
||||
elif "prompts" in inputs:
|
||||
if not isinstance(inputs["prompts"], list) or not all(
|
||||
isinstance(i, str) for i in inputs["prompts"]
|
||||
):
|
||||
raise InputFormatError(
|
||||
"Expected list of strings for 'prompts',"
|
||||
f" got {type(inputs['prompts']).__name__}"
|
||||
)
|
||||
prompts = inputs["prompts"]
|
||||
elif len(inputs) == 1:
|
||||
prompt_ = next(iter(inputs.values()))
|
||||
if isinstance(prompt_, str):
|
||||
prompts = [prompt_]
|
||||
elif isinstance(prompt_, list) and all(isinstance(i, str) for i in prompt_):
|
||||
prompts = prompt_
|
||||
else:
|
||||
raise InputFormatError(f"LLM Run expects string prompt input. Got {inputs}")
|
||||
else:
|
||||
raise InputFormatError(
|
||||
f"LLM Run expects 'prompt' or 'prompts' in inputs. Got {inputs}"
|
||||
)
|
||||
|
||||
return prompts
|
||||
|
||||
|
||||
def _get_messages(inputs: Dict[str, Any]) -> List[List[BaseMessage]]:
    """Extract batches of chat messages from a run's input dictionary.

    Accepts an explicit ``"messages"`` key or — when the dict has exactly one
    entry — the value under that key. The payload may be a single batch
    (``List[dict]``) or a list of batches (``List[List[dict]]``).

    Raises:
        InputFormatError: If the inputs are empty or not in a supported shape.
    """
    if not inputs:
        raise InputFormatError("Inputs should not be empty.")

    if "messages" in inputs:
        payload = inputs["messages"]
    elif len(inputs) == 1:
        payload = next(iter(inputs.values()))
    else:
        raise InputFormatError(f"Chat Run expects 'messages' in inputs. Got {inputs}")

    is_list = isinstance(payload, list)
    if is_list and all(isinstance(entry, dict) for entry in payload):
        # A single batch of serialized messages.
        batches = [payload]
    elif is_list and all(isinstance(entry, list) for entry in payload):
        # Already a list of batches.
        batches = payload
    else:
        raise InputFormatError(
            f"Chat Run expects List[dict] or List[List[dict]] 'messages'"
            f" input. Got {inputs}"
        )
    return [messages_from_dict(batch) for batch in batches]
|
||||
|
||||
|
||||
async def _arun_llm(
    llm: BaseLanguageModel,
    inputs: Dict[str, Any],
    langchain_tracer: Optional[LangChainTracer],
) -> Union[LLMResult, ChatResult]:
    """Asynchronously run a language model on a single example's inputs.

    Plain LLMs prefer string prompts and fall back to flattening chat
    messages into buffer strings; chat models prefer message batches and
    fall back to wrapping prompts as single human messages.

    Raises:
        ValueError: If ``llm`` is neither a ``BaseLLM`` nor a ``BaseChatModel``.
    """
    callbacks: Optional[List[BaseCallbackHandler]] = (
        [langchain_tracer] if langchain_tracer else None
    )
    if isinstance(llm, BaseLLM):
        try:
            result = await llm.agenerate(_get_prompts(inputs), callbacks=callbacks)
        except InputFormatError:
            batches = _get_messages(inputs)
            flattened = [get_buffer_string(batch) for batch in batches]
            result = await llm.agenerate(flattened, callbacks=callbacks)
        return result
    if isinstance(llm, BaseChatModel):
        try:
            result = await llm.agenerate(_get_messages(inputs), callbacks=callbacks)
        except InputFormatError:
            wrapped: List[List[BaseMessage]] = [
                [HumanMessage(content=text)] for text in _get_prompts(inputs)
            ]
            result = await llm.agenerate(wrapped, callbacks=callbacks)
        return result
    raise ValueError(f"Unsupported LLM type {type(llm)}")
|
||||
|
||||
|
||||
async def _arun_llm_or_chain(
    example: Example,
    llm_or_chain_factory: MODEL_OR_CHAIN_FACTORY,
    n_repetitions: int,
    langchain_tracer: Optional[LangChainTracer],
) -> Union[List[dict], List[str], List[LLMResult], List[ChatResult]]:
    """Run the model or chain asynchronously over one example.

    Temporarily points the shared tracer at this example's id, runs the
    model/chain ``n_repetitions`` times, and restores the tracer's previous
    example id afterwards. Failures are logged and recorded as
    ``{"Error": ...}`` entries rather than raised.
    """
    previous_example_id = None
    callbacks: Optional[List[BaseCallbackHandler]] = None
    if langchain_tracer is not None:
        previous_example_id = langchain_tracer.example_id
        langchain_tracer.example_id = example.id
        callbacks = [langchain_tracer]

    outputs = []
    for _ in range(n_repetitions):
        try:
            if isinstance(llm_or_chain_factory, BaseLanguageModel):
                result: Any = await _arun_llm(
                    llm_or_chain_factory, example.inputs, langchain_tracer
                )
            else:
                # A fresh chain per repetition avoids carrying over state.
                chain = llm_or_chain_factory()
                result = await chain.acall(example.inputs, callbacks=callbacks)
            outputs.append(result)
        except Exception as e:
            logger.warning(f"Chain failed for example {example.id}. Error: {e}")
            outputs.append({"Error": str(e)})

    if langchain_tracer is not None:
        langchain_tracer.example_id = previous_example_id
    return outputs
|
||||
|
||||
|
||||
async def _gather_with_concurrency(
|
||||
n: int,
|
||||
initializer: Callable[[], Coroutine[Any, Any, Optional[LangChainTracer]]],
|
||||
*async_funcs: Callable[[Optional[LangChainTracer], Dict], Coroutine[Any, Any, Any]],
|
||||
) -> List[Any]:
|
||||
"""
|
||||
Run coroutines with a concurrency limit.
|
||||
|
||||
Args:
|
||||
n: The maximum number of concurrent tasks.
|
||||
initializer: A coroutine that initializes shared resources for the tasks.
|
||||
async_funcs: The async_funcs to be run concurrently.
|
||||
|
||||
Returns:
|
||||
A list of results from the coroutines.
|
||||
"""
|
||||
semaphore = asyncio.Semaphore(n)
|
||||
job_state = {"num_processed": 0}
|
||||
|
||||
tracer_queue: asyncio.Queue[Optional[LangChainTracer]] = asyncio.Queue()
|
||||
for _ in range(n):
|
||||
tracer_queue.put_nowait(await initializer())
|
||||
|
||||
async def run_coroutine_with_semaphore(
|
||||
async_func: Callable[
|
||||
[Optional[LangChainTracer], Dict], Coroutine[Any, Any, Any]
|
||||
]
|
||||
) -> Any:
|
||||
async with semaphore:
|
||||
tracer = await tracer_queue.get()
|
||||
try:
|
||||
result = await async_func(tracer, job_state)
|
||||
finally:
|
||||
tracer_queue.put_nowait(tracer)
|
||||
return result
|
||||
|
||||
return await asyncio.gather(
|
||||
*(run_coroutine_with_semaphore(function) for function in async_funcs)
|
||||
)
|
||||
|
||||
|
||||
async def _tracer_initializer(session_name: Optional[str]) -> Optional[LangChainTracer]:
    """
    Initialize a tracer to share across tasks.

    Args:
        session_name: The session name for the tracer.

    Returns:
        A LangChainTracer instance with an active session, or None when no
        session name was provided (tracing disabled).
    """
    if not session_name:
        return None
    tracer = LangChainTracer(session_name=session_name)
    tracer.ensure_session()
    return tracer
|
||||
|
||||
|
||||
async def arun_on_examples(
    examples: Iterator[Example],
    llm_or_chain_factory: MODEL_OR_CHAIN_FACTORY,
    *,
    concurrency_level: int = 5,
    num_repetitions: int = 1,
    session_name: Optional[str] = None,
    verbose: bool = False,
) -> Dict[str, Any]:
    """
    Run the chain on examples and store traces to the specified session name.

    Args:
        examples: Examples to run the model or chain over
        llm_or_chain_factory: Language model or Chain constructor to run
            over the dataset. The Chain constructor is used to permit
            independent calls on each example without carrying over state.
        concurrency_level: The number of async tasks to run concurrently.
        num_repetitions: Number of times to run the model on each example.
            This is useful when testing success rates or generating confidence
            intervals.
        session_name: Session name to use when tracing runs.
        verbose: Whether to print progress.

    Returns:
        A dictionary mapping example ids to the model outputs.
    """
    results: Dict[str, List[Any]] = {}

    async def evaluate_example(
        example: Example, tracer: LangChainTracer, job_state: dict
    ) -> None:
        """Run one example and record its outputs under the example id."""
        outputs = await _arun_llm_or_chain(
            example,
            llm_or_chain_factory,
            num_repetitions,
            tracer,
        )
        results[str(example.id)] = outputs
        job_state["num_processed"] += 1
        if verbose:
            print(
                f"Processed examples: {job_state['num_processed']}",
                end="\r",
                flush=True,
            )

    await _gather_with_concurrency(
        concurrency_level,
        functools.partial(_tracer_initializer, session_name),
        *(functools.partial(evaluate_example, example) for example in examples),
    )
    return results
|
||||
|
||||
|
||||
def run_llm(
    llm: BaseLanguageModel,
    inputs: Dict[str, Any],
    callbacks: Callbacks,
) -> Union[LLMResult, ChatResult]:
    """Run the language model on the example.

    Plain LLMs prefer string prompts and fall back to flattening chat
    messages into buffer strings; chat models prefer message batches and
    fall back to wrapping prompts as single human messages.

    Raises:
        ValueError: If ``llm`` is neither a ``BaseLLM`` nor a ``BaseChatModel``.
    """
    if isinstance(llm, BaseLLM):
        try:
            output = llm.generate(_get_prompts(inputs), callbacks=callbacks)
        except InputFormatError:
            batches = _get_messages(inputs)
            flattened = [get_buffer_string(batch) for batch in batches]
            output = llm.generate(flattened, callbacks=callbacks)
        return output
    if isinstance(llm, BaseChatModel):
        try:
            output = llm.generate(_get_messages(inputs), callbacks=callbacks)
        except InputFormatError:
            wrapped: List[List[BaseMessage]] = [
                [HumanMessage(content=text)] for text in _get_prompts(inputs)
            ]
            output = llm.generate(wrapped, callbacks=callbacks)
        return output
    raise ValueError(f"Unsupported LLM type {type(llm)}")
|
||||
|
||||
|
||||
def run_llm_or_chain(
    example: Example,
    llm_or_chain_factory: MODEL_OR_CHAIN_FACTORY,
    n_repetitions: int,
    langchain_tracer: Optional[LangChainTracer] = None,
) -> Union[List[dict], List[str], List[LLMResult], List[ChatResult]]:
    """Run the model or chain synchronously over one example.

    Temporarily points the shared tracer at this example's id, runs the
    model/chain ``n_repetitions`` times, and restores the tracer's previous
    example id afterwards. Failures are logged and recorded as
    ``{"Error": ...}`` entries rather than raised.
    """
    previous_example_id = None
    callbacks: Optional[List[BaseCallbackHandler]] = None
    if langchain_tracer is not None:
        previous_example_id = langchain_tracer.example_id
        langchain_tracer.example_id = example.id
        callbacks = [langchain_tracer]

    outputs = []
    for _ in range(n_repetitions):
        try:
            if isinstance(llm_or_chain_factory, BaseLanguageModel):
                result: Any = run_llm(llm_or_chain_factory, example.inputs, callbacks)
            else:
                # A fresh chain per repetition avoids carrying over state.
                chain = llm_or_chain_factory()
                result = chain(example.inputs, callbacks=callbacks)
            outputs.append(result)
        except Exception as e:
            logger.warning(f"Chain failed for example {example.id}. Error: {e}")
            outputs.append({"Error": str(e)})

    if langchain_tracer is not None:
        langchain_tracer.example_id = previous_example_id
    return outputs
|
||||
|
||||
|
||||
def run_on_examples(
    examples: Iterator[Example],
    llm_or_chain_factory: MODEL_OR_CHAIN_FACTORY,
    *,
    num_repetitions: int = 1,
    session_name: Optional[str] = None,
    verbose: bool = False,
) -> Dict[str, Any]:
    """Run the chain on examples and store traces to the specified session name.

    Args:
        examples: Examples to run model or chain over.
        llm_or_chain_factory: Language model or Chain constructor to run
            over the dataset. The Chain constructor is used to permit
            independent calls on each example without carrying over state.
        num_repetitions: Number of times to run the model on each example.
            This is useful when testing success rates or generating confidence
            intervals.
        session_name: Session name to use when tracing runs.
        verbose: Whether to print progress.

    Returns:
        A dictionary mapping example ids to the model outputs.
    """
    results: Dict[str, Any] = {}
    # A single tracer is shared across examples; run_llm_or_chain swaps the
    # example id in and out around each call.
    tracer = LangChainTracer(session_name=session_name) if session_name else None
    for count, example in enumerate(examples, start=1):
        result = run_llm_or_chain(
            example,
            llm_or_chain_factory,
            num_repetitions,
            langchain_tracer=tracer,
        )
        if verbose:
            print(f"{count} processed", flush=True, end="\r")
        results[str(example.id)] = result
    return results
|
||||
@@ -15,7 +15,7 @@ class Wikipedia(Docstore):
|
||||
try:
|
||||
import wikipedia # noqa: F401
|
||||
except ImportError:
|
||||
raise ValueError(
|
||||
raise ImportError(
|
||||
"Could not import wikipedia python package. "
|
||||
"Please install it with `pip install wikipedia`."
|
||||
)
|
||||
|
||||
@@ -48,6 +48,7 @@ from langchain.document_loaders.image_captions import ImageCaptionLoader
|
||||
from langchain.document_loaders.imsdb import IMSDbLoader
|
||||
from langchain.document_loaders.json_loader import JSONLoader
|
||||
from langchain.document_loaders.markdown import UnstructuredMarkdownLoader
|
||||
from langchain.document_loaders.mastodon import MastodonTootsLoader
|
||||
from langchain.document_loaders.mediawikidump import MWDumpLoader
|
||||
from langchain.document_loaders.modern_treasury import ModernTreasuryLoader
|
||||
from langchain.document_loaders.notebook import NotebookLoader
|
||||
@@ -69,6 +70,7 @@ from langchain.document_loaders.pdf import (
|
||||
UnstructuredPDFLoader,
|
||||
)
|
||||
from langchain.document_loaders.powerpoint import UnstructuredPowerPointLoader
|
||||
from langchain.document_loaders.psychic import PsychicLoader
|
||||
from langchain.document_loaders.python import PythonLoader
|
||||
from langchain.document_loaders.readthedocs import ReadTheDocsLoader
|
||||
from langchain.document_loaders.reddit import RedditPostsLoader
|
||||
@@ -98,6 +100,7 @@ from langchain.document_loaders.unstructured import (
|
||||
from langchain.document_loaders.url import UnstructuredURLLoader
|
||||
from langchain.document_loaders.url_playwright import PlaywrightURLLoader
|
||||
from langchain.document_loaders.url_selenium import SeleniumURLLoader
|
||||
from langchain.document_loaders.weather import WeatherDataLoader
|
||||
from langchain.document_loaders.web_base import WebBaseLoader
|
||||
from langchain.document_loaders.whatsapp_chat import WhatsAppChatLoader
|
||||
from langchain.document_loaders.wikipedia import WikipediaLoader
|
||||
@@ -159,6 +162,7 @@ __all__ = [
|
||||
"ImageCaptionLoader",
|
||||
"JSONLoader",
|
||||
"MWDumpLoader",
|
||||
"MastodonTootsLoader",
|
||||
"MathpixPDFLoader",
|
||||
"ModernTreasuryLoader",
|
||||
"NotebookLoader",
|
||||
@@ -209,10 +213,12 @@ __all__ = [
|
||||
"UnstructuredRTFLoader",
|
||||
"UnstructuredURLLoader",
|
||||
"UnstructuredWordDocumentLoader",
|
||||
"WeatherDataLoader",
|
||||
"WebBaseLoader",
|
||||
"WhatsAppChatLoader",
|
||||
"WikipediaLoader",
|
||||
"YoutubeLoader",
|
||||
"TelegramChatLoader",
|
||||
"ToMarkdownLoader",
|
||||
"PsychicLoader",
|
||||
]
|
||||
|
||||
@@ -41,7 +41,7 @@ class ApifyDatasetLoader(BaseLoader, BaseModel):
|
||||
|
||||
values["apify_client"] = ApifyClient()
|
||||
except ImportError:
|
||||
raise ValueError(
|
||||
raise ImportError(
|
||||
"Could not import apify-client Python package. "
|
||||
"Please install it with `pip install apify-client`."
|
||||
)
|
||||
|
||||
@@ -63,7 +63,7 @@ class DocugamiLoader(BaseLoader, BaseModel):
|
||||
try:
|
||||
from lxml import etree
|
||||
except ImportError:
|
||||
raise ValueError(
|
||||
raise ImportError(
|
||||
"Could not import lxml python package. "
|
||||
"Please install it with `pip install lxml`."
|
||||
)
|
||||
@@ -259,7 +259,7 @@ class DocugamiLoader(BaseLoader, BaseModel):
|
||||
try:
|
||||
from lxml import etree
|
||||
except ImportError:
|
||||
raise ValueError(
|
||||
raise ImportError(
|
||||
"Could not import lxml python package. "
|
||||
"Please install it with `pip install lxml`."
|
||||
)
|
||||
|
||||
@@ -33,7 +33,7 @@ class DuckDBLoader(BaseLoader):
|
||||
try:
|
||||
import duckdb
|
||||
except ImportError:
|
||||
raise ValueError(
|
||||
raise ImportError(
|
||||
"Could not import duckdb python package. "
|
||||
"Please install it with `pip install duckdb`."
|
||||
)
|
||||
|
||||
@@ -3,80 +3,146 @@
|
||||
https://gist.github.com/foxmask/7b29c43a161e001ff04afdb2f181e31c
|
||||
"""
|
||||
import hashlib
|
||||
import logging
|
||||
from base64 import b64decode
|
||||
from time import strptime
|
||||
from typing import Any, Dict, List
|
||||
from typing import Any, Dict, Iterator, List, Optional
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.document_loaders.base import BaseLoader
|
||||
|
||||
|
||||
def _parse_content(content: str) -> str:
|
||||
from pypandoc import convert_text
|
||||
|
||||
text = convert_text(content, "org", format="html")
|
||||
return text
|
||||
|
||||
|
||||
def _parse_resource(resource: list) -> dict:
|
||||
rsc_dict: Dict[str, Any] = {}
|
||||
for elem in resource:
|
||||
if elem.tag == "data":
|
||||
# Some times elem.text is None
|
||||
rsc_dict[elem.tag] = b64decode(elem.text) if elem.text else b""
|
||||
rsc_dict["hash"] = hashlib.md5(rsc_dict[elem.tag]).hexdigest()
|
||||
else:
|
||||
rsc_dict[elem.tag] = elem.text
|
||||
|
||||
return rsc_dict
|
||||
|
||||
|
||||
def _parse_note(note: List) -> dict:
|
||||
note_dict: Dict[str, Any] = {}
|
||||
resources = []
|
||||
for elem in note:
|
||||
if elem.tag == "content":
|
||||
note_dict[elem.tag] = _parse_content(elem.text)
|
||||
# A copy of original content
|
||||
note_dict["content-raw"] = elem.text
|
||||
elif elem.tag == "resource":
|
||||
resources.append(_parse_resource(elem))
|
||||
elif elem.tag == "created" or elem.tag == "updated":
|
||||
note_dict[elem.tag] = strptime(elem.text, "%Y%m%dT%H%M%SZ")
|
||||
else:
|
||||
note_dict[elem.tag] = elem.text
|
||||
|
||||
note_dict["resource"] = resources
|
||||
|
||||
return note_dict
|
||||
|
||||
|
||||
def _parse_note_xml(xml_file: str) -> str:
|
||||
"""Parse Evernote xml."""
|
||||
# Without huge_tree set to True, parser may complain about huge text node
|
||||
# Try to recover, because there may be " ", which will cause
|
||||
# "XMLSyntaxError: Entity 'nbsp' not defined"
|
||||
from lxml import etree
|
||||
|
||||
context = etree.iterparse(
|
||||
xml_file, encoding="utf-8", strip_cdata=False, huge_tree=True, recover=True
|
||||
)
|
||||
result_string = ""
|
||||
for action, elem in context:
|
||||
if elem.tag == "note":
|
||||
result_string += _parse_note(elem)["content"]
|
||||
return result_string
|
||||
|
||||
|
||||
class EverNoteLoader(BaseLoader):
|
||||
"""Loader to load in EverNote files.."""
|
||||
"""EverNote Loader.
|
||||
Loads an EverNote notebook export file e.g. my_notebook.enex into Documents.
|
||||
Instructions on producing this file can be found at
|
||||
https://help.evernote.com/hc/en-us/articles/209005557-Export-notes-and-notebooks-as-ENEX-or-HTML
|
||||
|
||||
def __init__(self, file_path: str):
|
||||
Currently only the plain text in the note is extracted and stored as the contents
|
||||
of the Document, any non content metadata (e.g. 'author', 'created', 'updated' etc.
|
||||
but not 'content-raw' or 'resource') tags on the note will be extracted and stored
|
||||
as metadata on the Document.
|
||||
|
||||
Args:
|
||||
file_path (str): The path to the notebook export with a .enex extension
|
||||
load_single_document (bool): Whether or not to concatenate the content of all
|
||||
notes into a single long Document.
|
||||
If this is set to True (default) then the only metadata on the document will be
|
||||
the 'source' which contains the file name of the export.
|
||||
""" # noqa: E501
|
||||
|
||||
def __init__(self, file_path: str, load_single_document: bool = True):
|
||||
"""Initialize with file path."""
|
||||
self.file_path = file_path
|
||||
self.load_single_document = load_single_document
|
||||
|
||||
def load(self) -> List[Document]:
|
||||
"""Load document from EverNote file."""
|
||||
text = _parse_note_xml(self.file_path)
|
||||
metadata = {"source": self.file_path}
|
||||
return [Document(page_content=text, metadata=metadata)]
|
||||
"""Load documents from EverNote export file."""
|
||||
documents = [
|
||||
Document(
|
||||
page_content=note["content"],
|
||||
metadata={
|
||||
**{
|
||||
key: value
|
||||
for key, value in note.items()
|
||||
if key not in ["content", "content-raw", "resource"]
|
||||
},
|
||||
**{"source": self.file_path},
|
||||
},
|
||||
)
|
||||
for note in self._parse_note_xml(self.file_path)
|
||||
if note.get("content") is not None
|
||||
]
|
||||
|
||||
if not self.load_single_document:
|
||||
return documents
|
||||
|
||||
return [
|
||||
Document(
|
||||
page_content="".join([document.page_content for document in documents]),
|
||||
metadata={"source": self.file_path},
|
||||
)
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _parse_content(content: str) -> str:
|
||||
try:
|
||||
import html2text
|
||||
|
||||
return html2text.html2text(content).strip()
|
||||
except ImportError as e:
|
||||
logging.error(
|
||||
"Could not import `html2text`. Although it is not a required package "
|
||||
"to use Langchain, using the EverNote loader requires `html2text`. "
|
||||
"Please install `html2text` via `pip install html2text` and try again."
|
||||
)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
def _parse_resource(resource: list) -> dict:
|
||||
rsc_dict: Dict[str, Any] = {}
|
||||
for elem in resource:
|
||||
if elem.tag == "data":
|
||||
# Sometimes elem.text is None
|
||||
rsc_dict[elem.tag] = b64decode(elem.text) if elem.text else b""
|
||||
rsc_dict["hash"] = hashlib.md5(rsc_dict[elem.tag]).hexdigest()
|
||||
else:
|
||||
rsc_dict[elem.tag] = elem.text
|
||||
|
||||
return rsc_dict
|
||||
|
||||
@staticmethod
|
||||
def _parse_note(note: List, prefix: Optional[str] = None) -> dict:
|
||||
note_dict: Dict[str, Any] = {}
|
||||
resources = []
|
||||
|
||||
def add_prefix(element_tag: str) -> str:
|
||||
if prefix is None:
|
||||
return element_tag
|
||||
return f"{prefix}.{element_tag}"
|
||||
|
||||
for elem in note:
|
||||
if elem.tag == "content":
|
||||
note_dict[elem.tag] = EverNoteLoader._parse_content(elem.text)
|
||||
# A copy of original content
|
||||
note_dict["content-raw"] = elem.text
|
||||
elif elem.tag == "resource":
|
||||
resources.append(EverNoteLoader._parse_resource(elem))
|
||||
elif elem.tag == "created" or elem.tag == "updated":
|
||||
note_dict[elem.tag] = strptime(elem.text, "%Y%m%dT%H%M%SZ")
|
||||
elif elem.tag == "note-attributes":
|
||||
additional_attributes = EverNoteLoader._parse_note(
|
||||
elem, elem.tag
|
||||
) # Recursively enter the note-attributes tag
|
||||
note_dict.update(additional_attributes)
|
||||
else:
|
||||
note_dict[elem.tag] = elem.text
|
||||
|
||||
if len(resources) > 0:
|
||||
note_dict["resource"] = resources
|
||||
|
||||
return {add_prefix(key): value for key, value in note_dict.items()}
|
||||
|
||||
@staticmethod
|
||||
def _parse_note_xml(xml_file: str) -> Iterator[Dict[str, Any]]:
|
||||
"""Parse Evernote xml."""
|
||||
# Without huge_tree set to True, parser may complain about huge text node
|
||||
# Try to recover, because there may be " ", which will cause
|
||||
# "XMLSyntaxError: Entity 'nbsp' not defined"
|
||||
try:
|
||||
from lxml import etree
|
||||
except ImportError as e:
|
||||
logging.error(
|
||||
"Could not import `lxml`. Although it is not a required package to use "
|
||||
"Langchain, using the EverNote loader requires `lxml`. Please install "
|
||||
"`lxml` via `pip install lxml` and try again."
|
||||
)
|
||||
raise e
|
||||
|
||||
context = etree.iterparse(
|
||||
xml_file, encoding="utf-8", strip_cdata=False, huge_tree=True, recover=True
|
||||
)
|
||||
|
||||
for action, elem in context:
|
||||
if elem.tag == "note":
|
||||
yield EverNoteLoader._parse_note(elem)
|
||||
|
||||
@@ -39,7 +39,7 @@ class ImageCaptionLoader(BaseLoader):
|
||||
try:
|
||||
from transformers import BlipForConditionalGeneration, BlipProcessor
|
||||
except ImportError:
|
||||
raise ValueError(
|
||||
raise ImportError(
|
||||
"`transformers` package not found, please install with "
|
||||
"`pip install transformers`."
|
||||
)
|
||||
@@ -66,7 +66,7 @@ class ImageCaptionLoader(BaseLoader):
|
||||
try:
|
||||
from PIL import Image
|
||||
except ImportError:
|
||||
raise ValueError(
|
||||
raise ImportError(
|
||||
"`PIL` package not found, please install with `pip install pillow`"
|
||||
)
|
||||
|
||||
|
||||
@@ -42,7 +42,7 @@ class JSONLoader(BaseLoader):
|
||||
try:
|
||||
import jq # noqa:F401
|
||||
except ImportError:
|
||||
raise ValueError(
|
||||
raise ImportError(
|
||||
"jq package not found, please install it with `pip install jq`"
|
||||
)
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user