mirror of
https://github.com/hwchase17/langchain.git
synced 2026-02-06 09:10:27 +00:00
Compare commits
163 Commits
vwp/fix_pr
...
v0.0.227
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cb4e88e4fb | ||
|
|
d1c7237034 | ||
|
|
0ed2da7020 | ||
|
|
1c8cff32f1 | ||
|
|
fd7145970f | ||
|
|
3074306ae1 | ||
|
|
5809c3d29d | ||
|
|
87f75cb322 | ||
|
|
284d40b7af | ||
|
|
8d961b9e33 | ||
|
|
a9c5b4bcea | ||
|
|
9954eff8fd | ||
|
|
6095a0a310 | ||
|
|
e047541b5f | ||
|
|
152dc59060 | ||
|
|
927c8eb91a | ||
|
|
bac56618b4 | ||
|
|
d642609a23 | ||
|
|
ec10787bc7 | ||
|
|
b21c2f8704 | ||
|
|
e736d60516 | ||
|
|
12d14f8947 | ||
|
|
cb9ff6efb8 | ||
|
|
1f4a51cb9c | ||
|
|
a6b39afe0e | ||
|
|
1a4ca3eff9 | ||
|
|
6ff9e9b34a | ||
|
|
09acbb8410 | ||
|
|
e0cb3ea90c | ||
|
|
4450791edd | ||
|
|
a7ae35fe4e | ||
|
|
681f2678a3 | ||
|
|
c23e16c459 | ||
|
|
8c371e12eb | ||
|
|
c7cf11b8ab | ||
|
|
fed64ae060 | ||
|
|
576880abc5 | ||
|
|
e8f24164f0 | ||
|
|
ae5aa496ee | ||
|
|
b9d6d4cd4c | ||
|
|
8b19f6a0da | ||
|
|
ec66d5188c | ||
|
|
e61cfb6e99 | ||
|
|
0c7a5cb206 | ||
|
|
b151d4257a | ||
|
|
887bb12287 | ||
|
|
f773c21723 | ||
|
|
0e878ccc2d | ||
|
|
57d8a3d1e8 | ||
|
|
c36f852846 | ||
|
|
035ad33a5b | ||
|
|
cabd358c3a | ||
|
|
52b016920c | ||
|
|
695e7027e6 | ||
|
|
930e319ca7 | ||
|
|
6aa66fd2b0 | ||
|
|
8afc8e6f5d | ||
|
|
f891f7d69f | ||
|
|
83cf01683e | ||
|
|
607708a411 | ||
|
|
75aa408f10 | ||
|
|
0dc700eebf | ||
|
|
d6541da161 | ||
|
|
d669b9ece9 | ||
|
|
e533da8bf2 | ||
|
|
836d2009cb | ||
|
|
d65b1951bd | ||
|
|
265f05b10e | ||
|
|
47e7d09dff | ||
|
|
79b59a8e06 | ||
|
|
6711854e30 | ||
|
|
cab7d86f23 | ||
|
|
3ae11b7582 | ||
|
|
a2f191a322 | ||
|
|
61938a02a1 | ||
|
|
ecee4d6e92 | ||
|
|
fa55c5a16b | ||
|
|
8a7c95e555 | ||
|
|
e4459e423b | ||
|
|
4c1c05c2c7 | ||
|
|
30d8d1d3d0 | ||
|
|
9abf1847f4 | ||
|
|
7d92e9407b | ||
|
|
e288410e72 | ||
|
|
26409b01bd | ||
|
|
6f358bb04a | ||
|
|
6eff0fa2ca | ||
|
|
81e5b1ad36 | ||
|
|
baf48d3583 | ||
|
|
8045870a0f | ||
|
|
db98c44f8f | ||
|
|
7cd0936b1c | ||
|
|
38f853dfa3 | ||
|
|
ee1d488c03 | ||
|
|
6666e422c6 | ||
|
|
8410c6a747 | ||
|
|
7b585c7585 | ||
|
|
6fc24743b7 | ||
|
|
79fb90aafd | ||
|
|
1415966d64 | ||
|
|
a94c4cca68 | ||
|
|
e18e838aae | ||
|
|
e27ba9d92b | ||
|
|
39e685b80f | ||
|
|
bf9e4ef35f | ||
|
|
9cfb311ecb | ||
|
|
405865c91a | ||
|
|
c9f696f063 | ||
|
|
e8531769f7 | ||
|
|
2984803597 | ||
|
|
da69a6771f | ||
|
|
b39017dc11 | ||
|
|
898087d02c | ||
|
|
0ad984fa27 | ||
|
|
81eebc4070 | ||
|
|
5585607654 | ||
|
|
265c285057 | ||
|
|
6631fd5168 | ||
|
|
696886f397 | ||
|
|
0b69a7e9ab | ||
|
|
9ca4c54428 | ||
|
|
dfa48dc3b5 | ||
|
|
04001ff077 | ||
|
|
3f9744c9f4 | ||
|
|
fd3f8efec7 | ||
|
|
490fcf9d98 | ||
|
|
c8f8b1b327 | ||
|
|
e49abd1277 | ||
|
|
fad2c7e5e0 | ||
|
|
98dbea6310 | ||
|
|
d0c7f7c317 | ||
|
|
719316e84c | ||
|
|
e2d61ab85a | ||
|
|
16fbd528c5 | ||
|
|
80e86b602e | ||
|
|
c669d98693 | ||
|
|
1cdb33a090 | ||
|
|
a081e419a0 | ||
|
|
be93775ebc | ||
|
|
60b05511d3 | ||
|
|
200be43da6 | ||
|
|
6d15854cda | ||
|
|
153b56d19b | ||
|
|
1feac83323 | ||
|
|
77ae8084a0 | ||
|
|
e41b382e1c | ||
|
|
5a45363954 | ||
|
|
7acd524210 | ||
|
|
9dc77614e3 | ||
|
|
e5f6f0ffc4 | ||
|
|
052c797429 | ||
|
|
dc2264619a | ||
|
|
6a64870ea0 | ||
|
|
7ebb76a5fa | ||
|
|
8d2281a8ca | ||
|
|
3bfe7cf467 | ||
|
|
556c425042 | ||
|
|
0498dad562 | ||
|
|
59697b406d | ||
|
|
aa37b10b28 | ||
|
|
b0859c9b18 | ||
|
|
a5b206caf3 | ||
|
|
b26cca8008 |
6
.github/PULL_REQUEST_TEMPLATE.md
vendored
6
.github/PULL_REQUEST_TEMPLATE.md
vendored
@@ -12,11 +12,11 @@ If you're adding a new integration, please include:
|
||||
2. an example notebook showing its use.
|
||||
|
||||
Maintainer responsibilities:
|
||||
- General / Misc / if you don't know who to tag: @dev2049
|
||||
- General / Misc / if you don't know who to tag: @baskaryan
|
||||
- DataLoaders / VectorStores / Retrievers: @rlancemartin, @eyurtsev
|
||||
- Models / Prompts: @hwchase17, @dev2049
|
||||
- Models / Prompts: @hwchase17, @baskaryan
|
||||
- Memory: @hwchase17
|
||||
- Agents / Tools / Toolkits: @vowelparrot
|
||||
- Agents / Tools / Toolkits: @hinthornw
|
||||
- Tracing / Callbacks: @agola11
|
||||
- Async: @agola11
|
||||
|
||||
|
||||
@@ -16,22 +16,6 @@
|
||||
{%- set development_attrs = '' %}
|
||||
{%- endif %}
|
||||
|
||||
{# title, link, link_attrs #}
|
||||
{%- set drop_down_navigation = [
|
||||
('Getting Started', pathto('getting_started'), ''),
|
||||
('Tutorial', pathto('tutorial/index'), ''),
|
||||
("What's new", pathto('whats_new/v' + version), ''),
|
||||
('Glossary', pathto('glossary'), ''),
|
||||
('Development', development_link, development_attrs),
|
||||
('FAQ', pathto('faq'), ''),
|
||||
('Support', pathto('support'), ''),
|
||||
('Related packages', pathto('related_projects'), ''),
|
||||
('Roadmap', pathto('roadmap'), ''),
|
||||
('Governance', pathto('governance'), ''),
|
||||
('About us', pathto('about'), ''),
|
||||
('GitHub', 'https://github.com/scikit-learn/scikit-learn', ''),
|
||||
('Other Versions and Download', 'https://scikit-learn.org/dev/versions.html', '')]
|
||||
-%}
|
||||
|
||||
<nav id="navbar" class="{{ nav_bar_class }} navbar navbar-expand-md navbar-light bg-light py-0">
|
||||
<div class="container-fluid {{ top_container_cls }} px-0">
|
||||
|
||||
@@ -47,7 +47,7 @@ import ChatModel from "@snippets/get_started/quickstart/chat_model.mdx"
|
||||
|
||||
## Prompt templates
|
||||
|
||||
Most LLM applications do not pass user input directly into to an LLM. Usually they will add the user input to a larger piece of text, called a prompt template, that provides additional context on the specific task at hand.
|
||||
Most LLM applications do not pass user input directly into an LLM. Usually they will add the user input to a larger piece of text, called a prompt template, that provides additional context on the specific task at hand.
|
||||
|
||||
In the previous example, the text we passed to the model contained instructions to generate a company name. For our application, it'd be great if the user only had to provide the description of a company/product, without having to worry about giving the model instructions.
|
||||
|
||||
@@ -138,7 +138,7 @@ The chains and agents we've looked at so far have been stateless, but for many a
|
||||
|
||||
The Memory module gives you a way to maintain application state. The base Memory interface is simple: it lets you update state given the latest run inputs and outputs and it lets you modify (or contextualize) the next input using the stored state.
|
||||
|
||||
There are a number of built-in memory systems. The simplest of these are is a buffer memory which just prepends the last few inputs/outputs to the current input - we will use this in the example below.
|
||||
There are a number of built-in memory systems. The simplest of these is a buffer memory which just prepends the last few inputs/outputs to the current input - we will use this in the example below.
|
||||
|
||||
import MemoryLLM from "@snippets/get_started/quickstart/memory_llms.mdx"
|
||||
import MemoryChatModel from "@snippets/get_started/quickstart/memory_chat_models.mdx"
|
||||
@@ -155,4 +155,4 @@ You can use Memory with chains and agents initialized with chat models. The main
|
||||
<MemoryChatModel/>
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
</Tabs>
|
||||
|
||||
124
docs/docs_skeleton/docs/get_started/tutorials.mdx
Normal file
124
docs/docs_skeleton/docs/get_started/tutorials.mdx
Normal file
@@ -0,0 +1,124 @@
|
||||
# Tutorials
|
||||
|
||||
|
||||
⛓ icon marks a new addition [last update 2023-07-05]
|
||||
|
||||
---------------------
|
||||
|
||||
### DeepLearning.AI courses
|
||||
by [Harrison Chase](https://github.com/hwchase17) and [Andrew Ng](https://en.wikipedia.org/wiki/Andrew_Ng)
|
||||
- [LangChain for LLM Application Development](https://learn.deeplearning.ai/langchain)
|
||||
- ⛓ [LangChain Chat with Your Data](https://learn.deeplearning.ai/langchain-chat-with-your-data)
|
||||
|
||||
### Handbook
|
||||
[LangChain AI Handbook](https://www.pinecone.io/learn/langchain/) By **James Briggs** and **Francisco Ingham**
|
||||
|
||||
### Short Tutorials
|
||||
[LangChain Crash Course - Build apps with language models](https://youtu.be/LbT1yp6quS8) by [Patrick Loeber](https://www.youtube.com/@patloeber)
|
||||
|
||||
[LangChain Crash Course: Build an AutoGPT app in 25 minutes](https://youtu.be/MlK6SIjcjE8) by [Nicholas Renotte](https://www.youtube.com/@NicholasRenotte)
|
||||
|
||||
[LangChain Explained in 13 Minutes | QuickStart Tutorial for Beginners](https://youtu.be/aywZrzNaKjs) by [Rabbitmetrics](https://www.youtube.com/@rabbitmetrics)
|
||||
|
||||
|
||||
## Tutorials
|
||||
|
||||
### [LangChain for Gen AI and LLMs](https://www.youtube.com/playlist?list=PLIUOU7oqGTLieV9uTIFMm6_4PXg-hlN6F) by [James Briggs](https://www.youtube.com/@jamesbriggs)
|
||||
- #1 [Getting Started with `GPT-3` vs. Open Source LLMs](https://youtu.be/nE2skSRWTTs)
|
||||
- #2 [Prompt Templates for `GPT 3.5` and other LLMs](https://youtu.be/RflBcK0oDH0)
|
||||
- #3 [LLM Chains using `GPT 3.5` and other LLMs](https://youtu.be/S8j9Tk0lZHU)
|
||||
- [LangChain Data Loaders, Tokenizers, Chunking, and Datasets - Data Prep 101](https://youtu.be/eqOfr4AGLk8)
|
||||
- #4 [Chatbot Memory for `Chat-GPT`, `Davinci` + other LLMs](https://youtu.be/X05uK0TZozM)
|
||||
- #5 [Chat with OpenAI in LangChain](https://youtu.be/CnAgB3A5OlU)
|
||||
- #6 [Fixing LLM Hallucinations with Retrieval Augmentation in LangChain](https://youtu.be/kvdVduIJsc8)
|
||||
- #7 [LangChain Agents Deep Dive with `GPT 3.5`](https://youtu.be/jSP-gSEyVeI)
|
||||
- #8 [Create Custom Tools for Chatbots in LangChain](https://youtu.be/q-HNphrWsDE)
|
||||
- #9 [Build Conversational Agents with Vector DBs](https://youtu.be/H6bCqqw9xyI)
|
||||
- [Using NEW `MPT-7B` in Hugging Face and LangChain](https://youtu.be/DXpk9K7DgMo)
|
||||
- ⛓ [`MPT-30B` Chatbot with LangChain](https://youtu.be/pnem-EhT6VI)
|
||||
|
||||
|
||||
### [LangChain 101](https://www.youtube.com/playlist?list=PLqZXAkvF1bPNQER9mLmDbntNfSpzdDIU5) by [Greg Kamradt (Data Indy)](https://www.youtube.com/@DataIndependent)
|
||||
- [What Is LangChain? - LangChain + `ChatGPT` Overview](https://youtu.be/_v_fgW2SkkQ)
|
||||
- [Quickstart Guide](https://youtu.be/kYRB-vJFy38)
|
||||
- [Beginner Guide To 7 Essential Concepts](https://youtu.be/2xxziIWmaSA)
|
||||
- [Beginner Guide To 9 Use Cases](https://youtu.be/vGP4pQdCocw)
|
||||
- [Agents Overview + Google Searches](https://youtu.be/Jq9Sf68ozk0)
|
||||
- [`OpenAI` + `Wolfram Alpha`](https://youtu.be/UijbzCIJ99g)
|
||||
- [Ask Questions On Your Custom (or Private) Files](https://youtu.be/EnT-ZTrcPrg)
|
||||
- [Connect `Google Drive Files` To `OpenAI`](https://youtu.be/IqqHqDcXLww)
|
||||
- [`YouTube Transcripts` + `OpenAI`](https://youtu.be/pNcQ5XXMgH4)
|
||||
- [Question A 300 Page Book (w/ `OpenAI` + `Pinecone`)](https://youtu.be/h0DHDp1FbmQ)
|
||||
- [Workaround `OpenAI's` Token Limit With Chain Types](https://youtu.be/f9_BWhCI4Zo)
|
||||
- [Build Your Own OpenAI + LangChain Web App in 23 Minutes](https://youtu.be/U_eV8wfMkXU)
|
||||
- [Working With The New `ChatGPT API`](https://youtu.be/e9P7FLi5Zy8)
|
||||
- [OpenAI + LangChain Wrote Me 100 Custom Sales Emails](https://youtu.be/y1pyAQM-3Bo)
|
||||
- [Structured Output From `OpenAI` (Clean Dirty Data)](https://youtu.be/KwAXfey-xQk)
|
||||
- [Connect `OpenAI` To +5,000 Tools (LangChain + `Zapier`)](https://youtu.be/7tNm0yiDigU)
|
||||
- [Use LLMs To Extract Data From Text (Expert Mode)](https://youtu.be/xZzvwR9jdPA)
|
||||
- [Extract Insights From Interview Transcripts Using LLMs](https://youtu.be/shkMOHwJ4SM)
|
||||
- [5 Levels Of LLM Summarizing: Novice to Expert](https://youtu.be/qaPMdcCqtWk)
|
||||
- [Control Tone & Writing Style Of Your LLM Output](https://youtu.be/miBG-a3FuhU)
|
||||
- [Build Your Own `AI Twitter Bot` Using LLMs](https://youtu.be/yLWLDjT01q8)
|
||||
- [ChatGPT made my interview questions for me (`Streamlit` + LangChain)](https://youtu.be/zvoAMx0WKkw)
|
||||
- [Function Calling via ChatGPT API - First Look With LangChain](https://youtu.be/0-zlUy7VUjg)
|
||||
- ⛓ [Extract Topics From Video/Audio With LLMs (Topic Modeling w/ LangChain)](https://youtu.be/pEkxRQFNAs4)
|
||||
|
||||
|
||||
### [LangChain How to and guides](https://www.youtube.com/playlist?list=PL8motc6AQftk1Bs42EW45kwYbyJ4jOdiZ) by [Sam Witteveen](https://www.youtube.com/@samwitteveenai)
|
||||
- [LangChain Basics - LLMs & PromptTemplates with Colab](https://youtu.be/J_0qvRt4LNk)
|
||||
- [LangChain Basics - Tools and Chains](https://youtu.be/hI2BY7yl_Ac)
|
||||
- [`ChatGPT API` Announcement & Code Walkthrough with LangChain](https://youtu.be/phHqvLHCwH4)
|
||||
- [Conversations with Memory (explanation & code walkthrough)](https://youtu.be/X550Zbz_ROE)
|
||||
- [Chat with `Flan20B`](https://youtu.be/VW5LBavIfY4)
|
||||
- [Using `Hugging Face Models` locally (code walkthrough)](https://youtu.be/Kn7SX2Mx_Jk)
|
||||
- [`PAL` : Program-aided Language Models with LangChain code](https://youtu.be/dy7-LvDu-3s)
|
||||
- [Building a Summarization System with LangChain and `GPT-3` - Part 1](https://youtu.be/LNq_2s_H01Y)
|
||||
- [Building a Summarization System with LangChain and `GPT-3` - Part 2](https://youtu.be/d-yeHDLgKHw)
|
||||
- [Microsoft's `Visual ChatGPT` using LangChain](https://youtu.be/7YEiEyfPF5U)
|
||||
- [LangChain Agents - Joining Tools and Chains with Decisions](https://youtu.be/ziu87EXZVUE)
|
||||
- [Comparing LLMs with LangChain](https://youtu.be/rFNG0MIEuW0)
|
||||
- [Using `Constitutional AI` in LangChain](https://youtu.be/uoVqNFDwpX4)
|
||||
- [Talking to `Alpaca` with LangChain - Creating an Alpaca Chatbot](https://youtu.be/v6sF8Ed3nTE)
|
||||
- [Talk to your `CSV` & `Excel` with LangChain](https://youtu.be/xQ3mZhw69bc)
|
||||
- [`BabyAGI`: Discover the Power of Task-Driven Autonomous Agents!](https://youtu.be/QBcDLSE2ERA)
|
||||
- [Improve your `BabyAGI` with LangChain](https://youtu.be/DRgPyOXZ-oE)
|
||||
- [Master `PDF` Chat with LangChain - Your essential guide to queries on documents](https://youtu.be/ZzgUqFtxgXI)
|
||||
- [Using LangChain with `DuckDuckGO` `Wikipedia` & `PythonREPL` Tools](https://youtu.be/KerHlb8nuVc)
|
||||
- [Building Custom Tools and Agents with LangChain (gpt-3.5-turbo)](https://youtu.be/biS8G8x8DdA)
|
||||
- [LangChain Retrieval QA Over Multiple Files with `ChromaDB`](https://youtu.be/3yPBVii7Ct0)
|
||||
- [LangChain Retrieval QA with Instructor Embeddings & `ChromaDB` for PDFs](https://youtu.be/cFCGUjc33aU)
|
||||
- [LangChain + Retrieval Local LLMs for Retrieval QA - No OpenAI!!!](https://youtu.be/9ISVjh8mdlA)
|
||||
- [`Camel` + LangChain for Synthetic Data & Market Research](https://youtu.be/GldMMK6-_-g)
|
||||
- [Information Extraction with LangChain & `Kor`](https://youtu.be/SW1ZdqH0rRQ)
|
||||
- [Converting a LangChain App from OpenAI to OpenSource](https://youtu.be/KUDn7bVyIfc)
|
||||
- [Using LangChain `Output Parsers` to get what you want out of LLMs](https://youtu.be/UVn2NroKQCw)
|
||||
- [Building a LangChain Custom Medical Agent with Memory](https://youtu.be/6UFtRwWnHws)
|
||||
- [Understanding `ReACT` with LangChain](https://youtu.be/Eug2clsLtFs)
|
||||
- [`OpenAI Functions` + LangChain : Building a Multi Tool Agent](https://youtu.be/4KXK6c6TVXQ)
|
||||
- [What can you do with 16K tokens in LangChain?](https://youtu.be/z2aCZBAtWXs)
|
||||
- [Tagging and Extraction - Classification using `OpenAI Functions`](https://youtu.be/a8hMgIcUEnE)
|
||||
- ⛓ [HOW to Make Conversational Form with LangChain](https://youtu.be/IT93On2LB5k)
|
||||
|
||||
|
||||
### [LangChain](https://www.youtube.com/playlist?list=PLVEEucA9MYhOu89CX8H3MBZqayTbcCTMr) by [Prompt Engineering](https://www.youtube.com/@engineerprompt)
|
||||
- [LangChain Crash Course — All You Need to Know to Build Powerful Apps with LLMs](https://youtu.be/5-fc4Tlgmro)
|
||||
- [Working with MULTIPLE `PDF` Files in LangChain: `ChatGPT` for your Data](https://youtu.be/s5LhRdh5fu4)
|
||||
- [`ChatGPT` for YOUR OWN `PDF` files with LangChain](https://youtu.be/TLf90ipMzfE)
|
||||
- [Talk to YOUR DATA without OpenAI APIs: LangChain](https://youtu.be/wrD-fZvT6UI)
|
||||
- [Langchain: PDF Chat App (GUI) | ChatGPT for Your PDF FILES](https://youtu.be/RIWbalZ7sTo)
|
||||
- [LangFlow: Build Chatbots without Writing Code](https://youtu.be/KJ-ux3hre4s)
|
||||
- [LangChain: Giving Memory to LLMs](https://youtu.be/dxO6pzlgJiY)
|
||||
- [BEST OPEN Alternative to `OPENAI's EMBEDDINGs` for Retrieval QA: LangChain](https://youtu.be/ogEalPMUCSY)
|
||||
|
||||
|
||||
### LangChain by [Chat with data](https://www.youtube.com/@chatwithdata)
|
||||
- [LangChain Beginner's Tutorial for `Typescript`/`Javascript`](https://youtu.be/bH722QgRlhQ)
|
||||
- [`GPT-4` Tutorial: How to Chat With Multiple `PDF` Files (~1000 pages of Tesla's 10-K Annual Reports)](https://youtu.be/Ix9WIZpArm0)
|
||||
- [`GPT-4` & LangChain Tutorial: How to Chat With A 56-Page `PDF` Document (w/`Pinecone`)](https://youtu.be/ih9PBGVVOO4)
|
||||
- [LangChain & Supabase Tutorial: How to Build a ChatGPT Chatbot For Your Website](https://youtu.be/R2FMzcsmQY8)
|
||||
- [LangChain Agents: Build Personal Assistants For Your Data (Q&A with Harrison Chase and Mayo Oshin)](https://youtu.be/gVkF8cwfBLI)
|
||||
|
||||
|
||||
---------------------
|
||||
⛓ icon marks a new addition [last update 2023-07-05]
|
||||
@@ -1,216 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# PromptLayer\n",
|
||||
"\n",
|
||||
"<img src=\"https://promptlayer.com/logo.png\" height=\"300\">\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[PromptLayer](https://promptlayer.com) is a an observability platform for prompts and LLMs. In this guide we will go over how to setup the `PromptLayerCallbackHandler`. While PromptLayer does have LLMs that integrate directly with LangChain (eg [`PromptLayerOpenAI`](https://python.langchain.com/docs/modules/model_io/models/llms/integrations/promptlayer_openai)), this callback will be an easier and more feature rich way to integrate PromptLayer with any model on LangChain. \n",
|
||||
"\n",
|
||||
"This callback is also the recommended way to connect with PromptLayer when building Chains and Agents on LangChain."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"## Installation and Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install promptlayer --upgrade"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Getting API Credentials\n",
|
||||
"\n",
|
||||
"If you have not already create an account on [PromptLayer](https://www.promptlayer.com) and get an API key by clicking on the settings cog in the navbar\n",
|
||||
"Set it as an environment variabled called `PROMPTLAYER_API_KEY`\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Usage\n",
|
||||
"\n",
|
||||
"To get started with `PromptLayerCallbackHandler` is fairly simple, it takes two optional arguments:\n",
|
||||
"1. `pl_tags` - an optional list of strings that will be tags tracked on PromptLayer\n",
|
||||
"2. `pl_id_callback` - an optional function that will get a `promptlayer_request_id` as an argument. This id can be used with all of PromptLayers tracking features to track, metadata, scores, and prompt usage."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Simple Example\n",
|
||||
"\n",
|
||||
"In this simple example we use `PromptLayerCallbackHandler` with `ChatOpenAI`. We add a PromptLayer tag named `chatopenai`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"content=\"Sure, here's one:\\n\\nWhy did the tomato turn red?\\n\\nBecause it saw the salad dressing!\" additional_kwargs={} example=False\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.schema import (\n",
|
||||
" HumanMessage,\n",
|
||||
")\n",
|
||||
"from langchain.callbacks import PromptLayerCallbackHandler\n",
|
||||
"\n",
|
||||
"chat_llm = ChatOpenAI(\n",
|
||||
" temperature=0,\n",
|
||||
" callbacks=[PromptLayerCallbackHandler(pl_tags=[\"chatopenai\"])],\n",
|
||||
")\n",
|
||||
"llm_results = chat_llm(\n",
|
||||
" [\n",
|
||||
" HumanMessage(content=\"What comes after 1,2,3 ?\"),\n",
|
||||
" HumanMessage(content=\"Tell me another joke?\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"print(llm_results)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Full Featured Example\n",
|
||||
"\n",
|
||||
"In this example we unlock more of the power of PromptLayer.\n",
|
||||
"\n",
|
||||
"We are using the Prompt Registry and fetching the prompt called `example`.\n",
|
||||
"\n",
|
||||
"We also define a `pl_id_callback` function that tracks a score, metadata and the prompt used. Read more about tracking on [our docs](docs.promptlayer.com)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"prompt layer id 6050929\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\nToasterCo.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.callbacks import PromptLayerCallbackHandler\n",
|
||||
"import promptlayer\n",
|
||||
"\n",
|
||||
"def pl_id_callback(promptlayer_request_id):\n",
|
||||
" print(\"prompt layer id \", promptlayer_request_id)\n",
|
||||
" promptlayer.track.score(\n",
|
||||
" request_id=promptlayer_request_id, score=100\n",
|
||||
" ) # score is an integer 0-100\n",
|
||||
" promptlayer.track.metadata(\n",
|
||||
" request_id=promptlayer_request_id, metadata={\"foo\": \"bar\"}\n",
|
||||
" ) # metadata is a dictionary of key value pairs that is tracked on PromptLayer\n",
|
||||
" promptlayer.track.prompt(\n",
|
||||
" request_id=promptlayer_request_id,\n",
|
||||
" prompt_name=\"example\",\n",
|
||||
" prompt_input_variables={\"product\": \"toasters\"},\n",
|
||||
" version=1,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"openai_llm = OpenAI(\n",
|
||||
" model_name=\"text-davinci-002\",\n",
|
||||
" callbacks=[PromptLayerCallbackHandler(pl_id_callback=pl_id_callback)],\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"example_prompt = promptlayer.prompts.get(\"example\", version=1, langchain=True)\n",
|
||||
"openai_llm(example_prompt.format(product=\"toasters\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"That is all it takes! After setup all your requests will show up on the PromptLayer dasahboard.\n",
|
||||
"This callback also works with any LLM implemented on LangChain."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "base",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.8"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "c4fe2cd85a8d9e8baaec5340ce66faff1c77581a9f43e6c45e85e09b6fced008"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -2,6 +2,8 @@
|
||||
|
||||
>[JSON (JavaScript Object Notation)](https://en.wikipedia.org/wiki/JSON) is an open standard file format and data interchange format that uses human-readable text to store and transmit data objects consisting of attribute–value pairs and arrays (or other serializable values).
|
||||
|
||||
>[JSON Lines](https://jsonlines.org/) is a file format where each line is a valid JSON value.
|
||||
|
||||
import Example from "@snippets/modules/data_connection/document_loaders/how_to/json.mdx"
|
||||
|
||||
<Example/>
|
||||
|
||||
@@ -10,7 +10,7 @@ for you.
|
||||
|
||||
## Get started
|
||||
|
||||
This walkthrough showcases basic functionality related to VectorStores. A key part of working with vector stores is creating the vector to put in them, which is usually created via embeddings. Therefore, it is recommended that you familiarize yourself with the [text embedding model](/docs/modules/model_io/models/embeddings.html) interfaces before diving into this.
|
||||
This walkthrough showcases basic functionality related to VectorStores. A key part of working with vector stores is creating the vector to put in them, which is usually created via embeddings. Therefore, it is recommended that you familiarize yourself with the [text embedding model](/docs/modules/data_connection/text_embedding/) interfaces before diving into this.
|
||||
|
||||
import GetStarted from "@snippets/modules/data_connection/vectorstores/get_started.mdx"
|
||||
|
||||
|
||||
@@ -87,7 +87,6 @@ const config = {
|
||||
({
|
||||
docs: {
|
||||
sidebarPath: require.resolve("./sidebars.js"),
|
||||
editUrl: "https://github.com/hwchase17/langchain/edit/master/docs/",
|
||||
remarkPlugins: [
|
||||
[require("@docusaurus/remark-plugin-npm2yarn"), { sync: true }],
|
||||
],
|
||||
|
||||
15104
docs/docs_skeleton/package-lock.json
generated
Normal file
15104
docs/docs_skeleton/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
@@ -23,7 +23,7 @@
|
||||
"@docusaurus/preset-classic": "2.4.0",
|
||||
"@docusaurus/remark-plugin-npm2yarn": "^2.4.0",
|
||||
"@mdx-js/react": "^1.6.22",
|
||||
"@mendable/search": "^0.0.102",
|
||||
"@mendable/search": "^0.0.112-beta.7",
|
||||
"clsx": "^1.2.1",
|
||||
"json-loader": "^0.5.7",
|
||||
"process": "^0.11.10",
|
||||
|
||||
@@ -1864,6 +1864,14 @@
|
||||
"source": "/en/latest/modules/models/llms/integrations/writer.html",
|
||||
"destination": "/docs/modules/model_io/models/llms/integrations/writer"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/prompts/output_parsers.html",
|
||||
"destination": "/docs/modules/model_io/output_parsers/"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/prompts/output_parsers.html",
|
||||
"destination": "/docs/modules/model_io/output_parsers/"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/prompts/output_parsers/examples/datetime.html",
|
||||
"destination": "/docs/modules/model_io/output_parsers/datetime"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# YouTube tutorials
|
||||
# YouTube videos
|
||||
|
||||
This is a collection of `LangChain` videos on `YouTube`.
|
||||
⛓ icon marks a new addition [last update 2023-06-20]
|
||||
|
||||
### [Official LangChain YouTube channel](https://www.youtube.com/@LangChain)
|
||||
|
||||
@@ -9,7 +9,6 @@ This is a collection of `LangChain` videos on `YouTube`.
|
||||
- [LangChain and Weaviate with Harrison Chase and Bob van Luijt - Weaviate Podcast #36](https://youtu.be/lhby7Ql7hbk) by [Weaviate • Vector Database](https://www.youtube.com/@Weaviate)
|
||||
- [LangChain Demo + Q&A with Harrison Chase](https://youtu.be/zaYTXQFR0_s?t=788) by [Full Stack Deep Learning](https://www.youtube.com/@FullStackDeepLearning)
|
||||
- [LangChain Agents: Build Personal Assistants For Your Data (Q&A with Harrison Chase and Mayo Oshin)](https://youtu.be/gVkF8cwfBLI) by [Chat with data](https://www.youtube.com/@chatwithdata)
|
||||
- ⛓️ [LangChain "Agents in Production" Webinar](https://youtu.be/k8GNCCs16F4) by [LangChain](https://www.youtube.com/@LangChain)
|
||||
|
||||
## Videos (sorted by views)
|
||||
|
||||
@@ -31,6 +30,9 @@ This is a collection of `LangChain` videos on `YouTube`.
|
||||
- [`Weaviate` + LangChain for LLM apps presented by Erika Cardenas](https://youtu.be/7AGj4Td5Lgw) by [`Weaviate` • Vector Database](https://www.youtube.com/@Weaviate)
|
||||
- [Langchain Overview — How to Use Langchain & `ChatGPT`](https://youtu.be/oYVYIq0lOtI) by [Python In Office](https://www.youtube.com/@pythoninoffice6568)
|
||||
- [Langchain Overview - How to Use Langchain & `ChatGPT`](https://youtu.be/oYVYIq0lOtI) by [Python In Office](https://www.youtube.com/@pythoninoffice6568)
|
||||
- [LangChain Tutorials](https://www.youtube.com/watch?v=FuqdVNB_8c0&list=PL9V0lbeJ69brU-ojMpU1Y7Ic58Tap0Cw6) by [Edrick](https://www.youtube.com/@edrickdch):
|
||||
- [LangChain, Chroma DB, OpenAI Beginner Guide | ChatGPT with your PDF](https://youtu.be/FuqdVNB_8c0)
|
||||
- [LangChain 101: The Complete Beginner's Guide](https://youtu.be/P3MAbZ2eMUI)
|
||||
- [Custom langchain Agent & Tools with memory. Turn any `Python function` into langchain tool with Gpt 3](https://youtu.be/NIG8lXk0ULg) by [echohive](https://www.youtube.com/@echohive)
|
||||
- [LangChain: Run Language Models Locally - `Hugging Face Models`](https://youtu.be/Xxxuw4_iCzw) by [Prompt Engineering](https://www.youtube.com/@engineerprompt)
|
||||
- [`ChatGPT` with any `YouTube` video using langchain and `chromadb`](https://youtu.be/TQZfB2bzVwU) by [echohive](https://www.youtube.com/@echohive)
|
||||
@@ -46,154 +48,68 @@ This is a collection of `LangChain` videos on `YouTube`.
|
||||
- [Langchain + `Zapier` Agent](https://youtu.be/yribLAb-pxA) by [Merk](https://www.youtube.com/@merksworld)
|
||||
- [Connecting the Internet with `ChatGPT` (LLMs) using Langchain And Answers Your Questions](https://youtu.be/9Y0TBC63yZg) by [Kamalraj M M](https://www.youtube.com/@insightbuilder)
|
||||
- [Build More Powerful LLM Applications for Business’s with LangChain (Beginners Guide)](https://youtu.be/sp3-WLKEcBg) by[ No Code Blackbox](https://www.youtube.com/@nocodeblackbox)
|
||||
- ⛓️ [LangFlow LLM Agent Demo for 🦜🔗LangChain](https://youtu.be/zJxDHaWt-6o) by [Cobus Greyling](https://www.youtube.com/@CobusGreylingZA)
|
||||
- ⛓️ [Chatbot Factory: Streamline Python Chatbot Creation with LLMs and Langchain](https://youtu.be/eYer3uzrcuM) by [Finxter](https://www.youtube.com/@CobusGreylingZA)
|
||||
- ⛓️ [LangChain Tutorial - ChatGPT mit eigenen Daten](https://youtu.be/0XDLyY90E2c) by [Coding Crashkurse](https://www.youtube.com/@codingcrashkurse6429)
|
||||
- ⛓️ [Chat with a `CSV` | LangChain Agents Tutorial (Beginners)](https://youtu.be/tjeti5vXWOU) by [GoDataProf](https://www.youtube.com/@godataprof)
|
||||
- ⛓️ [Introdução ao Langchain - #Cortes - Live DataHackers](https://youtu.be/fw8y5VRei5Y) by [Prof. João Gabriel Lima](https://www.youtube.com/@profjoaogabriellima)
|
||||
- ⛓️ [LangChain: Level up `ChatGPT` !? | LangChain Tutorial Part 1](https://youtu.be/vxUGx8aZpDE) by [Code Affinity](https://www.youtube.com/@codeaffinitydev)
|
||||
- ⛓️ [KI schreibt krasses Youtube Skript 😲😳 | LangChain Tutorial Deutsch](https://youtu.be/QpTiXyK1jus) by [SimpleKI](https://www.youtube.com/@simpleki)
|
||||
- ⛓️ [Chat with Audio: Langchain, `Chroma DB`, OpenAI, and `Assembly AI`](https://youtu.be/Kjy7cx1r75g) by [AI Anytime](https://www.youtube.com/@AIAnytime)
|
||||
- ⛓️ [QA over documents with Auto vector index selection with Langchain router chains](https://youtu.be/9G05qybShv8) by [echohive](https://www.youtube.com/@echohive)
|
||||
- ⛓️ [Build your own custom LLM application with `Bubble.io` & Langchain (No Code & Beginner friendly)](https://youtu.be/O7NhQGu1m6c) by [No Code Blackbox](https://www.youtube.com/@nocodeblackbox)
|
||||
- ⛓️ [Simple App to Question Your Docs: Leveraging `Streamlit`, `Hugging Face Spaces`, LangChain, and `Claude`!](https://youtu.be/X4YbNECRr7o) by [Chris Alexiuk](https://www.youtube.com/@chrisalexiuk)
|
||||
- ⛓️ [LANGCHAIN AI- `ConstitutionalChainAI` + Databutton AI ASSISTANT Web App](https://youtu.be/5zIU6_rdJCU) by [Avra](https://www.youtube.com/@Avra_b)
|
||||
- ⛓️ [LANGCHAIN AI AUTONOMOUS AGENT WEB APP - 👶 `BABY AGI` 🤖 with EMAIL AUTOMATION using `DATABUTTON`](https://youtu.be/cvAwOGfeHgw) by [Avra](https://www.youtube.com/@Avra_b)
|
||||
- ⛓️ [The Future of Data Analysis: Using A.I. Models in Data Analysis (LangChain)](https://youtu.be/v_LIcVyg5dk) by [Absent Data](https://www.youtube.com/@absentdata)
|
||||
- ⛓️ [Memory in LangChain | Deep dive (python)](https://youtu.be/70lqvTFh_Yg) by [Eden Marco](https://www.youtube.com/@EdenMarco)
|
||||
- ⛓️ [9 LangChain UseCases | Beginner's Guide | 2023](https://youtu.be/zS8_qosHNMw) by [Data Science Basics](https://www.youtube.com/@datasciencebasics)
|
||||
- ⛓️ [Use Large Language Models in Jupyter Notebook | LangChain | Agents & Indexes](https://youtu.be/JSe11L1a_QQ) by [Abhinaw Tiwari](https://www.youtube.com/@AbhinawTiwariAT)
|
||||
- ⛓️ [How to Talk to Your Langchain Agent | `11 Labs` + `Whisper`](https://youtu.be/N4k459Zw2PU) by [VRSEN](https://www.youtube.com/@vrsen)
|
||||
- ⛓️ [LangChain Deep Dive: 5 FUN AI App Ideas To Build Quickly and Easily](https://youtu.be/mPYEPzLkeks) by [James NoCode](https://www.youtube.com/@jamesnocode)
|
||||
- ⛓️ [BEST OPEN Alternative to OPENAI's EMBEDDINGs for Retrieval QA: LangChain](https://youtu.be/ogEalPMUCSY) by [Prompt Engineering](https://www.youtube.com/@engineerprompt)
|
||||
- ⛓️ [LangChain 101: Models](https://youtu.be/T6c_XsyaNSQ) by [Mckay Wrigley](https://www.youtube.com/@realmckaywrigley)
|
||||
- ⛓️ [LangChain with JavaScript Tutorial #1 | Setup & Using LLMs](https://youtu.be/W3AoeMrg27o) by [Leon van Zyl](https://www.youtube.com/@leonvanzyl)
|
||||
- ⛓️ [LangChain Overview & Tutorial for Beginners: Build Powerful AI Apps Quickly & Easily (ZERO CODE)](https://youtu.be/iI84yym473Q) by [James NoCode](https://www.youtube.com/@jamesnocode)
|
||||
- ⛓️ [LangChain In Action: Real-World Use Case With Step-by-Step Tutorial](https://youtu.be/UO699Szp82M) by [Rabbitmetrics](https://www.youtube.com/@rabbitmetrics)
|
||||
- ⛓️ [Summarizing and Querying Multiple Papers with LangChain](https://youtu.be/p_MQRWH5Y6k) by [Automata Learning Lab](https://www.youtube.com/@automatalearninglab)
|
||||
- ⛓️ [Using Langchain (and `Replit`) through `Tana`, ask `Google`/`Wikipedia`/`Wolfram Alpha` to fill out a table](https://youtu.be/Webau9lEzoI) by [Stian Håklev](https://www.youtube.com/@StianHaklev)
|
||||
- ⛓️ [Langchain PDF App (GUI) | Create a ChatGPT For Your `PDF` in Python](https://youtu.be/wUAUdEw5oxM) by [Alejandro AO - Software & Ai](https://www.youtube.com/@alejandro_ao)
|
||||
- ⛓️ [Auto-GPT with LangChain 🔥 | Create Your Own Personal AI Assistant](https://youtu.be/imDfPmMKEjM) by [Data Science Basics](https://www.youtube.com/@datasciencebasics)
|
||||
- ⛓️ [Create Your OWN Slack AI Assistant with Python & LangChain](https://youtu.be/3jFXRNn2Bu8) by [Dave Ebbelaar](https://www.youtube.com/@daveebbelaar)
|
||||
- ⛓️ [How to Create LOCAL Chatbots with GPT4All and LangChain [Full Guide]](https://youtu.be/4p1Fojur8Zw) by [Liam Ottley](https://www.youtube.com/@LiamOttley)
|
||||
- ⛓️ [Build a `Multilingual PDF` Search App with LangChain, `Cohere` and `Bubble`](https://youtu.be/hOrtuumOrv8) by [Menlo Park Lab](https://www.youtube.com/@menloparklab)
|
||||
- ⛓️ [Building a LangChain Agent (code-free!) Using `Bubble` and `Flowise`](https://youtu.be/jDJIIVWTZDE) by [Menlo Park Lab](https://www.youtube.com/@menloparklab)
|
||||
- ⛓️ [Build a LangChain-based Semantic PDF Search App with No-Code Tools Bubble and Flowise](https://youtu.be/s33v5cIeqA4) by [Menlo Park Lab](https://www.youtube.com/@menloparklab)
|
||||
- ⛓️ [LangChain Memory Tutorial | Building a ChatGPT Clone in Python](https://youtu.be/Cwq91cj2Pnc) by [Alejandro AO - Software & Ai](https://www.youtube.com/@alejandro_ao)
|
||||
- ⛓️ [ChatGPT For Your DATA | Chat with Multiple Documents Using LangChain](https://youtu.be/TeDgIDqQmzs) by [Data Science Basics](https://www.youtube.com/@datasciencebasics)
|
||||
- ⛓️ [`Llama Index`: Chat with Documentation using URL Loader](https://youtu.be/XJRoDEctAwA) by [Merk](https://www.youtube.com/@merksworld)
|
||||
- ⛓️ [Using OpenAI, LangChain, and `Gradio` to Build Custom GenAI Applications](https://youtu.be/1MsmqMg3yUc) by [David Hundley](https://www.youtube.com/@dkhundley)
|
||||
- ⛓️ [LangChain, Chroma DB, OpenAI Beginner Guide | ChatGPT with your PDF](https://youtu.be/FuqdVNB_8c0)
|
||||
- [LangChain Crash Course: Build an AutoGPT app in 25 minutes](https://youtu.be/MlK6SIjcjE8) by [Nicholas Renotte](https://www.youtube.com/@NicholasRenotte)
|
||||
- [LangChain Crash Course - Build apps with language models](https://youtu.be/LbT1yp6quS8) by [Patrick Loeber](https://www.youtube.com/@patloeber)
|
||||
- [LangChain Explained in 13 Minutes | QuickStart Tutorial for Beginners](https://youtu.be/aywZrzNaKjs) by [Rabbitmetrics](https://www.youtube.com/@rabbitmetrics)
|
||||
- [LangFlow LLM Agent Demo for 🦜🔗LangChain](https://youtu.be/zJxDHaWt-6o) by [Cobus Greyling](https://www.youtube.com/@CobusGreylingZA)
|
||||
- [Chatbot Factory: Streamline Python Chatbot Creation with LLMs and Langchain](https://youtu.be/eYer3uzrcuM) by [Finxter](https://www.youtube.com/@CobusGreylingZA)
|
||||
- [LangChain Tutorial - ChatGPT mit eigenen Daten](https://youtu.be/0XDLyY90E2c) by [Coding Crashkurse](https://www.youtube.com/@codingcrashkurse6429)
|
||||
- [Chat with a `CSV` | LangChain Agents Tutorial (Beginners)](https://youtu.be/tjeti5vXWOU) by [GoDataProf](https://www.youtube.com/@godataprof)
|
||||
- [Introdução ao Langchain - #Cortes - Live DataHackers](https://youtu.be/fw8y5VRei5Y) by [Prof. João Gabriel Lima](https://www.youtube.com/@profjoaogabriellima)
|
||||
- [LangChain: Level up `ChatGPT` !? | LangChain Tutorial Part 1](https://youtu.be/vxUGx8aZpDE) by [Code Affinity](https://www.youtube.com/@codeaffinitydev)
|
||||
- [KI schreibt krasses Youtube Skript 😲😳 | LangChain Tutorial Deutsch](https://youtu.be/QpTiXyK1jus) by [SimpleKI](https://www.youtube.com/@simpleki)
|
||||
- [Chat with Audio: Langchain, `Chroma DB`, OpenAI, and `Assembly AI`](https://youtu.be/Kjy7cx1r75g) by [AI Anytime](https://www.youtube.com/@AIAnytime)
|
||||
- [QA over documents with Auto vector index selection with Langchain router chains](https://youtu.be/9G05qybShv8) by [echohive](https://www.youtube.com/@echohive)
|
||||
- [Build your own custom LLM application with `Bubble.io` & Langchain (No Code & Beginner friendly)](https://youtu.be/O7NhQGu1m6c) by [No Code Blackbox](https://www.youtube.com/@nocodeblackbox)
|
||||
- [Simple App to Question Your Docs: Leveraging `Streamlit`, `Hugging Face Spaces`, LangChain, and `Claude`!](https://youtu.be/X4YbNECRr7o) by [Chris Alexiuk](https://www.youtube.com/@chrisalexiuk)
|
||||
- [LANGCHAIN AI- `ConstitutionalChainAI` + Databutton AI ASSISTANT Web App](https://youtu.be/5zIU6_rdJCU) by [Avra](https://www.youtube.com/@Avra_b)
|
||||
- [LANGCHAIN AI AUTONOMOUS AGENT WEB APP - 👶 `BABY AGI` 🤖 with EMAIL AUTOMATION using `DATABUTTON`](https://youtu.be/cvAwOGfeHgw) by [Avra](https://www.youtube.com/@Avra_b)
|
||||
- [The Future of Data Analysis: Using A.I. Models in Data Analysis (LangChain)](https://youtu.be/v_LIcVyg5dk) by [Absent Data](https://www.youtube.com/@absentdata)
|
||||
- [Memory in LangChain | Deep dive (python)](https://youtu.be/70lqvTFh_Yg) by [Eden Marco](https://www.youtube.com/@EdenMarco)
|
||||
- [9 LangChain UseCases | Beginner's Guide | 2023](https://youtu.be/zS8_qosHNMw) by [Data Science Basics](https://www.youtube.com/@datasciencebasics)
|
||||
- [Use Large Language Models in Jupyter Notebook | LangChain | Agents & Indexes](https://youtu.be/JSe11L1a_QQ) by [Abhinaw Tiwari](https://www.youtube.com/@AbhinawTiwariAT)
|
||||
- [How to Talk to Your Langchain Agent | `11 Labs` + `Whisper`](https://youtu.be/N4k459Zw2PU) by [VRSEN](https://www.youtube.com/@vrsen)
|
||||
- [LangChain Deep Dive: 5 FUN AI App Ideas To Build Quickly and Easily](https://youtu.be/mPYEPzLkeks) by [James NoCode](https://www.youtube.com/@jamesnocode)
|
||||
- [BEST OPEN Alternative to OPENAI's EMBEDDINGs for Retrieval QA: LangChain](https://youtu.be/ogEalPMUCSY) by [Prompt Engineering](https://www.youtube.com/@engineerprompt)
|
||||
- [LangChain 101: Models](https://youtu.be/T6c_XsyaNSQ) by [Mckay Wrigley](https://www.youtube.com/@realmckaywrigley)
|
||||
- [LangChain with JavaScript Tutorial #1 | Setup & Using LLMs](https://youtu.be/W3AoeMrg27o) by [Leon van Zyl](https://www.youtube.com/@leonvanzyl)
|
||||
- [LangChain Overview & Tutorial for Beginners: Build Powerful AI Apps Quickly & Easily (ZERO CODE)](https://youtu.be/iI84yym473Q) by [James NoCode](https://www.youtube.com/@jamesnocode)
|
||||
- [LangChain In Action: Real-World Use Case With Step-by-Step Tutorial](https://youtu.be/UO699Szp82M) by [Rabbitmetrics](https://www.youtube.com/@rabbitmetrics)
|
||||
- [Summarizing and Querying Multiple Papers with LangChain](https://youtu.be/p_MQRWH5Y6k) by [Automata Learning Lab](https://www.youtube.com/@automatalearninglab)
|
||||
- [Using Langchain (and `Replit`) through `Tana`, ask `Google`/`Wikipedia`/`Wolfram Alpha` to fill out a table](https://youtu.be/Webau9lEzoI) by [Stian Håklev](https://www.youtube.com/@StianHaklev)
|
||||
- [Langchain PDF App (GUI) | Create a ChatGPT For Your `PDF` in Python](https://youtu.be/wUAUdEw5oxM) by [Alejandro AO - Software & Ai](https://www.youtube.com/@alejandro_ao)
|
||||
- [Auto-GPT with LangChain 🔥 | Create Your Own Personal AI Assistant](https://youtu.be/imDfPmMKEjM) by [Data Science Basics](https://www.youtube.com/@datasciencebasics)
|
||||
- [Create Your OWN Slack AI Assistant with Python & LangChain](https://youtu.be/3jFXRNn2Bu8) by [Dave Ebbelaar](https://www.youtube.com/@daveebbelaar)
|
||||
- [How to Create LOCAL Chatbots with GPT4All and LangChain [Full Guide]](https://youtu.be/4p1Fojur8Zw) by [Liam Ottley](https://www.youtube.com/@LiamOttley)
|
||||
- [Build a `Multilingual PDF` Search App with LangChain, `Cohere` and `Bubble`](https://youtu.be/hOrtuumOrv8) by [Menlo Park Lab](https://www.youtube.com/@menloparklab)
|
||||
- [Building a LangChain Agent (code-free!) Using `Bubble` and `Flowise`](https://youtu.be/jDJIIVWTZDE) by [Menlo Park Lab](https://www.youtube.com/@menloparklab)
|
||||
- [Build a LangChain-based Semantic PDF Search App with No-Code Tools Bubble and Flowise](https://youtu.be/s33v5cIeqA4) by [Menlo Park Lab](https://www.youtube.com/@menloparklab)
|
||||
- [LangChain Memory Tutorial | Building a ChatGPT Clone in Python](https://youtu.be/Cwq91cj2Pnc) by [Alejandro AO - Software & Ai](https://www.youtube.com/@alejandro_ao)
|
||||
- [ChatGPT For Your DATA | Chat with Multiple Documents Using LangChain](https://youtu.be/TeDgIDqQmzs) by [Data Science Basics](https://www.youtube.com/@datasciencebasics)
|
||||
- [`Llama Index`: Chat with Documentation using URL Loader](https://youtu.be/XJRoDEctAwA) by [Merk](https://www.youtube.com/@merksworld)
|
||||
- [Using OpenAI, LangChain, and `Gradio` to Build Custom GenAI Applications](https://youtu.be/1MsmqMg3yUc) by [David Hundley](https://www.youtube.com/@dkhundley)
|
||||
- [LangChain, Chroma DB, OpenAI Beginner Guide | ChatGPT with your PDF](https://youtu.be/FuqdVNB_8c0)
|
||||
- ⛓ [Build AI chatbot with custom knowledge base using OpenAI API and GPT Index](https://youtu.be/vDZAZuaXf48) by [Irina Nik](https://www.youtube.com/@irina_nik)
|
||||
- ⛓ [Build Your Own Auto-GPT Apps with LangChain (Python Tutorial)](https://youtu.be/NYSWn1ipbgg) by [Dave Ebbelaar](https://www.youtube.com/@daveebbelaar)
|
||||
- ⛓ [Chat with Multiple `PDFs` | LangChain App Tutorial in Python (Free LLMs and Embeddings)](https://youtu.be/dXxQ0LR-3Hg) by [Alejandro AO - Software & Ai](https://www.youtube.com/@alejandro_ao)
|
||||
- ⛓ [Chat with a `CSV` | `LangChain Agents` Tutorial (Beginners)](https://youtu.be/tjeti5vXWOU) by [Alejandro AO - Software & Ai](https://www.youtube.com/@alejandro_ao)
|
||||
- ⛓ [Create Your Own ChatGPT with `PDF` Data in 5 Minutes (LangChain Tutorial)](https://youtu.be/au2WVVGUvc8) by [Liam Ottley](https://www.youtube.com/@LiamOttley)
|
||||
- ⛓ [Using ChatGPT with YOUR OWN Data. This is magical. (LangChain OpenAI API)](https://youtu.be/9AXP7tCI9PI) by [TechLead](https://www.youtube.com/@TechLead)
|
||||
- ⛓ [Build a Custom Chatbot with OpenAI: `GPT-Index` & LangChain | Step-by-Step Tutorial](https://youtu.be/FIDv6nc4CgU) by [Fabrikod](https://www.youtube.com/@fabrikod)
|
||||
- ⛓ [`Flowise` is an open source no-code UI visual tool to build 🦜🔗LangChain applications](https://youtu.be/CovAPtQPU0k) by [Cobus Greyling](https://www.youtube.com/@CobusGreylingZA)
|
||||
- ⛓ [LangChain & GPT 4 For Data Analysis: The `Pandas` Dataframe Agent](https://youtu.be/rFQ5Kmkd4jc) by [Rabbitmetrics](https://www.youtube.com/@rabbitmetrics)
|
||||
- ⛓ [`GirlfriendGPT` - AI girlfriend with LangChain](https://youtu.be/LiN3D1QZGQw) by [Toolfinder AI](https://www.youtube.com/@toolfinderai)
|
||||
- ⛓ [`PrivateGPT`: Chat to your FILES OFFLINE and FREE [Installation and Tutorial]](https://youtu.be/G7iLllmx4qc) by [Prompt Engineering](https://www.youtube.com/@engineerprompt)
|
||||
- ⛓ [How to build with Langchain 10x easier | ⛓️ LangFlow & `Flowise`](https://youtu.be/Ya1oGL7ZTvU) by [AI Jason](https://www.youtube.com/@AIJasonZ)
|
||||
- ⛓ [Getting Started With LangChain In 20 Minutes- Build Celebrity Search Application](https://youtu.be/_FpT1cwcSLg) by [Krish Naik](https://www.youtube.com/@krishnaik06)
|
||||
|
||||
|
||||
## Tutorial Series
|
||||
|
||||
|
||||
⛓ icon marks a new addition [last update 2023-05-15]
|
||||
|
||||
### DeepLearning.AI course
|
||||
⛓[LangChain for LLM Application Development](https://learn.deeplearning.ai/langchain) by Harrison Chase presented by [Andrew Ng](https://en.wikipedia.org/wiki/Andrew_Ng)
|
||||
|
||||
### Handbook
|
||||
[LangChain AI Handbook](https://www.pinecone.io/learn/langchain/) By **James Briggs** and **Francisco Ingham**
|
||||
|
||||
### Tutorials
|
||||
[LangChain Tutorials](https://www.youtube.com/watch?v=FuqdVNB_8c0&list=PL9V0lbeJ69brU-ojMpU1Y7Ic58Tap0Cw6) by [Edrick](https://www.youtube.com/@edrickdch):
|
||||
- ⛓ [LangChain, Chroma DB, OpenAI Beginner Guide | ChatGPT with your PDF](https://youtu.be/FuqdVNB_8c0)
|
||||
- ⛓ [LangChain 101: The Complete Beginner's Guide](https://youtu.be/P3MAbZ2eMUI)
|
||||
|
||||
[LangChain Crash Course: Build an AutoGPT app in 25 minutes](https://youtu.be/MlK6SIjcjE8) by [Nicholas Renotte](https://www.youtube.com/@NicholasRenotte)
|
||||
|
||||
|
||||
[LangChain Crash Course - Build apps with language models](https://youtu.be/LbT1yp6quS8) by [Patrick Loeber](https://www.youtube.com/@patloeber)
|
||||
|
||||
|
||||
[LangChain Explained in 13 Minutes | QuickStart Tutorial for Beginners](https://youtu.be/aywZrzNaKjs) by [Rabbitmetrics](https://www.youtube.com/@rabbitmetrics)
|
||||
|
||||
|
||||
### [LangChain for Gen AI and LLMs](https://www.youtube.com/playlist?list=PLIUOU7oqGTLieV9uTIFMm6_4PXg-hlN6F) by [James Briggs](https://www.youtube.com/@jamesbriggs):
|
||||
- #1 [Getting Started with `GPT-3` vs. Open Source LLMs](https://youtu.be/nE2skSRWTTs)
|
||||
- #2 [Prompt Templates for `GPT 3.5` and other LLMs](https://youtu.be/RflBcK0oDH0)
|
||||
- #3 [LLM Chains using `GPT 3.5` and other LLMs](https://youtu.be/S8j9Tk0lZHU)
|
||||
- #4 [Chatbot Memory for `Chat-GPT`, `Davinci` + other LLMs](https://youtu.be/X05uK0TZozM)
|
||||
- #5 [Chat with OpenAI in LangChain](https://youtu.be/CnAgB3A5OlU)
|
||||
- ⛓ #6 [Fixing LLM Hallucinations with Retrieval Augmentation in LangChain](https://youtu.be/kvdVduIJsc8)
|
||||
- ⛓ #7 [LangChain Agents Deep Dive with GPT 3.5](https://youtu.be/jSP-gSEyVeI)
|
||||
- ⛓ #8 [Create Custom Tools for Chatbots in LangChain](https://youtu.be/q-HNphrWsDE)
|
||||
- ⛓ #9 [Build Conversational Agents with Vector DBs](https://youtu.be/H6bCqqw9xyI)
|
||||
|
||||
|
||||
### [LangChain 101](https://www.youtube.com/playlist?list=PLqZXAkvF1bPNQER9mLmDbntNfSpzdDIU5) by [Data Independent](https://www.youtube.com/@DataIndependent):
|
||||
- [What Is LangChain? - LangChain + `ChatGPT` Overview](https://youtu.be/_v_fgW2SkkQ)
|
||||
- [Quickstart Guide](https://youtu.be/kYRB-vJFy38)
|
||||
- [Beginner Guide To 7 Essential Concepts](https://youtu.be/2xxziIWmaSA)
|
||||
- [`OpenAI` + `Wolfram Alpha`](https://youtu.be/UijbzCIJ99g)
|
||||
- [Ask Questions On Your Custom (or Private) Files](https://youtu.be/EnT-ZTrcPrg)
|
||||
- [Connect `Google Drive Files` To `OpenAI`](https://youtu.be/IqqHqDcXLww)
|
||||
- [`YouTube Transcripts` + `OpenAI`](https://youtu.be/pNcQ5XXMgH4)
|
||||
- [Question A 300 Page Book (w/ `OpenAI` + `Pinecone`)](https://youtu.be/h0DHDp1FbmQ)
|
||||
- [Workaround `OpenAI's` Token Limit With Chain Types](https://youtu.be/f9_BWhCI4Zo)
|
||||
- [Build Your Own OpenAI + LangChain Web App in 23 Minutes](https://youtu.be/U_eV8wfMkXU)
|
||||
- [Working With The New `ChatGPT API`](https://youtu.be/e9P7FLi5Zy8)
|
||||
- [OpenAI + LangChain Wrote Me 100 Custom Sales Emails](https://youtu.be/y1pyAQM-3Bo)
|
||||
- [Structured Output From `OpenAI` (Clean Dirty Data)](https://youtu.be/KwAXfey-xQk)
|
||||
- [Connect `OpenAI` To +5,000 Tools (LangChain + `Zapier`)](https://youtu.be/7tNm0yiDigU)
|
||||
- [Use LLMs To Extract Data From Text (Expert Mode)](https://youtu.be/xZzvwR9jdPA)
|
||||
- ⛓ [Extract Insights From Interview Transcripts Using LLMs](https://youtu.be/shkMOHwJ4SM)
|
||||
- ⛓ [5 Levels Of LLM Summarizing: Novice to Expert](https://youtu.be/qaPMdcCqtWk)
|
||||
|
||||
|
||||
### [LangChain How to and guides](https://www.youtube.com/playlist?list=PL8motc6AQftk1Bs42EW45kwYbyJ4jOdiZ) by [Sam Witteveen](https://www.youtube.com/@samwitteveenai):
|
||||
- [LangChain Basics - LLMs & PromptTemplates with Colab](https://youtu.be/J_0qvRt4LNk)
|
||||
- [LangChain Basics - Tools and Chains](https://youtu.be/hI2BY7yl_Ac)
|
||||
- [`ChatGPT API` Announcement & Code Walkthrough with LangChain](https://youtu.be/phHqvLHCwH4)
|
||||
- [Conversations with Memory (explanation & code walkthrough)](https://youtu.be/X550Zbz_ROE)
|
||||
- [Chat with `Flan20B`](https://youtu.be/VW5LBavIfY4)
|
||||
- [Using `Hugging Face Models` locally (code walkthrough)](https://youtu.be/Kn7SX2Mx_Jk)
|
||||
- [`PAL` : Program-aided Language Models with LangChain code](https://youtu.be/dy7-LvDu-3s)
|
||||
- [Building a Summarization System with LangChain and `GPT-3` - Part 1](https://youtu.be/LNq_2s_H01Y)
|
||||
- [Building a Summarization System with LangChain and `GPT-3` - Part 2](https://youtu.be/d-yeHDLgKHw)
|
||||
- [Microsoft's `Visual ChatGPT` using LangChain](https://youtu.be/7YEiEyfPF5U)
|
||||
- [LangChain Agents - Joining Tools and Chains with Decisions](https://youtu.be/ziu87EXZVUE)
|
||||
- [Comparing LLMs with LangChain](https://youtu.be/rFNG0MIEuW0)
|
||||
- [Using `Constitutional AI` in LangChain](https://youtu.be/uoVqNFDwpX4)
|
||||
- [Talking to `Alpaca` with LangChain - Creating an Alpaca Chatbot](https://youtu.be/v6sF8Ed3nTE)
|
||||
- [Talk to your `CSV` & `Excel` with LangChain](https://youtu.be/xQ3mZhw69bc)
|
||||
- [`BabyAGI`: Discover the Power of Task-Driven Autonomous Agents!](https://youtu.be/QBcDLSE2ERA)
|
||||
- [Improve your `BabyAGI` with LangChain](https://youtu.be/DRgPyOXZ-oE)
|
||||
- ⛓ [Master `PDF` Chat with LangChain - Your essential guide to queries on documents](https://youtu.be/ZzgUqFtxgXI)
|
||||
- ⛓ [Using LangChain with `DuckDuckGO` `Wikipedia` & `PythonREPL` Tools](https://youtu.be/KerHlb8nuVc)
|
||||
- ⛓ [Building Custom Tools and Agents with LangChain (gpt-3.5-turbo)](https://youtu.be/biS8G8x8DdA)
|
||||
- ⛓ [LangChain Retrieval QA Over Multiple Files with `ChromaDB`](https://youtu.be/3yPBVii7Ct0)
|
||||
- ⛓ [LangChain Retrieval QA with Instructor Embeddings & `ChromaDB` for PDFs](https://youtu.be/cFCGUjc33aU)
|
||||
- ⛓ [LangChain + Retrieval Local LLMs for Retrieval QA - No OpenAI!!!](https://youtu.be/9ISVjh8mdlA)
|
||||
|
||||
|
||||
### [LangChain](https://www.youtube.com/playlist?list=PLVEEucA9MYhOu89CX8H3MBZqayTbcCTMr) by [Prompt Engineering](https://www.youtube.com/@engineerprompt):
|
||||
- [LangChain Crash Course — All You Need to Know to Build Powerful Apps with LLMs](https://youtu.be/5-fc4Tlgmro)
|
||||
- [Working with MULTIPLE `PDF` Files in LangChain: `ChatGPT` for your Data](https://youtu.be/s5LhRdh5fu4)
|
||||
- [`ChatGPT` for YOUR OWN `PDF` files with LangChain](https://youtu.be/TLf90ipMzfE)
|
||||
- [Talk to YOUR DATA without OpenAI APIs: LangChain](https://youtu.be/wrD-fZvT6UI)
|
||||
- ⛓️ [CHATGPT For WEBSITES: Custom ChatBOT](https://youtu.be/RBnuhhmD21U)
|
||||
|
||||
|
||||
### LangChain by [Chat with data](https://www.youtube.com/@chatwithdata)
|
||||
- [LangChain Beginner's Tutorial for `Typescript`/`Javascript`](https://youtu.be/bH722QgRlhQ)
|
||||
- [`GPT-4` Tutorial: How to Chat With Multiple `PDF` Files (~1000 pages of Tesla's 10-K Annual Reports)](https://youtu.be/Ix9WIZpArm0)
|
||||
- [`GPT-4` & LangChain Tutorial: How to Chat With A 56-Page `PDF` Document (w/`Pinecone`)](https://youtu.be/ih9PBGVVOO4)
|
||||
- ⛓ [LangChain & Supabase Tutorial: How to Build a ChatGPT Chatbot For Your Website](https://youtu.be/R2FMzcsmQY8)
|
||||
|
||||
|
||||
### [Get SH\*T Done with Prompt Engineering and LangChain](https://www.youtube.com/watch?v=muXbPpG_ys4&list=PLEJK-H61Xlwzm5FYLDdKt_6yibO33zoMW) by [Venelin Valkov](https://www.youtube.com/@venelin_valkov)
|
||||
### [Prompt Engineering and LangChain](https://www.youtube.com/watch?v=muXbPpG_ys4&list=PLEJK-H61Xlwzm5FYLDdKt_6yibO33zoMW) by [Venelin Valkov](https://www.youtube.com/@venelin_valkov)
|
||||
- [Getting Started with LangChain: Load Custom Data, Run OpenAI Models, Embeddings and `ChatGPT`](https://www.youtube.com/watch?v=muXbPpG_ys4)
|
||||
- [Loaders, Indexes & Vectorstores in LangChain: Question Answering on `PDF` files with `ChatGPT`](https://www.youtube.com/watch?v=FQnvfR8Dmr0)
|
||||
- [LangChain Models: `ChatGPT`, `Flan Alpaca`, `OpenAI Embeddings`, Prompt Templates & Streaming](https://www.youtube.com/watch?v=zy6LiK5F5-s)
|
||||
- [LangChain Chains: Use `ChatGPT` to Build Conversational Agents, Summaries and Q&A on Text With LLMs](https://www.youtube.com/watch?v=h1tJZQPcimM)
|
||||
- [Analyze Custom CSV Data with `GPT-4` using Langchain](https://www.youtube.com/watch?v=Ew3sGdX8at4)
|
||||
- ⛓ [Build ChatGPT Chatbots with LangChain Memory: Understanding and Implementing Memory in Conversations](https://youtu.be/CyuUlf54wTs)
|
||||
- [Build ChatGPT Chatbots with LangChain Memory: Understanding and Implementing Memory in Conversations](https://youtu.be/CyuUlf54wTs)
|
||||
|
||||
|
||||
---------------------
|
||||
⛓ icon marks a new addition [last update 2023-05-15]
|
||||
⛓ icon marks a new addition [last update 2023-06-20]
|
||||
|
||||
28
docs/extras/ecosystem/integrations/airtable.md
Normal file
28
docs/extras/ecosystem/integrations/airtable.md
Normal file
@@ -0,0 +1,28 @@
|
||||
# Airtable
|
||||
|
||||
>[Airtable](https://en.wikipedia.org/wiki/Airtable) is a cloud collaboration service.
|
||||
`Airtable` is a spreadsheet-database hybrid, with the features of a database but applied to a spreadsheet.
|
||||
> The fields in an Airtable table are similar to cells in a spreadsheet, but have types such as 'checkbox',
|
||||
> 'phone number', and 'drop-down list', and can reference file attachments like images.
|
||||
|
||||
>Users can create a database, set up column types, add records, link tables to one another, collaborate, sort records
|
||||
> and publish views to external websites.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
```bash
|
||||
pip install pyairtable
|
||||
```
|
||||
|
||||
* Get your [API key](https://support.airtable.com/docs/creating-and-using-api-keys-and-access-tokens).
|
||||
* Get the [ID of your base](https://airtable.com/developers/web/api/introduction).
|
||||
* Get the [table ID from the table url](https://www.highviewapps.com/kb/where-can-i-find-the-airtable-base-id-and-table-id/#:~:text=Both%20the%20Airtable%20Base%20ID,URL%20that%20begins%20with%20tbl).
|
||||
|
||||
## Document Loader
|
||||
|
||||
|
||||
```python
|
||||
from langchain.document_loaders import AirtableLoader
|
||||
```
|
||||
|
||||
See an [example](/docs/modules/data_connection/document_loaders/integrations/airtable.html).
|
||||
@@ -2,203 +2,121 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "944e4194",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Arthur LangChain integration"
|
||||
"# Arthur"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b1ccdfe8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[Arthur](https://www.arthur.ai/) is a model monitoring and observability platform.\n",
|
||||
"[Arthur](https://arthur.ai) is a model monitoring and observability platform.\n",
|
||||
"\n",
|
||||
"This notebook shows how to register LLMs (chat and non-chat) as models with the Arthur platform. Then we show how to set up langchain LLMs with an Arthur callback that will automatically log model inferences to Arthur.\n",
|
||||
"The following guide shows how to run a registered chat LLM with the Arthur callback handler to automatically log model inferences to Arthur.\n",
|
||||
"\n",
|
||||
"For more information about how to use the Arthur SDK, visit our [docs](http://docs.arthur.ai), in particular our [model onboarding guide](https://docs.arthur.ai/user-guide/walkthroughs/model-onboarding/index.html)"
|
||||
"If you do not have a model currently onboarded to Arthur, visit our [onboarding guide for generative text models](https://docs.arthur.ai/user-guide/walkthroughs/model-onboarding/generative_text_onboarding.html). For more information about how to use the Arthur SDK, visit our [docs](https://docs.arthur.ai/)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "961c6691",
|
||||
"metadata": {},
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"id": "y8ku6X96sebl"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.callbacks import ArthurCallbackHandler\n",
|
||||
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
|
||||
"from langchain.chat_models import ChatOpenAI, ChatAnthropic\n",
|
||||
"from langchain.schema import HumanMessage\n",
|
||||
"from langchain.llms import OpenAI, Cohere, HuggingFacePipeline"
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.schema import HumanMessage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Place Arthur credentials here"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "a23d1963",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"id": "Me3prhqjsoqz"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from arthurai import ArthurAI\n",
|
||||
"from arthurai.common.constants import InputType, OutputType, Stage, ValueType\n",
|
||||
"from arthurai.core.attributes import ArthurAttribute, AttributeCategory"
|
||||
"arthur_url = \"https://app.arthur.ai\"\n",
|
||||
"arthur_login = \"your-arthur-login-username-here\"\n",
|
||||
"arthur_model_id = \"your-arthur-model-id-here\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4d1b90c0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ArthurModel for chatbot with only input text and output text attributes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1a4a4a8a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Connect to Arthur client"
|
||||
"Create Langchain LLM with Arthur callback handler"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "f49e9b79",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"id": "9Hq9snQasynA"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"arthur_url = \"https://app.arthur.ai\"\n",
|
||||
"arthur_login = \"your-username-here\"\n",
|
||||
"arthur = ArthurAI(url=arthur_url, login=arthur_login)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c6e063bf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Before you can register model inferences to Arthur, you must have a registered model with an ID in the Arthur platform. We will provide this ID to the ArthurCallbackHandler.\n",
|
||||
"\n",
|
||||
"You can register a model with Arthur here in the notebook using this `register_chat_llm()` function. This function returns the ID of the model saved to the platform. To use the function, uncomment `arthur_model_chatbot_id = register_chat_llm()` in the cell below."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "31b17b5e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def register_chat_llm():\n",
|
||||
"\n",
|
||||
" arthur_model = arthur.model(\n",
|
||||
" display_name=\"LangChainChat\",\n",
|
||||
" input_type=InputType.NLP,\n",
|
||||
" output_type=OutputType.TokenSequence\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" arthur_model._add_attribute_to_model(ArthurAttribute(\n",
|
||||
" name=\"my_input_text\",\n",
|
||||
" stage=Stage.ModelPipelineInput,\n",
|
||||
" value_type=ValueType.Unstructured_Text,\n",
|
||||
" categorical=True,\n",
|
||||
" is_unique=True\n",
|
||||
" ))\n",
|
||||
" arthur_model._add_attribute_to_model(ArthurAttribute(\n",
|
||||
" name=\"my_output_text\",\n",
|
||||
" stage=Stage.PredictedValue,\n",
|
||||
" value_type=ValueType.Unstructured_Text,\n",
|
||||
" categorical=True,\n",
|
||||
" is_unique=False,\n",
|
||||
" ))\n",
|
||||
" \n",
|
||||
" return arthur_model.save()\n",
|
||||
"# arthur_model_chatbot_id = register_chat_llm()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0d1d1e60",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Alternatively, you can set the `arthur_model_chatbot_id` variable to be the ID of your model on your [model dashboard](https://app.arthur.ai/)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "cdfa02c8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"arthur_model_chatbot_id = \"your-model-id-here\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "58be5234",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This function creates a Langchain chat LLM with the ArthurCallbackHandler to log inferences to Arthur. We provide our `arthur_model_chatbot_id`, as well as the Arthur url and login we are using."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "448a8fee",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def make_langchain_chat_llm(chat_model=ChatOpenAI):\n",
|
||||
" if chat_model not in [ChatOpenAI, ChatAnthropic]:\n",
|
||||
" raise ValueError(\"For this notebook, use one of the chat models imported from langchain.chat_models\")\n",
|
||||
" return chat_model(\n",
|
||||
" streaming=True, \n",
|
||||
"def make_langchain_chat_llm(chat_model=):\n",
|
||||
" return ChatOpenAI(\n",
|
||||
" streaming=True,\n",
|
||||
" temperature=0.1,\n",
|
||||
" callbacks=[\n",
|
||||
" StreamingStdOutCallbackHandler(), \n",
|
||||
" ArthurCallbackHandler.from_credentials(arthur_model_chatbot_id, arthur_url=arthur_url, arthur_login=arthur_login)\n",
|
||||
" ])\n"
|
||||
" StreamingStdOutCallbackHandler(),\n",
|
||||
" ArthurCallbackHandler.from_credentials(\n",
|
||||
" arthur_model_id, \n",
|
||||
" arthur_url=arthur_url, \n",
|
||||
" arthur_login=arthur_login)\n",
|
||||
" ])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "17c182da",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "2dfc00ed",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Please enter password for admin: ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chat_llm = make_langchain_chat_llm()"
|
||||
"chatgpt = make_langchain_chat_llm()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "139291f2",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"id": "aXRyj50Ls8eP"
|
||||
},
|
||||
"source": [
|
||||
"Run the chatbot (it will save the chat history in the `history` list so that the conversation can reference earlier messages)\n",
|
||||
"Running the chat LLM with this `run` function will save the chat history in an ongoing list so that the conversation can reference earlier messages and log each response to the Arthur platform. You can view the history of this model's inferences on your [model dashboard page](https://app.arthur.ai/).\n",
|
||||
"\n",
|
||||
"Type `q` to quit"
|
||||
"Enter `q` to quit the run loop"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "7480a443",
|
||||
"metadata": {},
|
||||
"execution_count": 13,
|
||||
"metadata": {
|
||||
"id": "4taWSbN-s31Y"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def run_langchain_chat_llm(llm):\n",
|
||||
"def run(llm):\n",
|
||||
" history = []\n",
|
||||
" while True:\n",
|
||||
" user_input = input(\"\\n>>> input >>>\\n>>>: \")\n",
|
||||
@@ -207,240 +125,56 @@
|
||||
" history.append(llm(history))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "6868ce71",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run_langchain_chat_llm(chat_llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a0be7d01",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ArthurModel with input text, output text, token likelihoods, finish reason, and amount of token usage attributes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1ee4b741",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This function registers an LLM with additional metadata attributes to log to Arthur with each inference\n",
|
||||
"\n",
|
||||
"As above, you can register your callback handler for an LLM using this function here in the notebook or by pasting the ID of an already-registered model from your [model dashboard](https://app.arthur.ai/)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "e671836c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def register_llm():\n",
|
||||
"\n",
|
||||
" arthur_model = arthur.model(\n",
|
||||
" display_name=\"LangChainLLM\",\n",
|
||||
" input_type=InputType.NLP,\n",
|
||||
" output_type=OutputType.TokenSequence\n",
|
||||
" )\n",
|
||||
" arthur_model._add_attribute_to_model(ArthurAttribute(\n",
|
||||
" name=\"my_input_text\",\n",
|
||||
" stage=Stage.ModelPipelineInput,\n",
|
||||
" value_type=ValueType.Unstructured_Text,\n",
|
||||
" categorical=True,\n",
|
||||
" is_unique=True\n",
|
||||
" ))\n",
|
||||
" arthur_model._add_attribute_to_model(ArthurAttribute(\n",
|
||||
" name=\"my_output_text\",\n",
|
||||
" stage=Stage.PredictedValue,\n",
|
||||
" value_type=ValueType.Unstructured_Text,\n",
|
||||
" categorical=True,\n",
|
||||
" is_unique=False,\n",
|
||||
" token_attribute_link=\"my_output_likelihoods\"\n",
|
||||
" ))\n",
|
||||
" arthur_model._add_attribute_to_model(ArthurAttribute(\n",
|
||||
" name=\"my_output_likelihoods\",\n",
|
||||
" stage=Stage.PredictedValue,\n",
|
||||
" value_type=ValueType.TokenLikelihoods,\n",
|
||||
" token_attribute_link=\"my_output_text\"\n",
|
||||
" ))\n",
|
||||
" arthur_model._add_attribute_to_model(ArthurAttribute(\n",
|
||||
" name=\"finish_reason\",\n",
|
||||
" stage=Stage.NonInputData,\n",
|
||||
" value_type=ValueType.String,\n",
|
||||
" categorical=True,\n",
|
||||
" categories=[\n",
|
||||
" AttributeCategory(value='stop'),\n",
|
||||
" AttributeCategory(value='length'),\n",
|
||||
" AttributeCategory(value='content_filter'),\n",
|
||||
" AttributeCategory(value='null')\n",
|
||||
" ]\n",
|
||||
" ))\n",
|
||||
" arthur_model._add_attribute_to_model(ArthurAttribute(\n",
|
||||
" name=\"prompt_tokens\",\n",
|
||||
" stage=Stage.NonInputData,\n",
|
||||
" value_type=ValueType.Integer\n",
|
||||
" ))\n",
|
||||
" arthur_model._add_attribute_to_model(ArthurAttribute(\n",
|
||||
" name=\"completion_tokens\",\n",
|
||||
" stage=Stage.NonInputData,\n",
|
||||
" value_type=ValueType.Integer\n",
|
||||
" ))\n",
|
||||
" arthur_model._add_attribute_to_model(ArthurAttribute(\n",
|
||||
" name=\"duration\",\n",
|
||||
" stage=Stage.NonInputData,\n",
|
||||
" value_type=ValueType.Float\n",
|
||||
" ))\n",
|
||||
" \n",
|
||||
" return arthur_model.save()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "2a6686f7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"arthur_model_llm_id = \"your-model-id-here\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2dcacb96",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"These functions create Langchain LLMs with the ArthurCallbackHandler to log inferences to Arthur.\n",
|
||||
"\n",
|
||||
"There are small differences in the underlying Langchain integrations with these libraries and the available metadata for model inputs & outputs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"id": "34cf0072",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def make_langchain_openai_llm():\n",
|
||||
" return OpenAI(\n",
|
||||
" temperature=0.1,\n",
|
||||
" model_kwargs = {'logprobs': 3},\n",
|
||||
" callbacks=[\n",
|
||||
" ArthurCallbackHandler.from_credentials(arthur_model_llm_id, arthur_url=arthur_url, arthur_login=arthur_login)\n",
|
||||
" ])\n",
|
||||
"\n",
|
||||
"def make_langchain_cohere_llm():\n",
|
||||
" return Cohere(\n",
|
||||
" temperature=0.1,\n",
|
||||
" callbacks=[\n",
|
||||
" ArthurCallbackHandler.from_credentials(arthur_model_chatbot_id, arthur_url=arthur_url, arthur_login=arthur_login)\n",
|
||||
" ])\n",
|
||||
"\n",
|
||||
"def make_langchain_huggingface_llm():\n",
|
||||
" llm = HuggingFacePipeline.from_model_id(\n",
|
||||
" model_id=\"bert-base-uncased\", \n",
|
||||
" task=\"text-generation\", \n",
|
||||
" model_kwargs={\"temperature\":2.5, \"max_length\":64})\n",
|
||||
" llm.callbacks = [\n",
|
||||
" ArthurCallbackHandler.from_credentials(arthur_model_chatbot_id, arthur_url=arthur_url, arthur_login=arthur_login)\n",
|
||||
" ]\n",
|
||||
" return llm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "f40c3ce0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"openai_llm = make_langchain_openai_llm()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"id": "8476d531",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cohere_llm = make_langchain_cohere_llm()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "7483b9d3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"huggingface_llm = make_langchain_huggingface_llm()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c17d8e86",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Run the LLM (each completion is independent, no chat history is saved as we were doing above with the chat llms)\n",
|
||||
"\n",
|
||||
"Type `q` to quit"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "72ee0790",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"id": "MEx8nWJps-EG"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
">>> input >>>\n",
|
||||
">>>: What is a callback handler?\n",
|
||||
"A callback handler, also known as a callback function or callback method, is a piece of code that is executed in response to a specific event or condition. It is commonly used in programming languages that support event-driven or asynchronous programming paradigms.\n",
|
||||
"\n",
|
||||
"The purpose of a callback handler is to provide a way for developers to define custom behavior that should be executed when a certain event occurs. Instead of waiting for a result or blocking the execution, the program registers a callback function and continues with other tasks. When the event is triggered, the callback function is invoked, allowing the program to respond accordingly.\n",
|
||||
"\n",
|
||||
"Callback handlers are commonly used in various scenarios, such as handling user input, responding to network requests, processing asynchronous operations, and implementing event-driven architectures. They provide a flexible and modular way to handle events and decouple different components of a system.\n",
|
||||
">>> input >>>\n",
|
||||
">>>: What do I need to do to get the full benefits of this\n",
|
||||
"To get the full benefits of using a callback handler, you should consider the following:\n",
|
||||
"\n",
|
||||
"1. Understand the event or condition: Identify the specific event or condition that you want to respond to with a callback handler. This could be user input, network requests, or any other asynchronous operation.\n",
|
||||
"\n",
|
||||
"2. Define the callback function: Create a function that will be executed when the event or condition occurs. This function should contain the desired behavior or actions you want to take in response to the event.\n",
|
||||
"\n",
|
||||
"3. Register the callback function: Depending on the programming language or framework you are using, you may need to register or attach the callback function to the appropriate event or condition. This ensures that the callback function is invoked when the event occurs.\n",
|
||||
"\n",
|
||||
"4. Handle the callback: Implement the necessary logic within the callback function to handle the event or condition. This could involve updating the user interface, processing data, making further requests, or triggering other actions.\n",
|
||||
"\n",
|
||||
"5. Consider error handling: It's important to handle any potential errors or exceptions that may occur within the callback function. This ensures that your program can gracefully handle unexpected situations and prevent crashes or undesired behavior.\n",
|
||||
"\n",
|
||||
"6. Maintain code readability and modularity: As your codebase grows, it's crucial to keep your callback handlers organized and maintainable. Consider using design patterns or architectural principles to structure your code in a modular and scalable way.\n",
|
||||
"\n",
|
||||
"By following these steps, you can leverage the benefits of callback handlers, such as asynchronous and event-driven programming, improved responsiveness, and modular code design.\n",
|
||||
">>> input >>>\n",
|
||||
">>>: q\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def run_langchain_llm(llm):\n",
|
||||
" while True:\n",
|
||||
" print(\"Type your text for completion:\\n\")\n",
|
||||
" user_input = input(\"\\n>>> input >>>\\n>>>: \")\n",
|
||||
" if user_input == 'q': break\n",
|
||||
" print(llm(user_input), \"\\n================\\n\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "fb864057",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run_langchain_llm(openai_llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "e6673769",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run_langchain_llm(cohere_llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "85541f1c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run_langchain_llm(huggingface_llm)"
|
||||
"run(chatgpt)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
@@ -456,9 +190,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.8"
|
||||
"version": "3.10.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
"nbformat_minor": 1
|
||||
}
|
||||
|
||||
36
docs/extras/ecosystem/integrations/brave_search.mdx
Normal file
36
docs/extras/ecosystem/integrations/brave_search.mdx
Normal file
@@ -0,0 +1,36 @@
|
||||
# Brave Search
|
||||
|
||||
|
||||
>[Brave Search](https://en.wikipedia.org/wiki/Brave_Search) is a search engine developed by Brave Software.
|
||||
> - `Brave Search` uses its own web index. As of May 2022, it covered over 10 billion pages and was used to serve 92%
|
||||
> of search results without relying on any third-parties, with the remainder being retrieved
|
||||
> server-side from the Bing API or (on an opt-in basis) client-side from Google. According
|
||||
> to Brave, the index was kept "intentionally smaller than that of Google or Bing" in order to
|
||||
> help avoid spam and other low-quality content, with the disadvantage that "Brave Search is
|
||||
> not yet as good as Google in recovering long-tail queries."
|
||||
>- `Brave Search Premium`: As of April 2023 Brave Search is an ad-free website, but it will
|
||||
> eventually switch to a new model that will include ads and premium users will get an ad-free experience.
|
||||
> User data including IP addresses won't be collected from its users by default. A premium account
|
||||
> will be required for opt-in data-collection.
|
||||
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
To get access to the Brave Search API, you need to [create an account and get an API key](https://api.search.brave.com/app/dashboard).
|
||||
|
||||
|
||||
## Document Loader
|
||||
|
||||
See a [usage example](/docs/modules/data_connection/document_loaders/integrations/brave_search.html).
|
||||
|
||||
```python
|
||||
from langchain.document_loaders import BraveSearchLoader
|
||||
```
|
||||
|
||||
## Tool
|
||||
|
||||
See a [usage example](/docs/modules/agents/tools/integrations/brave_search.html).
|
||||
|
||||
```python
|
||||
from langchain.tools import BraveSearch
|
||||
```
|
||||
@@ -1,10 +1,10 @@
|
||||
# Cassandra
|
||||
|
||||
>[Apache Cassandra®](https://cassandra.apache.org/) is a free and open-source, distributed, wide-column
|
||||
>[Apache Cassandra®](https://cassandra.apache.org/) is a free and open-source, distributed, wide-column
|
||||
> store, NoSQL database management system designed to handle large amounts of data across many commodity servers,
|
||||
> providing high availability with no single point of failure. Cassandra offers support for clusters spanning
|
||||
> providing high availability with no single point of failure. Cassandra offers support for clusters spanning
|
||||
> multiple datacenters, with asynchronous masterless replication allowing low latency operations for all clients.
|
||||
> Cassandra was designed to implement a combination of _Amazon's Dynamo_ distributed storage and replication
|
||||
> Cassandra was designed to implement a combination of _Amazon's Dynamo_ distributed storage and replication
|
||||
> techniques combined with _Google's Bigtable_ data and storage engine model.
|
||||
|
||||
## Installation and Setup
|
||||
@@ -16,6 +16,16 @@ pip install cassio
|
||||
|
||||
|
||||
|
||||
## Vector Store
|
||||
|
||||
See a [usage example](/docs/modules/data_connection/vectorstores/integrations/cassandra.html).
|
||||
|
||||
```python
|
||||
from langchain.memory import CassandraChatMessageHistory
|
||||
```
|
||||
|
||||
|
||||
|
||||
## Memory
|
||||
|
||||
See a [usage example](/docs/modules/memory/integrations/cassandra_chat_message_history.html).
|
||||
|
||||
52
docs/extras/ecosystem/integrations/clarifai.mdx
Normal file
52
docs/extras/ecosystem/integrations/clarifai.mdx
Normal file
@@ -0,0 +1,52 @@
|
||||
# Clarifai
|
||||
|
||||
>[Clarifai](https://clarifai.com) is one of first deep learning platforms having been founded in 2013. Clarifai provides an AI platform with the full AI lifecycle for data exploration, data labeling, model training, evaluation and inference around images, video, text and audio data. In the LangChain ecosystem, as far as we're aware, Clarifai is the only provider that supports LLMs, embeddings and a vector store in one production scale platform, making it an excellent choice to operationalize your LangChain implementations.
|
||||
|
||||
## Installation and Setup
|
||||
- Install the Python SDK:
|
||||
```bash
|
||||
pip install clarifai
|
||||
```
|
||||
[Sign-up](https://clarifai.com/signup) for a Clarifai account, then get a personal access token to access the Clarifai API from your [security settings](https://clarifai.com/settings/security) and set it as an environment variable (`CLARIFAI_PAT`).
|
||||
|
||||
|
||||
## Models
|
||||
|
||||
Clarifai provides 1,000s of AI models for many different use cases. You can [explore them here](https://clarifai.com/explore) to find the one most suited for your use case. These models include those created by other providers such as OpenAI, Anthropic, Cohere, AI21, etc. as well as state of the art from open source such as Falcon, InstructorXL, etc. so that you build the best in AI into your products. You'll find these organized by the creator's user_id and into projects we call applications denoted by their app_id. Those IDs will be needed in additional to the model_id and optionally the version_id, so make note of all these IDs once you found the best model for your use case!
|
||||
|
||||
Also note that given there are many models for images, video, text and audio understanding, you can build some interested AI agents that utilize the variety of AI models as experts to understand those data types.
|
||||
|
||||
### LLMs
|
||||
|
||||
To find the selection of LLMs in the Clarifai platform you can select the text to text model type [here](https://clarifai.com/explore/models?filterData=%5B%7B%22field%22%3A%22model_type_id%22%2C%22value%22%3A%5B%22text-to-text%22%5D%7D%5D&page=1&perPage=24).
|
||||
|
||||
```python
|
||||
from langchain.llms import Clarifai
|
||||
llm = Clarifai(pat=CLARIFAI_PAT, user_id=USER_ID, app_id=APP_ID, model_id=MODEL_ID)
|
||||
```
|
||||
|
||||
For more details, the docs on the Clarifai LLM wrapper provide a [detailed walkthrough](/docs/modules/model_io/models/llms/integrations/clarifai.html).
|
||||
|
||||
|
||||
### Text Embedding Models
|
||||
|
||||
To find the selection of text embeddings models in the Clarifai platform you can select the text to embedding model type [here](https://clarifai.com/explore/models?page=1&perPage=24&filterData=%5B%7B%22field%22%3A%22model_type_id%22%2C%22value%22%3A%5B%22text-embedder%22%5D%7D%5D).
|
||||
|
||||
There is a Clarifai Embedding model in LangChain, which you can access with:
|
||||
```python
|
||||
from langchain.embeddings import ClarifaiEmbeddings
|
||||
embeddings = ClarifaiEmbeddings(pat=CLARIFAI_PAT, user_id=USER_ID, app_id=APP_ID, model_id=MODEL_ID)
|
||||
```
|
||||
For more details, the docs on the Clarifai Embeddings wrapper provide a [detailed walthrough](/docs/modules/data_connection/text_embedding/integrations/clarifai.html).
|
||||
|
||||
## Vectorstore
|
||||
|
||||
Clarifai's vector DB was launched in 2016 and has been optimized to support live search queries. With workflows in the Clarifai platform, you data is automatically indexed by am embedding model and optionally other models as well to index that information in the DB for search. You can query the DB not only via the vectors but also filter by metadata matches, other AI predicted concepts, and even do geo-coordinate search. Simply create an application, select the appropriate base workflow for your type of data, and upload it (through the API as [documented here](https://docs.clarifai.com/api-guide/data/create-get-update-delete) or the UIs at clarifai.com).
|
||||
|
||||
You an also add data directly from LangChain as well, and the auto-indexing will take place for you. You'll notice this is a little different than other vectorstores where you need to provde an embedding model in their constructor and have LangChain coordinate getting the embeddings from text and writing those to the index. Not only is it more convenient, but it's much more scalable to use Clarifai's distributed cloud to do all the index in the background.
|
||||
|
||||
```python
|
||||
from langchain.vectorstores import Clarifai
|
||||
clarifai_vector_db = Clarifai.from_texts(user_id=USER_ID, app_id=APP_ID, texts=texts, pat=CLARIFAI_PAT, number_of_docs=NUMBER_OF_DOCS, metadatas = metadatas)
|
||||
```
|
||||
For more details, the docs on the Clarifai vector store provide a [detailed walthrough](/docs/modules/data_connection/text_embedding/integrations/clarifai.html).
|
||||
108
docs/extras/ecosystem/integrations/cnosdb.mdx
Normal file
108
docs/extras/ecosystem/integrations/cnosdb.mdx
Normal file
@@ -0,0 +1,108 @@
|
||||
# CnosDB
|
||||
> [CnosDB](https://github.com/cnosdb/cnosdb) is an open source distributed time series database with high performance, high compression rate and high ease of use.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
```python
|
||||
pip install cnos-connector
|
||||
```
|
||||
|
||||
## Connecting to CnosDB
|
||||
You can connect to CnosDB using the SQLDatabase.from_cnosdb() method.
|
||||
### Syntax
|
||||
```python
|
||||
def SQLDatabase.from_cnosdb(url: str = "127.0.0.1:8902",
|
||||
user: str = "root",
|
||||
password: str = "",
|
||||
tenant: str = "cnosdb",
|
||||
database: str = "public")
|
||||
```
|
||||
Args:
|
||||
1. url (str): The HTTP connection host name and port number of the CnosDB
|
||||
service, excluding "http://" or "https://", with a default value
|
||||
of "127.0.0.1:8902".
|
||||
2. user (str): The username used to connect to the CnosDB service, with a
|
||||
default value of "root".
|
||||
3. password (str): The password of the user connecting to the CnosDB service,
|
||||
with a default value of "".
|
||||
4. tenant (str): The name of the tenant used to connect to the CnosDB service,
|
||||
with a default value of "cnosdb".
|
||||
5. database (str): The name of the database in the CnosDB tenant.
|
||||
## Examples
|
||||
```python
|
||||
# Connecting to CnosDB with SQLDatabase Wrapper
|
||||
from cnosdb_connector import make_cnosdb_langchain_uri
|
||||
from langchain import SQLDatabase
|
||||
|
||||
db = SQLDatabase.from_cnosdb()
|
||||
```
|
||||
```python
|
||||
# Creating a OpenAI Chat LLM Wrapper
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
|
||||
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")
|
||||
```
|
||||
|
||||
### SQL Chain
|
||||
This example demonstrates the use of the SQL Chain for answering a question over a CnosDB.
|
||||
```python
|
||||
from langchain import SQLDatabaseChain
|
||||
|
||||
db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True)
|
||||
|
||||
db_chain.run(
|
||||
"What is the average fa of test table that time between November 3,2022 and November 4, 2022?"
|
||||
)
|
||||
```
|
||||
```shell
|
||||
> Entering new chain...
|
||||
What is the average fa of test table that time between November 3, 2022 and November 4, 2022?
|
||||
SQLQuery:SELECT AVG(fa) FROM test WHERE time >= '2022-11-03' AND time < '2022-11-04'
|
||||
SQLResult: [(2.0,)]
|
||||
Answer:The average fa of the test table between November 3, 2022, and November 4, 2022, is 2.0.
|
||||
> Finished chain.
|
||||
```
|
||||
### SQL Database Agent
|
||||
This example demonstrates the use of the SQL Database Agent for answering questions over a CnosDB.
|
||||
```python
|
||||
from langchain.agents import create_sql_agent
|
||||
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
|
||||
|
||||
toolkit = SQLDatabaseToolkit(db=db, llm=llm)
|
||||
agent = create_sql_agent(llm=llm, toolkit=toolkit, verbose=True)
|
||||
```
|
||||
```python
|
||||
agent.run(
|
||||
"What is the average fa of test table that time between November 3, 2022 and November 4, 2022?"
|
||||
)
|
||||
```
|
||||
```shell
|
||||
> Entering new chain...
|
||||
Action: sql_db_list_tables
|
||||
Action Input: ""
|
||||
Observation: test
|
||||
Thought:The relevant table is "test". I should query the schema of this table to see the column names.
|
||||
Action: sql_db_schema
|
||||
Action Input: "test"
|
||||
Observation:
|
||||
CREATE TABLE test (
|
||||
time TIMESTAMP,
|
||||
fa BIGINT
|
||||
)
|
||||
|
||||
/*
|
||||
3 rows from test table:
|
||||
fa time
|
||||
1 2022-11-03T06:20:11
|
||||
2 2022-11-03T06:20:11.000000001
|
||||
3 2022-11-03T06:20:11.000000002
|
||||
*/
|
||||
Thought:The relevant column is "fa" in the "test" table. I can now construct the query to calculate the average "fa" between the specified time range.
|
||||
Action: sql_db_query
|
||||
Action Input: "SELECT AVG(fa) FROM test WHERE time >= '2022-11-03' AND time < '2022-11-04'"
|
||||
Observation: [(2.0,)]
|
||||
Thought:The average "fa" of the "test" table between November 3, 2022 and November 4, 2022 is 2.0.
|
||||
Final Answer: 2.0
|
||||
|
||||
> Finished chain.
|
||||
```
|
||||
51
docs/extras/ecosystem/integrations/dataforseo.mdx
Normal file
51
docs/extras/ecosystem/integrations/dataforseo.mdx
Normal file
@@ -0,0 +1,51 @@
|
||||
# DataForSEO
|
||||
|
||||
This page provides instructions on how to use the DataForSEO search APIs within LangChain.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
- Get a DataForSEO API Access login and password, and set them as environment variables (`DATAFORSEO_LOGIN` and `DATAFORSEO_PASSWORD` respectively). You can find it in your dashboard.
|
||||
|
||||
## Wrappers
|
||||
|
||||
### Utility
|
||||
|
||||
The DataForSEO utility wraps the API. To import this utility, use:
|
||||
|
||||
```python
|
||||
from langchain.utilities import DataForSeoAPIWrapper
|
||||
```
|
||||
|
||||
For a detailed walkthrough of this wrapper, see [this notebook](/docs/modules/agents/tools/integrations/dataforseo.ipynb).
|
||||
|
||||
### Tool
|
||||
|
||||
You can also load this wrapper as a Tool to use with an Agent:
|
||||
|
||||
```python
|
||||
from langchain.agents import load_tools
|
||||
tools = load_tools(["dataforseo-api-search"])
|
||||
```
|
||||
|
||||
## Example usage
|
||||
|
||||
```python
|
||||
dataforseo = DataForSeoAPIWrapper(api_login="your_login", api_password="your_password")
|
||||
result = dataforseo.run("Bill Gates")
|
||||
print(result)
|
||||
```
|
||||
|
||||
## Environment Variables
|
||||
|
||||
You can store your DataForSEO API Access login and password as environment variables. The wrapper will automatically check for these environment variables if no values are provided:
|
||||
|
||||
```python
|
||||
import os
|
||||
|
||||
os.environ["DATAFORSEO_LOGIN"] = "your_login"
|
||||
os.environ["DATAFORSEO_PASSWORD"] = "your_password"
|
||||
|
||||
dataforseo = DataForSeoAPIWrapper()
|
||||
result = dataforseo.run("weather in Los Angeles")
|
||||
print(result)
|
||||
```
|
||||
@@ -16,3 +16,59 @@ There exists a Jina Embeddings wrapper, which you can access with
|
||||
from langchain.embeddings import JinaEmbeddings
|
||||
```
|
||||
For a more detailed walkthrough of this, see [this notebook](/docs/modules/data_connection/text_embedding/integrations/jina.html)
|
||||
|
||||
## Deployment
|
||||
|
||||
[Langchain-serve](https://github.com/jina-ai/langchain-serve), powered by Jina, helps take LangChain apps to production with easy to use REST/WebSocket APIs and Slack bots.
|
||||
|
||||
### Usage
|
||||
|
||||
Install the package from PyPI.
|
||||
|
||||
```bash
|
||||
pip install langchain-serve
|
||||
```
|
||||
|
||||
Wrap your LangChain app with the `@serving` decorator.
|
||||
|
||||
```python
|
||||
# app.py
|
||||
from lcserve import serving
|
||||
|
||||
@serving
|
||||
def ask(input: str) -> str:
|
||||
from langchain import LLMChain, OpenAI
|
||||
from langchain.agents import AgentExecutor, ZeroShotAgent
|
||||
|
||||
tools = [...] # list of tools
|
||||
prompt = ZeroShotAgent.create_prompt(
|
||||
tools, input_variables=["input", "agent_scratchpad"],
|
||||
)
|
||||
llm_chain = LLMChain(llm=OpenAI(temperature=0), prompt=prompt)
|
||||
agent = ZeroShotAgent(
|
||||
llm_chain=llm_chain, allowed_tools=[tool.name for tool in tools]
|
||||
)
|
||||
agent_executor = AgentExecutor.from_agent_and_tools(
|
||||
agent=agent,
|
||||
tools=tools,
|
||||
verbose=True,
|
||||
)
|
||||
return agent_executor.run(input)
|
||||
```
|
||||
|
||||
Deploy on Jina AI Cloud with `lc-serve deploy jcloud app`. Once deployed, we can send a POST request to the API endpoint to get a response.
|
||||
|
||||
```bash
|
||||
curl -X 'POST' 'https://<your-app>.wolf.jina.ai/ask' \
|
||||
-d '{
|
||||
"input": "Your Quesion here?",
|
||||
"envs": {
|
||||
"OPENAI_API_KEY": "sk-***"
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
You can also self-host the app on your infrastructure with Docker-compose or Kubernetes. See [here](https://github.com/jina-ai/langchain-serve#-self-host-llm-apps-with-docker-compose-or-kubernetes) for more details.
|
||||
|
||||
|
||||
Langchain-serve also allows to deploy the apps with WebSocket APIs and Slack Bots both on [Jina AI Cloud](https://cloud.jina.ai/) or self-hosted infrastructure.
|
||||
|
||||
31
docs/extras/ecosystem/integrations/marqo.md
Normal file
31
docs/extras/ecosystem/integrations/marqo.md
Normal file
@@ -0,0 +1,31 @@
|
||||
# Marqo
|
||||
|
||||
This page covers how to use the Marqo ecosystem within LangChain.
|
||||
|
||||
### **What is Marqo?**
|
||||
|
||||
Marqo is a tensor search engine that uses embeddings stored in in-memory HNSW indexes to achieve cutting edge search speeds. Marqo can scale to hundred-million document indexes with horizontal index sharding and allows for async and non-blocking data upload and search. Marqo uses the latest machine learning models from PyTorch, Huggingface, OpenAI and more. You can start with a pre-configured model or bring your own. The built in ONNX support and conversion allows for faster inference and higher throughput on both CPU and GPU.
|
||||
|
||||
Because Marqo include its own inference your documents can have a mix of text and images, you can bring Marqo indexes with data from your other systems into the langchain ecosystem without having to worry about your embeddings being compatible.
|
||||
|
||||
Deployment of Marqo is flexible, you can get started yourself with our docker image or [contact us about our managed cloud offering!](https://www.marqo.ai/pricing)
|
||||
|
||||
To run Marqo locally with our docker image, [see our getting started.](https://docs.marqo.ai/latest/)
|
||||
|
||||
## Installation and Setup
|
||||
- Install the Python SDK with `pip install marqo`
|
||||
|
||||
## Wrappers
|
||||
|
||||
### VectorStore
|
||||
|
||||
There exists a wrapper around Marqo indexes, allowing you to use them within the vectorstore framework. Marqo lets you select from a range of models for generating embeddings and exposes some preprocessing configurations.
|
||||
|
||||
The Marqo vectorstore can also work with existing multimodel indexes where your documents have a mix of images and text, for more information refer to [our documentation](https://docs.marqo.ai/latest/#multi-modal-and-cross-modal-search). Note that instaniating the Marqo vectorstore with an existing multimodal index will disable the ability to add any new documents to it via the langchain vectorstore `add_texts` method.
|
||||
|
||||
To import this vectorstore:
|
||||
```python
|
||||
from langchain.vectorstores import Marqo
|
||||
```
|
||||
|
||||
For a more detailed walkthrough of the Marqo wrapper and some of its unique features, see [this notebook](../modules/data_connection/vectorstores/integrations/marqo.ipynb)
|
||||
56
docs/extras/ecosystem/integrations/trulens.mdx
Normal file
56
docs/extras/ecosystem/integrations/trulens.mdx
Normal file
@@ -0,0 +1,56 @@
|
||||
# TruLens
|
||||
|
||||
This page covers how to use [TruLens](https://trulens.org) to evaluate and track LLM apps built on langchain.
|
||||
|
||||
## What is TruLens?
|
||||
|
||||
TruLens is an [opensource](https://github.com/truera/trulens) package that provides instrumentation and evaluation tools for large language model (LLM) based applications.
|
||||
|
||||
## Quick start
|
||||
|
||||
Once you've created your LLM chain, you can use TruLens for evaluation and tracking. TruLens has a number of [out-of-the-box Feedback Functions](https://www.trulens.org/trulens_eval/feedback_functions/), and is also an extensible framework for LLM evaluation.
|
||||
|
||||
```python
|
||||
# create a feedback function
|
||||
|
||||
from trulens_eval.feedback import Feedback, Huggingface, OpenAI
|
||||
# Initialize HuggingFace-based feedback function collection class:
|
||||
hugs = Huggingface()
|
||||
openai = OpenAI()
|
||||
|
||||
# Define a language match feedback function using HuggingFace.
|
||||
lang_match = Feedback(hugs.language_match).on_input_output()
|
||||
# By default this will check language match on the main app input and main app
|
||||
# output.
|
||||
|
||||
# Question/answer relevance between overall question and answer.
|
||||
qa_relevance = Feedback(openai.relevance).on_input_output()
|
||||
# By default this will evaluate feedback on main app input and main app output.
|
||||
|
||||
# Toxicity of input
|
||||
toxicity = Feedback(openai.toxicity).on_input()
|
||||
|
||||
```
|
||||
|
||||
After you've set up Feedback Function(s) for evaluating your LLM, you can wrap your application with TruChain to get detailed tracing, logging and evaluation of your LLM app.
|
||||
|
||||
```python
|
||||
# wrap your chain with TruChain
|
||||
truchain = TruChain(
|
||||
chain,
|
||||
app_id='Chain1_ChatApplication',
|
||||
feedbacks=[lang_match, qa_relevance, toxicity]
|
||||
)
|
||||
# Note: any `feedbacks` specified here will be evaluated and logged whenever the chain is used.
|
||||
truchain("que hora es?")
|
||||
```
|
||||
|
||||
Now you can explore your LLM-based application!
|
||||
|
||||
Doing so will help you understand how your LLM application is performing at a glance. As you iterate new versions of your LLM application, you can compare their performance across all of the different quality metrics you've set up. You'll also be able to view evaluations at a record level, and explore the chain metadata for each record.
|
||||
|
||||
```python
|
||||
tru.run_dashboard() # open a Streamlit app to explore
|
||||
```
|
||||
|
||||
For more information on TruLens, visit [trulens.org](https://www.trulens.org/)
|
||||
@@ -39,6 +39,21 @@ vectara = Vectara(
|
||||
```
|
||||
The customer_id, corpus_id and api_key are optional, and if they are not supplied will be read from the environment variables `VECTARA_CUSTOMER_ID`, `VECTARA_CORPUS_ID` and `VECTARA_API_KEY`, respectively.
|
||||
|
||||
Afer you have the vectorstore, you can `add_texts` or `add_documents` as per the standard `VectorStore` interface, for example:
|
||||
|
||||
```python
|
||||
vectara.add_texts(["to be or not to be", "that is the question"])
|
||||
```
|
||||
|
||||
|
||||
Since Vectara supports file-upload, we also added the ability to upload files (PDF, TXT, HTML, PPT, DOC, etc) directly as file. When using this method, the file is uploaded directly to the Vectara backend, processed and chunked optimally there, so you don't have to use the LangChain document loader or chunking mechanism.
|
||||
|
||||
As an example:
|
||||
|
||||
```python
|
||||
vectara.add_files(["path/to/file1.pdf", "path/to/file2.pdf",...])
|
||||
```
|
||||
|
||||
To query the vectorstore, you can use the `similarity_search` method (or `similarity_search_with_score`), which takes a query string and returns a list of results:
|
||||
```python
|
||||
results = vectara.similarity_score("what is LangChain?")
|
||||
|
||||
@@ -24,6 +24,7 @@ Understanding these components is crucial when assessing serving systems. LangCh
|
||||
- [BentoML](https://github.com/bentoml/BentoML)
|
||||
- [OpenLLM](/docs/ecosystem/integrations/openllm.html)
|
||||
- [Modal](/docs/ecosystem/integrations/modal.html)
|
||||
- [Jina](/docs/ecosystem/integrations/jina.html#deployment)
|
||||
|
||||
These links will provide further information on each ecosystem, assisting you in finding the best fit for your LLM deployment needs.
|
||||
|
||||
|
||||
@@ -51,6 +51,10 @@ A minimal example of how to deploy LangChain to [Fly.io](https://fly.io/) using
|
||||
|
||||
A minimal example on how to deploy LangChain to DigitalOcean App Platform.
|
||||
|
||||
## [CI/CD Google Cloud Build + Dockerfile + Serverless Google Cloud Run](https://github.com/g-emarco/github-assistant)
|
||||
|
||||
Boilerplate LangChain project on how to deploy to Google Cloud Run using Docker with Cloud Build CI/CD pipeline
|
||||
|
||||
## [Google Cloud Run](https://github.com/homanp/gcp-langchain)
|
||||
|
||||
A minimal example on how to deploy LangChain to Google Cloud Run.
|
||||
@@ -61,7 +65,7 @@ This repository contains LangChain adapters for Steamship, enabling LangChain de
|
||||
|
||||
## [Langchain-serve](https://github.com/jina-ai/langchain-serve)
|
||||
|
||||
This repository allows users to serve local chains and agents as RESTful, gRPC, or WebSocket APIs, thanks to [Jina](https://docs.jina.ai/). Deploy your chains & agents with ease and enjoy independent scaling, serverless and autoscaling APIs, as well as a Streamlit playground on Jina AI Cloud.
|
||||
This repository allows users to deploy any LangChain app as REST/WebSocket APIs or, as Slack Bots with ease. Benefit from the scalability and serverless architecture of Jina AI Cloud, or deploy on-premise with Kubernetes.
|
||||
|
||||
## [BentoML](https://github.com/ssheng/BentoChain)
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
"source": [
|
||||
"# Evaluating an OpenAPI Chain\n",
|
||||
"\n",
|
||||
"This notebook goes over ways to semantically evaluate an [OpenAPI Chain](/docs/modules/chains/additiona/openapi.html), which calls an endpoint defined by the OpenAPI specification using purely natural language."
|
||||
"This notebook goes over ways to semantically evaluate an [OpenAPI Chain](/docs/modules/chains/additional/openapi.html), which calls an endpoint defined by the OpenAPI specification using purely natural language."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"\n",
|
||||
"Here we go over how to benchmark performance on a question answering task over a Paul Graham essay.\n",
|
||||
"\n",
|
||||
"It is highly reccomended that you do any evaluation/benchmarking with tracing enabled. See [here](https://langchain.readthedocs.io/en/latest/tracing.html) for an explanation of what tracing is and how to set it up."
|
||||
"It is highly reccomended that you do any evaluation/benchmarking with tracing enabled. See [here](https://python.langchain.com/docs/modules/callbacks/how_to/tracing) for an explanation of what tracing is and how to set it up."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
"source": [
|
||||
"# SQL Database Agent\n",
|
||||
"\n",
|
||||
"This notebook showcases an agent designed to interact with a sql databases. The agent builds off of [SQLDatabaseChain](https://langchain.readthedocs.io/en/latest/modules/chains/examples/sqlite.html) and is designed to answer more general questions about a database, as well as recover from errors.\n",
|
||||
"This notebook showcases an agent designed to interact with a sql databases. The agent builds off of [SQLDatabaseChain](https://python.langchain.com/docs/modules/chains/popular/sqlite) and is designed to answer more general questions about a database, as well as recover from errors.\n",
|
||||
"\n",
|
||||
"Note that, as this agent is in active development, all answers might not be correct. Additionally, it is not guaranteed that the agent won't perform DML statements on your database given certain questions. Be careful running it on sensitive data!\n",
|
||||
"\n",
|
||||
|
||||
@@ -26,8 +26,8 @@
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"BING_SUBSCRIPTION_KEY\"] = \"\"\n",
|
||||
"os.environ[\"BING_SEARCH_URL\"] = \"\""
|
||||
"os.environ[\"BING_SUBSCRIPTION_KEY\"] = \"<key>\"\n",
|
||||
"os.environ[\"BING_SEARCH_URL\"] = \"https://api.bing.microsoft.com/v7.0/search\""
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": null,
|
||||
"id": "a4c896e5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -27,7 +27,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"api_key = \"...\""
|
||||
"api_key = \"BSAv1neIuQOsxqOyy0sEe_ie2zD_n_V\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -49,7 +49,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'[{\"title\": \"Barack Obama - Wikipedia\", \"link\": \"https://en.wikipedia.org/wiki/Barack_Obama\", \"snippet\": \"Outside of politics, <strong>Obama</strong> has published three bestselling books: Dreams from My Father (1995), The Audacity of Hope (2006) and A Promised Land (2020). Rankings by scholars and historians, in which he has been featured since 2010, place him in the <strong>middle</strong> to upper tier of American presidents.\"}, {\"title\": \"Obama\\'s Middle Name -- My Last Name -- is \\'Hussein.\\' So?\", \"link\": \"https://www.cair.com/cair_in_the_news/obamas-middle-name-my-last-name-is-hussein-so/\", \"snippet\": \"Many Americans understand that common names don\\\\u2019t only come in the form of a \\\\u201cSmith\\\\u201d or a \\\\u201cJohnson.\\\\u201d Perhaps, they have a neighbor, mechanic or teacher named Hussein. Or maybe they\\\\u2019ve seen fashion designer Hussein Chalayan in the pages of Vogue or recall <strong>King Hussein</strong>, our ally in the Middle East.\"}, {\"title\": \"What\\'s up with Obama\\'s middle name? - Quora\", \"link\": \"https://www.quora.com/Whats-up-with-Obamas-middle-name\", \"snippet\": \"Answer (1 of 15): A better question would be, \\\\u201cWhat\\\\u2019s up with Obama\\\\u2019s first name?\\\\u201d President <strong>Barack Hussein Obama</strong>\\\\u2019s father\\\\u2019s name was <strong>Barack Hussein Obama</strong>. He was named after his father. Hussein, Obama\\\\u2019s middle name, is a very common Arabic name, meaning "good," "handsome," or "beautiful."\"}]'"
|
||||
"'[{\"title\": \"Obama\\'s Middle Name -- My Last Name -- is \\'Hussein.\\' So?\", \"link\": \"https://www.cair.com/cair_in_the_news/obamas-middle-name-my-last-name-is-hussein-so/\", \"snippet\": \"I wasn\\\\u2019t sure whether to laugh or cry a few days back listening to radio talk show host Bill Cunningham repeatedly scream Barack <strong>Obama</strong>\\\\u2019<strong>s</strong> <strong>middle</strong> <strong>name</strong> \\\\u2014 my last <strong>name</strong> \\\\u2014 as if he had anti-Muslim Tourette\\\\u2019s. \\\\u201cHussein,\\\\u201d Cunningham hissed like he was beckoning Satan when shouting the ...\"}, {\"title\": \"What\\'s up with Obama\\'s middle name? - Quora\", \"link\": \"https://www.quora.com/Whats-up-with-Obamas-middle-name\", \"snippet\": \"Answer (1 of 15): A better question would be, \\\\u201cWhat\\\\u2019s up with <strong>Obama</strong>\\\\u2019s first <strong>name</strong>?\\\\u201d President Barack Hussein <strong>Obama</strong>\\\\u2019s father\\\\u2019s <strong>name</strong> was Barack Hussein <strong>Obama</strong>. He was <strong>named</strong> after his father. Hussein, <strong>Obama</strong>\\\\u2019<strong>s</strong> <strong>middle</strong> <strong>name</strong>, is a very common Arabic <strong>name</strong>, meaning "good," "handsome," or ...\"}, {\"title\": \"Barack Obama | Biography, Parents, Education, Presidency, Books, ...\", \"link\": \"https://www.britannica.com/biography/Barack-Obama\", \"snippet\": \"Barack <strong>Obama</strong>, in full Barack Hussein <strong>Obama</strong> II, (born August 4, 1961, Honolulu, Hawaii, U.S.), 44th president of the United States (2009\\\\u201317) and the first African American to hold the office. Before winning the presidency, <strong>Obama</strong> represented Illinois in the U.S.\"}]'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
@@ -86,7 +86,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
226
docs/extras/modules/agents/tools/integrations/dataforseo.ipynb
Normal file
226
docs/extras/modules/agents/tools/integrations/dataforseo.ipynb
Normal file
@@ -0,0 +1,226 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# DataForSeo API Wrapper\n",
|
||||
"This notebook demonstrates how to use the DataForSeo API wrapper to obtain search engine results. The DataForSeo API allows users to retrieve SERP from most popular search engines like Google, Bing, Yahoo. It also allows to get SERPs from different search engine types like Maps, News, Events, etc.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.utilities import DataForSeoAPIWrapper"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setting up the API wrapper with your credentials\n",
|
||||
"You can obtain your API credentials by registering on the DataForSeo website."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"os.environ[\"DATAFORSEO_LOGIN\"] = \"your_api_access_username\"\n",
|
||||
"os.environ[\"DATAFORSEO_PASSWORD\"] = \"your_api_access_password\"\n",
|
||||
"\n",
|
||||
"wrapper = DataForSeoAPIWrapper()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The run method will return the first result snippet from one of the following elements: answer_box, knowledge_graph, featured_snippet, shopping, organic."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"wrapper.run(\"Weather in Los Angeles\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## The Difference Between `run` and `results`\n",
|
||||
"`run` and `results` are two methods provided by the `DataForSeoAPIWrapper` class.\n",
|
||||
"\n",
|
||||
"The `run` method executes the search and returns the first result snippet from the answer box, knowledge graph, featured snippet, shopping, or organic results. These elements are sorted by priority from highest to lowest.\n",
|
||||
"\n",
|
||||
"The `results` method returns a JSON response configured according to the parameters set in the wrapper. This allows for more flexibility in terms of what data you want to return from the API."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Getting Results as JSON\n",
|
||||
"You can customize the result types and fields you want to return in the JSON response. You can also set a maximum count for the number of top results to return."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"json_wrapper = DataForSeoAPIWrapper(\n",
|
||||
" json_result_types=[\"organic\", \"knowledge_graph\", \"answer_box\"],\n",
|
||||
" json_result_fields=[\"type\", \"title\", \"description\", \"text\"],\n",
|
||||
" top_count=3)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"json_wrapper.results(\"Bill Gates\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Customizing Location and Language\n",
|
||||
"You can specify the location and language of your search results by passing additional parameters to the API wrapper."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"customized_wrapper = DataForSeoAPIWrapper(\n",
|
||||
" top_count=10,\n",
|
||||
" json_result_types=[\"organic\", \"local_pack\"],\n",
|
||||
" json_result_fields=[\"title\", \"description\", \"type\"],\n",
|
||||
" params={\"location_name\": \"Germany\", \"language_code\": \"en\"})\n",
|
||||
"customized_wrapper.results(\"coffee near me\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Customizing the Search Engine\n",
|
||||
"You can also specify the search engine you want to use."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"customized_wrapper = DataForSeoAPIWrapper(\n",
|
||||
" top_count=10,\n",
|
||||
" json_result_types=[\"organic\", \"local_pack\"],\n",
|
||||
" json_result_fields=[\"title\", \"description\", \"type\"],\n",
|
||||
" params={\"location_name\": \"Germany\", \"language_code\": \"en\", \"se_name\": \"bing\"})\n",
|
||||
"customized_wrapper.results(\"coffee near me\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Customizing the Search Type\n",
|
||||
"The API wrapper also allows you to specify the type of search you want to perform. For example, you can perform a maps search."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"maps_search = DataForSeoAPIWrapper(\n",
|
||||
" top_count=10,\n",
|
||||
" json_result_fields=[\"title\", \"value\", \"address\", \"rating\", \"type\"],\n",
|
||||
" params={\"location_coordinate\": \"52.512,13.36,12z\", \"language_code\": \"en\", \"se_type\": \"maps\"})\n",
|
||||
"maps_search.results(\"coffee near me\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Integration with Langchain Agents\n",
|
||||
"You can use the `Tool` class from the `langchain.agents` module to integrate the `DataForSeoAPIWrapper` with a langchain agent. The `Tool` class encapsulates a function that the agent can call."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import Tool\n",
|
||||
"search = DataForSeoAPIWrapper(\n",
|
||||
" top_count=3,\n",
|
||||
" json_result_types=[\"organic\"],\n",
|
||||
" json_result_fields=[\"title\", \"description\", \"type\"])\n",
|
||||
"tool = Tool(\n",
|
||||
" name=\"google-search-answer\",\n",
|
||||
" description=\"My new answer tool\",\n",
|
||||
" func=search.run,\n",
|
||||
")\n",
|
||||
"json_tool = Tool(\n",
|
||||
" name=\"google-search-json\",\n",
|
||||
" description=\"My new json tool\",\n",
|
||||
" func=search.results,\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.11"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
@@ -1,211 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# PromptLayer\n",
|
||||
"\n",
|
||||
"<img src=\"https://promptlayer.com/logo.png\" height=\"300\">\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[PromptLayer](https://promptlayer.com) is a an observability platform for prompts and LLMs. In this guide we will go over how to setup the `PromptLayerCallbackHandler`. While PromptLayer does have LLMs that integrate directly with LangChain (eg [`PromptLayerOpenAI`](https://python.langchain.com/docs/modules/model_io/models/llms/integrations/promptlayer_openai)), this callback will be an easier and more feature rich way to integrate PromptLayer with any model on LangChain. \n",
|
||||
"\n",
|
||||
"This callback is also the recommended way to connect with PromptLayer when building Chains and Agents on LangChain."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"## Installation and Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install promptlayer --upgrade"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Getting API Credentials\n",
|
||||
"\n",
|
||||
"If you have not already create an account on [PromptLayer](https://www.promptlayer.com) and get an API key by clicking on the settings cog in the navbar\n",
|
||||
"Set it as an environment variabled called `PROMPTLAYER_API_KEY`\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Usage\n",
|
||||
"\n",
|
||||
"To get started with `PromptLayerCallbackHandler` is fairly simple, it takes two optional arguments:\n",
|
||||
"1. `pl_tags` - an optional list of strings that will be tags tracked on PromptLayer\n",
|
||||
"2. `pl_id_callback` - an optional function that will get a `promptlayer_request_id` as an argument. This id can be used with all of PromptLayers tracking features to track, metadata, scores, and prompt usage."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Simple Example\n",
|
||||
"\n",
|
||||
"In this simple example we use `PromptLayerCallbackHandler` with `ChatOpenAI`. We add a PromptLayer tag named `chatopenai`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"content=\"Sure, here's one:\\n\\nWhy did the tomato turn red?\\n\\nBecause it saw the salad dressing!\" additional_kwargs={} example=False\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.schema import (\n",
|
||||
" HumanMessage,\n",
|
||||
")\n",
|
||||
"from langchain.callbacks import PromptLayerCallbackHandler\n",
|
||||
"\n",
|
||||
"chat_llm = ChatOpenAI(\n",
|
||||
" temperature=0,\n",
|
||||
" callbacks=[PromptLayerCallbackHandler(pl_tags=[\"chatopenai\"])],\n",
|
||||
")\n",
|
||||
"llm_results = chat_llm(\n",
|
||||
" [\n",
|
||||
" HumanMessage(content=\"What comes after 1,2,3 ?\"),\n",
|
||||
" HumanMessage(content=\"Tell me another joke?\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"print(llm_results)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Full Featured Example\n",
|
||||
"\n",
|
||||
"In this example we unlock more of the power of PromptLayer.\n",
|
||||
"\n",
|
||||
"We are using the Prompt Registry and fetching the prompt called `example`.\n",
|
||||
"\n",
|
||||
"We also define a `pl_id_callback` function that tracks a score, metadata and the prompt used. Read more about tracking on [our docs](docs.promptlayer.com)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"prompt layer id 6050929\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\nToasterCo.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.callbacks import PromptLayerCallbackHandler\n",
|
||||
"import promptlayer\n",
|
||||
"\n",
|
||||
"def pl_id_callback(promptlayer_request_id):\n",
|
||||
" print(\"prompt layer id \", promptlayer_request_id)\n",
|
||||
" promptlayer.track.score(\n",
|
||||
" request_id=promptlayer_request_id, score=100\n",
|
||||
" ) # score is an integer 0-100\n",
|
||||
" promptlayer.track.metadata(\n",
|
||||
" request_id=promptlayer_request_id, metadata={\"foo\": \"bar\"}\n",
|
||||
" ) # metadata is a dictionary of key value pairs that is tracked on PromptLayer\n",
|
||||
" promptlayer.track.prompt(\n",
|
||||
" request_id=promptlayer_request_id,\n",
|
||||
" prompt_name=\"example\",\n",
|
||||
" prompt_input_variables={\"product\": \"toasters\"},\n",
|
||||
" version=1,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"openai_llm = OpenAI(\n",
|
||||
" model_name=\"text-davinci-002\",\n",
|
||||
" callbacks=[PromptLayerCallbackHandler(pl_id_callback=pl_id_callback)],\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"example_prompt = promptlayer.prompts.get(\"example\", version=1, langchain=True)\n",
|
||||
"openai_llm(example_prompt.format(product=\"toasters\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"That is all it takes! After setup all your requests will show up on the PromptLayer dasahboard.\n",
|
||||
"This callback also works with any LLM implemented on LangChain."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "base",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.8"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "c4fe2cd85a8d9e8baaec5340ce66faff1c77581a9f43e6c45e85e09b6fced008"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -56,7 +56,8 @@
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"SERPER_API_KEY\"] = \"\""
|
||||
"os.environ[\"SERPER_API_KEY\"] = \"\"",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -71,11 +72,13 @@
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"from langchain.schema import BaseRetriever\n",
|
||||
"from langchain.callbacks.manager import AsyncCallbackManagerForRetrieverRun, CallbackManagerForRetrieverRun\n",
|
||||
"from langchain.utilities import GoogleSerperAPIWrapper\n",
|
||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.schema import Document"
|
||||
"from langchain.schema import Document\n",
|
||||
"from typing import Any, List"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -94,17 +97,22 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class SerperSearchRetriever(BaseRetriever):\n",
|
||||
" def __init__(self, search):\n",
|
||||
" self.search = search\n",
|
||||
"\n",
|
||||
" def get_relevant_documents(self, query: str):\n",
|
||||
" search: GoogleSerperAPIWrapper = None\n",
|
||||
"\n",
|
||||
" def _get_relevant_documents(self, query: str, *, run_manager: CallbackManagerForRetrieverRun, **kwargs: Any) -> List[Document]:\n",
|
||||
" return [Document(page_content=self.search.run(query))]\n",
|
||||
"\n",
|
||||
" async def aget_relevant_documents(self, query: str):\n",
|
||||
" raise NotImplemented\n",
|
||||
" async def _aget_relevant_documents(self,\n",
|
||||
" query: str,\n",
|
||||
" *,\n",
|
||||
" run_manager: AsyncCallbackManagerForRetrieverRun,\n",
|
||||
" **kwargs: Any,\n",
|
||||
" ) -> List[Document]:\n",
|
||||
" raise NotImplementedError()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"retriever = SerperSearchRetriever(GoogleSerperAPIWrapper())"
|
||||
"retriever = SerperSearchRetriever(search=GoogleSerperAPIWrapper())"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
302
docs/extras/modules/chains/additional/graph_hugegraph_qa.ipynb
Normal file
302
docs/extras/modules/chains/additional/graph_hugegraph_qa.ipynb
Normal file
@@ -0,0 +1,302 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d2777010",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# HugeGraph QA Chain\n",
|
||||
"\n",
|
||||
"This notebook shows how to use LLMs to provide a natural language interface to [HugeGraph](https://hugegraph.apache.org/cn/) database."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f26dcbe4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You will need to have a running HugeGraph instance.\n",
|
||||
"You can run a local docker container by running the executing the following script:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"docker run \\\n",
|
||||
" --name=graph \\\n",
|
||||
" -itd \\\n",
|
||||
" -p 8080:8080 \\\n",
|
||||
" hugegraph/hugegraph\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"If we want to connect HugeGraph in the application, we need to install python sdk:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"pip3 install hugegraph-python\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d64a29f1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you are using the docker container, you need to wait a couple of second for the database to start, and then we need create schema and write graph data for the database."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "e53ab93e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from hugegraph.connection import PyHugeGraph\n",
|
||||
"\n",
|
||||
"client = PyHugeGraph(\"localhost\", \"8080\", user=\"admin\", pwd=\"admin\", graph=\"hugegraph\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b7c3a50e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First, we create the schema for a simple movie database:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "ef5372a8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'create EdgeLabel success, Detail: \"b\\'{\"id\":1,\"name\":\"ActedIn\",\"source_label\":\"Person\",\"target_label\":\"Movie\",\"frequency\":\"SINGLE\",\"sort_keys\":[],\"nullable_keys\":[],\"index_labels\":[],\"properties\":[],\"status\":\"CREATED\",\"ttl\":0,\"enable_label_index\":true,\"user_data\":{\"~create_time\":\"2023-07-04 10:48:47.908\"}}\\'\"'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"\"\"\"schema\"\"\"\n",
|
||||
"schema = client.schema()\n",
|
||||
"schema.propertyKey(\"name\").asText().ifNotExist().create()\n",
|
||||
"schema.propertyKey(\"birthDate\").asText().ifNotExist().create()\n",
|
||||
"schema.vertexLabel(\"Person\").properties(\"name\", \"birthDate\").usePrimaryKeyId().primaryKeys(\"name\").ifNotExist().create()\n",
|
||||
"schema.vertexLabel(\"Movie\").properties(\"name\").usePrimaryKeyId().primaryKeys(\"name\").ifNotExist().create()\n",
|
||||
"schema.edgeLabel(\"ActedIn\").sourceLabel(\"Person\").targetLabel(\"Movie\").ifNotExist().create()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "016f7989",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Then we can insert some data."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "b7f4c370",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"1:Robert De Niro--ActedIn-->2:The Godfather Part II"
|
||||
]
|
||||
},
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"\"\"\"graph\"\"\"\n",
|
||||
"g = client.graph()\n",
|
||||
"g.addVertex(\"Person\", {\"name\": \"Al Pacino\", \"birthDate\": \"1940-04-25\"})\n",
|
||||
"g.addVertex(\"Person\", {\"name\": \"Robert De Niro\", \"birthDate\": \"1943-08-17\"})\n",
|
||||
"g.addVertex(\"Movie\", {\"name\": \"The Godfather\"})\n",
|
||||
"g.addVertex(\"Movie\", {\"name\": \"The Godfather Part II\"})\n",
|
||||
"g.addVertex(\"Movie\", {\"name\": \"The Godfather Coda The Death of Michael Corleone\"})\n",
|
||||
"\n",
|
||||
"g.addEdge(\"ActedIn\", \"1:Al Pacino\", \"2:The Godfather\", {})\n",
|
||||
"g.addEdge(\"ActedIn\", \"1:Al Pacino\", \"2:The Godfather Part II\", {})\n",
|
||||
"g.addEdge(\"ActedIn\", \"1:Al Pacino\", \"2:The Godfather Coda The Death of Michael Corleone\", {})\n",
|
||||
"g.addEdge(\"ActedIn\", \"1:Robert De Niro\", \"2:The Godfather Part II\", {})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5b8f7788",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Creating `HugeGraphQAChain`\n",
|
||||
"\n",
|
||||
"We can now create the `HugeGraph` and `HugeGraphQAChain`. To create the `HugeGraph` we simply need to pass the database object to the `HugeGraph` constructor."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"id": "f1f68fcf",
|
||||
"metadata": {
|
||||
"is_executing": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.chains import HugeGraphQAChain\n",
|
||||
"from langchain.graphs import HugeGraph"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"id": "b86ebfa7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"graph = HugeGraph(\n",
|
||||
" username=\"admin\",\n",
|
||||
" password=\"admin\",\n",
|
||||
" address=\"localhost\",\n",
|
||||
" port=8080,\n",
|
||||
" graph=\"hugegraph\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e262540b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Refresh graph schema information\n",
|
||||
"\n",
|
||||
"If the schema of database changes, you can refresh the schema information needed to generate Gremlin statements."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"id": "134dd8d6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# graph.refresh_schema()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"id": "e78b8e72",
|
||||
"metadata": {
|
||||
"ExecuteTime": {}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Node properties: [name: Person, primary_keys: ['name'], properties: ['name', 'birthDate'], name: Movie, primary_keys: ['name'], properties: ['name']]\n",
|
||||
"Edge properties: [name: ActedIn, properties: []]\n",
|
||||
"Relationships: ['Person--ActedIn-->Movie']\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(graph.get_schema)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5c27e813",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Querying the graph\n",
|
||||
"\n",
|
||||
"We can now use the graph Gremlin QA chain to ask question of the graph"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"id": "3b23dead",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = HugeGraphQAChain.from_llm(\n",
|
||||
" ChatOpenAI(temperature=0), graph=graph, verbose=True\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"id": "76aecc93",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"Generated gremlin:\n",
|
||||
"\u001b[32;1m\u001b[1;3mg.V().has('Movie', 'name', 'The Godfather').in('ActedIn').valueMap(true)\u001b[0m\n",
|
||||
"Full Context:\n",
|
||||
"\u001b[32;1m\u001b[1;3m[{'id': '1:Al Pacino', 'label': 'Person', 'name': ['Al Pacino'], 'birthDate': ['1940-04-25']}]\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Al Pacino played in The Godfather.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.run(\"Who played in The Godfather?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "869f0258",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "venv",
|
||||
"language": "python",
|
||||
"name": "venv"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
300
docs/extras/modules/chains/additional/graph_sparql_qa.ipynb
Normal file
300
docs/extras/modules/chains/additional/graph_sparql_qa.ipynb
Normal file
@@ -0,0 +1,300 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c94240f5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# GraphSparqlQAChain\n",
|
||||
"\n",
|
||||
"Graph databases are an excellent choice for applications based on network-like models. To standardize the syntax and semantics of such graphs, the W3C recommends Semantic Web Technologies, cp. [Semantic Web](https://www.w3.org/standards/semanticweb/). [SPARQL](https://www.w3.org/TR/sparql11-query/) serves as a query language analogously to SQL or Cypher for these graphs. This notebook demonstrates the application of LLMs as a natural language interface to a graph database by generating SPARQL.\\\n",
|
||||
"Disclaimer: To date, SPARQL query generation via LLMs is still a bit unstable. Be especially careful with UPDATE queries, which alter the graph."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "dbc0ee68",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"There are several sources you can run queries against, including files on the web, files you have available locally, SPARQL endpoints, e.g., [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page), and [triple stores](https://www.w3.org/wiki/LargeTripleStores)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "62812aad",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": true
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.chains import GraphSparqlQAChain\n",
|
||||
"from langchain.graphs import RdfGraph"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "0928915d",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": true
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"graph = RdfGraph(\n",
|
||||
" source_file=\"http://www.w3.org/People/Berners-Lee/card\",\n",
|
||||
" standard=\"rdf\",\n",
|
||||
" local_copy=\"test.ttl\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"Note that providing a `local_file` is necessary for storing changes locally if the source is read-only."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "58c1a8ea",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Refresh graph schema information\n",
|
||||
"If the schema of the database changes, you can refresh the schema information needed to generate SPARQL queries."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "4e3de44f",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": true
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"graph.load_schema()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "1fe76ccd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"In the following, each IRI is followed by the local name and optionally its description in parentheses. \n",
|
||||
"The RDF graph supports the following node types:\n",
|
||||
"<http://xmlns.com/foaf/0.1/PersonalProfileDocument> (PersonalProfileDocument, None), <http://www.w3.org/ns/auth/cert#RSAPublicKey> (RSAPublicKey, None), <http://www.w3.org/2000/10/swap/pim/contact#Male> (Male, None), <http://xmlns.com/foaf/0.1/Person> (Person, None), <http://www.w3.org/2006/vcard/ns#Work> (Work, None)\n",
|
||||
"The RDF graph supports the following relationships:\n",
|
||||
"<http://www.w3.org/2000/01/rdf-schema#seeAlso> (seeAlso, None), <http://purl.org/dc/elements/1.1/title> (title, None), <http://xmlns.com/foaf/0.1/mbox_sha1sum> (mbox_sha1sum, None), <http://xmlns.com/foaf/0.1/maker> (maker, None), <http://www.w3.org/ns/solid/terms#oidcIssuer> (oidcIssuer, None), <http://www.w3.org/2000/10/swap/pim/contact#publicHomePage> (publicHomePage, None), <http://xmlns.com/foaf/0.1/openid> (openid, None), <http://www.w3.org/ns/pim/space#storage> (storage, None), <http://xmlns.com/foaf/0.1/name> (name, None), <http://www.w3.org/2000/10/swap/pim/contact#country> (country, None), <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> (type, None), <http://www.w3.org/ns/solid/terms#profileHighlightColor> (profileHighlightColor, None), <http://www.w3.org/ns/pim/space#preferencesFile> (preferencesFile, None), <http://www.w3.org/2000/01/rdf-schema#label> (label, None), <http://www.w3.org/ns/auth/cert#modulus> (modulus, None), <http://www.w3.org/2000/10/swap/pim/contact#participant> (participant, None), <http://www.w3.org/2000/10/swap/pim/contact#street2> (street2, None), <http://www.w3.org/2006/vcard/ns#locality> (locality, None), <http://xmlns.com/foaf/0.1/nick> (nick, None), <http://xmlns.com/foaf/0.1/homepage> (homepage, None), <http://creativecommons.org/ns#license> (license, None), <http://xmlns.com/foaf/0.1/givenname> (givenname, None), <http://www.w3.org/2006/vcard/ns#street-address> (street-address, None), <http://www.w3.org/2006/vcard/ns#postal-code> (postal-code, None), <http://www.w3.org/2000/10/swap/pim/contact#street> (street, None), <http://www.w3.org/2003/01/geo/wgs84_pos#lat> (lat, None), <http://xmlns.com/foaf/0.1/primaryTopic> (primaryTopic, None), <http://www.w3.org/2006/vcard/ns#fn> (fn, None), <http://www.w3.org/2003/01/geo/wgs84_pos#location> (location, None), <http://usefulinc.com/ns/doap#developer> (developer, None), <http://www.w3.org/2000/10/swap/pim/contact#city> (city, None), <http://www.w3.org/2006/vcard/ns#region> (region, None), <http://xmlns.com/foaf/0.1/member> (member, None), <http://www.w3.org/2003/01/geo/wgs84_pos#long> (long, None), <http://www.w3.org/2000/10/swap/pim/contact#address> (address, None), <http://xmlns.com/foaf/0.1/family_name> (family_name, None), <http://xmlns.com/foaf/0.1/account> (account, None), <http://xmlns.com/foaf/0.1/workplaceHomepage> (workplaceHomepage, None), <http://purl.org/dc/terms/title> (title, None), <http://www.w3.org/ns/solid/terms#publicTypeIndex> (publicTypeIndex, None), <http://www.w3.org/2000/10/swap/pim/contact#office> (office, None), <http://www.w3.org/2000/10/swap/pim/contact#homePage> (homePage, None), <http://xmlns.com/foaf/0.1/mbox> (mbox, None), <http://www.w3.org/2000/10/swap/pim/contact#preferredURI> (preferredURI, None), <http://www.w3.org/ns/solid/terms#profileBackgroundColor> (profileBackgroundColor, None), <http://schema.org/owns> (owns, None), <http://xmlns.com/foaf/0.1/based_near> (based_near, None), <http://www.w3.org/2006/vcard/ns#hasAddress> (hasAddress, None), <http://xmlns.com/foaf/0.1/img> (img, None), <http://www.w3.org/2000/10/swap/pim/contact#assistant> (assistant, None), <http://xmlns.com/foaf/0.1/title> (title, None), <http://www.w3.org/ns/auth/cert#key> (key, None), <http://www.w3.org/ns/ldp#inbox> (inbox, None), <http://www.w3.org/ns/solid/terms#editableProfile> (editableProfile, None), <http://www.w3.org/2000/10/swap/pim/contact#postalCode> (postalCode, None), <http://xmlns.com/foaf/0.1/weblog> (weblog, None), <http://www.w3.org/ns/auth/cert#exponent> (exponent, None), <http://rdfs.org/sioc/ns#avatar> (avatar, None)\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"graph.get_schema"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "68a3c677",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Querying the graph\n",
|
||||
"\n",
|
||||
"Now, you can use the graph SPARQL QA chain to ask questions about the graph."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "7476ce98",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": true
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = GraphSparqlQAChain.from_llm(\n",
|
||||
" ChatOpenAI(temperature=0), graph=graph, verbose=True\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "ef8ee27b",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": true
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001B[1m> Entering new GraphSparqlQAChain chain...\u001B[0m\n",
|
||||
"Identified intent:\n",
|
||||
"\u001B[32;1m\u001B[1;3mSELECT\u001B[0m\n",
|
||||
"Generated SPARQL:\n",
|
||||
"\u001B[32;1m\u001B[1;3mPREFIX foaf: <http://xmlns.com/foaf/0.1/>\n",
|
||||
"SELECT ?homepage\n",
|
||||
"WHERE {\n",
|
||||
" ?person foaf:name \"Tim Berners-Lee\" .\n",
|
||||
" ?person foaf:workplaceHomepage ?homepage .\n",
|
||||
"}\u001B[0m\n",
|
||||
"Full Context:\n",
|
||||
"\u001B[32;1m\u001B[1;3m[]\u001B[0m\n",
|
||||
"\n",
|
||||
"\u001B[1m> Finished chain.\u001B[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"Tim Berners-Lee's work homepage is http://www.w3.org/People/Berners-Lee/.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.run(\"What is Tim Berners-Lee's work homepage?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "af4b3294",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Updating the graph\n",
|
||||
"\n",
|
||||
"Analogously, you can update the graph, i.e., insert triples, using natural language."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "fdf38841",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"is_executing": true
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001B[1m> Entering new GraphSparqlQAChain chain...\u001B[0m\n",
|
||||
"Identified intent:\n",
|
||||
"\u001B[32;1m\u001B[1;3mUPDATE\u001B[0m\n",
|
||||
"Generated SPARQL:\n",
|
||||
"\u001B[32;1m\u001B[1;3mPREFIX foaf: <http://xmlns.com/foaf/0.1/>\n",
|
||||
"INSERT {\n",
|
||||
" ?person foaf:workplaceHomepage <http://www.w3.org/foo/bar/> .\n",
|
||||
"}\n",
|
||||
"WHERE {\n",
|
||||
" ?person foaf:name \"Timothy Berners-Lee\" .\n",
|
||||
"}\u001B[0m\n",
|
||||
"\n",
|
||||
"\u001B[1m> Finished chain.\u001B[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Successfully inserted triples into the graph.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.run(\"Save that the person with the name 'Timothy Berners-Lee' has a work homepage at 'http://www.w3.org/foo/bar/'\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5e0f7fc1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's verify the results:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "f874171b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[(rdflib.term.URIRef('https://www.w3.org/'),),\n",
|
||||
" (rdflib.term.URIRef('http://www.w3.org/foo/bar/'),)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = (\n",
|
||||
" \"\"\"PREFIX foaf: <http://xmlns.com/foaf/0.1/>\\n\"\"\"\n",
|
||||
" \"\"\"SELECT ?hp\\n\"\"\"\n",
|
||||
" \"\"\"WHERE {\\n\"\"\"\n",
|
||||
" \"\"\" ?person foaf:name \"Timothy Berners-Lee\" . \\n\"\"\"\n",
|
||||
" \"\"\" ?person foaf:workplaceHomepage ?hp .\\n\"\"\"\n",
|
||||
" \"\"\"}\"\"\"\n",
|
||||
")\n",
|
||||
"graph.query(query)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "lc",
|
||||
"language": "python",
|
||||
"name": "lc"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
518
docs/extras/modules/chains/popular/openai_functions.ipynb
Normal file
518
docs/extras/modules/chains/popular/openai_functions.ipynb
Normal file
@@ -0,0 +1,518 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "54ccb772",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Using OpenAI functions\n",
|
||||
"This walkthrough demonstrates how to incorporate OpenAI function-calling API's in a chain. We'll go over: \n",
|
||||
"1. How to use functions to get structured outputs from ChatOpenAI\n",
|
||||
"2. How to create a generic chain that uses (multiple) functions\n",
|
||||
"3. How to create a chain that actually executes the chosen function"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "767ac575",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from typing import Optional\n",
|
||||
"\n",
|
||||
"from langchain.chains.openai_functions import (\n",
|
||||
" create_openai_fn_chain, create_structured_output_chain\n",
|
||||
")\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate\n",
|
||||
"from langchain.schema import HumanMessage, SystemMessage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "976b6496",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Getting structured outputs\n",
|
||||
"We can take advantage of OpenAI functions to try and force the model to return a particular kind of structured output. We'll use the `create_structured_output_chain` to create our chain, which takes the desired structured output either as a Pydantic class or as JsonSchema.\n",
|
||||
"\n",
|
||||
"See here for relevant [reference docs](https://api.python.langchain.com/en/latest/chains/langchain.chains.openai_functions.base.create_structured_output_chain.html)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e052faae",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Using Pydantic classes\n",
|
||||
"When passing in Pydantic classes to structure our text, we need to make sure to have a docstring description for the class. It also helps to have descriptions for each of the classes attributes."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "0e085c99",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from pydantic import BaseModel, Field\n",
|
||||
"\n",
|
||||
"class Person(BaseModel):\n",
|
||||
" \"\"\"Identifying information about a person.\"\"\"\n",
|
||||
" name: str = Field(..., description=\"The person's name\")\n",
|
||||
" age: int = Field(..., description=\"The person's age\")\n",
|
||||
" fav_food: Optional[str] = Field(None, description=\"The person's favorite food\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "b459a33e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for extracting information in structured formats.\n",
|
||||
"Human: Use the given format to extract information from the following input:\n",
|
||||
"Human: Sally is 13\n",
|
||||
"Human: Tips: Make sure to answer in the correct format\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'name': 'Sally', 'age': 13}"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# If we pass in a model explicitly, we need to make sure it supports the OpenAI function-calling API.\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-3.5-turbo-0613\", temperature=0)\n",
|
||||
"\n",
|
||||
"prompt_msgs = [\n",
|
||||
" SystemMessage(\n",
|
||||
" content=\"You are a world class algorithm for extracting information in structured formats.\"\n",
|
||||
" ),\n",
|
||||
" HumanMessage(content=\"Use the given format to extract information from the following input:\"),\n",
|
||||
" HumanMessagePromptTemplate.from_template(\"{input}\"),\n",
|
||||
" HumanMessage(content=\"Tips: Make sure to answer in the correct format\"),\n",
|
||||
" ]\n",
|
||||
"prompt = ChatPromptTemplate(messages=prompt_msgs)\n",
|
||||
"\n",
|
||||
"chain = create_structured_output_chain(Person, llm, prompt, verbose=True)\n",
|
||||
"chain.run(\"Sally is 13\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e3539936",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To extract arbitrarily many structured outputs of a given format, we can just create a wrapper Pydantic class that takes a sequence of the original class."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "4d8ea815",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for extracting information in structured formats.\n",
|
||||
"Human: Use the given format to extract information from the following input:\n",
|
||||
"Human: Sally is 13, Joey just turned 12 and loves spinach. Caroline is 10 years older than Sally, so she's 23.\n",
|
||||
"Human: Tips: Make sure to answer in the correct format\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'people': [{'name': 'Sally', 'age': 13, 'fav_food': ''},\n",
|
||||
" {'name': 'Joey', 'age': 12, 'fav_food': 'spinach'},\n",
|
||||
" {'name': 'Caroline', 'age': 23, 'fav_food': ''}]}"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from typing import Sequence\n",
|
||||
"\n",
|
||||
"class People(BaseModel):\n",
|
||||
" \"\"\"Identifying information about all people in a text.\"\"\"\n",
|
||||
" people: Sequence[Person] = Field(..., description=\"The people in the text\")\n",
|
||||
" \n",
|
||||
"chain = create_structured_output_chain(People, llm, prompt, verbose=True)\n",
|
||||
"chain.run(\"Sally is 13, Joey just turned 12 and loves spinach. Caroline is 10 years older than Sally, so she's 23.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ea66e10e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Using JsonSchema\n",
|
||||
"\n",
|
||||
"We can also pass in JsonSchema instead of Pydantic classes to specify the desired structure. When we do this, our chain will output json corresponding to the properties described in the JsonSchema, instead of a Pydantic class."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "3484415e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"json_schema = {\n",
|
||||
" \"title\": \"Person\",\n",
|
||||
" \"description\": \"Identifying information about a person.\",\n",
|
||||
" \"type\": \"object\",\n",
|
||||
" \"properties\": {\n",
|
||||
" \"name\": {\n",
|
||||
" \"title\": \"Name\",\n",
|
||||
" \"description\": \"The person's name\",\n",
|
||||
" \"type\": \"string\"\n",
|
||||
" },\n",
|
||||
" \"age\": {\n",
|
||||
" \"title\": \"Age\",\n",
|
||||
" \"description\": \"The person's age\",\n",
|
||||
" \"type\": \"integer\"\n",
|
||||
" },\n",
|
||||
" \"fav_food\": {\n",
|
||||
" \"title\": \"Fav Food\",\n",
|
||||
" \"description\": \"The person's favorite food\",\n",
|
||||
" \"type\": \"string\"\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" \"required\": [\n",
|
||||
" \"name\",\n",
|
||||
" \"age\"\n",
|
||||
" ]\n",
|
||||
"}\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "be9b76b3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for extracting information in structured formats.\n",
|
||||
"Human: Use the given format to extract information from the following input:\n",
|
||||
"Human: Sally is 13\n",
|
||||
"Human: Tips: Make sure to answer in the correct format\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'name': 'Sally', 'age': 13}"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain = create_structured_output_chain(json_schema, llm, prompt, verbose=True)\n",
|
||||
"chain.run(\"Sally is 13\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "12394696",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Creating a generic OpenAI functions chain\n",
|
||||
"To create a generic OpenAI functions chain, we can use the `create_openai_fn_chain` method. This is the same as `create_structured_output_chain` except that instead of taking a single output schema, it takes a sequence of function definitions.\n",
|
||||
"\n",
|
||||
"Functions can be passed in as:\n",
|
||||
"- dicts conforming to OpenAI functions spec,\n",
|
||||
"- Pydantic classes, in which case they should have docstring descriptions of the function they represent and descriptions for each of the parameters,\n",
|
||||
"- Python functions, in which case they should have docstring descriptions of the function and args, along with type hints.\n",
|
||||
"\n",
|
||||
"See here for relevant [reference docs](https://api.python.langchain.com/en/latest/chains/langchain.chains.openai_functions.base.create_openai_fn_chain.html)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ff19be25",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Using Pydantic classes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "17f52508",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class RecordPerson(BaseModel):\n",
|
||||
" \"\"\"Record some identifying information about a pe.\"\"\"\n",
|
||||
" name: str = Field(..., description=\"The person's name\")\n",
|
||||
" age: int = Field(..., description=\"The person's age\")\n",
|
||||
" fav_food: Optional[str] = Field(None, description=\"The person's favorite food\")\n",
|
||||
"\n",
|
||||
" \n",
|
||||
"class RecordDog(BaseModel):\n",
|
||||
" \"\"\"Record some identifying information about a dog.\"\"\"\n",
|
||||
" name: str = Field(..., description=\"The dog's name\")\n",
|
||||
" color: str = Field(..., description=\"The dog's color\")\n",
|
||||
" fav_food: Optional[str] = Field(None, description=\"The dog's favorite food\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "a4658ad8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for recording entities\n",
|
||||
"Human: Make calls to the relevant function to record the entities in the following input:\n",
|
||||
"Human: Harry was a chubby brown beagle who loved chicken\n",
|
||||
"Human: Tips: Make sure to answer in the correct format\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"RecordDog(name='Harry', color='brown', fav_food='chicken')"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"prompt_msgs = [\n",
|
||||
" SystemMessage(\n",
|
||||
" content=\"You are a world class algorithm for recording entities\"\n",
|
||||
" ),\n",
|
||||
" HumanMessage(content=\"Make calls to the relevant function to record the entities in the following input:\"),\n",
|
||||
" HumanMessagePromptTemplate.from_template(\"{input}\"),\n",
|
||||
" HumanMessage(content=\"Tips: Make sure to answer in the correct format\"),\n",
|
||||
"]\n",
|
||||
"prompt = ChatPromptTemplate(messages=prompt_msgs)\n",
|
||||
"\n",
|
||||
"chain = create_openai_fn_chain([RecordPerson, RecordDog], llm, prompt, verbose=True)\n",
|
||||
"chain.run(\"Harry was a chubby brown beagle who loved chicken\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "df6d9147",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Using Python functions\n",
|
||||
"We can pass in functions as Pydantic classes, directly as OpenAI function dicts, or Python functions. To pass Python function in directly, we'll want to make sure our parameters have type hints, we have a docstring, and we use [Google Python style docstrings](https://google.github.io/styleguide/pyguide.html#doc-function-args) to describe the parameters.\n",
|
||||
"\n",
|
||||
"**NOTE**: To use Python functions, make sure the function arguments are of primitive types (str, float, int, bool) or that they are Pydantic objects."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "95ac5825",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for recording entities\n",
|
||||
"Human: Make calls to the relevant function to record the entities in the following input:\n",
|
||||
"Human: The most important thing to remember about Tommy, my 12 year old, is that he'll do anything for apple pie.\n",
|
||||
"Human: Tips: Make sure to answer in the correct format\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'name': 'Tommy', 'age': 12, 'fav_food': {'food': 'apple pie'}}"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"class OptionalFavFood(BaseModel):\n",
|
||||
" \"\"\"Either a food or null.\"\"\"\n",
|
||||
" food: Optional[str] = Field(None, description=\"Either the name of a food or null. Should be null if the food isn't known.\")\n",
|
||||
"\n",
|
||||
"def record_person(name: str, age: int, fav_food: OptionalFavFood) -> str:\n",
|
||||
" \"\"\"Record some basic identifying information about a person.\n",
|
||||
" \n",
|
||||
" Args:\n",
|
||||
" name: The person's name.\n",
|
||||
" age: The person's age in years.\n",
|
||||
" fav_food: An OptionalFavFood object that either contains the person's favorite food or a null value. Food should be null if it's not known.\n",
|
||||
" \"\"\"\n",
|
||||
" return f\"Recording person {name} of age {age} with favorite food {fav_food.food}!\"\n",
|
||||
"\n",
|
||||
" \n",
|
||||
"chain = create_openai_fn_chain([record_person], llm, prompt, verbose=True)\n",
|
||||
"chain.run(\"The most important thing to remember about Tommy, my 12 year old, is that he'll do anything for apple pie.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "403ea5dd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If we pass in multiple Python functions or OpenAI functions, then the returned output will be of the form\n",
|
||||
"```python\n",
|
||||
"{\"name\": \"<<function_name>>\", \"arguments\": {<<function_arguments>>}}\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "8b0d11de",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for recording entities\n",
|
||||
"Human: Make calls to the relevant function to record the entities in the following input:\n",
|
||||
"Human: I can't find my dog Henry anywhere, he's a small brown beagle. Could you send a message about him?\n",
|
||||
"Human: Tips: Make sure to answer in the correct format\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'name': 'record_dog',\n",
|
||||
" 'arguments': {'name': 'Henry', 'color': 'brown', 'fav_food': {'food': None}}}"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def record_dog(name: str, color: str, fav_food: OptionalFavFood) -> str:\n",
|
||||
" \"\"\"Record some basic identifying information about a dog.\n",
|
||||
" \n",
|
||||
" Args:\n",
|
||||
" name: The dog's name.\n",
|
||||
" color: The dog's color.\n",
|
||||
" fav_food: An OptionalFavFood object that either contains the dog's favorite food or a null value. Food should be null if it's not known.\n",
|
||||
" \"\"\"\n",
|
||||
" return f\"Recording dog {name} of color {color} with favorite food {fav_food}!\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"chain = create_openai_fn_chain([record_person, record_dog], llm, prompt, verbose=True)\n",
|
||||
"chain.run(\"I can't find my dog Henry anywhere, he's a small brown beagle. Could you send a message about him?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5f93686b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Other Chains using OpenAI functions\n",
|
||||
"\n",
|
||||
"There are a number of more specific chains that use OpenAI functions.\n",
|
||||
"- [Extraction](/docs/modules/chains/additional/extraction): very similar to structured output chain, intended for information/entity extraction specifically.\n",
|
||||
"- [Tagging](/docs/modules/chains/additional/tagging): tag inputs.\n",
|
||||
"- [OpenAPI](/docs/modules/chains/additional/openapi_openai): take an OpenAPI spec and create + execute valid requests against the API, using OpenAI functions under the hood.\n",
|
||||
"- [QA with citations](/docs/modules/chains/additional/qa_citations): use OpenAI functions ability to extract citations from text."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "93425c66",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "venv",
|
||||
"language": "python",
|
||||
"name": "venv"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -134,7 +134,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -0,0 +1,164 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3dd292b1-9a73-4ea8-af19-5fa6e3c1a62a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Brave Search\n",
|
||||
"\n",
|
||||
"\n",
|
||||
">[Brave Search](https://en.wikipedia.org/wiki/Brave_Search) is a search engine developed by Brave Software.\n",
|
||||
"> - `Brave Search` uses its own web index. As of May 2022, it covered over 10 billion pages and was used to serve 92% \n",
|
||||
"> of search results without relying on any third-parties, with the remainder being retrieved \n",
|
||||
"> server-side from the Bing API or (on an opt-in basis) client-side from Google. According \n",
|
||||
"> to Brave, the index was kept \"intentionally smaller than that of Google or Bing\" in order to \n",
|
||||
"> help avoid spam and other low-quality content, with the disadvantage that \"Brave Search is \n",
|
||||
"> not yet as good as Google in recovering long-tail queries.\"\n",
|
||||
">- `Brave Search Premium`: As of April 2023 Brave Search is an ad-free website, but it will \n",
|
||||
"> eventually switch to a new model that will include ads and premium users will get an ad-free experience.\n",
|
||||
"> User data including IP addresses won't be collected from its users by default. A premium account \n",
|
||||
"> will be required for opt-in data-collection.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "26f0888e-3f3e-4b82-ac4a-2df6feeccbe0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Installation and Setup\n",
|
||||
"\n",
|
||||
"To get access to the Brave Search API, you need to [create an account and get an API key](https://api.search.brave.com/app/dashboard).\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "d7d7be09-58bd-47d7-bf1b-33964564f777",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"api_key = \"...\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b3ac92df-6ff0-4dbb-b32b-a7dc140c48ef",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import BraveSearchLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7f483caf-58ef-4138-975a-5b783559dc1b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "766634cf-3bc7-4656-939a-cafa218807a6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"3"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"loader = BraveSearchLoader(query=\"obama middle name\", api_key=api_key, search_kwargs={\"count\": 3})\n",
|
||||
"docs = loader.load()\n",
|
||||
"len(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "f1fcc9f1-cbdc-46b3-89d3-80311d557dc6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'title': \"Obama's Middle Name -- My Last Name -- is 'Hussein.' So?\",\n",
|
||||
" 'link': 'https://www.cair.com/cair_in_the_news/obamas-middle-name-my-last-name-is-hussein-so/'},\n",
|
||||
" {'title': \"What's up with Obama's middle name? - Quora\",\n",
|
||||
" 'link': 'https://www.quora.com/Whats-up-with-Obamas-middle-name'},\n",
|
||||
" {'title': 'Barack Obama | Biography, Parents, Education, Presidency, Books, ...',\n",
|
||||
" 'link': 'https://www.britannica.com/biography/Barack-Obama'}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"[doc.metadata for doc in docs]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "601bfd77-03d3-468e-843f-2523d5e215bd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['I wasn’t sure whether to laugh or cry a few days back listening to radio talk show host Bill Cunningham repeatedly scream Barack <strong>Obama</strong>’<strong>s</strong> <strong>middle</strong> <strong>name</strong> — my last <strong>name</strong> — as if he had anti-Muslim Tourette’s. “Hussein,” Cunningham hissed like he was beckoning Satan when shouting the ...',\n",
|
||||
" 'Answer (1 of 15): A better question would be, “What’s up with <strong>Obama</strong>’s first <strong>name</strong>?” President Barack Hussein <strong>Obama</strong>’s father’s <strong>name</strong> was Barack Hussein <strong>Obama</strong>. He was <strong>named</strong> after his father. Hussein, <strong>Obama</strong>’<strong>s</strong> <strong>middle</strong> <strong>name</strong>, is a very common Arabic <strong>name</strong>, meaning "good," "handsome," or ...',\n",
|
||||
" 'Barack <strong>Obama</strong>, in full Barack Hussein <strong>Obama</strong> II, (born August 4, 1961, Honolulu, Hawaii, U.S.), 44th president of the United States (2009–17) and the first African American to hold the office. Before winning the presidency, <strong>Obama</strong> represented Illinois in the U.S.']"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"[doc.page_content for doc in docs]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "74a6ba54-9e48-4bac-ab9b-03eabd19eb81",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,118 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Cube Semantic Layer"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This notebook demonstrates the process of retrieving Cube's data model metadata in a format suitable for passing to LLMs as embeddings, thereby enhancing contextual information."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### About Cube"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[Cube](https://cube.dev/) is the Semantic Layer for building data apps. It helps data engineers and application developers access data from modern data stores, organize it into consistent definitions, and deliver it to every application."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Cube’s data model provides structure and definitions that are used as a context for LLM to understand data and generate correct queries. LLM doesn’t need to navigate complex joins and metrics calculations because Cube abstracts those and provides a simple interface that operates on the business-level terminology, instead of SQL table and column names. This simplification helps LLM to be less error-prone and avoid hallucinations."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"`Cube Semantic Loader` requires 2 arguments:\n",
|
||||
"| Input Parameter | Description |\n",
|
||||
"| --- | --- |\n",
|
||||
"| `cube_api_url` | The URL of your Cube's deployment REST API. Please refer to the [Cube documentation](https://cube.dev/docs/http-api/rest#configuration-base-path) for more information on configuring the base path. |\n",
|
||||
"| `cube_api_token` | The authentication token generated based on your Cube's API secret. Please refer to the [Cube documentation](https://cube.dev/docs/security#generating-json-web-tokens-jwt) for instructions on generating JSON Web Tokens (JWT). |\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import jwt\n",
|
||||
"from langchain.document_loaders import CubeSemanticLoader\n",
|
||||
"\n",
|
||||
"api_url = \"https://api-example.gcp-us-central1.cubecloudapp.dev/cubejs-api/v1/meta\"\n",
|
||||
"cubejs_api_secret = \"api-secret-here\"\n",
|
||||
"security_context = {}\n",
|
||||
"# Read more about security context here: https://cube.dev/docs/security\n",
|
||||
"api_token = jwt.encode(security_context, cubejs_api_secret, algorithm=\"HS256\")\n",
|
||||
"\n",
|
||||
"loader = CubeSemanticLoader(api_url, api_token)\n",
|
||||
"\n",
|
||||
"documents = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Returns:\n",
|
||||
"\n",
|
||||
"A list of documents with the following attributes:\n",
|
||||
"\n",
|
||||
"- `page_content`\n",
|
||||
"- `metadata`\n",
|
||||
" - `table_name`\n",
|
||||
" - `column_name`\n",
|
||||
" - `column_data_type`\n",
|
||||
" - `column_title`\n",
|
||||
" - `column_description`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"> page_content='table name: orders_view, column name: orders_view.total_amount, column data type: number, column title: Orders View Total Amount, column description: None' metadata={'table_name': 'orders_view', 'column_name': 'orders_view.total_amount', 'column_data_type': 'number', 'column_title': 'Orders View Total Amount', 'column_description': 'None'}"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -32,7 +32,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 1,
|
||||
"id": "40cd9806",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -44,7 +44,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 2,
|
||||
"id": "2d20b852",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -56,7 +56,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 3,
|
||||
"id": "579fa702",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -68,7 +68,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 4,
|
||||
"id": "90c1d899",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -80,7 +80,7 @@
|
||||
"[Document(page_content='This is a test email to use for unit tests.\\n\\nImportant points:\\n\\nRoses are red\\n\\nViolets are blue', metadata={'source': 'example_data/fake-email.eml'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -128,7 +128,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='This is a test email to use for unit tests.', lookup_str='', metadata={'source': 'example_data/fake-email.eml'}, lookup_index=0)"
|
||||
"Document(page_content='This is a test email to use for unit tests.', metadata={'source': 'example_data/fake-email.eml', 'filename': 'fake-email.eml', 'file_directory': 'example_data', 'date': '2022-12-16T17:04:16-05:00', 'filetype': 'message/rfc822', 'sent_from': ['Matthew Robinson <mrobinson@unstructured.io>'], 'sent_to': ['Matthew Robinson <mrobinson@unstructured.io>'], 'subject': 'Test Email', 'category': 'NarrativeText'})"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
@@ -140,6 +140,61 @@
|
||||
"data[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5021f20a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Processing Attachments\n",
|
||||
"\n",
|
||||
"You can process attachments with `UnstructuredEmailLoader` by setting `process_attachments=True` in the constructor. By default, attachments will be partitioned using the `partition` function from `unstructured`. You can use a different partitioning function by passing the function to the `attachment_partitioner` kwarg."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "6539f166",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = UnstructuredEmailLoader(\n",
|
||||
" \"example_data/fake-email.eml\",\n",
|
||||
" mode=\"elements\",\n",
|
||||
" process_attachments=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "aebead38",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "ddeb60f4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='This is a test email to use for unit tests.', metadata={'source': 'example_data/fake-email.eml', 'filename': 'fake-email.eml', 'file_directory': 'example_data', 'date': '2022-12-16T17:04:16-05:00', 'filetype': 'message/rfc822', 'sent_from': ['Matthew Robinson <mrobinson@unstructured.io>'], 'sent_to': ['Matthew Robinson <mrobinson@unstructured.io>'], 'subject': 'Test Email', 'category': 'NarrativeText'})"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"data[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6a074515",
|
||||
@@ -234,7 +289,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
"version": "3.8.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
{"sender_name": "User 2", "timestamp_ms": 1675597571851, "content": "Bye!"}
|
||||
{"sender_name": "User 1", "timestamp_ms": 1675597435669, "content": "Oh no worries! Bye"}
|
||||
{"sender_name": "User 2", "timestamp_ms": 1675596277579, "content": "No Im sorry it was my mistake, the blue one is not for sale"}
|
||||
@@ -0,0 +1,50 @@
|
||||
MIME-Version: 1.0
|
||||
Date: Fri, 23 Dec 2022 12:08:48 -0600
|
||||
Message-ID: <CAPgNNXSzLVJ-d1OCX_TjFgJU7ugtQrjFybPtAMmmYZzphxNFYg@mail.gmail.com>
|
||||
Subject: Fake email with attachment
|
||||
From: Mallori Harrell <mallori@unstructured.io>
|
||||
To: Mallori Harrell <mallori@unstructured.io>
|
||||
Content-Type: multipart/mixed; boundary="0000000000005d654405f082adb7"
|
||||
|
||||
--0000000000005d654405f082adb7
|
||||
Content-Type: multipart/alternative; boundary="0000000000005d654205f082adb5"
|
||||
|
||||
--0000000000005d654205f082adb5
|
||||
Content-Type: text/plain; charset="UTF-8"
|
||||
|
||||
Hello!
|
||||
|
||||
Here's the attachments!
|
||||
|
||||
It includes:
|
||||
|
||||
- Lots of whitespace
|
||||
- Little to no content
|
||||
- and is a quick read
|
||||
|
||||
Best,
|
||||
|
||||
Mallori
|
||||
|
||||
--0000000000005d654205f082adb5
|
||||
Content-Type: text/html; charset="UTF-8"
|
||||
Content-Transfer-Encoding: quoted-printable
|
||||
|
||||
<div dir=3D"ltr">Hello!=C2=A0<div><br></div><div>Here's the attachments=
|
||||
!</div><div><br></div><div>It includes:</div><div><ul><li style=3D"margin-l=
|
||||
eft:15px">Lots of whitespace</li><li style=3D"margin-left:15px">Little=C2=
|
||||
=A0to no content</li><li style=3D"margin-left:15px">and is a quick read</li=
|
||||
></ul><div>Best,</div></div><div><br></div><div>Mallori</div><div dir=3D"lt=
|
||||
r" class=3D"gmail_signature" data-smartmail=3D"gmail_signature"><div dir=3D=
|
||||
"ltr"><div><div><br></div></div></div></div></div>
|
||||
|
||||
--0000000000005d654205f082adb5--
|
||||
--0000000000005d654405f082adb7
|
||||
Content-Type: text/plain; charset="US-ASCII"; name="fake-attachment.txt"
|
||||
Content-Disposition: attachment; filename="fake-attachment.txt"
|
||||
Content-Transfer-Encoding: base64
|
||||
X-Attachment-Id: f_lc0tto5j0
|
||||
Content-ID: <f_lc0tto5j0>
|
||||
|
||||
SGV5IHRoaXMgaXMgYSBmYWtlIGF0dGFjaG1lbnQh
|
||||
--0000000000005d654405f082adb7--
|
||||
@@ -14,31 +14,24 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "c2f3f5f2",
|
||||
"execution_count": 2,
|
||||
"id": "994d6c74",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Build a sample vectorDB\n",
|
||||
"from langchain.vectorstores import Chroma\n",
|
||||
"from langchain.document_loaders import PyPDFLoader\n",
|
||||
"from langchain.document_loaders import WebBaseLoader\n",
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
|
||||
"\n",
|
||||
"# Load PDF\n",
|
||||
"path=\"path-to-files\"\n",
|
||||
"loaders = [\n",
|
||||
" PyPDFLoader(path+\"docs/cs229_lectures/MachineLearning-Lecture01.pdf\"),\n",
|
||||
" PyPDFLoader(path+\"docs/cs229_lectures/MachineLearning-Lecture02.pdf\"),\n",
|
||||
" PyPDFLoader(path+\"docs/cs229_lectures/MachineLearning-Lecture03.pdf\")\n",
|
||||
"]\n",
|
||||
"docs = []\n",
|
||||
"for loader in loaders:\n",
|
||||
" docs.extend(loader.load())\n",
|
||||
"# Load blog post\n",
|
||||
"loader = WebBaseLoader(\"https://lilianweng.github.io/posts/2023-06-23-agent/\")\n",
|
||||
"data = loader.load()\n",
|
||||
" \n",
|
||||
"# Split\n",
|
||||
"text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1500,chunk_overlap = 150)\n",
|
||||
"splits = text_splitter.split_documents(docs)\n",
|
||||
"text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 0)\n",
|
||||
"splits = text_splitter.split_documents(data)\n",
|
||||
"\n",
|
||||
"# VectorDB\n",
|
||||
"embedding = OpenAIEmbeddings()\n",
|
||||
@@ -64,8 +57,7 @@
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.retrievers.multi_query import MultiQueryRetriever\n",
|
||||
"question=\"What does the course say about regression?\"\n",
|
||||
"num_queries=3\n",
|
||||
"question=\"What are the approaches to Task Decomposition?\"\n",
|
||||
"llm = ChatOpenAI(temperature=0)\n",
|
||||
"retriever_from_llm = MultiQueryRetriever.from_llm(retriever=vectordb.as_retriever(),llm=llm)"
|
||||
]
|
||||
@@ -73,6 +65,19 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "9e6d3b69",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Set logging for the queries\n",
|
||||
"import logging\n",
|
||||
"logging.basicConfig()\n",
|
||||
"logging.getLogger('langchain.retrievers.multi_query').setLevel(logging.INFO)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "e5203612",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -80,22 +85,22 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"INFO:root:Generated queries: [\"1. What is the course's perspective on regression?\", '2. How does the course discuss regression?', '3. What information does the course provide about regression?']\n"
|
||||
"INFO:langchain.retrievers.multi_query:Generated queries: ['1. How can Task Decomposition be approached?', '2. What are the different methods for Task Decomposition?', '3. What are the various approaches to decomposing tasks?']\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"6"
|
||||
"5"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"unique_docs = retriever_from_llm.get_relevant_documents(question=\"What does the course say about regression?\")\n",
|
||||
"unique_docs = retriever_from_llm.get_relevant_documents(query=question)\n",
|
||||
"len(unique_docs)"
|
||||
]
|
||||
},
|
||||
@@ -111,7 +116,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"id": "d9afb0ca",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -151,12 +156,12 @@
|
||||
"llm_chain = LLMChain(llm=llm,prompt=QUERY_PROMPT,output_parser=output_parser)\n",
|
||||
" \n",
|
||||
"# Other inputs\n",
|
||||
"question=\"What does the course say about regression?\""
|
||||
"question=\"What are the approaches to Task Decomposition?\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 7,
|
||||
"id": "6660d7ee",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -164,16 +169,16 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"INFO:root:Generated queries: [\"1. What is the course's perspective on regression?\", '2. Can you provide information on regression as discussed in the course?', '3. How does the course cover the topic of regression?', \"4. What are the course's teachings on regression?\", '5. In relation to the course, what is mentioned about regression?']\n"
|
||||
"INFO:langchain.retrievers.multi_query:Generated queries: [\"1. What is the course's perspective on regression?\", '2. Can you provide information on regression as discussed in the course?', '3. How does the course cover the topic of regression?', \"4. What are the course's teachings on regression?\", '5. In relation to the course, what is mentioned about regression?']\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"8"
|
||||
"11"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -185,7 +190,7 @@
|
||||
" parser_key=\"lines\") # \"lines\" is the key (attribute name) of the parsed output\n",
|
||||
"\n",
|
||||
"# Results\n",
|
||||
"unique_docs = retriever.get_relevant_documents(question=\"What does the course say about regression?\")\n",
|
||||
"unique_docs = retriever.get_relevant_documents(query=\"What does the course say about regression?\")\n",
|
||||
"len(unique_docs)"
|
||||
]
|
||||
}
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# AWS Kendra\n",
|
||||
"# Amazon Kendra\n",
|
||||
"\n",
|
||||
"> AWS Kendra is an intelligent search service provided by Amazon Web Services (AWS). It utilizes advanced natural language processing (NLP) and machine learning algorithms to enable powerful search capabilities across various data sources within an organization. Kendra is designed to help users find the information they need quickly and accurately, improving productivity and decision-making.\n",
|
||||
"> Amazon Kendra is an intelligent search service provided by Amazon Web Services (AWS). It utilizes advanced natural language processing (NLP) and machine learning algorithms to enable powerful search capabilities across various data sources within an organization. Kendra is designed to help users find the information they need quickly and accurately, improving productivity and decision-making.\n",
|
||||
"\n",
|
||||
"> With Kendra, users can search across a wide range of content types, including documents, FAQs, knowledge bases, manuals, and websites. It supports multiple languages and can understand complex queries, synonyms, and contextual meanings to provide highly relevant search results."
|
||||
]
|
||||
@@ -17,7 +17,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using the AWS Kendra Index Retriever"
|
||||
"## Using the Amazon Kendra Index Retriever"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -58,6 +58,7 @@
|
||||
"from langchain.memory.chat_message_histories import ZepChatMessageHistory\n",
|
||||
"from langchain.schema import HumanMessage, AIMessage\n",
|
||||
"from uuid import uuid4\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"# Set this to your Zep server URL\n",
|
||||
"ZEP_API_URL = \"http://localhost:8000\""
|
||||
@@ -75,6 +76,25 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Provide your Zep API key. Note that this is optional. See https://docs.getzep.com/deployment/auth\n",
|
||||
"\n",
|
||||
"zep_api_key = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-25T15:03:29.118416Z",
|
||||
@@ -93,12 +113,13 @@
|
||||
"zep_chat_history = ZepChatMessageHistory(\n",
|
||||
" session_id=session_id,\n",
|
||||
" url=ZEP_API_URL,\n",
|
||||
" api_key=zep_api_key\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-25T15:03:30.271181Z",
|
||||
@@ -172,7 +193,7 @@
|
||||
"]\n",
|
||||
"\n",
|
||||
"for msg in test_history:\n",
|
||||
" zep_chat_history.append(\n",
|
||||
" zep_chat_history.add_message(\n",
|
||||
" HumanMessage(content=msg[\"content\"])\n",
|
||||
" if msg[\"role\"] == \"human\"\n",
|
||||
" else AIMessage(content=msg[\"content\"])\n",
|
||||
@@ -192,7 +213,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-25T15:03:32.979155Z",
|
||||
@@ -207,14 +228,14 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='Who was Octavia Butler?', metadata={'score': 0.7759001673780126, 'uuid': '3a82a02f-056e-4c6a-b960-67ebdf3b2b93', 'created_at': '2023-05-25T15:03:30.2041Z', 'role': 'human', 'token_count': 8}),\n",
|
||||
" Document(page_content=\"Octavia Butler's contemporaries included Ursula K. Le Guin, Samuel R. Delany, and Joanna Russ.\", metadata={'score': 0.7602262941130749, 'uuid': 'a2fc9c21-0897-46c8-bef7-6f5c0f71b04a', 'created_at': '2023-05-25T15:03:30.248065Z', 'role': 'ai', 'token_count': 27}),\n",
|
||||
" Document(page_content='Who were her contemporaries?', metadata={'score': 0.757553366415519, 'uuid': '41f9c41a-a205-41e1-b48b-a0a4cd943fc8', 'created_at': '2023-05-25T15:03:30.243995Z', 'role': 'human', 'token_count': 8}),\n",
|
||||
" Document(page_content='Octavia Estelle Butler (June 22, 1947 – February 24, 2006) was an American science fiction author.', metadata={'score': 0.7546211059317948, 'uuid': '34678311-0098-4f1a-8fd4-5615ac692deb', 'created_at': '2023-05-25T15:03:30.231427Z', 'role': 'ai', 'token_count': 31}),\n",
|
||||
" Document(page_content='Which books of hers were made into movies?', metadata={'score': 0.7496714959247069, 'uuid': '18046c3a-9666-4d3e-b4f0-43d1394732b7', 'created_at': '2023-05-25T15:03:30.236837Z', 'role': 'human', 'token_count': 11})]"
|
||||
"[Document(page_content='Parable of the Sower is a science fiction novel by Octavia Butler, published in 1993. It follows the story of Lauren Olamina, a young woman living in a dystopian future where society has collapsed due to environmental disasters, poverty, and violence.', metadata={'score': 0.8897116216176073, 'uuid': 'db60ff57-f259-4ec4-8a81-178ed4c6e54f', 'created_at': '2023-06-26T23:40:22.816214Z', 'role': 'ai', 'metadata': {'system': {'entities': [{'Label': 'GPE', 'Matches': [{'End': 20, 'Start': 15, 'Text': 'Sower'}], 'Name': 'Sower'}, {'Label': 'PERSON', 'Matches': [{'End': 65, 'Start': 51, 'Text': 'Octavia Butler'}], 'Name': 'Octavia Butler'}, {'Label': 'DATE', 'Matches': [{'End': 84, 'Start': 80, 'Text': '1993'}], 'Name': '1993'}, {'Label': 'PERSON', 'Matches': [{'End': 124, 'Start': 110, 'Text': 'Lauren Olamina'}], 'Name': 'Lauren Olamina'}]}}, 'token_count': 56}),\n",
|
||||
" Document(page_content=\"Write a short synopsis of Butler's book, Parable of the Sower. What is it about?\", metadata={'score': 0.8856661080361157, 'uuid': 'f1a5981a-8f6d-4168-a548-6e9c32f35fa1', 'created_at': '2023-06-26T23:40:22.809621Z', 'role': 'human', 'metadata': {'system': {'entities': [{'Label': 'ORG', 'Matches': [{'End': 32, 'Start': 26, 'Text': 'Butler'}], 'Name': 'Butler'}, {'Label': 'WORK_OF_ART', 'Matches': [{'End': 61, 'Start': 41, 'Text': 'Parable of the Sower'}], 'Name': 'Parable of the Sower'}]}}, 'token_count': 23}),\n",
|
||||
" Document(page_content='Who was Octavia Butler?', metadata={'score': 0.7757595298492976, 'uuid': '361d0043-1009-4e13-a7f0-8aea8b1ee869', 'created_at': '2023-06-26T23:40:22.709886Z', 'role': 'human', 'metadata': {'system': {'entities': [{'Label': 'PERSON', 'Matches': [{'End': 22, 'Start': 8, 'Text': 'Octavia Butler'}], 'Name': 'Octavia Butler'}], 'intent': 'The subject wants to know about the identity or background of an individual named Octavia Butler.'}}, 'token_count': 8}),\n",
|
||||
" Document(page_content=\"Octavia Butler's contemporaries included Ursula K. Le Guin, Samuel R. Delany, and Joanna Russ.\", metadata={'score': 0.7601242516059306, 'uuid': '56c45e8a-0f65-45f0-bc46-d9e65164b563', 'created_at': '2023-06-26T23:40:22.778836Z', 'role': 'ai', 'metadata': {'system': {'entities': [{'Label': 'PERSON', 'Matches': [{'End': 16, 'Start': 0, 'Text': \"Octavia Butler's\"}], 'Name': \"Octavia Butler's\"}, {'Label': 'ORG', 'Matches': [{'End': 58, 'Start': 41, 'Text': 'Ursula K. Le Guin'}], 'Name': 'Ursula K. Le Guin'}, {'Label': 'PERSON', 'Matches': [{'End': 76, 'Start': 60, 'Text': 'Samuel R. Delany'}], 'Name': 'Samuel R. Delany'}, {'Label': 'PERSON', 'Matches': [{'End': 93, 'Start': 82, 'Text': 'Joanna Russ'}], 'Name': 'Joanna Russ'}], 'intent': \"The subject is providing information about Octavia Butler's contemporaries.\"}}, 'token_count': 27}),\n",
|
||||
" Document(page_content='You might want to read Ursula K. Le Guin or Joanna Russ.', metadata={'score': 0.7594731095320668, 'uuid': '6951f2fd-dfa4-4e05-9380-f322ef8f72f8', 'created_at': '2023-06-26T23:40:22.80464Z', 'role': 'ai', 'metadata': {'system': {'entities': [{'Label': 'ORG', 'Matches': [{'End': 40, 'Start': 23, 'Text': 'Ursula K. Le Guin'}], 'Name': 'Ursula K. Le Guin'}, {'Label': 'PERSON', 'Matches': [{'End': 55, 'Start': 44, 'Text': 'Joanna Russ'}], 'Name': 'Joanna Russ'}]}}, 'token_count': 18})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -226,6 +247,7 @@
|
||||
" session_id=session_id, # Ensure that you provide the session_id when instantiating the Retriever\n",
|
||||
" url=ZEP_API_URL,\n",
|
||||
" top_k=5,\n",
|
||||
" api_key=zep_api_key\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"await zep_retriever.aget_relevant_documents(\"Who wrote Parable of the Sower?\")"
|
||||
@@ -240,7 +262,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-25T15:03:34.713354Z",
|
||||
@@ -255,14 +277,14 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='Parable of the Sower is a science fiction novel by Octavia Butler, published in 1993. It follows the story of Lauren Olamina, a young woman living in a dystopian future where society has collapsed due to environmental disasters, poverty, and violence.', metadata={'score': 0.8897321402776546, 'uuid': '1c09603a-52c1-40d7-9d69-29f26256029c', 'created_at': '2023-05-25T15:03:30.268257Z', 'role': 'ai', 'token_count': 56}),\n",
|
||||
" Document(page_content=\"Write a short synopsis of Butler's book, Parable of the Sower. What is it about?\", metadata={'score': 0.8857628682610436, 'uuid': 'f6706e8c-6c91-452f-8c1b-9559fd924657', 'created_at': '2023-05-25T15:03:30.265302Z', 'role': 'human', 'token_count': 23}),\n",
|
||||
" Document(page_content='Who was Octavia Butler?', metadata={'score': 0.7759670375149477, 'uuid': '3a82a02f-056e-4c6a-b960-67ebdf3b2b93', 'created_at': '2023-05-25T15:03:30.2041Z', 'role': 'human', 'token_count': 8}),\n",
|
||||
" Document(page_content=\"Octavia Butler's contemporaries included Ursula K. Le Guin, Samuel R. Delany, and Joanna Russ.\", metadata={'score': 0.7602854653476563, 'uuid': 'a2fc9c21-0897-46c8-bef7-6f5c0f71b04a', 'created_at': '2023-05-25T15:03:30.248065Z', 'role': 'ai', 'token_count': 27}),\n",
|
||||
" Document(page_content='You might want to read Ursula K. Le Guin or Joanna Russ.', metadata={'score': 0.7595293992240313, 'uuid': 'f22f2498-6118-4c74-8718-aa89ccd7e3d6', 'created_at': '2023-05-25T15:03:30.261198Z', 'role': 'ai', 'token_count': 18})]"
|
||||
"[Document(page_content='Parable of the Sower is a science fiction novel by Octavia Butler, published in 1993. It follows the story of Lauren Olamina, a young woman living in a dystopian future where society has collapsed due to environmental disasters, poverty, and violence.', metadata={'score': 0.889661105796371, 'uuid': 'db60ff57-f259-4ec4-8a81-178ed4c6e54f', 'created_at': '2023-06-26T23:40:22.816214Z', 'role': 'ai', 'metadata': {'system': {'entities': [{'Label': 'GPE', 'Matches': [{'End': 20, 'Start': 15, 'Text': 'Sower'}], 'Name': 'Sower'}, {'Label': 'PERSON', 'Matches': [{'End': 65, 'Start': 51, 'Text': 'Octavia Butler'}], 'Name': 'Octavia Butler'}, {'Label': 'DATE', 'Matches': [{'End': 84, 'Start': 80, 'Text': '1993'}], 'Name': '1993'}, {'Label': 'PERSON', 'Matches': [{'End': 124, 'Start': 110, 'Text': 'Lauren Olamina'}], 'Name': 'Lauren Olamina'}]}}, 'token_count': 56}),\n",
|
||||
" Document(page_content=\"Write a short synopsis of Butler's book, Parable of the Sower. What is it about?\", metadata={'score': 0.885754241595424, 'uuid': 'f1a5981a-8f6d-4168-a548-6e9c32f35fa1', 'created_at': '2023-06-26T23:40:22.809621Z', 'role': 'human', 'metadata': {'system': {'entities': [{'Label': 'ORG', 'Matches': [{'End': 32, 'Start': 26, 'Text': 'Butler'}], 'Name': 'Butler'}, {'Label': 'WORK_OF_ART', 'Matches': [{'End': 61, 'Start': 41, 'Text': 'Parable of the Sower'}], 'Name': 'Parable of the Sower'}]}}, 'token_count': 23}),\n",
|
||||
" Document(page_content='Who was Octavia Butler?', metadata={'score': 0.7758688965570713, 'uuid': '361d0043-1009-4e13-a7f0-8aea8b1ee869', 'created_at': '2023-06-26T23:40:22.709886Z', 'role': 'human', 'metadata': {'system': {'entities': [{'Label': 'PERSON', 'Matches': [{'End': 22, 'Start': 8, 'Text': 'Octavia Butler'}], 'Name': 'Octavia Butler'}], 'intent': 'The subject wants to know about the identity or background of an individual named Octavia Butler.'}}, 'token_count': 8}),\n",
|
||||
" Document(page_content=\"Octavia Butler's contemporaries included Ursula K. Le Guin, Samuel R. Delany, and Joanna Russ.\", metadata={'score': 0.7602672137411663, 'uuid': '56c45e8a-0f65-45f0-bc46-d9e65164b563', 'created_at': '2023-06-26T23:40:22.778836Z', 'role': 'ai', 'metadata': {'system': {'entities': [{'Label': 'PERSON', 'Matches': [{'End': 16, 'Start': 0, 'Text': \"Octavia Butler's\"}], 'Name': \"Octavia Butler's\"}, {'Label': 'ORG', 'Matches': [{'End': 58, 'Start': 41, 'Text': 'Ursula K. Le Guin'}], 'Name': 'Ursula K. Le Guin'}, {'Label': 'PERSON', 'Matches': [{'End': 76, 'Start': 60, 'Text': 'Samuel R. Delany'}], 'Name': 'Samuel R. Delany'}, {'Label': 'PERSON', 'Matches': [{'End': 93, 'Start': 82, 'Text': 'Joanna Russ'}], 'Name': 'Joanna Russ'}], 'intent': \"The subject is providing information about Octavia Butler's contemporaries.\"}}, 'token_count': 27}),\n",
|
||||
" Document(page_content='You might want to read Ursula K. Le Guin or Joanna Russ.', metadata={'score': 0.7596040989115522, 'uuid': '6951f2fd-dfa4-4e05-9380-f322ef8f72f8', 'created_at': '2023-06-26T23:40:22.80464Z', 'role': 'ai', 'metadata': {'system': {'entities': [{'Label': 'ORG', 'Matches': [{'End': 40, 'Start': 23, 'Text': 'Ursula K. Le Guin'}], 'Name': 'Ursula K. Le Guin'}, {'Label': 'PERSON', 'Matches': [{'End': 55, 'Start': 44, 'Text': 'Joanna Russ'}], 'Name': 'Joanna Russ'}]}}, 'token_count': 18})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -304,7 +326,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -0,0 +1,208 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "9597802c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Clarifai\n",
|
||||
"\n",
|
||||
">[Clarifai](https://www.clarifai.com/) is an AI Platform that provides the full AI lifecycle ranging from data exploration, data labeling, model training, evaluation, and inference.\n",
|
||||
"\n",
|
||||
"This example goes over how to use LangChain to interact with `Clarifai` [models](https://clarifai.com/explore/models). Text embedding models in particular can be found [here](https://clarifai.com/explore/models?page=1&perPage=24&filterData=%5B%7B%22field%22%3A%22model_type_id%22%2C%22value%22%3A%5B%22text-embedder%22%5D%7D%5D).\n",
|
||||
"\n",
|
||||
"To use Clarifai, you must have an account and a Personal Access Token (PAT) key. \n",
|
||||
"[Check here](https://clarifai.com/settings/security) to get or create a PAT."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "2a773d8d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Dependencies"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "91ea14ce-831d-409a-a88f-30353acdabd1",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Install required dependencies\n",
|
||||
"!pip install clarifai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "426f1156",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Imports\n",
|
||||
"Here we will be setting the personal access token. You can find your PAT under [settings/security](https://clarifai.com/settings/security) in your Clarifai account."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "3f5dc9d7-65e3-4b5b-9086-3327d016cfe0",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Please login and get your API key from https://clarifai.com/settings/security \n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"CLARIFAI_PAT = getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "6fb585dd",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Import the required modules\n",
|
||||
"from langchain.embeddings import ClarifaiEmbeddings\n",
|
||||
"from langchain import PromptTemplate, LLMChain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "16521ed2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Input\n",
|
||||
"Create a prompt template to be used with the LLM Chain:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "035dea0f",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"template = \"\"\"Question: {question}\n",
|
||||
"\n",
|
||||
"Answer: Let's think step by step.\"\"\"\n",
|
||||
"\n",
|
||||
"prompt = PromptTemplate(template=template, input_variables=[\"question\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "c8905eac",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Setup\n",
|
||||
"Set the user id and app id to the application in which the model resides. You can find a list of public models on https://clarifai.com/explore/models\n",
|
||||
"\n",
|
||||
"You will have to also initialize the model id and if needed, the model version id. Some models have many versions, you can choose the one appropriate for your task."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "1fe9bf15",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"USER_ID = 'openai'\n",
|
||||
"APP_ID = 'embed'\n",
|
||||
"MODEL_ID = 'text-embedding-ada'\n",
|
||||
"\n",
|
||||
"# You can provide a specific model version as the model_version_id arg.\n",
|
||||
"# MODEL_VERSION_ID = \"MODEL_VERSION_ID\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "3f3458d9",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Initialize a Clarifai embedding model\n",
|
||||
"embeddings = ClarifaiEmbeddings(pat=CLARIFAI_PAT, user_id=USER_ID, app_id=APP_ID, model_id=MODEL_ID)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "a641dbd9",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"text = \"This is a test document.\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "32b4d5f4-2b8e-4681-856f-19a3dd141ae4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query_result = embeddings.embed_query(text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "47076457-1880-48ac-970f-872ead6f0d94",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"doc_result = embeddings.embed_documents([text])"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -9,7 +9,7 @@
|
||||
"# Elasticsearch\n",
|
||||
"Walkthrough of how to generate embeddings using a hosted embedding model in Elasticsearch\n",
|
||||
"\n",
|
||||
"The easiest way to instantiate the `ElasticsearchEmebddings` class it either\n",
|
||||
"The easiest way to instantiate the `ElasticsearchEmbeddings` class it either\n",
|
||||
"- using the `from_credentials` constructor if you are using Elastic Cloud\n",
|
||||
"- or using the `from_es_connection` constructor with any Elasticsearch cluster"
|
||||
],
|
||||
|
||||
@@ -0,0 +1,126 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Spacy Embedding\n",
|
||||
"\n",
|
||||
"### Loading the Spacy embedding class to generate and query embeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Import the necessary classes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"from langchain.embeddings.spacy_embeddings import SpacyEmbeddings\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Initialize SpacyEmbeddings.This will load the Spacy model into memory."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"embedder = SpacyEmbeddings()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Define some example texts . These could be any documents that you want to analyze - for example, news articles, social media posts, or product reviews."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"\n",
|
||||
"texts = [\n",
|
||||
" \"The quick brown fox jumps over the lazy dog.\",\n",
|
||||
" \"Pack my box with five dozen liquor jugs.\",\n",
|
||||
" \"How vexingly quick daft zebras jump!\",\n",
|
||||
" \"Bright vixens jump; dozy fowl quack.\"\n",
|
||||
"]\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Generate and print embeddings for the texts . The SpacyEmbeddings class generates an embedding for each document, which is a numerical representation of the document's content. These embeddings can be used for various natural language processing tasks, such as document similarity comparison or text classification."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"embeddings = embedder.embed_documents(texts)\n",
|
||||
"for i, embedding in enumerate(embeddings):\n",
|
||||
" print(f\"Embedding for document {i+1}: {embedding}\")\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Generate and print an embedding for a single piece of text. You can also generate an embedding for a single piece of text, such as a search query. This can be useful for tasks like information retrieval, where you want to find documents that are similar to a given query."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"query = \"Quick foxes and lazy dogs.\"\n",
|
||||
"query_embedding = embedder.embed_query(query)\n",
|
||||
"print(f\"Embedding for query: {query_embedding}\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -10,14 +10,32 @@
|
||||
"\n",
|
||||
">`OpenSearch` helps you develop high quality, maintenance-free, and high performance intelligent search services to provide your users with high search efficiency and accuracy.\n",
|
||||
"\n",
|
||||
">`OpenSearch` provides the vector search feature. In specific scenarios, especially test question search and image search scenarios, you can use the vector search feature together with the multimodal search feature to improve the accuracy of search results. This topic describes the syntax and usage notes of vector indexes.\n",
|
||||
">`OpenSearch` provides the vector search feature. In specific scenarios, especially test question search and image search scenarios, you can use the vector search feature together with the multimodal search feature to improve the accuracy of search results.\n",
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the `Alibaba Cloud OpenSearch Vector Search Edition`.\n",
|
||||
"To run, you should have an [OpenSearch Vector Search Edition](https://opensearch.console.aliyun.com) instance up and running:\n",
|
||||
"\n",
|
||||
"Read the [help document](https://www.alibabacloud.com/help/en/opensearch/latest/vector-search) to quickly familiarize and configure OpenSearch Vector Search Edition instance.\n"
|
||||
"Read the [help document](https://www.alibabacloud.com/help/en/opensearch/latest/vector-search) to quickly familiarize and configure OpenSearch Vector Search Edition instance."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"After the instance is up and running, follow these steps to split documents, get embeddings, connect to the alibaba cloud opensearch instance, index documents, and perform vector retrieval."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"We need to install the following Python packages first."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
@@ -29,10 +47,29 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"After completing the configuration, follow these steps to connect to the instance, index documents, and perform vector retrieval."
|
||||
]
|
||||
"We want to use `OpenAIEmbeddings` so we have to get the OpenAI API Key."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
@@ -60,7 +97,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Split documents and get embeddings by call OpenAI API"
|
||||
"Split documents and get embeddings."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -123,7 +160,7 @@
|
||||
" \"id\": \"id\", # The id field name mapping of index document.\n",
|
||||
" \"document\": \"document\", # The text field name mapping of index document.\n",
|
||||
" \"embedding\": \"embedding\", # The embedding field name mapping of index document.\n",
|
||||
" \"metadata_x\": \"metadata_x,=\", # The metadata field name mapping of index document, could specify multiple, The value field contains mapping name and operator, the operator would be used when executing metadata filter query.\n",
|
||||
" \"name_of_the_metadata_specified_during_search\": \"opensearch_metadata_field_name,=\", # The metadata field name mapping of index document, could specify multiple, The value field contains mapping name and operator, the operator would be used when executing metadata filter query.\n",
|
||||
" },\n",
|
||||
")\n",
|
||||
"\n",
|
||||
@@ -139,7 +176,10 @@
|
||||
"# \"id\": \"id\",\n",
|
||||
"# \"document\": \"document\",\n",
|
||||
"# \"embedding\": \"embedding\",\n",
|
||||
"# \"metadata\": \"metadata,=\" #The value field contains mapping name and operator, the operator would be used when executing metadata filter query\n",
|
||||
"# \"metadata_a\": \"metadata_a,=\" #The value field contains mapping name and operator, the operator would be used when executing metadata filter query\n",
|
||||
"# \"metadata_b\": \"metadata_b,>\"\n",
|
||||
"# \"metadata_c\": \"metadata_c,<\"\n",
|
||||
"# \"metadata_else\": \"metadata_else,=\"\n",
|
||||
"# })"
|
||||
]
|
||||
},
|
||||
@@ -251,7 +291,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Query and retrieve data with metadata\n"
|
||||
"Query and retrieve data with metadata.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -307,4 +347,4 @@
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
}
|
||||
@@ -43,7 +43,6 @@
|
||||
"import openai\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.schema import BaseRetriever\n",
|
||||
"from langchain.vectorstores.azuresearch import AzureSearch"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -23,7 +23,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install \"cassio>=0.0.5\""
|
||||
"!pip install \"cassio>=0.0.7\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -44,14 +44,16 @@
|
||||
"import os\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"database_mode = (input('\\n(L)ocal Cassandra or (A)stra DB? ')).upper()\n",
|
||||
"database_mode = (input('\\n(C)assandra or (A)stra DB? ')).upper()\n",
|
||||
"\n",
|
||||
"keyspace_name = input('\\nKeyspace name? ')\n",
|
||||
"\n",
|
||||
"if database_mode == 'A':\n",
|
||||
" ASTRA_DB_APPLICATION_TOKEN = getpass.getpass('\\nAstra DB Token (\"AstraCS:...\") ')\n",
|
||||
" #\n",
|
||||
" ASTRA_DB_SECURE_BUNDLE_PATH = input('Full path to your Secure Connect Bundle? ')"
|
||||
" ASTRA_DB_SECURE_BUNDLE_PATH = input('Full path to your Secure Connect Bundle? ')\n",
|
||||
"elif database_mode == 'C':\n",
|
||||
" CASSANDRA_CONTACT_POINTS = input('Contact points? (comma-separated, empty for localhost) ').strip()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -72,8 +74,15 @@
|
||||
"from cassandra.cluster import Cluster\n",
|
||||
"from cassandra.auth import PlainTextAuthProvider\n",
|
||||
"\n",
|
||||
"if database_mode == 'L':\n",
|
||||
" cluster = Cluster()\n",
|
||||
"if database_mode == 'C':\n",
|
||||
" if CASSANDRA_CONTACT_POINTS:\n",
|
||||
" cluster = Cluster([\n",
|
||||
" cp.strip()\n",
|
||||
" for cp in CASSANDRA_CONTACT_POINTS.split(',')\n",
|
||||
" if cp.strip()\n",
|
||||
" ])\n",
|
||||
" else:\n",
|
||||
" cluster = Cluster()\n",
|
||||
" session = cluster.connect()\n",
|
||||
"elif database_mode == 'A':\n",
|
||||
" ASTRA_DB_CLIENT_ID = \"token\"\n",
|
||||
@@ -261,7 +270,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.10"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -8,12 +8,12 @@
|
||||
"source": [
|
||||
"# Clarifai\n",
|
||||
"\n",
|
||||
">[Clarifai](https://www.clarifai.com/) is a AI Platform that provides the full AI lifecycle ranging from data exploration, data labeling, model building and inference. A Clarifai application can be used as a vector database after uploading inputs. \n",
|
||||
">[Clarifai](https://www.clarifai.com/) is an AI Platform that provides the full AI lifecycle ranging from data exploration, data labeling, model training, evaluation, and inference. A Clarifai application can be used as a vector database after uploading inputs. \n",
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the `Clarifai` vector database.\n",
|
||||
"\n",
|
||||
"To use Clarifai, you must have an account and a Personal Access Token key. \n",
|
||||
"Here are the [installation instructions](https://clarifai.com/settings/security )."
|
||||
"To use Clarifai, you must have an account and a Personal Access Token (PAT) key. \n",
|
||||
"[Check here](https://clarifai.com/settings/security) to get or create a PAT."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -58,7 +58,7 @@
|
||||
"# Please login and get your API key from https://clarifai.com/settings/security \n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"CLARIFAI_PAT_KEY = getpass()"
|
||||
"CLARIFAI_PAT = getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -92,7 +92,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Setup\n",
|
||||
"Setup the user id and app id where the text data will be uploaded. \n",
|
||||
"Setup the user id and app id where the text data will be uploaded. Note: when creating that application please select an appropriate base workflow for indexing your text documents such as the Language-Understanding workflow.\n",
|
||||
"\n",
|
||||
"You will have to first create an account on [Clarifai](https://clarifai.com/login) and then create an application."
|
||||
]
|
||||
@@ -139,7 +139,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"clarifai_vector_db = Clarifai.from_texts(user_id=USER_ID, app_id=APP_ID, texts=texts, pat=CLARIFAI_PAT_KEY, number_of_docs=NUMBER_OF_DOCS, metadatas = metadatas)"
|
||||
"clarifai_vector_db = Clarifai.from_texts(user_id=USER_ID, app_id=APP_ID, texts=texts, pat=CLARIFAI_PAT, number_of_docs=NUMBER_OF_DOCS, metadatas = metadatas)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "683953b3",
|
||||
"metadata": {},
|
||||
@@ -30,7 +29,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "38237514-b3fa-44a4-9cff-30cd6bf50073",
|
||||
"metadata": {},
|
||||
@@ -58,7 +56,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 1,
|
||||
"id": "aac9563e",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -73,7 +71,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 2,
|
||||
"id": "a3c3999a",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -132,7 +130,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "f13473b5",
|
||||
"metadata": {},
|
||||
@@ -174,7 +171,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "f34420cf",
|
||||
"metadata": {},
|
||||
@@ -194,7 +190,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "31bda7fd",
|
||||
"metadata": {},
|
||||
@@ -255,7 +250,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "57da60d4",
|
||||
"metadata": {},
|
||||
@@ -350,7 +344,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "f4294b96",
|
||||
"metadata": {},
|
||||
@@ -361,7 +354,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"execution_count": 3,
|
||||
"id": "d5bf812c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -396,7 +389,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "3d33c126",
|
||||
"metadata": {},
|
||||
@@ -426,7 +418,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "0be136e0",
|
||||
"metadata": {},
|
||||
@@ -456,7 +447,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "1b4ecd86",
|
||||
"metadata": {},
|
||||
@@ -466,7 +456,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"execution_count": 4,
|
||||
"id": "1fd60fd1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -474,15 +464,14 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Content: foo, Metadata: {'page': 1}, Score: 5.159960813797904e-15\n",
|
||||
"Content: bar, Metadata: {'page': 1}, Score: 0.3131446838378906\n"
|
||||
"Content: foo, Metadata: {'page': 1}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = db.similarity_search(\"foo\", filter=dict(page=1), k=1, fetch_k=4)\n",
|
||||
"for doc, score in results_with_scores:\n",
|
||||
" print(f\"Content: {doc.page_content}, Metadata: {doc.metadata}, Score: {score}\")"
|
||||
"for doc in results:\n",
|
||||
" print(f\"Content: {doc.page_content}, Metadata: {doc.metadata}\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -502,7 +491,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
"version": "3.10.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -0,0 +1,575 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "683953b3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Marqo\n",
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the Marqo vectorstore.\n",
|
||||
"\n",
|
||||
">[Marqo](https://www.marqo.ai/) is an open-source vector search engine. Marqo allows you to store and query multimodal data such as text and images. Marqo creates the vectors for you using a huge selection of opensource models, you can also provide your own finetuned models and Marqo will handle the loading and inference for you.\n",
|
||||
"\n",
|
||||
"To run this notebook with our docker image please run the following commands first to get Marqo:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"docker pull marqoai/marqo:latest\n",
|
||||
"docker rm -f marqo\n",
|
||||
"docker run --name marqo -it --privileged -p 8882:8882 --add-host host.docker.internal:host-gateway marqoai/marqo:latest\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "aac9563e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install marqo"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "5d1489ec",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.vectorstores import Marqo\n",
|
||||
"from langchain.document_loaders import TextLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "a3c3999a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"loader = TextLoader('../../../state_of_the_union.txt')\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "6e104aee",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Index langchain-demo exists.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import marqo \n",
|
||||
"\n",
|
||||
"# initialize marqo\n",
|
||||
"marqo_url = \"http://localhost:8882\" # if using marqo cloud replace with your endpoint (console.marqo.ai)\n",
|
||||
"marqo_api_key = \"\" # if using marqo cloud replace with your api key (console.marqo.ai)\n",
|
||||
"\n",
|
||||
"client = marqo.Client(url=marqo_url, api_key=marqo_api_key)\n",
|
||||
"\n",
|
||||
"index_name = \"langchain-demo\"\n",
|
||||
"\n",
|
||||
"docsearch = Marqo.from_documents(docs, index_name=index_name)\n",
|
||||
"\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"result_docs = docsearch.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "9c608226",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(result_docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "98704b27",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n",
|
||||
"0.68647254\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result_docs = docsearch.similarity_search_with_score(query)\n",
|
||||
"print(result_docs[0][0].page_content, result_docs[0][1], sep=\"\\n\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "eb3395b6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Additional features\n",
|
||||
"\n",
|
||||
"One of the powerful features of Marqo as a vectorstore is that you can use indexes created externally. For example:\n",
|
||||
"\n",
|
||||
"+ If you had a database of image and text pairs from another application, you can simply just use it in langchain with the Marqo vectorstore. Note that bringing your own multimodal indexes will disable the `add_texts` method.\n",
|
||||
"\n",
|
||||
"+ If you had a database of text documents, you can bring it into the langchain framework and add more texts through `add_texts`.\n",
|
||||
"\n",
|
||||
"The documents that are returned are customised by passing your own function to the `page_content_builder` callback in the search methods."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "35b99fef",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Multimodal Example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "a359ed74",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'errors': False,\n",
|
||||
" 'processingTimeMs': 2090.2822139996715,\n",
|
||||
" 'index_name': 'langchain-multimodal-demo',\n",
|
||||
" 'items': [{'_id': 'aa92fc1c-1fb2-4d86-b027-feb507c419f7',\n",
|
||||
" 'result': 'created',\n",
|
||||
" 'status': 201},\n",
|
||||
" {'_id': '5142c258-ef9f-4bf2-a1a6-2307280173a0',\n",
|
||||
" 'result': 'created',\n",
|
||||
" 'status': 201}]}"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"\n",
|
||||
"# use a new index\n",
|
||||
"index_name = \"langchain-multimodal-demo\"\n",
|
||||
"\n",
|
||||
"# incase the demo is re-run\n",
|
||||
"try:\n",
|
||||
" client.delete_index(index_name)\n",
|
||||
"except Exception:\n",
|
||||
" print(f\"Creating {index_name}\")\n",
|
||||
" \n",
|
||||
"# This index could have been created by another system\n",
|
||||
"settings = {\"treat_urls_and_pointers_as_images\": True, \"model\": \"ViT-L/14\"}\n",
|
||||
"client.create_index(index_name, **settings)\n",
|
||||
"client.index(index_name).add_documents(\n",
|
||||
" [ \n",
|
||||
" # image of a bus\n",
|
||||
" {\n",
|
||||
" \"caption\": \"Bus\",\n",
|
||||
" \"image\": \"https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image4.jpg\"\n",
|
||||
" },\n",
|
||||
" # image of a plane\n",
|
||||
" { \n",
|
||||
" \"caption\": \"Plane\", \n",
|
||||
" \"image\": \"https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image2.jpg\"\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "368d1fab",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_content(res):\n",
|
||||
" \"\"\"Helper to format Marqo's documents into text to be used as page_content\"\"\"\n",
|
||||
" return f\"{res['caption']}: {res['image']}\"\n",
|
||||
"\n",
|
||||
"docsearch = Marqo(client, index_name, page_content_builder=get_content)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"query = \"vehicles that fly\"\n",
|
||||
"doc_results = docsearch.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "eef4edf9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Plane: https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image2.jpg\n",
|
||||
"Bus: https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image4.jpg\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for doc in doc_results:\n",
|
||||
" print(doc.page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "c255f603",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Text only example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "9e9a2b20",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'errors': False,\n",
|
||||
" 'processingTimeMs': 139.2144540004665,\n",
|
||||
" 'index_name': 'langchain-byo-index-demo',\n",
|
||||
" 'items': [{'_id': '27c05a1c-b8a9-49a5-ae73-fbf1eb51dc3f',\n",
|
||||
" 'result': 'created',\n",
|
||||
" 'status': 201},\n",
|
||||
" {'_id': '6889afe0-e600-43c1-aa3b-1d91bf6db274',\n",
|
||||
" 'result': 'created',\n",
|
||||
" 'status': 201}]}"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"\n",
|
||||
"# use a new index\n",
|
||||
"index_name = \"langchain-byo-index-demo\"\n",
|
||||
"\n",
|
||||
"# incase the demo is re-run\n",
|
||||
"try:\n",
|
||||
" client.delete_index(index_name)\n",
|
||||
"except Exception:\n",
|
||||
" print(f\"Creating {index_name}\")\n",
|
||||
"\n",
|
||||
"# This index could have been created by another system\n",
|
||||
"client.create_index(index_name)\n",
|
||||
"client.index(index_name).add_documents(\n",
|
||||
" [ \n",
|
||||
" {\n",
|
||||
" \"Title\": \"Smartphone\",\n",
|
||||
" \"Description\": \"A smartphone is a portable computer device that combines mobile telephone \"\n",
|
||||
" \"functions and computing functions into one unit.\",\n",
|
||||
" },\n",
|
||||
" { \n",
|
||||
" \"Title\": \"Telephone\",\n",
|
||||
" \"Description\": \"A telephone is a telecommunications device that permits two or more users to\"\n",
|
||||
" \"conduct a conversation when they are too far apart to be easily heard directly.\",\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "b2943ea9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['9986cc72-adcd-4080-9d74-265c173a9ec3']"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Note text indexes retain the ability to use add_texts despite different field names in documents\n",
|
||||
"# this is because the page_content_builder callback lets you handle these document fields as required\n",
|
||||
"\n",
|
||||
"def get_content(res):\n",
|
||||
" \"\"\"Helper to format Marqo's documents into text to be used as page_content\"\"\"\n",
|
||||
" if 'text' in res:\n",
|
||||
" return res['text']\n",
|
||||
" return res['Description']\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"docsearch = Marqo(client, index_name, page_content_builder=get_content)\n",
|
||||
"\n",
|
||||
"docsearch.add_texts([\"This is a document that is about elephants\"])\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "851450e9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"A smartphone is a portable computer device that combines mobile telephone functions and computing functions into one unit.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"modern communications devices\"\n",
|
||||
"doc_results = docsearch.similarity_search(query)\n",
|
||||
"\n",
|
||||
"print(doc_results[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "9a438fec",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"This is a document that is about elephants\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"elephants\"\n",
|
||||
"doc_results = docsearch.similarity_search(query, page_content_builder=get_content)\n",
|
||||
"\n",
|
||||
"print(doc_results[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "0d04c9d4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Weighted Queries\n",
|
||||
"\n",
|
||||
"We also expose marqos weighted queries which are a powerful way to compose complex semantic searches."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "d42ba0d6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"A smartphone is a portable computer device that combines mobile telephone functions and computing functions into one unit.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = {\"communications devices\" : 1.0}\n",
|
||||
"doc_results = docsearch.similarity_search(query)\n",
|
||||
"print(doc_results[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "b5918a16",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"A telephone is a telecommunications device that permits two or more users toconduct a conversation when they are too far apart to be easily heard directly.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = {\"communications devices\" : 1.0, \"technology post 2000\": -1.0}\n",
|
||||
"doc_results = docsearch.similarity_search(query)\n",
|
||||
"print(doc_results[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "2d026aa0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Question Answering with Sources\n",
|
||||
"\n",
|
||||
"This section shows how to use Marqo as part of a `RetrievalQAWithSourcesChain`. Marqo will perform the searches for information in the sources."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "e4ca223c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"OpenAI API Key:········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chains import RetrievalQAWithSourcesChain\n",
|
||||
"from langchain import OpenAI\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "5c6e45f9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with open(\"../../../state_of_the_union.txt\") as f:\n",
|
||||
" state_of_the_union = f.read()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"texts = text_splitter.split_text(state_of_the_union)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "70a7f320",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Index langchain-qa-with-retrieval exists.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"index_name = \"langchain-qa-with-retrieval\"\n",
|
||||
"docsearch = Marqo.from_documents(docs, index_name=index_name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "b3b008a4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = RetrievalQAWithSourcesChain.from_chain_type(\n",
|
||||
" OpenAI(temperature=0), chain_type=\"stuff\", retriever=docsearch.as_retriever()\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "e1457716",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'answer': ' The president honored Justice Breyer, thanking him for his service and noting that he is a retiring Justice of the United States Supreme Court.\\n',\n",
|
||||
" 'sources': '../../../state_of_the_union.txt'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain(\n",
|
||||
" {\"question\": \"What did the president say about Justice Breyer\"},\n",
|
||||
" return_only_outputs=True,\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "683953b3",
|
||||
"metadata": {},
|
||||
@@ -44,6 +45,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "457ace44-1d95-4001-9dd5-78811ab208ad",
|
||||
"metadata": {},
|
||||
@@ -63,6 +65,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "1f3ecc42",
|
||||
"metadata": {},
|
||||
@@ -130,7 +133,7 @@
|
||||
"# initialize MongoDB python client\n",
|
||||
"client = MongoClient(MONGODB_ATLAS_CLUSTER_URI)\n",
|
||||
"\n",
|
||||
"db_name = \"lanchain_db\"\n",
|
||||
"db_name = \"langchain_db\"\n",
|
||||
"collection_name = \"langchain_col\"\n",
|
||||
"collection = client[db_name][collection_name]\n",
|
||||
"index_name = \"langchain_demo\"\n",
|
||||
@@ -156,6 +159,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "851a2ec9-9390-49a4-8412-3e132c9f789d",
|
||||
"metadata": {},
|
||||
@@ -183,14 +187,14 @@
|
||||
"db_name = \"langchain_db\"\n",
|
||||
"collection_name = \"langchain_col\"\n",
|
||||
"collection = client[db_name][collection_name]\n",
|
||||
"index_name = \"langchain_index\"\n",
|
||||
"index_name = \"langchain_demo\"\n",
|
||||
"\n",
|
||||
"# initialize vector store\n",
|
||||
"vectorStore = MongoDBAtlasVectorSearch(\n",
|
||||
" collection, OpenAIEmbeddings(), index_name=index_name\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# perform a similarity search between the embedding of the query and the embeddings of the documents\n",
|
||||
"# perform a similarity search between a query and the ingested documents\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = vectorStore.similarity_search(query)\n",
|
||||
"\n",
|
||||
|
||||
@@ -1,169 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "683953b3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# MongoDB Atlas Vector Search\n",
|
||||
"\n",
|
||||
">[MongoDB Atlas](https://www.mongodb.com/docs/atlas/) is a document database managed in the cloud. It also enables Lucene and its vector search feature.\n",
|
||||
"\n",
|
||||
"This notebook shows how to use the functionality related to the `MongoDB Atlas Vector Search` feature where you can store your embeddings in MongoDB documents and create a Lucene vector index to perform a KNN search.\n",
|
||||
"\n",
|
||||
"It uses the [knnBeta Operator](https://www.mongodb.com/docs/atlas/atlas-search/knn-beta) available in MongoDB Atlas Search. This feature is in early access and available only for evaluation purposes, to validate functionality, and to gather feedback from a small closed group of early access users. It is not recommended for production deployments as we may introduce breaking changes.\n",
|
||||
"\n",
|
||||
"To use MongoDB Atlas, you must have first deployed a cluster. Free clusters are available. \n",
|
||||
"Here is the MongoDB Atlas [quick start](https://www.mongodb.com/docs/atlas/getting-started/)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b4c41cad-08ef-4f72-a545-2151e4598efe",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install pymongo"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c1e38361-c1fe-4ac6-86e9-c90ebaf7ae87",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"MONGODB_ATLAS_URI = os.environ['MONGODB_ATLAS_URI']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "320af802-9271-46ee-948f-d2453933d44b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We want to use `OpenAIEmbeddings` so we have to get the OpenAI API Key. Make sure the environment variable `OPENAI_API_KEY` is set up before proceeding."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1f3ecc42",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now, let's create a Lucene vector index on your cluster. In the below example, `embedding` is the name of the field that contains the embedding vector. Please refer to the [documentation](https://www.mongodb.com/docs/atlas/atlas-search/define-field-mappings-for-vector-search) to get more details on how to define an Atlas Search index.\n",
|
||||
"You can name the index `langchain_demo` and create the index on the namespace `lanchain_db.langchain_col`. Finally, write the following definition in the JSON editor:\n",
|
||||
"\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"mappings\": {\n",
|
||||
" \"dynamic\": true,\n",
|
||||
" \"fields\": {\n",
|
||||
" \"embedding\": {\n",
|
||||
" \"dimensions\": 1536,\n",
|
||||
" \"similarity\": \"cosine\",\n",
|
||||
" \"type\": \"knnVector\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "aac9563e",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.vectorstores import MongoDBAtlasVectorSearch\n",
|
||||
"from langchain.document_loaders import TextLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "a3c3999a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"loader = TextLoader('../../../state_of_the_union.txt')\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6e104aee",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from pymongo import MongoClient\n",
|
||||
"\n",
|
||||
"# initialize MongoDB python client\n",
|
||||
"client = MongoClient(MONGODB_ATLAS_CONNECTION_STRING)\n",
|
||||
"\n",
|
||||
"db_name = \"lanchain_db\"\n",
|
||||
"collection_name = \"langchain_col\"\n",
|
||||
"collection = client[db_name][collection_name]\n",
|
||||
"index_name = \"langchain_demo\"\n",
|
||||
"\n",
|
||||
"# insert the documents in MongoDB Atlas with their embedding\n",
|
||||
"docsearch = MongoDBAtlasVectorSearch.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embeddings,\n",
|
||||
" collection=collection,\n",
|
||||
" index_name=index_name\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# perform a similarity search between the embedding of the query and the embeddings of the documents\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = docsearch.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9c608226",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,338 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "1292f057",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# pg_hnsw\n",
|
||||
"\n",
|
||||
"> [pg_embedding](https://github.com/knizhnik/hnsw) is an open-source vector similarity search for `Postgres` that uses Hierarchical Navigable Small Worlds for approximate nearest neighbor search.\n",
|
||||
"\n",
|
||||
"It supports:\n",
|
||||
"- exact and approximate nearest neighbor search using HNSW\n",
|
||||
"- L2 distance\n",
|
||||
"\n",
|
||||
"This notebook shows how to use the Postgres vector database (`PGEmbedding`).\n",
|
||||
"\n",
|
||||
"> The PGEmbedding integration creates the pg_embedding extension for you, but you run the following Postgres query to add it:\n",
|
||||
"```sql\n",
|
||||
"CREATE EXTENSION embedding;\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a6214221",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Pip install necessary package\n",
|
||||
"!pip install openai\n",
|
||||
"!pip install psycopg2-binary\n",
|
||||
"!pip install tiktoken"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "b2e49694",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Add the OpenAI API Key to the environment variables to use `OpenAIEmbeddings`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "1dcc8d99",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"OpenAI API Key:········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "9719ea68",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## Loading Environment Variables\n",
|
||||
"from typing import List, Tuple"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dfd1f38d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.vectorstores import PGEmbedding\n",
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"from langchain.docstore.document import Document"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "8fab8cc2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Database Url:········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"os.environ[\"DATABASE_URL\"] = getpass.getpass(\"Database Url:\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "bef17115",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = TextLoader(\"state_of_the_union.txt\")\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"connection_string = os.environ.get(\"DATABASE_URL\")\n",
|
||||
"collection_name = \"state_of_the_union\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "743abfaa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db = PGEmbedding.from_documents(\n",
|
||||
" embedding=embeddings,\n",
|
||||
" documents=docs,\n",
|
||||
" collection_name=collection_name,\n",
|
||||
" connection_string=connection_string,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs_with_score: List[Tuple[Document, float]] = db.similarity_search_with_score(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "41ce4c4e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for doc, score in docs_with_score:\n",
|
||||
" print(\"-\" * 80)\n",
|
||||
" print(\"Score: \", score)\n",
|
||||
" print(doc.page_content)\n",
|
||||
" print(\"-\" * 80)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "7ef7b052",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Working with vectorstore in Postgres"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "939151f7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Uploading a vectorstore in PG "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"id": "595ac511",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db = PGEmbedding.from_documents(\n",
|
||||
" embedding=embeddings,\n",
|
||||
" documents=docs,\n",
|
||||
" collection_name=collection_name,\n",
|
||||
" connection_string=connection_string,\n",
|
||||
" pre_delete_collection=False,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "f9510e6b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create HNSW Index\n",
|
||||
"By default, the extension performs a sequential scan search, with 100% recall. You might consider creating an HNSW index for approximate nearest neighbor (ANN) search to speed up `similarity_search_with_score` execution time. To create the HNSW index on your vector column, use a `create_hnsw_index` function:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2d1981fa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"PGEmbedding.create_hnsw_index(\n",
|
||||
" max_elements=10000, dims=1536, m=8, ef_construction=16, ef_search=16\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "7adacf29",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The function above is equivalent to running the below SQL query:\n",
|
||||
"```sql\n",
|
||||
"CREATE INDEX ON vectors USING hnsw(vec) WITH (maxelements=10000, dims=1536, m=3, efconstruction=16, efsearch=16);\n",
|
||||
"```\n",
|
||||
"The HNSW index options used in the statement above include:\n",
|
||||
"\n",
|
||||
"- maxelements: Defines the maximum number of elements indexed. This is a required parameter. The example shown above has a value of 3. A real-world example would have a much large value, such as 1000000. An \"element\" refers to a data point (a vector) in the dataset, which is represented as a node in the HNSW graph. Typically, you would set this option to a value able to accommodate the number of rows in your in your dataset.\n",
|
||||
"- dims: Defines the number of dimensions in your vector data. This is a required parameter. A small value is used in the example above. If you are storing data generated using OpenAI's text-embedding-ada-002 model, which supports 1536 dimensions, you would define a value of 1536, for example.\n",
|
||||
"- m: Defines the maximum number of bi-directional links (also referred to as \"edges\") created for each node during graph construction.\n",
|
||||
"The following additional index options are supported:\n",
|
||||
"\n",
|
||||
"- efConstruction: Defines the number of nearest neighbors considered during index construction. The default value is 32.\n",
|
||||
"- efsearch: Defines the number of nearest neighbors considered during index search. The default value is 32.\n",
|
||||
"For information about how you can configure these options to influence the HNSW algorithm, refer to [Tuning the HNSW algorithm](https://neon-next-git-dprice-hnsw-extension-neondatabase.vercel.app/docs/extensions/hnsw#tuning-the-hnsw-algorithm)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "528893fb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Retrieving a vectorstore in PG"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "b6162b1c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"store = PGEmbedding(\n",
|
||||
" connection_string=connection_string,\n",
|
||||
" embedding_function=embeddings,\n",
|
||||
" collection_name=collection_name,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"retriever = store.as_retriever()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "1a5fedb1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"VectorStoreRetriever(vectorstore=<langchain.vectorstores.pghnsw.HNSWVectoreStore object at 0x121d3c8b0>, search_type='similarity', search_kwargs={})"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"retriever"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "0cefc938",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db1 = PGEmbedding.from_existing_index(\n",
|
||||
" embedding=embeddings,\n",
|
||||
" collection_name=collection_name,\n",
|
||||
" pre_delete_collection=False,\n",
|
||||
" connection_string=connection_string,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs_with_score: List[Tuple[Document, float]] = db1.similarity_search_with_score(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "85cde495",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for doc, score in docs_with_score:\n",
|
||||
" print(\"-\" * 80)\n",
|
||||
" print(\"Score: \", score)\n",
|
||||
" print(doc.page_content)\n",
|
||||
" print(\"-\" * 80)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "683953b3",
|
||||
"metadata": {},
|
||||
@@ -51,6 +52,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "320af802-9271-46ee-948f-d2453933d44b",
|
||||
"metadata": {},
|
||||
@@ -136,6 +138,30 @@
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "86a4b96b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Adding More Text to an Existing Index\n",
|
||||
"\n",
|
||||
"More text can embedded and upserted to an existing Pinecone index using the `add_texts` function\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "38a7a60e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"index = pinecone.Index(\"langchain-demo\")\n",
|
||||
"vectorstore = Pinecone(index, embeddings.embed_query, \"text\")\n",
|
||||
"\n",
|
||||
"vectorstore.add_texts(\"More text!\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
|
||||
@@ -43,7 +43,7 @@
|
||||
"\n",
|
||||
" CREATE FUNCTION match_documents(query_embedding vector(1536), match_count int)\n",
|
||||
" RETURNS TABLE(\n",
|
||||
" id bigint,\n",
|
||||
" id uuid,\n",
|
||||
" content text,\n",
|
||||
" metadata jsonb,\n",
|
||||
" -- we return matched vectors to enable maximal marginal relevance searches\n",
|
||||
|
||||
@@ -11,43 +11,11 @@
|
||||
">[Vectara](https://vectara.com/) is a API platform for building LLM-powered applications. It provides a simple to use API for document indexing and query that is managed by Vectara and is optimized for performance and accuracy. \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the `Vectara` vector database. \n",
|
||||
"This notebook shows how to use functionality related to the `Vectara` vector database or the `Vectara` retriever. \n",
|
||||
"\n",
|
||||
"See the [Vectara API documentation ](https://docs.vectara.com/docs/) for more information on how to use the API."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "7b2f111b-357a-4f42-9730-ef0603bdc1b5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We want to use `OpenAIEmbeddings` so we have to get the OpenAI API Key."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "082e7e8b-ac52-430c-98d6-8f0924457642",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"OpenAI API Key:········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
@@ -61,33 +29,13 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"import os\n",
|
||||
"from langchain.embeddings import FakeEmbeddings\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.vectorstores import Vectara\n",
|
||||
"from langchain.document_loaders import TextLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "a3c3999a",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-04-04T10:51:22.520144Z",
|
||||
"start_time": "2023-04-04T10:51:22.285826Z"
|
||||
},
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
@@ -96,7 +44,21 @@
|
||||
"source": [
|
||||
"## Connecting to Vectara from LangChain\n",
|
||||
"\n",
|
||||
"The Vectara API provides simple API endpoints for indexing and querying."
|
||||
"The Vectara API provides simple API endpoints for indexing and querying, which is encapsulated in the Vectara integration.\n",
|
||||
"First let's ingest the documents using the from_documents() method:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "be0a4973",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = TextLoader('../../../state_of_the_union.txt')\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -112,7 +74,50 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vectara = Vectara.from_documents(docs, embedding=None)"
|
||||
"vectara = Vectara.from_documents(docs, \n",
|
||||
" embedding=FakeEmbeddings(size=768), \n",
|
||||
" doc_metadata = {\"speech\": \"state-of-the-union\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "90dbf3e7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Vectara's indexing API provides a file upload API where the file is handled directly by Vectara - pre-processed, chunked optimally and added to the Vectara vector store.\n",
|
||||
"To use this, we added the add_files() method (and from_files()). \n",
|
||||
"\n",
|
||||
"Let's see this in action. We pick two PDF documents to upload: \n",
|
||||
"1. The \"I have a dream\" speech by Dr. King\n",
|
||||
"2. Churchill's \"We Shall Fight on the Beaches\" speech"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "85ef3468",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import tempfile\n",
|
||||
"import urllib.request\n",
|
||||
"\n",
|
||||
"urls = [\n",
|
||||
" ['https://www.gilderlehrman.org/sites/default/files/inline-pdfs/king.dreamspeech.excerpts.pdf', 'I-have-a-dream'],\n",
|
||||
" ['https://www.parkwayschools.net/cms/lib/MO01931486/Centricity/Domain/1578/Churchill_Beaches_Speech.pdf', 'we shall fight on the beaches'],\n",
|
||||
"]\n",
|
||||
"files_list = []\n",
|
||||
"for url,_ in urls:\n",
|
||||
" name = tempfile.NamedTemporaryFile().name\n",
|
||||
" urllib.request.urlretrieve(url, name)\n",
|
||||
" files_list.append(name)\n",
|
||||
"\n",
|
||||
"docsearch: Vectara = Vectara.from_files(\n",
|
||||
" files=files_list,\n",
|
||||
" embedding=FakeEmbeddings(size=768),\n",
|
||||
" metadatas=[{\"url\": url, \"speech\": title} for url,title in urls],\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -133,7 +138,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"id": "a8c513ab",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -145,12 +150,12 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"found_docs = vectara.similarity_search(query, n_sentence_context=0)"
|
||||
"found_docs = vectara.similarity_search(query, n_sentence_context=0, filter=\"doc.speech = 'state-of-the-union'\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 7,
|
||||
"id": "fc516993",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -191,7 +196,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 8,
|
||||
"id": "8804a21d",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -202,12 +207,12 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"found_docs = vectara.similarity_search_with_score(query)"
|
||||
"found_docs = vectara.similarity_search_with_score(query, filter=\"doc.speech = 'state-of-the-union'\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 9,
|
||||
"id": "756a6887",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -228,7 +233,7 @@
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n",
|
||||
"\n",
|
||||
"Score: 0.7129974\n"
|
||||
"Score: 0.4917977\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -238,6 +243,37 @@
|
||||
"print(f\"\\nScore: {score}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "1f9876a8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now let's do similar search for content in the files we uploaded"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "47784de5",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(Document(page_content='We must forever conduct our struggle on the high plane of dignity and discipline.', metadata={'section': '1'}), 0.7962591)\n",
|
||||
"(Document(page_content='We must not allow our\\ncreative protests to degenerate into physical violence. . . .', metadata={'section': '1'}), 0.25983918)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"We must forever conduct our struggle\"\n",
|
||||
"found_docs = vectara.similarity_search_with_score(query, filter=\"doc.speech = 'I-have-a-dream'\")\n",
|
||||
"print(found_docs[0])\n",
|
||||
"print(found_docs[1])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
@@ -246,12 +282,12 @@
|
||||
"source": [
|
||||
"## Vectara as a Retriever\n",
|
||||
"\n",
|
||||
"Vectara, as all the other vector stores, is a LangChain Retriever, by using cosine similarity. "
|
||||
"Vectara, as all the other vector stores, can be used also as a LangChain Retriever:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 11,
|
||||
"id": "9427195f",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -263,10 +299,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"VectaraRetriever(vectorstore=<langchain.vectorstores.vectara.Vectara object at 0x122db2830>, search_type='similarity', search_kwargs={'lambda_val': 0.025, 'k': 5, 'filter': '', 'n_sentence_context': '0'})"
|
||||
"VectaraRetriever(vectorstore=<langchain.vectorstores.vectara.Vectara object at 0x12772caf0>, search_type='similarity', search_kwargs={'lambda_val': 0.025, 'k': 5, 'filter': '', 'n_sentence_context': '0'})"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -278,7 +314,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 12,
|
||||
"id": "f3c70c31",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -293,7 +329,7 @@
|
||||
"Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt'})"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
||||
@@ -23,7 +23,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install \"cassio>=0.0.6\""
|
||||
"!pip install \"cassio>=0.0.7\""
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -40,10 +40,7 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-25T15:09:41.754535Z",
|
||||
"start_time": "2023-05-25T15:09:40.897232Z"
|
||||
}
|
||||
"is_executing": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -51,8 +48,8 @@
|
||||
"from langchain.memory import ConversationBufferMemory\n",
|
||||
"from langchain import OpenAI\n",
|
||||
"from langchain.schema import HumanMessage, AIMessage\n",
|
||||
"from langchain.tools import DuckDuckGoSearchRun\n",
|
||||
"from langchain.agents import initialize_agent, AgentType\n",
|
||||
"from langchain.utilities import WikipediaAPIWrapper\n",
|
||||
"from langchain.agents import initialize_agent, AgentType, Tool\n",
|
||||
"from uuid import uuid4\n",
|
||||
"\n",
|
||||
"\n",
|
||||
@@ -73,19 +70,37 @@
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "True"
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Load your OpenAI key from a .env file\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"# Provide your OpenAI key\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"load_dotenv()"
|
||||
"openai_key = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Provide your Zep API key. Note that this is optional. See https://docs.getzep.com/deployment/auth\n",
|
||||
"\n",
|
||||
"zep_api_key = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -98,7 +113,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-25T15:09:41.840440Z",
|
||||
@@ -107,13 +122,20 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ddg = DuckDuckGoSearchRun()\n",
|
||||
"tools = [ddg]\n",
|
||||
"search = WikipediaAPIWrapper()\n",
|
||||
"tools = [\n",
|
||||
" Tool(\n",
|
||||
" name=\"Search\",\n",
|
||||
" func=search.run,\n",
|
||||
" description=\"useful for when you need to search online for answers. You should ask targeted questions\",\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"# Set up Zep Chat History\n",
|
||||
"zep_chat_history = ZepChatMessageHistory(\n",
|
||||
" session_id=session_id,\n",
|
||||
" url=ZEP_API_URL,\n",
|
||||
" api_key=zep_api_key\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Use a standard ConversationBufferMemory to encapsulate the Zep chat history\n",
|
||||
@@ -122,7 +144,7 @@
|
||||
")\n",
|
||||
"\n",
|
||||
"# Initialize the agent\n",
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"llm = OpenAI(temperature=0, openai_api_key=openai_key)\n",
|
||||
"agent_chain = initialize_agent(\n",
|
||||
" tools,\n",
|
||||
" llm,\n",
|
||||
@@ -142,7 +164,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-25T15:09:41.960661Z",
|
||||
@@ -212,7 +234,7 @@
|
||||
"]\n",
|
||||
"\n",
|
||||
"for msg in test_history:\n",
|
||||
" zep_chat_history.append(\n",
|
||||
" zep_chat_history.add_message(\n",
|
||||
" HumanMessage(content=msg[\"content\"])\n",
|
||||
" if msg[\"role\"] == \"human\"\n",
|
||||
" else AIMessage(content=msg[\"content\"])\n",
|
||||
@@ -231,7 +253,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-25T15:09:50.485377Z",
|
||||
@@ -245,18 +267,20 @@
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: Do I need to use a tool? No\n",
|
||||
"AI: Parable of the Sower is a prescient novel that speaks to the challenges facing contemporary society, such as climate change, economic inequality, and the rise of authoritarianism. It is a cautionary tale that warns of the dangers of ignoring these issues and the importance of taking action to address them.\u001b[0m\n",
|
||||
"\u001B[1m> Entering new chain...\u001B[0m\n",
|
||||
"\u001B[32;1m\u001B[1;3mThought: Do I need to use a tool? No\n",
|
||||
"AI: Parable of the Sower is a prescient novel that speaks to the challenges facing contemporary society, such as climate change, economic inequality, and the rise of authoritarianism. It is a cautionary tale that warns of the dangers of ignoring these issues and the importance of taking action to address them.\u001B[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
"\u001B[1m> Finished chain.\u001B[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "'Parable of the Sower is a prescient novel that speaks to the challenges facing contemporary society, such as climate change, economic inequality, and the rise of authoritarianism. It is a cautionary tale that warns of the dangers of ignoring these issues and the importance of taking action to address them.'"
|
||||
"text/plain": [
|
||||
"'Parable of the Sower is a prescient novel that speaks to the challenges facing contemporary society, such as climate change, economic inequality, and the rise of authoritarianism. It is a cautionary tale that warns of the dangers of ignoring these issues and the importance of taking action to address them.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -281,7 +305,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-25T15:09:50.493438Z",
|
||||
@@ -293,19 +317,17 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The conversation is about Octavia Butler. The AI describes her as an American science fiction author and mentions the\n",
|
||||
"FX series Kindred as a well-known adaptation of her work. The human then asks about her contemporaries, and the AI lists \n",
|
||||
"Ursula K. Le Guin, Samuel R. Delany, and Joanna Russ.\n",
|
||||
"The human asks about Octavia Butler and the AI identifies her as an American science fiction author. They continue to discuss her works and the fact that the FX series Kindred is based on one of her novels. The AI also lists Ursula K. Le Guin, Samuel R. Delany, and Joanna Russ as Butler's contemporaries.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"{'role': 'human', 'content': 'What awards did she win?', 'uuid': '9fa75c3c-edae-41e3-b9bc-9fcf16b523c9', 'created_at': '2023-05-25T15:09:41.91662Z', 'token_count': 8}\n",
|
||||
"{'role': 'ai', 'content': 'Octavia Butler won the Hugo Award, the Nebula Award, and the MacArthur Fellowship.', 'uuid': 'def4636c-32cb-49ed-b671-32035a034712', 'created_at': '2023-05-25T15:09:41.919874Z', 'token_count': 21}\n",
|
||||
"{'role': 'human', 'content': 'Which other women sci-fi writers might I want to read?', 'uuid': '6e87bd4a-bc23-451e-ae36-05a140415270', 'created_at': '2023-05-25T15:09:41.923771Z', 'token_count': 14}\n",
|
||||
"{'role': 'ai', 'content': 'You might want to read Ursula K. Le Guin or Joanna Russ.', 'uuid': 'f65d8dde-9ee8-4983-9da6-ba789b7e8aa4', 'created_at': '2023-05-25T15:09:41.935254Z', 'token_count': 18}\n",
|
||||
"{'role': 'human', 'content': \"Write a short synopsis of Butler's book, Parable of the Sower. What is it about?\", 'uuid': '5678d056-7f05-4e70-b8e5-f85efa56db01', 'created_at': '2023-05-25T15:09:41.938974Z', 'token_count': 23}\n",
|
||||
"{'role': 'ai', 'content': 'Parable of the Sower is a science fiction novel by Octavia Butler, published in 1993. It follows the story of Lauren Olamina, a young woman living in a dystopian future where society has collapsed due to environmental disasters, poverty, and violence.', 'uuid': '50d64946-9239-4327-83e6-71dcbdd16198', 'created_at': '2023-05-25T15:09:41.957437Z', 'token_count': 56}\n",
|
||||
"{'role': 'human', 'content': \"WWhat is the book's relevance to the challenges facing contemporary society?\", 'uuid': 'a39cfc07-8858-480a-9026-fc47a8ef7001', 'created_at': '2023-05-25T15:09:50.469533Z', 'token_count': 16}\n",
|
||||
"{'role': 'ai', 'content': 'Parable of the Sower is a prescient novel that speaks to the challenges facing contemporary society, such as climate change, economic inequality, and the rise of authoritarianism. It is a cautionary tale that warns of the dangers of ignoring these issues and the importance of taking action to address them.', 'uuid': 'a4ecf0fe-fdd0-4aad-b72b-efde2e6830cc', 'created_at': '2023-05-25T15:09:50.473793Z', 'token_count': 62}\n"
|
||||
"{'role': 'human', 'content': 'What awards did she win?', 'uuid': 'a4bdc592-71a5-47d0-9c64-230b882aab48', 'created_at': '2023-06-26T23:37:56.383953Z', 'token_count': 8, 'metadata': {'system': {'entities': [], 'intent': 'The subject is asking about the awards someone won, likely referring to a specific individual.'}}}\n",
|
||||
"{'role': 'ai', 'content': 'Octavia Butler won the Hugo Award, the Nebula Award, and the MacArthur Fellowship.', 'uuid': '60cc6e6b-7cd4-4a81-aebc-72ef997286b4', 'created_at': '2023-06-26T23:37:56.389935Z', 'token_count': 21, 'metadata': {'system': {'entities': [{'Label': 'PERSON', 'Matches': [{'End': 14, 'Start': 0, 'Text': 'Octavia Butler'}], 'Name': 'Octavia Butler'}, {'Label': 'WORK_OF_ART', 'Matches': [{'End': 33, 'Start': 19, 'Text': 'the Hugo Award'}], 'Name': 'the Hugo Award'}, {'Label': 'EVENT', 'Matches': [{'End': 81, 'Start': 57, 'Text': 'the MacArthur Fellowship'}], 'Name': 'the MacArthur Fellowship'}], 'intent': 'The subject is stating the accomplishments and awards received by Octavia Butler.'}}}\n",
|
||||
"{'role': 'human', 'content': 'Which other women sci-fi writers might I want to read?', 'uuid': 'b189fc60-1510-4a4b-a503-899481d652de', 'created_at': '2023-06-26T23:37:56.395722Z', 'token_count': 14, 'metadata': {'system': {'entities': [], 'intent': 'The subject is looking for recommendations on women science fiction writers to read.'}}}\n",
|
||||
"{'role': 'ai', 'content': 'You might want to read Ursula K. Le Guin or Joanna Russ.', 'uuid': '4be1ccbb-a915-45d6-9f18-7a0c1cbd9907', 'created_at': '2023-06-26T23:37:56.403596Z', 'token_count': 18, 'metadata': {'system': {'entities': [{'Label': 'ORG', 'Matches': [{'End': 40, 'Start': 23, 'Text': 'Ursula K. Le Guin'}], 'Name': 'Ursula K. Le Guin'}, {'Label': 'PERSON', 'Matches': [{'End': 55, 'Start': 44, 'Text': 'Joanna Russ'}], 'Name': 'Joanna Russ'}], 'intent': 'The subject is suggesting reading material and making a literary recommendation.'}}}\n",
|
||||
"{'role': 'human', 'content': \"Write a short synopsis of Butler's book, Parable of the Sower. What is it about?\", 'uuid': 'ac3c5e3e-26a7-4f3b-aeb0-bba084e22753', 'created_at': '2023-06-26T23:37:56.410662Z', 'token_count': 23, 'metadata': {'system': {'entities': [{'Label': 'ORG', 'Matches': [{'End': 32, 'Start': 26, 'Text': 'Butler'}], 'Name': 'Butler'}, {'Label': 'WORK_OF_ART', 'Matches': [{'End': 61, 'Start': 41, 'Text': 'Parable of the Sower'}], 'Name': 'Parable of the Sower'}], 'intent': 'The subject is asking for a brief overview or summary of the book \"Parable of the Sower\" written by Butler.'}}}\n",
|
||||
"{'role': 'ai', 'content': 'Parable of the Sower is a science fiction novel by Octavia Butler, published in 1993. It follows the story of Lauren Olamina, a young woman living in a dystopian future where society has collapsed due to environmental disasters, poverty, and violence.', 'uuid': '4a463b4c-bcab-473c-bed1-fc56a7a20ae2', 'created_at': '2023-06-26T23:37:56.41764Z', 'token_count': 56, 'metadata': {'system': {'entities': [{'Label': 'GPE', 'Matches': [{'End': 20, 'Start': 15, 'Text': 'Sower'}], 'Name': 'Sower'}, {'Label': 'PERSON', 'Matches': [{'End': 65, 'Start': 51, 'Text': 'Octavia Butler'}], 'Name': 'Octavia Butler'}, {'Label': 'DATE', 'Matches': [{'End': 84, 'Start': 80, 'Text': '1993'}], 'Name': '1993'}, {'Label': 'PERSON', 'Matches': [{'End': 124, 'Start': 110, 'Text': 'Lauren Olamina'}], 'Name': 'Lauren Olamina'}]}}}\n",
|
||||
"{'role': 'human', 'content': \"WWhat is the book's relevance to the challenges facing contemporary society?\", 'uuid': '41bab0c7-5e20-40a4-9303-f82069977c91', 'created_at': '2023-06-26T23:38:03.559642Z', 'token_count': 16, 'metadata': {'system': {'entities': [{'Label': 'ORG', 'Matches': [{'End': 5, 'Start': 0, 'Text': 'WWhat'}], 'Name': 'WWhat'}]}}}\n",
|
||||
"{'role': 'ai', 'content': 'Parable of the Sower is a prescient novel that speaks to the challenges facing contemporary society, such as climate change, economic inequality, and the rise of authoritarianism. It is a cautionary tale that warns of the dangers of ignoring these issues and the importance of taking action to address them.', 'uuid': 'bfd8146a-4632-4c8c-98b6-9468bb624339', 'created_at': '2023-06-26T23:38:03.589312Z', 'token_count': 62, 'metadata': {'system': {'entities': [{'Label': 'GPE', 'Matches': [{'End': 20, 'Start': 15, 'Text': 'Sower'}], 'Name': 'Sower'}]}}}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -332,7 +354,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 8,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-25T15:09:50.751203Z",
|
||||
@@ -344,16 +366,16 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'uuid': '6e87bd4a-bc23-451e-ae36-05a140415270', 'created_at': '2023-05-25T15:09:41.923771Z', 'role': 'human', 'content': 'Which other women sci-fi writers might I want to read?', 'token_count': 14} 0.9118298949424545\n",
|
||||
"{'uuid': 'f65d8dde-9ee8-4983-9da6-ba789b7e8aa4', 'created_at': '2023-05-25T15:09:41.935254Z', 'role': 'ai', 'content': 'You might want to read Ursula K. Le Guin or Joanna Russ.', 'token_count': 18} 0.8533024416448016\n",
|
||||
"{'uuid': '52cfe3e8-b800-4dd8-a7dd-8e9e4764dfc8', 'created_at': '2023-05-25T15:09:41.913856Z', 'role': 'ai', 'content': \"Octavia Butler's contemporaries included Ursula K. Le Guin, Samuel R. Delany, and Joanna Russ.\", 'token_count': 27} 0.852352466457884\n",
|
||||
"{'uuid': 'd40da612-0867-4a43-92ec-778b86490a39', 'created_at': '2023-05-25T15:09:41.858543Z', 'role': 'human', 'content': 'Who was Octavia Butler?', 'token_count': 8} 0.8235468913583194\n",
|
||||
"{'uuid': '4fcfbce4-7bfa-44bd-879a-8cbf265bdcf9', 'created_at': '2023-05-25T15:09:41.893848Z', 'role': 'ai', 'content': 'Octavia Estelle Butler (June 22, 1947 – February 24, 2006) was an American science fiction author.', 'token_count': 31} 0.8204317130595353\n",
|
||||
"{'uuid': 'def4636c-32cb-49ed-b671-32035a034712', 'created_at': '2023-05-25T15:09:41.919874Z', 'role': 'ai', 'content': 'Octavia Butler won the Hugo Award, the Nebula Award, and the MacArthur Fellowship.', 'token_count': 21} 0.8196714827228725\n",
|
||||
"{'uuid': '862107de-8f6f-43c0-91fa-4441f01b2b3a', 'created_at': '2023-05-25T15:09:41.898149Z', 'role': 'human', 'content': 'Which books of hers were made into movies?', 'token_count': 11} 0.7954322970428519\n",
|
||||
"{'uuid': '97164506-90fe-4c71-9539-69ebcd1d90a2', 'created_at': '2023-05-25T15:09:41.90887Z', 'role': 'human', 'content': 'Who were her contemporaries?', 'token_count': 8} 0.7942531405021976\n",
|
||||
"{'uuid': '50d64946-9239-4327-83e6-71dcbdd16198', 'created_at': '2023-05-25T15:09:41.957437Z', 'role': 'ai', 'content': 'Parable of the Sower is a science fiction novel by Octavia Butler, published in 1993. It follows the story of Lauren Olamina, a young woman living in a dystopian future where society has collapsed due to environmental disasters, poverty, and violence.', 'token_count': 56} 0.78144769172694\n",
|
||||
"{'uuid': 'c460ffd4-0715-4c69-b793-1092054973e6', 'created_at': '2023-05-25T15:09:41.903082Z', 'role': 'ai', 'content': \"The most well-known adaptation of Octavia Butler's work is the FX series Kindred, based on her novel of the same name.\", 'token_count': 29} 0.7811962820699464\n"
|
||||
"{'uuid': 'b189fc60-1510-4a4b-a503-899481d652de', 'created_at': '2023-06-26T23:37:56.395722Z', 'role': 'human', 'content': 'Which other women sci-fi writers might I want to read?', 'metadata': {'system': {'entities': [], 'intent': 'The subject is looking for recommendations on women science fiction writers to read.'}}, 'token_count': 14} 0.9119619869747062\n",
|
||||
"{'uuid': '4be1ccbb-a915-45d6-9f18-7a0c1cbd9907', 'created_at': '2023-06-26T23:37:56.403596Z', 'role': 'ai', 'content': 'You might want to read Ursula K. Le Guin or Joanna Russ.', 'metadata': {'system': {'entities': [{'Label': 'ORG', 'Matches': [{'End': 40, 'Start': 23, 'Text': 'Ursula K. Le Guin'}], 'Name': 'Ursula K. Le Guin'}, {'Label': 'PERSON', 'Matches': [{'End': 55, 'Start': 44, 'Text': 'Joanna Russ'}], 'Name': 'Joanna Russ'}], 'intent': 'The subject is suggesting reading material and making a literary recommendation.'}}, 'token_count': 18} 0.8534346954749745\n",
|
||||
"{'uuid': '76ec2a3d-b908-4c23-a55d-71ff92865a7a', 'created_at': '2023-06-26T23:37:56.378345Z', 'role': 'ai', 'content': \"Octavia Butler's contemporaries included Ursula K. Le Guin, Samuel R. Delany, and Joanna Russ.\", 'metadata': {'system': {'entities': [{'Label': 'PERSON', 'Matches': [{'End': 16, 'Start': 0, 'Text': \"Octavia Butler's\"}], 'Name': \"Octavia Butler's\"}, {'Label': 'ORG', 'Matches': [{'End': 58, 'Start': 41, 'Text': 'Ursula K. Le Guin'}], 'Name': 'Ursula K. Le Guin'}, {'Label': 'PERSON', 'Matches': [{'End': 76, 'Start': 60, 'Text': 'Samuel R. Delany'}], 'Name': 'Samuel R. Delany'}, {'Label': 'PERSON', 'Matches': [{'End': 93, 'Start': 82, 'Text': 'Joanna Russ'}], 'Name': 'Joanna Russ'}], 'intent': 'The subject is stating the contemporaries of Octavia Butler, who are also science fiction writers.'}}, 'token_count': 27} 0.8523930955780226\n",
|
||||
"{'uuid': '1feb02c7-63c9-4616-854d-0d97fb590ea5', 'created_at': '2023-06-26T23:37:56.313009Z', 'role': 'human', 'content': 'Who was Octavia Butler?', 'metadata': {'system': {'entities': [{'Label': 'PERSON', 'Matches': [{'End': 22, 'Start': 8, 'Text': 'Octavia Butler'}], 'Name': 'Octavia Butler'}], 'intent': 'The subject is asking about the identity of Octavia Butler, likely seeking information about her background or accomplishments.'}}, 'token_count': 8} 0.8236355436055457\n",
|
||||
"{'uuid': 'ebe4696d-b5fa-4ca0-88c9-da794d9611ab', 'created_at': '2023-06-26T23:37:56.332247Z', 'role': 'ai', 'content': 'Octavia Estelle Butler (June 22, 1947 – February 24, 2006) was an American science fiction author.', 'metadata': {'system': {'entities': [{'Label': 'PERSON', 'Matches': [{'End': 22, 'Start': 0, 'Text': 'Octavia Estelle Butler'}], 'Name': 'Octavia Estelle Butler'}, {'Label': 'DATE', 'Matches': [{'End': 37, 'Start': 24, 'Text': 'June 22, 1947'}], 'Name': 'June 22, 1947'}, {'Label': 'DATE', 'Matches': [{'End': 57, 'Start': 40, 'Text': 'February 24, 2006'}], 'Name': 'February 24, 2006'}, {'Label': 'NORP', 'Matches': [{'End': 74, 'Start': 66, 'Text': 'American'}], 'Name': 'American'}], 'intent': 'The subject is making a statement about the background and profession of Octavia Estelle Butler, an American author.'}}, 'token_count': 31} 0.8206687242257686\n",
|
||||
"{'uuid': '60cc6e6b-7cd4-4a81-aebc-72ef997286b4', 'created_at': '2023-06-26T23:37:56.389935Z', 'role': 'ai', 'content': 'Octavia Butler won the Hugo Award, the Nebula Award, and the MacArthur Fellowship.', 'metadata': {'system': {'entities': [{'Label': 'PERSON', 'Matches': [{'End': 14, 'Start': 0, 'Text': 'Octavia Butler'}], 'Name': 'Octavia Butler'}, {'Label': 'WORK_OF_ART', 'Matches': [{'End': 33, 'Start': 19, 'Text': 'the Hugo Award'}], 'Name': 'the Hugo Award'}, {'Label': 'EVENT', 'Matches': [{'End': 81, 'Start': 57, 'Text': 'the MacArthur Fellowship'}], 'Name': 'the MacArthur Fellowship'}], 'intent': 'The subject is stating the accomplishments and awards received by Octavia Butler.'}}, 'token_count': 21} 0.8194249796585193\n",
|
||||
"{'uuid': '0fa4f336-909d-4880-b01a-8e80e91fa8f2', 'created_at': '2023-06-26T23:37:56.344552Z', 'role': 'human', 'content': 'Which books of hers were made into movies?', 'metadata': {'system': {'entities': [], 'intent': 'The subject is inquiring about which books written by an unknown female author were adapted into movies.'}}, 'token_count': 11} 0.7955105671310818\n",
|
||||
"{'uuid': 'f91de7f2-4b84-4c5a-8a33-a71f38f3a59c', 'created_at': '2023-06-26T23:37:56.368146Z', 'role': 'human', 'content': 'Who were her contemporaries?', 'metadata': {'system': {'entities': [], 'intent': 'The subject is asking about the people who lived during the same time period as a specific individual.'}}, 'token_count': 8} 0.7942358617914813\n",
|
||||
"{'uuid': '4a463b4c-bcab-473c-bed1-fc56a7a20ae2', 'created_at': '2023-06-26T23:37:56.41764Z', 'role': 'ai', 'content': 'Parable of the Sower is a science fiction novel by Octavia Butler, published in 1993. It follows the story of Lauren Olamina, a young woman living in a dystopian future where society has collapsed due to environmental disasters, poverty, and violence.', 'metadata': {'system': {'entities': [{'Label': 'GPE', 'Matches': [{'End': 20, 'Start': 15, 'Text': 'Sower'}], 'Name': 'Sower'}, {'Label': 'PERSON', 'Matches': [{'End': 65, 'Start': 51, 'Text': 'Octavia Butler'}], 'Name': 'Octavia Butler'}, {'Label': 'DATE', 'Matches': [{'End': 84, 'Start': 80, 'Text': '1993'}], 'Name': '1993'}, {'Label': 'PERSON', 'Matches': [{'End': 124, 'Start': 110, 'Text': 'Lauren Olamina'}], 'Name': 'Lauren Olamina'}]}}, 'token_count': 56} 0.7816448549236643\n",
|
||||
"{'uuid': '6161d934-a629-4ba2-8bba-0b0996c93964', 'created_at': '2023-06-26T23:37:56.358632Z', 'role': 'ai', 'content': \"The most well-known adaptation of Octavia Butler's work is the FX series Kindred, based on her novel of the same name.\", 'metadata': {'system': {'entities': [{'Label': 'PERSON', 'Matches': [{'End': 50, 'Start': 34, 'Text': \"Octavia Butler's\"}], 'Name': \"Octavia Butler's\"}, {'Label': 'ORG', 'Matches': [{'End': 65, 'Start': 63, 'Text': 'FX'}], 'Name': 'FX'}, {'Label': 'GPE', 'Matches': [{'End': 80, 'Start': 73, 'Text': 'Kindred'}], 'Name': 'Kindred'}], 'intent': \"The subject is discussing Octavia Butler's work being adapted into a TV series called Kindred.\"}}, 'token_count': 29} 0.7815841371388998\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -362,11 +384,25 @@
|
||||
"for r in search_results:\n",
|
||||
" print(r.message, r.dist)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -380,10 +416,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
||||
@@ -0,0 +1,212 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Human input Chat Model\n",
|
||||
"\n",
|
||||
"Along with HumanInputLLM, LangChain also provides a pseudo Chat Model class that can be used for testing, debugging, or educational purposes. This allows you to mock out calls to the Chat Model and simulate how a human would respond if they received the messages.\n",
|
||||
"\n",
|
||||
"In this notebook, we go over how to use this.\n",
|
||||
"\n",
|
||||
"We start this with using the HumanInputChatModel in an agent."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models.human import HumanInputChatModel"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Since we will use the `WikipediaQueryRun` tool in this notebook, you might need to install the `wikipedia` package if you haven't done so already."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/mskim58/dev/research/chatbot/github/langchain/.venv/bin/python: No module named pip\n",
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install wikipedia"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import load_tools\n",
|
||||
"from langchain.agents import initialize_agent\n",
|
||||
"from langchain.agents import AgentType"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tools = load_tools([\"wikipedia\"])\n",
|
||||
"llm = HumanInputChatModel()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = initialize_agent(\n",
|
||||
" tools, llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\n",
|
||||
" ======= start of message ======= \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"type: system\n",
|
||||
"data:\n",
|
||||
" content: \"Answer the following questions as best you can. You have access to the following tools:\\n\\nWikipedia: A wrapper around Wikipedia. Useful for when you need to answer general questions about people, places, companies, facts, historical events, or other subjects. Input should be a search query.\\n\\nThe way you use the tools is by specifying a json blob.\\nSpecifically, this json should have a `action` key (with the name of the tool to use) and a `action_input` key (with the input to the tool going here).\\n\\nThe only values that should be in the \\\"action\\\" field are: Wikipedia\\n\\nThe $JSON_BLOB should only contain a SINGLE action, do NOT return a list of multiple actions. Here is an example of a valid $JSON_BLOB:\\n\\n```\\n{\\n \\\"action\\\": $TOOL_NAME,\\n \\\"action_input\\\": $INPUT\\n}\\n```\\n\\nALWAYS use the following format:\\n\\nQuestion: the input question you must answer\\nThought: you should always think about what to do\\nAction:\\n```\\n$JSON_BLOB\\n```\\nObservation: the result of the action\\n... (this Thought/Action/Observation can repeat N times)\\nThought: I now know the final answer\\nFinal Answer: the final answer to the original input question\\n\\nBegin! Reminder to always use the exact characters `Final Answer` when responding.\"\n",
|
||||
" additional_kwargs: {}\n",
|
||||
"\n",
|
||||
"======= end of message ======= \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" ======= start of message ======= \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"type: human\n",
|
||||
"data:\n",
|
||||
" content: 'What is Bocchi the Rock?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" '\n",
|
||||
" additional_kwargs: {}\n",
|
||||
" example: false\n",
|
||||
"\n",
|
||||
"======= end of message ======= \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[32;1m\u001b[1;3mAction:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Wikipedia\",\n",
|
||||
" \"action_input\": \"What is Bocchi the Rock?\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mPage: Bocchi the Rock!\n",
|
||||
"Summary: Bocchi the Rock! (ぼっち・ざ・ろっく!, Botchi Za Rokku!) is a Japanese four-panel manga series written and illustrated by Aki Hamaji. It has been serialized in Houbunsha's seinen manga magazine Manga Time Kirara Max since December 2017. Its chapters have been collected in five tankōbon volumes as of November 2022.\n",
|
||||
"An anime television series adaptation produced by CloverWorks aired from October to December 2022. The series has been praised for its writing, comedy, characters, and depiction of social anxiety, with the anime's visual creativity receiving acclaim.\n",
|
||||
"\n",
|
||||
"Page: Hitori Bocchi no Marumaru Seikatsu\n",
|
||||
"Summary: Hitori Bocchi no Marumaru Seikatsu (Japanese: ひとりぼっちの○○生活, lit. \"Bocchi Hitori's ____ Life\" or \"The ____ Life of Being Alone\") is a Japanese yonkoma manga series written and illustrated by Katsuwo. It was serialized in ASCII Media Works' Comic Dengeki Daioh \"g\" magazine from September 2013 to April 2021. Eight tankōbon volumes have been released. An anime television series adaptation by C2C aired from April to June 2019.\n",
|
||||
"\n",
|
||||
"Page: Kessoku Band (album)\n",
|
||||
"Summary: Kessoku Band (Japanese: 結束バンド, Hepburn: Kessoku Bando) is the debut studio album by Kessoku Band, a fictional musical group from the anime television series Bocchi the Rock!, released digitally on December 25, 2022, and physically on CD on December 28 by Aniplex. Featuring vocals from voice actresses Yoshino Aoyama, Sayumi Suzushiro, Saku Mizuno, and Ikumi Hasegawa, the album consists of 14 tracks previously heard in the anime, including a cover of Asian Kung-Fu Generation's \"Rockn' Roll, Morning Light Falls on You\", as well as newly recorded songs; nine singles preceded the album's physical release. Commercially, Kessoku Band peaked at number one on the Billboard Japan Hot Albums Chart and Oricon Albums Chart, and was certified gold by the Recording Industry Association of Japan.\n",
|
||||
"\n",
|
||||
"\u001b[0m\n",
|
||||
"Thought:\n",
|
||||
" ======= start of message ======= \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"type: system\n",
|
||||
"data:\n",
|
||||
" content: \"Answer the following questions as best you can. You have access to the following tools:\\n\\nWikipedia: A wrapper around Wikipedia. Useful for when you need to answer general questions about people, places, companies, facts, historical events, or other subjects. Input should be a search query.\\n\\nThe way you use the tools is by specifying a json blob.\\nSpecifically, this json should have a `action` key (with the name of the tool to use) and a `action_input` key (with the input to the tool going here).\\n\\nThe only values that should be in the \\\"action\\\" field are: Wikipedia\\n\\nThe $JSON_BLOB should only contain a SINGLE action, do NOT return a list of multiple actions. Here is an example of a valid $JSON_BLOB:\\n\\n```\\n{\\n \\\"action\\\": $TOOL_NAME,\\n \\\"action_input\\\": $INPUT\\n}\\n```\\n\\nALWAYS use the following format:\\n\\nQuestion: the input question you must answer\\nThought: you should always think about what to do\\nAction:\\n```\\n$JSON_BLOB\\n```\\nObservation: the result of the action\\n... (this Thought/Action/Observation can repeat N times)\\nThought: I now know the final answer\\nFinal Answer: the final answer to the original input question\\n\\nBegin! Reminder to always use the exact characters `Final Answer` when responding.\"\n",
|
||||
" additional_kwargs: {}\n",
|
||||
"\n",
|
||||
"======= end of message ======= \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" ======= start of message ======= \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"type: human\n",
|
||||
"data:\n",
|
||||
" content: \"What is Bocchi the Rock?\\n\\nThis was your previous work (but I haven't seen any of it! I only see what you return as final answer):\\nAction:\\n```\\n{\\n \\\"action\\\": \\\"Wikipedia\\\",\\n \\\"action_input\\\": \\\"What is Bocchi the Rock?\\\"\\n}\\n```\\nObservation: Page: Bocchi the Rock!\\nSummary: Bocchi the Rock! (ぼっち・ざ・ろっく!, Botchi Za Rokku!) is a Japanese four-panel manga series written and illustrated by Aki Hamaji. It has been serialized in Houbunsha's seinen manga magazine Manga Time Kirara Max since December 2017. Its chapters have been collected in five tankōbon volumes as of November 2022.\\nAn anime television series adaptation produced by CloverWorks aired from October to December 2022. The series has been praised for its writing, comedy, characters, and depiction of social anxiety, with the anime's visual creativity receiving acclaim.\\n\\nPage: Hitori Bocchi no Marumaru Seikatsu\\nSummary: Hitori Bocchi no Marumaru Seikatsu (Japanese: ひとりぼっちの○○生活, lit. \\\"Bocchi Hitori's ____ Life\\\" or \\\"The ____ Life of Being Alone\\\") is a Japanese yonkoma manga series written and illustrated by Katsuwo. It was serialized in ASCII Media Works' Comic Dengeki Daioh \\\"g\\\" magazine from September 2013 to April 2021. Eight tankōbon volumes have been released. An anime television series adaptation by C2C aired from April to June 2019.\\n\\nPage: Kessoku Band (album)\\nSummary: Kessoku Band (Japanese: 結束バンド, Hepburn: Kessoku Bando) is the debut studio album by Kessoku Band, a fictional musical group from the anime television series Bocchi the Rock!, released digitally on December 25, 2022, and physically on CD on December 28 by Aniplex. Featuring vocals from voice actresses Yoshino Aoyama, Sayumi Suzushiro, Saku Mizuno, and Ikumi Hasegawa, the album consists of 14 tracks previously heard in the anime, including a cover of Asian Kung-Fu Generation's \\\"Rockn' Roll, Morning Light Falls on You\\\", as well as newly recorded songs; nine singles preceded the album's physical release. Commercially, Kessoku Band peaked at number one on the Billboard Japan Hot Albums Chart and Oricon Albums Chart, and was certified gold by the Recording Industry Association of Japan.\\n\\n\\nThought:\"\n",
|
||||
" additional_kwargs: {}\n",
|
||||
" example: false\n",
|
||||
"\n",
|
||||
"======= end of message ======= \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[32;1m\u001b[1;3mThis finally works.\n",
|
||||
"Final Answer: Bocchi the Rock! is a four-panel manga series and anime television series. The series has been praised for its writing, comedy, characters, and depiction of social anxiety, with the anime's visual creativity receiving acclaim.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'What is Bocchi the Rock?',\n",
|
||||
" 'output': \"Bocchi the Rock! is a four-panel manga series and anime television series. The series has been praised for its writing, comedy, characters, and depiction of social anxiety, with the anime's visual creativity receiving acclaim.\"}"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent(\"What is Bocchi the Rock?\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -52,7 +52,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\" J'aime programmer. \", additional_kwargs={})"
|
||||
"AIMessage(content=\" J'aime la programmation.\", additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
@@ -101,7 +101,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"LLMResult(generations=[[ChatGeneration(text=\" J'aime la programmation.\", generation_info=None, message=AIMessage(content=\" J'aime la programmation.\", additional_kwargs={}))]], llm_output={})"
|
||||
"LLMResult(generations=[[ChatGeneration(text=\" J'aime programmer.\", generation_info=None, message=AIMessage(content=\" J'aime programmer.\", additional_kwargs={}, example=False))]], llm_output={}, run=[RunInfo(run_id=UUID('8cc8fb68-1c35-439c-96a0-695036a93652'))])"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
@@ -125,13 +125,13 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" J'adore programmer."
|
||||
" J'aime la programmation."
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\" J'adore programmer.\", additional_kwargs={})"
|
||||
"AIMessage(content=\" J'aime la programmation.\", additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
@@ -151,7 +151,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "df45f59f",
|
||||
"id": "c253883f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
@@ -173,7 +173,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -8,9 +8,12 @@
|
||||
"source": [
|
||||
"# Clarifai\n",
|
||||
"\n",
|
||||
">[Clarifai](https://www.clarifai.com/) is a AI Platform that provides the full AI lifecycle ranging from data exploration, data labeling, model building and inference.\n",
|
||||
">[Clarifai](https://www.clarifai.com/) is an AI Platform that provides the full AI lifecycle ranging from data exploration, data labeling, model training, evaluation, and inference.\n",
|
||||
"\n",
|
||||
"This example goes over how to use LangChain to interact with `Clarifai` [models](https://clarifai.com/explore/models)."
|
||||
"This example goes over how to use LangChain to interact with `Clarifai` [models](https://clarifai.com/explore/models). \n",
|
||||
"\n",
|
||||
"To use Clarifai, you must have an account and a Personal Access Token (PAT) key. \n",
|
||||
"[Check here](https://clarifai.com/settings/security) to get or create a PAT."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -42,7 +45,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Imports\n",
|
||||
"Here we will be setting the personal access token. You can find your PAT under settings/security on the platform."
|
||||
"Here we will be setting the personal access token. You can find your PAT under [settings/security](https://clarifai.com/settings/security) in your Clarifai account."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -52,17 +55,25 @@
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Please login and get your API key from https://clarifai.com/settings/security \n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"CLARIFAI_PAT_KEY = getpass()"
|
||||
"CLARIFAI_PAT = getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": null,
|
||||
"id": "6fb585dd",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -81,12 +92,12 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Input\n",
|
||||
"Create a prompt template to be used with the LLM Chain"
|
||||
"Create a prompt template to be used with the LLM Chain:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": null,
|
||||
"id": "035dea0f",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -121,10 +132,10 @@
|
||||
"source": [
|
||||
"USER_ID = 'openai'\n",
|
||||
"APP_ID = 'chat-completion'\n",
|
||||
"MODEL_ID = 'chatgpt-3_5-turbo'\n",
|
||||
"MODEL_ID = 'GPT-3_5-turbo'\n",
|
||||
"\n",
|
||||
"# You can provide a specific model version\n",
|
||||
"# model_version_id = \"MODEL_VERSION_ID\""
|
||||
"# You can provide a specific model version as the model_version_id arg.\n",
|
||||
"# MODEL_VERSION_ID = \"MODEL_VERSION_ID\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -137,7 +148,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Initialize a Clarifai LLM\n",
|
||||
"clarifai_llm = Clarifai(clarifai_pat_key=CLARIFAI_PAT_KEY, user_id=USER_ID, app_id=APP_ID, model_id=MODEL_ID)"
|
||||
"clarifai_llm = Clarifai(pat=CLARIFAI_PAT, user_id=USER_ID, app_id=APP_ID, model_id=MODEL_ID)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -171,7 +182,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Justin Bieber was born on March 1, 1994. So, we need to look at the Super Bowl that was played in the year 1994. \\n\\nThe Super Bowl in 1994 was Super Bowl XXVIII (28). It was played on January 30, 1994, between the Dallas Cowboys and the Buffalo Bills. \\n\\nThe Dallas Cowboys won the Super Bowl in 1994, defeating the Buffalo Bills by a score of 30-13. \\n\\nTherefore, the Dallas Cowboys are the NFL team that won the Super Bowl in the year Justin Bieber was born.'"
|
||||
"'Justin Bieber was born on March 1, 1994. So, we need to figure out the Super Bowl winner for the 1994 season. The NFL season spans two calendar years, so the Super Bowl for the 1994 season would have taken place in early 1995. \\n\\nThe Super Bowl in question is Super Bowl XXIX, which was played on January 29, 1995. The game was won by the San Francisco 49ers, who defeated the San Diego Chargers by a score of 49-26. Therefore, the San Francisco 49ers won the Super Bowl in the year Justin Bieber was born.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
|
||||
@@ -32,7 +32,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
@@ -45,7 +45,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
@@ -64,13 +64,20 @@
|
||||
"source": [
|
||||
"### Specify Model\n",
|
||||
"\n",
|
||||
"To run locally, download a compatible ggml-formatted model. For more info, visit https://github.com/nomic-ai/gpt4all\n",
|
||||
"To run locally, download a compatible ggml-formatted model. \n",
|
||||
" \n",
|
||||
"**Download option 1**: The [gpt4all page](https://gpt4all.io/index.html) has a useful `Model Explorer` section:\n",
|
||||
"\n",
|
||||
"For full installation instructions go [here](https://gpt4all.io/index.html).\n",
|
||||
"* Select a model of interest\n",
|
||||
"* Download using the UI and move the `.bin` to the `local_path` (noted below)\n",
|
||||
"\n",
|
||||
"The GPT4All Chat installer needs to decompress a 3GB LLM model during the installation process!\n",
|
||||
"For more info, visit https://github.com/nomic-ai/gpt4all.\n",
|
||||
"\n",
|
||||
"Note that new models are uploaded regularly - check the link above for the most recent `.bin` URL"
|
||||
"--- \n",
|
||||
"\n",
|
||||
"**Download option 2**: Uncomment the below block to download a model. \n",
|
||||
"\n",
|
||||
"* You may want to update `url` to a new version, whih can be browsed using the [gpt4all page](https://gpt4all.io/index.html)."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -81,22 +88,8 @@
|
||||
"source": [
|
||||
"local_path = (\n",
|
||||
" \"./models/ggml-gpt4all-l13b-snoozy.bin\" # replace with your desired local file path\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Uncomment the below block to download a model. You may want to update `url` to a new version."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
")\n",
|
||||
"\n",
|
||||
"# import requests\n",
|
||||
"\n",
|
||||
"# from pathlib import Path\n",
|
||||
@@ -126,8 +119,10 @@
|
||||
"source": [
|
||||
"# Callbacks support token-wise streaming\n",
|
||||
"callbacks = [StreamingStdOutCallbackHandler()]\n",
|
||||
"\n",
|
||||
"# Verbose is required to pass to the callback manager\n",
|
||||
"llm = GPT4All(model=local_path, callbacks=callbacks, verbose=True)\n",
|
||||
"\n",
|
||||
"# If you want to use a custom model add the backend parameter\n",
|
||||
"# Check https://docs.gpt4all.io/gpt4all_python.html for supported backends\n",
|
||||
"llm = GPT4All(model=local_path, backend=\"gptj\", callbacks=callbacks, verbose=True)"
|
||||
@@ -170,7 +165,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
"version": "3.9.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1,20 +1,26 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "959300d4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Hugging Face Hub\n",
|
||||
"\n",
|
||||
"The [Hugging Face Hub](https://huggingface.co/docs/hub/index) is a platform with over 120k models, 20k datasets, and 50k demo apps (Spaces), all open source and publicly available, in an online platform where people can easily collaborate and build ML together.\n",
|
||||
">The [Hugging Face Hub](https://huggingface.co/docs/hub/index) is a platform with over 120k models, 20k datasets, and 50k demo apps (Spaces), all open source and publicly available, in an online platform where people can easily collaborate and build ML together.\n",
|
||||
"\n",
|
||||
"This example showcases how to connect to the Hugging Face Hub."
|
||||
"This example showcases how to connect to the `Hugging Face Hub` and use different models."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1ddafc6d-7d7c-48fa-838f-0e7f50895ce3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Installation and Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "4c1b8450-5eaf-4d34-8341-2d785448a1ff",
|
||||
"metadata": {
|
||||
@@ -26,22 +32,30 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"id": "d772b637-de00-4663-bd77-9bc96d798db2",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install huggingface_hub > /dev/null"
|
||||
"!pip install huggingface_hub"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"id": "d597a792-354c-4ca5-b483-5965eec5d63d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# get a token: https://huggingface.co/docs/api-inference/quicktour#get-your-api-token\n",
|
||||
"\n",
|
||||
@@ -52,7 +66,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 2,
|
||||
"id": "b8c5b88c-e4b8-4d0d-9a35-6e8f106452c2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -63,108 +77,101 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "84dd44c1-c428-41f3-a911-520281386c94",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Select a Model**"
|
||||
"## Prepare Examples"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "39c7eeac-01c4-486b-9480-e828a9e73e78",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import HuggingFaceHub\n",
|
||||
"\n",
|
||||
"repo_id = \"google/flan-t5-xl\" # See https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads for some other options\n",
|
||||
"\n",
|
||||
"llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={\"temperature\": 0, \"max_length\": 64})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3acf0069",
|
||||
"id": "3fe7d1d1-241d-426a-acff-e208f1088871",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import PromptTemplate, LLMChain\n",
|
||||
"from langchain import HuggingFaceHub"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "6620f39b-3d32-4840-8931-ff7d2c3e47e8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import PromptTemplate, LLMChain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "44adc1a0-9c0a-4f1e-af5a-fe04222e78d7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"question = \"Who won the FIFA World Cup in the year 1994? \"\n",
|
||||
"\n",
|
||||
"template = \"\"\"Question: {question}\n",
|
||||
"\n",
|
||||
"Answer: Let's think step by step.\"\"\"\n",
|
||||
"prompt = PromptTemplate(template=template, input_variables=[\"question\"])\n",
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||
"\n",
|
||||
"question = \"Who won the FIFA World Cup in the year 1994? \"\n",
|
||||
"\n",
|
||||
"print(llm_chain.run(question))"
|
||||
"prompt = PromptTemplate(template=template, input_variables=[\"question\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "ddaa06cf-95ec-48ce-b0ab-d892a7909693",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Examples\n",
|
||||
"\n",
|
||||
"Below are some examples of models you can access through the Hugging Face Hub integration."
|
||||
"Below are some examples of models you can access through the `Hugging Face Hub` integration."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "4fa9337e-ccb5-4c52-9b7c-1653148bc256",
|
||||
"id": "4c16fded-70d1-42af-8bfa-6ddda9f0bc63",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### StableLM, by Stability AI\n",
|
||||
"\n",
|
||||
"See [Stability AI's](https://huggingface.co/stabilityai) organization page for a list of available models."
|
||||
"### Flan, by Google"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "36a1ce01-bd46-451f-8ee6-61c8f4bd665a",
|
||||
"metadata": {},
|
||||
"execution_count": 5,
|
||||
"id": "39c7eeac-01c4-486b-9480-e828a9e73e78",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"repo_id = \"stabilityai/stablelm-tuned-alpha-3b\"\n",
|
||||
"# Others include stabilityai/stablelm-base-alpha-3b\n",
|
||||
"# as well as 7B parameter versions"
|
||||
"repo_id = \"google/flan-t5-xxl\" # See https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads for some other options"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b5654cea-60b0-4f40-ab34-06ba1eca810d",
|
||||
"execution_count": 8,
|
||||
"id": "3acf0069",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The FIFA World Cup was held in the year 1994. West Germany won the FIFA World Cup in 1994\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={\"temperature\": 0, \"max_length\": 64})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2f19d0dc-c987-433f-a8d6-b1214e8ee067",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Reuse the prompt and question from above.\n",
|
||||
"llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64})\n",
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||
"\n",
|
||||
"print(llm_chain.run(question))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "1a5c97af-89bc-4e59-95c1-223742a9160b",
|
||||
"metadata": {},
|
||||
@@ -176,34 +183,40 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 9,
|
||||
"id": "521fcd2b-8e38-4920-b407-5c7d330411c9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import HuggingFaceHub\n",
|
||||
"\n",
|
||||
"repo_id = \"databricks/dolly-v2-3b\"\n",
|
||||
"\n",
|
||||
"llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={\"temperature\": 0, \"max_length\": 64})"
|
||||
"repo_id = \"databricks/dolly-v2-3b\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 10,
|
||||
"id": "9907ec3a-fe0c-4543-81c4-d42f9453f16c",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" First of all, the world cup was won by the Germany. Then the Argentina won the world cup in 2022. So, the Argentina won the world cup in 1994.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Question: Who\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Reuse the prompt and question from above.\n",
|
||||
"llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64})\n",
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||
"print(llm_chain.run(question))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "03f6ae52-b5f9-4de6-832c-551cb3fa11ae",
|
||||
"metadata": {},
|
||||
@@ -215,17 +228,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 13,
|
||||
"id": "257a091d-750b-4910-ac08-fe1c7b3fd98b",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import HuggingFaceHub\n",
|
||||
"\n",
|
||||
"repo_id = \"Writer/camel-5b-hf\" # See https://huggingface.co/Writer for other options\n",
|
||||
"llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={\"temperature\": 0, \"max_length\": 64})"
|
||||
"repo_id = \"Writer/camel-5b-hf\" # See https://huggingface.co/Writer for other options"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -235,27 +245,74 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Reuse the prompt and question from above.\n",
|
||||
"llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64})\n",
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||
"print(llm_chain.run(question))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "2bf838eb-1083-402f-b099-b07c452418c8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**And many more!**"
|
||||
"### XGen, by Salesforce\n",
|
||||
"\n",
|
||||
"See [more information](https://github.com/salesforce/xgen)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "18c78880-65d7-41d0-9722-18090efb60e9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"repo_id = \"Salesforce/xgen-7b-8k-base\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "18c78880-65d7-41d0-9722-18090efb60e9",
|
||||
"id": "1b1150b4-ec30-4674-849e-6a41b085aa2b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"source": [
|
||||
"llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64})\n",
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||
"print(llm_chain.run(question))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0aca9f9e-f333-449c-97b2-10d1dbf17e75",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Falcon, by Technology Innovation Institute (TII)\n",
|
||||
"\n",
|
||||
"See [more information](https://huggingface.co/tiiuae/falcon-40b)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "496b35ac-5ee2-4b68-a6ce-232608f56c03",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"repo_id = \"tiiuae/falcon-40b\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ff2541ad-e394-4179-93c2-7ae9c4ca2a25",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64})\n",
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||
"print(llm_chain.run(question))"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -274,7 +331,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
"There is a banch of options how to install the llama-cpp package: \n",
|
||||
"- only CPU usage\n",
|
||||
"- CPU + GPU (using one of many BLAS backends)\n",
|
||||
"- Metal GPU (MacOS with Apple Silicon Chip) \n",
|
||||
"\n",
|
||||
"### CPU only installation"
|
||||
]
|
||||
@@ -73,7 +74,45 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!CMAKE_ARGS=\"-DLLAMA_CUBLAS=on\" FORCE_CMAKE=1 pip install --upgrade --force-reinstall llama-cpp-python"
|
||||
"!CMAKE_ARGS=\"-DLLAMA_CUBLAS=on\" FORCE_CMAKE=1 pip install --upgrade --force-reinstall llama-cpp-python --no-cache-dir"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation with Metal\n",
|
||||
"\n",
|
||||
"`lama.cpp` supports Apple silicon first-class citizen - optimized via ARM NEON, Accelerate and Metal frameworks. Use the `FORCE_CMAKE=1` environment variable to force the use of cmake and install the pip package for the Metal support ([source](https://github.com/abetlen/llama-cpp-python/blob/main/docs/install/macos.md)).\n",
|
||||
"\n",
|
||||
"Example installation with Metal Support:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!CMAKE_ARGS=\"-DLLAMA_METAL=on\" FORCE_CMAKE=1 pip install llama-cpp-python"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**IMPORTANT**: If you have already installed a cpu only version of the package, you need to reinstall it from scratch: consider the following command: "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!CMAKE_ARGS=\"-DLLAMA_METAL=on\" FORCE_CMAKE=1 pip install --upgrade --force-reinstall llama-cpp-python --no-cache-dir"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -325,6 +364,61 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Metal\n",
|
||||
"\n",
|
||||
"If the installation with Metal was correct, you will see an `NEON = 1` indicator in model properties.\n",
|
||||
"\n",
|
||||
"Two of the most important parameters for use with GPU are:\n",
|
||||
"\n",
|
||||
"- `n_gpu_layers` - determines how many layers of the model are offloaded to your Metal GPU, in the most case, set it to `1` is enough for Metal\n",
|
||||
"- `n_batch` - how many tokens are processed in parallel, default is 8, set to bigger number.\n",
|
||||
"- `f16_kv` - for some reason, Metal only support `True`, otherwise you will get error such as `Asserting on type 0\n",
|
||||
"GGML_ASSERT: .../ggml-metal.m:706: false && \"not implemented\"`\n",
|
||||
"\n",
|
||||
"Setting these parameters correctly will dramatically improve the evaluation speed (see [wrapper code](https://github.com/mmagnesium/langchain/blob/master/langchain/llms/llamacpp.py) for more details)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"n_gpu_layers = 1 # Metal set to 1 is enough.\n",
|
||||
"n_batch = 512 # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.\n",
|
||||
"\n",
|
||||
"# Make sure the model path is correct for your system!\n",
|
||||
"llm = LlamaCpp(\n",
|
||||
" model_path=\"./ggml-model-q4_0.bin\",\n",
|
||||
" n_gpu_layers=n_gpu_layers,\n",
|
||||
" n_batch=n_batch,\n",
|
||||
" f16_kv=True, # MUST set to True, otherwise you will run into problem after a couple of calls\n",
|
||||
" callback_manager=callback_manager,\n",
|
||||
" verbose=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The rest are almost same as GPU, the console log will show the following log to indicate the Metal was enable properly.\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"ggml_metal_init: allocating\n",
|
||||
"ggml_metal_init: using MPS\n",
|
||||
"...\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"You also could check the `Activity Monitor` by watching the % GPU of the process, the % CPU will drop dramatically after turn on `n_gpu_layers=1`. Also for the first time call LLM, the performance might be slow due to the model compilation in Metal GPU."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
6
docs/package-lock.json
generated
Normal file
6
docs/package-lock.json
generated
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"name": "docs",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {}
|
||||
}
|
||||
@@ -78,11 +78,14 @@ pprint(data)
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
|
||||
## Using `JSONLoader`
|
||||
|
||||
Suppose we are interested in extracting the values under the `content` field within the `messages` key of the JSON data. This can easily be done through the `JSONLoader` as shown below.
|
||||
|
||||
|
||||
### JSON file
|
||||
|
||||
```python
|
||||
loader = JSONLoader(
|
||||
file_path='./example_data/facebook_chat.json',
|
||||
@@ -114,6 +117,81 @@ pprint(data)
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
|
||||
### JSON Lines file
|
||||
|
||||
If you want to load documents from a JSON Lines file, you pass `json_lines=True`
|
||||
and specify `jq_schema` to extract `page_content` from a single JSON object.
|
||||
|
||||
```python
|
||||
file_path = './example_data/facebook_chat_messages.jsonl'
|
||||
pprint(Path(file_path).read_text())
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
('{"sender_name": "User 2", "timestamp_ms": 1675597571851, "content": "Bye!"}\n'
|
||||
'{"sender_name": "User 1", "timestamp_ms": 1675597435669, "content": "Oh no '
|
||||
'worries! Bye"}\n'
|
||||
'{"sender_name": "User 2", "timestamp_ms": 1675596277579, "content": "No Im '
|
||||
'sorry it was my mistake, the blue one is not for sale"}\n')
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
|
||||
```python
|
||||
loader = JSONLoader(
|
||||
file_path='./example_data/facebook_chat_messages.jsonl',
|
||||
jq_schema='.content',
|
||||
json_lines=True)
|
||||
|
||||
data = loader.load()
|
||||
```
|
||||
|
||||
```python
|
||||
pprint(data)
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
[Document(page_content='Bye!', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat_messages.jsonl', 'seq_num': 1}),
|
||||
Document(page_content='Oh no worries! Bye', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat_messages.jsonl', 'seq_num': 2}),
|
||||
Document(page_content='No Im sorry it was my mistake, the blue one is not for sale', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat_messages.jsonl', 'seq_num': 3})]
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
|
||||
Another option is set `jq_schema='.'` and provide `content_key`:
|
||||
|
||||
```python
|
||||
loader = JSONLoader(
|
||||
file_path='./example_data/facebook_chat_messages.jsonl',
|
||||
jq_schema='.',
|
||||
content_key='sender_name',
|
||||
json_lines=True)
|
||||
|
||||
data = loader.load()
|
||||
```
|
||||
|
||||
```python
|
||||
pprint(data)
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
[Document(page_content='User 2', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat_messages.jsonl', 'seq_num': 1}),
|
||||
Document(page_content='User 1', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat_messages.jsonl', 'seq_num': 2}),
|
||||
Document(page_content='User 2', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat_messages.jsonl', 'seq_num': 3})]
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
|
||||
## Extracting metadata
|
||||
|
||||
Generally, we want to include metadata available in the JSON file into the documents that we create from the content.
|
||||
|
||||
@@ -253,7 +253,7 @@ html_text = """
|
||||
|
||||
```python
|
||||
html_splitter = RecursiveCharacterTextSplitter.from_language(
|
||||
language=Language.MARKDOWN, chunk_size=60, chunk_overlap=0
|
||||
language=Language.HTML, chunk_size=60, chunk_overlap=0
|
||||
)
|
||||
html_docs = html_splitter.create_documents([html_text])
|
||||
html_docs
|
||||
@@ -262,19 +262,18 @@ html_docs
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
[Document(page_content='<!DOCTYPE html>\n<html>\n <head>', metadata={}),
|
||||
Document(page_content='<title>🦜️🔗 LangChain</title>\n <style>', metadata={}),
|
||||
Document(page_content='body {', metadata={}),
|
||||
Document(page_content='font-family: Arial, sans-serif;', metadata={}),
|
||||
Document(page_content='}\n h1 {', metadata={}),
|
||||
Document(page_content='color: darkblue;\n }', metadata={}),
|
||||
Document(page_content='</style>\n </head>\n <body>\n <div>', metadata={}),
|
||||
Document(page_content='<h1>🦜️🔗 LangChain</h1>', metadata={}),
|
||||
Document(page_content='<p>⚡ Building applications with LLMs through', metadata={}),
|
||||
Document(page_content='composability ⚡</p>', metadata={}),
|
||||
Document(page_content='</div>\n <div>', metadata={}),
|
||||
Document(page_content='As an open source project in a rapidly', metadata={}),
|
||||
Document(page_content='developing field, we are extremely open to contributions.', metadata={}),
|
||||
[Document(page_content='<!DOCTYPE html>\n<html>', metadata={}),
|
||||
Document(page_content='<head>\n <title>🦜️🔗 LangChain</title>', metadata={}),
|
||||
Document(page_content='<style>\n body {\n font-family: Aria', metadata={}),
|
||||
Document(page_content='l, sans-serif;\n }\n h1 {', metadata={}),
|
||||
Document(page_content='color: darkblue;\n }\n </style>\n </head', metadata={}),
|
||||
Document(page_content='>', metadata={}),
|
||||
Document(page_content='<body>', metadata={}),
|
||||
Document(page_content='<div>\n <h1>🦜️🔗 LangChain</h1>', metadata={}),
|
||||
Document(page_content='<p>⚡ Building applications with LLMs through composability ⚡', metadata={}),
|
||||
Document(page_content='</p>\n </div>', metadata={}),
|
||||
Document(page_content='<div>\n As an open source project in a rapidly dev', metadata={}),
|
||||
Document(page_content='eloping field, we are extremely open to contributions.', metadata={}),
|
||||
Document(page_content='</div>\n </body>\n</html>', metadata={})]
|
||||
```
|
||||
|
||||
@@ -310,4 +309,4 @@ sol_docs
|
||||
]
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
</CodeOutputBlock>
|
||||
|
||||
@@ -1,24 +1,40 @@
|
||||
The `BaseRetriever` class in LangChain is as follows:
|
||||
The public API of the `BaseRetriever` class in LangChain is as follows:
|
||||
|
||||
```python
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import List
|
||||
from typing import Any, List
|
||||
from langchain.schema import Document
|
||||
from langchain.callbacks.manager import Callbacks
|
||||
|
||||
class BaseRetriever(ABC):
|
||||
@abstractmethod
|
||||
def get_relevant_documents(self, query: str) -> List[Document]:
|
||||
"""Get texts relevant for a query.
|
||||
|
||||
...
|
||||
def get_relevant_documents(
|
||||
self, query: str, *, callbacks: Callbacks = None, **kwargs: Any
|
||||
) -> List[Document]:
|
||||
"""Retrieve documents relevant to a query.
|
||||
Args:
|
||||
query: string to find relevant texts for
|
||||
|
||||
query: string to find relevant documents for
|
||||
callbacks: Callback manager or list of callbacks
|
||||
Returns:
|
||||
List of relevant documents
|
||||
"""
|
||||
...
|
||||
|
||||
async def aget_relevant_documents(
|
||||
self, query: str, *, callbacks: Callbacks = None, **kwargs: Any
|
||||
) -> List[Document]:
|
||||
"""Asynchronously get documents relevant to a query.
|
||||
Args:
|
||||
query: string to find relevant documents for
|
||||
callbacks: Callback manager or list of callbacks
|
||||
Returns:
|
||||
List of relevant documents
|
||||
"""
|
||||
...
|
||||
```
|
||||
|
||||
It's that simple! The `get_relevant_documents` method can be implemented however you see fit.
|
||||
It's that simple! You can call `get_relevant_documents` or the async `get_relevant_documents` methods to retrieve documents relevant to a query, where "relevance" is defined by
|
||||
the specific retriever object you are calling.
|
||||
|
||||
Of course, we also help construct what we think useful Retrievers are. The main type of Retriever that we focus on is a Vectorstore retriever. We will focus on that for the rest of this guide.
|
||||
|
||||
|
||||
@@ -0,0 +1,161 @@
|
||||
# Implement a Custom Retriever
|
||||
|
||||
In this walkthrough, you will implement a simple custom retriever in LangChain using a simple dot product distance lookup.
|
||||
|
||||
All retrievers inherit from the `BaseRetriever` class and override the following abstract methods:
|
||||
|
||||
```python
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, List
|
||||
from langchain.schema import Document
|
||||
from langchain.callbacks.manager import (
|
||||
AsyncCallbackManagerForRetrieverRun,
|
||||
CallbackManagerForRetrieverRun,
|
||||
)
|
||||
|
||||
class BaseRetriever(ABC):
|
||||
|
||||
@abstractmethod
|
||||
def _get_relevant_documents(
|
||||
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
|
||||
) -> List[Document]:
|
||||
"""Get documents relevant to a query.
|
||||
Args:
|
||||
query: string to find relevant documents for
|
||||
run_manager: The callbacks handler to use
|
||||
Returns:
|
||||
List of relevant documents
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
async def _aget_relevant_documents(
|
||||
self,
|
||||
query: str,
|
||||
*,
|
||||
run_manager: AsyncCallbackManagerForRetrieverRun,
|
||||
) -> List[Document]:
|
||||
"""Asynchronously get documents relevant to a query.
|
||||
Args:
|
||||
query: string to find relevant documents for
|
||||
run_manager: The callbacks handler to use
|
||||
Returns:
|
||||
List of relevant documents
|
||||
"""
|
||||
```
|
||||
|
||||
|
||||
The `_get_relevant_documents` and async `_get_relevant_documents` methods can be implemented however you see fit. The `run_manager` is useful if your retriever calls other traceable LangChain primitives like LLMs, chains, or tools.
|
||||
|
||||
|
||||
Below, implement an example that fetches the most similar documents from a list of documents using a numpy array of embeddings.
|
||||
|
||||
|
||||
```python
|
||||
from typing import Any, List, Optional
|
||||
|
||||
import numpy as np
|
||||
|
||||
from langchain.callbacks.manager import (
|
||||
AsyncCallbackManagerForRetrieverRun,
|
||||
CallbackManagerForRetrieverRun,
|
||||
)
|
||||
from langchain.embeddings import OpenAIEmbeddings
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.schema import BaseRetriever, Document
|
||||
|
||||
|
||||
class NumpyRetriever(BaseRetriever):
|
||||
"""Retrieves documents from a numpy array."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
texts: List[str],
|
||||
vectors: np.ndarray,
|
||||
embeddings: Optional[Embeddings] = None,
|
||||
num_to_return: int = 1,
|
||||
) -> None:
|
||||
super().__init__()
|
||||
self.embeddings = embeddings or OpenAIEmbeddings()
|
||||
self.texts = texts
|
||||
self.vectors = vectors
|
||||
self.num_to_return = num_to_return
|
||||
|
||||
@classmethod
|
||||
def from_texts(
|
||||
cls,
|
||||
texts: List[str],
|
||||
embeddings: Optional[Embeddings] = None,
|
||||
**kwargs: Any,
|
||||
) -> "NumpyRetriever":
|
||||
embeddings = embeddings or OpenAIEmbeddings()
|
||||
vectors = np.array(embeddings.embed_documents(texts))
|
||||
return cls(texts, vectors, embeddings)
|
||||
|
||||
def _get_relevant_documents_from_query_vector(
|
||||
self, vector_query: np.ndarray
|
||||
) -> List[Document]:
|
||||
dot_product = np.dot(self.vectors, vector_query)
|
||||
# Get the indices of the min 5 documents
|
||||
indices = np.argpartition(
|
||||
dot_product, -min(self.num_to_return, len(self.vectors))
|
||||
)[-self.num_to_return :]
|
||||
# Sort indices by distance
|
||||
indices = indices[np.argsort(dot_product[indices])]
|
||||
return [
|
||||
Document(
|
||||
page_content=self.texts[idx],
|
||||
metadata={"index": idx},
|
||||
)
|
||||
for idx in indices
|
||||
]
|
||||
|
||||
def _get_relevant_documents(
|
||||
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
|
||||
) -> List[Document]:
|
||||
"""Get documents relevant to a query.
|
||||
Args:
|
||||
query: string to find relevant documents for
|
||||
run_manager: The callbacks handler to use
|
||||
Returns:
|
||||
List of relevant documents
|
||||
"""
|
||||
vector_query = np.array(self.embeddings.embed_query(query))
|
||||
return self._get_relevant_documents_from_query_vector(vector_query)
|
||||
|
||||
async def _aget_relevant_documents(
|
||||
self,
|
||||
query: str,
|
||||
*,
|
||||
run_manager: AsyncCallbackManagerForRetrieverRun,
|
||||
) -> List[Document]:
|
||||
"""Asynchronously get documents relevant to a query.
|
||||
Args:
|
||||
query: string to find relevant documents for
|
||||
run_manager: The callbacks handler to use
|
||||
Returns:
|
||||
List of relevant documents
|
||||
"""
|
||||
query_emb = await self.embeddings.aembed_query(query)
|
||||
return self._get_relevant_documents_from_query_vector(np.array(query_emb))
|
||||
```
|
||||
|
||||
The retriever can be instantiated through the class method `from_texts`. It embeds the texts and stores them in a numpy array. To look up documents, it embeds the query and finds the most similar documents using a simple dot product distance.
|
||||
Once the retriever is implemented, you can use it like any other retriever in LangChain.
|
||||
|
||||
|
||||
```python
|
||||
retriever = NumpyRetriever.from_texts(texts= ["hello world", "goodbye world"])
|
||||
```
|
||||
|
||||
You can then use the retriever to get relevant documents.
|
||||
|
||||
```python
|
||||
retriever.get_relevant_documents("Hi there!")
|
||||
|
||||
# [Document(page_content='hello world', metadata={'index': 0})]
|
||||
```
|
||||
|
||||
```python
|
||||
retriever.get_relevant_documents("Bye!")
|
||||
# [Document(page_content='goodbye world', metadata={'index': 1})]
|
||||
```
|
||||
@@ -31,7 +31,7 @@ embeddings_model = OpenAIEmbeddings()
|
||||
#### Embed list of texts
|
||||
|
||||
```python
|
||||
embeddings = embedding_model.embed_documents(
|
||||
embeddings = embeddings_model.embed_documents(
|
||||
[
|
||||
"Hi there!",
|
||||
"Oh, hello!",
|
||||
@@ -56,7 +56,7 @@ len(embeddings), len(embeddings[0])
|
||||
Embed a single piece of text for the purpose of comparing to other embedded pieces of texts.
|
||||
|
||||
```python
|
||||
embedded_query = embedding_model.embed_query("What was the name mentioned in the conversation?")
|
||||
embedded_query = embeddings_model.embed_query("What was the name mentioned in the conversation?")
|
||||
embedded_query[:5]
|
||||
```
|
||||
|
||||
|
||||
@@ -26,7 +26,8 @@ raw_documents = TextLoader('../../../state_of_the_union.txt').load()
|
||||
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
||||
documents = text_splitter.split_documents(raw_documents)
|
||||
|
||||
db = FAISS.from_documents(documents, OpenAIEmbeddings())
|
||||
embeddings = OpenAIEmbeddings()
|
||||
db = FAISS.from_documents(documents, embeddings)
|
||||
```
|
||||
|
||||
### Similarity search
|
||||
|
||||
@@ -16,7 +16,7 @@ If you'd prefer not to set an environment variable you can pass the key in direc
|
||||
```python
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
|
||||
chat = ChatOpenAI(open_api_key="...")
|
||||
chat = ChatOpenAI(openai_api_key="...")
|
||||
```
|
||||
|
||||
otherwise you can initialize without any params:
|
||||
|
||||
@@ -65,7 +65,7 @@ pipeline_prompt.input_variables
|
||||
print(pipeline_prompt.format(
|
||||
person="Elon Musk",
|
||||
example_q="What's your favorite car?",
|
||||
example_a="Telsa",
|
||||
example_a="Tesla",
|
||||
input="What's your favorite social media site?"
|
||||
))
|
||||
```
|
||||
@@ -77,7 +77,7 @@ print(pipeline_prompt.format(
|
||||
Here's an example of an interaction:
|
||||
|
||||
Q: What's your favorite car?
|
||||
A: Telsa
|
||||
A: Tesla
|
||||
Now, do this for real!
|
||||
|
||||
Q: What's your favorite social media site?
|
||||
|
||||
@@ -38,11 +38,11 @@ from langchain.llms import (
|
||||
)
|
||||
from langchain.llms.huggingface_pipeline import HuggingFacePipeline
|
||||
from langchain.prompts import (
|
||||
BasePromptTemplate,
|
||||
FewShotPromptTemplate,
|
||||
Prompt,
|
||||
PromptTemplate,
|
||||
)
|
||||
from langchain.schema.prompt_template import BasePromptTemplate
|
||||
from langchain.sql_database import SQLDatabase
|
||||
from langchain.utilities.arxiv import ArxivAPIWrapper
|
||||
from langchain.utilities.google_search import GoogleSearchAPIWrapper
|
||||
|
||||
@@ -26,16 +26,16 @@ from langchain.callbacks.manager import (
|
||||
from langchain.chains.base import Chain
|
||||
from langchain.chains.llm import LLMChain
|
||||
from langchain.input import get_color_mapping
|
||||
from langchain.prompts.base import BasePromptTemplate
|
||||
from langchain.prompts.few_shot import FewShotPromptTemplate
|
||||
from langchain.prompts.prompt import PromptTemplate
|
||||
from langchain.schema import (
|
||||
AgentAction,
|
||||
AgentFinish,
|
||||
BaseMessage,
|
||||
BaseOutputParser,
|
||||
BasePromptTemplate,
|
||||
OutputParserException,
|
||||
)
|
||||
from langchain.schema.messages import BaseMessage
|
||||
from langchain.tools.base import BaseTool
|
||||
from langchain.utilities.asyncio import asyncio_timeout
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@ from langchain.agents.agent_toolkits.jira.toolkit import JiraToolkit
|
||||
from langchain.agents.agent_toolkits.json.base import create_json_agent
|
||||
from langchain.agents.agent_toolkits.json.toolkit import JsonToolkit
|
||||
from langchain.agents.agent_toolkits.nla.toolkit import NLAToolkit
|
||||
from langchain.agents.agent_toolkits.office365.toolkit import O365Toolkit
|
||||
from langchain.agents.agent_toolkits.openapi.base import create_openapi_agent
|
||||
from langchain.agents.agent_toolkits.openapi.toolkit import OpenAPIToolkit
|
||||
from langchain.agents.agent_toolkits.pandas.base import create_pandas_dataframe_agent
|
||||
@@ -64,4 +65,5 @@ __all__ = [
|
||||
"FileManagementToolkit",
|
||||
"PlayWrightBrowserToolkit",
|
||||
"AzureCognitiveServicesToolkit",
|
||||
"O365Toolkit",
|
||||
]
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
"""Toolkits for agents."""
|
||||
from abc import abstractmethod
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import List
|
||||
|
||||
from pydantic import BaseModel
|
||||
@@ -7,8 +7,8 @@ from pydantic import BaseModel
|
||||
from langchain.tools import BaseTool
|
||||
|
||||
|
||||
class BaseToolkit(BaseModel):
|
||||
"""Class responsible for defining a collection of related tools."""
|
||||
class BaseToolkit(BaseModel, ABC):
|
||||
"""Class representing a collection of related tools."""
|
||||
|
||||
@abstractmethod
|
||||
def get_tools(self) -> List[BaseTool]:
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""Agent for working with csvs."""
|
||||
"""Agent for working with csv files."""
|
||||
from typing import Any, List, Optional, Union
|
||||
|
||||
from langchain.agents.agent import AgentExecutor
|
||||
|
||||
@@ -1 +1 @@
|
||||
"""Gmail toolkit."""
|
||||
"""Office365 toolkit."""
|
||||
|
||||
@@ -30,9 +30,9 @@ class O365Toolkit(BaseToolkit):
|
||||
def get_tools(self) -> List[BaseTool]:
|
||||
"""Get the tools in the toolkit."""
|
||||
return [
|
||||
O365SearchEvents(account=self.account),
|
||||
O365CreateDraftMessage(account=self.account),
|
||||
O365SearchEmails(account=self.account),
|
||||
O365SendEvent(account=self.account),
|
||||
O365SendMessage(account=self.account),
|
||||
O365SearchEvents(),
|
||||
O365CreateDraftMessage(),
|
||||
O365SearchEmails(),
|
||||
O365SendEvent(),
|
||||
O365SendMessage(),
|
||||
]
|
||||
|
||||
@@ -34,8 +34,8 @@ from langchain.chains.llm import LLMChain
|
||||
from langchain.llms.openai import OpenAI
|
||||
from langchain.memory import ReadOnlySharedMemory
|
||||
from langchain.prompts import PromptTemplate
|
||||
from langchain.prompts.base import BasePromptTemplate
|
||||
from langchain.requests import RequestsWrapper
|
||||
from langchain.schema import BasePromptTemplate
|
||||
from langchain.tools.base import BaseTool
|
||||
from langchain.tools.requests.tool import BaseRequestsTool
|
||||
|
||||
|
||||
@@ -39,7 +39,7 @@ class RequestsToolkit(BaseToolkit):
|
||||
|
||||
|
||||
class OpenAPIToolkit(BaseToolkit):
|
||||
"""Toolkit for interacting with a OpenAPI api."""
|
||||
"""Toolkit for interacting with an OpenAPI API."""
|
||||
|
||||
json_agent: AgentExecutor
|
||||
requests_wrapper: TextRequestsWrapper
|
||||
|
||||
@@ -19,8 +19,8 @@ from langchain.agents.types import AgentType
|
||||
from langchain.base_language import BaseLanguageModel
|
||||
from langchain.callbacks.base import BaseCallbackManager
|
||||
from langchain.chains.llm import LLMChain
|
||||
from langchain.prompts.base import BasePromptTemplate
|
||||
from langchain.schema import SystemMessage
|
||||
from langchain.schema import BasePromptTemplate
|
||||
from langchain.schema.messages import SystemMessage
|
||||
from langchain.tools.python.tool import PythonAstREPLTool
|
||||
|
||||
|
||||
@@ -30,6 +30,7 @@ def _get_multi_prompt(
|
||||
suffix: Optional[str] = None,
|
||||
input_variables: Optional[List[str]] = None,
|
||||
include_df_in_prompt: Optional[bool] = True,
|
||||
number_of_head_rows: int = 5,
|
||||
) -> Tuple[BasePromptTemplate, List[PythonAstREPLTool]]:
|
||||
num_dfs = len(dfs)
|
||||
if suffix is not None:
|
||||
@@ -60,7 +61,7 @@ def _get_multi_prompt(
|
||||
|
||||
partial_prompt = prompt.partial()
|
||||
if "dfs_head" in input_variables:
|
||||
dfs_head = "\n\n".join([d.head().to_markdown() for d in dfs])
|
||||
dfs_head = "\n\n".join([d.head(number_of_head_rows).to_markdown() for d in dfs])
|
||||
partial_prompt = partial_prompt.partial(num_dfs=str(num_dfs), dfs_head=dfs_head)
|
||||
if "num_dfs" in input_variables:
|
||||
partial_prompt = partial_prompt.partial(num_dfs=str(num_dfs))
|
||||
@@ -73,6 +74,7 @@ def _get_single_prompt(
|
||||
suffix: Optional[str] = None,
|
||||
input_variables: Optional[List[str]] = None,
|
||||
include_df_in_prompt: Optional[bool] = True,
|
||||
number_of_head_rows: int = 5,
|
||||
) -> Tuple[BasePromptTemplate, List[PythonAstREPLTool]]:
|
||||
if suffix is not None:
|
||||
suffix_to_use = suffix
|
||||
@@ -100,7 +102,9 @@ def _get_single_prompt(
|
||||
|
||||
partial_prompt = prompt.partial()
|
||||
if "df_head" in input_variables:
|
||||
partial_prompt = partial_prompt.partial(df_head=str(df.head().to_markdown()))
|
||||
partial_prompt = partial_prompt.partial(
|
||||
df_head=str(df.head(number_of_head_rows).to_markdown())
|
||||
)
|
||||
return partial_prompt, tools
|
||||
|
||||
|
||||
@@ -110,6 +114,7 @@ def _get_prompt_and_tools(
|
||||
suffix: Optional[str] = None,
|
||||
input_variables: Optional[List[str]] = None,
|
||||
include_df_in_prompt: Optional[bool] = True,
|
||||
number_of_head_rows: int = 5,
|
||||
) -> Tuple[BasePromptTemplate, List[PythonAstREPLTool]]:
|
||||
try:
|
||||
import pandas as pd
|
||||
@@ -131,6 +136,7 @@ def _get_prompt_and_tools(
|
||||
suffix=suffix,
|
||||
input_variables=input_variables,
|
||||
include_df_in_prompt=include_df_in_prompt,
|
||||
number_of_head_rows=number_of_head_rows,
|
||||
)
|
||||
else:
|
||||
if not isinstance(df, pd.DataFrame):
|
||||
@@ -141,6 +147,7 @@ def _get_prompt_and_tools(
|
||||
suffix=suffix,
|
||||
input_variables=input_variables,
|
||||
include_df_in_prompt=include_df_in_prompt,
|
||||
number_of_head_rows=number_of_head_rows,
|
||||
)
|
||||
|
||||
|
||||
@@ -149,13 +156,18 @@ def _get_functions_single_prompt(
|
||||
prefix: Optional[str] = None,
|
||||
suffix: Optional[str] = None,
|
||||
include_df_in_prompt: Optional[bool] = True,
|
||||
number_of_head_rows: int = 5,
|
||||
) -> Tuple[BasePromptTemplate, List[PythonAstREPLTool]]:
|
||||
if suffix is not None:
|
||||
suffix_to_use = suffix
|
||||
if include_df_in_prompt:
|
||||
suffix_to_use = suffix_to_use.format(df_head=str(df.head().to_markdown()))
|
||||
suffix_to_use = suffix_to_use.format(
|
||||
df_head=str(df.head(number_of_head_rows).to_markdown())
|
||||
)
|
||||
elif include_df_in_prompt:
|
||||
suffix_to_use = FUNCTIONS_WITH_DF.format(df_head=str(df.head().to_markdown()))
|
||||
suffix_to_use = FUNCTIONS_WITH_DF.format(
|
||||
df_head=str(df.head(number_of_head_rows).to_markdown())
|
||||
)
|
||||
else:
|
||||
suffix_to_use = ""
|
||||
|
||||
@@ -173,16 +185,19 @@ def _get_functions_multi_prompt(
|
||||
prefix: Optional[str] = None,
|
||||
suffix: Optional[str] = None,
|
||||
include_df_in_prompt: Optional[bool] = True,
|
||||
number_of_head_rows: int = 5,
|
||||
) -> Tuple[BasePromptTemplate, List[PythonAstREPLTool]]:
|
||||
if suffix is not None:
|
||||
suffix_to_use = suffix
|
||||
if include_df_in_prompt:
|
||||
dfs_head = "\n\n".join([d.head().to_markdown() for d in dfs])
|
||||
dfs_head = "\n\n".join(
|
||||
[d.head(number_of_head_rows).to_markdown() for d in dfs]
|
||||
)
|
||||
suffix_to_use = suffix_to_use.format(
|
||||
dfs_head=dfs_head,
|
||||
)
|
||||
elif include_df_in_prompt:
|
||||
dfs_head = "\n\n".join([d.head().to_markdown() for d in dfs])
|
||||
dfs_head = "\n\n".join([d.head(number_of_head_rows).to_markdown() for d in dfs])
|
||||
suffix_to_use = FUNCTIONS_WITH_MULTI_DF.format(
|
||||
dfs_head=dfs_head,
|
||||
)
|
||||
@@ -208,6 +223,7 @@ def _get_functions_prompt_and_tools(
|
||||
suffix: Optional[str] = None,
|
||||
input_variables: Optional[List[str]] = None,
|
||||
include_df_in_prompt: Optional[bool] = True,
|
||||
number_of_head_rows: int = 5,
|
||||
) -> Tuple[BasePromptTemplate, List[PythonAstREPLTool]]:
|
||||
try:
|
||||
import pandas as pd
|
||||
@@ -230,6 +246,7 @@ def _get_functions_prompt_and_tools(
|
||||
prefix=prefix,
|
||||
suffix=suffix,
|
||||
include_df_in_prompt=include_df_in_prompt,
|
||||
number_of_head_rows=number_of_head_rows,
|
||||
)
|
||||
else:
|
||||
if not isinstance(df, pd.DataFrame):
|
||||
@@ -239,6 +256,7 @@ def _get_functions_prompt_and_tools(
|
||||
prefix=prefix,
|
||||
suffix=suffix,
|
||||
include_df_in_prompt=include_df_in_prompt,
|
||||
number_of_head_rows=number_of_head_rows,
|
||||
)
|
||||
|
||||
|
||||
@@ -257,6 +275,7 @@ def create_pandas_dataframe_agent(
|
||||
early_stopping_method: str = "force",
|
||||
agent_executor_kwargs: Optional[Dict[str, Any]] = None,
|
||||
include_df_in_prompt: Optional[bool] = True,
|
||||
number_of_head_rows: int = 5,
|
||||
**kwargs: Dict[str, Any],
|
||||
) -> AgentExecutor:
|
||||
"""Construct a pandas agent from an LLM and dataframe."""
|
||||
@@ -268,6 +287,7 @@ def create_pandas_dataframe_agent(
|
||||
suffix=suffix,
|
||||
input_variables=input_variables,
|
||||
include_df_in_prompt=include_df_in_prompt,
|
||||
number_of_head_rows=number_of_head_rows,
|
||||
)
|
||||
llm_chain = LLMChain(
|
||||
llm=llm,
|
||||
@@ -288,6 +308,7 @@ def create_pandas_dataframe_agent(
|
||||
suffix=suffix,
|
||||
input_variables=input_variables,
|
||||
include_df_in_prompt=include_df_in_prompt,
|
||||
number_of_head_rows=number_of_head_rows,
|
||||
)
|
||||
agent = OpenAIFunctionsAgent(
|
||||
llm=llm,
|
||||
|
||||
@@ -17,7 +17,7 @@ from langchain.utilities.powerbi import PowerBIDataset
|
||||
|
||||
def create_pbi_agent(
|
||||
llm: BaseLanguageModel,
|
||||
toolkit: Optional[PowerBIToolkit],
|
||||
toolkit: Optional[PowerBIToolkit] = None,
|
||||
powerbi: Optional[PowerBIDataset] = None,
|
||||
callback_manager: Optional[BaseCallbackManager] = None,
|
||||
prefix: str = POWERBI_PREFIX,
|
||||
@@ -36,13 +36,13 @@ def create_pbi_agent(
|
||||
raise ValueError("Must provide either a toolkit or powerbi dataset")
|
||||
toolkit = PowerBIToolkit(powerbi=powerbi, llm=llm, examples=examples)
|
||||
tools = toolkit.get_tools()
|
||||
|
||||
tables = powerbi.table_names if powerbi else toolkit.powerbi.table_names
|
||||
agent = ZeroShotAgent(
|
||||
llm_chain=LLMChain(
|
||||
llm=llm,
|
||||
prompt=ZeroShotAgent.create_prompt(
|
||||
tools,
|
||||
prefix=prefix.format(top_k=top_k),
|
||||
prefix=prefix.format(top_k=top_k).format(tables=tables),
|
||||
suffix=suffix,
|
||||
format_instructions=format_instructions,
|
||||
input_variables=input_variables,
|
||||
|
||||
@@ -18,7 +18,7 @@ from langchain.utilities.powerbi import PowerBIDataset
|
||||
|
||||
def create_pbi_chat_agent(
|
||||
llm: BaseChatModel,
|
||||
toolkit: Optional[PowerBIToolkit],
|
||||
toolkit: Optional[PowerBIToolkit] = None,
|
||||
powerbi: Optional[PowerBIDataset] = None,
|
||||
callback_manager: Optional[BaseCallbackManager] = None,
|
||||
output_parser: Optional[AgentOutputParser] = None,
|
||||
@@ -32,19 +32,20 @@ def create_pbi_chat_agent(
|
||||
agent_executor_kwargs: Optional[Dict[str, Any]] = None,
|
||||
**kwargs: Dict[str, Any],
|
||||
) -> AgentExecutor:
|
||||
"""Construct a pbi agent from an Chat LLM and tools.
|
||||
"""Construct a Power BI agent from a Chat LLM and tools.
|
||||
|
||||
If you supply only a toolkit and no powerbi dataset, the same LLM is used for both.
|
||||
If you supply only a toolkit and no Power BI dataset, the same LLM is used for both.
|
||||
"""
|
||||
if toolkit is None:
|
||||
if powerbi is None:
|
||||
raise ValueError("Must provide either a toolkit or powerbi dataset")
|
||||
toolkit = PowerBIToolkit(powerbi=powerbi, llm=llm, examples=examples)
|
||||
tools = toolkit.get_tools()
|
||||
tables = powerbi.table_names if powerbi else toolkit.powerbi.table_names
|
||||
agent = ConversationalChatAgent.from_llm_and_tools(
|
||||
llm=llm,
|
||||
tools=tools,
|
||||
system_message=prefix.format(top_k=top_k),
|
||||
system_message=prefix.format(top_k=top_k).format(tables=tables),
|
||||
human_message=suffix,
|
||||
input_variables=input_variables,
|
||||
callback_manager=callback_manager,
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
POWERBI_PREFIX = """You are an agent designed to help users interact with a PowerBI Dataset.
|
||||
|
||||
Agent has access to a tool that can write a query based on the question and then run those against PowerBI, Microsofts business intelligence tool. The questions from the users should be interpreted as related to the dataset that is available and not general questions about the world. If the question does not seem related to the dataset, just return "This does not appear to be part of this dataset." as the answer.
|
||||
Agent has access to a tool that can write a query based on the question and then run those against PowerBI, Microsofts business intelligence tool. The questions from the users should be interpreted as related to the dataset that is available and not general questions about the world. If the question does not seem related to the dataset, return "This does not appear to be part of this dataset." as the answer.
|
||||
|
||||
Given an input question, ask to run the questions against the dataset, then look at the results and return the answer, the answer should be a complete sentence that answers the question, if multiple rows are asked find a way to write that in a easily readable format for a human, also make sure to represent numbers in readable ways, like 1M instead of 1000000. Unless the user specifies a specific number of examples they wish to obtain, always limit your query to at most {top_k} results.
|
||||
"""
|
||||
@@ -17,9 +17,9 @@ Thought: I can first ask which tables I have, then how each table is defined and
|
||||
|
||||
POWERBI_CHAT_PREFIX = """Assistant is a large language model built to help users interact with a PowerBI Dataset.
|
||||
|
||||
Assistant has access to a tool that can write a query based on the question and then run those against PowerBI, Microsofts business intelligence tool. The questions from the users should be interpreted as related to the dataset that is available and not general questions about the world. If the question does not seem related to the dataset, just return "This does not appear to be part of this dataset." as the answer.
|
||||
Assistant should try to create a correct and complete answer to the question from the user. If the user asks a question not related to the dataset it should return "This does not appear to be part of this dataset." as the answer. The user might make a mistake with the spelling of certain values, if you think that is the case, ask the user to confirm the spelling of the value and then run the query again. Unless the user specifies a specific number of examples they wish to obtain, and the results are too large, limit your query to at most {top_k} results, but make it clear when answering which field was used for the filtering. The user has access to these tables: {{tables}}.
|
||||
|
||||
Given an input question, ask to run the questions against the dataset, then look at the results and return the answer, the answer should be a complete sentence that answers the question, if multiple rows are asked find a way to write that in a easily readable format for a human, also make sure to represent numbers in readable ways, like 1M instead of 1000000. Unless the user specifies a specific number of examples they wish to obtain, always limit your query to at most {top_k} results.
|
||||
The answer should be a complete sentence that answers the question, if multiple rows are asked find a way to write that in a easily readable format for a human, also make sure to represent numbers in readable ways, like 1M instead of 1000000.
|
||||
"""
|
||||
|
||||
POWERBI_CHAT_SUFFIX = """TOOLS
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
"""Toolkit for interacting with a Power BI dataset."""
|
||||
from typing import List, Optional
|
||||
from typing import List, Optional, Union
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
@@ -7,9 +7,19 @@ from langchain.agents.agent_toolkits.base import BaseToolkit
|
||||
from langchain.base_language import BaseLanguageModel
|
||||
from langchain.callbacks.base import BaseCallbackManager
|
||||
from langchain.chains.llm import LLMChain
|
||||
from langchain.chat_models.base import BaseChatModel
|
||||
from langchain.prompts import PromptTemplate
|
||||
from langchain.prompts.chat import (
|
||||
ChatPromptTemplate,
|
||||
HumanMessagePromptTemplate,
|
||||
SystemMessagePromptTemplate,
|
||||
)
|
||||
from langchain.tools import BaseTool
|
||||
from langchain.tools.powerbi.prompt import QUESTION_TO_QUERY
|
||||
from langchain.tools.powerbi.prompt import (
|
||||
QUESTION_TO_QUERY_BASE,
|
||||
SINGLE_QUESTION_TO_QUERY,
|
||||
USER_INPUT,
|
||||
)
|
||||
from langchain.tools.powerbi.tool import (
|
||||
InfoPowerBITool,
|
||||
ListPowerBITool,
|
||||
@@ -22,10 +32,12 @@ class PowerBIToolkit(BaseToolkit):
|
||||
"""Toolkit for interacting with PowerBI dataset."""
|
||||
|
||||
powerbi: PowerBIDataset = Field(exclude=True)
|
||||
llm: BaseLanguageModel = Field(exclude=True)
|
||||
llm: Union[BaseLanguageModel, BaseChatModel] = Field(exclude=True)
|
||||
examples: Optional[str] = None
|
||||
max_iterations: int = 5
|
||||
callback_manager: Optional[BaseCallbackManager] = None
|
||||
output_token_limit: Optional[int] = None
|
||||
tiktoken_model_name: Optional[str] = None
|
||||
|
||||
class Config:
|
||||
"""Configuration for this pydantic object."""
|
||||
@@ -34,30 +46,47 @@ class PowerBIToolkit(BaseToolkit):
|
||||
|
||||
def get_tools(self) -> List[BaseTool]:
|
||||
"""Get the tools in the toolkit."""
|
||||
if self.callback_manager:
|
||||
chain = LLMChain(
|
||||
llm=self.llm,
|
||||
callback_manager=self.callback_manager,
|
||||
prompt=PromptTemplate(
|
||||
template=QUESTION_TO_QUERY,
|
||||
input_variables=["tool_input", "tables", "schemas", "examples"],
|
||||
),
|
||||
)
|
||||
else:
|
||||
chain = LLMChain(
|
||||
llm=self.llm,
|
||||
prompt=PromptTemplate(
|
||||
template=QUESTION_TO_QUERY,
|
||||
input_variables=["tool_input", "tables", "schemas", "examples"],
|
||||
),
|
||||
)
|
||||
return [
|
||||
QueryPowerBITool(
|
||||
llm_chain=chain,
|
||||
llm_chain=self._get_chain(),
|
||||
powerbi=self.powerbi,
|
||||
examples=self.examples,
|
||||
max_iterations=self.max_iterations,
|
||||
output_token_limit=self.output_token_limit,
|
||||
tiktoken_model_name=self.tiktoken_model_name,
|
||||
),
|
||||
InfoPowerBITool(powerbi=self.powerbi),
|
||||
ListPowerBITool(powerbi=self.powerbi),
|
||||
]
|
||||
|
||||
def _get_chain(self) -> LLMChain:
|
||||
"""Construct the chain based on the callback manager and model type."""
|
||||
if isinstance(self.llm, BaseLanguageModel):
|
||||
return LLMChain(
|
||||
llm=self.llm,
|
||||
callback_manager=self.callback_manager
|
||||
if self.callback_manager
|
||||
else None,
|
||||
prompt=PromptTemplate(
|
||||
template=SINGLE_QUESTION_TO_QUERY,
|
||||
input_variables=["tool_input", "tables", "schemas", "examples"],
|
||||
),
|
||||
)
|
||||
|
||||
system_prompt = SystemMessagePromptTemplate(
|
||||
prompt=PromptTemplate(
|
||||
template=QUESTION_TO_QUERY_BASE,
|
||||
input_variables=["tables", "schemas", "examples"],
|
||||
)
|
||||
)
|
||||
human_prompt = HumanMessagePromptTemplate(
|
||||
prompt=PromptTemplate(
|
||||
template=USER_INPUT,
|
||||
input_variables=["tool_input"],
|
||||
)
|
||||
)
|
||||
return LLMChain(
|
||||
llm=self.llm,
|
||||
callback_manager=self.callback_manager if self.callback_manager else None,
|
||||
prompt=ChatPromptTemplate.from_messages([system_prompt, human_prompt]),
|
||||
)
|
||||
|
||||
@@ -10,7 +10,7 @@ from langchain.agents.types import AgentType
|
||||
from langchain.base_language import BaseLanguageModel
|
||||
from langchain.callbacks.base import BaseCallbackManager
|
||||
from langchain.chains.llm import LLMChain
|
||||
from langchain.schema import SystemMessage
|
||||
from langchain.schema.messages import SystemMessage
|
||||
from langchain.tools.python.tool import PythonREPLTool
|
||||
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ from langchain.prompts.chat import (
|
||||
HumanMessagePromptTemplate,
|
||||
MessagesPlaceholder,
|
||||
)
|
||||
from langchain.schema import AIMessage, SystemMessage
|
||||
from langchain.schema.messages import AIMessage, SystemMessage
|
||||
|
||||
|
||||
def create_sql_agent(
|
||||
|
||||
@@ -14,13 +14,12 @@ from langchain.agents.utils import validate_tools_single_input
|
||||
from langchain.base_language import BaseLanguageModel
|
||||
from langchain.callbacks.base import BaseCallbackManager
|
||||
from langchain.chains.llm import LLMChain
|
||||
from langchain.prompts.base import BasePromptTemplate
|
||||
from langchain.prompts.chat import (
|
||||
ChatPromptTemplate,
|
||||
HumanMessagePromptTemplate,
|
||||
SystemMessagePromptTemplate,
|
||||
)
|
||||
from langchain.schema import AgentAction
|
||||
from langchain.schema import AgentAction, BasePromptTemplate
|
||||
from langchain.tools.base import BaseTool
|
||||
|
||||
|
||||
|
||||
@@ -30,9 +30,8 @@ class ChatOutputParser(AgentOutputParser):
|
||||
except Exception:
|
||||
if not includes_answer:
|
||||
raise OutputParserException(f"Could not parse LLM output: {text}")
|
||||
return AgentFinish(
|
||||
{"output": text.split(FINAL_ANSWER_ACTION)[-1].strip()}, text
|
||||
)
|
||||
output = text.split(FINAL_ANSWER_ACTION)[-1].strip()
|
||||
return AgentFinish({"output": output}, text)
|
||||
|
||||
@property
|
||||
def _type(self) -> str:
|
||||
|
||||
@@ -16,20 +16,14 @@ from langchain.agents.utils import validate_tools_single_input
|
||||
from langchain.base_language import BaseLanguageModel
|
||||
from langchain.callbacks.base import BaseCallbackManager
|
||||
from langchain.chains import LLMChain
|
||||
from langchain.prompts.base import BasePromptTemplate
|
||||
from langchain.prompts.chat import (
|
||||
ChatPromptTemplate,
|
||||
HumanMessagePromptTemplate,
|
||||
MessagesPlaceholder,
|
||||
SystemMessagePromptTemplate,
|
||||
)
|
||||
from langchain.schema import (
|
||||
AgentAction,
|
||||
AIMessage,
|
||||
BaseMessage,
|
||||
BaseOutputParser,
|
||||
HumanMessage,
|
||||
)
|
||||
from langchain.schema import AgentAction, BaseOutputParser, BasePromptTemplate
|
||||
from langchain.schema.messages import AIMessage, BaseMessage, HumanMessage
|
||||
from langchain.tools.base import BaseTool
|
||||
|
||||
|
||||
|
||||
@@ -38,6 +38,8 @@ from langchain.tools.sleep.tool import SleepTool
|
||||
from langchain.tools.wikipedia.tool import WikipediaQueryRun
|
||||
from langchain.tools.wolfram_alpha.tool import WolframAlphaQueryRun
|
||||
from langchain.tools.openweathermap.tool import OpenWeatherMapQueryRun
|
||||
from langchain.tools.dataforseo_api_search import DataForSeoAPISearchRun
|
||||
from langchain.tools.dataforseo_api_search import DataForSeoAPISearchResults
|
||||
from langchain.utilities import ArxivAPIWrapper
|
||||
from langchain.utilities import PubMedAPIWrapper
|
||||
from langchain.utilities.bing_search import BingSearchAPIWrapper
|
||||
@@ -53,6 +55,7 @@ from langchain.utilities.twilio import TwilioAPIWrapper
|
||||
from langchain.utilities.wikipedia import WikipediaAPIWrapper
|
||||
from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper
|
||||
from langchain.utilities.openweathermap import OpenWeatherMapAPIWrapper
|
||||
from langchain.utilities.dataforseo_api_search import DataForSeoAPIWrapper
|
||||
|
||||
|
||||
def _get_python_repl() -> BaseTool:
|
||||
@@ -278,6 +281,14 @@ def _get_openweathermap(**kwargs: Any) -> BaseTool:
|
||||
return OpenWeatherMapQueryRun(api_wrapper=OpenWeatherMapAPIWrapper(**kwargs))
|
||||
|
||||
|
||||
def _get_dataforseo_api_search(**kwargs: Any) -> BaseTool:
|
||||
return DataForSeoAPISearchRun(api_wrapper=DataForSeoAPIWrapper(**kwargs))
|
||||
|
||||
|
||||
def _get_dataforseo_api_search_json(**kwargs: Any) -> BaseTool:
|
||||
return DataForSeoAPISearchResults(api_wrapper=DataForSeoAPIWrapper(**kwargs))
|
||||
|
||||
|
||||
_EXTRA_LLM_TOOLS: Dict[
|
||||
str,
|
||||
Tuple[Callable[[Arg(BaseLanguageModel, "llm"), KwArg(Any)], BaseTool], List[str]],
|
||||
@@ -326,6 +337,14 @@ _EXTRA_OPTIONAL_TOOLS: Dict[str, Tuple[Callable[[KwArg(Any)], BaseTool], List[st
|
||||
"sceneXplain": (_get_scenexplain, []),
|
||||
"graphql": (_get_graphql_tool, ["graphql_endpoint"]),
|
||||
"openweathermap-api": (_get_openweathermap, ["openweathermap_api_key"]),
|
||||
"dataforseo-api-search": (
|
||||
_get_dataforseo_api_search,
|
||||
["api_login", "api_password", "aiosession"],
|
||||
),
|
||||
"dataforseo-api-search-json": (
|
||||
_get_dataforseo_api_search_json,
|
||||
["api_login", "api_password", "aiosession"],
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -11,7 +11,6 @@ from langchain.base_language import BaseLanguageModel
|
||||
from langchain.callbacks.base import BaseCallbackManager
|
||||
from langchain.callbacks.manager import Callbacks
|
||||
from langchain.chat_models.openai import ChatOpenAI
|
||||
from langchain.prompts.base import BasePromptTemplate
|
||||
from langchain.prompts.chat import (
|
||||
BaseMessagePromptTemplate,
|
||||
ChatPromptTemplate,
|
||||
@@ -21,10 +20,13 @@ from langchain.prompts.chat import (
|
||||
from langchain.schema import (
|
||||
AgentAction,
|
||||
AgentFinish,
|
||||
BasePromptTemplate,
|
||||
OutputParserException,
|
||||
)
|
||||
from langchain.schema.messages import (
|
||||
AIMessage,
|
||||
BaseMessage,
|
||||
FunctionMessage,
|
||||
OutputParserException,
|
||||
SystemMessage,
|
||||
)
|
||||
from langchain.tools import BaseTool
|
||||
@@ -106,7 +108,6 @@ def _parse_ai_message(message: BaseMessage) -> Union[AgentAction, AgentFinish]:
|
||||
function_call = message.additional_kwargs.get("function_call", {})
|
||||
|
||||
if function_call:
|
||||
function_call = message.additional_kwargs["function_call"]
|
||||
function_name = function_call["name"]
|
||||
try:
|
||||
_tool_input = json.loads(function_call["arguments"])
|
||||
|
||||
@@ -11,7 +11,6 @@ from langchain.base_language import BaseLanguageModel
|
||||
from langchain.callbacks.base import BaseCallbackManager
|
||||
from langchain.callbacks.manager import Callbacks
|
||||
from langchain.chat_models.openai import ChatOpenAI
|
||||
from langchain.prompts.base import BasePromptTemplate
|
||||
from langchain.prompts.chat import (
|
||||
BaseMessagePromptTemplate,
|
||||
ChatPromptTemplate,
|
||||
@@ -21,10 +20,13 @@ from langchain.prompts.chat import (
|
||||
from langchain.schema import (
|
||||
AgentAction,
|
||||
AgentFinish,
|
||||
BasePromptTemplate,
|
||||
OutputParserException,
|
||||
)
|
||||
from langchain.schema.messages import (
|
||||
AIMessage,
|
||||
BaseMessage,
|
||||
FunctionMessage,
|
||||
OutputParserException,
|
||||
SystemMessage,
|
||||
)
|
||||
from langchain.tools import BaseTool
|
||||
@@ -105,7 +107,6 @@ def _parse_ai_message(message: BaseMessage) -> Union[List[AgentAction], AgentFin
|
||||
function_call = message.additional_kwargs.get("function_call", {})
|
||||
|
||||
if function_call:
|
||||
function_call = message.additional_kwargs["function_call"]
|
||||
try:
|
||||
tools = json.loads(function_call["arguments"])["actions"]
|
||||
except JSONDecodeError:
|
||||
@@ -152,7 +153,7 @@ class OpenAIMultiFunctionsAgent(BaseMultiActionAgent):
|
||||
tools: The tools this agent has access to.
|
||||
prompt: The prompt for this agent, should support agent_scratchpad as one
|
||||
of the variables. For an easy way to construct this prompt, use
|
||||
`OpenAIFunctionsAgent.create_prompt(...)`
|
||||
`OpenAIMultiFunctionsAgent.create_prompt(...)`
|
||||
"""
|
||||
|
||||
llm: BaseLanguageModel
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user