AWS Bedrock RAG template (#12450)

This commit is contained in:
Lance Martin
2023-10-27 13:15:54 -07:00
committed by GitHub
parent 5d40e36c75
commit 5c564e62e1
11 changed files with 2602 additions and 9 deletions

View File: templates/rag-aws-bedrock/LICENSE

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2023 LangChain, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File: templates/rag-aws-bedrock/README.md

@@ -0,0 +1,29 @@
# RAG AWS Bedrock
AWS Bedrock is a managed service that offers a set of foundation models.
Here we will use `Anthropic Claude` for text generation and `Amazon Titan` for text embedding.
We will use Pinecone as our vectorstore.
(See [this notebook](https://github.com/aws-samples/amazon-bedrock-workshop/blob/main/03_QuestionAnswering/01_qa_w_rag_claude.ipynb) for additional context on the RAG pipeline.)
(See [this notebook](https://github.com/aws-samples/amazon-bedrock-workshop/blob/58f238a183e7e629c9ae11dd970393af4e64ec44/00_Intro/bedrock_boto3_setup.ipynb#Prerequisites) for additional context on setup.)
## Pinecone
This connects to a hosted Pinecone vectorstore.
Be sure to set the environment variables that `chain.py` reads:
* `PINECONE_API_KEY`
* `PINECONE_ENVIRONMENT`
* `PINECONE_INDEX` (optional; defaults to `langchain-test`)
## LLM and Embeddings
Be sure to set the AWS environment variables:
* `AWS_DEFAULT_REGION`
* `AWS_PROFILE`
* `BEDROCK_ASSUME_ROLE`
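A minimal sketch of setting all of the above before the chain is imported (the values here are placeholders, not working credentials):
```python
import os

# Placeholder values for illustration only; substitute your own.
os.environ["PINECONE_API_KEY"] = "<your-pinecone-api-key>"
os.environ["PINECONE_ENVIRONMENT"] = "<your-pinecone-environment>"
os.environ["PINECONE_INDEX"] = "langchain-test"  # optional; this is the default
os.environ["AWS_DEFAULT_REGION"] = "us-east-1"
os.environ["AWS_PROFILE"] = "default"
os.environ["BEDROCK_ASSUME_ROLE"] = "<bedrock-role-arn>"
```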

templates/rag-aws-bedrock/poetry.lock (generated, 2296 lines)

File diff suppressed because it is too large

View File: templates/rag-aws-bedrock/pyproject.toml

@@ -0,0 +1,24 @@
[tool.poetry]
name = "rag-aws-bedrock"
version = "0.1.0"
description = ""
authors = ["Lance Martin <lance@langchain.dev>"]
readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
langchain = ">=0.0.313, <0.1"
openai = ">=0.28.1"
tiktoken = ">=0.5.1"
pinecone-client = ">=2.2.4"
boto3 = ">=1.28.57"
awscli = ">=1.29.57"
botocore = ">=1.31.57"
[tool.langserve]
export_module = "rag_aws_bedrock"
export_attr = "chain"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
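The `[tool.langserve]` table above tells the serving layer which module and attribute hold the chain. As a rough sketch of the mechanism only (not LangServe's actual loader), the pair can be resolved with `importlib`:

```python
from importlib import import_module

# Resolve export_module / export_attr from the [tool.langserve] table.
# A sketch of the idea; LangServe's real loader may differ.
module = import_module("rag_aws_bedrock")
chain = getattr(module, "chain")
```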

View File: templates/rag-aws-bedrock/rag_aws_bedrock.ipynb

@@ -0,0 +1,50 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "681a5d1e",
"metadata": {},
"source": [
"## Connect to template\n",
"\n",
"In `server.py`, set -\n",
"```\n",
"add_routes(app, chain_ext, path=\"/rag_aws_bedrock\")\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d774be2a",
"metadata": {},
"outputs": [],
"source": [
"from langserve.client import RemoteRunnable\n",
"rag_app_pinecone = RemoteRunnable('http://0.0.0.0:8001/rag_aws_bedrock')\n",
"rag_app_pinecone.invoke(\"What are the different types of agent memory\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
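For reference, a minimal `server.py` matching the notebook's instructions might look like this (a sketch; it assumes `fastapi`, `langserve`, and `uvicorn` are installed and the environment variables from the README are set):

```python
from fastapi import FastAPI
from langserve import add_routes

from rag_aws_bedrock import chain

app = FastAPI()
# Expose the chain at the path the notebook's RemoteRunnable points to.
add_routes(app, chain, path="/rag_aws_bedrock")

if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8001)
```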

View File: templates/rag-aws-bedrock/rag_aws_bedrock/__init__.py

@@ -0,0 +1,3 @@
from rag_aws_bedrock.chain import chain
__all__ = ["chain"]

View File: templates/rag-aws-bedrock/rag_aws_bedrock/chain.py

@@ -0,0 +1,73 @@
import os
from langchain.embeddings import BedrockEmbeddings
from langchain.llms.bedrock import Bedrock
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableParallel, RunnablePassthrough
from langchain.vectorstores import Pinecone
from utils import bedrock
if os.environ.get("PINECONE_API_KEY", None) is None:
raise Exception("Missing `PINECONE_API_KEY` environment variable.")
if os.environ.get("PINECONE_ENVIRONMENT", None) is None:
raise Exception("Missing `PINECONE_ENVIRONMENT` environment variable.")
if os.environ.get("AWS_DEFAULT_REGION", None) is None:
raise Exception("Missing `AWS_DEFAULT_REGION` environment variable.")
if os.environ.get("AWS_PROFILE", None) is None:
raise Exception("Missing `AWS_PROFILE` environment variable.")
if os.environ.get("BEDROCK_ASSUME_ROLE", None) is None:
raise Exception("Missing `BEDROCK_ASSUME_ROLE` environment variable.")
PINECONE_INDEX_NAME = os.environ.get("PINECONE_INDEX", "langchain-test")
### Ingest code - you may need to run this the first time
# Load
# from langchain.document_loaders import WebBaseLoader
# loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
# data = loader.load()
# # Split
# from langchain.text_splitter import RecursiveCharacterTextSplitter
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
# all_splits = text_splitter.split_documents(data)
# # Add to vectorDB, embedding with the same Bedrock model used at query time
# # (`bedrock_embeddings` is defined below)
# vectorstore = Pinecone.from_documents(
#     documents=all_splits, embedding=bedrock_embeddings, index_name=PINECONE_INDEX_NAME
# )
# retriever = vectorstore.as_retriever()
# Set LLM and embeddings
boto3_bedrock = bedrock.get_bedrock_client(
    assumed_role=os.environ.get("BEDROCK_ASSUME_ROLE", None),
    region=os.environ.get("AWS_DEFAULT_REGION", None),
)
model = Bedrock(
    model_id="anthropic.claude-v2",
    client=boto3_bedrock,
    model_kwargs={"max_tokens_to_sample": 200},
)
bedrock_embeddings = BedrockEmbeddings(
    model_id="amazon.titan-embed-text-v1",
    client=boto3_bedrock,
)
# Set vectorstore
vectorstore = Pinecone.from_existing_index(PINECONE_INDEX_NAME, bedrock_embeddings)
retriever = vectorstore.as_retriever()
# RAG prompt
template = """Answer the question based only on the following context:
{context}
Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)
# RAG
chain = (
    RunnableParallel({"context": retriever, "question": RunnablePassthrough()})
    | prompt
    | model
    | StrOutputParser()
)
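# A minimal usage sketch (an assumption for illustration: it presumes the
# Pinecone index has already been populated, e.g. via the ingest code above).
if __name__ == "__main__":
    print(chain.invoke("What are the different types of agent memory?"))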