From d64bd32b20e359c1c4524a839b343302ed5a6f04 Mon Sep 17 00:00:00 2001 From: Krista Pratico Date: Fri, 5 Apr 2024 17:20:40 -0700 Subject: [PATCH] templates: add rag azure search template (#18143) - **Description:** Adds a template for performing RAG with the AzureSearch vectorstore. - **Issue:** N/A - **Dependencies:** N/A - **Twitter handle:** N/A --------- Co-authored-by: Erick Friis Co-authored-by: Erick Friis --- templates/rag-azure-search/.gitignore | 1 + templates/rag-azure-search/LICENSE | 21 +++++ templates/rag-azure-search/README.md | 87 +++++++++++++++++ templates/rag-azure-search/pyproject.toml | 25 +++++ .../rag_azure_search/__init__.py | 3 + .../rag_azure_search/chain.py | 94 +++++++++++++++++++ templates/rag-azure-search/tests/__init__.py | 0 7 files changed, 231 insertions(+) create mode 100644 templates/rag-azure-search/.gitignore create mode 100644 templates/rag-azure-search/LICENSE create mode 100644 templates/rag-azure-search/README.md create mode 100644 templates/rag-azure-search/pyproject.toml create mode 100644 templates/rag-azure-search/rag_azure_search/__init__.py create mode 100644 templates/rag-azure-search/rag_azure_search/chain.py create mode 100644 templates/rag-azure-search/tests/__init__.py diff --git a/templates/rag-azure-search/.gitignore b/templates/rag-azure-search/.gitignore new file mode 100644 index 00000000000..bee8a64b79a --- /dev/null +++ b/templates/rag-azure-search/.gitignore @@ -0,0 +1 @@ +__pycache__ diff --git a/templates/rag-azure-search/LICENSE b/templates/rag-azure-search/LICENSE new file mode 100644 index 00000000000..fc0602feecd --- /dev/null +++ b/templates/rag-azure-search/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 LangChain, Inc. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/templates/rag-azure-search/README.md b/templates/rag-azure-search/README.md new file mode 100644 index 00000000000..9822d334e4f --- /dev/null +++ b/templates/rag-azure-search/README.md @@ -0,0 +1,87 @@ +# rag-azure-search + +This template performs RAG on documents using [Azure AI Search](https://learn.microsoft.com/azure/search/search-what-is-azure-search) as the vectorstore and Azure OpenAI chat and embedding models. + +For additional details on RAG with Azure AI Search, refer to [this notebook](https://github.com/langchain-ai/langchain/blob/master/docs/docs/integrations/vectorstores/azuresearch.ipynb). + + +## Environment Setup + +***Prerequisites:*** Existing [Azure AI Search](https://learn.microsoft.com/azure/search/search-what-is-azure-search) and [Azure OpenAI](https://learn.microsoft.com/azure/ai-services/openai/overview) resources. 
+ +***Environment Variables:*** + +To run this template, you'll need to set the following environment variables: + +***Required:*** + +- AZURE_SEARCH_ENDPOINT - The endpoint of the Azure AI Search service. +- AZURE_SEARCH_KEY - The API key for the Azure AI Search service. +- AZURE_OPENAI_ENDPOINT - The endpoint of the Azure OpenAI service. +- AZURE_OPENAI_API_KEY - The API key for the Azure OpenAI service. +- AZURE_EMBEDDINGS_DEPLOYMENT - Name of the Azure OpenAI deployment to use for embeddings. +- AZURE_CHAT_DEPLOYMENT - Name of the Azure OpenAI deployment to use for chat. + +***Optional:*** + +- AZURE_SEARCH_INDEX_NAME - Name of an existing Azure AI Search index to use. If not provided, an index will be created with the name "rag-azure-search". +- OPENAI_API_VERSION - Azure OpenAI API version to use. Defaults to "2023-05-15". + +## Usage + +To use this package, you should first have the LangChain CLI installed: + +```shell +pip install -U langchain-cli +``` + +To create a new LangChain project and install this as the only package, you can do: + +```shell +langchain app new my-app --package rag-azure-search +``` + +If you want to add this to an existing project, you can just run: + +```shell +langchain app add rag-azure-search +``` + +And add the following code to your `server.py` file: +```python +from rag_azure_search import chain as rag_azure_search_chain + +add_routes(app, rag_azure_search_chain, path="/rag-azure-search") +``` + +(Optional) Let's now configure LangSmith. +LangSmith will help us trace, monitor and debug LangChain applications. +LangSmith is currently in private beta; you can sign up [here](https://smith.langchain.com/). 
+If you don't have access, you can skip this section. + + +```shell +export LANGCHAIN_TRACING_V2=true +export LANGCHAIN_API_KEY=<your-api-key> +export LANGCHAIN_PROJECT=<your-project> # if not specified, defaults to "default" +``` + +If you are inside this directory, then you can spin up a LangServe instance directly by: + +```shell +langchain serve +``` + +This will start the FastAPI app with a server running locally at +[http://localhost:8000](http://localhost:8000) + +We can see all templates at [http://127.0.0.1:8000/docs](http://127.0.0.1:8000/docs) +We can access the playground at [http://127.0.0.1:8000/rag-azure-search/playground](http://127.0.0.1:8000/rag-azure-search/playground) + +We can access the template from code with: + +```python +from langserve.client import RemoteRunnable + +runnable = RemoteRunnable("http://localhost:8000/rag-azure-search") +``` \ No newline at end of file diff --git a/templates/rag-azure-search/pyproject.toml b/templates/rag-azure-search/pyproject.toml new file mode 100644 index 00000000000..54af32ecfb1 --- /dev/null +++ b/templates/rag-azure-search/pyproject.toml @@ -0,0 +1,25 @@ +[tool.poetry] +name = "rag-azure-search" +version = "0.0.1" +description = "" +authors = [] +readme = "README.md" + +[tool.poetry.dependencies] +python = ">=3.8.1,<4.0" +langchain-core = ">=0.1.5" +langchain-openai = ">=0.0.1" +azure-search-documents = ">=11.4.0" + +[tool.poetry.group.dev.dependencies] +langchain-cli = ">=0.0.4" +fastapi = "^0.104.0" +sse-starlette = "^1.6.5" + +[tool.langserve] +export_module = "rag_azure_search" +export_attr = "chain" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/templates/rag-azure-search/rag_azure_search/__init__.py b/templates/rag-azure-search/rag_azure_search/__init__.py new file mode 100644 index 00000000000..ee4169c8d13 --- /dev/null +++ b/templates/rag-azure-search/rag_azure_search/__init__.py @@ -0,0 +1,3 @@ +from rag_azure_search.chain import chain + +__all__ = ["chain"] diff 
--git a/templates/rag-azure-search/rag_azure_search/chain.py b/templates/rag-azure-search/rag_azure_search/chain.py
new file mode 100644
index 00000000000..8206cf25357
--- /dev/null
+++ b/templates/rag-azure-search/rag_azure_search/chain.py
@@ -0,0 +1,100 @@
+"""RAG chain backed by an Azure AI Search vector store and Azure OpenAI models.
+
+Importing this module validates the required environment variables and builds
+the module-level ``chain`` runnable that the package exports.
+"""
+
+import os
+
+# NOTE(review): langchain_community is not declared in this template's
+# pyproject.toml dependencies - confirm it is installed alongside the template.
+from langchain_community.vectorstores.azuresearch import AzureSearch
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.pydantic_v1 import BaseModel
+from langchain_core.runnables import RunnableParallel, RunnablePassthrough
+from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
+
+# Fail fast at import time, naming the first required variable that is unset
+# or empty. The two AZURE_OPENAI_* settings are not read explicitly below;
+# presumably the Azure OpenAI clients pick them up from the environment.
+_REQUIRED_ENV_VARS = (
+    "AZURE_OPENAI_ENDPOINT",
+    "AZURE_OPENAI_API_KEY",
+    "AZURE_EMBEDDINGS_DEPLOYMENT",
+    "AZURE_CHAT_DEPLOYMENT",
+    "AZURE_SEARCH_ENDPOINT",
+    "AZURE_SEARCH_KEY",
+)
+for _env_var in _REQUIRED_ENV_VARS:
+    if not os.getenv(_env_var):
+        raise ValueError(f"Please set the environment variable {_env_var}")
+
+# Optional settings and their defaults.
+api_version = os.getenv("OPENAI_API_VERSION", "2023-05-15")
+index_name = os.getenv("AZURE_SEARCH_INDEX_NAME", "rag-azure-search")
+
+embeddings = AzureOpenAIEmbeddings(
+    deployment=os.environ["AZURE_EMBEDDINGS_DEPLOYMENT"],
+    api_version=api_version,
+    chunk_size=1,
+)
+
+vector_store: AzureSearch = AzureSearch(
+    azure_search_endpoint=os.environ["AZURE_SEARCH_ENDPOINT"],
+    azure_search_key=os.environ["AZURE_SEARCH_KEY"],
+    index_name=index_name,
+    embedding_function=embeddings.embed_query,
+)
+
+# (Optional) Example document -
+# Uncomment the following code to load the document into the vector store
+# or substitute with your own.
+
+# import pathlib
+# from langchain.text_splitter import CharacterTextSplitter
+# from langchain_community.document_loaders import TextLoader
+
+# current_file_path = pathlib.Path(__file__).resolve()
+# root_directory = current_file_path.parents[3]
+# target_file_path = \
+#     root_directory / "docs" / "docs" / "modules" / "state_of_the_union.txt"
+
+# loader = TextLoader(str(target_file_path), encoding="utf-8")
+
+# documents = loader.load()
+# text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+# docs = text_splitter.split_documents(documents)
+
+# vector_store.add_documents(documents=docs)
+
+# RAG prompt
+template = """Answer the question based only on the following context:
+{context}
+Question: {question}
+"""
+
+# Build a retriever over the vector store; it supplies the "context" input
+# of the chain below.
+retriever = vector_store.as_retriever()
+
+_prompt = ChatPromptTemplate.from_template(template)
+_model = AzureChatOpenAI(
+    deployment_name=os.environ["AZURE_CHAT_DEPLOYMENT"],
+    api_version=api_version,
+)
+
+
+# Input type for the chain: the question as a bare string.
+class Question(BaseModel):
+    __root__: str
+
+
+# The input is sent to the retriever (as "context") and passed through
+# unchanged (as "question") to fill the prompt.
+chain = (
+    RunnableParallel({"context": retriever, "question": RunnablePassthrough()})
+    | _prompt
+    | _model
+    | StrOutputParser()
+).with_types(input_type=Question)
diff --git a/templates/rag-azure-search/tests/__init__.py b/templates/rag-azure-search/tests/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d