diff --git a/templates/rag-matching-engine/LICENSE b/templates/rag-matching-engine/LICENSE new file mode 100644 index 00000000000..426b6509034 --- /dev/null +++ b/templates/rag-matching-engine/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 LangChain, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/templates/rag-matching-engine/README.md b/templates/rag-matching-engine/README.md new file mode 100644 index 00000000000..c46fdf53c0a --- /dev/null +++ b/templates/rag-matching-engine/README.md @@ -0,0 +1 @@ +# rag-matching-engine diff --git a/templates/rag-matching-engine/pyproject.toml b/templates/rag-matching-engine/pyproject.toml new file mode 100644 index 00000000000..7edc5f6a754 --- /dev/null +++ b/templates/rag-matching-engine/pyproject.toml @@ -0,0 +1,24 @@ +[tool.poetry] +name = "rag_matching_engine" +version = "0.0.1" +description = "" +authors = [] +readme = "README.md" + +[tool.poetry.dependencies] +python = ">=3.8.1,<4.0" +langchain = ">=0.0.313, <0.1" +google-cloud-aiplatform = "^1.35.0" + +[tool.poetry.group.dev.dependencies] +langchain-cli = ">=0.0.4" +fastapi = "^0.104.0" +sse-starlette = "^1.6.5" + +[tool.langserve] +export_module = "rag_matching_engine.chain" +export_attr = "chain" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/templates/rag-matching-engine/rag_matching_engine/__init__.py b/templates/rag-matching-engine/rag_matching_engine/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/templates/rag-matching-engine/rag_matching_engine/chain.py b/templates/rag-matching-engine/rag_matching_engine/chain.py new file mode 100644 index 00000000000..e5ad87eae11 --- /dev/null +++ b/templates/rag-matching-engine/rag_matching_engine/chain.py @@ -0,0 +1,69 @@ +import os + +from langchain.embeddings import VertexAIEmbeddings +from langchain.llms import VertexAI +from langchain.prompts import PromptTemplate +from langchain.schema.output_parser import StrOutputParser +from langchain.schema.runnable import RunnableParallel, RunnablePassthrough +from langchain.vectorstores import MatchingEngine + +# you need to create the index first, for example, as described here: +# 
# https://github.com/GoogleCloudPlatform/generative-ai/blob/main/language/use-cases/document-qa/question_answering_documents_langchain_matching_engine.ipynb

# Settings this module reads from the environment at import time. Each name is
# upper-cased before the lookup, so `project_id` is read from `PROJECT_ID`,
# `me_region` from `ME_REGION`, and so on.
expected_variables = [
    "project_id",
    "me_region",
    "gcs_bucket",
    "me_index_id",
    "me_endpoint_id",
]
variables = []
for variable_name in expected_variables:
    env_name = variable_name.upper()
    variable = os.environ.get(env_name)
    # An empty string is treated the same as an unset variable.
    if not variable:
        # Name the variable exactly as it is looked up (upper case) so the
        # message tells the user what to export.
        raise Exception(f"Missing `{env_name}` environment variable.")
    variables.append(variable)

# `variables` is filled in the same order as `expected_variables`.
project_id, me_region, gcs_bucket, me_index_id, me_endpoint_id = variables


# Vector store backed by an existing Matching Engine index and endpoint; the
# index itself is not created here.
vectorstore = MatchingEngine.from_components(
    project_id=project_id,
    region=me_region,
    gcs_bucket_name=gcs_bucket,
    embedding=VertexAIEmbeddings(),
    index_id=me_index_id,
    endpoint_id=me_endpoint_id,
)

model = VertexAI()

# Adjacent string literals are concatenated verbatim, so every fragment that
# continues a sentence must carry its own separating space.
template = (
    "SYSTEM: You are an intelligent assistant helping the users with their questions "
    "on research papers.\n\n"
    "Question: {question}\n\n"
    "Strictly Use ONLY the following pieces of context to answer the question at the "
    "end. Think step-by-step and then answer.\n\n"
    "Do not try to make up an answer:\n"
    "- If the answer to the question cannot be determined from the context alone, "
    'say \n"I cannot determine the answer to that."\n'
    '- If the context is empty, just say "I do not know the answer to that."\n\n'
    "=============\n{context}\n=============\n\n"
    "Question: {question}\nHelpful Answer: "
)

prompt = PromptTemplate.from_template(template)

# NOTE(review): `k` and `search_distance` are forwarded to the vector store's
# search as-is; confirm their exact semantics against the MatchingEngine docs.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={
        "k": 10,
        "search_distance": 0.6,
    },
)

# The chain input is fed both to the retriever (filling `context`) and, via the
# passthrough, unchanged into `question`; the filled prompt goes to the model
# and the model output is parsed to a plain string.
chain = (
    RunnableParallel({"context": retriever, "question": RunnablePassthrough()})
    | prompt
    | model
    | StrOutputParser()
)

# --- remainder of the original patch text (not part of chain.py) ---
# diff --git a/templates/rag-matching-engine/tests/__init__.py b/templates/rag-matching-engine/tests/__init__.py
# new file mode 100644
# index 00000000000..e69de29bb2d