diff --git a/templates/llama2-functions/README.md b/templates/llama2-functions/README.md
index f16989e969c..77b1baf6dc6 100644
--- a/templates/llama2-functions/README.md
+++ b/templates/llama2-functions/README.md
@@ -2,14 +2,17 @@
 This template shows how to do extraction of structured data from unstructured data, using LLaMA2 [fine-tuned for grammars and jsonschema](https://replicate.com/andreasjansson/llama-2-13b-chat-gguf).
 
-Specify the scehma you want to extract in `chain.py`
+[Query transformations](https://blog.langchain.dev/query-transformations/) are one great application area for open source, private LLMs:
 
-By default, it will extract the title and author of papers.
+* The tasks are often narrow and well-defined (e.g., generate multiple questions from a user input)
+* They are also tasks that users may want to run locally (e.g., in a RAG workflow)
+
+Specify the schema you want to extract in `chain.py`
 
 ## LLM
 
 This template will use a `Replicate` [hosted version](https://replicate.com/andreasjansson/llama-2-13b-chat-gguf) of LLaMA2 that has support for grammars and jsonschema.
 
-Based on the `Replicate` example, these are supplied directly in the prompt.
+Based on the `Replicate` example, the JSON schema is supplied directly in the prompt.
 
 Be sure that `REPLICATE_API_TOKEN` is set in your environment.
\ No newline at end of file
diff --git a/templates/llama2-functions/llama2-functions.ipynb b/templates/llama2-functions/llama2-functions.ipynb
index e84c60a149b..7a7c76f15d6 100644
--- a/templates/llama2-functions/llama2-functions.ipynb
+++ b/templates/llama2-functions/llama2-functions.ipynb
@@ -5,30 +5,6 @@
    "cell_type": "markdown",
    "id": "9faf648c-541e-4368-82a8-96287dbf34de",
    "metadata": {},
-   "source": [
-    "## Document Loading\n",
-    "\n",
-    "Load a blog post on agents."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "662a843a-49e8-40ec-bd32-0f44bc4159a1",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from langchain.document_loaders import WebBaseLoader\n",
-    "\n",
-    "loader = WebBaseLoader(\"https://lilianweng.github.io/posts/2023-06-23-agent/\")\n",
-    "text = loader.load()"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "id": "67306dbd-d79c-4723-825e-7d88edb811ba",
-   "metadata": {},
    "source": [
     "## Run Template\n",
     "\n",
@@ -41,14 +17,13 @@
  {
   "cell_type": "code",
   "execution_count": null,
-  "id": "3668ba4b-254e-4a3b-bfb5-53242572cb1b",
+  "id": "2dfe28bb-6112-459b-a77d-013964b65409",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langserve.client import RemoteRunnable\n",
-   "\n",
    "llama2_function = RemoteRunnable(\"http://0.0.0.0:8001/llama2_functions\")\n",
-   "llama2_function.invoke({\"input\": text[0].page_content[0:1500]})"
+   "llama2_function.invoke({\"question\":\"How does agent memory work?\"})"
   ]
  }
 ],
diff --git a/templates/llama2-functions/llama2_functions/chain.py b/templates/llama2-functions/llama2_functions/chain.py
index 9dd66e9e1c7..b7ebc31bd0d 100644
--- a/templates/llama2-functions/llama2_functions/chain.py
+++ b/templates/llama2-functions/llama2_functions/chain.py
@@ -9,32 +9,34 @@ model = Replicate(
 )
 
 # Prompt with output schema specification
-template = """A article will be passed to you. Extract from it all papers that are mentioned by this article.
-
-Do not extract the name of the article itself. If no papers are mentioned that's fine - you don't need to extract any! Just return an empty list.
-
-Do not make up or guess ANY extra information. Only extract what exactly is in the text.
-
-Respond with json that adheres to the following jsonschema:
-
+template = """You are an AI language model assistant. Your task is to generate 3 different versions of the given user \
+    question to retrieve relevant documents from a vector database. By generating multiple perspectives on the user \
+    question, your goal is to help the user overcome some of the limitations of distance-based similarity search. \
+    Respond with json that adheres to the following jsonschema:
 {{
     "$schema": "http://json-schema.org/draft-07/schema#",
     "type": "object",
     "properties": {{
-        "author": {{
+        "question_1": {{
             "type": "string",
-            "description": "The author of the paper."
+            "description": "First version of the user question."
         }},
-        "title": {{
+        "question_2": {{
             "type": "string",
-            "description": "The title of the paper."
+            "description": "Second version of the user question."
+        }},
+        "question_3": {{
+            "type": "string",
+            "description": "Third version of the user question."
         }}
     }},
-    "required": ["author", "title"],
+    "required": ["question_1","question_2","question_3"],
     "additionalProperties": false
 }}"""  # noqa: E501
 
-prompt = ChatPromptTemplate.from_messages([("system", template), ("human", "{input}")])
+prompt = ChatPromptTemplate.from_messages(
+    [("system", template), ("human", "{question}")]
+)
 
 # Chain
 chain = prompt | model
diff --git a/templates/rag-chroma-private/rag_chroma_private.ipynb b/templates/rag-chroma-private/rag_chroma_private.ipynb
index b3a969c7218..f39884bd1f0 100644
--- a/templates/rag-chroma-private/rag_chroma_private.ipynb
+++ b/templates/rag-chroma-private/rag_chroma_private.ipynb
@@ -13,20 +13,6 @@
    "```"
   ]
  },
- {
-  "cell_type": "code",
-  "execution_count": 1,
-  "id": "78fb41d3-d2aa-40a6-b144-491f38a7cf88",
-  "metadata": {},
-  "outputs": [],
-  "source": [
-   "from langserve.client import RemoteRunnable\n",
-   "\n",
-   "rag_app = RemoteRunnable(\"http://0.0.0.0:8001/rag_chroma_private/\")\n",
-   "for item in rag_app.stream(\"How does agent memory work?\"):\n",
-   "    print(item)"
-  ]
- },
 {
  "cell_type": "code",
  "execution_count": 3,
@@ -41,88 +27,11 @@
    ]
   }
  ],
- "source": []
- },
- {
-  "cell_type": "code",
-  "execution_count": null,
-  "id": "a554971a-e724-4c99-84d1-5d646ae4ac3e",
-  "metadata": {},
-  "outputs": [],
-  "source": []
- },
- {
-  "cell_type": "code",
-  "execution_count": 2,
-  "id": "6891d028-43ac-4a70-b2ad-6fbd3d937283",
-  "metadata": {},
-  "outputs": [
-   {
-    "data": {
-     "text/plain": [
-      ""
-     ]
-    },
-    "execution_count": 2,
-    "metadata": {},
-    "output_type": "execute_result"
-   }
-  ],
  "source": [
-  "rag_app.stream(\"How does agent memory work?\")"
+  "from langserve.client import RemoteRunnable\n",
+  "rag_app = RemoteRunnable(\"http://0.0.0.0:8001/rag_chroma_private/\")\n",
+  "rag_app.invoke(\"How does agent memory work?\")"
  ]
- },
- {
-  "cell_type": "code",
-  "execution_count": 8,
-  "id": "888494ca-0509-4070-b36f-600a042f352c",
-  "metadata": {},
-  "outputs": [
-   {
-    "name": "stdout",
-    "output_type": "stream",
-    "text": [
-     " Based on the provided context, agent memory is a long-term memory module that records a comprehensive list of agents' experiences in natural language. Each element is an observation, an event directly provided by the agent, and inter-agent communication can trigger new natural language statements. The memory module surfaces the context to inform the agent's behavior according to relevance, recency, and importance.\n"
-    ]
-   }
-  ],
-  "source": [
-   "\n",
-   "stream = \n",
-   "for i in stream:\n",
-   "    print(i)"
-  ]
- },
- {
-  "cell_type": "code",
-  "execution_count": 5,
-  "id": "ff2169c9-dab2-41c4-8f38-1f8aebb16814",
-  "metadata": {},
-  "outputs": [
-   {
-    "name": "stdout",
-    "output_type": "stream",
-    "text": [
-     "Collecting httpx_sse\n",
-     "  Obtaining dependency information for httpx_sse from https://files.pythonhosted.org/packages/62/33/d35b4ccf8c1ac7266bd1d068c48f842d3c7392cca87e32751c79ee553d7a/httpx_sse-0.3.1-py3-none-any.whl.metadata\n",
-     "  Using cached httpx_sse-0.3.1-py3-none-any.whl.metadata (8.6 kB)\n",
-     "Using cached httpx_sse-0.3.1-py3-none-any.whl (7.7 kB)\n",
-     "Installing collected packages: httpx_sse\n",
-     "Successfully installed httpx_sse-0.3.1\n"
-    ]
-   }
-  ],
-  "source": [
-   "! pip install httpx_sse"
-  ]
- },
- {
-  "cell_type": "code",
-  "execution_count": null,
-  "id": "3d843f23-686a-4138-8a9d-087bb00b2e13",
-  "metadata": {},
-  "outputs": [],
-  "source": []
- }
 ],
 "metadata": {
diff --git a/templates/rag-chroma-private/rag_chroma_private/chain.py b/templates/rag-chroma-private/rag_chroma_private/chain.py
index 9a5d4d70b03..b3942d909eb 100644
--- a/templates/rag-chroma-private/rag_chroma_private/chain.py
+++ b/templates/rag-chroma-private/rag_chroma_private/chain.py
@@ -1,6 +1,5 @@
-from langchain.chat_models import ChatOllama
-
 # Load
+from langchain.chat_models import ChatOllama
 from langchain.document_loaders import WebBaseLoader
 from langchain.embeddings import GPT4AllEmbeddings
 from langchain.prompts import ChatPromptTemplate
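
Review note: as a quick sanity check of the new `llama2_functions` template, the grammar-constrained output can be parsed back into the three query variants. This is a minimal sketch, not part of the diff: it assumes the Replicate model actually emits JSON matching the schema in `chain.py`, and the `llama2_functions.chain` import path simply mirrors the template's package layout.

```python
# Hypothetical usage sketch (not part of the diff).
import json

# chain = prompt | model, as defined in templates/llama2-functions above
from llama2_functions.chain import chain

# The system prompt instructs the model to return an object with keys
# question_1..question_3; the grammar/jsonschema support on the hosted
# LLaMA2 model is what makes the json.loads step reliable here.
raw = chain.invoke({"question": "How does agent memory work?"})
variants = json.loads(raw)

# Each rewritten question could then be sent to a retriever, e.g. as the
# fan-out step of a multi-query RAG flow such as rag-chroma-private.
for key in ("question_1", "question_2", "question_3"):
    print(variants[key])
```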