Minor template cleaning (#12573)

Lance Martin 2023-10-30 11:27:44 -07:00 committed by GitHub
parent b1e3843931
commit 08103e6d48
5 changed files with 28 additions and 140 deletions

View File

@@ -2,14 +2,17 @@
-This template shows how to do extraction of structured data from unstructured data, using LLaMA2 [fine-tuned for grammars and jsonschema](https://replicate.com/andreasjansson/llama-2-13b-chat-gguf).
-
-Specify the schema you want to extract in `chain.py`
-
-By default, it will extract the title and author of papers.
+[Query transformations](https://blog.langchain.dev/query-transformations/) are one great application area for open source, private LLMs:
+
+* The tasks are often narrow and well-defined (e.g., generate multiple questions from a user input)
+* They also are tasks that users may want to run locally (e.g., in a RAG workflow)
 
 ## LLM
 
 This template will use a `Replicate` [hosted version](https://replicate.com/andreasjansson/llama-2-13b-chat-gguf) of LLaMA2 that has support for grammars and jsonschema.
 
-Based on the `Replicate` example, these are supplied directly in the prompt.
+Based on the `Replicate` example, the JSON schema is supplied directly in the prompt.
 
 Be sure that `REPLICATE_API_TOKEN` is set in your environment.
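
For context, this is roughly how the Replicate-hosted model might be instantiated in `chain.py`. The model identifier comes from the README link above, but the version hash below is a placeholder, not the real value; copy the actual hash from the Replicate model page:

```python
import os

from langchain.llms import Replicate

# The README requires this token to be set in the environment
assert os.environ.get("REPLICATE_API_TOKEN"), "Set REPLICATE_API_TOKEN first"

# <version-hash> is a placeholder: copy the real hash from the Replicate page
model = Replicate(
    model="andreasjansson/llama-2-13b-chat-gguf:<version-hash>",
    model_kwargs={"temperature": 0.8, "max_length": 500},
)
```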

View File

@@ -5,30 +5,6 @@
-   "cell_type": "markdown",
-   "id": "9faf648c-541e-4368-82a8-96287dbf34de",
-   "metadata": {},
-   "source": [
-    "## Document Loading\n",
-    "\n",
-    "Load a blog post on agents."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "662a843a-49e8-40ec-bd32-0f44bc4159a1",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from langchain.document_loaders import WebBaseLoader\n",
-    "\n",
-    "loader = WebBaseLoader(\"https://lilianweng.github.io/posts/2023-06-23-agent/\")\n",
-    "text = loader.load()"
-   ]
-  },
-  {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "67306dbd-d79c-4723-825e-7d88edb811ba",
    "metadata": {},
    "source": [
     "## Run Template\n",
     "\n",
@@ -41,14 +17,13 @@
  {
   "cell_type": "code",
   "execution_count": null,
-  "id": "3668ba4b-254e-4a3b-bfb5-53242572cb1b",
+  "id": "2dfe28bb-6112-459b-a77d-013964b65409",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langserve.client import RemoteRunnable\n",
    "\n",
    "llama2_function = RemoteRunnable(\"http://0.0.0.0:8001/llama2_functions\")\n",
-   "llama2_function.invoke({\"input\": text[0].page_content[0:1500]})"
+   "llama2_function.invoke({\"question\":\"How does agent memory work?\"})"
   ]
  }
 ],
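
The notebook above assumes a LangServe server is already exposing the chain at `http://0.0.0.0:8001/llama2_functions`. A minimal serving sketch; the import path and port here are assumptions, not part of this diff:

```python
from fastapi import FastAPI
from langserve import add_routes

from chain import chain  # hypothetical import path for the template's chain

app = FastAPI()
add_routes(app, chain, path="/llama2_functions")

if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8001)
```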

View File

@@ -9,32 +9,34 @@ model = Replicate(
 )
 
 # Prompt with output schema specification
-template = """An article will be passed to you. Extract from it all papers that are mentioned by this article.
-
-Do not extract the name of the article itself. If no papers are mentioned that's fine - you don't need to extract any! Just return an empty list.
-
-Do not make up or guess ANY extra information. Only extract what exactly is in the text.
-
-Respond with json that adheres to the following jsonschema:
+template = """You are an AI language model assistant. Your task is to generate 3 different versions of the given user \
+question to retrieve relevant documents from a vector database. By generating multiple perspectives on the user \
+question, your goal is to help the user overcome some of the limitations of distance-based similarity search.
+
+Respond with json that adheres to the following jsonschema:
 
 {{
   "$schema": "http://json-schema.org/draft-07/schema#",
   "type": "object",
   "properties": {{
-    "author": {{
+    "question_1": {{
       "type": "string",
-      "description": "The author of the paper."
+      "description": "First version of the user question."
     }},
-    "title": {{
+    "question_2": {{
       "type": "string",
-      "description": "The title of the paper."
+      "description": "Second version of the user question."
     }},
+    "question_3": {{
+      "type": "string",
+      "description": "Third version of the user question."
+    }}
   }},
-  "required": ["author", "title"],
+  "required": ["question_1","question_2","question_3"],
   "additionalProperties": false
 }}"""  # noqa: E501
 
-prompt = ChatPromptTemplate.from_messages([("system", template), ("human", "{input}")])
+prompt = ChatPromptTemplate.from_messages(
+    [("system", template), ("human", "{question}")]
+)
 
 # Chain
 chain = prompt | model
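
Because the grammar constrains the model to the schema above, the chain's raw string output should parse directly as JSON. A hedged usage sketch for the `chain` defined in this file:

```python
import json

# `chain` is the runnable assembled above: prompt | model
raw = chain.invoke({"question": "How does agent memory work?"})
queries = json.loads(raw)  # should satisfy the jsonschema in the prompt
for key in ("question_1", "question_2", "question_3"):
    print(queries[key])
```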

View File

@@ -13,20 +13,6 @@
    "```"
   ]
  },
- {
-  "cell_type": "code",
-  "execution_count": 1,
-  "id": "78fb41d3-d2aa-40a6-b144-491f38a7cf88",
-  "metadata": {},
-  "outputs": [],
-  "source": [
-   "from langserve.client import RemoteRunnable\n",
-   "\n",
-   "rag_app = RemoteRunnable(\"http://0.0.0.0:8001/rag_chroma_private/\")\n",
-   "for item in rag_app.stream(\"How does agent memory work?\"):\n",
-   "    print(item)"
-  ]
- },
 {
  "cell_type": "code",
  "execution_count": 3,
@@ -41,88 +27,11 @@
    ]
   }
  ],
- "source": []
-},
-{
- "cell_type": "code",
- "execution_count": null,
- "id": "a554971a-e724-4c99-84d1-5d646ae4ac3e",
- "metadata": {},
- "outputs": [],
- "source": []
-},
-{
- "cell_type": "code",
- "execution_count": 2,
- "id": "6891d028-43ac-4a70-b2ad-6fbd3d937283",
- "metadata": {},
- "outputs": [
-  {
-   "data": {
-    "text/plain": [
-     "<generator object RemoteRunnable.stream at 0x1245d25f0>"
-    ]
-   },
-   "execution_count": 2,
-   "metadata": {},
-   "output_type": "execute_result"
-  }
- ],
  "source": [
-  "rag_app.stream(\"How does agent memory work?\")"
+  "from langserve.client import RemoteRunnable\n",
+  "rag_app = RemoteRunnable(\"http://0.0.0.0:8001/rag_chroma_private/\")\n",
+  "rag_app.invoke(\"How does agent memory work?\")"
  ]
 },
-{
- "cell_type": "code",
- "execution_count": 8,
- "id": "888494ca-0509-4070-b36f-600a042f352c",
- "metadata": {},
- "outputs": [
-  {
-   "name": "stdout",
-   "output_type": "stream",
-   "text": [
-    " Based on the provided context, agent memory is a long-term memory module that records a comprehensive list of agents' experiences in natural language. Each element is an observation, an event directly provided by the agent, and inter-agent communication can trigger new natural language statements. The memory module surfaces the context to inform the agent's behavior according to relevance, recency, and importance.\n"
-   ]
-  }
- ],
- "source": [
-  "\n",
-  "stream = \n",
-  "for i in stream:\n",
-  "    print(i)"
- ]
-},
-{
- "cell_type": "code",
- "execution_count": 5,
- "id": "ff2169c9-dab2-41c4-8f38-1f8aebb16814",
- "metadata": {},
- "outputs": [
-  {
-   "name": "stdout",
-   "output_type": "stream",
-   "text": [
-    "Collecting httpx_sse\n",
-    "  Obtaining dependency information for httpx_sse from https://files.pythonhosted.org/packages/62/33/d35b4ccf8c1ac7266bd1d068c48f842d3c7392cca87e32751c79ee553d7a/httpx_sse-0.3.1-py3-none-any.whl.metadata\n",
-    "  Using cached httpx_sse-0.3.1-py3-none-any.whl.metadata (8.6 kB)\n",
-    "Using cached httpx_sse-0.3.1-py3-none-any.whl (7.7 kB)\n",
-    "Installing collected packages: httpx_sse\n",
-    "Successfully installed httpx_sse-0.3.1\n"
-   ]
-  }
- ],
- "source": [
-  "! pip install httpx_sse"
- ]
-},
-{
- "cell_type": "code",
- "execution_count": null,
- "id": "3d843f23-686a-4138-8a9d-087bb00b2e13",
- "metadata": {},
- "outputs": [],
- "source": []
-}
 ],
 "metadata": {

View File

@@ -1,6 +1,5 @@
+from langchain.chat_models import ChatOllama
-# Load
-from langchain.chat_models import ChatOllama
 from langchain.document_loaders import WebBaseLoader
 from langchain.embeddings import GPT4AllEmbeddings
 from langchain.prompts import ChatPromptTemplate
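
A hedged sketch of how these imports plausibly fit together in the private RAG chain; the splitter settings, Ollama model tag, and prompt text are assumptions, not taken from this diff:

```python
from langchain.chat_models import ChatOllama
from langchain.document_loaders import WebBaseLoader
from langchain.embeddings import GPT4AllEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

# Load and chunk a document, then index it locally with GPT4All embeddings
docs = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/").load()
splits = RecursiveCharacterTextSplitter(
    chunk_size=500, chunk_overlap=0
).split_documents(docs)
retriever = Chroma.from_documents(splits, GPT4AllEmbeddings()).as_retriever()

# Answer questions with a local model served by Ollama
prompt = ChatPromptTemplate.from_template(
    "Answer the question based only on this context:\n{context}\n\nQuestion: {question}"
)
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | ChatOllama(model="llama2")
    | StrOutputParser()
)
```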