From c37be7f5fb13b84bcb05091649a7e21cb8f16977 Mon Sep 17 00:00:00 2001 From: Lance Martin <122662504+rlancemartin@users.noreply.github.com> Date: Thu, 24 Aug 2023 11:03:35 -0700 Subject: [PATCH] Add Code LLaMA to code QA use case (#9713) Use [Ollama integration](https://ollama.ai/blog/run-code-llama-locally). --- .../extras/use_cases/code_understanding.ipynb | 94 ++++++++++++++++--- 1 file changed, 81 insertions(+), 13 deletions(-) diff --git a/docs/extras/use_cases/code_understanding.ipynb b/docs/extras/use_cases/code_understanding.ipynb index a649d74094f..ffd14b5026a 100644 --- a/docs/extras/use_cases/code_understanding.ipynb +++ b/docs/extras/use_cases/code_understanding.ipynb @@ -66,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ @@ -78,7 +78,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ @@ -100,7 +100,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 39, "metadata": {}, "outputs": [ { @@ -109,7 +109,7 @@ "1293" ] }, - "execution_count": 14, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } @@ -139,7 +139,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 40, "metadata": {}, "outputs": [ { @@ -148,7 +148,7 @@ "3748" ] }, - "execution_count": 17, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } @@ -187,7 +187,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 41, "metadata": {}, "outputs": [], "source": [ @@ -195,7 +195,7 @@ "from langchain.embeddings.openai import OpenAIEmbeddings\n", "db = Chroma.from_documents(texts, OpenAIEmbeddings(disallowed_special=()))\n", "retriever = db.as_retriever(\n", - " search_type=\"mmr\", # Also test \"similarity\"\n", + " search_type=\"mmr\", # Also test \"similarity\"\n", " search_kwargs={\"k\": 8},\n", ")" ] @@ -217,7 +217,7 @@ }, { 
"cell_type": "code", - "execution_count": 32, + "execution_count": 42, "metadata": {}, "outputs": [], "source": [ @@ -231,22 +231,22 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'To load a source code as documents for a QA over code, you can use the `CodeLoader` class. This class allows you to load source code files and split them into classes and functions.\\n\\nHere is an example of how to use the `CodeLoader` class:\\n\\n```python\\nfrom langchain.document_loaders.code import CodeLoader\\n\\n# Specify the path to the source code file\\ncode_file_path = \"path/to/code/file.py\"\\n\\n# Create an instance of the CodeLoader class\\ncode_loader = CodeLoader(code_file_path)\\n\\n# Load the code as documents\\ndocuments = code_loader.load()\\n\\n# Iterate over the documents\\nfor document in documents:\\n # Access the class or function name\\n name = document.metadata[\"name\"]\\n \\n # Access the code content\\n code = document.page_content\\n \\n # Process the code as needed\\n # ...\\n```\\n\\nIn the example above, `code_file_path` should be replaced with the actual path to your source code file. The `load()` method of the `CodeLoader` class will return a list of `Document` objects, where each document represents a class or function in the source code. You can access the class or function name using the `metadata[\"name\"]` attribute, and the code content using the `page_content` attribute of each `Document` object.\\n\\nYou can then process the code as needed for your QA task.'" + "'To initialize a ReAct agent, you need to follow these steps:\\n\\n1. Initialize a language model `llm` of type `BaseLanguageModel`.\\n\\n2. Initialize a document store `docstore` of type `Docstore`.\\n\\n3. Create a `DocstoreExplorer` with the initialized `docstore`. The `DocstoreExplorer` is used to search for and look up terms in the document store.\\n\\n4. Create an array of `Tool` objects. 
The `Tool` objects represent the actions that the agent can perform. In the case of `ReActDocstoreAgent`, the tools must be \"Search\" and \"Lookup\" with their corresponding functions from the `DocstoreExplorer`.\\n\\n5. Initialize the `ReActDocstoreAgent` using the `from_llm_and_tools` method with the `llm` (language model) and `tools` as parameters.\\n\\n6. Initialize the `ReActChain` (which is the `AgentExecutor`) using the `ReActDocstoreAgent` and `tools` as parameters.\\n\\nHere is an example of how to do this:\\n\\n```python\\nfrom langchain import ReActChain, OpenAI\\nfrom langchain.docstore.base import Docstore\\nfrom langchain.docstore.document import Document\\nfrom langchain.tools.base import BaseTool\\n\\n# Initialize the LLM and a docstore\\nllm = OpenAI()\\ndocstore = Docstore()\\n\\ndocstore_explorer = DocstoreExplorer(docstore)\\ntools = [\\n Tool(\\n name=\"Search\",\\n func=docstore_explorer.search,\\n description=\"Search for a term in the docstore.\",\\n ),\\n Tool(\\n name=\"Lookup\",\\n func=docstore_explorer.lookup,\\n description=\"Lookup a term in the docstore.\",\\n ),\\n]\\nagent = ReActDocstoreAgent.from_llm_and_tools(llm, tools)\\nreact = ReActChain(agent=agent, tools=tools)\\n```\\n\\nKeep in mind that this is a simplified example and you might need to adapt it to your specific needs.'" ] }, - "execution_count": 30, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "question = \"How can I load a source code as documents, for a QA over code, spliting the code in classes and functions?\"\n", + "question = \"How can I initialize a ReAct agent?\"\n", "result = qa(question)\n", "result['answer']" ] @@ -328,6 +328,74 @@ "\n", "![Image description](/img/code_retrieval.png)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Private chat\n", + "\n", + "We can use [Code LLaMA](https://about.fb.com/news/2023/08/code-llama-ai-for-coding/) via the Ollama integration.\n", + "\n", 
+ "`ollama pull codellama:7b-instruct`" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.llms import Ollama\n", + "from langchain.callbacks.manager import CallbackManager\n", + "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler \n", + "llm = Ollama(model=\"codellama:7b-instruct\", \n", + " callback_manager = CallbackManager([StreamingStdOutCallbackHandler()]))\n", + "memory = ConversationSummaryMemory(llm=llm,memory_key=\"chat_history\",return_messages=True)\n", + "qa_llama=ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " \"How can I initialize a ReAct agent?\" To initialize a ReAct agent, you can use the `ReActAgent.from_llm_and_tools()` class method. This method takes two arguments: the LLM and a list of tools.\n", + "Here is an example of how to initialize a ReAct agent with the OpenAI language model and the \"Search\" tool:\n", + "from langchain.agents.mrkl.base import ZeroShotAgent\n", + "\n", + "agent = ReActDocstoreAgent.from_llm_and_tools(OpenAIFunctionsAgent(), [Tool(\"Search\")]])\n", + "\n", + " The human asks what the AI thinks of artificial intelligence. The AI thinks artificial intelligence is a force for good because it will help humans reach their full potential." + ] + }, + { + "data": { + "text/plain": [ + "' To initialize a ReAct agent, you can use the `ReActAgent.from_llm_and_tools()` class method. 
This method takes two arguments: the LLM and a list of tools.\\nHere is an example of how to initialize a ReAct agent with the OpenAI language model and the \"Search\" tool:\\nfrom langchain.agents.mrkl.base import ZeroShotAgent\\n\\nagent = ReActDocstoreAgent.from_llm_and_tools(OpenAIFunctionsAgent(), [Tool(\"Search\")]])\\n\\n'" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "question = \"How can I initialize a ReAct agent?\"\n", + "result = qa_llama(question)\n", + "result['answer']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can view the [LangSmith trace](https://smith.langchain.com/public/fd24c734-e365-4a09-b883-cdbc7dcfa582/r) to sanity-check the answer against the documents that were retrieved. Note that the snippet generated above is not valid as written (unbalanced brackets, unused import), so verify it before use." + ] } ], "metadata": {