From 1f3b9878609433b9765a7439db7e13e50161739e Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Thu, 20 Jul 2023 22:24:55 -0700 Subject: [PATCH] Harrison/GitHub toolkit (#8047) Co-authored-by: Trevor Dobbertin --- .../modules/agents/toolkits/github.ipynb | 167 +++++++++++ .../agents/agent_toolkits/github/__init__.py | 1 + .../agents/agent_toolkits/github/toolkit.py | 33 +++ langchain/tools/github/__init__.py | 1 + langchain/tools/github/prompt.py | 56 ++++ langchain/tools/github/tool.py | 64 +++++ langchain/utilities/github.py | 268 ++++++++++++++++++ 7 files changed, 590 insertions(+) create mode 100644 docs/extras/modules/agents/toolkits/github.ipynb create mode 100644 langchain/agents/agent_toolkits/github/__init__.py create mode 100644 langchain/agents/agent_toolkits/github/toolkit.py create mode 100644 langchain/tools/github/__init__.py create mode 100644 langchain/tools/github/prompt.py create mode 100644 langchain/tools/github/tool.py create mode 100644 langchain/utilities/github.py diff --git a/docs/extras/modules/agents/toolkits/github.ipynb b/docs/extras/modules/agents/toolkits/github.ipynb new file mode 100644 index 00000000000..338d07e152b --- /dev/null +++ b/docs/extras/modules/agents/toolkits/github.ipynb @@ -0,0 +1,167 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# GitHub\n", + "\n", + "This notebook goes over how to use the GitHub tool.\n", + "The GitHub tool allows agents to interact with a given GitHub repository. It implements CRUD operations for modifying files and can read/comment on Issues. The tool wraps the [PyGitHub](https://github.com/PyGithub/PyGithub) library.\n", + "\n", + "In order to interact with the GitHub API you must create a [GitHub app](https://docs.github.com/en/apps/creating-github-apps/about-creating-github-apps/about-creating-github-apps). 
Next, you must set the following environment variables:\n", + "```\n", + "GITHUB_APP_ID\n", + "GITHUB_APP_PRIVATE_KEY\n", + "GITHUB_REPOSITORY\n", + "GITHUB_BRANCH\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "%pip install pygithub" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from langchain.agents import AgentType\n", + "from langchain.agents import initialize_agent\n", + "from langchain.agents.agent_toolkits.github.toolkit import GitHubToolkit\n", + "from langchain.llms import OpenAI\n", + "from langchain.utilities.github import GitHubAPIWrapper" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "os.environ[\"GITHUB_APP_ID\"] = \"your-github-app-id\"\n", + "os.environ[\"GITHUB_APP_PRIVATE_KEY\"] = \"/path/to/your/private/key\"\n", + "os.environ[\"GITHUB_REPOSITORY\"] = \"user/repo\"\n", + "os.environ[\"GITHUB_BRANCH\"] = \"branch-name\"\n", + "os.environ[\"OPENAI_API_KEY\"] = \"your-openai-api-key\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "llm = OpenAI(temperature=0)\n", + "github = GitHubAPIWrapper()\n", + "toolkit = GitHubToolkit.from_github_api_wrapper(github)\n", + "agent = initialize_agent(\n", + " toolkit.get_tools(), llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3m I need to figure out what issues need to be completed and how to complete them.\n", + "Action: Get Issues\n", + "Action Input: N/A\u001b[0m\n", + "Observation: 
\u001b[36;1m\u001b[1;3mFound 1 issues:\n", + "[{'title': 'Change the main script to print Hello AI!', 'number': 1}]\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I need to get more information about this issue.\n", + "Action: Get Issue\n", + "Action Input: 1\u001b[0m\n", + "Observation: \u001b[33;1m\u001b[1;3m{'title': 'Change the main script to print Hello AI!', 'body': None, 'comments': '[]'}\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I need to update the main script to print Hello AI!\n", + "Action: Update File\n", + "Action Input: main.py\n", + "OLD <<<<\n", + "print(\"Hello World!\")\n", + ">>>> OLD\n", + "NEW <<<<\n", + "print(\"Hello AI!\")\n", + ">>>> NEW\u001b[0m\n", + "Observation: \u001b[38;5;200m\u001b[1;3mFile content was not updated because the old content was not found. It may be helpful to use the read_file action to get the current file contents.\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I need to read the current file contents.\n", + "Action: Read File\n", + "Action Input: main.py\u001b[0m\n", + "Observation: \u001b[33;1m\u001b[1;3mprint(\"Hello world!\")\n", + "\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I need to update the main script to print Hello AI!\n", + "Action: Update File\n", + "Action Input: main.py\n", + "OLD <<<<\n", + "print(\"Hello world!\")\n", + ">>>> OLD\n", + "NEW <<<<\n", + "print(\"Hello AI!\")\n", + ">>>> NEW\u001b[0m\n", + "Observation: \u001b[38;5;200m\u001b[1;3mUpdated file main.py\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n", + "Final Answer: The main script has been updated to print \"Hello AI!\"\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'The main script has been updated to print \"Hello AI!\"'" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent.run(\n", + " \"You have the software engineering capabilities of a Google Principle engineer. 
# file: langchain/agents/agent_toolkits/github/__init__.py
"""GitHub Toolkit."""

# file: langchain/agents/agent_toolkits/github/toolkit.py
"""GitHub Toolkit."""
from typing import List

from langchain.agents.agent_toolkits.base import BaseToolkit
from langchain.tools import BaseTool
from langchain.tools.github.tool import GitHubAction
from langchain.utilities.github import GitHubAPIWrapper


class GitHubToolkit(BaseToolkit):
    """Toolkit holding one ``GitHubAction`` tool per wrapper operation."""

    tools: List[BaseTool] = []

    @classmethod
    def from_github_api_wrapper(
        cls, github_api_wrapper: GitHubAPIWrapper
    ) -> "GitHubToolkit":
        """Build a toolkit from the operations listed by the API wrapper.

        Every entry returned by ``github_api_wrapper.list()`` supplies the
        ``name``, ``description`` and ``mode`` of one ``GitHubAction``; all
        actions share the same wrapper instance.
        """
        built_tools = []
        for operation in github_api_wrapper.list():
            built_tools.append(
                GitHubAction(
                    name=operation["name"],
                    description=operation["description"],
                    mode=operation["mode"],
                    api_wrapper=github_api_wrapper,
                )
            )
        return cls(tools=built_tools)

    def get_tools(self) -> List[BaseTool]:
        """Get the tools in the toolkit."""
        return self.tools
# file: langchain/tools/github/__init__.py
""" GitHub Tool """

# file: langchain/tools/github/prompt.py
# flake8: noqa
# Tool descriptions shown to the agent. They are runtime strings: the agent
# formats its tool input according to the rules spelled out here, and
# GitHubAPIWrapper parses that input accordingly.
GET_ISSUES_PROMPT = """
This tool will fetch a list of the repository's issues. It will return the title, and issue number of 5 issues. It takes no input.
"""

GET_ISSUE_PROMPT = """
This tool will fetch the title, body, and comment thread of a specific issue. **VERY IMPORTANT**: You must specify the issue number as an integer.
"""

COMMENT_ON_ISSUE_PROMPT = """
This tool is useful when you need to comment on a GitHub issue. Simply pass in the issue number and the comment you would like to make. Please use this sparingly as we don't want to clutter the comment threads. **VERY IMPORTANT**: Your input to this tool MUST strictly follow these rules:

- First you must specify the issue number as an integer
- Then you must place two newlines
- Then you must specify your comment
"""

CREATE_FILE_PROMPT = """
This tool is a wrapper for the GitHub API, useful when you need to create a file in a GitHub repository. **VERY IMPORTANT**: Your input to this tool MUST strictly follow these rules:

- First you must specify which file to create by passing a full file path (**IMPORTANT**: the path must not start with a slash)
- Then you must specify the contents of the file

For example, if you would like to create a file called /test/test.txt with contents "test contents", you would pass in the following string:

test/test.txt

test contents
"""

READ_FILE_PROMPT = """
This tool is a wrapper for the GitHub API, useful when you need to read the contents of a file in a GitHub repository. Simply pass in the full file path of the file you would like to read. **IMPORTANT**: the path must not start with a slash
"""

UPDATE_FILE_PROMPT = """
This tool is a wrapper for the GitHub API, useful when you need to update the contents of a file in a GitHub repository. **VERY IMPORTANT**: Your input to this tool MUST strictly follow these rules:

- First you must specify which file to modify by passing a full file path (**IMPORTANT**: the path must not start with a slash)
- Then you must specify the old contents which you would like to replace wrapped in OLD <<<< and >>>> OLD
- Then you must specify the new contents which you would like to replace the old contents with wrapped in NEW <<<< and >>>> NEW

For example, if you would like to replace the contents of the file /test/test.txt from "old contents" to "new contents", you would pass in the following string:

test/test.txt

OLD <<<<
old contents
>>>> OLD
NEW <<<<
new contents
>>>> NEW
"""

DELETE_FILE_PROMPT = """
This tool is a wrapper for the GitHub API, useful when you need to delete a file in a GitHub repository. Simply pass in the full file path of the file you would like to delete. **IMPORTANT**: the path must not start with a slash
"""

# file: langchain/tools/github/tool.py
"""
This tool allows agents to interact with the pygithub library
and operate on a GitHub repository.

To use this tool, you must first set as environment variables
(these are the variables read by GitHubAPIWrapper.validate_environment):
    GITHUB_APP_ID
    GITHUB_APP_PRIVATE_KEY -> path to the file holding the app's private key
    GITHUB_REPOSITORY -> format: {owner}/{repo}
    GITHUB_BRANCH -> optional, defaults to "master"

TODO: remove below
Below is a sample script that uses the Github tool:

```python
from langchain.agents import AgentType
from langchain.agents import initialize_agent
from langchain.agents.agent_toolkits.github.toolkit import GitHubToolkit
from langchain.llms import OpenAI
from langchain.utilities.github import GitHubAPIWrapper

llm = OpenAI(temperature=0)
github = GitHubAPIWrapper()
toolkit = GitHubToolkit.from_github_api_wrapper(github)
agent = initialize_agent(
    toolkit.get_tools(), llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True
)

agent.run(
    "{{Enter a prompt here to direct the agent}}"
)

```
"""
from typing import Optional

from pydantic import Field

from langchain.callbacks.manager import (
    AsyncCallbackManagerForToolRun,
    CallbackManagerForToolRun,
)
from langchain.tools.base import BaseTool
from langchain.utilities.github import GitHubAPIWrapper


class GitHubAction(BaseTool):
    """Tool that forwards its input to one ``GitHubAPIWrapper`` operation.

    ``mode`` selects which wrapper operation ``_run`` dispatches to; ``name``
    and ``description`` are blank here and are filled in per operation by the
    code that constructs the tool.
    """

    api_wrapper: GitHubAPIWrapper = Field(default_factory=GitHubAPIWrapper)
    mode: str
    name = ""
    description = ""

    def _run(
        self,
        instructions: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the GitHub API to run an operation."""
        return self.api_wrapper.run(self.mode, instructions)

    async def _arun(
        self,
        _: str,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> str:
        """Use the GitHub API to run an operation.

        Raises:
            NotImplementedError: always; only the synchronous path exists.
        """
        raise NotImplementedError("GitHubAction does not support async")


# file: langchain/utilities/github.py
"""Util that calls GitHub."""
import json
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Extra, root_validator
from langchain.tools.github.prompt import (
    COMMENT_ON_ISSUE_PROMPT,
    CREATE_FILE_PROMPT,
    DELETE_FILE_PROMPT,
    GET_ISSUE_PROMPT,
    GET_ISSUES_PROMPT,
    READ_FILE_PROMPT,
    UPDATE_FILE_PROMPT,
)
from langchain.utils import get_from_dict_or_env


def _get_field(obj: Any, name: str) -> Any:
    """Return field *name* from a mapping or from an attribute-style object.

    Plain dicts keep working as before, while objects that expose their data
    as attributes are read with ``getattr`` instead of subscripting.
    NOTE(review): the attribute path assumes PyGithub's ``Issue`` /
    ``IssueComment`` / ``NamedUser`` objects are not subscriptable - confirm.
    """
    if isinstance(obj, dict):
        return obj[name]
    return getattr(obj, name)


class GitHubAPIWrapper(BaseModel):
    """Wrapper for GitHub API.

    Authenticates as a GitHub App through PyGithub and exposes issue and file
    operations on a single repository. Each operation takes its input as one
    string (the format is described in the matching prompt) and is dispatched
    by mode name through ``run``.
    """

    github: Any  #: :meta private:
    github_repo_instance: Any  #: :meta private:
    github_repository: Optional[str] = None
    github_app_id: Optional[str] = None
    github_app_private_key: Optional[str] = None
    github_branch: Optional[str] = None

    # One entry per supported operation: "mode" is the key understood by
    # ``run``; "name" and "description" are what the agent tool is given.
    operations: List[Dict] = [
        {
            "mode": "get_issues",
            "name": "Get Issues",
            "description": GET_ISSUES_PROMPT,
        },
        {
            "mode": "get_issue",
            "name": "Get Issue",
            "description": GET_ISSUE_PROMPT,
        },
        {
            "mode": "comment_on_issue",
            "name": "Comment on Issue",
            "description": COMMENT_ON_ISSUE_PROMPT,
        },
        {
            "mode": "create_file",
            "name": "Create File",
            "description": CREATE_FILE_PROMPT,
        },
        {
            "mode": "read_file",
            "name": "Read File",
            "description": READ_FILE_PROMPT,
        },
        {
            "mode": "update_file",
            "name": "Update File",
            "description": UPDATE_FILE_PROMPT,
        },
        {
            "mode": "delete_file",
            "name": "Delete File",
            "description": DELETE_FILE_PROMPT,
        },
    ]

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    def list(self) -> List[Dict]:
        """Return the descriptors of all supported operations."""
        return self.operations

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that api key and python package exists in environment.

        Settings are taken from ``values`` or, failing that, from the
        ``GITHUB_REPOSITORY``, ``GITHUB_APP_ID``, ``GITHUB_APP_PRIVATE_KEY``
        and ``GITHUB_BRANCH`` environment variables (the branch falls back to
        ``"master"``). ``github_app_private_key`` is a path: the key itself is
        read from that file.

        Raises:
            ImportError: if PyGithub is not installed.
        """
        github_repository = get_from_dict_or_env(
            values, "github_repository", "GITHUB_REPOSITORY"
        )

        github_app_id = get_from_dict_or_env(values, "github_app_id", "GITHUB_APP_ID")

        github_app_private_key = get_from_dict_or_env(
            values, "github_app_private_key", "GITHUB_APP_PRIVATE_KEY"
        )

        github_branch = get_from_dict_or_env(
            values, "github_branch", "GITHUB_BRANCH", default="master"
        )

        try:
            from github import Auth, GithubIntegration
        except ImportError as exc:
            raise ImportError(
                "PyGithub is not installed. "
                "Please install it with `pip install PyGithub`"
            ) from exc

        with open(github_app_private_key, "r") as f:
            private_key = f.read()

        auth = Auth.AppAuth(
            github_app_id,
            private_key,
        )
        gi = GithubIntegration(auth=auth)
        # The first installation of the app is the one that gets used.
        installation = gi.get_installations()[0]

        # create a GitHub instance:
        g = installation.get_github_for_installation()

        values["github"] = g
        values["github_repo_instance"] = g.get_repo(github_repository)
        values["github_repository"] = github_repository
        values["github_app_id"] = github_app_id
        values["github_app_private_key"] = github_app_private_key
        values["github_branch"] = github_branch

        return values

    def parse_issues(self, issues: List[dict]) -> List[dict]:
        """Reduce each issue to a dict holding its ``title`` and ``number``.

        Accepts both mapping-style issues and attribute-style issue objects.
        """
        return [
            {
                "title": _get_field(issue, "title"),
                "number": _get_field(issue, "number"),
            }
            for issue in issues
        ]

    def get_issues(self) -> str:
        """Return a text summary (count, titles, numbers) of the open issues."""
        issues = self.github_repo_instance.get_issues(state="open")
        parsed_issues = self.parse_issues(issues)
        parsed_issues_str = (
            "Found " + str(len(parsed_issues)) + " issues:\n" + str(parsed_issues)
        )
        return parsed_issues_str

    def get_issue(self, issue_number: int) -> Dict[str, Any]:
        """Fetch the title, body and comment thread of one issue.

        Returns:
            A dict with ``title``, ``body`` and ``comments`` (the comment list
            rendered as a string), or a dict with a single ``message`` key
            when the issue has more than 10 comments.
        """
        issue = self.github_repo_instance.get_issue(number=issue_number)

        # Fetch the comment listing once and reuse it for both the size check
        # and the iteration, instead of requesting it again for every page.
        issue_comments = issue.get_comments()

        # If there are too many comments
        # We can't add them all to context so for now we'll just skip
        if issue_comments.totalCount > 10:
            return {
                "message": (
                    "There are too many comments to add them all to context. "
                    "Please visit the issue on GitHub to see them all."
                )
            }

        comments = [
            {
                "body": _get_field(comment, "body"),
                "user": _get_field(_get_field(comment, "user"), "login"),
            }
            for comment in issue_comments
        ]

        return {
            "title": _get_field(issue, "title"),
            "body": _get_field(issue, "body"),
            "comments": str(comments),
        }

    def comment_on_issue(self, comment_query: str) -> str:
        """Post a comment on an issue.

        ``comment_query`` holds the issue number, then two newlines, then the
        comment text.

        Raises:
            ValueError: if the text before the first blank line is not an
                integer.
        """
        # Split on the separator itself rather than on the length of the
        # re-rendered number, so " 7", "07" or "+7" do not shift the comment.
        number_text, _, comment = comment_query.partition("\n\n")
        issue_number = int(number_text)

        issue = self.github_repo_instance.get_issue(number=issue_number)
        issue.create_comment(comment)
        return "Commented on issue " + str(issue_number)

    def create_file(self, file_query: str) -> str:
        """Create a file on ``github_branch``.

        ``file_query`` holds the file path on its first line; everything after
        it (minus one optional blank separator line) is the file contents.
        """
        file_path, _, remainder = file_query.partition("\n")
        # The documented format separates path and contents with a blank
        # line. Drop that one separator newline if present, but never eat a
        # content character when only a single newline was supplied.
        file_contents = remainder[1:] if remainder.startswith("\n") else remainder

        self.github_repo_instance.create_file(
            path=file_path,
            message="Create " + file_path,
            content=file_contents,
            branch=self.github_branch,
        )
        return "Created file " + file_path

    def read_file(self, file_path: str) -> str:
        """Return the UTF-8 decoded contents of ``file_path``.

        The file is read from ``github_branch``, the same branch the write
        operations commit to.
        """
        file = self.github_repo_instance.get_contents(
            file_path, ref=self.github_branch
        )
        return file.decoded_content.decode("utf-8")

    def update_file(self, file_query: str) -> str:
        """Replace a snippet inside a file on ``github_branch``.

        ``file_query`` holds the file path on its first line, the text to
        replace wrapped in ``OLD <<<<`` / ``>>>> OLD`` and the replacement
        wrapped in ``NEW <<<<`` / ``>>>> NEW``. For example:

            test/test.txt
            OLD <<<<
            old contents
            >>>> OLD
            NEW <<<<
            new contents
            >>>> NEW

        Returns:
            A confirmation message, or an explanatory message when the old
            contents do not occur in the file (nothing is committed then).

        Raises:
            IndexError: if the ``OLD <<<<`` or ``NEW <<<<`` marker is missing.
        """
        file_path = file_query.split("\n")[0]
        old_file_contents = file_query.split("OLD <<<<")[1].split(">>>> OLD")[0].strip()
        new_file_contents = file_query.split("NEW <<<<")[1].split(">>>> NEW")[0].strip()

        # Fetch once so the content that is edited and the sha that is sent
        # back describe the same revision of the file on the target branch.
        file = self.github_repo_instance.get_contents(
            file_path, ref=self.github_branch
        )
        file_content = file.decoded_content.decode("utf-8")
        updated_file_content = file_content.replace(
            old_file_contents, new_file_contents
        )

        if file_content == updated_file_content:
            return (
                "File content was not updated because the old content was not found. "
                "It may be helpful to use the read_file action to get "
                "the current file contents."
            )

        self.github_repo_instance.update_file(
            path=file_path,
            message="Update " + file_path,
            content=updated_file_content,
            branch=self.github_branch,
            sha=file.sha,
        )
        return "Updated file " + file_path

    def delete_file(self, file_path: str) -> str:
        """Delete ``file_path`` from ``github_branch``."""
        # The sha must come from the branch the deletion is committed to.
        file = self.github_repo_instance.get_contents(
            file_path, ref=self.github_branch
        )
        self.github_repo_instance.delete_file(
            path=file_path,
            message="Delete " + file_path,
            branch=self.github_branch,
            sha=file.sha,
        )
        return "Deleted file " + file_path

    def run(self, mode: str, query: str) -> str:
        """Dispatch ``query`` to the operation named by ``mode``.

        Raises:
            ValueError: if ``mode`` is not one of the supported operations, or
                if ``get_issue`` is given a non-integer query.
        """
        if mode == "get_issues":
            return self.get_issues()
        elif mode == "get_issue":
            return json.dumps(self.get_issue(int(query)))
        elif mode == "comment_on_issue":
            return self.comment_on_issue(query)
        elif mode == "create_file":
            return self.create_file(query)
        elif mode == "read_file":
            return self.read_file(query)
        elif mode == "update_file":
            return self.update_file(query)
        elif mode == "delete_file":
            return self.delete_file(query)
        else:
            raise ValueError("Invalid mode: " + mode)