From 4b63a217c227d284a7bc75dcfd0cc55701760041 Mon Sep 17 00:00:00 2001 From: Nobuhiko Otoba <44864310+nobutoba@users.noreply.github.com> Date: Fri, 23 Aug 2024 07:24:57 +0900 Subject: [PATCH] "community: Fix GithubFileLoader source code", "docs: Fix GithubFileLoader code sample" (#19943) This PR adds tiny improvements to the `GithubFileLoader` document loader and its code sample, addressing the following issues: 1. Currently, the `file_extension` argument of `GithubFileLoader` does not change its behavior at all. 2. The `GithubFileLoader` sample code in `docs/docs/integrations/document_loaders/github.ipynb` does not work as it stands. The respective solutions I propose are the following: 1. Remove the `file_extension` argument from `GithubFileLoader`. 2. Specify the branch as `master` (not the default `main`) and rename `documents` to `document` in the example output. --------- Co-authored-by: Isaac Francisco <78627776+isahers1@users.noreply.github.com> --- docs/docs/integrations/document_loaders/github.ipynb | 5 +++-- .../community/langchain_community/document_loaders/github.py | 1 - 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/docs/integrations/document_loaders/github.ipynb b/docs/docs/integrations/document_loaders/github.ipynb index 6a1ac50382a..47e940b7c6a 100644 --- a/docs/docs/integrations/document_loaders/github.ipynb +++ b/docs/docs/integrations/document_loaders/github.ipynb @@ -175,6 +175,7 @@ "source": [ "loader = GithubFileLoader(\n", " repo=\"langchain-ai/langchain\", # the repo name\n", + " branch=\"master\", # the branch name\n", " access_token=ACCESS_TOKEN,\n", " github_api_url=\"https://api.github.com\",\n", " file_filter=lambda file_path: file_path.endswith(\n", @@ -191,13 +192,13 @@ "example output of one of document: \n", "\n", "```json\n", - "documents.metadata: \n", + "document.metadata: \n", " {\n", " \"path\": \"README.md\",\n", " \"sha\": \"82f1c4ea88ecf8d2dfsfx06a700e84be4\",\n", " \"source\": 
\"https://github.com/langchain-ai/langchain/blob/master/README.md\"\n", " }\n", - "documents.content:\n", + "document.content:\n", " mock content\n", "```" ] diff --git a/libs/community/langchain_community/document_loaders/github.py b/libs/community/langchain_community/document_loaders/github.py index df53ed16c9d..ee647d42401 100644 --- a/libs/community/langchain_community/document_loaders/github.py +++ b/libs/community/langchain_community/document_loaders/github.py @@ -178,7 +178,6 @@ class GitHubIssuesLoader(BaseGitHubLoader): class GithubFileLoader(BaseGitHubLoader, ABC): """Load GitHub File""" - file_extension: str = ".md" branch: str = "main" file_filter: Optional[Callable[[str], bool]]