From c6f00e6bdcd9efe0ceb0775b3dda9f246fe4da1f Mon Sep 17 00:00:00 2001 From: mehdiosa <65399858+mehdiosa@users.noreply.github.com> Date: Fri, 30 Aug 2024 23:47:11 +0200 Subject: [PATCH] community: Fix branch not being considered when using GithubFileLoader (#20075) - **Description:** Added the `ref` query parameter so that data is loaded from the branch passed to the loader rather than only from the default branch --------- Co-authored-by: Osama Mehdi Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com> Co-authored-by: Erick Friis --- .../community/langchain_community/document_loaders/github.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/libs/community/langchain_community/document_loaders/github.py b/libs/community/langchain_community/document_loaders/github.py index ee647d42401..df02a7894fb 100644 --- a/libs/community/langchain_community/document_loaders/github.py +++ b/libs/community/langchain_community/document_loaders/github.py @@ -206,7 +206,10 @@ class GithubFileLoader(BaseGitHubLoader, ABC): ] def get_file_content_by_path(self, path: str) -> str: - base_url = f"{self.github_api_url}/repos/{self.repo}/contents/{path}" + queryparams = f"?ref={self.branch}" if self.branch else "" + base_url = ( + f"{self.github_api_url}/repos/{self.repo}/contents/{path}{queryparams}" + ) response = requests.get(base_url, headers=self.headers) response.raise_for_status()