mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-25 16:13:25 +00:00
community[patch]: Change github endpoint in GithubLoader (#17622)
Description: (1) Changed the GitHub endpoint, as the existing one was not working and returned a 404 Not Found error. (2) The existing function also failed when file_filter was not passed: the tree API returns all paths, including directories, and when get_file_content_by_path was called for each of these paths it failed on directories, because for a directory the contents API returns a list of the files inside it rather than a single file object. Added a condition to ignore a path if it is a directory. (3) Fixes this issue: https://github.com/langchain-ai/langchain/issues/17453 Co-authored-by: Radhika Bansal <Radhika.Bansal@veritas.com>
This commit is contained in:
parent
2b93206f02
commit
8bafd2df5e
@ -211,7 +211,7 @@ class GithubFileLoader(BaseGitHubLoader, ABC):
|
|||||||
|
|
||||||
def get_file_paths(self) -> List[Dict]:
|
def get_file_paths(self) -> List[Dict]:
|
||||||
base_url = (
|
base_url = (
|
||||||
f"{self.github_api_url}/api/v3/repos/{self.repo}/git/trees/"
|
f"{self.github_api_url}/repos/{self.repo}/git/trees/"
|
||||||
f"{self.branch}?recursive=1"
|
f"{self.branch}?recursive=1"
|
||||||
)
|
)
|
||||||
response = requests.get(base_url, headers=self.headers)
|
response = requests.get(base_url, headers=self.headers)
|
||||||
@ -222,8 +222,8 @@ class GithubFileLoader(BaseGitHubLoader, ABC):
|
|||||||
'path': '.github',
|
'path': '.github',
|
||||||
'mode': '040000',
|
'mode': '040000',
|
||||||
'type': 'tree',
|
'type': 'tree',
|
||||||
'sha': '89a2ae046e8b59eb96531f123c0c6d4913885df1',
|
'sha': '5dc46e6b38b22707894ced126270b15e2f22f64e',
|
||||||
'url': 'https://github.com/api/v3/repos/shufanhao/langchain/git/trees/89a2ae046e8b59eb96531f123c0c6d4913885dxxx'
|
'url': 'https://api.github.com/repos/langchain-ai/langchain/git/blobs/5dc46e6b38b22707894ced126270b15e2f22f64e'
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
return [
|
return [
|
||||||
@ -233,12 +233,15 @@ class GithubFileLoader(BaseGitHubLoader, ABC):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def get_file_content_by_path(self, path: str) -> str:
|
def get_file_content_by_path(self, path: str) -> str:
|
||||||
base_url = f"{self.github_api_url}/api/v3/repos/{self.repo}/contents/{path}"
|
base_url = f"{self.github_api_url}/repos/{self.repo}/contents/{path}"
|
||||||
response = requests.get(base_url, headers=self.headers)
|
response = requests.get(base_url, headers=self.headers)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
|
||||||
content_encoded = response.json()["content"]
|
if isinstance(response.json(), dict):
|
||||||
return base64.b64decode(content_encoded).decode("utf-8")
|
content_encoded = response.json()["content"]
|
||||||
|
return base64.b64decode(content_encoded).decode("utf-8")
|
||||||
|
|
||||||
|
return ""
|
||||||
|
|
||||||
def load(self) -> List[Document]:
|
def load(self) -> List[Document]:
|
||||||
documents = []
|
documents = []
|
||||||
@ -246,6 +249,9 @@ class GithubFileLoader(BaseGitHubLoader, ABC):
|
|||||||
files = self.get_file_paths()
|
files = self.get_file_paths()
|
||||||
for file in files:
|
for file in files:
|
||||||
content = self.get_file_content_by_path(file["path"])
|
content = self.get_file_content_by_path(file["path"])
|
||||||
|
if content == "":
|
||||||
|
continue
|
||||||
|
|
||||||
metadata = {
|
metadata = {
|
||||||
"path": file["path"],
|
"path": file["path"],
|
||||||
"sha": file["sha"],
|
"sha": file["sha"],
|
||||||
|
@ -147,7 +147,7 @@ def test_github_file_content_get_file_paths(mocker: MockerFixture) -> None:
|
|||||||
"type": "blob",
|
"type": "blob",
|
||||||
"sha": "789",
|
"sha": "789",
|
||||||
"size": 37,
|
"size": 37,
|
||||||
"url": "https://github.com/api/v3/repos/shufanhao/langchain/git/blobs/789",
|
"url": "https://github.com/repos/shufanhao/langchain/git/blobs/789",
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -206,7 +206,7 @@ def test_github_file_content_loader(mocker: MockerFixture) -> None:
|
|||||||
"type": "blob",
|
"type": "blob",
|
||||||
"sha": "789",
|
"sha": "789",
|
||||||
"size": 37,
|
"size": 37,
|
||||||
"url": "https://github.com/api/v3/repos/shufanhao/langchain/git/blobs/789",
|
"url": "https://github.com/repos/shufanhao/langchain/git/blobs/789",
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
Loading…
Reference in New Issue
Block a user