[misc] update pre-commit and run all files (#4752)

* [misc] update pre-commit

* [misc] run pre-commit

* [misc] remove useless configuration files

* [misc] ignore cuda for clang-format
This commit is contained in:
Hongxin Liu
2023-09-19 14:20:26 +08:00
committed by GitHub
parent 3c6b831c26
commit 079bf3cb26
1268 changed files with 50037 additions and 38444 deletions

View File

@@ -22,13 +22,13 @@ def compare_dirs(dir1, dir2):
# If the corresponding item doesn't exist in the second directory, the directories are different
if not os.path.exists(item_path2):
print(f'Found mismatch: {item_path1}, {item_path2}')
print(f"Found mismatch: {item_path1}, {item_path2}")
return False
# If the corresponding item is a directory, we compare the two directories recursively
if os.path.isdir(item_path1) and os.path.isdir(item_path2):
if not compare_dirs(item_path1, item_path2):
print(f'Found mismatch: {item_path1}, {item_path2}')
print(f"Found mismatch: {item_path1}, {item_path2}")
return False
# both are files
@@ -37,16 +37,16 @@ def compare_dirs(dir1, dir2):
# If the corresponding item is not a file or a directory, the directories are different
else:
print(f'Found mismatch: {item_path1}, {item_path2}')
print(f"Found mismatch: {item_path1}, {item_path2}")
return False
# If all items are the same, the directories are the same
return True
if __name__ == '__main__':
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('-d', '--directory', help="The directory where the multi-language source files are kept.")
parser.add_argument("-d", "--directory", help="The directory where the multi-language source files are kept.")
args = parser.parse_args()
i18n_folders = os.listdir(args.directory)
@@ -56,7 +56,7 @@ if __name__ == '__main__':
for i in range(1, len(i18n_folders)):
dir1 = i18n_folders[0]
dir2 = i18n_folders[i]
print(f'comparing {dir1} vs {dir2}')
print(f"comparing {dir1} vs {dir2}")
match = compare_dirs(i18n_folders[0], i18n_folders[i])
if not match:

View File

@@ -4,7 +4,7 @@ import os
def check_inputs(input_list):
for path in input_list:
real_path = os.path.join('examples', path)
real_path = os.path.join("examples", path)
if not os.path.exists(real_path):
return False
return True
@@ -12,16 +12,16 @@ def check_inputs(input_list):
def main():
parser = argparse.ArgumentParser()
parser.add_argument('-f', '--fileNameList', type=str, help="List of file names")
parser.add_argument("-f", "--fileNameList", type=str, help="List of file names")
args = parser.parse_args()
name_list = args.fileNameList.split(",")
is_correct = check_inputs(name_list)
if is_correct:
print('success')
print("success")
else:
print('failure')
print("failure")
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -17,21 +17,21 @@ def show_files(path, all_files):
def join(input_list, sep=None):
return (sep or ' ').join(input_list)
return (sep or " ").join(input_list)
def main():
contents = show_files('examples/', [])
contents = show_files("examples/", [])
all_loc = []
for file_loc in contents:
split_loc = file_loc.split('/')
split_loc = file_loc.split("/")
# must have two sub-folder levels after examples folder, such as examples/images/vit is acceptable, examples/images/README.md is not, examples/requirements.txt is not.
if len(split_loc) >= 4:
re_loc = '/'.join(split_loc[1:3])
re_loc = "/".join(split_loc[1:3])
if re_loc not in all_loc:
all_loc.append(re_loc)
print(all_loc)
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -3,7 +3,7 @@ import argparse
def main():
parser = argparse.ArgumentParser()
parser.add_argument('-f', '--fileNameList', type=str, help="The list of changed files")
parser.add_argument("-f", "--fileNameList", type=str, help="The list of changed files")
args = parser.parse_args()
name_list = args.fileNameList.split(":")
folder_need_check = set()
@@ -15,10 +15,10 @@ def main():
# - application
# - file
if loc.split("/")[0] == "examples" and len(loc.split("/")) >= 4:
folder_need_check.add('/'.join(loc.split("/")[1:3]))
folder_need_check.add("/".join(loc.split("/")[1:3]))
# Output the result using print. Then the shell can get the values.
print(list(folder_need_check))
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -74,16 +74,16 @@ def get_organization_repositories(github_token, organization_name) -> List[str]:
# prepare header
headers = {
'Authorization': f'Bearer {github_token}',
'Accept': 'application/vnd.github+json',
'X-GitHub-Api-Version': '2022-11-28'
"Authorization": f"Bearer {github_token}",
"Accept": "application/vnd.github+json",
"X-GitHub-Api-Version": "2022-11-28",
}
res = requests.get(url, headers=headers).json()
repo_list = []
for item in res:
repo_list.append(item['name'])
repo_list.append(item["name"])
return repo_list
@@ -97,9 +97,9 @@ def get_issue_pull_request_comments(github_token: str, org_name: str, repo_name:
"""
# prepare header
headers = {
'Authorization': f'Bearer {github_token}',
'Accept': 'application/vnd.github+json',
'X-GitHub-Api-Version': '2022-11-28'
"Authorization": f"Bearer {github_token}",
"Accept": "application/vnd.github+json",
"X-GitHub-Api-Version": "2022-11-28",
}
user_engagement_count = {}
@@ -107,28 +107,28 @@ def get_issue_pull_request_comments(github_token: str, org_name: str, repo_name:
# do pagination to the API
page = 1
while True:
comment_api = f'https://api.github.com/repos/{org_name}/{repo_name}/issues/comments?since={since}&page={page}'
comment_api = f"https://api.github.com/repos/{org_name}/{repo_name}/issues/comments?since={since}&page={page}"
comment_response = requests.get(comment_api, headers=headers).json()
if len(comment_response) == 0:
break
else:
for item in comment_response:
comment_author_relationship = item['author_association']
if comment_author_relationship != 'MEMBER':
comment_author_relationship = item["author_association"]
if comment_author_relationship != "MEMBER":
# if the comment is not made by our member
# we don't count this comment towards user engagement
continue
issue_id = item['issue_url'].split('/')[-1]
issue_api = f'https://api.github.com/repos/{org_name}/{repo_name}/issues/{issue_id}'
issue_id = item["issue_url"].split("/")[-1]
issue_api = f"https://api.github.com/repos/{org_name}/{repo_name}/issues/{issue_id}"
issue_response = requests.get(issue_api, headers=headers).json()
issue_author_relationship = issue_response['author_association']
issue_author_relationship = issue_response["author_association"]
if issue_author_relationship != 'MEMBER':
if issue_author_relationship != "MEMBER":
# this means that the issue/PR is not created by our own people
# any comments in this issue/PR by our member will be counted towards the leaderboard
member_name = item['user']['login']
member_name = item["user"]["login"]
if member_name in user_engagement_count:
user_engagement_count[member_name] += 1
@@ -153,7 +153,7 @@ def get_discussion_comments(github_token: str, org_name: str, repo_name: str, si
if cursor is None:
offset_str = ""
else:
offset_str = f", after: \"{cursor}\""
offset_str = f', after: "{cursor}"'
query = f"""
{{
repository(owner: "{org_name}", name: "{repo_name}"){{
@@ -182,7 +182,7 @@ def get_discussion_comments(github_token: str, org_name: str, repo_name: str, si
if cursor is None:
offset_str = ""
else:
offset_str = f", before: \"{cursor}\""
offset_str = f', before: "{cursor}"'
query = f"""
{{
repository(owner: "{org_name}", name: "{repo_name}"){{
@@ -220,8 +220,8 @@ def get_discussion_comments(github_token: str, org_name: str, repo_name: str, si
# a utility function to make call to Github GraphQL API
def _call_graphql_api(query):
headers = {"Authorization": f"Bearer {github_token}"}
json_data = {'query': query}
response = requests.post('https://api.github.com/graphql', json=json_data, headers=headers)
json_data = {"query": query}
response = requests.post("https://api.github.com/graphql", json=json_data, headers=headers)
data = response.json()
return data
@@ -234,21 +234,21 @@ def get_discussion_comments(github_token: str, org_name: str, repo_name: str, si
data = _call_graphql_api(query)
found_discussion_out_of_time_range = False
edges = data['data']['repository']['discussions']['edges']
edges = data["data"]["repository"]["discussions"]["edges"]
if len(edges) == 0:
break
else:
# keep the discussion whose author is not a member
for edge in edges:
# print the discussion title
discussion = edge['node']
discussion_updated_at = str2datetime(discussion['updatedAt'])
discussion = edge["node"]
discussion_updated_at = str2datetime(discussion["updatedAt"])
# check if the updatedAt is within the last 7 days
# if yes, add it to discussion_numbers
if discussion_updated_at > since:
if discussion['authorAssociation'] != 'MEMBER':
discussion_numbers.append(discussion['number'])
if discussion["authorAssociation"] != "MEMBER":
discussion_numbers.append(discussion["number"])
else:
found_discussion_out_of_time_range = True
@@ -256,7 +256,7 @@ def get_discussion_comments(github_token: str, org_name: str, repo_name: str, si
break
else:
# update cursor
cursor = edges[-1]['cursor']
cursor = edges[-1]["cursor"]
# get the discussion comments and replies made by our member
user_engagement_count = {}
@@ -269,42 +269,42 @@ def get_discussion_comments(github_token: str, org_name: str, repo_name: str, si
data = _call_graphql_api(query)
# get the comments
edges = data['data']['repository']['discussion']['comments']['edges']
edges = data["data"]["repository"]["discussion"]["comments"]["edges"]
# update the cursor
if len(edges) == 0:
break
else:
# update cursor for pagination
cursor = edges[-1]['cursor']
cursor = edges[-1]["cursor"]
for edge in edges:
comment = edge['node']
if comment['authorAssociation'] == 'MEMBER':
comment = edge["node"]
if comment["authorAssociation"] == "MEMBER":
# check if the updatedAt is within the last 7 days
# if yes, add it to user_engagement_count
comment_updated_at = datetime.strptime(comment['updatedAt'], "%Y-%m-%dT%H:%M:%SZ")
comment_updated_at = datetime.strptime(comment["updatedAt"], "%Y-%m-%dT%H:%M:%SZ")
if comment_updated_at > since:
member_name = comment['author']['login']
member_name = comment["author"]["login"]
if member_name in user_engagement_count:
user_engagement_count[member_name] += 1
else:
user_engagement_count[member_name] = 1
# get the replies
reply_edges = comment['replies']['edges']
reply_edges = comment["replies"]["edges"]
if len(reply_edges) == 0:
continue
else:
for reply_edge in reply_edges:
reply = reply_edge['node']
if reply['authorAssociation'] == 'MEMBER':
reply = reply_edge["node"]
if reply["authorAssociation"] == "MEMBER":
# check if the updatedAt is within the last 7 days
# if yes, add it to discussion_numbers
reply_updated_at = datetime.strptime(reply['updatedAt'], "%Y-%m-%dT%H:%M:%SZ")
reply_updated_at = datetime.strptime(reply["updatedAt"], "%Y-%m-%dT%H:%M:%SZ")
if reply_updated_at > since:
member_name = reply['author']['login']
member_name = reply["author"]["login"]
if member_name in user_engagement_count:
user_engagement_count[member_name] += 1
else:
@@ -312,7 +312,9 @@ def get_discussion_comments(github_token: str, org_name: str, repo_name: str, si
return user_engagement_count
def generate_user_engagement_leaderboard_image(github_token: str, org_name: str, repo_list: List[str], output_path: str) -> bool:
def generate_user_engagement_leaderboard_image(
github_token: str, org_name: str, repo_list: List[str], output_path: str
) -> bool:
"""
Generate the user engagement leaderboard image for stats within the last 7 days
@@ -335,16 +337,19 @@ def generate_user_engagement_leaderboard_image(github_token: str, org_name: str,
else:
total_engagement_count[name] = count
for repo_name in repo_list:
print(f"Fetching user engagement count for {repo_name}/{repo_name}")
issue_pr_engagement_count = get_issue_pull_request_comments(github_token=github_token, org_name=org_name, repo_name=repo_name, since=start_datetime_str)
discussion_engagement_count = get_discussion_comments(github_token=github_token, org_name=org_name, repo_name=repo_name, since=start_datetime)
issue_pr_engagement_count = get_issue_pull_request_comments(
github_token=github_token, org_name=org_name, repo_name=repo_name, since=start_datetime_str
)
discussion_engagement_count = get_discussion_comments(
github_token=github_token, org_name=org_name, repo_name=repo_name, since=start_datetime
)
# update the total engagement count
_update_count(issue_pr_engagement_count)
_update_count(discussion_engagement_count)
# prepare the data for plotting
x = []
y = []
@@ -363,7 +368,7 @@ def generate_user_engagement_leaderboard_image(github_token: str, org_name: str,
# plot the leaderboard
xlabel = f"Number of Comments made (since {start_datetime_str})"
ylabel = "Member"
title = 'Active User Engagement Leaderboard'
title = "Active User Engagement Leaderboard"
plot_bar_chart(x, y, xlabel=xlabel, ylabel=ylabel, title=title, output_path=output_path)
return True
else:
@@ -380,16 +385,16 @@ def generate_contributor_leaderboard_image(github_token, org_name, repo_list, ou
"""
# request to the Github API to get the users who have contributed in the last 7 days
headers = {
'Authorization': f'Bearer {github_token}',
'Accept': 'application/vnd.github+json',
'X-GitHub-Api-Version': '2022-11-28'
"Authorization": f"Bearer {github_token}",
"Accept": "application/vnd.github+json",
"X-GitHub-Api-Version": "2022-11-28",
}
counter = Counter()
start_datetime = get_utc_time_one_week_ago()
def _get_url(org_name, repo_name, page):
return f'https://api.github.com/repos/{org_name}/{repo_name}/pulls?per_page=50&page={page}&state=closed'
return f"https://api.github.com/repos/{org_name}/{repo_name}/pulls?per_page=50&page={page}&state=closed"
def _iterate_by_page(org_name, repo_name):
page = 1
@@ -415,8 +420,8 @@ def generate_contributor_leaderboard_image(github_token, org_name, repo_list, ou
# count the pull request and author from response
for pr_data in response:
merged_at = pr_data['merged_at']
author = pr_data['user']['login']
merged_at = pr_data["merged_at"]
author = pr_data["user"]["login"]
if merged_at is None:
continue
@@ -439,7 +444,7 @@ def generate_contributor_leaderboard_image(github_token, org_name, repo_list, ou
_iterate_by_page(org_name, repo_name)
# convert unix timestamp to Beijing datetime
bj_start_datetime = datetime.fromtimestamp(start_datetime.timestamp(), tz=pytz.timezone('Asia/Shanghai'))
bj_start_datetime = datetime.fromtimestamp(start_datetime.timestamp(), tz=pytz.timezone("Asia/Shanghai"))
bj_start_datetime_str = datetime2str(bj_start_datetime)
contribution_list = counter.to_sorted_list()
@@ -452,7 +457,7 @@ def generate_contributor_leaderboard_image(github_token, org_name, repo_list, ou
if len(author_list) > 0:
xlabel = f"Number of Pull Requests (since {bj_start_datetime_str})"
ylabel = "Contributor"
title = 'Active Contributor Leaderboard'
title = "Active Contributor Leaderboard"
plot_bar_chart(num_commit_list, author_list, xlabel=xlabel, ylabel=ylabel, title=title, output_path=output_path)
return True
else:
@@ -468,14 +473,14 @@ def upload_image_to_lark(lark_tenant_token: str, image_path: str) -> str:
image_path (str): the path to the image to be uploaded
"""
url = "https://open.feishu.cn/open-apis/im/v1/images"
form = {'image_type': 'message', 'image': (open(image_path, 'rb'))} # 需要替换具体的path
form = {"image_type": "message", "image": (open(image_path, "rb"))} # 需要替换具体的path
multi_form = MultipartEncoder(form)
headers = {
'Authorization': f'Bearer {lark_tenant_token}', ## 获取tenant_access_token, 需要替换为实际的token
"Authorization": f"Bearer {lark_tenant_token}", ## 获取tenant_access_token, 需要替换为实际的token
}
headers['Content-Type'] = multi_form.content_type
headers["Content-Type"] = multi_form.content_type
response = requests.request("POST", url, headers=headers, data=multi_form).json()
return response['data']['image_key']
return response["data"]["image_key"]
def generate_lark_tenant_access_token(app_id: str, app_secret: str) -> str:
@@ -486,10 +491,10 @@ def generate_lark_tenant_access_token(app_id: str, app_secret: str) -> str:
app_id (str): Lark app id
app_secret (str): Lark app secret
"""
url = 'https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal'
data = {'app_id': app_id, 'app_secret': app_secret}
url = "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal"
data = {"app_id": app_id, "app_secret": app_secret}
response = requests.post(url, json=data).json()
return response['tenant_access_token']
return response["tenant_access_token"]
def send_image_to_lark(image_key: str, webhook_url: str) -> None:
@@ -516,10 +521,10 @@ def send_message_to_lark(message: str, webhook_url: str):
requests.post(webhook_url, json=data)
if __name__ == '__main__':
GITHUB_TOKEN = os.environ['GITHUB_TOKEN']
CONTRIBUTOR_IMAGE_PATH = 'contributor_leaderboard.png'
USER_ENGAGEMENT_IMAGE_PATH = 'engagement_leaderboard.png'
if __name__ == "__main__":
GITHUB_TOKEN = os.environ["GITHUB_TOKEN"]
CONTRIBUTOR_IMAGE_PATH = "contributor_leaderboard.png"
USER_ENGAGEMENT_IMAGE_PATH = "engagement_leaderboard.png"
ORG_NAME = "hpcaitech"
# get all open source repositories
@@ -527,17 +532,19 @@ if __name__ == '__main__':
# generate images
contrib_success = generate_contributor_leaderboard_image(GITHUB_TOKEN, ORG_NAME, REPO_LIST, CONTRIBUTOR_IMAGE_PATH)
engagement_success = generate_user_engagement_leaderboard_image(GITHUB_TOKEN, ORG_NAME, REPO_LIST, USER_ENGAGEMENT_IMAGE_PATH)
engagement_success = generate_user_engagement_leaderboard_image(
GITHUB_TOKEN, ORG_NAME, REPO_LIST, USER_ENGAGEMENT_IMAGE_PATH
)
# upload images
APP_ID = os.environ['LARK_APP_ID']
APP_SECRET = os.environ['LARK_APP_SECRET']
APP_ID = os.environ["LARK_APP_ID"]
APP_SECRET = os.environ["LARK_APP_SECRET"]
LARK_TENANT_TOKEN = generate_lark_tenant_access_token(app_id=APP_ID, app_secret=APP_SECRET)
contributor_image_key = upload_image_to_lark(LARK_TENANT_TOKEN, CONTRIBUTOR_IMAGE_PATH)
user_engagement_image_key = upload_image_to_lark(LARK_TENANT_TOKEN, USER_ENGAGEMENT_IMAGE_PATH)
# send message to lark
LARK_WEBHOOK_URL = os.environ['LARK_WEBHOOK_URL']
LARK_WEBHOOK_URL = os.environ["LARK_WEBHOOK_URL"]
message = """本周的社区榜单出炉啦!
1. 开发贡献者榜单
2. 用户互动榜单

View File

@@ -7,27 +7,27 @@ import re
import requests
COMMIT_API = 'https://api.github.com/repos/hpcaitech/ColossalAI/commits'
TAGS_API = 'https://api.github.com/repos/hpcaitech/ColossalAI/tags'
COMMIT_API = "https://api.github.com/repos/hpcaitech/ColossalAI/commits"
TAGS_API = "https://api.github.com/repos/hpcaitech/ColossalAI/tags"
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('--out', type=str, help='output path for the release draft', required=True)
parser.add_argument('--version', type=str, help='current version to release', required=True)
parser.add_argument("--out", type=str, help="output path for the release draft", required=True)
parser.add_argument("--version", type=str, help="current version to release", required=True)
return parser.parse_args()
def get_latest_tag_commit(headers=None):
res = requests.get(url=TAGS_API, headers=headers)
data = res.json()
commit_hash = data[0]['commit']['sha']
version = data[0]['name']
commit_hash = data[0]["commit"]["sha"]
version = data[0]["name"]
return commit_hash, version
def get_commit_info(commit_hash, headers=None):
api = f'{COMMIT_API}/{commit_hash}'
api = f"{COMMIT_API}/{commit_hash}"
res = requests.get(url=api, headers=headers)
return res.json()
@@ -37,7 +37,7 @@ def get_all_commit_info(since, headers=None):
results = []
while True:
api = f'{COMMIT_API}?since={since}&per_page=100&page={page}'
api = f"{COMMIT_API}?since={since}&per_page=100&page={page}"
resp = requests.get(url=api, headers=headers)
data = resp.json()
@@ -53,21 +53,21 @@ def get_all_commit_info(since, headers=None):
def collate_release_info(commit_info_list):
results = dict()
pattern = pattern = r'\[.*\]'
pattern = pattern = r"\[.*\]"
for commit_info in commit_info_list:
author = commit_info['commit']['author']['name']
author = commit_info["commit"]["author"]["name"]
try:
author_url = commit_info['author']['url']
author_url = commit_info["author"]["url"]
except:
# author can be None
author_url = None
msg = commit_info['commit']['message']
msg = commit_info["commit"]["message"]
match = re.search(pattern, msg)
if match:
tag = match.group().lstrip('[').rstrip(']').capitalize()
tag = match.group().lstrip("[").rstrip("]").capitalize()
if tag not in results:
results[tag] = []
results[tag].append((msg, author, author_url))
@@ -89,42 +89,43 @@ def generate_release_post_markdown(current_version, last_version, release_info):
for msg, author, author_url in v:
# only keep the first line
msg = msg.split('\n')[0]
msg = msg.split("\n")[0]
if author_url:
item = f'{msg} by [{author}]({author_url})\n'
item = f"{msg} by [{author}]({author_url})\n"
else:
item = f'{msg} by {author}\n'
text.append(f'- {item}')
item = f"{msg} by {author}\n"
text.append(f"- {item}")
text.append('\n')
text.append("\n")
# add full change log
text.append(
f'**Full Changelog**: https://github.com/hpcaitech/ColossalAI/compare/{current_version}...{last_version}')
f"**Full Changelog**: https://github.com/hpcaitech/ColossalAI/compare/{current_version}...{last_version}"
)
return text
if __name__ == '__main__':
if __name__ == "__main__":
args = parse_args()
token = os.environ['GITHUB_API_TOKEN']
headers = {'Authorization': token}
token = os.environ["GITHUB_API_TOKEN"]
headers = {"Authorization": token}
# get previous release tag
last_release_commit, last_version = get_latest_tag_commit(headers)
last_release_commit_info = get_commit_info(last_release_commit, headers=headers)
last_release_date = last_release_commit_info['commit']['author']['date']
last_release_date = last_release_commit_info["commit"]["author"]["date"]
# get the commits since last release
commit_info = get_all_commit_info(since=last_release_date, headers=headers)
commit_info = commit_info[:-1] # remove the release commit
commit_info = commit_info[:-1] # remove the release commit
# collate into markdown
release_info = collate_release_info(commit_info)
markdown_text = generate_release_post_markdown(args.version, last_version, release_info)
# write into a file
with open(args.out, 'w') as f:
with open(args.out, "w") as f:
for line in markdown_text:
f.write(line)

View File

@@ -5,8 +5,8 @@ import requests
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('-m', '--message', type=str)
parser.add_argument('-u', '--url', type=str)
parser.add_argument("-m", "--message", type=str)
parser.add_argument("-u", "--url", type=str)
return parser.parse_args()
@@ -15,6 +15,6 @@ def send_message_to_lark(message, webhook_url):
requests.post(webhook_url, json=data)
if __name__ == '__main__':
if __name__ == "__main__":
args = parse_args()
send_message_to_lark(args.message, args.url)