[misc] update pre-commit and run all files (#4752)

* [misc] update pre-commit

* [misc] run pre-commit

* [misc] remove useless configuration files

* [misc] ignore cuda for clang-format
Author: Hongxin Liu
Date: 2023-09-19 14:20:26 +08:00
Committed by: GitHub
Parent: 3c6b831c26
Commit: 079bf3cb26
1268 changed files with 50037 additions and 38444 deletions
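Most of the churn in this diff is mechanical reformatting: the quote normalization, exploded signatures with trailing commas, and spacing around ** seen below all match Black's style. As a hedged sketch (assuming Black is among the hooks the updated pre-commit config runs, which the diff alone does not confirm), the rewrites can be reproduced with Black's Python API:

# Sketch: reproduce one of the rewrites below with Black's API.
# Assumes the `black` package is installed; Black is presumed, not
# confirmed by this diff, to be the hook that produced these edits.
import black

src = "tensor_index_mapping[item['index']] = len(new_tensor_list)\n"
print(black.format_str(src, mode=black.Mode()))
# -> tensor_index_mapping[item["index"]] = len(new_tensor_list)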


@@ -44,8 +44,8 @@ def shuffle_by_round_robin(tensor_list, num_partitions):
     for partition_id in range(partitions_count):
         partition_tensors = partitions[partition_id]
         for item in partition_tensors:
-            tensor_index_mapping[item['index']] = len(new_tensor_list)
-            new_tensor_list.append(item['tensor'])
+            tensor_index_mapping[item["index"]] = len(new_tensor_list)
+            new_tensor_list.append(item["tensor"])
     return new_tensor_list, tensor_index_mapping
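For readers skimming the hunk above: the function deals tensors into partitions in round-robin order and then flattens the partitions back into one list, recording where each tensor moved. A self-contained sketch (a hypothetical standalone version, not the library's exact code):

# Hypothetical standalone version of the shuffle above.
def shuffle_by_round_robin_demo(tensor_list, num_partitions):
    # Deal tensors into partitions in round-robin order.
    partitions = [[] for _ in range(num_partitions)]
    for index, tensor in enumerate(tensor_list):
        partitions[index % num_partitions].append({"index": index, "tensor": tensor})
    # Flatten partitions back into one list, remembering old index -> new index.
    new_tensor_list, tensor_index_mapping = [], {}
    for partition in partitions:
        for item in partition:
            tensor_index_mapping[item["index"]] = len(new_tensor_list)
            new_tensor_list.append(item["tensor"])
    return new_tensor_list, tensor_index_mapping

# With 5 tensors and 2 partitions, old order [0, 1, 2, 3, 4] becomes
# [0, 2, 4, 1, 3] and tensor_index_mapping == {0: 0, 2: 1, 4: 2, 1: 3, 3: 4}.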
@@ -107,11 +107,13 @@ def split_by_dtype(tensor_list):
     return buckets


-def reduce_tensor_dp_group(tensor: torch.Tensor,
-                           dtype: Optional[torch.dtype] = None,
-                           dst_local_rank: Optional[int] = None,
-                           dst_global_rank: Optional[int] = None,
-                           group: Optional[dist.ProcessGroup] = None):
+def reduce_tensor_dp_group(
+    tensor: torch.Tensor,
+    dtype: Optional[torch.dtype] = None,
+    dst_local_rank: Optional[int] = None,
+    dst_global_rank: Optional[int] = None,
+    group: Optional[dist.ProcessGroup] = None,
+):
     """
     Reduce the tensor in the data parallel process group
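A hedged usage sketch of the reformatted signature (the rank values, dtype, and group below are placeholders; the reduction body itself is outside this hunk):

# Hypothetical call site for reduce_tensor_dp_group; all argument values
# are placeholders, and dist.init_process_group must already have run.
import torch
import torch.distributed as dist

grad = torch.randn(1024, device="cuda")
reduce_tensor_dp_group(
    tensor=grad,
    dtype=torch.float32,      # optionally cast before reducing
    dst_local_rank=0,         # destination rank within the dp group
    dst_global_rank=0,        # the same destination as a global rank
    group=dist.group.WORLD,   # data-parallel process group
)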
@@ -173,7 +175,7 @@ def has_inf_or_nan(tensor):
             raise
         return True
     else:
-        if tensor_sum == float('inf') or tensor_sum == -float('inf') or tensor_sum != tensor_sum:
+        if tensor_sum == float("inf") or tensor_sum == -float("inf") or tensor_sum != tensor_sum:
             return True
         return False
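The final comparison, tensor_sum != tensor_sum, is the standard IEEE-754 NaN test: NaN is the only value that compares unequal to itself, so no math.isnan import is needed. A quick demonstration:

# NaN is the only float unequal to itself; inf compares equal to itself.
x = float("nan")
print(x != x)                          # True
print(float("inf") != float("inf"))   # False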
@@ -184,8 +186,7 @@ def release_param_grad(tensor_list):


 def calculate_global_norm_from_list(norm_list):
-    """ Compute total from a list of norms
-    """
+    """Compute total from a list of norms"""
     total_norm = 0.0
     for norm in norm_list:
         total_norm += norm**2.0
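The accumulation follows the usual rule for combining L2 norms: the global norm is the square root of the sum of the squared partial norms (the final square root presumably happens after the loop, outside this hunk). A worked example:

import math

# Combine per-partition L2 norms into one global L2 norm.
norm_list = [3.0, 4.0]
total_norm = 0.0
for norm in norm_list:
    total_norm += norm**2.0   # 9.0, then 25.0
print(math.sqrt(total_norm))  # 5.0 == sqrt(3**2 + 4**2)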
@@ -221,7 +222,7 @@ def compute_norm(gradients: Tensor, dp_group: ProcessGroup, tp_group: ProcessGro
     total_norm = 0.0
     for g in gradients:
         param_norm = g.data.double().norm(2)
-        total_norm += param_norm.item()**2
+        total_norm += param_norm.item() ** 2

     # Sum across all model parallel GPUs.
     total_norm_cuda = torch.cuda.FloatTensor([float(total_norm)])
@@ -230,9 +231,9 @@ def compute_norm(gradients: Tensor, dp_group: ProcessGroup, tp_group: ProcessGro
     if tp_group is not None:
         dist.all_reduce(tensor=total_norm_cuda, op=torch.distributed.ReduceOp.SUM, group=tp_group)

-    total_norm = total_norm_cuda[0].item()**(1. / norm_type)
+    total_norm = total_norm_cuda[0].item() ** (1.0 / norm_type)

-    if total_norm == float('inf') or total_norm == -float('inf') or total_norm != total_norm:
+    if total_norm == float("inf") or total_norm == -float("inf") or total_norm != total_norm:
         total_norm = -1

     return total_norm
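Putting the two compute_norm hunks together: squared per-gradient norms are summed locally, all-reduced across the parallel groups, raised to the power 1/norm_type (a square root for norm_type=2), and replaced by the sentinel -1 if the result overflowed to inf or became NaN. A minimal single-process sketch, assuming norm_type=2 and no process groups (so the all-reduce is a no-op):

import torch

# Single-process sketch of the norm computation above (norm_type = 2,
# no dp/tp groups, CPU tensors); -1 is the overflow/NaN sentinel.
def compute_norm_demo(gradients, norm_type=2.0):
    total_norm = 0.0
    for g in gradients:
        param_norm = g.data.double().norm(2)
        total_norm += param_norm.item() ** 2
    total_norm = total_norm ** (1.0 / norm_type)
    if total_norm == float("inf") or total_norm != total_norm:
        total_norm = -1
    return total_norm

grads = [torch.full((2,), 3.0), torch.full((2,), 4.0)]
print(compute_norm_demo(grads))  # sqrt(2 * 3**2 + 2 * 4**2) = sqrt(50) ≈ 7.07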