Mirror of https://github.com/hpcaitech/ColossalAI.git
[misc] update pre-commit and run all files (#4752)
* [misc] update pre-commit
* [misc] run pre-commit
* [misc] remove useless configuration files
* [misc] ignore cuda for clang-format
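The hunks below come from that pre-commit run and are mechanical formatting fixes: string literals are normalized to double quotes, redundant parentheses around a bare name are dropped, and the long CONFIG literal is wrapped across lines with a trailing comma. A condensed before/after sketch of those three rewrites, using names taken from the hunks themselves:

    # before the pre-commit run
    B_shape = (OUTPUT_SIZE)                 # parentheses around a bare name
    print_rank_0('linear_col forward: pass')
    CONFIG = dict(parallel=dict(pipeline=dict(size=1), tensor=dict(size=4, mode='1d')),)

    # after the pre-commit run
    B_shape = OUTPUT_SIZE
    print_rank_0("linear_col forward: pass")
    CONFIG = dict(
        parallel=dict(pipeline=dict(size=1), tensor=dict(size=4, mode="1d")),
    )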
@@ -44,7 +44,7 @@ def check_linear_col():
     W = W.clone()
     W.requires_grad = True

-    B_shape = (OUTPUT_SIZE)
+    B_shape = OUTPUT_SIZE
     B_master = torch.randn(B_shape, dtype=dtype, device=device)
     dist.broadcast(B_master, src=0)
     B = torch.chunk(B_master, DEPTH, dim=0)[i]
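The B_shape edit in this hunk is behavior-preserving: in Python, parentheses around a single expression without a trailing comma do not create a tuple, so (OUTPUT_SIZE) and OUTPUT_SIZE are the same integer and torch.randn receives an identical shape either way. A quick standalone illustration (plain Python, not repo code):

    OUTPUT_SIZE = 8
    assert (OUTPUT_SIZE) == OUTPUT_SIZE    # the parentheses are a no-op
    assert (OUTPUT_SIZE,) == (8,)          # only the trailing comma builds a tuple
    # torch.randn(8) and torch.randn((8,)) both return a 1-D tensor of length 8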
@@ -65,7 +65,7 @@ def check_linear_col():
     C = torch.chunk(C_master, DEPTH, dim=-1)[i]

     check_equal(out, C)
-    print_rank_0('linear_col forward: pass')
+    print_rank_0("linear_col forward: pass")

     grad_shape = C_master.shape
     grad_master = torch.randn(grad_shape, dtype=dtype, device=get_current_device())
@@ -87,7 +87,7 @@ def check_linear_col():
     B_grad = torch.chunk(B_grad, DEPTH, dim=0)[i]
     check_equal(B_grad, layer.bias.grad)

-    print_rank_0('linear_col backward: pass')
+    print_rank_0("linear_col backward: pass")


 def check_linear_row():
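The context lines around these hunks show the pattern every check in this file follows: build a full-size "master" tensor, broadcast it from rank 0 so all processes hold the same data, keep this rank's chunk along the partitioned dimension, run the parallel layer, and compare the result against the matching chunk of a single-device reference with check_equal. A condensed sketch of that flow, assuming an already initialized process group; check_equal here is a hypothetical stand-in for the repo's helper, and which tensor gets sliced (input, output, or gradient) varies per layer:

    import torch
    import torch.distributed as dist

    def check_equal(a, b):
        # hypothetical helper: the real one lives elsewhere in the test package
        assert torch.allclose(a, b, rtol=1e-3, atol=1e-2)

    def shard_and_compare(layer, layer_master, depth, rank, dim):
        # identical full-size input on every rank after the broadcast
        A = torch.randn(16, 64, device="cuda")
        dist.broadcast(A, src=0)

        out = layer(A)                      # parallel layer emits this rank's shard
        C_master = layer_master(A)          # reference layer emits the full output
        C = torch.chunk(C_master, depth, dim=dim)[rank]
        check_equal(out, C)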
@@ -114,7 +114,7 @@ def check_linear_row():
     W = W.clone()
     W.requires_grad = True

-    B_shape = (INPUT_SIZE)
+    B_shape = INPUT_SIZE
     B_master = torch.randn(B_shape, dtype=dtype, device=device)
     dist.broadcast(B_master, src=0)
     B = B_master.clone()
@@ -134,7 +134,7 @@ def check_linear_row():
     C = C_master.clone()

     check_equal(out, C)
-    print_rank_0('linear_row forward: pass')
+    print_rank_0("linear_row forward: pass")

     grad_shape = C_master.shape
     grad_master = torch.randn(grad_shape, dtype=dtype, device=get_current_device())
@@ -155,7 +155,7 @@ def check_linear_row():
     B_grad = B_master.grad
     check_equal(B_grad, layer.bias.grad)

-    print_rank_0('linear_row backward: pass')
+    print_rank_0("linear_row backward: pass")


 def check_embed():
@@ -184,7 +184,7 @@ def check_embed():
     C_master = embed_master(A_master)
     C = C_master.clone()
     check_equal(out, C)
-    print_rank_0('embed forward: pass')
+    print_rank_0("embed forward: pass")

     grad_shape = C_master.shape
     grad_master = torch.randn(grad_shape, dtype=dtype, device=device)
@@ -197,7 +197,7 @@ def check_embed():
     B_grad = embed_master.weight.grad
     B_grad = torch.chunk(B_grad, DEPTH, dim=-1)[i]
     check_equal(B_grad, embed.weight.grad)
-    print_rank_0('embed backward: pass')
+    print_rank_0("embed backward: pass")


 def check_vocab_parallel_embed():
@@ -226,7 +226,7 @@ def check_vocab_parallel_embed():
     C_master = embed_master(A_master)
     C = C_master.clone()
     check_equal(out, C)
-    print_rank_0('vocab parallel embed forward: pass')
+    print_rank_0("vocab parallel embed forward: pass")

     grad_shape = C_master.shape
     grad_master = torch.randn(grad_shape, dtype=dtype, device=device)
@@ -239,7 +239,7 @@ def check_vocab_parallel_embed():
     B_grad = embed_master.weight.grad
     B_grad = torch.chunk(B_grad, DEPTH, dim=0)[i]
     check_equal(B_grad, embed.weight.grad)
-    print_rank_0('vocab parallel embed backward: pass')
+    print_rank_0("vocab parallel embed backward: pass")


 def check_classifier_no_given_weight():
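Aside from the message strings, the two embedding checks above differ only in the axis used to slice the reference weight gradient before comparison: dim=-1 for the plain parallel embedding (the embedding dimension is split across ranks) versus dim=0 for the vocab-parallel embedding (the vocabulary rows are split). A small illustration of the two slicings on a weight of shape (vocab_size, embed_dim); the sizes below are invented for the example:

    import torch

    DEPTH, rank = 4, 1                     # tensor-parallel degree and this rank's index
    weight = torch.randn(1000, 64)         # (vocab_size, embed_dim), illustrative sizes

    embed_shard = torch.chunk(weight, DEPTH, dim=-1)[rank]   # (1000, 16): embedding dim split
    vocab_shard = torch.chunk(weight, DEPTH, dim=0)[rank]    # (250, 64): vocabulary split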
@@ -283,7 +283,7 @@ def check_classifier_no_given_weight():
     C = C_master.clone()

     check_equal(out, C)
-    print_rank_0('classifier (no given weight) forward: pass')
+    print_rank_0("classifier (no given weight) forward: pass")

     grad_shape = C_master.shape
     grad_master = torch.randn(grad_shape, dtype=dtype, device=device)
@@ -305,7 +305,7 @@ def check_classifier_no_given_weight():
     B_grad = layer_master.bias.grad
     check_equal(B_grad, layer.bias.grad)

-    print_rank_0('classifier (no given weight) backward: pass')
+    print_rank_0("classifier (no given weight) backward: pass")


 def check_vocab_parallel_classifier_no_given_weight():
@@ -343,7 +343,7 @@ def check_vocab_parallel_classifier_no_given_weight():
     C = torch.chunk(C_master, DEPTH, dim=-1)[i]

     check_equal(out, C)
-    print_rank_0('vocab parallel classifier (no given weight) forward: pass')
+    print_rank_0("vocab parallel classifier (no given weight) forward: pass")

     grad_shape = C_master.shape
     grad_master = torch.randn(grad_shape, dtype=dtype, device=device)
@@ -365,7 +365,7 @@ def check_vocab_parallel_classifier_no_given_weight():
     B_grad = torch.chunk(B_grad, DEPTH, dim=0)[i]
     check_equal(B_grad, layer.bias.grad)

-    print_rank_0('vocab parallel classifier (no given weight) backward: pass')
+    print_rank_0("vocab parallel classifier (no given weight) backward: pass")


 def check_classifier_given_embed_weight():
@@ -401,7 +401,7 @@ def check_classifier_given_embed_weight():
     C_master = layer_master(embed_master(A_master))
     C = C_master.clone()
     check_equal(out, C)
-    print_rank_0('classifier (given embed weight) forward: pass')
+    print_rank_0("classifier (given embed weight) forward: pass")

     grad_shape = C_master.shape
     grad_master = torch.randn(grad_shape, dtype=dtype, device=device)
@@ -416,7 +416,7 @@ def check_classifier_given_embed_weight():
     W_grad = torch.chunk(W_grad, DEPTH, dim=-1)[i]
     check_equal(W_grad, embed.weight.grad)

-    print_rank_0('classifier (given embed weight) backward: pass')
+    print_rank_0("classifier (given embed weight) backward: pass")


 def check_vocab_parallel_classifier_given_embed_weight():
@@ -452,7 +452,7 @@ def check_vocab_parallel_classifier_given_embed_weight():
     C_master = layer_master(embed_master(A_master))
     C = torch.chunk(C_master, DEPTH, dim=-1)[i]
     check_equal(out, C)
-    print_rank_0('vocab parallel classifier (given embed weight) forward: pass')
+    print_rank_0("vocab parallel classifier (given embed weight) forward: pass")

     grad_shape = C_master.shape
     grad_master = torch.randn(grad_shape, dtype=dtype, device=device)
@@ -468,7 +468,7 @@ def check_vocab_parallel_classifier_given_embed_weight():
     W_grad = torch.chunk(W_grad, DEPTH, dim=0)[i]
     check_equal(W_grad, embed.weight.grad)

-    print_rank_0('vocab parallel classifier (given embed weight) backward: pass')
+    print_rank_0("vocab parallel classifier (given embed weight) backward: pass")


 def check_vocab_parallel_loss():
@@ -495,7 +495,7 @@ def check_vocab_parallel_loss():
     out_master.requires_grad = True
     loss_master = criterion_master(out_master, target_master)
     check_equal(loss, loss_master)
-    print_rank_0('vocab parallel loss forward: pass')
+    print_rank_0("vocab parallel loss forward: pass")

     loss.backward()
     loss_master.backward()
@@ -503,7 +503,7 @@ def check_vocab_parallel_loss():
     out_grad = out_master.grad
     out_grad = torch.chunk(out_grad, DEPTH, dim=-1)[i]
     check_equal(out_grad, out.grad)
-    print_rank_0('vocab parallel loss backward: pass')
+    print_rank_0("vocab parallel loss backward: pass")


 @torch.no_grad()
@@ -531,7 +531,7 @@ def check_linear_row_stream_inference():
     W = torch.chunk(W_master, DEPTH, dim=-1)[i]
     W = W.clone()

-    B_shape = (INPUT_SIZE)
+    B_shape = INPUT_SIZE
     B_master = torch.randn(B_shape, dtype=dtype, device=device)
     dist.broadcast(B_master, src=0)
     B = B_master.clone()
@@ -550,4 +550,4 @@ def check_linear_row_stream_inference():
     C = C_master.clone()

     check_equal(out, C)
-    print_rank_0('linear_row forward: pass')
+    print_rank_0("linear_row forward: pass")
@@ -10,12 +10,14 @@ from colossalai.legacy.initialize import launch
 from colossalai.logging import disable_existing_loggers
 from colossalai.testing import rerun_if_address_is_in_use, spawn

-CONFIG = dict(parallel=dict(pipeline=dict(size=1), tensor=dict(size=4, mode='1d')),)
+CONFIG = dict(
+    parallel=dict(pipeline=dict(size=1), tensor=dict(size=4, mode="1d")),
+)


 def check_layer(rank, world_size, port):
     disable_existing_loggers()
-    launch(config=CONFIG, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')
+    launch(config=CONFIG, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")

     check_linear_col()
     check_linear_row()
@@ -39,5 +41,5 @@ def test_1d():
     spawn(check_layer, 4)


-if __name__ == '__main__':
+if __name__ == "__main__":
     test_1d()
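Put together, this second file is a thin driver for the checks in the first: CONFIG requests a single pipeline stage and a 4-way 1D tensor-parallel group, check_layer initializes the distributed runtime with launch and then runs the check_* functions, and test_1d starts four such worker processes with spawn(check_layer, 4). A sketch of how the assembled file plausibly reads after this commit; the rerun_if_address_is_in_use decorator is inferred from the import, the remaining check_* calls are elided, and this should be read as illustrative rather than as the exact file:

    from colossalai.legacy.initialize import launch
    from colossalai.logging import disable_existing_loggers
    from colossalai.testing import rerun_if_address_is_in_use, spawn

    CONFIG = dict(
        parallel=dict(pipeline=dict(size=1), tensor=dict(size=4, mode="1d")),
    )

    def check_layer(rank, world_size, port):
        disable_existing_loggers()
        launch(config=CONFIG, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
        check_linear_col()   # check_* functions come from the companion checks module (import elided)
        check_linear_row()   # ... followed by the other check_* functions shown in the first file

    @rerun_if_address_is_in_use()   # inferred from the import; the decorator stack is not in the diff
    def test_1d():
        spawn(check_layer, 4)       # four workers, matching tensor=dict(size=4)

    if __name__ == "__main__":
        test_1d()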