[nfc]fix typo colossalai/pipeline tensor nn (#3899)

* fix typo colossalai/autochunk auto_parallel amp

* fix typo colossalai/auto_parallel nn utils etc.

* fix typo colossalai/auto_parallel autochunk fx/passes  etc.

* fix typo docs/

* change placememt_policy to placement_policy in docs/ and examples/

* fix typo colossalai/ applications/

* fix typo colossalai/cli fx kernel

* fix typo colossalai/nn

* revert change warmuped

* fix typo colossalai/pipeline tensor nn
digger yu · 2023-06-06 14:07:36 +08:00 · committed by GitHub
parent c1535ccbba · commit 0e484e6201
13 changed files with 27 additions and 27 deletions


@@ -123,7 +123,7 @@ class WorkerBase(ABC):
 self.device = device
 self._initialize_outstanding_range()
-# variable and const for context managment
+# variable and const for context management
 self.outstanding = 0
 self.forward_times = 0
 self.backward_times = 0
@@ -226,7 +226,7 @@ class WorkerBase(ABC):
 self.pp_rank_to_worker_rref = pp_rank_to_worker_rref
 # for some schedule need the other worker's info to initialise partition (like Chimera)
-# construction of partition is executed after the registion of pp_rank_to_worker_rref
+# construction of partition is executed after the registration of pp_rank_to_worker_rref
 self._initialize_partition()
 # res_use works for lifecycle counter,
@@ -418,7 +418,7 @@ class WorkerBase(ABC):
 # On current PP middleware design for DAG, get_output_by_key used by _subscribe_producer
 # can only be executed once for every producer-consumer stage pair, which is necessary
 # to count the lifecycle of work_item. So, keeping the _subscribe_producer in the same
-# lock of work_item queue operation gurantees the consistency of lifecycle counter.
+# lock of work_item queue operation guarantees the consistency of lifecycle counter.
 work_item_from_producer = self._subscribe_producer(microbatch_id, forward_only)
 self.work_list[key] = work_item_from_producer
 self.work_list_condition_lock.notify_all()
@@ -460,7 +460,7 @@ class WorkerBase(ABC):
 # On current PP middleware design for DAG, get_output_by_key used by subscribe_consumer
 # can only be executed once for every producer-consumer stage pair, which is necessary
 # to count the lifecycle of work_item. So, keeping the subscribe_consumer in the same
-# lock of work_item queue operation gurantees the consistency of lifecycle counter.
+# lock of work_item queue operation guarantees the consistency of lifecycle counter.
 work_item_from_consumer = self._subscribe_consumer(microbatch_id)
 self.work_list[key] = work_item_from_consumer
 self.work_list_condition_lock.notify_all()
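
Both corrected comments make the same point: get_output_by_key, used by _subscribe_producer and _subscribe_consumer, may run only once per producer-consumer stage pair because it counts the work item's lifecycle, so the subscribe call has to stay inside the same lock that guards the work-list insertion; otherwise another thread could observe the work list and the counter out of sync. A minimal standalone sketch of that locking pattern (the counter, work item and class below are placeholders, not the actual ColossalAI classes):

import threading

class WorkerSketch:
    """Toy illustration of subscribing under the work-list condition lock."""

    def __init__(self):
        self.work_list = {}                             # key -> work item
        self.lifecycle = 0                              # counter the comments refer to
        self.work_list_condition_lock = threading.Condition()

    def _subscribe_producer(self, microbatch_id, forward_only):
        # Assumed stand-in: must run at most once per stage pair, since it bumps the counter.
        self.lifecycle += 1
        return {"microbatch_id": microbatch_id, "forward_only": forward_only}

    def subscribe(self, key, microbatch_id, forward_only=False):
        # Subscription and insertion happen under one lock, so the lifecycle
        # counter and the work list can never be seen in inconsistent states.
        with self.work_list_condition_lock:
            work_item = self._subscribe_producer(microbatch_id, forward_only)
            self.work_list[key] = work_item
            self.work_list_condition_lock.notify_all()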
@@ -508,7 +508,7 @@ class WorkerBase(ABC):
 assert self.producer_stage_ids is None, f"all the producers of rank {rank} has been subscribed"
 assert self.consumer_stage_ids is None, f"all the consumers of rank {rank} has been subscribed"
-# should be aranged in order, the order of the input of current forward
+# should be arranged in order, the order of the input of current forward
 self.producer_stage_ids = self.get_producer_stage_ids()
 self.consumer_stage_ids = self.get_consumer_stage_ids()


@@ -123,7 +123,7 @@ class ChimeraWorker(WorkerBase):
 assert self.producer_stage_ids is None, f"all the producers of rank {rank} has been subscribed"
 assert self.consumer_stage_ids is None, f"all the consumers of rank {rank} has been subscribed"
-# should be aranged in order, the order of the input of current forward
+# should be arranged in order, the order of the input of current forward
 self.producer_stage_ids = []
 self.consumer_stage_ids = []
@@ -174,7 +174,7 @@ class ChimeraWorker(WorkerBase):
 else:
 # if it is down pipeline, create partition by origin method
 co_up_pp_worker_rref = self.pp_rank_to_worker_rref[pp_rank - stage_num]
-# get the coresponding model state dict and wait for its init
+# get the corresponding model state dict and wait for its init
 state_dict = co_up_pp_worker_rref.rpc_sync().get_partition_state_dict()
 super()._initialize_partition()
 self.module_partition.load_state_dict(state_dict)
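
The hunk above is the Chimera weight-sharing path: a down-pipeline worker does not initialise its partition from scratch but pulls the state dict of its counterpart in the up pipeline (index pp_rank - stage_num) and loads it. A toy sketch of that idea, with plain dicts standing in for the RPC handles and torch modules (names and values are illustrative, not the ColossalAI API):

stage_num = 2                                          # stages per pipeline (assumed)
workers = {rank: {"weights": None} for rank in range(2 * stage_num)}

# Up-pipeline workers (ranks < stage_num) initialise their own weights.
for rank in range(stage_num):
    workers[rank]["weights"] = [float(rank)] * 3       # stand-in for real parameters

# Down-pipeline workers copy the partition of their up-pipeline counterpart,
# mirroring the get_partition_state_dict / load_state_dict pair in the diff.
for rank in range(stage_num, 2 * stage_num):
    partner = rank - stage_num
    workers[rank]["weights"] = list(workers[partner]["weights"])

print(workers)   # ranks 2 and 3 now hold copies of the weights of ranks 0 and 1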
@@ -228,7 +228,7 @@ class ChimeraWorker(WorkerBase):
 stage_num = self.actual_stage_num
 co_pp_rank = (pp_rank + stage_num) % (2 * stage_num)
-# if currrent pp_rank is not the first to do step
+# if current pp_rank is not the first to do step
 # wait its previous pp_rank finish step
 grads = self.get_parameter_gradients()
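
For reference, the co_pp_rank expression in this last hunk pairs each worker with its counterpart in the other pipeline: Chimera runs 2 * stage_num workers, and adding stage_num modulo 2 * stage_num maps an up-pipeline rank to the matching down-pipeline rank and back again. A quick check of that mapping (stage_num chosen arbitrarily for illustration):

stage_num = 4                                   # assumed stages per pipeline
world = 2 * stage_num                           # Chimera runs two pipelines of workers
for pp_rank in range(world):
    co_pp_rank = (pp_rank + stage_num) % world
    # The mapping is an involution: applying it twice returns pp_rank.
    assert (co_pp_rank + stage_num) % world == pp_rank
    print(f"pp_rank {pp_rank} <-> co_pp_rank {co_pp_rank}")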