[NFC] polish applications/Chat/coati/trainer/base.py code style (#4260)

shenggan 2023-07-18 10:59:57 +08:00 committed by binmakeswell
parent b2debdc09b
commit 798cb72907


@@ -25,7 +25,8 @@ class SLTrainer(ABC):
         optim (Optimizer): the optimizer to use for training
     """

-    def __init__(self,
+    def __init__(
+        self,
         strategy: Strategy,
         max_epochs: int,
         model: nn.Module,
@@ -50,10 +51,7 @@ class SLTrainer(ABC):
     def fit(self, *args, **kwargs):
         self._before_fit(*args, **kwargs)
-        for epoch in tqdm.trange(self.max_epochs,
-                                 desc="Epochs",
-                                 disable=not is_rank_0() or self.no_epoch_bar
-                                 ):
+        for epoch in tqdm.trange(self.max_epochs, desc="Epochs", disable=not is_rank_0() or self.no_epoch_bar):
             self._train(epoch)
             self._eval(epoch)
@@ -75,8 +73,7 @@ class OnPolicyTrainer(ABC):
                  buffer: NaiveReplayBuffer,
                  sample_buffer: bool,
                  dataloader_pin_memory: bool,
-                 callbacks: List[Callback] = []
-                 ) -> None:
+                 callbacks: List[Callback] = []) -> None:
         super().__init__()
         self.strategy = strategy
         self.buffer = buffer
@@ -154,7 +151,8 @@ class OnPolicyTrainer(ABC):
         self._learn(update_step)
         self._on_learn_epoch_end(update_step)

-    def fit(self,
+    def fit(
+        self,
         prompt_dataloader: DataLoader,
         pretrain_dataloader: DataLoader,
         num_episodes: int,
@@ -175,23 +173,16 @@ class OnPolicyTrainer(ABC):
         self.pretrain_dataloader = CycledDataLoader(pretrain_dataloader)
         with self._fit_ctx():
-            for episode in tqdm.trange(num_episodes,
-                                       desc="Episodes",
-                                       disable=not is_rank_0()):
+            for episode in tqdm.trange(num_episodes, desc="Episodes", disable=not is_rank_0()):
                 with self._episode_ctx(episode):
-                    for collect_step in tqdm.trange(num_collect_steps,
-                                                    desc="Collect steps",
-                                                    disable=not is_rank_0()):
+                    for collect_step in tqdm.trange(num_collect_steps, desc="Collect steps", disable=not is_rank_0()):
                         self._collect_phase(collect_step)
                     if not self.sample_buffer:
                         # HACK(cwher): according to the design of boost API, dataloader should also be boosted,
                         # but it is impractical to adapt this pattern in RL training. Thus, I left dataloader unboosted.
                         # I only call strategy.setup_dataloader() to setup dataloader.
-                        self.dataloader = self.strategy.setup_dataloader(self.buffer,
-                                                                         self.dataloader_pin_memory)
-                    for update_step in tqdm.trange(num_update_steps,
-                                                   desc="Update steps",
-                                                   disable=not is_rank_0()):
+                        self.dataloader = self.strategy.setup_dataloader(self.buffer, self.dataloader_pin_memory)
+                    for update_step in tqdm.trange(num_update_steps, desc="Update steps", disable=not is_rank_0()):
                         self._update_phase(update_step)
                     # NOTE: this is for on-policy algorithms
                     self.buffer.clear()
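
Note for readers skimming the hunks above: the recurring construct being collapsed onto single lines is a tqdm progress bar that is silenced on every rank except rank 0 via disable=not is_rank_0(). The following is a minimal, self-contained sketch of that pattern, not code from the repo; the local is_rank_0 helper is an assumption based on the usual torch.distributed idiom and stands in for the one coati imports from its trainer utilities.

import tqdm
import torch.distributed as dist


def is_rank_0() -> bool:
    # Assumed stand-in for coati's helper: single-process runs count as rank 0,
    # otherwise ask the default process group for this process's rank.
    return not dist.is_initialized() or dist.get_rank() == 0


def train(max_epochs: int = 3) -> None:
    # Only rank 0 draws the bar; other ranks run the same loop silently.
    for epoch in tqdm.trange(max_epochs, desc="Epochs", disable=not is_rank_0()):
        pass  # per-epoch training/evaluation would go here

The same gating appears on the "Episodes", "Collect steps", and "Update steps" bars in OnPolicyTrainer.fit.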