From 3bed6ae9eea18424a1305b2d7b977a3c66fd2621 Mon Sep 17 00:00:00 2001 From: YeAnbang Date: Mon, 9 Jun 2025 09:37:28 +0800 Subject: [PATCH] fix bug, tested --- .gitignore | 4 ++++ applications/ColossalChat/coati/distributed/consumer.py | 6 ++---- applications/ColossalChat/coati/distributed/producer.py | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 0503f3c95..e603f5015 100644 --- a/.gitignore +++ b/.gitignore @@ -167,3 +167,7 @@ applications/ColossalChat/wandb applications/ColossalChat/model applications/ColossalChat/eval applications/ColossalChat/rollouts +applications/ColossalChat/*.txt +applications/ColossalChat/*.db +applications/ColossalChat/stdin +applications/ColossalChat/*.zip diff --git a/applications/ColossalChat/coati/distributed/consumer.py b/applications/ColossalChat/coati/distributed/consumer.py index 3ae4a1796..a7abb1588 100644 --- a/applications/ColossalChat/coati/distributed/consumer.py +++ b/applications/ColossalChat/coati/distributed/consumer.py @@ -132,9 +132,7 @@ class BaseConsumer: format_acc = raw_batch["format_acc"][:, :, 0] ans_acc = raw_batch["ans_acc"][:, :, 0] response_len = ( - raw_batch["response_idx"][:, :, 1] - - raw_batch["response_idx"][:, :, 0] - + 1 + raw_batch["response_idx"][:, :, 1] - raw_batch["response_idx"][:, :, 0] + 1 ).type(torch.float32) effective_group_mask = None if self.filter_range is not None and self.grpo_config.get("dynamic_batching", True): @@ -160,7 +158,7 @@ class BaseConsumer: ) if effective_group_mask is not None: print( - f"[T{dist.get_rank()}] Filter recv data: {len(raw_batch_with_reward)} -> {torch.sum(effective_group_mask).cpu().item()} effective groups" + f"[T{dist.get_rank()}] Filter recv data: {len(raw_batch)} -> {torch.sum(effective_group_mask).cpu().item()} effective groups" ) # mapping the effective group to the raw group for indexing effective_group_to_raw_group_mapping = {} diff --git a/applications/ColossalChat/coati/distributed/producer.py b/applications/ColossalChat/coati/distributed/producer.py index 135c8db0e..23542f1c6 100644 --- a/applications/ColossalChat/coati/distributed/producer.py +++ b/applications/ColossalChat/coati/distributed/producer.py @@ -291,7 +291,7 @@ class BaseProducer: reward_model_output = self.reward_model( outputs["input_ids"].view((-1, outputs["input_ids"].size(-1))), gt_answer=gt_answer, - response_idx=outputs["response_idx"], + response_idx=outputs["response_idx"].view((-1, 2)), ) outputs["reward"] = ( torch.tensor([value[0] for value in reward_model_output])