Rewrite reward function

This commit is contained in:
YeAnbang
2025-05-01 11:28:05 +08:00
parent a6085ff676
commit d06042b434
2 changed files with 68 additions and 8 deletions

View File

@@ -127,7 +127,7 @@ class BaseConsumer:
eval_statistics = {
k: eval_statistics[k] + local_eval_result[k] for k in eval_statistics
}
- eval_statistics = {k: (v[0] / v[1]).item() for k, v in eval_statistics.items()}
+ eval_statistics = {"eval/" + k: (v[0] / v[1]).item() for k, v in eval_statistics.items()}
if dist.get_rank() == 0:
if hasattr(self, "wandb_run"):
self.wandb_run.log(eval_statistics, step=eval_global_step)