[fix] revert reward update and evaluation (#6295)

* Revert "rewrite reward fn"

This reverts commit d06042b434.

* Revert "upgrade reward math verification"

This reverts commit a6085ff676.

* Revert "fix bug"

This reverts commit 01640ebd65.

* Revert "reuse comm-group"

This reverts commit bd61918dcf.

* Revert "Support evaluation during training"

This reverts commit 57a88395fe.
This commit is contained in:
YeAnbang
2025-05-07 10:56:47 +08:00
committed by GitHub
parent 17928ad84f
commit eb6b5dd62e
9 changed files with 82 additions and 307 deletions

View File

@@ -1,10 +1,7 @@
import json
import os
from collections import defaultdict
from typing import Any, Dict, List
import torch
from filelock import FileLock
from colossalai.shardformer.layer.loss import dist_log_prob
@@ -155,13 +152,3 @@ def masked_sum(tensor: torch.Tensor, mask: torch.Tensor, dim: int = 1) -> torch.
"""
tensor = tensor * mask
return tensor.sum(dim=dim)
def safe_write_jsonl(file_path, data):
    """Append each entry of ``data`` to ``file_path`` as one JSON document per line.

    The append is performed while holding a ``FileLock`` on
    ``file_path + ".lock"`` so that writers using the same lock path do not
    interleave their output.

    Args:
        file_path: Path of the JSONL file to append to. It may be a bare
            file name with no directory component.
        data: Iterable of JSON-serializable entries; each entry is written
            on its own line with non-ASCII characters left unescaped.
    """
    # The lock file sits next to the target file, so the parent directory has
    # to exist before the lock is acquired. `os.path.dirname` returns "" for a
    # bare file name, and `os.makedirs("")` raises, so skip it in that case.
    parent_dir = os.path.dirname(file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with FileLock(file_path + ".lock"):
        with open(file_path, "a", encoding="utf8") as f:
            for entry in data:
                json_line = json.dumps(entry, ensure_ascii=False)
                f.write(json_line + "\n")