Support evaluation during training

This commit is contained in:
YeAnbang
2025-04-30 18:13:40 +08:00
parent b920af427b
commit 47a7dc7142
9 changed files with 234 additions and 65 deletions

View File

@@ -1,6 +1,9 @@
import json
import os
from typing import Any, Dict, List
import torch
from filelock import FileLock
from colossalai.shardformer.layer.loss import dist_log_prob
@@ -130,3 +133,13 @@ def masked_sum(tensor: torch.Tensor, mask: torch.Tensor, dim: int = 1) -> torch.
"""
tensor = tensor * mask
return tensor.sum(dim=dim)
def safe_write_jsonl(file_path: str, data) -> None:
    """Append every entry of ``data`` to ``file_path`` in JSON Lines format.

    Each entry is serialized with ``json.dumps(..., ensure_ascii=False)`` and
    written on its own line. The append is performed while holding a
    ``FileLock`` on ``file_path + ".lock"``, so writers that use this same
    helper on the same path do not run their appends concurrently.

    Args:
        file_path: Destination file. It is opened in append mode, so existing
            content is kept. Missing parent directories are created.
        data: Iterable of JSON-serializable entries; one line is written per
            entry.

    Raises:
        TypeError: Propagated from ``json.dumps`` when an entry cannot be
            serialized. Entries written before the failing one stay in the
            file.
        OSError: Propagated when the directory or file cannot be created or
            written.
    """
    # The lock file sits next to the target file, so the directory has to
    # exist *before* the lock is acquired, not after.
    parent_dir = os.path.dirname(file_path)
    # dirname() is "" for a bare file name; os.makedirs("") would raise.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with FileLock(file_path + ".lock"):
        with open(file_path, "a", encoding="utf8") as f:
            for entry in data:
                f.write(json.dumps(entry, ensure_ascii=False) + "\n")