[Distributed RLHF] Integration of PP (#6257)

* update help information

* update style

* fix

* minor fix

* support PP training

* add pp support

* remove unused code

* address review conversation comments

---------

Co-authored-by: Tong Li <tong.li35271158@gmail.com>
This commit is contained in:
YeAnbang
2025-04-09 13:23:24 +08:00
committed by GitHub
parent 50153005b4
commit ed43a4be04
7 changed files with 263 additions and 116 deletions

View File

@@ -47,6 +47,7 @@ def launch_distributed(
master_addr: str = "localhost",
master_port: int = 29500,
core_algo: str = "GRPO",
project_name: Optional[str] = None,
):
if core_algo not in ALGO_MAP:
@@ -108,6 +109,7 @@ def launch_distributed(
"train_microbatch_size": train_microbatch_size,
},
num_generations=num_generations,
project_name=project_name,
)
procs.append(consumer)
ray.get([p.setup.remote() for p in procs])