reward
|
upgrade reward functions
|
2025-05-16 18:04:38 +08:00 |
__init__.py
|
[chat] add distributed impl (#6210)
|
2025-02-21 15:24:23 +08:00 |
comm.py
|
[chat] add distributed impl (#6210)
|
2025-02-21 15:24:23 +08:00 |
consumer.py
|
handle empty index
|
2025-05-15 20:32:05 +08:00 |
grpo_consumer.py
|
fix logging rollouts
|
2025-05-17 21:12:58 +08:00 |
inference_backend.py
|
move logging to producer
|
2025-05-14 18:10:57 +08:00 |
launch.py
|
add uuid to rollout log
|
2025-05-20 09:45:56 +08:00 |
loss.py
|
[feat] Support DAPO (#6263)
|
2025-04-25 17:39:17 +08:00 |
producer.py
|
fix logging rollouts
|
2025-05-17 21:12:58 +08:00 |
README.md
|
[chat] add distributed impl (#6210)
|
2025-02-21 15:24:23 +08:00 |
utils.py
|
move logging to producer
|
2025-05-14 18:10:57 +08:00 |