:warning: **This content may be outdated since the major update of Colossal Chat. We will update this content soon.**

# Distributed PPO Training on Stage 3

## Detach Experience Makers and Trainers

See examples at `ColossalAI/application/Chat/examples/ray`

### Setup Experience Makers

- define makers' environment variables:

```python
env_info_makers = [{
    'local_rank': '0',
    'rank': str(rank),
    'world_size': str(num_makers),
    'master_port': maker_port,
    'master_addr': master_addr
} for rank in range(num_makers)]
```

- define maker models:

```python
def model_fn():
    actor = get_actor_from_args(...)
    critic = get_critic_from_args(...)
    reward_model = get_reward_model_from_args(...)
    initial_model = get_actor_from_args(...)
    return actor, critic, reward_model, initial_model
```

- set experience_holder_refs:

```python
experience_holder_refs = [
    ExperienceMakerHolder.options(
        name=f"maker_{i}",
        num_gpus=1,
        max_concurrency=2
    ).remote(
        detached_trainer_name_list=[f"trainer_{x}" for x in target_trainers(...)],
        model_fn=model_fn,
        ...)
    for i, env_info_maker in enumerate(env_info_makers)
]
```

The names in `detached_trainer_name_list` refer to the target trainers that the maker should send experience to.
A trainer's name is set in the same way as a maker's, via `.options(name="str")`; see below.
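The arguments to `target_trainers(...)` are elided in the snippet above. Purely as an illustration of what such a mapping could look like, a hypothetical round-robin helper (the name and signature are assumptions for this sketch, not part of the Coati API) might be:

```python
# Hypothetical helper, for illustration only: maker i streams its experience
# to trainer (i mod num_trainers), i.e. a simple round-robin assignment.
def target_trainers(maker_rank, num_trainers):
    return [maker_rank % num_trainers]


# e.g. detached_trainer_name_list=[f"trainer_{x}" for x in target_trainers(i, num_trainers)]
```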
### Setup Trainers

- define trainers' environment variables:

```python
env_info_trainers = [{
    'local_rank': '0',
    'rank': str(rank),
    'world_size': str(num_trainers),
    'master_port': trainer_port,
    'master_addr': master_addr
} for rank in range(num_trainers)]
```

- define trainer models:

```python
def trainer_model_fn():
    actor = get_actor_from_args(...)
    critic = get_critic_from_args(...)
    return actor, critic
```

- set trainer_refs:

```python
trainer_refs = [
    DetachedPPOTrainer.options(
        name=f"trainer_{i}",
        num_gpus=1,
        max_concurrency=2
    ).remote(
        experience_maker_holder_name_list=[f"maker_{x}" for x in target_makers(...)],
        model_fn=trainer_model_fn,
        ...)
    for i, env_info_trainer in enumerate(env_info_trainers)
]
```

The names in `experience_maker_holder_name_list` refer to the target makers that the trainer should send updated models to.
By setting `detached_trainer_name_list` and `experience_maker_holder_name_list`, we can customize the transmission graph, as sketched below.
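To make the graph concrete, a hypothetical `target_makers` counterpart to the `target_trainers` sketch above could give a one-to-one wiring for 2 makers and 2 trainers (again, the helper name and signature are assumptions used only for illustration):

```python
# Hypothetical counterpart to target_trainers: trainer j pushes its updated
# weights back to maker (j mod num_makers), the reverse of the round-robin
# experience edges, yielding maker_0 <-> trainer_0 and maker_1 <-> trainer_1.
def target_makers(trainer_rank, num_makers):
    return [trainer_rank % num_makers]


# A fully connected graph would instead return list(range(num_makers)) here
# (and list(range(num_trainers)) in target_trainers), so every maker feeds
# every trainer and every trainer updates every maker.
```

The two lists need not be symmetric, but a maker is usually included in the lists of the trainers it feeds, so that it keeps generating experience with reasonably fresh weights.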
### Launch Jobs

- define data_loader:

```python
def data_loader_fn():
    return torch.utils.data.DataLoader(dataset=dataset)
```
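`dataset` above is assumed to be built elsewhere (the example scripts construct a tokenized prompt dataset). Purely as a hypothetical stand-in for wiring tests, it could be as small as:

```python
# Hypothetical placeholder: the real examples load and tokenize a prompt
# dataset, and the maker may expect tokenized tensors rather than raw strings.
dataset = [
    "What is the capital of France?",
    "Summarize the benefits of detaching experience makers from trainers.",
]
```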

- launch makers:

```python
wait_tasks = []
for experience_holder_ref in experience_holder_refs:
    wait_tasks.append(
        experience_holder_ref.workingloop.remote(data_loader_fn(),
                                                 num_steps=experience_steps))
```

- launch trainers:

```python
for trainer_ref in trainer_refs:
    wait_tasks.append(trainer_ref.fit.remote(total_steps, update_steps, train_epochs))
```

- wait for done:

```python
ray.get(wait_tasks)
```
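For completeness, the driver process has to initialize Ray before any of the named actors above can be created. A minimal sketch (the cluster address handling here is an assumption; see the launch scripts under `examples/ray` for the real setup):

```python
import ray

# Assumption: a Ray cluster has already been started (e.g. via `ray start --head`);
# for a single-node experiment a plain ray.init() also works.
ray.init(address="auto")

# ...create makers and trainers, launch them as shown above,
# then block until every remote task has finished:
ray.get(wait_tasks)
```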
## Flexible Structure

We can deploy different strategies to makers and trainers. Here are a few example configurations.
### 2 Makers 1 Trainer

<p align="center">
<img src="https://github.com/hpcaitech/public_assets/blob/main/applications/chat/2m1t.png?raw=true" width=600/>
</p>
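In this layout both makers feed the single trainer. In terms of the name lists from the setup steps, the wiring would look roughly like this (the dict form is only an illustration; the names follow the `maker_{i}` / `trainer_{i}` convention used above):

```python
# 2 makers, 1 trainer: both makers stream experience to trainer_0,
# and trainer_0 pushes updated weights back to both makers.
trainer_targets_per_maker = {
    "maker_0": ["trainer_0"],   # becomes that maker's detached_trainer_name_list
    "maker_1": ["trainer_0"],
}
maker_targets_per_trainer = {
    "trainer_0": ["maker_0", "maker_1"],   # becomes experience_maker_holder_name_list
}
```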
### 2 Makers 2 Trainers

<p align="center">
<img src="https://github.com/hpcaitech/public_assets/blob/main/applications/chat/2m2t.png?raw=true" width=600/>
</p>
### Maker Inference Quantization

<p align="center">
<img src="https://github.com/hpcaitech/public_assets/blob/main/applications/chat/2m2t_quantize.png?raw=true" width=600/>
</p>