Mirror of https://github.com/hpcaitech/ColossalAI.git, synced 2025-08-06 18:43:58 +00:00

Commit 8a9721bafe (parent 33f15203d3)

[pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

The automated fixes below touch four files: the DPO, ORPO, and SFT training scripts and the DummyLLMDataset helper. They drop unused imports (load_tokenized_dataset, DataLoader), move the dummy_dataset import into the sorted import block, and reformat a multi-line call and a keyword argument.
@@ -5,10 +5,11 @@ import resource
 from contextlib import nullcontext

 import torch
-from coati.dataset import DataCollatorForPreferenceDataset, StatefulDistributedSampler, load_tokenized_dataset
+from coati.dataset import DataCollatorForPreferenceDataset, StatefulDistributedSampler
 from coati.models import convert_to_lora_module, disable_dropout
 from coati.trainer import DPOTrainer
 from coati.utils import load_checkpoint
+from dummy_dataset import DummyLLMDataset
 from transformers import AutoModelForCausalLM, AutoTokenizer

 import colossalai
@@ -18,7 +19,6 @@ from colossalai.cluster import DistCoordinator
 from colossalai.logging import get_dist_logger
 from colossalai.nn.lr_scheduler import CosineAnnealingWarmupLR
 from colossalai.nn.optimizer import HybridAdam
-from dummy_dataset import DummyLLMDataset

 logger = get_dist_logger()

@@ -165,9 +165,11 @@ def train(args):

     # configure dataset
     mode_map = {"train": "train", "valid": "validation", "test": "test"}
-    train_dataset = DummyLLMDataset(["chosen_input_ids", "chosen_loss_mask", "rejected_input_ids",
-                                     "rejected_loss_mask"],
-                                    args.max_length, args.dataset_size)
+    train_dataset = DummyLLMDataset(
+        ["chosen_input_ids", "chosen_loss_mask", "rejected_input_ids", "rejected_loss_mask"],
+        args.max_length,
+        args.dataset_size,
+    )
     data_collator = DataCollatorForPreferenceDataset(tokenizer=tokenizer, max_length=args.max_length)

     train_dataloader = plugin.prepare_dataloader(

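A quick note on the reformatted DummyLLMDataset(...) call above: each item the dataset yields is just a dict mapping the four preference keys to constant LongTensors, so it batches like any map-style PyTorch dataset. The sketch below is a minimal, self-contained illustration; it uses a plain list of dicts and the default collate function in place of DummyLLMDataset, DataCollatorForPreferenceDataset, and plugin.prepare_dataloader, and the seq_len/size/batch_size values are placeholders rather than the script's real arguments.

import torch
from torch.utils.data import DataLoader

# Stand-in for what DummyLLMDataset yields: one constant LongTensor of length
# seq_len per key. The key names match the call above; the sizes are placeholders.
keys = ["chosen_input_ids", "chosen_loss_mask", "rejected_input_ids", "rejected_loss_mask"]
seq_len, size = 32, 16
items = [{k: torch.ones(seq_len, dtype=torch.long) for k in keys} for _ in range(size)]

# The default collate function stacks each key into a [batch, seq_len] tensor;
# the real script uses DataCollatorForPreferenceDataset and plugin.prepare_dataloader instead.
loader = DataLoader(items, batch_size=4)
batch = next(iter(loader))
print({k: tuple(v.shape) for k, v in batch.items()})
# {'chosen_input_ids': (4, 32), 'chosen_loss_mask': (4, 32), 'rejected_input_ids': (4, 32), 'rejected_loss_mask': (4, 32)}

Using constant all-ones tensors keeps the example deterministic and free of any tokenizer dependency, in the same spirit as the dummy dataset touched by this commit.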
@@ -5,10 +5,11 @@ import resource
 from contextlib import nullcontext

 import torch
-from coati.dataset import DataCollatorForPreferenceDataset, StatefulDistributedSampler, load_tokenized_dataset
+from coati.dataset import DataCollatorForPreferenceDataset, StatefulDistributedSampler
 from coati.models import convert_to_lora_module, disable_dropout
 from coati.trainer import ORPOTrainer
 from coati.utils import load_checkpoint
+from dummy_dataset import DummyLLMDataset
 from transformers import AutoModelForCausalLM, AutoTokenizer

 import colossalai
@@ -18,7 +19,7 @@ from colossalai.cluster import DistCoordinator
 from colossalai.logging import get_dist_logger
 from colossalai.nn.lr_scheduler import CosineAnnealingWarmupLR
 from colossalai.nn.optimizer import HybridAdam
-from dummy_dataset import DummyLLMDataset
+
 logger = get_dist_logger()


@@ -152,9 +153,11 @@ def train(args):
     # configure dataset
     coordinator.print_on_master(f"Load dataset: {args.dataset}")
     mode_map = {"train": "train", "valid": "validation", "test": "test"}
-    train_dataset = DummyLLMDataset(["chosen_input_ids", "chosen_loss_mask", "rejected_input_ids",
-                                     "rejected_loss_mask"],
-                                    args.max_length, args.dataset_size)
+    train_dataset = DummyLLMDataset(
+        ["chosen_input_ids", "chosen_loss_mask", "rejected_input_ids", "rejected_loss_mask"],
+        args.max_length,
+        args.dataset_size,
+    )
     data_collator = DataCollatorForPreferenceDataset(tokenizer=tokenizer, max_length=args.max_length)

     train_dataloader = plugin.prepare_dataloader(

@@ -6,10 +6,11 @@ import resource
 from contextlib import nullcontext

 import torch
-from coati.dataset import DataCollatorForSupervisedDataset, StatefulDistributedSampler, load_tokenized_dataset
+from coati.dataset import DataCollatorForSupervisedDataset, StatefulDistributedSampler
 from coati.models import convert_to_lora_module
 from coati.trainer import SFTTrainer
 from coati.utils import load_checkpoint
+from dummy_dataset import DummyLLMDataset
 from transformers import AutoModelForCausalLM, AutoTokenizer

 import colossalai
@@ -19,7 +20,6 @@ from colossalai.cluster import DistCoordinator
 from colossalai.logging import get_dist_logger
 from colossalai.nn.lr_scheduler import CosineAnnealingWarmupLR
 from colossalai.nn.optimizer import HybridAdam
-from dummy_dataset import DummyLLMDataset

 logger = get_dist_logger()

@@ -1,5 +1,6 @@
 import torch
-from torch.utils.data import Dataset, DataLoader
+from torch.utils.data import Dataset
+

 class DummyLLMDataset(Dataset):
     def __init__(self, keys, seq_len, size=500):
@@ -11,7 +12,7 @@ class DummyLLMDataset(Dataset):
     def _generate_data(self):
         data = {}
         for key in self.keys:
-            data[key] = torch.ones(self.seq_len, dtype = torch.long)
+            data[key] = torch.ones(self.seq_len, dtype=torch.long)
         return data

     def __len__(self):

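Only two hunks of the DummyLLMDataset helper appear in this commit, so for orientation here is a rough sketch of what the full class looks like when the visible fragments are stitched together. The __init__ body and the __getitem__ method are assumptions filled in for illustration; they are not part of the diff.

import torch
from torch.utils.data import Dataset


class DummyLLMDataset(Dataset):
    def __init__(self, keys, seq_len, size=500):
        # keys: names of the tensors each sample carries (e.g. the four preference keys);
        # seq_len: length of every generated sequence; size: number of (identical) samples.
        # Assumed attribute assignments; the real __init__ body is not shown in the diff.
        self.keys = keys
        self.seq_len = seq_len
        self.size = size
        self.data = self._generate_data()

    def _generate_data(self):
        # As in the hunk above: one constant all-ones LongTensor per key.
        data = {}
        for key in self.keys:
            data[key] = torch.ones(self.seq_len, dtype=torch.long)
        return data

    def __len__(self):
        return self.size

    def __getitem__(self, idx):
        # Assumed: every index returns the same pre-generated dict of tensors.
        return {key: self.data[key] for key in self.keys}

With the four preference keys used by the DPO and ORPO scripts above, every index returns a fixed-length sample, which is enough to drive the training loop for benchmarking without tokenization or real data.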