mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-06-28 08:17:57 +00:00
fix style
This commit is contained in:
parent
9688e19b32
commit
8a3ff4f315
@ -40,7 +40,7 @@ if __name__ == "__main__":
|
|||||||
type=str,
|
type=str,
|
||||||
required=True,
|
required=True,
|
||||||
default=None,
|
default=None,
|
||||||
help="The type of data",
|
help="The type of data, choose one from ['sft', 'prompt', 'preference', 'kto']",
|
||||||
)
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
if args.data_type == "sft":
|
if args.data_type == "sft":
|
||||||
|
@ -46,7 +46,6 @@ def supervised_tokenize_sft(
|
|||||||
max_length: the maximum context length
|
max_length: the maximum context length
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if ignore_index is None:
|
|
||||||
ignore_index = IGNORE_INDEX
|
ignore_index = IGNORE_INDEX
|
||||||
|
|
||||||
messages = data_point["messages"]
|
messages = data_point["messages"]
|
||||||
@ -146,8 +145,6 @@ def tokenize_prompt_dataset(
|
|||||||
ignore_index: the ignore index when calculate loss during training
|
ignore_index: the ignore index when calculate loss during training
|
||||||
max_length: the maximum context length
|
max_length: the maximum context length
|
||||||
"""
|
"""
|
||||||
if ignore_index is None:
|
|
||||||
ignore_index = IGNORE_INDEX
|
|
||||||
|
|
||||||
messages = data_point["messages"]
|
messages = data_point["messages"]
|
||||||
template = deepcopy(conversation_template)
|
template = deepcopy(conversation_template)
|
||||||
@ -226,8 +223,6 @@ def tokenize_rlhf(
|
|||||||
{"context": [{"from": "user", "content": "xxx"}, {"from": "assistant", "content": "xxx"}],
|
{"context": [{"from": "user", "content": "xxx"}, {"from": "assistant", "content": "xxx"}],
|
||||||
"chosen": {"from": "assistant", "content": "xxx"}, "rejected": {"from": "assistant", "content": "xxx"}}
|
"chosen": {"from": "assistant", "content": "xxx"}, "rejected": {"from": "assistant", "content": "xxx"}}
|
||||||
"""
|
"""
|
||||||
if ignore_index is None:
|
|
||||||
ignore_index = IGNORE_INDEX
|
|
||||||
|
|
||||||
context = data_point["context"]
|
context = data_point["context"]
|
||||||
template = deepcopy(conversation_template)
|
template = deepcopy(conversation_template)
|
||||||
|
@ -26,7 +26,7 @@ from .utils import is_rank_0, to_device
|
|||||||
|
|
||||||
class DPOTrainer(SLTrainer):
|
class DPOTrainer(SLTrainer):
|
||||||
"""
|
"""
|
||||||
Trainer for PPO algorithm.
|
Trainer for DPO algorithm.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
actor (Actor): the actor model in ppo algorithm
|
actor (Actor): the actor model in ppo algorithm
|
||||||
|
@ -27,7 +27,7 @@ from .utils import is_rank_0, to_device
|
|||||||
|
|
||||||
class KTOTrainer(SLTrainer):
|
class KTOTrainer(SLTrainer):
|
||||||
"""
|
"""
|
||||||
Trainer for PPO algorithm.
|
Trainer for KTO algorithm.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
actor (Actor): the actor model in ppo algorithm
|
actor (Actor): the actor model in ppo algorithm
|
||||||
|
@ -26,7 +26,7 @@ from .utils import is_rank_0, to_device
|
|||||||
|
|
||||||
class ORPOTrainer(SLTrainer):
|
class ORPOTrainer(SLTrainer):
|
||||||
"""
|
"""
|
||||||
Trainer for PPO algorithm.
|
Trainer for ORPO algorithm.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
actor (Actor): the actor model in ppo algorithm
|
actor (Actor): the actor model in ppo algorithm
|
||||||
|
@ -39,15 +39,15 @@ preference_seed = {
|
|||||||
}
|
}
|
||||||
kto_seed = {
|
kto_seed = {
|
||||||
"prompt": [
|
"prompt": [
|
||||||
{"from": "user", "content": "What are some cuss words in english?"},
|
{"from": "user", "content": "What are some praise words in english?"},
|
||||||
{
|
{
|
||||||
"from": "assistant",
|
"from": "assistant",
|
||||||
"content": "Here's an incomplete list.\n\nAss, dick, bugger, crap, fuck, shit, bitch, turd, shithead, shitbag, scrotum, cunt, whore, fucker, shit-eating, cum, cumbucket, fucknugget, butthole, poop, jackass, cocksucker, asshole, goddamn, piss, sperm, blow, wank, jism, cum-sucking, masturbate, faggot, queer, jizz, jizz-licking, prostitute, slut, cheater, fornicator, floozy, wetback, Mexican, Hispanic, sodomite, midget, mama's boy, faggot, pervert, queer, scumbag, bitch,",
|
"content": "Here's an incomplete list.\n\nexcellent, fantastic, impressive ...",
|
||||||
},
|
},
|
||||||
{"from": "user", "content": "What's your favorite one?"},
|
{"from": "user", "content": "What's your favorite one?"},
|
||||||
],
|
],
|
||||||
"completion": {"from": "assistant", "content": "Ass."},
|
"completion": {"from": "assistant", "content": "Impressive."},
|
||||||
"label": False,
|
"label": True,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1 +1 @@
|
|||||||
{"prompt": [{"from": "user", "content": "What are some cuss words in english?"}, {"from": "assistant", "content": "Here's an incomplete list.\n\nAss, dick, bugger, crap, ..."},{"from": "user", "content": "What's your favorite one?"}],"completion": {"from": "assistant", "content": "Ass."},"label": false}
|
{"prompt": [{"from": "user", "content": "What are some praise words in english?"}, {"from": "assistant", "content": "Here's an incomplete list.\n\nexcellent, fantastic, impressive ..."},{"from": "user", "content": "What's your favorite one?"}],"completion": {"from": "assistant", "content": "impressive."},"label": true}
|
||||||
|
Loading…
Reference in New Issue
Block a user