mirror of
				https://github.com/hpcaitech/ColossalAI.git
				synced 2025-10-24 17:33:39 +00:00 
			
		
		
		
	upgrade reward functions
This commit is contained in:
		| @@ -198,6 +198,8 @@ if __name__ == "__main__": | ||||
|             "beta": args.kl_coeff,  # KL penalty coefficient | ||||
|             "loss_variation": "sample_level", | ||||
|             "reward_fn_type": args.reward_type, | ||||
|             "max_length": args.max_new_tokens + args.max_prompt_tokens, | ||||
|             "max_new_tokens": args.max_new_tokens, | ||||
|         } | ||||
|     elif args.algo == "DAPO": | ||||
|         # DAPO variant settings | ||||
| @@ -213,6 +215,7 @@ if __name__ == "__main__": | ||||
|             "loss_variation": "token_level", | ||||
|             "soft_over_length_punishment": True, | ||||
|             "max_length": args.max_new_tokens + args.max_prompt_tokens, | ||||
|             "max_new_tokens": args.max_new_tokens, | ||||
|             "cache_length": min(1024, int(args.max_new_tokens / 4)), | ||||
|             "filter_truncated_response": True, | ||||
|             "reward_fn_type": args.reward_type, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user