Tong Li 
							
						 
					 
					
						
						
							
						
						58cb4fb4f7 
					 
					
						
						
							
							add profiling  
						
						
						
						
					 
					
						2025-06-26 17:49:53 +08:00 
						 
				 
			
				
					
						
							
							
								Tong Li 
							
						 
					 
					
						
						
							
						
						8abf186ce2 
					 
					
						
						
							
							fix behind  
						
						
						
						
					 
					
						2025-06-26 10:27:00 +08:00 
						 
				 
			
				
					
						
							
							
								YeAnbang 
							
						 
					 
					
						
						
							
						
						c2561f826a 
					 
					
						
						
							
							fix bugs  
						
						
						
						
					 
					
						2025-06-20 15:44:13 +08:00 
						 
				 
			
				
					
						
							
							
								YeAnbang 
							
						 
					 
					
						
						
							
						
						ff6696a9bb 
					 
					
						
						
							
							support n_behind, add profiling  
						
						
						
						
					 
					
						2025-06-20 03:14:00 +00:00 
						 
				 
			
				
					
						
							
							
								YeAnbang 
							
						 
					 
					
						
						
							
						
						25599246c5 
					 
					
						
						
							
							modify readme  
						
						
						
						
					 
					
						2025-06-10 17:00:35 +08:00 
						 
				 
			
				
					
						
							
							
								YeAnbang 
							
						 
					 
					
						
						
							
						
						dc3033e68a 
					 
					
						
						
							
							support code generation tasks  
						
						
						
						
					 
					
						2025-06-05 18:05:22 +08:00 
						 
				 
			
				
					
						
							
							
								YeAnbang 
							
						 
					 
					
						
						
							
						
						96faf54542 
					 
					
						
						
							
							fix typ and parameter description  
						
						
						
						
					 
					
						2025-06-05 15:41:14 +08:00 
						 
				 
			
				
					
						
							
							
								YeAnbang 
							
						 
					 
					
						
						
							
						
						58f8c9bb43 
					 
					
						
						
							
							Merge branch 'grpo-latest' of  https://github.com/hpcaitech/ColossalAI  into grpo-latest-dev  
						
						
						
						
					 
					
						2025-05-28 17:34:52 +08:00 
						 
				 
			
				
					
						
							
							
								Tong Li 
							
						 
					 
					
						
						
							
						
						de2ad3b206 
					 
					
						
						
							
							fix default eval setting ( #6321 )  
						
						... 
						
						
						
						Co-authored-by: Tong Li <tong.li35271158@gmail.com > 
						
						
					 
					
						2025-05-22 11:52:41 +08:00 
						 
				 
			
				
					
						
							
							
								YeAnbang 
							
						 
					 
					
						
						
							
						
						78a06f5ce3 
					 
					
						
						
							
							fix missing tags parameter  
						
						
						
						
					 
					
						2025-05-21 10:51:32 +08:00 
						 
				 
			
				
					
						
							
							
								YeAnbang 
							
						 
					 
					
						
						
							
						
						37663386bc 
					 
					
						
						
							
							fix metric calculation  
						
						
						
						
					 
					
						2025-05-20 18:14:05 +08:00 
						 
				 
			
				
					
						
							
							
								YeAnbang 
							
						 
					 
					
						
						
							
						
						f8bd2db33f 
					 
					
						
						
							
							add uuid to rollout log  
						
						
						
						
					 
					
						2025-05-20 09:45:56 +08:00 
						 
				 
			
				
					
						
							
							
								YeAnbang 
							
						 
					 
					
						
						
							
						
						116621d004 
					 
					
						
						
							
							merge reward and eval  
						
						
						
						
					 
					
						2025-05-19 11:53:47 +08:00 
						 
				 
			
				
					
						
							
							
								YeAnbang 
							
						 
					 
					
						
						
							
						
						107470a360 
					 
					
						
						
							
							fix logging rollouts  
						
						
						
						
					 
					
						2025-05-17 21:12:58 +08:00 
						 
				 
			
				
					
						
							
							
								YeAnbang 
							
						 
					 
					
						
						
							
						
						03b41d6fb5 
					 
					
						
						
							
							upgrade reward functions  
						
						
						
						
					 
					
						2025-05-16 18:04:38 +08:00 
						 
				 
			
				
					
						
							
							
								YeAnbang 
							
						 
					 
					
						
						
							
						
						203dfb1536 
					 
					
						
						
							
							address conversation  
						
						
						
						
					 
					
						2025-05-16 14:15:35 +08:00 
						 
				 
			
				
					
						
							
							
								YeAnbang 
							
						 
					 
					
						
						
							
						
						11a5854b50 
					 
					
						
						
							
							remove redundant code and fix bugs  
						
						
						
						
					 
					
						2025-05-16 14:08:23 +08:00 
						 
				 
			
				
					
						
							
							
								YeAnbang 
							
						 
					 
					
						
						
							
						
						094f119b3a 
					 
					
						
						
							
							merge  
						
						
						
						
					 
					
						2025-05-14 18:13:47 +08:00 
						 
				 
			
				
					
						
							
							
								YeAnbang 
							
						 
					 
					
						
						
							
						
						50070c1e84 
					 
					
						
						
							
							move logging to producer  
						
						
						
						
					 
					
						2025-05-14 18:10:57 +08:00 
						 
				 
			
				
					
						
							
							
								Tong Li 
							
						 
					 
					
						
						
							
						
						aca547623f 
					 
					
						
						
							
							[feat] Support prompt level dynamic  ( #6300 )  
						
						... 
						
						
						
						* adjust to dynamic prompt bs
* remove debug
* update pad seq (#6303 )
Co-authored-by: Tong Li <tong.li35271158@gmail.com >
* adjust to dynamic prompt bs
* remove debug
* fix dp issue
* fix
* fix default settings
---------
Co-authored-by: Tong Li <tong.li35271158@gmail.com > 
						
						
					 
					
						2025-05-14 16:40:35 +08:00 
						 
				 
			
				
					
						
							
							
								YeAnbang 
							
						 
					 
					
						
						
							
						
						47a7dc7142 
					 
					
						
						
							
							Support evaluation during training  
						
						
						
						
					 
					
						2025-05-14 11:03:11 +08:00 
						 
				 
			
				
					
						
							
							
								YeAnbang 
							
						 
					 
					
						
						
							
						
						eb6b5dd62e 
					 
					
						
						
							
							[fix] revert reward update and evaluation ( #6295 )  
						
						... 
						
						
						
						* Revert "rewrite reward fn"
This reverts commit d06042b434a6085ff67601640ebd65bd61918dcf57a88395fe 
						
						
					 
					
						2025-05-07 10:56:47 +08:00 
						 
				 
			
				
					
						
							
							
								YeAnbang 
							
						 
					 
					
						
						
							
						
						57a88395fe 
					 
					
						
						
							
							Support evaluation during training  
						
						
						
						
					 
					
						2025-04-30 18:31:49 +08:00 
						 
				 
			
				
					
						
							
							
								Tong Li 
							
						 
					 
					
						
						
							
						
						5fd4bcb9d8 
					 
					
						
						
							
							[feat] Sync shard model ( #6289 )  
						
						... 
						
						
						
						* [feat] support hybrid parallel model sync
* update consumer and producer
* update files
* update producer
* remove print
* update
---------
Co-authored-by: duanjunwen <935724073@qq.com >
Co-authored-by: YeAnbang <44796419+YeAnbang@users.noreply.github.com >
Co-authored-by: Tong Li <tong.li35271158@gmail.com > 
						
						
					 
					
						2025-04-30 14:47:01 +08:00 
						 
				 
			
				
					
						
							
							
								YeAnbang 
							
						 
					 
					
						
						
							
						
						14f237ce7e 
					 
					
						
						
							
							[feat] Support boxed math reward ( #6284 )  
						
						... 
						
						
						
						* fix pp+tp, fix dataloader
* fixed plugin micro-batch size
* support boxed reward
* add boxed reward
* fix pp state dict incomplete issue
* Revert "fix pp state dict incomplete issue"
This reverts commit 6c1b3b694f 
						
						
					 
					
						2025-04-29 16:46:47 +08:00 
						 
				 
			
				
					
						
							
							
								YeAnbang 
							
						 
					 
					
						
						
							
						
						2ca1e3c630 
					 
					
						
						
							
							fix pp+tp, fix dataloader ( #6280 )  
						
						
						
						
					 
					
						2025-04-28 17:10:00 +08:00 
						 
				 
			
				
					
						
							
							
								YeAnbang 
							
						 
					 
					
						
						
							
						
						38008858e4 
					 
					
						
						
							
							fix checkpoint naming; add num_epoch parameter ( #6277 )  
						
						
						
						
					 
					
						2025-04-26 14:00:28 +08:00 
						 
				 
			
				
					
						
							
							
								YeAnbang 
							
						 
					 
					
						
						
							
						
						26d859f68e 
					 
					
						
						
							
							[feat] Support DAPO ( #6263 )  
						
						... 
						
						
						
						* update help information
* update style
* fix
* minor fix
* support PP training
* add pp support
* remove unused code
* address conversation
* fix memory leakage support tp+pp
* move empty cache
* move empty cache
* add DAPO support
* remove format reward
* fix filtering, still buggy
* small fix
* add DAPO support
* [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci 
* tested multi-node training; fix bind_batch bug
* fix conversation; support sleep mode
* support reusing excessive samples
* add dynamic batching control flag
* add dynamic batching control flag
* refactored
* fix logging
---------
Co-authored-by: Tong Li <tong.li35271158@gmail.com >
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-04-25 17:39:17 +08:00 
						 
				 
			
				
					
						
							
							
								Tong Li 
							
						 
					 
					
						
						
							
						
						b823c6eec7 
					 
					
						
						
							
							[feat] Add final save at the end ( #6274 )  
						
						... 
						
						
						
						* add final save
* default 1 episode 
						
						
					 
					
						2025-04-23 10:03:46 +08:00 
						 
				 
			
				
					
						
							
							
								Tong Li 
							
						 
					 
					
						
						
							
						
						03f4b1dde3 
					 
					
						
						
							
							add prompt template ( #6273 )  
						
						... 
						
						
						
						Co-authored-by: Tong Li <tong.li35271158@gmail.com > 
						
						
					 
					
						2025-04-22 10:39:47 +08:00 
						 
				 
			
				
					
						
							
							
								YeAnbang 
							
						 
					 
					
						
						
							
						
						9467c10690 
					 
					
						
						
							
							[hot-fix] Fix memory leakage bug, support TP+PP ( #6258 )  
						
						... 
						
						
						
						* update help information
* update style
* fix
* minor fix
* support PP training
* add pp support
* remove unused code
* address conversation
* fix memory leakage support tp+pp
* move empty cache
* move empty cache
---------
Co-authored-by: Tong Li <tong.li35271158@gmail.com > 
						
						
					 
					
						2025-04-10 10:52:18 +08:00 
						 
				 
			
				
					
						
							
							
								YeAnbang 
							
						 
					 
					
						
						
							
						
						ed43a4be04 
					 
					
						
						
							
							[Distributed RLHF] Integration of PP ( #6257 )  
						
						... 
						
						
						
						* update help information
* update style
* fix
* minor fix
* support PP training
* add pp support
* remove unused code
* address conversation
---------
Co-authored-by: Tong Li <tong.li35271158@gmail.com > 
						
						
					 
					
						2025-04-09 13:23:24 +08:00 
						 
				 
			
				
					
						
							
							
								YeAnbang 
							
						 
					 
					
						
						
							
						
						50153005b4 
					 
					
						
						
							
							[feat] add microbatch forwarding ( #6251 )  
						
						... 
						
						
						
						* add microbatch forwarding
* fix forward microbatch
* fix producer OOM
* [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci 
* change project name
* fix temperature annealing
* [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci 
* address conversation
---------
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-03-28 10:24:58 +08:00 
						 
				 
			
				
					
						
							
							
								YeAnbang 
							
						 
					 
					
						
						
							
						
						0472f44163 
					 
					
						
						
							
							fix logprob, add filtering, temperature annealing, lr descent  
						
						
						
						
					 
					
						2025-03-21 10:24:24 +08:00 
						 
				 
			
				
					
						
							
							
								YeAnbang 
							
						 
					 
					
						
						
							
						
						7ee4452f8c 
					 
					
						
						
							
							fix vllm  
						
						
						
						
					 
					
						2025-03-19 17:11:10 +08:00 
						 
				 
			
				
					
						
							
							
								YeAnbang 
							
						 
					 
					
						
						
							
						
						bc0171d392 
					 
					
						
						
							
							fix transformers backend  
						
						
						
						
					 
					
						2025-03-14 18:12:35 +08:00 
						 
				 
			
				
					
						
							
							
								Tong Li 
							
						 
					 
					
						
						
							
						
						57b49da5e4 
					 
					
						
						
							
							setup update  
						
						
						
						
					 
					
						2025-03-13 16:52:15 +08:00 
						 
				 
			
				
					
						
							
							
								Tong Li 
							
						 
					 
					
						
						
							
						
						22cc1558a8 
					 
					
						
						
							
							Merge branch 'grpo-latest' of github.com:hpcaitech/ColossalAI into grpo-latest  
						
						
						
						
					 
					
						2025-03-06 16:28:47 +08:00 
						 
				 
			
				
					
						
							
							
								Tong Li 
							
						 
					 
					
						
						
							
						
						0590f10fb7 
					 
					
						
						
							
							update select algo  
						
						
						
						
					 
					
						2025-03-06 16:27:13 +08:00 
						 
				 
			
				
					
						
							
							
								pre-commit-ci[bot] 
							
						 
					 
					
						
						
							
						
						ab5b6d8432 
					 
					
						
						
							
							[pre-commit.ci] auto fixes from pre-commit.com hooks  
						
						... 
						
						
						
						for more information, see https://pre-commit.ci  
						
						
					 
					
						2025-03-06 06:30:27 +00:00 
						 
				 
			
				
					
						
							
							
								Tong Li 
							
						 
					 
					
						
						
							
						
						0f566cc2d4 
					 
					
						
						
							
							add algo selection  
						
						
						
						
					 
					
						2025-03-06 14:29:22 +08:00 
						 
				 
			
				
					
						
							
							
								Tong Li 
							
						 
					 
					
						
						
							
						
						7f2ceac5c3 
					 
					
						
						
							
							update example  
						
						
						
						
					 
					
						2025-03-06 10:54:23 +08:00 
						 
				 
			
				
					
						
							
							
								Tong Li 
							
						 
					 
					
						
						
							
						
						070907dd7f 
					 
					
						
						
							
							polish  
						
						
						
						
					 
					
						2025-02-28 10:16:42 +08:00 
						 
				 
			
				
					
						
							
							
								Hongxin Liu 
							
						 
					 
					
						
						
							
						
						43c9b5fb44 
					 
					
						
						
							
							[chat] add distributed impl ( #6210 )  
						
						
						
						
					 
					
						2025-02-21 15:24:23 +08:00