mirror of
				https://github.com/hpcaitech/ColossalAI.git
				synced 2025-10-24 17:33:39 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			15 lines
		
	
	
		
			432 B
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			15 lines
		
	
	
		
			432 B
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
	
	
#!/usr/bin/env bash
# Prepare a preference dataset by invoking prepare_dataset.py.
#
# Usage: set SAVE_DIR below to the directory that should hold the outputs,
# replace the /PATH/TO/... placeholders and the tokenizer directory, then run
# this script from the directory that contains prepare_dataset.py.
#
# The script first deletes the cache/, jsonl/ and arrow/ subdirectories of
# SAVE_DIR, then passes those same three paths to prepare_dataset.py.

# Output root; must be filled in before running.
SAVE_DIR=""

# Refuse to run with an empty SAVE_DIR: the paths below would otherwise
# expand to /cache, /jsonl and /arrow and be removed recursively.
if [ -z "$SAVE_DIR" ]; then
    echo "Error: SAVE_DIR is empty; set it to the output directory before running." >&2
    exit 1
fi

# Remove outputs of any previous run. Expansions are quoted so paths with
# spaces or glob characters are handled as a single argument.
rm -rf -- "$SAVE_DIR/cache"
rm -rf -- "$SAVE_DIR/jsonl"
rm -rf -- "$SAVE_DIR/arrow"

python prepare_dataset.py --type preference \
    --data_input_dirs /PATH/TO/PREFERENCE/DATASET \
    --conversation_template_config /PATH/TO/CHAT/TEMPLATE/CONFIG.json \
    --tokenizer_dir "" \
    --data_cache_dir "$SAVE_DIR/cache" \
    --data_jsonl_output_dir "$SAVE_DIR/jsonl" \
    --data_arrow_output_dir "$SAVE_DIR/arrow" \
    --max_length 1024