Mirror of https://github.com/hpcaitech/ColossalAI.git (synced 2025-08-08 03:24:07 +00:00)
* style: rename replay buffer (experience replay is typically for off-policy algorithms; using this name in PPO may be misleading)
* fix: fix wrong zero2 default arg
* test: update experience tests
* style: rename zero_pad fn
* fix: defer init in CycledDataLoader
* test: add benchmark test
* style: rename internal fn of generation
* style: rename internal fn of lora
* fix: remove unused loss fn
* fix: remove unused utils fn
* refactor: remove generate_with_actor fn
* fix: fix type annotation
* test: add models tests
* fix: skip llama due to long execution time
* style: modify dataset
* style: apply formatter
* perf: update reward dataset
* fix: fix wrong IGNORE_INDEX in sft dataset
* fix: remove DataCollatorForSupervisedDataset
* test: add dataset tests
* style: apply formatter
* style: rename test_ci to test_train
* feat: add llama in inference
* test: add inference tests
* test: change test scripts directory
* fix: update ci
* fix: fix typo
* fix: skip llama due to oom
* fix: fix file mod
* style: apply formatter
* refactor: remove duplicated llama_gptq
* style: apply formatter
* to: update rm test
* feat: add tokenizer arg
* feat: add download model script
* test: update train tests
* fix: modify gemini load and save pretrained
* test: update checkpoint io test
* to: modify nproc_per_node
* fix: do not remove existing dir
* fix: modify save path
* test: add random choice
* fix: fix sft path
* fix: enlarge nproc_per_node to avoid oom
* fix: add num_retry
* fix: make lora config of rm and critic consistent
* fix: add warning about lora weights
* fix: skip some gpt2 tests
* fix: remove grad ckpt in rm and critic due to errors
* refactor: directly use Actor in train_sft
* test: add more arguments
* fix: disable grad ckpt when using lora
* fix: fix save_pretrained and related tests
* test: enable zero2 tests
* revert: remove useless fn
* style: polish code
* test: modify test args
54 lines · 3.3 KiB · Python
import os

from transformers import AutoTokenizer

from utils import ChatPromptProcessor, Dialogue

CONTEXT = 'Below is an instruction that describes a task. Write a response that appropriately completes the request. Do not generate new instructions.'
tokenizer = AutoTokenizer.from_pretrained(os.environ['PRETRAINED_PATH'])

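# Each sample is a (history, max_new_tokens, expected_prompt) triple; the larger
# token budgets (200 and 211) exercise truncation of earlier turns in the history.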
samples = [
    ([
        Dialogue(
            instruction='Who is the best player in the history of NBA?',
            response='The best player in the history of the NBA is widely considered to be Michael Jordan. He is one of the most successful players in the league, having won 6 NBA championships with the Chicago Bulls and 5 more with the Washington Wizards. He is a 5-time MVP, 1'
        ),
        Dialogue(instruction='continue this talk', response=''),
    ], 128,
     'Below is an instruction that describes a task. Write a response that appropriately completes the request. Do not generate new instructions.\n\n### Instruction:\nWho is the best player in the history of NBA?\n\n### Response:\nThe best player in the history of the NBA is widely considered to be Michael Jordan. He is one of the most successful players in the league, having won 6 NBA championships with the Chicago Bulls and 5 more with the Washington Wizards. He is a 5-time MVP, 1\n\n### Instruction:\ncontinue this talk\n\n### Response:\n'
    ),
    ([
        Dialogue(
            instruction='Who is the best player in the history of NBA?',
            response='The best player in the history of the NBA is widely considered to be Michael Jordan. He is one of the most successful players in the league, having won 6 NBA championships with the Chicago Bulls and 5 more with the Washington Wizards. He is a 5-time MVP, 1'
        ),
        Dialogue(instruction='continue this talk', response=''),
    ], 200,
     'Below is an instruction that describes a task. Write a response that appropriately completes the request. Do not generate new instructions.\n\n### Instruction:\ncontinue this talk\n\n### Response:\n'
    ),
    ([
        Dialogue(
            instruction='Who is the best player in the history of NBA?',
            response='The best player in the history of the NBA is widely considered to be Michael Jordan. He is one of the most successful players in the league, having won 6 NBA championships with the Chicago Bulls and 5 more with the Washington Wizards. He is a 5-time MVP, 1'
        ),
        Dialogue(instruction='continue this talk', response=''),
    ], 211,
     'Below is an instruction that describes a task. Write a response that appropriately completes the request. Do not generate new instructions.\n\n### Instruction:\ncontinue this\n\n### Response:\n'
    ),
    ([
        Dialogue(instruction='Who is the best player in the history of NBA?', response=''),
    ], 128,
     'Below is an instruction that describes a task. Write a response that appropriately completes the request. Do not generate new instructions.\n\n### Instruction:\nWho is the best player in the history of NBA?\n\n### Response:\n'
    ),
]


def test_chat_prompt_processor():
    processor = ChatPromptProcessor(tokenizer, CONTEXT, 256)
    for history, max_new_tokens, result in samples:
        prompt = processor.preprocess_prompt(history, max_new_tokens)
        assert prompt == result


if __name__ == '__main__':
    test_chat_prompt_processor()
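
# A minimal way to run this test locally (hypothetical paths; PRETRAINED_PATH may point
# to any checkpoint whose tokenizer AutoTokenizer can load, e.g. a local GPT-2 directory):
#   PRETRAINED_PATH=/path/to/model python this_test_file.py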