Mirror of https://github.com/hpcaitech/ColossalAI.git (synced 2025-07-03 02:26:26 +00:00)
Latest commit (squashed); changes included:

* flash_attention forward upgrade; llama_model_forward; remove useless comment
* update requirements.txt; add the transformers version requirement; remove the LATEST VERSION try
* [shardformer] update bloom model (#5518): update bloom model; remove the version restriction
* [shardformer] update_falcon (#5520)
* [shardformer] update mistral model (#5511)
* [shardformer] update gpt2 (#5502)
* [shardformer] update gptj model (#5503)
* [shardformer] update opt (#5522)
* [shardformer] update t5 model (#5524)
* [shardformer] update whisper model (#5529)
* [shardformer] update vit model (#5530): remove the output_hidden_states
* [shardformer] fix llama modeling
* [zero] support multiple (partial) backward passes (#5596); [misc] update requirements
* [doc] fix ColossalMoE readme (#5599)
* [hotfix] fix the examples' missing pad token and an auto-parallel codegen bug (#5606)
* [shardformer] fix pipeline grad ckpt (#5620)
* [shardformer] fix whisper (#5628)
* [test] fix llama model test
* fix the opt upgrade (#5634)
* [shardformer] fix attn replacement (#5636)
* [shardformer] update flashattention replacement (#5637); update transformers
* [test] fix llama test (#5638)
* [gemini] fix buffer cast (#5639)
* Fix shardformer upgrade (#5640): fix the llama, mistral, and shardformer models
* [shardformer] support pipeline parallelism for mistral (#5642)
* [Feature] Support LLaMA-3 CPT and ST (#5619)
* [example] update llama example (#5626): [plugin] support dp inside for hybrid parallel; update the llama benchmark and readme
* [example] llama3 (#5631): release llama3
* merge with main; fix conflicts; [pre-commit.ci] auto fixes from pre-commit.com hooks

Co-authored-by: Hongxin Liu <lhx0217@gmail.com>, pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>, Camille Zhong <44392324+Camille7777@users.noreply.github.com>, Edenzzzz <wenxuan.tan@wisc.edu>, Edenzzzz <wtan45@wisc.edu>, flybird11111 <1829166702@qq.com>, Tong Li <tong.li352711588@gmail.com>, binmakeswell <binmakeswell@gmail.com>
100 lines · 3.3 KiB · Python
import torch
import transformers

from ..registry import ModelAttribute, model_zoo

try:
    from transformers import LlamaConfig

    HAS_LLAMA = True
except ImportError:
    HAS_LLAMA = False

if HAS_LLAMA:
    # ===============================
    #    Register LLaMA
    # ===============================

    def data_gen():
        # the input ids correspond to the sentence
        # 'Hello, my dog is cute'
        #
        # the code is given below:
        # -----------------------------------
        # from transformers import LlamaTokenizerFast
        # tokenizer = LlamaTokenizerFast.from_pretrained("hf-internal-testing/llama-tokenizer")
        # input = 'Hello, my dog is cute'
        # tokenized_input = tokenizer(input, return_tensors='pt').to('cuda')
        # -----------------------------------

        input_ids = torch.Tensor(
            [
                [1, 15043, 29892, 590, 11203, 338, 274, 1082, 1, 15043, 29892, 590, 11203, 338, 274, 1082],
                [1, 15043, 29892, 590, 11203, 338, 274, 1082, 1, 15043, 29892, 590, 11203, 338, 274, 1082],
            ]
        ).long()

        attention_mask = torch.Tensor(
            [
                [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
            ]
        ).long()

        return dict(input_ids=input_ids, attention_mask=attention_mask)
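
    # data_gen() yields a (2, 16) batch: each row is the 8-token encoding of
    # 'Hello, my dog is cute' repeated twice, with a fully-unmasked attention mask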

    # labels are needed for causal LM
    def data_gen_for_casual_lm():
        data = data_gen()
        labels = data["input_ids"].clone()
        data["labels"] = labels
        return data
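
    # data_gen_for_casual_lm() passes labels identical to input_ids; this is the standard
    # language-modeling setup, since HF causal-LM heads shift the logits/labels internally
    # before computing the cross-entropy loss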

    # transform the output to a dict
    output_transform_fn = lambda x: x

    # function to get the loss
    loss_fn = lambda output: output["last_hidden_state"].mean()
    loss_fn_for_casual_lm = lambda output: output["loss"]
    loss_fn_for_seq_classification = lambda output: output["logits"].mean()
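
    # the bare LlamaModel and the sequence-classification entry receive no labels, so their
    # loss_fn reduces the outputs to a differentiable scalar with .mean(); only the
    # causal-LM entry uses the model's own loss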

    config = LlamaConfig(
        num_hidden_layers=8,
        hidden_size=32,
        intermediate_size=64,
        num_attention_heads=4,
        max_position_embeddings=128,
    )

    if hasattr(config, "pad_token_id"):
        config.pad_token_id = config.eos_token_id
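
    # the LLaMA tokenizer defines no pad token of its own; aliasing pad_token_id to
    # eos_token_id lets LlamaForSequenceClassification locate the last non-padding token
    # when pooling the classification logits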

    # register the following models
    # transformers.LlamaModel,
    # transformers.LlamaForCausalLM,
    # transformers.LlamaForSequenceClassification,
    model_zoo.register(
        name="transformers_llama",
        model_fn=lambda: transformers.LlamaModel(config),
        data_gen_fn=data_gen,
        output_transform_fn=output_transform_fn,
        loss_fn=loss_fn,
        model_attribute=ModelAttribute(has_control_flow=True),
    )
    model_zoo.register(
        name="transformers_llama_for_casual_lm",
        model_fn=lambda: transformers.LlamaForCausalLM(config),
        data_gen_fn=data_gen_for_casual_lm,
        output_transform_fn=output_transform_fn,
        loss_fn=loss_fn_for_casual_lm,
        model_attribute=ModelAttribute(has_control_flow=True),
    )
    model_zoo.register(
        name="transformers_llama_for_sequence_classification",
        model_fn=lambda: transformers.LlamaForSequenceClassification(config),
        data_gen_fn=data_gen,
        output_transform_fn=output_transform_fn,
        loss_fn=loss_fn_for_seq_classification,
        model_attribute=ModelAttribute(has_control_flow=True),
    )
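
The registered entries can also be exercised on their own. Below is a minimal, illustrative sketch, assuming torch and a transformers release that ships LlamaForCausalLM; it rebuilds the same tiny LlamaConfig and runs one forward/backward pass the way the registered causal-LM entry would be driven.

import torch
import transformers
from transformers import LlamaConfig

# same tiny configuration as above
config = LlamaConfig(
    num_hidden_layers=8,
    hidden_size=32,
    intermediate_size=64,
    num_attention_heads=4,
    max_position_embeddings=128,
)
config.pad_token_id = config.eos_token_id

model = transformers.LlamaForCausalLM(config)

# the same token ids data_gen() hard-codes: 'Hello, my dog is cute' repeated twice per row
input_ids = torch.tensor([[1, 15043, 29892, 590, 11203, 338, 274, 1082] * 2] * 2, dtype=torch.long)
attention_mask = torch.ones_like(input_ids)
labels = input_ids.clone()  # causal LM: labels mirror the inputs, the model shifts them internally

outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
loss = outputs["loss"]  # the value loss_fn_for_casual_lm extracts
loss.backward()
print(f"smoke-test loss: {loss.item():.4f}")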