diff --git a/tests/kit/model_zoo/transformers/chatglm2_6b/modeling_chatglm.py b/tests/kit/model_zoo/transformers/chatglm2_6b/modeling_chatglm.py
index f704715e1..46078f441 100644
--- a/tests/kit/model_zoo/transformers/chatglm2_6b/modeling_chatglm.py
+++ b/tests/kit/model_zoo/transformers/chatglm2_6b/modeling_chatglm.py
@@ -396,24 +396,20 @@ class SelfAttention(torch.nn.Module):
             self.num_multi_query_groups_per_partition = config.multi_query_group_num
             self.qkv_hidden_size = (self.projection_size +
                                     2 * self.hidden_size_per_attention_head * config.multi_query_group_num)
-        self.query_key_value = nn.Linear(
-            config.hidden_size,
-            self.qkv_hidden_size,
-            bias=config.add_bias_linear or config.add_qkv_bias,
-            device=device,
-            **_config_to_kwargs(config),
-        )
+        self.query_key_value = nn.Linear(self.hidden_size,
+                                         self.qkv_hidden_size,
+                                         bias=config.add_bias_linear or config.add_qkv_bias,
+                                         device=device,
+                                         **_config_to_kwargs(config))
 
         self.core_attention = CoreAttention(config, self.layer_number)
 
         # Output.
-        self.dense = nn.Linear(
-            self.projection_size,
-            config.hidden_size,
-            bias=config.add_bias_linear,
-            device=device,
-            **_config_to_kwargs(config),
-        )
+        self.dense = nn.Linear(self.projection_size,
+                               self.hidden_size,
+                               bias=config.add_bias_linear,
+                               device=device,
+                               **_config_to_kwargs(config))
 
     def _allocate_memory(self, inference_max_sequence_len, batch_size, device=None, dtype=None):
         if self.multi_query_attention:
@@ -925,7 +921,6 @@ class ChatGLMModel(ChatGLMPreTrainedModel):
 
         if inputs_embeds is None:
             inputs_embeds = self.embedding(input_ids)
-        print(inputs_embeds)
 
         if self.pre_seq_len is not None:
             if past_key_values is None: