fix typo change dosen't to doesn't (#5308)

digger yu 2024-01-30 09:57:38 +08:00 committed by GitHub
parent 6a3086a505
commit 71321a07cf
8 changed files with 14 additions and 14 deletions


@@ -49,7 +49,7 @@ class FalconPolicy(Policy):
         if not self.model.config.new_decoder_architecture and self.model.config.multi_query:
             warnings.warn(
-                "Falcon dosen't support tensor parallelism when (not new_decoder_architecture and multi_query) is True, will ignore the tensor parallelism flag."
+                "Falcon doesn't support tensor parallelism when (not new_decoder_architecture and multi_query) is True, will ignore the tensor parallelism flag."
             )
             self.shard_config.enable_tensor_parallelism = False
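
Note: every policy touched by this commit follows the same fallback: an unsupported parallelism mode is downgraded by clearing the corresponding shard_config flag and emitting a warning rather than raising. A minimal sketch of that pattern, using a hypothetical stand-in config object with the boolean flags visible in this diff (not the real colossalai ShardConfig):

import warnings
from dataclasses import dataclass

@dataclass
class DemoShardConfig:
    # Hypothetical stand-in; the real shard config carries more fields.
    enable_tensor_parallelism: bool = True
    enable_sequence_parallelism: bool = True

def preprocess(config: DemoShardConfig, model_name: str = "Falcon") -> DemoShardConfig:
    # Downgrade the unsupported feature: clear the flag, warn, and keep running.
    if config.enable_sequence_parallelism:
        config.enable_sequence_parallelism = False
        warnings.warn(
            f"{model_name} doesn't support sequence parallelism now, "
            "will ignore the sequence parallelism flag."
        )
    return config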


@@ -46,7 +46,7 @@ class LlamaPolicy(Policy):
         if self.shard_config.enable_sequence_parallelism:
             self.shard_config.enable_sequence_parallelism = False
-            warnings.warn("Llama dosen't support sequence parallelism now, will ignore the sequence parallelism flag.")
+            warnings.warn("Llama doesn't support sequence parallelism now, will ignore the sequence parallelism flag.")
         if self.shard_config.enable_tensor_parallelism:
             decoder_attribute_replacement = {


@@ -35,7 +35,7 @@ class MistralPolicy(Policy):
         if self.shard_config.enable_sequence_parallelism:
             self.shard_config.enable_sequence_parallelism = False
             warnings.warn(
-                "Mistral dosen't support sequence parallelism now, will ignore the sequence parallelism flag."
+                "Mistral doesn't support sequence parallelism now, will ignore the sequence parallelism flag."
             )
         if self.shard_config.enable_tensor_parallelism:
@@ -136,7 +136,7 @@ class MistralModelPolicy(MistralPolicy):
     def module_policy(self):
         if self.pipeline_stage_manager:
-            warnings.warn("Mistral dosen't support pipeline parallelism now.")
+            warnings.warn("Mistral doesn't support pipeline parallelism now.")
         return super().module_policy()
@@ -160,7 +160,7 @@ class MistralForCausalLMPolicy(MistralPolicy):
         }
         if self.pipeline_stage_manager:
-            warnings.warn("Mistral dosen't support pipeline parallelism now.")
+            warnings.warn("Mistral doesn't support pipeline parallelism now.")
         policy.update(new_item)
@@ -186,7 +186,7 @@ class MistralForSequenceClassificationPolicy(MistralPolicy):
         }
         if self.pipeline_stage_manager:
-            warnings.warn("Mistral dosen't support pipeline parallelism now.")
+            warnings.warn("Mistral doesn't support pipeline parallelism now.")
         policy.update(new_item)
         return policy


@@ -59,7 +59,7 @@ class OPTPolicy(Policy):
         if self.shard_config.enable_sequence_parallelism:
             self.shard_config.enable_sequence_parallelism = False
-            warnings.warn("OPT dosen't support sequence parallelism now, will ignore the sequence parallelism flag.")
+            warnings.warn("OPT doesn't support sequence parallelism now, will ignore the sequence parallelism flag.")
         if self.shard_config.enable_tensor_parallelism:
             policy[OPTDecoder] = ModulePolicyDescription(


@@ -66,7 +66,7 @@ class T5BasePolicy(Policy):
         if self.shard_config.enable_sequence_parallelism:
             self.shard_config.enable_sequence_parallelism = False
-            warnings.warn("T5 dosen't support sequence parallelism now, will ignore the sequence parallelism flag.")
+            warnings.warn("T5 doesn't support sequence parallelism now, will ignore the sequence parallelism flag.")
         if self.shard_config.enable_tensor_parallelism:
             policy[T5Stack] = ModulePolicyDescription(
@@ -263,7 +263,7 @@ class T5BasePolicy(Policy):
         if num_decoder_layers == 0:
             return Policy.distribute_layers(num_encoder_layers, num_stages), num_stages
-        # the number of stages distributed between encoder and decoder is optmized in this way:
+        # the number of stages distributed between encoder and decoder is optimized in this way:
         # num_encoder_stages = argmin(abs(num_encoder_layers / encoder_stages - num_decoder_layers / decoder_stages))
         # s.t. num_encoder_stages + num_decoder_stages = num_stages, num_encoder_stages >= 1, num_decoder_stages >= 1
         def objective(num_encoder_stages):
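
The comment in the hunk above describes a small search for the encoder/decoder stage split. A self-contained sketch of that objective, with an illustrative function name and signature rather than the actual T5BasePolicy helper:

def split_stages(num_encoder_layers: int, num_decoder_layers: int, num_stages: int) -> int:
    # Choose num_encoder_stages minimizing the imbalance in layers per stage,
    # subject to num_encoder_stages + num_decoder_stages == num_stages and both >= 1.
    def objective(num_encoder_stages: int) -> float:
        num_decoder_stages = num_stages - num_encoder_stages
        return abs(num_encoder_layers / num_encoder_stages - num_decoder_layers / num_decoder_stages)

    return min(range(1, num_stages), key=objective)

# Example: 12 encoder layers, 12 decoder layers, 4 stages -> 2 encoder stages, 2 decoder stages.
assert split_stages(12, 12, 4) == 2

WhisperPolicy carries the same comment and the same objective (see the hunk at -302 further down).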


@@ -33,7 +33,7 @@ class ViTPolicy(Policy):
         if self.shard_config.enable_sequence_parallelism:
             self.shard_config.enable_sequence_parallelism = False
-            warnings.warn("Vit dosen't support sequence parallelism now, will ignore the sequence parallelism flag.")
+            warnings.warn("Vit doesn't support sequence parallelism now, will ignore the sequence parallelism flag.")
         if self.shard_config.enable_tensor_parallelism:
             policy[ViTEmbeddings] = ModulePolicyDescription(


@@ -69,13 +69,13 @@ class WhisperPolicy(Policy):
         if self.shard_config.enable_sequence_parallelism:
             self.shard_config.enable_sequence_parallelism = False
             warnings.warn(
-                "Whisper dosen't support sequence parallelism now, will ignore the sequence parallelism flag."
+                "Whisper doesn't support sequence parallelism now, will ignore the sequence parallelism flag."
             )
         # TODO using the jit fused add_and_dropout affect the accuracy
         if self.shard_config.enable_jit_fused:
             self.shard_config.enable_jit_fused = False
-            warnings.warn("Whisper dosen't support jit fused operator now, will ignore the jit fused operator flag.")
+            warnings.warn("Whisper doesn't support jit fused operator now, will ignore the jit fused operator flag.")
         if self.shard_config.enable_tensor_parallelism:
             policy[WhisperEncoderLayer] = ModulePolicyDescription(
@@ -302,7 +302,7 @@ class WhisperPolicy(Policy):
         if num_decoder_layers == 0:
             return Policy.distribute_layers(num_encoder_layers, num_stages), num_stages
-        # the number of stages distributed between encoder and decoder is optmized in this way:
+        # the number of stages distributed between encoder and decoder is optimized in this way:
         # num_encoder_stages = argmin(abs(num_encoder_layers / encoder_stages - num_decoder_layers / decoder_stages))
         # s.t. num_encoder_stages + num_decoder_stages = num_stages, num_encoder_stages >= 1, num_decoder_stages >= 1
         def objective(num_encoder_stages):


@@ -43,7 +43,7 @@ class OpenMoePolicy(Policy):
         if self.shard_config.enable_sequence_parallelism:
             self.shard_config.enable_sequence_parallelism = False
             raise NotImplementedError(
-                "openmoe dosen't support sequence parallelism now, will ignore the sequence parallelism flag.")
+                "openmoe doesn't support sequence parallelism now, will ignore the sequence parallelism flag.")
         if self.shard_config.enable_tensor_parallelism:
             raise NotImplementedError("Tensor parallelism is not supported for openmoe model now.")
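
Unlike the policies above, which downgrade unsupported flags with a warning, OpenMoePolicy rejects them outright. A minimal sketch of that stricter guard; the function name and messages are illustrative, not the actual policy code:

def check_openmoe_config(config) -> None:
    # Fail fast: refuse unsupported parallelism instead of silently disabling it.
    if config.enable_sequence_parallelism:
        raise NotImplementedError("Sequence parallelism is not supported for the openmoe model now.")
    if config.enable_tensor_parallelism:
        raise NotImplementedError("Tensor parallelism is not supported for the openmoe model now.")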