diff --git a/applications/ColossalChat/examples/requirements.txt b/applications/ColossalChat/examples/requirements.txt index 31eef5256..3e4cc1a95 100644 --- a/applications/ColossalChat/examples/requirements.txt +++ b/applications/ColossalChat/examples/requirements.txt @@ -1,4 +1,4 @@ pandas>=1.4.1 sentencepiece -colossalai==0.4.7 +colossalai>=0.4.7 prompt_toolkit diff --git a/applications/ColossalChat/requirements.txt b/applications/ColossalChat/requirements.txt index 6b9511ad5..ecd876ef3 100755 --- a/applications/ColossalChat/requirements.txt +++ b/applications/ColossalChat/requirements.txt @@ -1,4 +1,4 @@ -transformers==4.39.3 +transformers>=4.39.3 tqdm datasets==2.14.7 loralib diff --git a/applications/ColossalChat/rl_example.py b/applications/ColossalChat/rl_example.py index 08814f9f1..58148b67e 100644 --- a/applications/ColossalChat/rl_example.py +++ b/applications/ColossalChat/rl_example.py @@ -152,7 +152,7 @@ if __name__ == "__main__": "-ei", "--eval-interval", type=int, - default=100, + default=-1, help="Interval for evaluation. Evaluate every ei training steps.", ) parser.add_argument( diff --git a/colossalai/shardformer/modeling/qwen3.py b/colossalai/shardformer/modeling/qwen3.py index 5e8c0762c..437693800 100644 --- a/colossalai/shardformer/modeling/qwen3.py +++ b/colossalai/shardformer/modeling/qwen3.py @@ -273,6 +273,7 @@ class Qwen3PipelineForwards: hidden_states: Optional[torch.FloatTensor] = None, stage_index: Optional[List[int]] = None, shard_config: ShardConfig = None, + **kwargs ): r""" Args: