Merge branch 'main' of github.com:hpcaitech/ColossalAI into prefetch

hxwang
2024-05-24 04:05:07 +00:00
16 changed files with 192 additions and 82 deletions


@@ -20,7 +20,7 @@ class QuickstartUser(HttpUser):
         self.client.post(
             "/chat",
             json={
-                "converation": [
+                "messages": [
                     {"role": "system", "content": "you are a helpful assistant"},
                     {"role": "user", "content": "what is 1+1?"},
                 ],
@@ -34,7 +34,7 @@ class QuickstartUser(HttpUser):
         self.client.post(
             "/chat",
             json={
-                "converation": [
+                "messages": [
                     {"role": "system", "content": "you are a helpful assistant"},
                     {"role": "user", "content": "what is 1+1?"},
                 ],
@@ -42,6 +42,7 @@ class QuickstartUser(HttpUser):
             },
         )

+    # offline-generation is only for demonstrating usage; it is never used in actual serving.
     @tag("offline-generation")
     @task(5)
     def generate_streaming(self):
@@ -54,5 +55,5 @@ class QuickstartUser(HttpUser):
     @tag("online-generation", "offline-generation")
     @task
-    def get_models(self):
-        self.client.get("/models")
+    def health_check(self):
+        self.client.get("/ping")
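
Taken together, the first file's changes fix the misspelled "converation" request key, document the offline-generation tag, and turn the /models listing task into a /ping health check. A minimal standalone sketch of the resulting locustfile follows; it is pieced together from the visible hunks rather than the literal file (the chat task's method name and the class skeleton are assumptions), and presumes locust is installed with an inference server exposing POST /chat and GET /ping:

from locust import HttpUser, tag, task

class QuickstartUser(HttpUser):
    @tag("online-generation")
    @task(5)
    def chat(self):
        # "messages" is the corrected request key (previously the typo "converation").
        self.client.post(
            "/chat",
            json={
                "messages": [
                    {"role": "system", "content": "you are a helpful assistant"},
                    {"role": "user", "content": "what is 1+1?"},
                ],
            },
        )

    @tag("online-generation", "offline-generation")
    @task
    def health_check(self):
        # Renamed from get_models: probes liveness via /ping instead of listing /models.
        self.client.get("/ping")

In practice such a file is driven with something like "locust -f locustfile.py --tags online-generation", which is how the offline-generation tasks can be excluded when benchmarking a real serving deployment.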


@@ -78,6 +78,8 @@ def main():
     parser.add_argument("--zero", type=int, default=0, help="Zero Stage when hybrid plugin is enabled")
     parser.add_argument("--custom-ckpt", action="store_true", help="Customize checkpoint", default=False)
     parser.add_argument("--profile", action="store_true", help="Enable profiling", default=False)
+    parser.add_argument("--disable-async-reduce", action="store_true", help="Disable asynchronous gradient reduce", default=False)
     args = parser.parse_args()

     colossalai.launch_from_torch()
@@ -113,6 +115,7 @@ def main():
             enable_fused_normalization=torch.cuda.is_available(),
             enable_flash_attention=args.xformers,
             max_prefetch=10,
+            enable_async_reduce=not args.disable_async_reduce,
         )
     elif args.plugin == "gemini_auto":
         plugin = GeminiPlugin(
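
The second file wires an opt-out flag into the Gemini plugin: asynchronous reduce stays enabled by default, and --disable-async-reduce negates it at the plugin boundary. A hedged sketch of that pattern (GeminiPlugin, max_prefetch, and enable_async_reduce appear in the diff; the import path and launch call follow standard ColossalAI usage, and the rest is illustrative):

import argparse

import colossalai
from colossalai.booster.plugin import GeminiPlugin

parser = argparse.ArgumentParser()
# Opt-out switch: async reduce is on unless the user explicitly disables it.
parser.add_argument("--disable-async-reduce", action="store_true", default=False,
                    help="Disable asynchronous gradient reduce")
args = parser.parse_args()

# Requires a torchrun-launched distributed environment.
colossalai.launch_from_torch()

# The plugin argument is the negation of the CLI switch, so the default
# behavior (flag absent) is enable_async_reduce=True.
plugin = GeminiPlugin(
    max_prefetch=10,  # prefetch depth set in the same diff
    enable_async_reduce=not args.disable_async_reduce,
)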