[sync] Sync feature/colossal-infer with main

This commit is contained in:
Yuanheng Zhao
2024-05-20 15:50:53 +00:00
61 changed files with 6976 additions and 276 deletions

View File

@@ -55,14 +55,14 @@ class Async_DynamicBatchManager(DynamicBatchManager):
self.stats_tool.count_prompt_tokens(new_batch)
self.running_batch = new_batch
has_new_finished, outputs = self._prefill_batch(self.running_batch)
self._filter_runing_batch()
self._filter_running_batch()
self.has_wait_tokens = 0
else:
if self.has_wait_tokens < self.max_wait_tokens:
self.stats_tool.count_output_tokens(self.running_batch)
has_new_finished, outputs = self._decode_batch(self.running_batch)
self._filter_runing_batch()
self._filter_running_batch()
self.has_wait_tokens += 1
else:
@@ -78,7 +78,7 @@ class Async_DynamicBatchManager(DynamicBatchManager):
else:
self.stats_tool.count_output_tokens(self.running_batch)
has_new_finished, outputs = self._decode_batch(self.running_batch)
self._filter_runing_batch()
self._filter_running_batch()
self.has_wait_tokens += 1
if has_new_finished:

View File

@@ -131,14 +131,14 @@ class DynamicBatchManager:
self.stats_tool.count_prompt_tokens(new_batch)
self.running_batch = new_batch
yield from self._prefill_batch(self.running_batch)
self._filter_runing_batch()
self._filter_running_batch()
self.has_wait_tokens = 0
return
if self.has_wait_tokens < self.max_wait_tokens:
self.stats_tool.count_output_tokens(self.running_batch)
yield from self._decode_batch(self.running_batch)
self._filter_runing_batch()
self._filter_running_batch()
self.has_wait_tokens += 1
return
else:
@@ -154,7 +154,7 @@ class DynamicBatchManager:
else:
self.stats_tool.count_output_tokens(self.running_batch)
yield from self._decode_batch(self.running_batch)
self._filter_runing_batch()
self._filter_running_batch()
self.has_wait_tokens += 1
return
@@ -243,7 +243,7 @@ class DynamicBatchManager:
self._filter_batch(batch)
yield from self._output_process(finished_reqs)
def _filter_running_batch(self):
    """Drop the running batch once it reports itself as clear.

    If ``self.running_batch`` is set and its ``is_clear()`` method returns a
    truthy value, ``self.running_batch`` is reset to ``None``. Otherwise the
    attribute is left untouched.

    Returns:
        None. The method only mutates ``self.running_batch``.
    """
    # NOTE(review): ``is_clear()`` presumably means "no unfinished requests
    # remain in the batch" -- confirm against the batch class.
    if self.running_batch is not None and self.running_batch.is_clear():
        self.running_batch = None


# Backward-compatible alias for the old, misspelled name ("runing"), which
# other call sites in this listing still reference.
_filter_runing_batch = _filter_running_batch