mirror of https://github.com/csunny/DB-GPT.git
feat(model): Usage infos with vLLM
commit 3d7481d369 (parent b45360441d)
@@ -2,6 +2,7 @@
 # -*- coding:utf-8 -*-

 import os
+from functools import cache

 ROOT_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 MODEL_PATH = os.path.join(ROOT_PATH, "models")
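As an aside on the path arithmetic: each `os.path.dirname` call strips one trailing component, so the triple application climbs three directory levels from this file to the repository root. A walk-through with a hypothetical checkout path (the real location of this config file is not shown in the diff):

# Hypothetical: __file__ == "/repo/pilot/configs/model_config.py"
# abspath(__file__)               -> /repo/pilot/configs/model_config.py
# dirname(...)                    -> /repo/pilot/configs
# dirname(dirname(...))           -> /repo/pilot
# dirname(dirname(dirname(...)))  -> /repo         (ROOT_PATH)
# join(ROOT_PATH, "models")       -> /repo/models  (MODEL_PATH)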
@@ -22,6 +23,7 @@ new_directory = PILOT_PATH
 os.chdir(new_directory)


+@cache
 def get_device() -> str:
     try:
         import torch
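`functools.cache` (added in Python 3.9) memoizes the decorated function, so the `torch` import and hardware probe inside `get_device()` run once per process; every later call returns the cached string. The diff truncates the function body, so the probe below is a plausible sketch of the pattern, not the repository's exact code:

from functools import cache


@cache
def get_device() -> str:
    # The body runs only on the first call; the result is memoized.
    try:
        import torch

        return "cuda" if torch.cuda.is_available() else "cpu"
    except ModuleNotFoundError:
        # Sketch assumption: fall back to CPU when torch is absent.
        return "cpu"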
@@ -53,4 +53,25 @@ async def generate_stream(
         else:
             text_outputs = [output.text for output in request_output.outputs]
             text_outputs = " ".join(text_outputs)
-            yield {"text": text_outputs, "error_code": 0, "usage": {}}
+
+            # Note: usage is not supported yet
+            prompt_tokens = len(request_output.prompt_token_ids)
+            completion_tokens = sum(
+                len(output.token_ids) for output in request_output.outputs
+            )
+            usage = {
+                "prompt_tokens": prompt_tokens,
+                "completion_tokens": completion_tokens,
+                "total_tokens": prompt_tokens + completion_tokens,
+            }
+            finish_reason = (
+                request_output.outputs[0].finish_reason
+                if len(request_output.outputs) == 1
+                else [output.finish_reason for output in request_output.outputs]
+            )
+            yield {
+                "text": text_outputs,
+                "error_code": 0,
+                "usage": usage,
+                "finish_reason": finish_reason,
+            }
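The usage dict follows OpenAI-style accounting: prompt tokens are counted from `prompt_token_ids`, completion tokens are summed over every sampled output, and `finish_reason` collapses to a scalar when there is exactly one output. A self-contained sketch of that accounting, using `SimpleNamespace` stand-ins in place of real `vllm.RequestOutput` objects:

from types import SimpleNamespace

# Stand-in for a vllm RequestOutput with one sampled completion.
request_output = SimpleNamespace(
    prompt_token_ids=[1, 2, 3, 4],  # 4 prompt tokens
    outputs=[SimpleNamespace(text="Hello", token_ids=[5, 6], finish_reason="stop")],
)

prompt_tokens = len(request_output.prompt_token_ids)
completion_tokens = sum(len(o.token_ids) for o in request_output.outputs)
usage = {
    "prompt_tokens": prompt_tokens,
    "completion_tokens": completion_tokens,
    "total_tokens": prompt_tokens + completion_tokens,
}
assert usage == {"prompt_tokens": 4, "completion_tokens": 2, "total_tokens": 6}

Note that with n > 1 sampling, `finish_reason` in the yielded dict becomes a list, so consumers of the stream should be prepared to handle both shapes.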