feat(model): Usage infos with vLLM

FangYin Cheng 2023-11-17 17:04:37 +08:00
parent b45360441d
commit 3d7481d369
2 changed files with 24 additions and 1 deletion


@@ -2,6 +2,7 @@
 # -*- coding:utf-8 -*-
 
 import os
+from functools import cache
 
 ROOT_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 MODEL_PATH = os.path.join(ROOT_PATH, "models")
@@ -22,6 +23,7 @@ new_directory = PILOT_PATH
 os.chdir(new_directory)
 
 
+@cache
 def get_device() -> str:
     try:
         import torch

@@ -53,4 +53,25 @@ async def generate_stream(
         else:
             text_outputs = [output.text for output in request_output.outputs]
             text_outputs = " ".join(text_outputs)
-            yield {"text": text_outputs, "error_code": 0, "usage": {}}
+
+            # Note: usage is not supported yet
+            prompt_tokens = len(request_output.prompt_token_ids)
+            completion_tokens = sum(
+                len(output.token_ids) for output in request_output.outputs
+            )
+            usage = {
+                "prompt_tokens": prompt_tokens,
+                "completion_tokens": completion_tokens,
+                "total_tokens": prompt_tokens + completion_tokens,
+            }
+            finish_reason = (
+                request_output.outputs[0].finish_reason
+                if len(request_output.outputs) == 1
+                else [output.finish_reason for output in request_output.outputs]
+            )
+            yield {
+                "text": text_outputs,
+                "error_code": 0,
+                "usage": usage,
+                "finish_reason": finish_reason,
+            }
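
The practical effect of the second hunk: every streamed chunk now carries OpenAI-style token accounting (prompt_tokens, completion_tokens, total_tokens) plus a finish_reason, which is a single string when there is one sampled output and a list when there are several. A hedged consumer sketch, where fake_stream() and its payloads are invented stand-ins for the real generate_stream():

    import asyncio

    async def fake_stream():
        # Hypothetical stand-in for generate_stream(); vLLM streams cumulative
        # outputs, so each chunk's text and usage supersede the previous one.
        yield {
            "text": "Hello",
            "error_code": 0,
            "usage": {"prompt_tokens": 5, "completion_tokens": 1, "total_tokens": 6},
            "finish_reason": None,
        }
        yield {
            "text": "Hello world",
            "error_code": 0,
            "usage": {"prompt_tokens": 5, "completion_tokens": 2, "total_tokens": 7},
            "finish_reason": "stop",
        }

    async def main():
        last = None
        async for chunk in fake_stream():
            last = chunk  # keep only the final, complete chunk
        print(last["usage"], last["finish_reason"])

    asyncio.run(main())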