mirror of https://github.com/csunny/DB-GPT.git
feat(model): Usage infos with vLLM
commit 3d7481d369 (parent b45360441d)
@@ -2,6 +2,7 @@
 # -*- coding:utf-8 -*-

 import os
+from functools import cache

 ROOT_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 MODEL_PATH = os.path.join(ROOT_PATH, "models")
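As an aside on the path arithmetic: each `os.path.dirname` call strips one trailing component, so the triple application climbs three directory levels from this file to the repository root. A walk-through with a hypothetical checkout path (the real location of this config file is not shown in the diff):

# Hypothetical: __file__ == "/repo/pilot/configs/model_config.py"
# abspath(__file__)               -> /repo/pilot/configs/model_config.py
# dirname(...)                    -> /repo/pilot/configs
# dirname(dirname(...))           -> /repo/pilot
# dirname(dirname(dirname(...)))  -> /repo         (ROOT_PATH)
# join(ROOT_PATH, "models")       -> /repo/models  (MODEL_PATH)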
@@ -22,6 +23,7 @@ new_directory = PILOT_PATH
 os.chdir(new_directory)


+@cache
 def get_device() -> str:
     try:
         import torch
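`functools.cache` (added in Python 3.9) memoizes the decorated function, so the `torch` import and hardware probe inside `get_device()` run once per process; every later call returns the cached string. The diff truncates the function body, so the probe below is a plausible sketch of the pattern, not the repository's exact code:

from functools import cache


@cache
def get_device() -> str:
    # The body runs only on the first call; the result is memoized.
    try:
        import torch

        return "cuda" if torch.cuda.is_available() else "cpu"
    except ModuleNotFoundError:
        # Sketch assumption: fall back to CPU when torch is absent.
        return "cpu"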
@@ -53,4 +53,25 @@ async def generate_stream(
         else:
             text_outputs = [output.text for output in request_output.outputs]
             text_outputs = " ".join(text_outputs)
-            yield {"text": text_outputs, "error_code": 0, "usage": {}}
+
+            # Note: usage is not supported yet
+            prompt_tokens = len(request_output.prompt_token_ids)
+            completion_tokens = sum(
+                len(output.token_ids) for output in request_output.outputs
+            )
+            usage = {
+                "prompt_tokens": prompt_tokens,
+                "completion_tokens": completion_tokens,
+                "total_tokens": prompt_tokens + completion_tokens,
+            }
+            finish_reason = (
+                request_output.outputs[0].finish_reason
+                if len(request_output.outputs) == 1
+                else [output.finish_reason for output in request_output.outputs]
+            )
+            yield {
+                "text": text_outputs,
+                "error_code": 0,
+                "usage": usage,
+                "finish_reason": finish_reason,
+            }
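The usage dict follows OpenAI-style accounting: prompt tokens are counted from `prompt_token_ids`, completion tokens are summed over every sampled output, and `finish_reason` collapses to a scalar when there is exactly one output. A self-contained sketch of that accounting, using `SimpleNamespace` stand-ins in place of real `vllm.RequestOutput` objects:

from types import SimpleNamespace

# Stand-in for a vllm RequestOutput with one sampled completion.
request_output = SimpleNamespace(
    prompt_token_ids=[1, 2, 3, 4],  # 4 prompt tokens
    outputs=[SimpleNamespace(text="Hello", token_ids=[5, 6], finish_reason="stop")],
)

prompt_tokens = len(request_output.prompt_token_ids)
completion_tokens = sum(len(o.token_ids) for o in request_output.outputs)
usage = {
    "prompt_tokens": prompt_tokens,
    "completion_tokens": completion_tokens,
    "total_tokens": prompt_tokens + completion_tokens,
}
assert usage == {"prompt_tokens": 4, "completion_tokens": 2, "total_tokens": 6}

Note that with n > 1 sampling, `finish_reason` in the yielded dict becomes a list, so consumers of the stream should be prepared to handle both shapes.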