# Mirror of https://github.com/csunny/DB-GPT.git
# Synced 2026-01-25 14:54:26 +00:00 (63 lines, 1.7 KiB, Python)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Proxy-LLM streaming adapter.

Forwards chat prompts to an OpenAI-compatible HTTP endpoint whose URL and
API key come from pilot's Config (``proxy_server_url`` / ``proxy_api_key``).
"""

import json

import requests

from pilot.configs.config import Config
from pilot.conversation import ROLE_ASSISTANT, ROLE_USER

# Process-wide configuration; supplies proxy_api_key and proxy_server_url.
CFG = Config()
def proxyllm_generate_stream(model, tokenizer, params, device, context_len=2048):
    """Stream a chat completion from the configured proxy endpoint.

    The conversation is reconstructed from ``params["prompt"]``, which encodes
    alternating user/assistant turns separated by a stop token (default
    ``"###"``), each turn prefixed with ``ROLE_USER + ":"`` or
    ``ROLE_ASSISTANT + ":"``.

    Args:
        model: unused; kept for a uniform generate_stream worker signature.
        tokenizer: unused; kept for signature compatibility.
        params: dict with at least ``"prompt"``; optional ``"stop"``,
            ``"temperature"`` and ``"max_new_tokens"``.
        device: unused; kept for signature compatibility.
        context_len: unused; kept for signature compatibility.

    Yields:
        str: the accumulated response text after each received chunk.
    """
    prompt = params["prompt"]
    stop = params.get("stop", "###")

    headers = {
        "Authorization": "Bearer " + CFG.proxy_api_key,
        "Token": CFG.proxy_api_key,
    }

    messages = prompt.split(stop)

    history = []
    # Rebuild prior turns. messages[0] is the leading segment before the
    # first stop token and the last element is the trailer, so user/assistant
    # pairs live at odd indices 1 .. len(messages) - 3.
    for i in range(1, len(messages) - 2, 2):
        # maxsplit=1 so a role marker occurring inside the message body does
        # not truncate the content (a plain split(...)[1] kept only the text
        # up to the next occurrence of the marker).
        history.append(
            {"role": "user", "content": messages[i].split(ROLE_USER + ":", 1)[1]},
        )
        history.append(
            {
                # NOTE(review): assistant replies are sent with role "system";
                # OpenAI-style APIs normally expect "assistant". Preserved
                # as-is — confirm against the proxy's contract before changing.
                "role": "system",
                "content": messages[i + 1].split(ROLE_ASSISTANT + ":", 1)[1],
            }
        )

    # The current user query is the second-to-last segment.
    query = messages[-2].split(ROLE_USER + ":", 1)[1]
    history.append({"role": "user", "content": query})

    payloads = {
        "model": "gpt-3.5-turbo",  # just for test, remove this later
        "messages": history,
        "temperature": params.get("temperature"),
        "max_tokens": params.get("max_new_tokens"),
    }

    res = requests.post(
        CFG.proxy_server_url, headers=headers, json=payloads, stream=True
    )

    text = ""
    for raw_line in res.iter_lines():
        if not raw_line:
            continue
        decoded_line = raw_line.decode("utf-8")
        # NOTE(review): this assumes every non-empty line is a complete JSON
        # object carrying a full "message" — a true SSE stream would instead
        # send "data: ..." chunks with a "delta" field. Verify the proxy's
        # actual wire format.
        json_line = json.loads(decoded_line)
        text += json_line["choices"][0]["message"]["content"]
        yield text