mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-07-11 14:24:27 +00:00
delete version tag
This commit is contained in:
parent
a6377bb331
commit
de73202757
@ -9,11 +9,7 @@ Doc:
|
|||||||
- For completion service, you can invoke it by using `curl -X POST http://127.0.0.1:8000/v1/completion \
|
- For completion service, you can invoke it by using `curl -X POST http://127.0.0.1:8000/v1/completion \
|
||||||
-H 'Content-Type: application/json' \
|
-H 'Content-Type: application/json' \
|
||||||
-d '{"prompt":"hello, who are you? ","stream":"False"}'`
|
-d '{"prompt":"hello, who are you? ","stream":"False"}'`
|
||||||
|
Version: V1.0
|
||||||
Version declaration:
|
|
||||||
- This is the first version of the API server for Colossal-Inference
|
|
||||||
- V0 stands for the under development api, such as models, changes should be made to perfect it.
|
|
||||||
- V1 stands for the currently supported api, such as completion and chat, this is the first version.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
@ -41,7 +37,8 @@ completion_serving = None
|
|||||||
app = FastAPI()
|
app = FastAPI()
|
||||||
|
|
||||||
|
|
||||||
@app.get("/v0/models")
|
# NOTE: models are still under development, need to be updated
|
||||||
|
@app.get("/models")
|
||||||
def get_available_models() -> Response:
|
def get_available_models() -> Response:
|
||||||
return JSONResponse(supported_models_dict)
|
return JSONResponse(supported_models_dict)
|
||||||
|
|
||||||
@ -86,7 +83,7 @@ async def generate(request: Request) -> Response:
|
|||||||
return JSONResponse(ret)
|
return JSONResponse(ret)
|
||||||
|
|
||||||
|
|
||||||
@app.post("/v1/completion")
|
@app.post("/completion")
|
||||||
async def create_completion(request: Request):
|
async def create_completion(request: Request):
|
||||||
request_dict = await request.json()
|
request_dict = await request.json()
|
||||||
stream = request_dict.pop("stream", "false").lower()
|
stream = request_dict.pop("stream", "false").lower()
|
||||||
@ -100,7 +97,7 @@ async def create_completion(request: Request):
|
|||||||
return JSONResponse(content=ret)
|
return JSONResponse(content=ret)
|
||||||
|
|
||||||
|
|
||||||
@app.post("/v1/chat")
|
@app.post("/chat")
|
||||||
async def create_chat(request: Request):
|
async def create_chat(request: Request):
|
||||||
request_dict = await request.json()
|
request_dict = await request.json()
|
||||||
|
|
||||||
|
@ -7,18 +7,18 @@ class QuickstartUser(HttpUser):
|
|||||||
@tag("online-generation")
|
@tag("online-generation")
|
||||||
@task(5)
|
@task(5)
|
||||||
def completion(self):
|
def completion(self):
|
||||||
self.client.post("/v1/completion", json={"prompt": "hello, who are you? ", "stream": "False"})
|
self.client.post("/completion", json={"prompt": "hello, who are you? ", "stream": "False"})
|
||||||
|
|
||||||
@tag("online-generation")
|
@tag("online-generation")
|
||||||
@task(5)
|
@task(5)
|
||||||
def completion_streaming(self):
|
def completion_streaming(self):
|
||||||
self.client.post("/v1/completion", json={"prompt": "hello, who are you? ", "stream": "True"})
|
self.client.post("/completion", json={"prompt": "hello, who are you? ", "stream": "True"})
|
||||||
|
|
||||||
@tag("online-chat")
|
@tag("online-chat")
|
||||||
@task(5)
|
@task(5)
|
||||||
def chat(self):
|
def chat(self):
|
||||||
self.client.post(
|
self.client.post(
|
||||||
"v1/chat",
|
"/chat",
|
||||||
json={
|
json={
|
||||||
"converation": [
|
"converation": [
|
||||||
{"role": "system", "content": "you are a helpful assistant"},
|
{"role": "system", "content": "you are a helpful assistant"},
|
||||||
@ -32,7 +32,7 @@ class QuickstartUser(HttpUser):
|
|||||||
@task(5)
|
@task(5)
|
||||||
def chat_streaming(self):
|
def chat_streaming(self):
|
||||||
self.client.post(
|
self.client.post(
|
||||||
"v1/chat",
|
"/chat",
|
||||||
json={
|
json={
|
||||||
"converation": [
|
"converation": [
|
||||||
{"role": "system", "content": "you are a helpful assistant"},
|
{"role": "system", "content": "you are a helpful assistant"},
|
||||||
@ -55,4 +55,4 @@ class QuickstartUser(HttpUser):
|
|||||||
@tag("online-generation", "offline-generation")
|
@tag("online-generation", "offline-generation")
|
||||||
@task
|
@task
|
||||||
def get_models(self):
|
def get_models(self):
|
||||||
self.client.get("/v0/models")
|
self.client.get("/models")
|
||||||
|
@ -32,7 +32,7 @@ class ServerRunner:
|
|||||||
start = time.time()
|
start = time.time()
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
if requests.get("http://localhost:8000/v0/models").status_code == 200:
|
if requests.get("http://localhost:8000/models").status_code == 200:
|
||||||
break
|
break
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
if self.proc.poll() is not None:
|
if self.proc.poll() is not None:
|
||||||
@ -63,7 +63,7 @@ def server():
|
|||||||
|
|
||||||
async def test_completion(server):
|
async def test_completion(server):
|
||||||
data = {"prompt": "How are you?", "stream": "False"}
|
data = {"prompt": "How are you?", "stream": "False"}
|
||||||
response = await server.post("v1/completion", json=data)
|
response = await server.post("/completion", json=data)
|
||||||
assert response is not None
|
assert response is not None
|
||||||
|
|
||||||
|
|
||||||
@ -73,7 +73,7 @@ async def test_chat(server):
|
|||||||
{"role": "user", "content": "what is 1+1?"},
|
{"role": "user", "content": "what is 1+1?"},
|
||||||
]
|
]
|
||||||
data = {"messages": messages, "stream": "False"}
|
data = {"messages": messages, "stream": "False"}
|
||||||
response = await server.post("v1/chat", data)
|
response = await server.post("/chat", data)
|
||||||
assert response is not None
|
assert response is not None
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user