mirror of
https://github.com/nomic-ai/gpt4all.git
synced 2025-10-12 19:49:16 +00:00
GPU Inference Server (#1112)
* feat: local inference server
* fix: source to use bash + vars
* chore: isort and black
* fix: make file + inference mode
* chore: logging
* refactor: remove old links
* fix: add new env vars
* feat: hf inference server
* refactor: remove old links
* test: batch and single response
* chore: black + isort
* separate gpu and cpu dockerfiles
* moved gpu to separate dockerfile
* Fixed test endpoints
* Edits to API. server won't start due to failed instantiation error
* Method signature
* fix: gpu_infer
* tests: fix tests

---------

Co-authored-by: Andriy Mulyar <andriy.mulyar@gmail.com>
This commit is contained in:
@@ -1,9 +1,10 @@
|
||||
from fastapi import APIRouter, Depends, Response, Security, status
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import List, Dict
|
||||
import logging
|
||||
import time
|
||||
from typing import Dict, List
|
||||
|
||||
from api_v1.settings import settings
|
||||
from fastapi import APIRouter, Depends, Response, Security, status
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.setLevel(logging.DEBUG)
|
||||
@@ -11,11 +12,11 @@ logger.setLevel(logging.DEBUG)
|
||||
### This should follow https://github.com/openai/openai-openapi/blob/master/openapi.yaml
|
||||
|
||||
|
||||
|
||||
# Request-side schema for a single chat message; ChatCompletionRequest
# carries a list of these in its `messages` field.
# `#` comments are used instead of a docstring so the generated schema
# description for this model is left untouched.
class ChatCompletionMessage(BaseModel):
    # Author of the message. Presumably one of the role names from the
    # OpenAI spec this module says it follows -- TODO confirm; no
    # validation is applied here beyond "must be a string".
    role: str
    # Text of the message.
    content: str
|
||||
|
||||
|
||||
class ChatCompletionRequest(BaseModel):
|
||||
model: str = Field(..., description='The model to generate a completion from.')
|
||||
messages: List[ChatCompletionMessage] = Field(..., description='The model to generate a completion from.')
|
||||
@@ -26,11 +27,13 @@ class ChatCompletionChoice(BaseModel):
|
||||
index: int
|
||||
finish_reason: str
|
||||
|
||||
|
||||
# Token-count section of a chat completion response.
# `#` comments are used instead of a docstring so the generated schema
# description for this model is left untouched.
class ChatCompletionUsage(BaseModel):
    # Number of tokens in the prompt.
    prompt_tokens: int
    # Number of tokens in the generated completion.
    completion_tokens: int
    # NOTE(review): presumably prompt_tokens + completion_tokens; nothing in
    # this model enforces that relationship -- confirm against the caller.
    total_tokens: int
|
||||
|
||||
|
||||
class ChatCompletionResponse(BaseModel):
|
||||
id: str
|
||||
object: str = 'text_completion'
|
||||
@@ -42,6 +45,7 @@ class ChatCompletionResponse(BaseModel):
|
||||
|
||||
router = APIRouter(prefix="/chat", tags=["Completions Endpoints"])
|
||||
|
||||
|
||||
@router.post("/completions", response_model=ChatCompletionResponse)
|
||||
async def chat_completion(request: ChatCompletionRequest):
|
||||
'''
|
||||
@@ -53,11 +57,5 @@ async def chat_completion(request: ChatCompletionRequest):
|
||||
created=time.time(),
|
||||
model=request.model,
|
||||
choices=[{}],
|
||||
usage={
|
||||
'prompt_tokens': 0,
|
||||
'completion_tokens': 0,
|
||||
'total_tokens': 0
|
||||
}
|
||||
usage={'prompt_tokens': 0, 'completion_tokens': 0, 'total_tokens': 0},
|
||||
)
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user