mirror of
https://github.com/nomic-ai/gpt4all.git
synced 2025-10-12 11:38:18 +00:00
GPT4All API Scaffolding. Matches OpenAI OpenAPI spec for chats and completions (#839)
* GPT4All API scaffolding. Matches the OpenAI OpenAPI spec for engines, chats, and completions
* Edits for Docker building
* FastAPI app builds and pydantic models are accurate
* Added groovy download into Dockerfile
* Improved Dockerfile
* Chat completions endpoint edits
* API unit test sketch
* Working example of groovy inference with the OpenAI API
* Added lines to test
* Set default to MPT
63
gpt4all-api/gpt4all_api/app/api_v1/routes/chat.py
Normal file
@@ -0,0 +1,63 @@
from fastapi import APIRouter, Depends, Response, Security, status
from pydantic import BaseModel, Field
from typing import List, Dict
import logging
import time
from api_v1.settings import settings

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

### This should follow https://github.com/openai/openai-openapi/blob/master/openapi.yaml


class ChatCompletionMessage(BaseModel):
    role: str
    content: str


class ChatCompletionRequest(BaseModel):
    model: str = Field(..., description='The model to generate a completion from.')
    messages: List[ChatCompletionMessage] = Field(..., description='The messages to generate a completion from.')


class ChatCompletionChoice(BaseModel):
    message: ChatCompletionMessage
    index: int
    finish_reason: str


class ChatCompletionUsage(BaseModel):
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int


class ChatCompletionResponse(BaseModel):
    id: str
    object: str = 'text_completion'
    created: int
    model: str
    choices: List[ChatCompletionChoice]
    usage: ChatCompletionUsage


router = APIRouter(prefix="/chat", tags=["Completions Endpoints"])


@router.post("/completions", response_model=ChatCompletionResponse)
async def chat_completion(request: ChatCompletionRequest):
    '''
    Completes a GPT4All model response.
    '''
    # Scaffolding only: returns a placeholder response until chat inference
    # is wired up. A minimal valid choice keeps response_model validation happy.
    return ChatCompletionResponse(
        id='asdf',
        created=int(time.time()),
        model=request.model,
        choices=[{
            'message': {'role': 'assistant', 'content': ''},
            'index': 0,
            'finish_reason': 'stop',
        }],
        usage={
            'prompt_tokens': 0,
            'completion_tokens': 0,
            'total_tokens': 0,
        },
    )
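
Since the handler above returns a stub, a quick smoke test can exercise the route with FastAPI's TestClient. A minimal sketch, assuming the app entry point is importable as main:app and mounts this router under a /v1 prefix (neither is shown in this diff):

# Hypothetical smoke test; the `main:app` import path and the `/v1`
# prefix are assumptions about the app wiring, which this diff omits.
from fastapi.testclient import TestClient
from main import app  # assumed app entry point

client = TestClient(app)
response = client.post(
    "/v1/chat/completions",
    json={
        "model": "ggml-mpt-7b-chat",  # any name; the stub echoes it back
        "messages": [{"role": "user", "content": "Hello!"}],
    },
)
assert response.status_code == 200
assert response.json()["model"] == "ggml-mpt-7b-chat"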
86
gpt4all-api/gpt4all_api/app/api_v1/routes/completions.py
Normal file
@@ -0,0 +1,86 @@
from fastapi import APIRouter, Depends, Response, Security, status
from pydantic import BaseModel, Field
from typing import List, Dict
import logging
from uuid import uuid4
from api_v1.settings import settings
from gpt4all import GPT4All
import time

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

### This should follow https://github.com/openai/openai-openapi/blob/master/openapi.yaml


class CompletionRequest(BaseModel):
    model: str = Field(..., description='The model to generate a completion from.')
    prompt: str = Field(..., description='The prompt to begin completing from.')
    max_tokens: int = Field(7, description='Max tokens to generate')
    temperature: float = Field(0, description='Model temperature')
    top_p: float = Field(1.0, description='top_p')
    n: int = Field(1, description='Number of completions to generate')
    stream: bool = Field(False, description='Stream responses')


class CompletionChoice(BaseModel):
    text: str
    index: int
    logprobs: float
    finish_reason: str


class CompletionUsage(BaseModel):
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int


class CompletionResponse(BaseModel):
    id: str
    object: str = 'text_completion'
    created: int
    model: str
    choices: List[CompletionChoice]
    usage: CompletionUsage


router = APIRouter(prefix="/completions", tags=["Completion Endpoints"])


@router.post("/", response_model=CompletionResponse)
async def completions(request: CompletionRequest):
    '''
    Completes a GPT4All model response.
    '''
    if request.stream:
        raise NotImplementedError("Streaming is not yet implemented")

    # Loads the configured model on every request; fine for scaffolding,
    # but a shared global model would avoid reloading per call.
    model = GPT4All(model_name=settings.model, model_path=settings.gpt4all_path)

    output = model.generate(prompt=request.prompt,
                            n_predict=request.max_tokens,
                            top_k=20,
                            top_p=request.top_p,
                            temp=request.temperature,
                            n_batch=1024,
                            repeat_penalty=1.2,
                            repeat_last_n=10,
                            context_erase=0)

    return CompletionResponse(
        id=str(uuid4()),
        created=int(time.time()),
        model=request.model,
        choices=[dict(CompletionChoice(
            text=output,
            index=0,
            logprobs=-1,
            finish_reason='stop'
        ))],
        usage={
            'prompt_tokens': 0,  # TODO: how to compute this?
            'completion_tokens': 0,
            'total_tokens': 0
        }
    )
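
Because the response shape follows the OpenAI spec, the stock openai Python client (pre-1.0 interface) can be pointed at this server. A sketch under assumptions: the host, port, and /v1 prefix below are guesses about the deployment, and the model name must match settings.model on the server:

# Sketch using the legacy openai<1.0 client; base URL and prefix are
# assumptions about how the app is deployed, not part of this diff.
import openai

openai.api_base = "http://localhost:4891/v1"  # assumed local deployment
openai.api_key = "not-needed"                 # the scaffold does no auth

completion = openai.Completion.create(
    model="ggml-mpt-7b-chat",        # must match settings.model server-side
    prompt="Who is Michael Jordan?",
    max_tokens=50,
    temperature=0.28,
)
print(completion["choices"][0]["text"])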
38
gpt4all-api/gpt4all_api/app/api_v1/routes/engines.py
Normal file
@@ -0,0 +1,38 @@
from fastapi import APIRouter, Depends, Response, Security, status
from pydantic import BaseModel, Field
from typing import List, Dict
import logging
from api_v1.settings import settings

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

### This should follow https://github.com/openai/openai-openapi/blob/master/openapi.yaml


class ListEnginesResponse(BaseModel):
    data: List[Dict] = Field(..., description="All available models.")


class EngineResponse(BaseModel):
    data: List[Dict] = Field(..., description="All available models.")


router = APIRouter(prefix="/engines", tags=["Search Endpoints"])


@router.get("/", response_model=ListEnginesResponse)
async def list_engines():
    '''
    List all available GPT4All models from
    https://raw.githubusercontent.com/nomic-ai/gpt4all/main/gpt4all-chat/metadata/models.json
    '''
    raise NotImplementedError()
    return ListEnginesResponse(data=[])


@router.get("/{engine_id}", response_model=EngineResponse)
async def retrieve_engine(engine_id: str):
    '''
    Retrieve a single engine by its id.
    '''
    raise NotImplementedError()
    return EngineResponse(data=[])
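
For reference, a hedged sketch of what the stubbed list_engines might do once implemented, following its docstring; the requests call and response handling here are assumptions, not part of this diff:

# Possible implementation sketch for list_engines, per its docstring.
import requests
from typing import Dict, List

MODELS_JSON = ("https://raw.githubusercontent.com/nomic-ai/gpt4all/"
               "main/gpt4all-chat/metadata/models.json")

def fetch_available_models() -> List[Dict]:
    # models.json is a JSON array of model metadata dicts
    resp = requests.get(MODELS_JSON, timeout=10)
    resp.raise_for_status()
    return resp.json()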
12
gpt4all-api/gpt4all_api/app/api_v1/routes/health.py
Normal file
@@ -0,0 +1,12 @@
import logging
from fastapi import APIRouter
from fastapi.responses import JSONResponse

log = logging.getLogger(__name__)

router = APIRouter(prefix="/health", tags=["Health"])


@router.get('/', response_class=JSONResponse)
async def health_check():
    """Runs a health check on this instance of the API."""
    return JSONResponse({'status': 'ok'}, headers={'Access-Control-Allow-Origin': '*'})
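
The health route loads no model, so it makes a cheap liveness probe. A minimal check, again assuming an importable main:app (not shown in this diff) that mounts this router without an extra prefix:

# Hypothetical health-check test; `main:app` is an assumed entry point.
from fastapi.testclient import TestClient
from main import app

client = TestClient(app)
assert client.get("/health/").json() == {"status": "ok"}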