GPT4All API Scaffolding. Matches OpenAI OpenAPI spec for chats and completions (#839)

* GPT4All API Scaffolding. Matches OpenAI OpenAPI spec for engines, chats and completions * Edits for docker building * FastAPI app builds and pydantic models are accurate * Added groovy download into dockerfile * improved dockerfile * Chat completions endpoint edits * API unit test sketch * Working example of groovy inference with OpenAI API * Added lines to test * Set default to mpt
2025-10-12 11:38:18 +00:00 · 2023-06-28 14:28:52 -04:00
parent 6b8456bf99
commit 633e2a2137
21 changed files with 603 additions and 2 deletions
--- a/gpt4all-api/gpt4all_api/app/api_v1/routes/init.py
+++ b/gpt4all-api/gpt4all_api/app/api_v1/routes/init.py
--- a/gpt4all-api/gpt4all_api/app/api_v1/routes/chat.py
+++ b/gpt4all-api/gpt4all_api/app/api_v1/routes/chat.py
@@ -0,0 +1,63 @@
+from fastapi import APIRouter, Depends, Response, Security, status
+from pydantic import BaseModel, Field
+from typing import List, Dict
+import logging
+import time
+from api_v1.settings import settings
+
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+# Level is forced to DEBUG here, independent of any application-wide logging config.
+logger.setLevel(logging.DEBUG)
+
+### This should follow https://github.com/openai/openai-openapi/blob/master/openapi.yaml
+
+
+
+class ChatCompletionMessage(BaseModel):
+    # One message of a chat conversation; used both in requests and in response choices.
+    # Author of the message. Any string is accepted; no role validation is done here.
+    role: str
+    # Text of the message.
+    content: str
+
+class ChatCompletionRequest(BaseModel):
+    model: str = Field(..., description='The model to generate a completion from.')
+    messages: List[ChatCompletionMessage] = Field(..., description='The model to generate a completion from.')
+
+
+class ChatCompletionChoice(BaseModel):
+    # One candidate reply inside a chat completion response. All fields are required.
+    # The generated message.
+    message: ChatCompletionMessage
+    # Position of this choice in the response's `choices` list.
+    index: int
+    # Why generation ended; free-form string, not validated here.
+    finish_reason: str
+
+class ChatCompletionUsage(BaseModel):
+    # Token accounting reported with a chat completion response.
+    prompt_tokens: int
+    completion_tokens: int
+    # NOTE(review): presumably prompt_tokens + completion_tokens; nothing here enforces it.
+    total_tokens: int
+
+class ChatCompletionResponse(BaseModel):
+    id: str
+    object: str = 'text_completion'
+    created: int
+    model: str
+    choices: List[ChatCompletionChoice]
+    usage: ChatCompletionUsage
+
+
+# Router for this module; every route declared on it is served under the /chat prefix.
+router = APIRouter(prefix="/chat", tags=["Completions Endpoints"])
+
+@router.post("/completions", response_model=ChatCompletionResponse)
+async def chat_completion(request: ChatCompletionRequest):
+    '''
+    Completes a GPT4All model response.
+    '''
+
+    return ChatCompletionResponse(
+        id='asdf',
+        created=time.time(),
+        model=request.model,
+        choices=[{}],
+        usage={
+            'prompt_tokens': 0,
+            'completion_tokens': 0,
+            'total_tokens': 0
+        }
+    )
+
+
--- a/gpt4all-api/gpt4all_api/app/api_v1/routes/completions.py
+++ b/gpt4all-api/gpt4all_api/app/api_v1/routes/completions.py
@@ -0,0 +1,86 @@
+from fastapi import APIRouter, Depends, Response, Security, status
+from pydantic import BaseModel, Field
+from typing import List, Dict
+import logging
+from uuid import uuid4
+from api_v1.settings import settings
+from gpt4all import GPT4All
+import time
+
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+# Level is forced to DEBUG here, independent of any application-wide logging config.
+logger.setLevel(logging.DEBUG)
+
+### This should follow https://github.com/openai/openai-openapi/blob/master/openapi.yaml
+
+class CompletionRequest(BaseModel):
+    # Request body for the completions endpoint. `model` and `prompt` are required;
+    # the remaining fields fall back to the defaults given below.
+    model: str = Field(..., description='The model to generate a completion from.')
+    prompt: str = Field(..., description='The prompt to begin completing from.')
+    # Defaults to 7 tokens when omitted.
+    max_tokens: int = Field(7, description='Max tokens to generate')
+    temperature: float = Field(0, description='Model temperature')
+    top_p: float = Field(1.0, description='top_p')
+    # NOTE(review): presumably the number of completions to return (as in the
+    # OpenAI spec); the `completions` handler in this module does not read it.
+    n: int = Field(1, description='')
+    stream: bool = Field(False, description='Stream responses')
+
+
+class CompletionChoice(BaseModel):
+    # One generated completion inside a CompletionResponse. All fields are required.
+    # The generated text.
+    text: str
+    # Position of this choice in the response's `choices` list.
+    index: int
+    # NOTE(review): modelled as a single float here; confirm against the OpenAI
+    # spec, where logprobs is a nullable object.
+    logprobs: float
+    # Why generation ended; free-form string, not validated here.
+    finish_reason: str
+
+class CompletionUsage(BaseModel):
+    # Token accounting reported with a completion response.
+    prompt_tokens: int
+    completion_tokens: int
+    # NOTE(review): presumably prompt_tokens + completion_tokens; nothing here enforces it.
+    total_tokens: int
+class CompletionResponse(BaseModel):
+    # Response body for the completions endpoint.
+    id: str
+    # Object type tag; defaults to 'text_completion'.
+    object: str = 'text_completion'
+    # Declared as an integer timestamp.
+    created: int
+    model: str
+    choices: List[CompletionChoice]
+    usage: CompletionUsage
+
+
+# Router for this module; every route declared on it is served under the /completions prefix.
+router = APIRouter(prefix="/completions", tags=["Completion Endpoints"])
+
+@router.post("/", response_model=CompletionResponse)
+async def completions(request: CompletionRequest):
+    '''
+    Completes a GPT4All model response.
+    '''
+
+    # global model
+    if request.stream:
+        raise NotImplementedError("Streaming is not yet implements")
+
+    model = GPT4All(model_name=settings.model, model_path=settings.gpt4all_path)
+
+    output = model.generate(prompt=request.prompt,
+                     n_predict = request.max_tokens,
+                     top_k = 20,
+                     top_p = request.top_p,
+                     temp=request.temperature,
+                     n_batch = 1024,
+                     repeat_penalty = 1.2,
+                     repeat_last_n = 10,
+                     context_erase = 0)
+
+
+    return CompletionResponse(
+        id=str(uuid4()),
+        created=time.time(),
+        model=request.model,
+        choices=[dict(CompletionChoice(
+            text=output,
+            index=0,
+            logprobs=-1,
+            finish_reason='stop'
+        ))],
+        usage={
+            'prompt_tokens': 0, #TODO how to compute this?
+            'completion_tokens': 0,
+            'total_tokens': 0
+        }
+    )
+
+
--- a/gpt4all-api/gpt4all_api/app/api_v1/routes/engines.py
+++ b/gpt4all-api/gpt4all_api/app/api_v1/routes/engines.py
@@ -0,0 +1,38 @@
+from fastapi import APIRouter, Depends, Response, Security, status
+from pydantic import BaseModel, Field
+from typing import List, Dict
+import logging
+from api_v1.settings import settings
+
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+# Level is forced to DEBUG here, independent of any application-wide logging config.
+logger.setLevel(logging.DEBUG)
+
+### This should follow https://github.com/openai/openai-openapi/blob/master/openapi.yaml
+
+class ListEnginesResponse(BaseModel):
+    # Response body declared for GET /engines/. Entries are untyped dicts.
+    data: List[Dict] = Field(..., description="All available models.")
+
+class EngineResponse(BaseModel):
+    # Response body declared for GET /engines/{engine_id}.
+    # NOTE(review): field and description are identical to ListEnginesResponse;
+    # confirm the intended shape for a single-engine response.
+    data: List[Dict] = Field(..., description="All available models.")
+
+# Router for this module; every route declared on it is served under the /engines prefix.
+# NOTE(review): the "Search Endpoints" tag looks copied from elsewhere; confirm.
+router = APIRouter(prefix="/engines", tags=["Search Endpoints"])
+
+@router.get("/", response_model=ListEnginesResponse)
+async def list_engines():
+    '''
+    List all available GPT4All models from
+    https://raw.githubusercontent.com/nomic-ai/gpt4all/main/gpt4all-chat/metadata/models.json
+    '''
+    raise NotImplementedError()
+    return ListEnginesResponse(data=[])
+
+
+@router.get("/{engine_id}", response_model=EngineResponse)
+async def retrieve_engine(engine_id: str):
+    '''
+
+    '''
+
+    raise NotImplementedError()
+    return EngineResponse()
+
--- a/gpt4all-api/gpt4all_api/app/api_v1/routes/health.py
+++ b/gpt4all-api/gpt4all_api/app/api_v1/routes/health.py
@@ -0,0 +1,12 @@
+import logging
+from fastapi import APIRouter
+from fastapi.responses import JSONResponse
+# Module-level logger named after this module.
+log = logging.getLogger(__name__)
+
+# Router for this module; every route declared on it is served under the /health prefix.
+router = APIRouter(prefix="/health", tags=["Health"])
+
+
+@router.get('/', response_class=JSONResponse)
+async def health_check():
+    """Runs a health check on this instance of the API."""
+    return JSONResponse({'status': 'ok'}, headers={'Access-Control-Allow-Origin': '*'})