GPU Inference Server (#1112)

* feat: local inference server

* fix: source to use bash + vars

* chore: isort and black

* fix: Makefile + inference mode

* chore: logging

* refactor: remove old links

* fix: add new env vars

* feat: hf inference server

* refactor: remove old links

* test: batch and single response

* chore: black + isort

* separate gpu and cpu dockerfiles

* moved gpu to separate dockerfile

* Fixed test endpoints

* Edits to API; server won't start due to failed instantiation error

* Method signature

* fix: gpu_infer

* tests: fix tests

---------

Co-authored-by: Andriy Mulyar <andriy.mulyar@gmail.com>
Zach Nussbaum
2023-07-21 14:13:29 -05:00
committed by GitHub
parent 58f0fcab57
commit 8aba2c9009
14 changed files with 271 additions and 112 deletions
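The "test: batch and single response" entry in the commit message refers to exercising the new inference server with both a single prompt and a batch of prompts. Below is a minimal client sketch of that pattern, assuming the API container listens on localhost:4891 and exposes an OpenAI-style /v1/completions route; the port, model name, and payload shape are illustrative assumptions, not taken from this commit:

import requests

BASE_URL = "http://localhost:4891/v1"  # assumed local port for the API container


def complete(prompt_or_prompts, model="ggml-mpt-7b-chat.bin"):
    # Send one prompt (str) or a batch (list of str); the model name is hypothetical.
    payload = {"model": model, "prompt": prompt_or_prompts, "max_tokens": 50}
    resp = requests.post(f"{BASE_URL}/completions", json=payload, timeout=60)
    resp.raise_for_status()
    # The server returns one choice per prompt, so this handles both cases.
    return [choice["text"] for choice in resp.json()["choices"]]


print(complete("Hello, world!"))                        # single response
print(complete(["What is AI?", "Name three colors."]))  # batch response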


@@ -1,22 +1,27 @@
+import logging
+from typing import Dict, List
+
+from api_v1.settings import settings
 from fastapi import APIRouter, Depends, Response, Security, status
 from pydantic import BaseModel, Field
-from typing import List, Dict
-import logging
-from api_v1.settings import settings
 
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.DEBUG)
 
 ### This should follow https://github.com/openai/openai-openapi/blob/master/openapi.yaml
 
+
 class ListEnginesResponse(BaseModel):
     data: List[Dict] = Field(..., description="All available models.")
 
+
 class EngineResponse(BaseModel):
     data: List[Dict] = Field(..., description="All available models.")
 
+
 router = APIRouter(prefix="/engines", tags=["Search Endpoints"])
 
+
 @router.get("/", response_model=ListEnginesResponse)
 async def list_engines():
     '''
@@ -29,10 +34,7 @@ async def list_engines():
 @router.get("/{engine_id}", response_model=EngineResponse)
 async def retrieve_engine(engine_id: str):
-    '''
-
-
-    '''
+    ''' '''
     raise NotImplementedError()
     return EngineResponse()
 
 
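Both handlers in this file are stubs: raise NotImplementedError() fires before the unreachable return. A hedged sketch of one way they could be completed, fetching the public GPT4All model catalog over HTTP; the catalog URL, the httpx dependency, and the filename-based lookup are assumptions of this sketch, not part of the commit:

import logging
from typing import Dict, List

import httpx
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel, Field

logger = logging.getLogger(__name__)

# Assumed catalog location; not taken from this commit.
MODELS_JSON = "https://raw.githubusercontent.com/nomic-ai/gpt4all/main/gpt4all-chat/metadata/models.json"


class ListEnginesResponse(BaseModel):
    data: List[Dict] = Field(..., description="All available models.")


class EngineResponse(BaseModel):
    data: List[Dict] = Field(..., description="All available models.")


router = APIRouter(prefix="/engines", tags=["Search Endpoints"])


async def _fetch_catalog() -> List[Dict]:
    # Hypothetical helper: download and parse the models catalog.
    async with httpx.AsyncClient() as client:
        resp = await client.get(MODELS_JSON)
    resp.raise_for_status()
    return resp.json()


@router.get("/", response_model=ListEnginesResponse)
async def list_engines():
    '''List every model in the catalog.'''
    return ListEnginesResponse(data=await _fetch_catalog())


@router.get("/{engine_id}", response_model=EngineResponse)
async def retrieve_engine(engine_id: str):
    '''Return the catalog entries whose filename matches engine_id.'''
    matches = [m for m in await _fetch_catalog() if m.get("filename") == engine_id]
    if not matches:
        raise HTTPException(status_code=404, detail=f"Engine '{engine_id}' not found.")
    return EngineResponse(data=matches)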