mirror of https://github.com/nomic-ai/gpt4all.git

GPT4All API Scaffolding. Matches OpenAI OpenAPI spec for chats and completions (#839)

* GPT4All API Scaffolding. Matches OpenAI OpenAPI spec for engines, chats and completions
* Edits for docker building
* FastAPI app builds and pydantic models are accurate
* Added groovy download into dockerfile
* Improved dockerfile
* Chat completions endpoint edits
* API unit test sketch
* Working example of groovy inference with the OpenAI API
* Added lines to test
* Set default to mpt

parent 6b8456bf99 · commit 633e2a2137
gpt4all-api/.gitignore (vendored) · 112 lines · new file
@@ -0,0 +1,112 @@

```gitignore
# Byte-compiled / optimized / DLL files
__pycache__/
app/__pycache__/
gpt4all_api/__pycache__/
gpt4all_api/app/api_v1/__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# VS Code
.vscode/

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

*.lock
*.cache
```
gpt4all-api/LICENSE · 13 lines · new file
@@ -0,0 +1,13 @@

```text
Copyright 2023 Nomic, Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
```
gpt4all-api/README.md · changed
@@ -1,2 +1,47 @@

Removed:

````markdown
# GPT4All API
This directory will contain code to build out a RESTful API for GPT4All models. Exact details TBD, but as an MVP, user should be able to send requests to list, download, and generate text with different models.
````

Added:

````markdown
# WIP - not yet ready.

# GPT4All REST API
This directory contains the source code to run and build docker images that run a FastAPI app
for serving inference from GPT4All models. The API matches the OpenAI API spec.

## Tutorial

### Starting the app

First build the FastAPI docker image. You only have to do this on the initial build or when you add new dependencies to the requirements.txt file:

```bash
DOCKER_BUILDKIT=1 docker build -t gpt4all_api --progress plain -f gpt4all_api/Dockerfile.buildkit .
```

Then, start the backend with:

```bash
docker compose up --build
```

#### Spinning up your app
Run `docker compose up` to spin up the backend. Monitor the logs for errors in case you forgot to set an environment variable above.

#### Development
Run

```bash
docker compose up --build
```

and edit files in the `api` directory. The API will hot-reload on changes.

You can run the unit tests with

```bash
make test
```

#### Viewing API documentation

Once the FastAPI app is started you can access its documentation and test the endpoints by going to:

```
localhost:4891/docs
```

This documentation should match the OpenAI OpenAPI spec located at https://github.com/openai/openai-openapi/blob/master/openapi.yaml
````
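Since the API mirrors the OpenAI spec, you should be able to point the official `openai` Python client at the local server, as the unit tests in this commit do. A minimal sketch, assuming the compose setup above with the API exposed on port 4891:

```python
# Sketch: exercise the local completions endpoint through the openai
# client (v0.x-style API, matching the `openai` pin in requirements.txt).
import openai

openai.api_base = "http://localhost:4891/v1"
openai.api_key = "not needed for a local LLM"

response = openai.Completion.create(
    model="ggml-mpt-7b-chat.bin",
    prompt="Who is Michael Jordan?",
    max_tokens=50,
)
print(response["choices"][0]["text"])
```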
gpt4all-api/docker-compose.yaml · 18 lines · new file
@@ -0,0 +1,18 @@

```yaml
version: "3.5"

services:
  gpt4all_api:
    image: gpt4all_api
    container_name: gpt4all_api
    restart: always # restart on error (usually code compilation from save during bad state)
    ports:
      - "4891:4891"
    environment:
      - APP_ENVIRONMENT=dev
      - WEB_CONCURRENCY=2
      - LOGLEVEL=debug
      - PORT=4891
      - model=ggml-mpt-7b-chat.bin
    volumes:
      - './gpt4all_api/app:/app'
    command: ["/start-reload.sh"]
```
gpt4all-api/gpt4all_api/Dockerfile.buildkit · 23 lines · new file
@@ -0,0 +1,23 @@

```dockerfile
# syntax=docker/dockerfile:1.0.0-experimental
FROM tiangolo/uvicorn-gunicorn:python3.11

ARG MODEL_BIN=ggml-mpt-7b-chat.bin

# Put first so anytime this file changes other cached layers are invalidated.
COPY gpt4all_api/requirements.txt /requirements.txt

RUN pip install --upgrade pip

# Run various pip install commands with ssh keys from host machine.
RUN --mount=type=ssh pip install -r /requirements.txt && \
    rm -Rf /root/.cache && rm -Rf /tmp/pip-install*

# Finally, copy app and client.
COPY gpt4all_api/app /app

RUN mkdir -p /models

# Include the following line to bake a model into the image and not have to download it on API start.
RUN wget -q --show-progress=off https://gpt4all.io/models/${MODEL_BIN} -P /models \
    && md5sum /models/${MODEL_BIN}
```
gpt4all-api/gpt4all_api/README.md · 1 line · new file
@@ -0,0 +1 @@

```markdown
# FastAPI app for serving GPT4All models
```
gpt4all-api/gpt4all_api/app/__init__.py · 0 lines · new file
gpt4all-api/gpt4all_api/app/api_v1/__init__.py · 0 lines · new file
gpt4all-api/gpt4all_api/app/api_v1/api.py · 8 lines · new file
@@ -0,0 +1,8 @@

```python
from api_v1.routes import chat, completions, engines
from fastapi import APIRouter

router = APIRouter()

router.include_router(chat.router)
router.include_router(completions.router)
router.include_router(engines.router)
```
gpt4all-api/gpt4all_api/app/api_v1/events.py · 26 lines · new file
@@ -0,0 +1,26 @@

```python
import logging
from fastapi import HTTPException
from fastapi.responses import JSONResponse
from starlette.requests import Request
from api_v1.settings import settings

log = logging.getLogger(__name__)


startup_msg_fmt = """
Starting up GPT4All API
"""


async def on_http_error(request: Request, exc: HTTPException):
    return JSONResponse({'detail': exc.detail}, status_code=exc.status_code)


async def on_startup(app):
    startup_msg = startup_msg_fmt.format(settings=settings)
    log.info(startup_msg)


def startup_event_handler(app):
    async def start_app() -> None:
        await on_startup(app)

    return start_app
```
gpt4all-api/gpt4all_api/app/api_v1/routes/chat.py · 63 lines · new file
@@ -0,0 +1,63 @@

```python
from fastapi import APIRouter, Depends, Response, Security, status
from pydantic import BaseModel, Field
from typing import List, Dict
import logging
import time
from api_v1.settings import settings

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

### This should follow https://github.com/openai/openai-openapi/blob/master/openapi.yaml


class ChatCompletionMessage(BaseModel):
    role: str
    content: str


class ChatCompletionRequest(BaseModel):
    model: str = Field(..., description='The model to generate a completion from.')
    messages: List[ChatCompletionMessage] = Field(..., description='The messages to generate a chat completion from.')


class ChatCompletionChoice(BaseModel):
    message: ChatCompletionMessage
    index: int
    finish_reason: str


class ChatCompletionUsage(BaseModel):
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int


class ChatCompletionResponse(BaseModel):
    id: str
    object: str = 'text_completion'
    created: int
    model: str
    choices: List[ChatCompletionChoice]
    usage: ChatCompletionUsage


router = APIRouter(prefix="/chat", tags=["Completions Endpoints"])


@router.post("/completions", response_model=ChatCompletionResponse)
async def chat_completion(request: ChatCompletionRequest):
    '''
    Completes a GPT4All model response.
    '''
    # Placeholder response until chat inference is wired up
    # (an empty dict would fail ChatCompletionChoice validation).
    return ChatCompletionResponse(
        id='asdf',
        created=time.time(),
        model=request.model,
        choices=[ChatCompletionChoice(
            message=ChatCompletionMessage(role='assistant', content=''),
            index=0,
            finish_reason='stop',
        )],
        usage={
            'prompt_tokens': 0,
            'completion_tokens': 0,
            'total_tokens': 0
        }
    )
```
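A request to this scaffolded endpoint would be shaped like the following sketch (hypothetical values; the handler currently returns placeholder choices and zeroed usage, and assumes the compose setup with the API on port 4891):

```python
# Sketch: POST a ChatCompletionRequest-shaped payload to the chat route.
# The scaffolded handler echoes back placeholder choices for now.
import requests

resp = requests.post(
    "http://localhost:4891/v1/chat/completions",
    json={
        "model": "ggml-mpt-7b-chat.bin",
        "messages": [{"role": "user", "content": "Who is Michael Jordan?"}],
    },
)
print(resp.json())
```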
gpt4all-api/gpt4all_api/app/api_v1/routes/completions.py · 86 lines · new file
@@ -0,0 +1,86 @@

```python
from fastapi import APIRouter, Depends, Response, Security, status
from pydantic import BaseModel, Field
from typing import List, Dict
import logging
from uuid import uuid4
from api_v1.settings import settings
from gpt4all import GPT4All
import time

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

### This should follow https://github.com/openai/openai-openapi/blob/master/openapi.yaml

class CompletionRequest(BaseModel):
    model: str = Field(..., description='The model to generate a completion from.')
    prompt: str = Field(..., description='The prompt to begin completing from.')
    max_tokens: int = Field(7, description='Max tokens to generate')
    temperature: float = Field(0, description='Model temperature')
    top_p: float = Field(1.0, description='top_p')
    n: int = Field(1, description='How many completions to generate for each prompt.')
    stream: bool = Field(False, description='Stream responses')


class CompletionChoice(BaseModel):
    text: str
    index: int
    logprobs: float
    finish_reason: str


class CompletionUsage(BaseModel):
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int


class CompletionResponse(BaseModel):
    id: str
    object: str = 'text_completion'
    created: int
    model: str
    choices: List[CompletionChoice]
    usage: CompletionUsage


router = APIRouter(prefix="/completions", tags=["Completion Endpoints"])


@router.post("/", response_model=CompletionResponse)
async def completions(request: CompletionRequest):
    '''
    Completes a GPT4All model response.
    '''
    # global model
    if request.stream:
        raise NotImplementedError("Streaming is not yet implemented.")

    model = GPT4All(model_name=settings.model, model_path=settings.gpt4all_path)

    output = model.generate(prompt=request.prompt,
                            n_predict=request.max_tokens,
                            top_k=20,
                            top_p=request.top_p,
                            temp=request.temperature,
                            n_batch=1024,
                            repeat_penalty=1.2,
                            repeat_last_n=10,
                            context_erase=0)

    return CompletionResponse(
        id=str(uuid4()),
        created=time.time(),
        model=request.model,
        choices=[dict(CompletionChoice(
            text=output,
            index=0,
            logprobs=-1,
            finish_reason='stop'
        ))],
        usage={
            'prompt_tokens': 0,  # TODO how to compute this?
            'completion_tokens': 0,
            'total_tokens': 0
        }
    )
```
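For reference, a raw HTTP request matching `CompletionRequest` could look like this sketch (hypothetical values; assumes the compose setup with the API on port 4891 and the `requests` package from requirements.txt):

```python
# Sketch: POST directly to the completions route. Field names mirror
# the CompletionRequest pydantic model above.
import requests

resp = requests.post(
    "http://localhost:4891/v1/completions/",
    json={
        "model": "ggml-mpt-7b-chat.bin",
        "prompt": "Who is Michael Jordan?",
        "max_tokens": 50,
        "temperature": 0.28,
        "top_p": 0.95,
    },
)
print(resp.json()["choices"][0]["text"])
```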
gpt4all-api/gpt4all_api/app/api_v1/routes/engines.py · 38 lines · new file
@@ -0,0 +1,38 @@

```python
from fastapi import APIRouter, Depends, Response, Security, status
from pydantic import BaseModel, Field
from typing import List, Dict
import logging
from api_v1.settings import settings

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

### This should follow https://github.com/openai/openai-openapi/blob/master/openapi.yaml

class ListEnginesResponse(BaseModel):
    data: List[Dict] = Field(..., description="All available models.")


class EngineResponse(BaseModel):
    data: List[Dict] = Field(..., description="All available models.")


router = APIRouter(prefix="/engines", tags=["Search Endpoints"])


@router.get("/", response_model=ListEnginesResponse)
async def list_engines():
    '''
    List all available GPT4All models from
    https://raw.githubusercontent.com/nomic-ai/gpt4all/main/gpt4all-chat/metadata/models.json
    '''
    raise NotImplementedError()
    return ListEnginesResponse(data=[])


@router.get("/{engine_id}", response_model=EngineResponse)
async def retrieve_engine(engine_id: str):
    '''
    Retrieve details about a single engine.
    '''
    raise NotImplementedError()
    return EngineResponse()
```
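Both engine routes are stubs for now. One way `list_engines` could eventually be filled in, using the models.json registry named in its docstring (a sketch, not part of this commit; `fetch_available_models` is a hypothetical helper):

```python
# Hypothetical helper for list_engines: pull the model registry the
# docstring points at and return its entries.
import requests

MODELS_JSON_URL = (
    "https://raw.githubusercontent.com/nomic-ai/gpt4all/main/gpt4all-chat/metadata/models.json"
)

def fetch_available_models() -> list:
    response = requests.get(MODELS_JSON_URL, timeout=10)
    response.raise_for_status()
    return response.json()
```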
gpt4all-api/gpt4all_api/app/api_v1/routes/health.py · 12 lines · new file
@@ -0,0 +1,12 @@

```python
import logging
from fastapi import APIRouter
from fastapi.responses import JSONResponse

log = logging.getLogger(__name__)

router = APIRouter(prefix="/health", tags=["Health"])


@router.get('/', response_class=JSONResponse)
async def health_check():
    """Runs a health check on this instance of the API."""
    return JSONResponse({'status': 'ok'}, headers={'Access-Control-Allow-Origin': '*'})
```
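Note that this health router is not yet registered in `api_v1/api.py` in this commit, so `/v1/health` is unreachable; wiring it up would look like the following sketch:

```python
# Sketch (not in this commit): include the health router in
# gpt4all_api/app/api_v1/api.py so GET /v1/health/ responds.
from api_v1.routes import chat, completions, engines, health
from fastapi import APIRouter

router = APIRouter()

router.include_router(chat.router)
router.include_router(completions.router)
router.include_router(engines.router)
router.include_router(health.router)  # added
```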
gpt4all-api/gpt4all_api/app/api_v1/settings.py · 10 lines · new file
@@ -0,0 +1,10 @@

```python
from pydantic import BaseSettings


class Settings(BaseSettings):
    app_environment = 'dev'
    model: str = 'ggml-mpt-7b-chat.bin'
    gpt4all_path: str = '/models'


settings = Settings()
```
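`Settings` extends pydantic's `BaseSettings`, so each field can be overridden by an environment variable of the same name (matched case-insensitively); that is how the `model=ggml-mpt-7b-chat.bin` entry in docker-compose.yaml reaches the app. A minimal sketch of an override:

```python
# Sketch: BaseSettings reads environment variables at instantiation,
# so setting `model` before constructing Settings overrides the default.
import os

os.environ["model"] = "ggml-gpt4all-j-v1.3-groovy.bin"  # hypothetical model file

from api_v1.settings import Settings

print(Settings().model)  # -> ggml-gpt4all-j-v1.3-groovy.bin
```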
gpt4all-api/gpt4all_api/app/docs.py · 3 lines · new file
@@ -0,0 +1,3 @@

```python
desc = 'GPT4All API'

endpoint_paths = {'health': '/health'}
```
gpt4all-api/gpt4all_api/app/main.py · 61 lines · new file
@@ -0,0 +1,61 @@

```python
import os
import docs
import logging
from fastapi import FastAPI, HTTPException, Request
from starlette.middleware.cors import CORSMiddleware
from fastapi.logger import logger as fastapi_logger
from api_v1.settings import settings
from api_v1.api import router as v1_router
from api_v1 import events

logger = logging.getLogger(__name__)

app = FastAPI(title='GPT4All API', description=docs.desc)

# CORS configuration (in case you want to deploy)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["GET", "POST", "OPTIONS"],
    allow_headers=["*"],
)

logger.info('Adding v1 endpoints..')

# add v1
app.include_router(v1_router, prefix='/v1')
app.add_event_handler('startup', events.startup_event_handler(app))
app.add_exception_handler(HTTPException, events.on_http_error)


@app.on_event("startup")
async def startup():
    global model
    logger.info(f"Downloading/fetching model: {os.path.join(settings.gpt4all_path, settings.model)}")
    from gpt4all import GPT4All
    model = GPT4All(model_name=settings.model, model_path=settings.gpt4all_path)

    logger.info("GPT4All API is ready.")


@app.on_event("shutdown")
async def shutdown():
    logger.info("Shutting down API")


# This is needed to get logs to show up in the app
if "gunicorn" in os.environ.get("SERVER_SOFTWARE", ""):
    gunicorn_error_logger = logging.getLogger("gunicorn.error")
    gunicorn_logger = logging.getLogger("gunicorn")

    root_logger = logging.getLogger()
    fastapi_logger.setLevel(gunicorn_logger.level)
    fastapi_logger.handlers = gunicorn_error_logger.handlers
    root_logger.setLevel(gunicorn_logger.level)

    uvicorn_logger = logging.getLogger("uvicorn.access")
    uvicorn_logger.handlers = gunicorn_error_logger.handlers
else:
    # https://github.com/tiangolo/fastapi/issues/2019
    LOG_FORMAT2 = "[%(asctime)s %(process)d:%(threadName)s] %(name)s - %(levelname)s - %(message)s | %(filename)s:%(lineno)d"
    logging.basicConfig(level=logging.INFO, format=LOG_FORMAT2)
```
gpt4all-api/gpt4all_api/app/tests/test_endpoints.py · 35 lines · new file
@@ -0,0 +1,35 @@

```python
"""
Use the OpenAI python API to test gpt4all models.
"""
import openai

openai.api_base = "http://localhost:4891/v1"

openai.api_key = "not needed for a local LLM"


def test_completion():
    model = "gpt4all-j-v1.3-groovy"
    prompt = "Who is Michael Jordan?"
    response = openai.Completion.create(
        model=model,
        prompt=prompt,
        max_tokens=50,
        temperature=0.28,
        top_p=0.95,
        n=1,
        echo=True,
        stream=False
    )
    assert len(response['choices'][0]['text']) > len(prompt)
    print(response)


# def test_chat_completions():
#     model = "gpt4all-j-v1.3-groovy"
#     prompt = "Who is Michael Jordan?"
#     response = openai.ChatCompletion.create(
#         model=model,
#         messages=[]
#     )
```
gpt4all-api/gpt4all_api/requirements.txt · 10 lines · new file
@@ -0,0 +1,10 @@

```text
aiohttp>=3.6.2
aiofiles
pydantic>=1.4.0
requests>=2.24.0
ujson>=2.0.2
fastapi>=0.95.0
Jinja2>=3.0
gpt4all==0.2.3
pytest
openai
```
gpt4all-api/makefile · 37 lines · new file
@@ -0,0 +1,37 @@

```makefile
ROOT_DIR:=$(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
APP_NAME:=gpt4all_api
PYTHON:=python3.8

all: dependencies

fresh: clean dependencies

testenv: clean_testenv test_build
	docker compose up --build

testenv_d: clean_testenv test_build
	docker compose up --build -d

test:
	docker compose exec gpt4all_api pytest -svv --disable-warnings -p no:cacheprovider /app/tests

test_build:
	DOCKER_BUILDKIT=1 docker build -t gpt4all_api --progress plain -f gpt4all_api/Dockerfile.buildkit .

clean_testenv:
	docker compose down -v

fresh_testenv: clean_testenv testenv

venv:
	if [ ! -d $(ROOT_DIR)/env ]; then $(PYTHON) -m venv $(ROOT_DIR)/env; fi

dependencies: venv
	source $(ROOT_DIR)/env/bin/activate; yes w | python -m pip install -r $(ROOT_DIR)/$(APP_NAME)/requirements.txt

clean: clean_testenv
	# Remove existing environment
	rm -rf $(ROOT_DIR)/env;
	rm -rf $(ROOT_DIR)/$(APP_NAME)/*.pyc;
```