mirror of https://github.com/nomic-ai/gpt4all.git

GPT4All API Scaffolding. Matches OpenAI OpenAPI spec for chats and completions (#839)

* GPT4All API Scaffolding. Matches OpenAI OpenAPI spec for engines, chats and completions
* Edits for docker building
* FastAPI app builds and pydantic models are accurate
* Added groovy download into dockerfile
* Improved dockerfile
* Chat completions endpoint edits
* API unit test sketch
* Working example of groovy inference with the OpenAI API
* Added lines to test
* Set default to mpt

parent 6b8456bf99 · commit 633e2a2137
gpt4all-api/.gitignore (vendored) · 112 lines · new file
@@ -0,0 +1,112 @@

```gitignore
# Byte-compiled / optimized / DLL files
__pycache__/
app/__pycache__/
gpt4all_api/__pycache__/
gpt4all_api/app/api_v1/__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# VS Code
.vscode/

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

*.lock
*.cache
```
gpt4all-api/LICENSE · 13 lines · new file
@@ -0,0 +1,13 @@

```text
Copyright 2023 Nomic, Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
```
gpt4all-api/README.md · changed
@@ -1,2 +1,47 @@

Removed:

````markdown
# GPT4All API
This directory will contain code to build out a RESTful API for GPT4All models. Exact details TBD, but as an MVP, user should be able to send requests to list, download, and generate text with different models.
````

Added:

````markdown
# WIP - not yet ready.

# GPT4All REST API
This directory contains the source code to run and build docker images that run a FastAPI app
for serving inference from GPT4All models. The API matches the OpenAI API spec.

## Tutorial

### Starting the app

First build the FastAPI docker image. You only have to do this on the initial build or when you add new dependencies to the requirements.txt file:

```bash
DOCKER_BUILDKIT=1 docker build -t gpt4all_api --progress plain -f gpt4all_api/Dockerfile.buildkit .
```

Then, start the backend with:

```bash
docker compose up --build
```

#### Spinning up your app
Run `docker compose up` to spin up the backend. Monitor the logs for errors in case you forgot to set an environment variable above.

#### Development
Run

```bash
docker compose up --build
```

and edit files in the `api` directory. The API will hot-reload on changes.

You can run the unit tests with

```bash
make test
```

#### Viewing API documentation

Once the FastAPI app is started you can access its documentation and test the endpoints by going to:

```
localhost:4891/docs
```

This documentation should match the OpenAI OpenAPI spec located at https://github.com/openai/openai-openapi/blob/master/openapi.yaml
````
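Since the API mirrors the OpenAI spec, you should be able to point the official `openai` Python client at the local server, as the unit tests in this commit do. A minimal sketch, assuming the compose setup above with the API exposed on port 4891:

```python
# Sketch: exercise the local completions endpoint through the openai
# client (v0.x-style API, matching the `openai` pin in requirements.txt).
import openai

openai.api_base = "http://localhost:4891/v1"
openai.api_key = "not needed for a local LLM"

response = openai.Completion.create(
    model="ggml-mpt-7b-chat.bin",
    prompt="Who is Michael Jordan?",
    max_tokens=50,
)
print(response["choices"][0]["text"])
```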
gpt4all-api/docker-compose.yaml · 18 lines · new file
@@ -0,0 +1,18 @@

```yaml
version: "3.5"

services:
  gpt4all_api:
    image: gpt4all_api
    container_name: gpt4all_api
    restart: always # restart on error (usually code compilation from save during bad state)
    ports:
      - "4891:4891"
    environment:
      - APP_ENVIRONMENT=dev
      - WEB_CONCURRENCY=2
      - LOGLEVEL=debug
      - PORT=4891
      - model=ggml-mpt-7b-chat.bin
    volumes:
      - './gpt4all_api/app:/app'
    command: ["/start-reload.sh"]
```
gpt4all-api/gpt4all_api/Dockerfile.buildkit · 23 lines · new file
@@ -0,0 +1,23 @@

```dockerfile
# syntax=docker/dockerfile:1.0.0-experimental
FROM tiangolo/uvicorn-gunicorn:python3.11

ARG MODEL_BIN=ggml-mpt-7b-chat.bin

# Put first so anytime this file changes other cached layers are invalidated.
COPY gpt4all_api/requirements.txt /requirements.txt

RUN pip install --upgrade pip

# Run various pip install commands with ssh keys from host machine.
RUN --mount=type=ssh pip install -r /requirements.txt && \
    rm -Rf /root/.cache && rm -Rf /tmp/pip-install*

# Finally, copy app and client.
COPY gpt4all_api/app /app

RUN mkdir -p /models

# Include the following line to bake a model into the image and not have to download it on API start.
RUN wget -q --show-progress=off https://gpt4all.io/models/${MODEL_BIN} -P /models \
    && md5sum /models/${MODEL_BIN}
```
gpt4all-api/gpt4all_api/README.md · 1 line · new file
@@ -0,0 +1 @@

```markdown
# FastAPI app for serving GPT4All models
```
gpt4all-api/gpt4all_api/app/__init__.py · 0 lines · new file
gpt4all-api/gpt4all_api/app/api_v1/__init__.py · 0 lines · new file
gpt4all-api/gpt4all_api/app/api_v1/api.py · 8 lines · new file
@@ -0,0 +1,8 @@

```python
from api_v1.routes import chat, completions, engines
from fastapi import APIRouter

router = APIRouter()

router.include_router(chat.router)
router.include_router(completions.router)
router.include_router(engines.router)
```
gpt4all-api/gpt4all_api/app/api_v1/events.py · 26 lines · new file
@@ -0,0 +1,26 @@

```python
import logging
from fastapi import HTTPException
from fastapi.responses import JSONResponse
from starlette.requests import Request
from api_v1.settings import settings

log = logging.getLogger(__name__)


startup_msg_fmt = """
Starting up GPT4All API
"""


async def on_http_error(request: Request, exc: HTTPException):
    return JSONResponse({'detail': exc.detail}, status_code=exc.status_code)


async def on_startup(app):
    startup_msg = startup_msg_fmt.format(settings=settings)
    log.info(startup_msg)


def startup_event_handler(app):
    async def start_app() -> None:
        await on_startup(app)

    return start_app
```
gpt4all-api/gpt4all_api/app/api_v1/routes/chat.py · 63 lines · new file
@@ -0,0 +1,63 @@

```python
from fastapi import APIRouter, Depends, Response, Security, status
from pydantic import BaseModel, Field
from typing import List, Dict
import logging
import time
from api_v1.settings import settings

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

### This should follow https://github.com/openai/openai-openapi/blob/master/openapi.yaml


class ChatCompletionMessage(BaseModel):
    role: str
    content: str


class ChatCompletionRequest(BaseModel):
    model: str = Field(..., description='The model to generate a completion from.')
    messages: List[ChatCompletionMessage] = Field(..., description='The messages to generate a chat completion from.')


class ChatCompletionChoice(BaseModel):
    message: ChatCompletionMessage
    index: int
    finish_reason: str


class ChatCompletionUsage(BaseModel):
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int


class ChatCompletionResponse(BaseModel):
    id: str
    object: str = 'text_completion'
    created: int
    model: str
    choices: List[ChatCompletionChoice]
    usage: ChatCompletionUsage


router = APIRouter(prefix="/chat", tags=["Completions Endpoints"])


@router.post("/completions", response_model=ChatCompletionResponse)
async def chat_completion(request: ChatCompletionRequest):
    '''
    Completes a GPT4All model response.
    '''
    # Placeholder response until chat inference is wired up
    # (an empty dict would fail ChatCompletionChoice validation).
    return ChatCompletionResponse(
        id='asdf',
        created=time.time(),
        model=request.model,
        choices=[ChatCompletionChoice(
            message=ChatCompletionMessage(role='assistant', content=''),
            index=0,
            finish_reason='stop',
        )],
        usage={
            'prompt_tokens': 0,
            'completion_tokens': 0,
            'total_tokens': 0
        }
    )
```
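A request to this scaffolded endpoint would be shaped like the following sketch (hypothetical values; the handler currently returns placeholder choices and zeroed usage, and assumes the compose setup with the API on port 4891):

```python
# Sketch: POST a ChatCompletionRequest-shaped payload to the chat route.
# The scaffolded handler echoes back placeholder choices for now.
import requests

resp = requests.post(
    "http://localhost:4891/v1/chat/completions",
    json={
        "model": "ggml-mpt-7b-chat.bin",
        "messages": [{"role": "user", "content": "Who is Michael Jordan?"}],
    },
)
print(resp.json())
```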
gpt4all-api/gpt4all_api/app/api_v1/routes/completions.py · 86 lines · new file
@@ -0,0 +1,86 @@

```python
from fastapi import APIRouter, Depends, Response, Security, status
from pydantic import BaseModel, Field
from typing import List, Dict
import logging
from uuid import uuid4
from api_v1.settings import settings
from gpt4all import GPT4All
import time

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

### This should follow https://github.com/openai/openai-openapi/blob/master/openapi.yaml

class CompletionRequest(BaseModel):
    model: str = Field(..., description='The model to generate a completion from.')
    prompt: str = Field(..., description='The prompt to begin completing from.')
    max_tokens: int = Field(7, description='Max tokens to generate')
    temperature: float = Field(0, description='Model temperature')
    top_p: float = Field(1.0, description='top_p')
    n: int = Field(1, description='How many completions to generate for each prompt.')
    stream: bool = Field(False, description='Stream responses')


class CompletionChoice(BaseModel):
    text: str
    index: int
    logprobs: float
    finish_reason: str


class CompletionUsage(BaseModel):
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int


class CompletionResponse(BaseModel):
    id: str
    object: str = 'text_completion'
    created: int
    model: str
    choices: List[CompletionChoice]
    usage: CompletionUsage


router = APIRouter(prefix="/completions", tags=["Completion Endpoints"])


@router.post("/", response_model=CompletionResponse)
async def completions(request: CompletionRequest):
    '''
    Completes a GPT4All model response.
    '''
    # global model
    if request.stream:
        raise NotImplementedError("Streaming is not yet implemented.")

    model = GPT4All(model_name=settings.model, model_path=settings.gpt4all_path)

    output = model.generate(prompt=request.prompt,
                            n_predict=request.max_tokens,
                            top_k=20,
                            top_p=request.top_p,
                            temp=request.temperature,
                            n_batch=1024,
                            repeat_penalty=1.2,
                            repeat_last_n=10,
                            context_erase=0)

    return CompletionResponse(
        id=str(uuid4()),
        created=time.time(),
        model=request.model,
        choices=[dict(CompletionChoice(
            text=output,
            index=0,
            logprobs=-1,
            finish_reason='stop'
        ))],
        usage={
            'prompt_tokens': 0,  # TODO how to compute this?
            'completion_tokens': 0,
            'total_tokens': 0
        }
    )
```
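For reference, a raw HTTP request matching `CompletionRequest` could look like this sketch (hypothetical values; assumes the compose setup with the API on port 4891 and the `requests` package from requirements.txt):

```python
# Sketch: POST directly to the completions route. Field names mirror
# the CompletionRequest pydantic model above.
import requests

resp = requests.post(
    "http://localhost:4891/v1/completions/",
    json={
        "model": "ggml-mpt-7b-chat.bin",
        "prompt": "Who is Michael Jordan?",
        "max_tokens": 50,
        "temperature": 0.28,
        "top_p": 0.95,
    },
)
print(resp.json()["choices"][0]["text"])
```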
gpt4all-api/gpt4all_api/app/api_v1/routes/engines.py · 38 lines · new file
@@ -0,0 +1,38 @@

```python
from fastapi import APIRouter, Depends, Response, Security, status
from pydantic import BaseModel, Field
from typing import List, Dict
import logging
from api_v1.settings import settings

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

### This should follow https://github.com/openai/openai-openapi/blob/master/openapi.yaml

class ListEnginesResponse(BaseModel):
    data: List[Dict] = Field(..., description="All available models.")


class EngineResponse(BaseModel):
    data: List[Dict] = Field(..., description="All available models.")


router = APIRouter(prefix="/engines", tags=["Search Endpoints"])


@router.get("/", response_model=ListEnginesResponse)
async def list_engines():
    '''
    List all available GPT4All models from
    https://raw.githubusercontent.com/nomic-ai/gpt4all/main/gpt4all-chat/metadata/models.json
    '''
    raise NotImplementedError()
    return ListEnginesResponse(data=[])


@router.get("/{engine_id}", response_model=EngineResponse)
async def retrieve_engine(engine_id: str):
    '''
    Retrieve details about a single engine.
    '''
    raise NotImplementedError()
    return EngineResponse()
```
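Both engine routes are stubs for now. One way `list_engines` could eventually be filled in, using the models.json registry named in its docstring (a sketch, not part of this commit; `fetch_available_models` is a hypothetical helper):

```python
# Hypothetical helper for list_engines: pull the model registry the
# docstring points at and return its entries.
import requests

MODELS_JSON_URL = (
    "https://raw.githubusercontent.com/nomic-ai/gpt4all/main/gpt4all-chat/metadata/models.json"
)

def fetch_available_models() -> list:
    response = requests.get(MODELS_JSON_URL, timeout=10)
    response.raise_for_status()
    return response.json()
```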
gpt4all-api/gpt4all_api/app/api_v1/routes/health.py · 12 lines · new file
@@ -0,0 +1,12 @@

```python
import logging
from fastapi import APIRouter
from fastapi.responses import JSONResponse

log = logging.getLogger(__name__)

router = APIRouter(prefix="/health", tags=["Health"])


@router.get('/', response_class=JSONResponse)
async def health_check():
    """Runs a health check on this instance of the API."""
    return JSONResponse({'status': 'ok'}, headers={'Access-Control-Allow-Origin': '*'})
```
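Note that this health router is not yet registered in `api_v1/api.py` in this commit, so `/v1/health` is unreachable; wiring it up would look like the following sketch:

```python
# Sketch (not in this commit): include the health router in
# gpt4all_api/app/api_v1/api.py so GET /v1/health/ responds.
from api_v1.routes import chat, completions, engines, health
from fastapi import APIRouter

router = APIRouter()

router.include_router(chat.router)
router.include_router(completions.router)
router.include_router(engines.router)
router.include_router(health.router)  # added
```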
gpt4all-api/gpt4all_api/app/api_v1/settings.py · 10 lines · new file
@@ -0,0 +1,10 @@

```python
from pydantic import BaseSettings


class Settings(BaseSettings):
    app_environment = 'dev'
    model: str = 'ggml-mpt-7b-chat.bin'
    gpt4all_path: str = '/models'


settings = Settings()
```
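`Settings` extends pydantic's `BaseSettings`, so each field can be overridden by an environment variable of the same name (matched case-insensitively); that is how the `model=ggml-mpt-7b-chat.bin` entry in docker-compose.yaml reaches the app. A minimal sketch of an override:

```python
# Sketch: BaseSettings reads environment variables at instantiation,
# so setting `model` before constructing Settings overrides the default.
import os

os.environ["model"] = "ggml-gpt4all-j-v1.3-groovy.bin"  # hypothetical model file

from api_v1.settings import Settings

print(Settings().model)  # -> ggml-gpt4all-j-v1.3-groovy.bin
```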
gpt4all-api/gpt4all_api/app/docs.py · 3 lines · new file
@@ -0,0 +1,3 @@

```python
desc = 'GPT4All API'

endpoint_paths = {'health': '/health'}
```
gpt4all-api/gpt4all_api/app/main.py · 61 lines · new file
@@ -0,0 +1,61 @@

```python
import os
import docs
import logging
from fastapi import FastAPI, HTTPException, Request
from starlette.middleware.cors import CORSMiddleware
from fastapi.logger import logger as fastapi_logger
from api_v1.settings import settings
from api_v1.api import router as v1_router
from api_v1 import events

logger = logging.getLogger(__name__)

app = FastAPI(title='GPT4All API', description=docs.desc)

# CORS configuration (in case you want to deploy)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["GET", "POST", "OPTIONS"],
    allow_headers=["*"],
)

logger.info('Adding v1 endpoints..')

# add v1
app.include_router(v1_router, prefix='/v1')
app.add_event_handler('startup', events.startup_event_handler(app))
app.add_exception_handler(HTTPException, events.on_http_error)


@app.on_event("startup")
async def startup():
    global model
    logger.info(f"Downloading/fetching model: {os.path.join(settings.gpt4all_path, settings.model)}")
    from gpt4all import GPT4All
    model = GPT4All(model_name=settings.model, model_path=settings.gpt4all_path)

    logger.info("GPT4All API is ready.")


@app.on_event("shutdown")
async def shutdown():
    logger.info("Shutting down API")


# This is needed to get logs to show up in the app
if "gunicorn" in os.environ.get("SERVER_SOFTWARE", ""):
    gunicorn_error_logger = logging.getLogger("gunicorn.error")
    gunicorn_logger = logging.getLogger("gunicorn")

    root_logger = logging.getLogger()
    fastapi_logger.setLevel(gunicorn_logger.level)
    fastapi_logger.handlers = gunicorn_error_logger.handlers
    root_logger.setLevel(gunicorn_logger.level)

    uvicorn_logger = logging.getLogger("uvicorn.access")
    uvicorn_logger.handlers = gunicorn_error_logger.handlers
else:
    # https://github.com/tiangolo/fastapi/issues/2019
    LOG_FORMAT2 = "[%(asctime)s %(process)d:%(threadName)s] %(name)s - %(levelname)s - %(message)s | %(filename)s:%(lineno)d"
    logging.basicConfig(level=logging.INFO, format=LOG_FORMAT2)
```
gpt4all-api/gpt4all_api/app/tests/test_endpoints.py · 35 lines · new file
@@ -0,0 +1,35 @@

```python
"""
Use the OpenAI python API to test gpt4all models.
"""
import openai

openai.api_base = "http://localhost:4891/v1"

openai.api_key = "not needed for a local LLM"


def test_completion():
    model = "gpt4all-j-v1.3-groovy"
    prompt = "Who is Michael Jordan?"
    response = openai.Completion.create(
        model=model,
        prompt=prompt,
        max_tokens=50,
        temperature=0.28,
        top_p=0.95,
        n=1,
        echo=True,
        stream=False
    )
    assert len(response['choices'][0]['text']) > len(prompt)
    print(response)


# def test_chat_completions():
#     model = "gpt4all-j-v1.3-groovy"
#     prompt = "Who is Michael Jordan?"
#     response = openai.ChatCompletion.create(
#         model=model,
#         messages=[]
#     )
```
gpt4all-api/gpt4all_api/requirements.txt · 10 lines · new file
@@ -0,0 +1,10 @@

```text
aiohttp>=3.6.2
aiofiles
pydantic>=1.4.0
requests>=2.24.0
ujson>=2.0.2
fastapi>=0.95.0
Jinja2>=3.0
gpt4all==0.2.3
pytest
openai
```
gpt4all-api/makefile · 37 lines · new file
@@ -0,0 +1,37 @@

```makefile
ROOT_DIR:=$(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
APP_NAME:=gpt4all_api
PYTHON:=python3.8

all: dependencies

fresh: clean dependencies

testenv: clean_testenv test_build
	docker compose up --build

testenv_d: clean_testenv test_build
	docker compose up --build -d

test:
	docker compose exec gpt4all_api pytest -svv --disable-warnings -p no:cacheprovider /app/tests

test_build:
	DOCKER_BUILDKIT=1 docker build -t gpt4all_api --progress plain -f gpt4all_api/Dockerfile.buildkit .

clean_testenv:
	docker compose down -v

fresh_testenv: clean_testenv testenv

venv:
	if [ ! -d $(ROOT_DIR)/env ]; then $(PYTHON) -m venv $(ROOT_DIR)/env; fi

dependencies: venv
	source $(ROOT_DIR)/env/bin/activate; yes w | python -m pip install -r $(ROOT_DIR)/$(APP_NAME)/requirements.txt

clean: clean_testenv
	# Remove existing environment
	rm -rf $(ROOT_DIR)/env;
	rm -rf $(ROOT_DIR)/$(APP_NAME)/*.pyc;
```