mirror of
https://github.com/nomic-ai/gpt4all.git
synced 2025-09-14 06:49:09 +00:00
GPU Inference Server (#1112)
* feat: local inference server
* fix: source to use bash + vars
* chore: isort and black
* fix: make file + inference mode
* chore: logging
* refactor: remove old links
* fix: add new env vars
* feat: hf inference server
* refactor: remove old links
* test: batch and single response
* chore: black + isort
* separate gpu and cpu dockerfiles
* moved gpu to separate dockerfile
* Fixed test endpoints
* Edits to API. server won't start due to failed instantiation error
* Method signature
* fix: gpu_infer
* tests: fix tests

---------

Co-authored-by: Andriy Mulyar <andriy.mulyar@gmail.com>
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
version: "3.5"
|
||||
version: "3.8"
|
||||
|
||||
services:
|
||||
gpt4all_api:
|
||||
@@ -13,6 +13,7 @@ services:
|
||||
- LOGLEVEL=debug
|
||||
- PORT=4891
|
||||
- model=ggml-mpt-7b-chat.bin
|
||||
- inference_mode=cpu
|
||||
volumes:
|
||||
- './gpt4all_api/app:/app'
|
||||
command: ["/start-reload.sh"]
|
Reference in New Issue
Block a user