mirror of
https://github.com/nomic-ai/gpt4all.git
synced 2025-09-14 14:59:13 +00:00
GPU Inference Server (#1112)
* feat: local inference server * fix: source to use bash + vars * chore: isort and black * fix: make file + inference mode * chore: logging * refactor: remove old links * fix: add new env vars * feat: hf inference server * refactor: remove old links * test: batch and single response * chore: black + isort * separate gpu and cpu dockerfiles * moved gpu to separate dockerfile * Fixed test endpoints * Edits to API. server won't start due to failed instantiation error * Method signature * fix: gpu_infer * tests: fix tests --------- Co-authored-by: Andriy Mulyar <andriy.mulyar@gmail.com>
This commit is contained in:
24
gpt4all-api/docker-compose.gpu.yaml
Normal file
24
gpt4all-api/docker-compose.gpu.yaml
Normal file
@@ -0,0 +1,24 @@
---
# docker-compose.gpu.yaml — runs GPU inference for gpt4all-api via the
# Hugging Face text-generation-inference server, exposing it on host port 8080.
version: "3.8"

services:
  gpt4all_gpu:
    image: ghcr.io/huggingface/text-generation-inference
    container_name: gpt4all_gpu
    restart: always  # restart on error (usually code compilation from save during bad state)
    environment:
      # NOTE(review): entries here set variables *inside* the container, but
      # `command:` below is interpolated by docker-compose from the *host*
      # shell environment before the container starts — MODEL_ID and
      # NUM_SHARD must be exported on the host for the command to receive
      # them; confirm this is the intended wiring.
      - HUGGING_FACE_HUB_TOKEN=token  # placeholder — replace with a real HF token
      - USE_FLASH_ATTENTION=false
      - MODEL_ID=''
      - NUM_SHARD=1
    command: --model-id $MODEL_ID --num-shard $NUM_SHARD
    volumes:
      - ./:/data  # model weights / data cached in the project directory
    ports:
      - "8080:80"  # host 8080 -> container 80 (quoted to avoid sexagesimal parsing)
    shm_size: 1g  # shared memory for inter-shard communication
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              capabilities: [gpu]
Reference in New Issue
Block a user