mirror of
https://github.com/imartinez/privateGPT.git
synced 2025-08-08 10:57:48 +00:00
Update vLLM config to use llama 3.1 8B by default
This commit is contained in:
parent
1938cac2c2
commit
f8fdef0f38
@ -69,22 +69,40 @@ jobs:
|
|||||||
VLLM_TOKENIZER: meta-llama/Meta-Llama-3-8B-Instruct
|
VLLM_TOKENIZER: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
|
||||||
vllm:
|
vllm:
|
||||||
image: vllm/vllm-openai:v0.5.1
|
image: vllm/vllm-openai:v0.6.1.post2
|
||||||
name: vllm
|
name: vllm
|
||||||
preset: H100x1
|
preset: gpu-medium
|
||||||
detach: true
|
detach: true
|
||||||
http_port: "8000"
|
http_port: "8000"
|
||||||
volumes:
|
volumes:
|
||||||
- ${{ volumes.cache.ref_rw }}
|
- ${{ volumes.cache.ref_rw }}
|
||||||
env:
|
env:
|
||||||
HF_TOKEN: secret:HF_TOKEN
|
HF_TOKEN: secret:HF_TOKEN
|
||||||
cmd: --model meta-llama/Meta-Llama-3-8B-Instruct --tokenizer meta-llama/Meta-Llama-3-8B-Instruct --dtype=half
|
cmd: >
|
||||||
|
--model meta-llama/Meta-Llama-3.1-8B-Instruct
|
||||||
|
--tokenizer meta-llama/Meta-Llama-3.1-8B-Instruct
|
||||||
|
--dtype=half
|
||||||
|
--max-model-len=50000
|
||||||
|
--tensor-parallel-size=2
|
||||||
|
# cmd: >
|
||||||
|
# --model meta-llama/Meta-Llama-3.1-8B-Instruct
|
||||||
|
# --tokenizer meta-llama/Meta-Llama-3.1-8B-Instruct
|
||||||
|
# --dtype=half
|
||||||
|
# cmd: >
|
||||||
|
# --model TechxGenus/Meta-Llama-3-70B-AWQ
|
||||||
|
# --tokenizer TechxGenus/Meta-Llama-3-70B-AWQ
|
||||||
|
# -q=awq
|
||||||
|
# cmd: >
|
||||||
|
# --model mgoin/Meta-Llama-3-70B-Instruct-Marlin
|
||||||
|
# --tokenizer mgoin/Meta-Llama-3-70B-Instruct-Marlin
|
||||||
|
# --dtype=half
|
||||||
|
# -q=marlin
|
||||||
|
|
||||||
ollama:
|
ollama:
|
||||||
image: ollama/ollama:0.1.35
|
image: ollama/ollama:0.1.35
|
||||||
volumes:
|
volumes:
|
||||||
- ${{ volumes.ollama_models.ref_rw }}
|
- ${{ volumes.ollama_models.ref_rw }}
|
||||||
preset: H100x1
|
preset: gpu-small
|
||||||
detach: true
|
detach: true
|
||||||
env:
|
env:
|
||||||
MODEL: "nomic-embed-text"
|
MODEL: "nomic-embed-text"
|
||||||
|
Loading…
Reference in New Issue
Block a user