mirror of
https://github.com/imartinez/privateGPT.git
synced 2025-05-05 14:56:40 +00:00
Use Llama 3
This commit is contained in:
parent
6ec581d172
commit
17f32a5f92
@ -65,8 +65,8 @@ jobs:
|
||||
VLLM_API_BASE: http://${{ inspect_job('vllm').internal_hostname_named }}:8000/v1
|
||||
OLLAMA_API_BASE: http://${{ inspect_job('ollama').internal_hostname_named }}:11434
|
||||
POSTGRES_HOST: ${{ inspect_job('pgvector').internal_hostname_named }}
|
||||
VLLM_MODEL: stabilityai/StableBeluga-13B
|
||||
VLLM_TOKENIZER: stabilityai/StableBeluga-13B
|
||||
VLLM_MODEL: meta-llama/Meta-Llama-3-8B-Instruct
|
||||
VLLM_TOKENIZER: meta-llama/Meta-Llama-3-8B-Instruct
|
||||
|
||||
vllm:
|
||||
image: vllm/vllm-openai:v0.4.0
|
||||
@ -76,7 +76,9 @@ jobs:
|
||||
http_port: "8000"
|
||||
volumes:
|
||||
- ${{ volumes.cache.ref_rw }}
|
||||
cmd: --model stabilityai/StableBeluga-13B --tokenizer stabilityai/StableBeluga-13B --dtype=half --tensor-parallel-size=2
|
||||
env:
|
||||
HF_TOKEN: secret:HF_TOKEN
|
||||
cmd: --model meta-llama/Meta-Llama-3-8B-Instruct --tokenizer meta-llama/Meta-Llama-3-8B-Instruct --dtype=half --tensor-parallel-size=2
|
||||
|
||||
ollama:
|
||||
image: ollama/ollama:latest
|
||||
|
@ -6,10 +6,11 @@ Note: this setup is mostly for POC purposes. For production-ready setup, you'll
|
||||
1. `git clone` this repo and `cd` into its root. We assume you have installed the platform CLIs and are authorized on the cluster.
|
||||
1. Build image for web app with `neuro-flow build privategpt`
|
||||
2. Create block storage for PGVector with `neuro disk create --name pgdata 10G --timeout-unused 100d`
|
||||
3. `neuro-flow run pgvector` -- start vector store
|
||||
4. `neuro-flow run ollama` -- start embeddings server
|
||||
5. `neuro-flow run vllm` -- start LLM inference server. Note: if you want to change LLM hosted there, change it in bash command and in `env.VLLM_MODEL` of `pgpt` job.
|
||||
6. `neuro-flow run pgpt` -- start PrivateGPT web server.
|
||||
3. Create a secret with your HuggingFace token so that models can be pulled: `neuro secret add HF_TOKEN <token>` (see https://huggingface.co/settings/tokens)
|
||||
4. `neuro-flow run pgvector` -- start vector store
|
||||
5. `neuro-flow run ollama` -- start embeddings server
|
||||
6. `neuro-flow run vllm` -- start LLM inference server. Note: if you want to change the LLM hosted there, change it in the `cmd` of the `vllm` job and in `env.VLLM_MODEL` (and the matching `env.VLLM_TOKENIZER`) of the `pgpt` job.
|
||||
7. `neuro-flow run pgpt` -- start PrivateGPT web server.
|
||||
|
||||
### Running PrivateGPT as stand-alone job
|
||||
<details>
|
||||
|
Loading…
Reference in New Issue
Block a user