# Patch against .neuro/live.yaml (blob 563db51d -> f44dd324), two hunks under `jobs:`.
#
# Hunk 1 (@@ -65,14 +65,14 @@):
#   * VLLM_MODEL and VLLM_TOKENIZER change from
#     meta-llama/Meta-Llama-3-8B-Instruct to meta-llama/Meta-Llama-3.1-8B-Instruct.
#     Both keys are changed together so model and tokenizer keep the same value.
#   * The `vllm` job (image vllm/vllm-openai:v0.6.1.post2) changes
#     `preset` from a100x1 to H100x1.
#
# Hunk 2 (@@ -101,7 +101,7 @@):
#   * The job using image ollama/ollama:0.1.35 changes `preset` from a100x1 to H100x1.
#
# NOTE(review): this patch has been flattened onto a single line; the line breaks
#   and the leading indentation of every context/+/- line are gone, so it presumably
#   cannot be applied as-is. Restore it from the original commit rather than
#   re-indenting by hand (YAML nesting depends on the lost whitespace).
# NOTE(review): the new preset is spelled `H100x1` (capital H) while the old one was
#   lowercase `a100x1` -- confirm the exact preset name defined on the target cluster.
# NOTE(review): the vLLM image tag is unchanged by this patch -- confirm v0.6.1.post2
#   serves the Llama 3.1 checkpoint and that HF_TOKEN has access to that model repo.
diff --git a/.neuro/live.yaml b/.neuro/live.yaml index 563db51d..f44dd324 100644 --- a/.neuro/live.yaml +++ b/.neuro/live.yaml @@ -65,14 +65,14 @@ jobs: VLLM_API_BASE: http://${{ inspect_job('vllm').internal_hostname_named }}:8000/v1 OLLAMA_API_BASE: http://${{ inspect_job('ollama').internal_hostname_named }}:11434 POSTGRES_HOST: ${{ inspect_job('pgvector').internal_hostname_named }} - VLLM_MODEL: meta-llama/Meta-Llama-3-8B-Instruct - VLLM_TOKENIZER: meta-llama/Meta-Llama-3-8B-Instruct + VLLM_MODEL: meta-llama/Meta-Llama-3.1-8B-Instruct + VLLM_TOKENIZER: meta-llama/Meta-Llama-3.1-8B-Instruct HUGGINGFACE_TOKEN: secret:HF_TOKEN vllm: image: vllm/vllm-openai:v0.6.1.post2 name: vllm - preset: a100x1 + preset: H100x1 detach: true http_port: "8000" volumes: @@ -101,7 +101,7 @@ jobs: image: ollama/ollama:0.1.35 volumes: - ${{ volumes.ollama_models.ref_rw }} - preset: a100x1 + preset: H100x1 detach: true env: MODEL: "nomic-embed-text"