Merge pull request #3 from neuro-inc/eng-393

ENG-393: Use dedicated app, pgvector settings
This commit is contained in:
Yevhenii Semendiak 2024-10-16 11:41:31 +03:00 committed by GitHub
commit 12c850e410
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 50 additions and 6 deletions

View File

@ -71,7 +71,7 @@ jobs:
vllm:
image: vllm/vllm-openai:v0.6.1.post2
name: vllm
- preset: gpu-medium
+ preset: a100x1
detach: true
http_port: "8000"
volumes:
@ -82,8 +82,6 @@ jobs:
--model meta-llama/Meta-Llama-3.1-8B-Instruct
--tokenizer meta-llama/Meta-Llama-3.1-8B-Instruct
--dtype=half
- --max-model-len=50000
- --tensor-parallel-size=2
# cmd: >
# --model meta-llama/Meta-Llama-3.1-8B-Instruct
# --tokenizer meta-llama/Meta-Llama-3.1-8B-Instruct
@ -102,7 +100,7 @@ jobs:
image: ollama/ollama:0.1.35
volumes:
- ${{ volumes.ollama_models.ref_rw }}
- preset: gpu-small
+ preset: a100x1
detach: true
env:
MODEL: "nomic-embed-text"
@ -120,4 +118,4 @@ jobs:
PGDATA: ${{ volumes.pgdata.mount }}/pgdata
volumes:
- ${{ volumes.pgdata.ref_rw }}
- # - ${{ volumes.pgdata_onprem.ref_rw }}
+ # - ${{ volumes.pgdata_onprem.ref_rw }}

View File

@ -0,0 +1,33 @@
# Settings profile: an OpenAI-compatible LLM endpoint with configurable
# embeddings (OpenAI-style API or Ollama) and the web UI enabled.
#
# Values written as ${NAME:fallback} are placeholders. They are presumably
# expanded by the consuming application's settings loader from the environment
# (TODO confirm). They are left as plain, unquoted scalars exactly as authored.

server:
  env_name: ${APP_ENV:prod}
  port: ${PORT:8080}

llm:
  mode: openailike
  # NOTE(review): every other placeholder here uses ${NAME:fallback}; this one
  # has "-" after the colon. Confirm whether the loader reads ":-" as an empty
  # fallback (shell style) or yields the literal string "-".
  tokenizer: ${VLLM_TOKENIZER:-}
  # NOTE(review): the fallback for max_new_tokens (5000) is larger than the
  # fallback for context_window (4096) — confirm this is intended.
  max_new_tokens: ${VLLM_MAX_NEW_TOKENS:5000}
  context_window: ${VLLM_CONTEXT_WINDOW:4096}
  temperature: ${VLLM_TEMPERATURE:0.1}

openai:
  api_base: ${VLLM_API_BASE:https://api.openai.com/v1/}
  api_key: ${VLLM_API_KEY:EMPTY}
  # NOTE(review): same ":-" form as llm.tokenizer above — confirm the
  # resulting value when VLLM_MODEL is unset.
  model: ${VLLM_MODEL:-}
  embedding_api_base: ${EMBEDDING_API_BASE:https://api.openai.com/v1/}
  embedding_api_key: ${EMBEDDING_API_KEY:EMPTY}
  embeddings_model: ${EMBEDDING_MODEL:text-embedding-ada-002}

embedding:
  mode: ${EMBEDDING_PROVIDER:openai}
  ingest_mode: ${EMBEDDING_MODE:parallel}
  # NOTE(review): verify that 768 matches the output dimension of whichever
  # embedding model is actually selected.
  embed_dim: ${EMBEDDING_DIM:768}

ollama:
  # Note: if you change the embedding model, you'll need to use a dedicated DB
  # for ingest storage.
  embedding_model: ${OLLAMA_EMBEDDING_MODEL:nomic-embed-text}
  embedding_api_base: ${OLLAMA_API_BASE:http://localhost:11434}
  request_timeout: 300.0

ui:
  enabled: true
  path: /

View File

@ -0,0 +1,13 @@
# Storage settings: both the node store and the vector store are set to use
# the "postgres" database backend, configured by the `postgres` section below.
#
# Values written as ${NAME:fallback} are placeholders. They are presumably
# expanded by the consuming application's settings loader from the environment
# (TODO confirm). They are left as plain, unquoted scalars exactly as authored.

nodestore:
  database: postgres

vectorstore:
  database: postgres

postgres:
  host: ${POSTGRES_HOST:localhost}
  port: ${POSTGRES_PORT:5432}
  database: ${POSTGRES_DB:postgres}
  user: ${POSTGRES_USER:postgres}
  # NOTE(review): the fallback here is the well-known value "postgres"; make
  # sure POSTGRES_PASSWORD is always supplied outside local development.
  password: ${POSTGRES_PASSWORD:postgres}
  # NOTE(review): the variable name is spelled POSTGRES_SHEMA (missing "C").
  # It is kept as-is so anything already setting that name keeps working; if it
  # should be POSTGRES_SCHEMA, rename it together with whatever sets it.
  schema_name: ${POSTGRES_SHEMA:postgres}