From 3dfa105187103789cda6fdab42781aa62a8b2f96 Mon Sep 17 00:00:00 2001
From: Yevhenii Semendiak
Date: Wed, 16 Oct 2024 11:36:09 +0300
Subject: [PATCH] ENG-393: Use dedicated app, pgvector settings

---
 .neuro/live.yaml                | 10 ++++------
 settings/settings-app.yaml      | 33 +++++++++++++++++++++++++++++++++
 settings/settings-pgvector.yaml | 13 +++++++++++++
 3 files changed, 50 insertions(+), 6 deletions(-)
 create mode 100644 settings/settings-app.yaml
 create mode 100644 settings/settings-pgvector.yaml

diff --git a/.neuro/live.yaml b/.neuro/live.yaml
index e9669a5c..f8a9ca86 100644
--- a/.neuro/live.yaml
+++ b/.neuro/live.yaml
@@ -71,7 +71,7 @@ jobs:
   vllm:
     image: vllm/vllm-openai:v0.6.1.post2
     name: vllm
-    preset: gpu-medium
+    preset: a100x1
     detach: true
     http_port: "8000"
     volumes:
@@ -82,8 +82,6 @@ jobs:
       --model meta-llama/Meta-Llama-3.1-8B-Instruct
       --tokenizer meta-llama/Meta-Llama-3.1-8B-Instruct
       --dtype=half
-      --max-model-len=50000
-      --tensor-parallel-size=2
 #    cmd: >
 #      --model meta-llama/Meta-Llama-3.1-8B-Instruct
 #      --tokenizer meta-llama/Meta-Llama-3.1-8B-Instruct
@@ -102,7 +100,7 @@ jobs:
     image: ollama/ollama:0.1.35
     volumes:
       - ${{ volumes.ollama_models.ref_rw }}
-    preset: gpu-small
+    preset: a100x1
     detach: true
     env:
       MODEL: "nomic-embed-text"
@@ -119,5 +117,5 @@ jobs:
       POSTGRES_DB: postgres
       PGDATA: ${{ volumes.pgdata.mount }}/pgdata
     volumes:
-      - ${{ volumes.pgdata.ref_rw }}
-#      - ${{ volumes.pgdata_onprem.ref_rw }}
+      - ${{ volumes.pgdata.ref_rw }}
+      # - ${{ volumes.pgdata_onprem.ref_rw }}
diff --git a/settings/settings-app.yaml b/settings/settings-app.yaml
new file mode 100644
index 00000000..9f9e7e2f
--- /dev/null
+++ b/settings/settings-app.yaml
@@ -0,0 +1,33 @@
+server:
+  env_name: ${APP_ENV:prod}
+  port: ${PORT:8080}
+
+llm:
+  mode: openailike
+  tokenizer: ${VLLM_TOKENIZER:-}
+  max_new_tokens: ${VLLM_MAX_NEW_TOKENS:5000}
+  context_window: ${VLLM_CONTEXT_WINDOW:4096}
+  temperature: ${VLLM_TEMPERATURE:0.1}
+
+openai:
+  api_base: ${VLLM_API_BASE:https://api.openai.com/v1/}
+  api_key: ${VLLM_API_KEY:EMPTY}
+  model: ${VLLM_MODEL:-}
+  embedding_api_base: ${EMBEDDING_API_BASE:https://api.openai.com/v1/}
+  embedding_api_key: ${EMBEDDING_API_KEY:EMPTY}
+  embeddings_model: ${EMBEDDING_MODEL:text-embedding-ada-002}
+
+embedding:
+  mode: ${EMBEDDING_PROVIDER:openai}
+  ingest_mode: ${EMBEDDING_MODE:parallel}
+  embed_dim: ${EMBEDDING_DIM:768}
+
+ollama:
+  # Note: if you change the embedding model, you'll need to use a dedicated DB for ingest storage
+  embedding_model: ${OLLAMA_EMBEDDING_MODEL:nomic-embed-text}
+  embedding_api_base: ${OLLAMA_API_BASE:http://localhost:11434}
+  request_timeout: 300.0
+
+ui:
+  enabled: true
+  path: /
diff --git a/settings/settings-pgvector.yaml b/settings/settings-pgvector.yaml
new file mode 100644
index 00000000..e01b009c
--- /dev/null
+++ b/settings/settings-pgvector.yaml
@@ -0,0 +1,13 @@
+nodestore:
+  database: postgres
+
+vectorstore:
+  database: postgres
+
+postgres:
+  host: ${POSTGRES_HOST:localhost}
+  port: ${POSTGRES_PORT:5432}
+  database: ${POSTGRES_DB:postgres}
+  user: ${POSTGRES_USER:postgres}
+  password: ${POSTGRES_PASSWORD:postgres}
+  schema_name: ${POSTGRES_SHEMA:postgres}
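
The settings files added above parameterize every value as ${VAR:default}; the application's settings loader is expected to substitute the environment variable when it is set and fall back to the default otherwise (e.g. ${POSTGRES_HOST:localhost} resolves to "localhost" unless POSTGRES_HOST is exported). A minimal sketch of wiring these variables from a job's env: block in .neuro/live.yaml follows; the job name "privategpt" and the in-cluster host names for the vllm, ollama and postgres jobs are illustrative assumptions, not part of this patch:

  privategpt:
    env:
      # OpenAI-compatible endpoint served by the vllm job (assumed host/port)
      VLLM_API_BASE: http://vllm:8000/v1/
      VLLM_MODEL: meta-llama/Meta-Llama-3.1-8B-Instruct
      VLLM_TOKENIZER: meta-llama/Meta-Llama-3.1-8B-Instruct
      # Embeddings via the ollama job; 768 matches nomic-embed-text
      EMBEDDING_PROVIDER: ollama
      OLLAMA_API_BASE: http://ollama:11434
      EMBEDDING_DIM: "768"
      # Postgres credentials matching the defaults in settings-pgvector.yaml (assumed host)
      POSTGRES_HOST: pgvector
      POSTGRES_PORT: "5432"
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: postgres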