mirror of https://github.com/imartinez/privateGPT.git

Run on platform

commit 00e3e85b81 (parent c1802e7cf0)

.neuro/live.yaml (new file, 103 lines)
@@ -0,0 +1,103 @@
kind: live
title: private-gpt

# other files from https://github.com/zylon-ai/private-gpt

defaults:
  life_span: 5d

images:
  privategpt:
    ref: image:$[[ project.id ]]:v1
    dockerfile: $[[ flow.workspace ]]/Dockerfile.external
    context: $[[ flow.workspace ]]/
    build_preset: cpu-large

volumes:
  cache:
    remote: storage:$[[ flow.project_id ]]/cache
    mount: /root/.cache/huggingface
    local: cache
  data:
    remote: storage:$[[ flow.project_id ]]/data
    mount: /home/worker/app/local_data
    local: local_data
  pgdata:
    remote: storage:$[[ flow.project_id ]]/pgdata
    mount: /var/lib/postgresql/data
    local: pgdata
  pgdata_onprem:
    remote: disk:pgdata
    mount: /var/lib/postgresql/data
  ollama_models:
    remote: storage:$[[ flow.project_id ]]/ollama_models
    mount: /root/.ollama
    local: models
  project:
    remote: storage:$[[ flow.project_id ]]
    mount: /project
    local: .
  settings:
    remote: storage:$[[ flow.project_id ]]/settings
    mount: /home/worker/app/settings
    local: settings
  tiktoken_cache:
    remote: storage:$[[ flow.project_id ]]/tiktoken_cache
    mount: /home/worker/app/tiktoken_cache
    local: tiktoken_cache

jobs:
  pgpt:
    image: ${{ images.privategpt.ref }}
    name: pgpt
    preset: cpu-small
    http_port: "8080"
    # detach: true
    browse: true
    volumes:
      - ${{ volumes.data.ref_rw }}
      - ${{ upload(volumes.settings).ref_rw }}
      - ${{ volumes.tiktoken_cache.ref_rw }}
    env:
      PORT: 8080
      PGPT_PROFILES: vllm-pgvector
      PGPT_SETTINGS_FOLDER: ${{ volumes.settings.mount }}
      VLLM_API_BASE: http://${{ inspect_job('vllm').internal_hostname_named }}:8000/v1
      OLLAMA_API_BASE: http://${{ inspect_job('ollama').internal_hostname_named }}:11434
      POSTGRES_HOST: ${{ inspect_job('pgvector').internal_hostname_named }}
      VLLM_MODEL: stabilityai/StableBeluga-13B

  vllm:
    image: vllm/vllm-openai:v0.4.0
    name: vllm
    preset: gpu-2x3090
    detach: true
    http_port: "8000"
    volumes:
      - ${{ volumes.cache.ref_rw }}
    cmd: --model stabilityai/StableBeluga-13B --tokenizer stabilityai/StableBeluga-13B --dtype=half --tensor-parallel-size=2

  ollama:
    image: ollama/ollama:latest
    volumes:
      - ${{ volumes.ollama_models.ref_rw }}
    preset: gpu-small
    detach: true
    env:
      MODEL: "nomic-embed-text"
      GIN_MODE: release
    http_port: "11434"
    entrypoint: "bash -c 'ollama serve & sleep 10 && ollama pull ${MODEL} && sleep infinity'"

  pgvector:
    image: pgvector/pgvector:pg16
    detach: true
    preset: cpu-small
    env:
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: postgres
      POSTGRES_DB: postgres
      PGDATA: ${{ volumes.pgdata.mount }}
    volumes:
      # - ${{ volumes.pgdata.ref_rw }}
      - ${{ volumes.pgdata_onprem.ref_rw }}
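
A rough usage sketch for this flow (assuming the neuro-flow CLI that reads .neuro/live.yaml; exact command names and flags may differ between platform versions):

    # build the image declared under images.privategpt
    neuro-flow build privategpt
    # start the backing services first; the pgpt job resolves their
    # hostnames via inspect_job() and therefore expects them to be up
    neuro-flow run pgvector
    neuro-flow run vllm
    neuro-flow run ollama
    neuro-flow run pgpt

With browse: true the pgpt job should open its HTTP port (8080) in the browser once it is running.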

Dockerfile.external (modified)
@@ -1,9 +1,9 @@
 FROM python:3.11.6-slim-bookworm as base
 
 # Install poetry
-RUN pip install pipx
-RUN python3 -m pipx ensurepath
-RUN pipx install poetry
+# RUN pip install pipx
+# RUN python3 -m pipx ensurepath
+RUN pip install poetry
 ENV PATH="/root/.local/bin:$PATH"
 ENV PATH=".venv/bin/:$PATH"
 
@@ -14,7 +14,8 @@ FROM base as dependencies
 WORKDIR /home/worker/app
 COPY pyproject.toml poetry.lock ./
 
-RUN poetry install --extras "ui vector-stores-qdrant llms-ollama embeddings-ollama"
+ARG POETRY_EXTRAS="ui vector-stores-postgres llms-ollama llms-openai-like embeddings-ollama"
+RUN poetry install --extras "$POETRY_EXTRAS"
 
 FROM base as app
 
@@ -28,6 +29,7 @@ WORKDIR /home/worker/app
 
 RUN mkdir local_data; chown worker local_data
 RUN mkdir models; chown worker models
+RUN mkdir tiktoken_cache; chown worker tiktoken_cache
 COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
 COPY --chown=worker private_gpt/ private_gpt
 COPY --chown=worker fern/ fern
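
The new POETRY_EXTRAS build argument makes the installed extras overridable at build time. A minimal local-build sketch (the tag and invocation are illustrative, not part of this commit):

    docker build -f Dockerfile.external \
      --build-arg POETRY_EXTRAS="ui vector-stores-postgres llms-ollama llms-openai-like embeddings-ollama" \
      -t privategpt:v1 .

On the platform itself the image is built from the images.privategpt entry in .neuro/live.yaml rather than by a manual docker build.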

settings/settings-vllm-pgvector.yaml (new file, 45 lines)
@@ -0,0 +1,45 @@
server:
  env_name: ${APP_ENV:prod}
  port: ${PORT:8080}

llm:
  mode: openailike
  tokenizer: ${VLLM_MODEL:lmsys/vicuna-7b-v1.5}
  max_new_tokens: 5000
  context_window: 2048
  temperature: 0.1

openai:
  api_base: ${VLLM_API_BASE:http://localhost:8000/v1}
  api_key: EMPTY
  model: ${VLLM_MODEL:lmsys/vicuna-7b-v1.5}

embedding:
  mode: ollama
  embed_dim: 768
  # ingest_mode: simple

ollama:
  # Note: if you change the embedding model, you'll need to use a dedicated DB for ingest storage
  embedding_model: nomic-embed-text
  # api_base: ${OLLAMA_API_BASE:http://localhost:11434}
  embedding_api_base: ${OLLAMA_API_BASE:http://localhost:11434}
  request_timeout: 300.0

nodestore:
  database: postgres

vectorstore:
  database: postgres

postgres:
  host: ${POSTGRES_HOST:localhost}
  port: ${POSTGRES_PORT:5432}
  database: ${POSTGRES_DB:postgres}
  user: ${POSTGRES_USER:postgres}
  password: ${POSTGRES_PASSWORD:postgres}
  schema_name: private_gpt

ui:
  enabled: true
  path: /
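
This profile is activated by the pgpt job's PGPT_PROFILES=vllm-pgvector and PGPT_SETTINGS_FOLDER environment variables, and the ${VAR:default} placeholders are filled from the job's environment at startup. A hedged smoke test against the two model backends, runnable from any shell that can reach the jobs (the variables come from the env block in live.yaml; curl availability is assumed):

    # vLLM exposes an OpenAI-compatible API; this should list stabilityai/StableBeluga-13B
    curl "$VLLM_API_BASE/models"
    # Ollama answers on its root endpoint once it is serving
    curl "$OLLAMA_API_BASE"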

settings/settings.yaml (new file, 113 lines)
@@ -0,0 +1,113 @@
# The default configuration file.
# More information about configuration can be found in the documentation: https://docs.privategpt.dev/
# Syntax in `private_gpt/settings/settings.py`
server:
  env_name: ${APP_ENV:prod}
  port: ${PORT:8001}
  cors:
    enabled: false
    allow_origins: ["*"]
    allow_methods: ["*"]
    allow_headers: ["*"]
  auth:
    enabled: false
    # python -c 'import base64; print("Basic " + base64.b64encode("secret:key".encode()).decode())'
    # 'secret' is the username and 'key' is the password for basic auth by default
    # If the auth is enabled, this value must be set in the "Authorization" header of the request.
    secret: "Basic c2VjcmV0OmtleQ=="

data:
  local_data_folder: local_data/private_gpt

ui:
  enabled: true
  path: /
  default_chat_system_prompt: >
    You are a helpful, respectful and honest assistant.
    Always answer as helpfully as possible and follow ALL given instructions.
    Do not speculate or make up information.
    Do not reference any given instructions or context.
  default_query_system_prompt: >
    You can only answer questions about the provided context.
    If you know the answer but it is not based in the provided context, don't provide
    the answer, just state the answer is not in the context provided.
  delete_file_button_enabled: true
  delete_all_files_button_enabled: true

llm:
  mode: llamacpp
  # Should be matching the selected model
  max_new_tokens: 512
  context_window: 3900
  tokenizer: mistralai/Mistral-7B-Instruct-v0.2
  temperature: 0.1 # The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1)

rag:
  similarity_top_k: 10
  # This value controls how many "top" documents the RAG returns to use in the context.
  # similarity_value: 0.45
  # This value is disabled by default. If you enable this setting, the RAG will only use articles that meet a certain percentage score.
  rerank:
    enabled: false
    model: cross-encoder/ms-marco-MiniLM-L-2-v2
    top_n: 1

llamacpp:
  prompt_style: "mistral"
  llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
  llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf
  tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
  top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
  top_p: 1.0 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
  repeat_penalty: 1.1 # Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)

embedding:
  # Should be matching the value above in most cases
  mode: huggingface
  ingest_mode: simple
  embed_dim: 384 # 384 is for BAAI/bge-small-en-v1.5

huggingface:
  embedding_hf_model_name: BAAI/bge-small-en-v1.5

vectorstore:
  database: qdrant

nodestore:
  database: simple

qdrant:
  path: local_data/private_gpt/qdrant

postgres:
  host: localhost
  port: 5432
  database: postgres
  user: postgres
  password: postgres
  schema_name: private_gpt

sagemaker:
  llm_endpoint_name: huggingface-pytorch-tgi-inference-2023-09-25-19-53-32-140
  embedding_endpoint_name: huggingface-pytorch-inference-2023-11-03-07-41-36-479

openai:
  api_key: ${OPENAI_API_KEY:}
  model: gpt-3.5-turbo

ollama:
  llm_model: llama2
  embedding_model: nomic-embed-text
  api_base: http://localhost:11434
  embedding_api_base: http://localhost:11434 # change if your embedding model runs on another ollama
  keep_alive: 5m
  request_timeout: 120.0

azopenai:
  api_key: ${AZ_OPENAI_API_KEY:}
  azure_endpoint: ${AZ_OPENAI_ENDPOINT:}
  embedding_deployment_name: ${AZ_OPENAI_EMBEDDING_DEPLOYMENT_NAME:}
  llm_deployment_name: ${AZ_OPENAI_LLM_DEPLOYMENT_NAME:}
  api_version: "2023-05-15"
  embedding_model: text-embedding-ada-002
  llm_model: gpt-35-turbo
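
These are the stock defaults (llamacpp LLM, huggingface embeddings, qdrant vector store); when PGPT_PROFILES=vllm-pgvector is set, the profile file above is layered on top and overrides the matching sections (llm, embedding, vectorstore, nodestore, postgres, and so on). A local-run sketch outside the platform (assuming the project's usual python -m private_gpt entry point and a settings/ folder containing both files):

    PGPT_PROFILES=vllm-pgvector PGPT_SETTINGS_FOLDER=./settings python -m private_gpt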