Mirror of https://github.com/imartinez/privateGPT.git, synced 2025-08-28 20:12:38 +00:00
Merge c4be3f8cd2 into b7ee43788d (commit 078778c6af)
Dockerfile.fireworks (new file, 54 lines)
@@ -0,0 +1,54 @@
FROM python:3.11.6-slim-bookworm as base

# Install poetry
RUN pip install pipx
RUN python3 -m pipx ensurepath
RUN pipx install poetry==1.8.3
ENV PATH="/root/.local/bin:$PATH"
ENV PATH=".venv/bin/:$PATH"

RUN apt update && apt install -y \
    build-essential

# https://python-poetry.org/docs/configuration/#virtualenvsin-project
ENV POETRY_VIRTUALENVS_IN_PROJECT=true

FROM base as dependencies
WORKDIR /home/worker/app
COPY pyproject.toml poetry.lock ./

ARG POETRY_EXTRAS="ui llms-fireworks embeddings-fireworks vector-stores-qdrant embeddings-openai"
RUN poetry install --no-root --extras "${POETRY_EXTRAS}"

FROM base as app
ENV PYTHONUNBUFFERED=1
ENV PORT=8080
ENV APP_ENV=prod
ENV PYTHONPATH="$PYTHONPATH:/home/worker/app/private_gpt/"
EXPOSE 8080

# Prepare a non-root user
# More info about how to configure UIDs and GIDs in Docker:
# https://github.com/systemd/systemd/blob/main/docs/UIDS-GIDS.md

# Define the User ID (UID) for the non-root user
# UID 100 is chosen to avoid conflicts with existing system users
ARG UID=100

# Define the Group ID (GID) for the non-root user
# GID 65534 is often used for the 'nogroup' or 'nobody' group
ARG GID=65534

RUN adduser --system --gid ${GID} --uid ${UID} --home /home/worker worker
WORKDIR /home/worker/app

RUN chown worker /home/worker/app
RUN mkdir local_data && chown worker local_data
RUN mkdir models && chown worker models
COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
COPY --chown=worker private_gpt/ private_gpt
COPY --chown=worker *.yaml .
COPY --chown=worker scripts/ scripts

USER worker
ENTRYPOINT python -m private_gpt
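As a rough sketch of how this image can be exercised on its own (the image tag, key value, and port mapping below are illustrative assumptions, not part of the commit):

```bash
# Build the Fireworks-enabled image from the repository root
docker build -f Dockerfile.fireworks -t privategpt-fireworks .

# Run it with the fireworks profile and an API key, mapping the app port to the host
docker run --rm \
  -e PGPT_PROFILES=fireworks \
  -e FIREWORKS_API_KEY="your-key-here" \
  -p 3001:8080 \
  privategpt-fireworks
```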
Docker Compose file
@@ -1,5 +1,4 @@
 services:

 #-----------------------------------
 #---- Private-GPT services ---------
 #-----------------------------------
@@ -93,7 +92,7 @@ services:
     ports:
       - "11434:11434"
     volumes:
-      - ./models:/root/.ollama
+      - ./local_data:/root/.ollama
     profiles:
       - ""
       - ollama-cpu
@@ -114,3 +113,21 @@ services:
               capabilities: [gpu]
     profiles:
       - ollama-cuda
+
+  # fireworks service
+  private-gpt-fireworks:
+    build:
+      context: .
+      dockerfile: Dockerfile.fireworks
+    volumes:
+      - ./local_data/:/home/worker/app/local_data
+    ports:
+      - "3001:8080"
+    environment:
+      PORT: 8080
+      PGPT_PROFILES: fireworks
+      FIREWORKS_API_KEY: ${FIREWORKS_API_KEY}
+    env_file:
+      - .env
+    profiles:
+      - fireworks
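A minimal way to exercise the new service, assuming Docker Compose v2 and that `FIREWORKS_API_KEY` is exported or present in `.env` (the exact invocation is a sketch, not part of the commit):

```bash
# Start only the services tagged with the "fireworks" profile
docker compose --profile fireworks up --build -d

# Follow the logs of the Fireworks-backed PrivateGPT container
docker compose --profile fireworks logs -f private-gpt-fireworks
```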
Installation documentation (MDX)
@@ -3,45 +3,63 @@ It is important that you review the [Main Concepts](../concepts) section to unde
 ## Base requirements to run PrivateGPT

 ### 1. Clone the PrivateGPT Repository

 Clone the repository and navigate to it:

 ```bash
 git clone https://github.com/zylon-ai/private-gpt
 cd private-gpt
 ```

 ### 2. Install Python 3.11

 If you do not have Python 3.11 installed, install it using a Python version manager like `pyenv`. Earlier Python versions are not supported.

 #### macOS/Linux

 Install and set Python 3.11 using [pyenv](https://github.com/pyenv/pyenv):

 ```bash
 pyenv install 3.11
 pyenv local 3.11
 ```

 #### Windows

 Install and set Python 3.11 using [pyenv-win](https://github.com/pyenv-win/pyenv-win):

 ```bash
 pyenv install 3.11
 pyenv local 3.11
 ```

 ### 3. Install `Poetry`

 Install [Poetry](https://python-poetry.org/docs/#installing-with-the-official-installer) for dependency management:
 Follow the instructions on the official Poetry website to install it.

 <Callout intent="warning">
-A bug exists in Poetry versions 1.7.0 and earlier. We strongly recommend upgrading to a tested version.
-To upgrade Poetry to latest tested version, run `poetry self update 1.8.3` after installing it.
+A bug exists in Poetry versions 1.7.0 and earlier. We strongly recommend
+upgrading to a tested version. To upgrade Poetry to latest tested version, run
+`poetry self update 1.8.3` after installing it.
 </Callout>

 ### 4. Optional: Install `make`

 To run various scripts, you need to install `make`. Follow the instructions for your operating system:

 #### macOS

 (Using Homebrew):

 ```bash
 brew install make
 ```

 #### Windows

 (Using Chocolatey):

 ```bash
 choco install make
 ```
@@ -53,6 +71,7 @@ PrivateGPT allows customization of the setup, from fully local to cloud-based, b
 ```bash
 poetry install --extras "<extra1> <extra2>..."
 ```

 Where `<extra>` can be any of the following options described below.

 ### Available Modules
@@ -62,7 +81,7 @@ You need to choose one option per category (LLM, Embeddings, Vector Stores, UI).
 #### LLM

 | **Option** | **Description** | **Extra** |
-|--------------|------------------------------------------------------------------------|---------------------|
+| ---------- | --------------------------------------------------------------------- | ---------------- |
 | **ollama** | Adds support for Ollama LLM, requires Ollama running locally | llms-ollama |
 | llama-cpp | Adds support for local LLM using LlamaCPP | llms-llama-cpp |
 | sagemaker | Adds support for Amazon Sagemaker LLM, requires Sagemaker endpoints | llms-sagemaker |
@@ -74,7 +93,7 @@ You need to choose one option per category (LLM, Embeddings, Vector Stores, UI).
 #### Embeddings

 | **Option** | **Description** | **Extra** |
-|------------------|--------------------------------------------------------------------------------|-------------------------|
+| ----------- | -------------------------------------------------------------------------- | ---------------------- |
 | **ollama** | Adds support for Ollama Embeddings, requires Ollama running locally | embeddings-ollama |
 | huggingface | Adds support for local Embeddings using HuggingFace | embeddings-huggingface |
 | openai | Adds support for OpenAI Embeddings, requires OpenAI API key | embeddings-openai |
@@ -85,7 +104,7 @@ You need to choose one option per category (LLM, Embeddings, Vector Stores, UI).
 #### Vector Stores

 | **Option** | **Description** | **Extra** |
-|------------------|-----------------------------------------|-------------------------|
+| ---------- | ---------------------------------------- | ------------------------ |
 | **qdrant** | Adds support for Qdrant vector store | vector-stores-qdrant |
 | milvus | Adds support for Milvus vector store | vector-stores-milvus |
 | chroma | Adds support for Chroma DB vector store | vector-stores-chroma |
@@ -95,12 +114,15 @@ You need to choose one option per category (LLM, Embeddings, Vector Stores, UI).
 #### UI

 | **Option** | **Description** | **Extra** |
-|--------------|------------------------------------------|-----------|
+| ---------- | -------------------------------- | --------- |
 | Gradio | Adds support for UI using Gradio | ui |

 <Callout intent="warning">
-A working **Gradio UI client** is provided to test the API, together with a set of useful tools such as bulk
-model download script, ingestion script, documents folder watch, etc. Please refer to the [UI alternatives](/manual/user-interface/alternatives) page for more UI alternatives.
+A working **Gradio UI client** is provided to test the API, together with a
+set of useful tools such as bulk model download script, ingestion script,
+documents folder watch, etc. Please refer to the [UI
+alternatives](/manual/user-interface/alternatives) page for more UI
+alternatives.
 </Callout>

 ## Recommended Setups
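For instance, picking one option from each category above (Ollama for both LLM and embeddings, Qdrant for the vector store, and the Gradio UI) gives an install command like the one below; any other combination of extras from the tables works the same way:

```bash
poetry install --extras "ui llms-ollama embeddings-ollama vector-stores-qdrant"
```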
@@ -109,7 +131,7 @@ There are just some examples of recommended setups. You can mix and match the di
 You'll find more information in the Manual section of the documentation.

 > **Important for Windows**: In the examples below or how to run PrivateGPT with `make run`, `PGPT_PROFILES` env var is being set inline following Unix command line syntax (works on MacOS and Linux).
-If you are using Windows, you'll need to set the env var in a different way, for example:
+> If you are using Windows, you'll need to set the env var in a different way, for example:

 ```powershell
 # Powershell
@@ -136,6 +158,7 @@ Go to [ollama.ai](https://ollama.ai/) and follow the instructions to install Oll
 After the installation, make sure the Ollama desktop app is closed.

 Now, start Ollama service (it will start a local inference server, serving both the LLM and the Embeddings):

 ```bash
 ollama serve
 ```
@@ -152,6 +175,7 @@ ollama pull nomic-embed-text
 ```

 Once done, on a different terminal, you can install PrivateGPT with the following command:

 ```bash
 poetry install --extras "ui llms-ollama embeddings-ollama vector-stores-qdrant"
 ```
@@ -175,6 +199,7 @@ You need to have access to sagemaker inference endpoints for the LLM and / or th
 Edit the `settings-sagemaker.yaml` file to include the correct Sagemaker endpoints.

 Then, install PrivateGPT with the following command:

 ```bash
 poetry install --extras "ui llms-sagemaker embeddings-sagemaker vector-stores-qdrant"
 ```
@@ -198,6 +223,7 @@ You need an OPENAI API key to run this setup.
 Edit the `settings-openai.yaml` file to include the correct API KEY. Never commit it! It's a secret! As an alternative to editing `settings-openai.yaml`, you can just set the env var OPENAI_API_KEY.

 Then, install PrivateGPT with the following command:

 ```bash
 poetry install --extras "ui llms-openai embeddings-openai vector-stores-qdrant"
 ```
@@ -221,6 +247,7 @@ You need to have access to Azure OpenAI inference endpoints for the LLM and / or
 Edit the `settings-azopenai.yaml` file to include the correct Azure OpenAI endpoints.

 Then, install PrivateGPT with the following command:

 ```bash
 poetry install --extras "ui llms-azopenai embeddings-azopenai vector-stores-qdrant"
 ```
@@ -235,6 +262,30 @@ PrivateGPT will use the already existing `settings-azopenai.yaml` settings file,

 The UI will be available at http://localhost:8001

+### Non-Private, FIREWORKS-powered test setup
+
+If you want to test PrivateGPT with FIREWORKS's LLM and Embeddings (taking into account your data is going to FIREWORKS!) you can run the following command:
+
+You need a FIREWORKS API key to run this setup.
+
+Edit the `settings-fireworks.yaml` file to include the correct API KEY. Never commit it! It's a secret! As an alternative to editing `settings-fireworks.yaml`, you can just set the env var FIREWORKS_API_KEY.
+
+Then, install PrivateGPT with the following command:
+
+```bash
+poetry install --extras "ui llms-fireworks embeddings-fireworks vector-stores-qdrant embeddings-openai"
+```
+
+Once installed, you can run PrivateGPT.
+
+```bash
+PGPT_PROFILES=fireworks make run
+```
+
+PrivateGPT will use the already existing `settings-fireworks.yaml` settings file, which is already configured to use FIREWORKS LLM and Embeddings endpoints, and Qdrant.
+
+The UI will be available at http://localhost:8001
+
 ### Local, Llama-CPP powered setup

 If you want to run PrivateGPT fully locally without relying on Ollama, you can run the following command:
@@ -244,6 +295,7 @@ poetry install --extras "ui llms-llama-cpp embeddings-huggingface vector-stores-
 ```

 In order for local LLM and embeddings to work, you need to download the models to the `models` folder. You can do so by running the `setup` script:

 ```bash
 poetry run python scripts/setup
 ```
@@ -277,6 +329,7 @@ To do that, you need to install `llama.cpp` python's binding `llama-cpp-python`
 that activate `METAL`: you have to pass `-DLLAMA_METAL=on` to the CMake command tha `pip` runs for you (see below).

 In other words, one should simply run:

 ```bash
 CMAKE_ARGS="-DLLAMA_METAL=on" pip install --force-reinstall --no-cache-dir llama-cpp-python
 ```
@@ -285,9 +338,10 @@ The above command will force the re-installation of `llama-cpp-python` with `MET
 `llama.cpp` locally with your `METAL` libraries (shipped by default with your macOS).

 More information is available in the documentation of the libraries themselves:
-* [llama-cpp-python](https://github.com/abetlen/llama-cpp-python#installation-with-hardware-acceleration)
-* [llama-cpp-python's documentation](https://llama-cpp-python.readthedocs.io/en/latest/#installation-with-hardware-acceleration)
-* [llama.cpp](https://github.com/ggerganov/llama.cpp#build)
+
+- [llama-cpp-python](https://github.com/abetlen/llama-cpp-python#installation-with-hardware-acceleration)
+- [llama-cpp-python's documentation](https://llama-cpp-python.readthedocs.io/en/latest/#installation-with-hardware-acceleration)
+- [llama.cpp](https://github.com/ggerganov/llama.cpp#build)

 ##### Llama-CPP Windows NVIDIA GPU support

@@ -297,11 +351,11 @@ dependencies.

 Some tips to get it working with an NVIDIA card and CUDA (Tested on Windows 10 with CUDA 11.5 RTX 3070):

-* Install latest VS2022 (and build tools) https://visualstudio.microsoft.com/vs/community/
-* Install CUDA toolkit https://developer.nvidia.com/cuda-downloads
-* Verify your installation is correct by running `nvcc --version` and `nvidia-smi`, ensure your CUDA version is up to
+- Install latest VS2022 (and build tools) https://visualstudio.microsoft.com/vs/community/
+- Install CUDA toolkit https://developer.nvidia.com/cuda-downloads
+- Verify your installation is correct by running `nvcc --version` and `nvidia-smi`, ensure your CUDA version is up to
   date and your GPU is detected.
-* [Optional] Install CMake to troubleshoot building issues by compiling llama.cpp directly https://cmake.org/download/
+- [Optional] Install CMake to troubleshoot building issues by compiling llama.cpp directly https://cmake.org/download/

 If you have all required dependencies properly configured running the
 following powershell command should succeed.
@@ -332,9 +386,9 @@ dependencies.

 Some tips:

-* Make sure you have an up-to-date C++ compiler
-* Install CUDA toolkit https://developer.nvidia.com/cuda-downloads
-* Verify your installation is correct by running `nvcc --version` and `nvidia-smi`, ensure your CUDA version is up to
+- Make sure you have an up-to-date C++ compiler
+- Install CUDA toolkit https://developer.nvidia.com/cuda-downloads
+- Verify your installation is correct by running `nvcc --version` and `nvidia-smi`, ensure your CUDA version is up to
   date and your GPU is detected.

 After that running the following command in the repository will install llama.cpp with GPU support:
@@ -356,13 +410,17 @@ AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 |

 Linux GPU support is done through ROCm.
 Some tips:
-* Install ROCm from [quick-start install guide](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/quick-start.html)
-* [Install PyTorch for ROCm](https://rocm.docs.amd.com/projects/radeon/en/latest/docs/install/install-pytorch.html)
+
+- Install ROCm from [quick-start install guide](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/quick-start.html)
+- [Install PyTorch for ROCm](https://rocm.docs.amd.com/projects/radeon/en/latest/docs/install/install-pytorch.html)

 ```bash
 wget https://repo.radeon.com/rocm/manylinux/rocm-rel-6.0/torch-2.1.1%2Brocm6.0-cp311-cp311-linux_x86_64.whl
 poetry run pip install --force-reinstall --no-cache-dir torch-2.1.1+rocm6.0-cp311-cp311-linux_x86_64.whl
 ```
-* Install bitsandbytes for ROCm
+
+- Install bitsandbytes for ROCm

 ```bash
 PYTORCH_ROCM_ARCH=gfx900,gfx906,gfx908,gfx90a,gfx1030,gfx1100,gfx1101,gfx940,gfx941,gfx942
 BITSANDBYTES_VERSION=62353b0200b8557026c176e74ac48b84b953a854
@@ -374,6 +432,7 @@ pip install . --extra-index-url https://download.pytorch.org/whl/nightly
 ```

 After that running the following command in the repository will install llama.cpp with GPU support:

 ```bash
 LLAMA_CPP_PYTHON_VERSION=0.2.56
 DAMDGPU_TARGETS=gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx940;gfx941;gfx942
@@ -391,15 +450,15 @@ AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI =
 Execution of LLMs locally still has a lot of sharp edges, specially when running on non Linux platforms.
 You might encounter several issues:

-* Performance: RAM or VRAM usage is very high, your computer might experience slowdowns or even crashes.
-* GPU Virtualization on Windows and OSX: Simply not possible with docker desktop, you have to run the server directly on
+- Performance: RAM or VRAM usage is very high, your computer might experience slowdowns or even crashes.
+- GPU Virtualization on Windows and OSX: Simply not possible with docker desktop, you have to run the server directly on
   the host.
-* Building errors: Some of PrivateGPT dependencies need to build native code, and they might fail on some platforms.
+- Building errors: Some of PrivateGPT dependencies need to build native code, and they might fail on some platforms.
   Most likely you are missing some dev tools in your machine (updated C++ compiler, CUDA is not on PATH, etc.).
 If you encounter any of these issues, please open an issue and we'll try to help.

 One of the first reflex to adopt is: get more information.
-If, during your installation, something does not go as planned, retry in *verbose* mode, and see what goes wrong.
+If, during your installation, something does not go as planned, retry in _verbose_ mode, and see what goes wrong.

 For example, when installing packages with `pip install`, you can add the option `-vvv` to show the details of the installation.
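A concrete instance of that tip (the package name is chosen only as an example):

```bash
# Re-run a failing install with maximum verbosity to see where it breaks
pip install -vvv llama-cpp-python
```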
@@ -414,8 +473,8 @@ To install a C++ compiler on Windows 10/11, follow these steps:

 1. Install Visual Studio 2022.
 2. Make sure the following components are selected:
-   * Universal Windows Platform development
-   * C++ CMake tools for Windows
+   - Universal Windows Platform development
+   - C++ CMake tools for Windows
 3. Download the MinGW installer from the [MinGW website](https://sourceforge.net/projects/mingw/).
 4. Run the installer and select the `gcc` component.
poetry.lock (generated, 39 lines changed)
@@ -2685,6 +2685,21 @@ llama-index-core = ">=0.11.0,<0.12.0"
 llama-index-embeddings-openai = ">=0.2.3,<0.3.0"
 llama-index-llms-azure-openai = ">=0.2.0,<0.3.0"

+[[package]]
+name = "llama-index-embeddings-fireworks"
+version = "0.2.0"
+description = "llama-index embeddings fireworks integration"
+optional = true
+python-versions = "<3.12,>=3.8.1"
+files = [
+    {file = "llama_index_embeddings_fireworks-0.2.0-py3-none-any.whl", hash = "sha256:44958479691f55005bd3bbf773316c556e5b1428c6ec174a4f443016e79e48ea"},
+    {file = "llama_index_embeddings_fireworks-0.2.0.tar.gz", hash = "sha256:0085a8fd5b4d4f71f797cfef11a85c4c3fbe763a3680edeae8f410184fa2d266"},
+]
+
+[package.dependencies]
+llama-index-core = ">=0.11.0,<0.12.0"
+llama-index-llms-openai = ">=0.2.0,<0.3.0"
+
 [[package]]
 name = "llama-index-embeddings-gemini"
 version = "0.2.0"
@@ -2778,6 +2793,21 @@ httpx = "*"
 llama-index-core = ">=0.11.0,<0.12.0"
 llama-index-llms-openai = ">=0.2.1,<0.3.0"

+[[package]]
+name = "llama-index-llms-fireworks"
+version = "0.2.0"
+description = "llama-index llms fireworks integration"
+optional = true
+python-versions = "<4.0,>=3.8.1"
+files = [
+    {file = "llama_index_llms_fireworks-0.2.0-py3-none-any.whl", hash = "sha256:65a604f8cf622f7ce695c458d375cd7dac6e27f4596ba90e5464b2594b0688a0"},
+    {file = "llama_index_llms_fireworks-0.2.0.tar.gz", hash = "sha256:cfdd07b6bc01890e55a4dfc3af2e62fe82e5a08b362d52314d024728ebcf7c5b"},
+]
+
+[package.dependencies]
+llama-index-core = ">=0.11.0,<0.12.0"
+llama-index-llms-openai = ">=0.2.0,<0.3.0"
+
 [[package]]
 name = "llama-index-llms-gemini"
 version = "0.3.5"
@@ -6242,11 +6272,6 @@ files = [
     {file = "triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb"},
     {file = "triton-3.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bcbf3b1c48af6a28011a5c40a5b3b9b5330530c3827716b5fbf6d7adcc1e53e9"},
     {file = "triton-3.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6e5727202f7078c56f91ff13ad0c1abab14a0e7f2c87e91b12b6f64f3e8ae609"},
-    {file = "triton-3.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39b052da883351fdf6be3d93cedae6db3b8e3988d3b09ed221bccecfa9612230"},
-    {file = "triton-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd34f19a8582af96e6291d4afce25dac08cb2a5d218c599163761e8e0827208e"},
-    {file = "triton-3.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d5e10de8c011adeb7c878c6ce0dd6073b14367749e34467f1cff2bde1b78253"},
-    {file = "triton-3.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8903767951bf86ec960b4fe4e21bc970055afc65e9d57e916d79ae3c93665e3"},
-    {file = "triton-3.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41004fb1ae9a53fcb3e970745feb87f0e3c94c6ce1ba86e95fa3b8537894bef7"},
 ]

 [package.dependencies]
@@ -7082,6 +7107,7 @@ cffi = ["cffi (>=1.11)"]

 [extras]
 embeddings-azopenai = ["llama-index-embeddings-azure-openai"]
+embeddings-fireworks = ["llama-index-embeddings-fireworks"]
 embeddings-gemini = ["llama-index-embeddings-gemini"]
 embeddings-huggingface = ["einops", "llama-index-embeddings-huggingface"]
 embeddings-mistral = ["llama-index-embeddings-mistralai"]
@@ -7089,6 +7115,7 @@ embeddings-ollama = ["llama-index-embeddings-ollama"]
 embeddings-openai = ["llama-index-embeddings-openai"]
 embeddings-sagemaker = ["boto3"]
 llms-azopenai = ["llama-index-llms-azure-openai"]
+llms-fireworks = ["llama-index-llms-fireworks"]
 llms-gemini = ["llama-index-llms-gemini"]
 llms-llama-cpp = ["llama-index-llms-llama-cpp"]
 llms-ollama = ["llama-index-llms-ollama"]
@@ -7107,4 +7134,4 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.11,<3.12"
-content-hash = "16e3be4521aa64c936ee8fb841655f15090b71cf8faaeed7e73a4bcdf3fbdea2"
+content-hash = "f41ee2165df33fd6815114a9d6b01508e1e8726dd7a8baf99825514586f250f0"
@@ -67,6 +67,24 @@ class EmbeddingComponent:
                     api_key=api_key,
                     model=model,
                 )
+            case "fireworks":
+                try:
+                    from llama_index.embeddings.fireworks import (  # type: ignore
+                        FireworksEmbedding,
+                    )
+                except ImportError as e:
+                    raise ImportError(
+                        "FireworksEmbedding dependencies not found, install with `poetry install --extras embeddings-fireworks`"
+                    ) from e
+
+                api_key = (
+                    settings.fireworks.embedding_api_key or settings.fireworks.api_key
+                )
+                model = settings.openai.embedding_model
+                self.embedding_model = FireworksEmbedding(
+                    api_key=api_key,
+                    model=model,
+                )
             case "ollama":
                 try:
                     from llama_index.embeddings.ollama import (  # type: ignore
@@ -102,6 +102,19 @@ class LLMComponent:
                     api_key=openai_settings.api_key,
                     model=openai_settings.model,
                 )
+            case "fireworks":
+                try:
+                    from llama_index.llms.fireworks import Fireworks  # type: ignore
+                except ImportError as e:
+                    raise ImportError(
+                        "fireworks dependencies not found, install with `poetry install --extras llms-fireworks`"
+                    ) from e
+
+                fireworks_settings = settings.fireworks
+                self.llm = Fireworks(
+                    model=fireworks_settings.model,
+                    api_key=fireworks_settings.api_key,
+                )
             case "openailike":
                 try:
                     from llama_index.llms.openai_like import OpenAILike  # type: ignore
@@ -115,6 +115,7 @@ class LLMSettings(BaseModel):
         "mock",
         "ollama",
         "gemini",
+        "fireworks",
     ]
     max_new_tokens: int = Field(
         256,
@@ -205,6 +206,7 @@ class EmbeddingSettings(BaseModel):
         "mock",
         "gemini",
         "mistralai",
+        "fireworks",
     ]
     ingest_mode: Literal["simple", "batch", "parallel", "pipeline"] = Field(
         "simple",
@@ -268,6 +270,23 @@ class OpenAISettings(BaseModel):
     )


+class FireWorksSettings(BaseModel):
+    api_key: str
+    model: str = Field(
+        "accounts/fireworks/models/llama-v3p1-70b-instruct",
+        description="FireWorks Model to use. Example: 'accounts/fireworks/models/llama-v3p1-70b-instruct'.",
+    )
+    embedding_api_base: str = Field(
+        None,
+        description="Base URL of FIREWORKS API. Example: 'https://api.fireworks.ai/inference/v1'.",
+    )
+    embedding_api_key: str
+    embedding_model: str = Field(
+        "nomic-ai/nomic-embed-text-v1.5",
+        description="FIREWORKS embedding Model to use. Example: 'nomic-ai/nomic-embed-text-v1.5'.",
+    )
+
+
 class GeminiSettings(BaseModel):
     api_key: str
     model: str = Field(
@@ -597,6 +616,7 @@ class Settings(BaseModel):
     huggingface: HuggingFaceSettings
     sagemaker: SagemakerSettings
     openai: OpenAISettings
+    fireworks: FireWorksSettings
     gemini: GeminiSettings
     ollama: OllamaSettings
     azopenai: AzureOpenAISettings
@@ -381,7 +381,7 @@ class PrivateGptUi:
             ".contain { display: flex !important; flex-direction: column !important; }"
             "#component-0, #component-3, #component-10, #component-8 { height: 100% !important; }"
             "#chatbot { flex-grow: 1 !important; overflow: auto !important;}"
-            "#col { height: calc(100vh - 112px - 16px) !important; }"
+            "#col { min-height: calc(100vh - 112px - 16px) !important; }"
             "hr { margin-top: 1em; margin-bottom: 1em; border: 0; border-top: 1px solid #FFF; }"
             ".avatar-image { background-color: antiquewhite; border-radius: 2px; }"
             ".footer { text-align: center; margin-top: 20px; font-size: 14px; display: flex; align-items: center; justify-content: center; }"
@@ -522,6 +522,7 @@ class PrivateGptUi:
         model_mapping = {
             "llamacpp": config_settings.llamacpp.llm_hf_model_file,
             "openai": config_settings.openai.model,
+            "fireworks": config_settings.fireworks.model,
             "openailike": config_settings.openai.model,
             "azopenai": config_settings.azopenai.llm_model,
             "sagemaker": config_settings.sagemaker.llm_endpoint_name,
pyproject.toml
@@ -38,6 +38,8 @@ llama-index-vector-stores-postgres = {version ="*", optional = true}
 llama-index-vector-stores-clickhouse = {version ="*", optional = true}
 llama-index-storage-docstore-postgres = {version ="*", optional = true}
 llama-index-storage-index-store-postgres = {version ="*", optional = true}
+llama-index-llms-fireworks = {version = "*", optional = true}
+llama-index-embeddings-fireworks = {version = "*", optional = true}
 # Postgres
 psycopg2-binary = {version ="^2.9.9", optional = true}
 asyncpg = {version="^0.29.0", optional = true}
@@ -83,6 +85,8 @@ vector-stores-postgres = ["llama-index-vector-stores-postgres"]
 vector-stores-milvus = ["llama-index-vector-stores-milvus"]
 storage-nodestore-postgres = ["llama-index-storage-docstore-postgres","llama-index-storage-index-store-postgres","psycopg2-binary","asyncpg"]
 rerank-sentence-transformers = ["torch", "sentence-transformers"]
+llms-fireworks = ["llama-index-llms-fireworks"]
+embeddings-fireworks = ["llama-index-embeddings-fireworks"]

 [tool.poetry.group.dev.dependencies]
 black = "^24"
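A quick, optional sanity check after installing with the new extras; the import paths come straight from the integration packages added above, but the check itself is only an illustrative sketch:

```bash
# Confirm the optional Fireworks integrations resolve inside the Poetry environment
poetry run python -c "import llama_index.llms.fireworks, llama_index.embeddings.fireworks; print('fireworks extras OK')"
```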
settings-fireworks.yaml (new file, 13 lines)
@@ -0,0 +1,13 @@
server:
  env_name: ${APP_ENV:fireworks}

llm:
  mode: fireworks

embedding:
  mode: fireworks

fireworks:
  api_key: ${FIREWORKS_API_KEY:}
  model: "accounts/fireworks/models/llama-v3p1-70b-instruct"
#poetry install --extras "ui llms-fireworks embeddings-fireworks vector-stores-qdrant embeddings-openai"
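Putting the new profile to use end to end might look like the following; the `make run` target and profile name come from the documentation above, while the `/health` probe is an assumption about the API's default health endpoint:

```bash
# Provide the key, select the fireworks profile, and start PrivateGPT
export FIREWORKS_API_KEY="your-key-here"
PGPT_PROFILES=fireworks make run

# From another terminal, check the server is answering (UI and API are served on port 8001)
curl http://localhost:8001/health
```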
Default settings file (YAML)
@@ -128,6 +128,11 @@ openai:
   model: gpt-3.5-turbo
   embedding_api_key: ${OPENAI_API_KEY:}

+fireworks:
+  api_key: ${FIREWORKS_API_KEY:}
+  model: "accounts/fireworks/models/llama-v3p1-70b-instruct"
+  embedding_api_key: ${FIREWORKS_API_KEY:}
+
 ollama:
   llm_model: llama3.1
   embedding_model: nomic-embed-text