Somashekar B R 2024-11-17 19:41:41 +00:00 committed by GitHub
commit 078778c6af
11 changed files with 305 additions and 74 deletions

Dockerfile.fireworks (new file, 54 lines)
View File

@ -0,0 +1,54 @@
FROM python:3.11.6-slim-bookworm as base
# Install poetry
RUN pip install pipx
RUN python3 -m pipx ensurepath
RUN pipx install poetry==1.8.3
ENV PATH="/root/.local/bin:$PATH"
ENV PATH=".venv/bin/:$PATH"
RUN apt update && apt install -y \
build-essential
# https://python-poetry.org/docs/configuration/#virtualenvsin-project
ENV POETRY_VIRTUALENVS_IN_PROJECT=true
FROM base as dependencies
WORKDIR /home/worker/app
COPY pyproject.toml poetry.lock ./
ARG POETRY_EXTRAS="ui llms-fireworks embeddings-fireworks vector-stores-qdrant embeddings-openai"
RUN poetry install --no-root --extras "${POETRY_EXTRAS}"
FROM base as app
ENV PYTHONUNBUFFERED=1
ENV PORT=8080
ENV APP_ENV=prod
ENV PYTHONPATH="$PYTHONPATH:/home/worker/app/private_gpt/"
EXPOSE 8080
# Prepare a non-root user
# More info about how to configure UIDs and GIDs in Docker:
# https://github.com/systemd/systemd/blob/main/docs/UIDS-GIDS.md
# Define the User ID (UID) for the non-root user
# UID 100 is chosen to avoid conflicts with existing system users
ARG UID=100
# Define the Group ID (GID) for the non-root user
# GID 65534 is often used for the 'nogroup' or 'nobody' group
ARG GID=65534
RUN adduser --system --gid ${GID} --uid ${UID} --home /home/worker worker
WORKDIR /home/worker/app
RUN chown worker /home/worker/app
RUN mkdir local_data && chown worker local_data
RUN mkdir models && chown worker models
COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
COPY --chown=worker private_gpt/ private_gpt
COPY --chown=worker *.yaml .
COPY --chown=worker scripts/ scripts
USER worker
ENTRYPOINT python -m private_gpt
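For a quick sanity check of this image outside Docker Compose, a build-and-run sketch along these lines should work (the image tag and the API key placeholder are illustrative, not part of this commit):

```bash
# Build the Fireworks variant of the image from the repository root
docker build -f Dockerfile.fireworks -t private-gpt-fireworks .

# Run it with roughly the same environment the compose service uses
docker run --rm -p 3001:8080 \
  -e PGPT_PROFILES=fireworks \
  -e FIREWORKS_API_KEY=<your-fireworks-api-key> \
  private-gpt-fireworks
```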

View File

@ -1,5 +1,4 @@
services:
#-----------------------------------
#---- Private-GPT services ---------
#-----------------------------------
@ -93,7 +92,7 @@ services:
ports:
- "11434:11434"
volumes:
- ./local_data:/root/.ollama
profiles:
- ""
- ollama-cpu
@ -114,3 +113,21 @@ services:
capabilities: [gpu]
profiles:
- ollama-cuda
# fireworks service
private-gpt-fireworks:
  build:
    context: .
    dockerfile: Dockerfile.fireworks
  volumes:
    - ./local_data/:/home/worker/app/local_data
  ports:
    - "3001:8080"
  environment:
    PORT: 8080
    PGPT_PROFILES: fireworks
    FIREWORKS_API_KEY: ${FIREWORKS_API_KEY}
  env_file:
    - .env
  profiles:
    - fireworks
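Assuming `FIREWORKS_API_KEY` is exported in the shell or present in `.env`, the new profile could then be started with a standard Compose invocation such as:

```bash
# Build and start only the Fireworks-backed PrivateGPT service
docker compose --profile fireworks up --build
```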

View File

@ -3,45 +3,63 @@ It is important that you review the [Main Concepts](../concepts) section to unde
## Base requirements to run PrivateGPT
### 1. Clone the PrivateGPT Repository
Clone the repository and navigate to it:
```bash
git clone https://github.com/zylon-ai/private-gpt
cd private-gpt
```
### 2. Install Python 3.11
If you do not have Python 3.11 installed, install it using a Python version manager like `pyenv`. Earlier Python versions are not supported.
#### macOS/Linux
Install and set Python 3.11 using [pyenv](https://github.com/pyenv/pyenv):
```bash
pyenv install 3.11
pyenv local 3.11
```
#### Windows
Install and set Python 3.11 using [pyenv-win](https://github.com/pyenv-win/pyenv-win):
```bash
pyenv install 3.11
pyenv local 3.11
```
### 3. Install `Poetry`
Install [Poetry](https://python-poetry.org/docs/#installing-with-the-official-installer) for dependency management:
Follow the instructions on the official Poetry website to install it.
<Callout intent="warning">
A bug exists in Poetry versions 1.7.0 and earlier. We strongly recommend upgrading to a tested version. To upgrade Poetry to the latest tested version, run `poetry self update 1.8.3` after installing it.
</Callout>
### 4. Optional: Install `make`
To run various scripts, you need to install `make`. Follow the instructions for your operating system:
#### macOS
(Using Homebrew):
```bash
brew install make
```
#### Windows
(Using Chocolatey):
```bash
choco install make
```
@ -53,6 +71,7 @@ PrivateGPT allows customization of the setup, from fully local to cloud-based, b
```bash
poetry install --extras "<extra1> <extra2>..."
```
Where `<extra>` can be any of the following options described below.
### Available Modules
@ -62,7 +81,7 @@ You need to choose one option per category (LLM, Embeddings, Vector Stores, UI).
#### LLM

| **Option** | **Description** | **Extra** |
| ---------- | --------------- | --------- |
| **ollama** | Adds support for Ollama LLM, requires Ollama running locally | llms-ollama |
| llama-cpp | Adds support for local LLM using LlamaCPP | llms-llama-cpp |
| sagemaker | Adds support for Amazon Sagemaker LLM, requires Sagemaker endpoints | llms-sagemaker |
@ -74,7 +93,7 @@ You need to choose one option per category (LLM, Embeddings, Vector Stores, UI).
#### Embeddings

| **Option** | **Description** | **Extra** |
| ---------- | --------------- | --------- |
| **ollama** | Adds support for Ollama Embeddings, requires Ollama running locally | embeddings-ollama |
| huggingface | Adds support for local Embeddings using HuggingFace | embeddings-huggingface |
| openai | Adds support for OpenAI Embeddings, requires OpenAI API key | embeddings-openai |
@ -85,22 +104,25 @@ You need to choose one option per category (LLM, Embeddings, Vector Stores, UI).
#### Vector Stores

| **Option** | **Description** | **Extra** |
| ---------- | --------------- | --------- |
| **qdrant** | Adds support for Qdrant vector store | vector-stores-qdrant |
| milvus | Adds support for Milvus vector store | vector-stores-milvus |
| chroma | Adds support for Chroma DB vector store | vector-stores-chroma |
| postgres | Adds support for Postgres vector store | vector-stores-postgres |
| clickhouse | Adds support for Clickhouse vector store | vector-stores-clickhouse |

#### UI

| **Option** | **Description** | **Extra** |
| ---------- | --------------- | --------- |
| Gradio | Adds support for UI using Gradio | ui |

<Callout intent="warning">
A working **Gradio UI client** is provided to test the API, together with a set of useful tools such as bulk model download script, ingestion script, documents folder watch, etc. Please refer to the [UI alternatives](/manual/user-interface/alternatives) page for more UI alternatives.
</Callout>
## Recommended Setups
@ -109,7 +131,7 @@ There are just some examples of recommended setups. You can mix and match the di
You'll find more information in the Manual section of the documentation.
> **Important for Windows**: In the examples below of how to run PrivateGPT with `make run`, the `PGPT_PROFILES` env var is being set inline following Unix command line syntax (works on MacOS and Linux).
> If you are using Windows, you'll need to set the env var in a different way, for example:
```powershell
# Powershell
@ -136,6 +158,7 @@ Go to [ollama.ai](https://ollama.ai/) and follow the instructions to install Oll
After the installation, make sure the Ollama desktop app is closed.
Now, start Ollama service (it will start a local inference server, serving both the LLM and the Embeddings):
```bash
ollama serve
```
@ -152,6 +175,7 @@ ollama pull nomic-embed-text
```
Once done, on a different terminal, you can install PrivateGPT with the following command:
```bash
poetry install --extras "ui llms-ollama embeddings-ollama vector-stores-qdrant"
```
@ -175,6 +199,7 @@ You need to have access to sagemaker inference endpoints for the LLM and / or th
Edit the `settings-sagemaker.yaml` file to include the correct Sagemaker endpoints.
Then, install PrivateGPT with the following command:
```bash
poetry install --extras "ui llms-sagemaker embeddings-sagemaker vector-stores-qdrant"
```
@ -198,6 +223,7 @@ You need an OPENAI API key to run this setup.
Edit the `settings-openai.yaml` file to include the correct API KEY. Never commit it! It's a secret! As an alternative to editing `settings-openai.yaml`, you can just set the env var OPENAI_API_KEY.
Then, install PrivateGPT with the following command:
```bash
poetry install --extras "ui llms-openai embeddings-openai vector-stores-qdrant"
```
@ -221,6 +247,7 @@ You need to have access to Azure OpenAI inference endpoints for the LLM and / or
Edit the `settings-azopenai.yaml` file to include the correct Azure OpenAI endpoints.
Then, install PrivateGPT with the following command:
```bash
poetry install --extras "ui llms-azopenai embeddings-azopenai vector-stores-qdrant"
```
@ -235,6 +262,30 @@ PrivateGPT will use the already existing `settings-azopenai.yaml` settings file,
The UI will be available at http://localhost:8001
### Non-Private, FIREWORKS-powered test setup
If you want to test PrivateGPT with Fireworks AI's LLM and Embeddings -taking into account your data is going to Fireworks AI!- you can run the following command:
You need a Fireworks API key to run this setup.
Edit the `settings-fireworks.yaml` file to include the correct API KEY. Never commit it! It's a secret! As an alternative to editing `settings-fireworks.yaml`, you can just set the env var FIREWORKS_API_KEY.
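For example, a minimal way to provide the key via the environment on macOS/Linux (placeholder value, illustrative only) would be:

```bash
export FIREWORKS_API_KEY=<your-fireworks-api-key>
```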
Then, install PrivateGPT with the following command:
```bash
poetry install --extras "ui llms-fireworks embeddings-fireworks vector-stores-qdrant embeddings-openai"
```
Once installed, you can run PrivateGPT.
```bash
PGPT_PROFILES=fireworks make run
```
PrivateGPT will use the already existing `settings-fireworks.yaml` settings file, which is already configured to use Fireworks AI LLM and Embeddings endpoints, and Qdrant.
The UI will be available at http://localhost:8001
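To confirm the server is up, a quick probe of the UI port (assuming the default port shown above) might be:

```bash
curl -I http://localhost:8001
```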
### Local, Llama-CPP powered setup
If you want to run PrivateGPT fully locally without relying on Ollama, you can run the following command:
@ -244,6 +295,7 @@ poetry install --extras "ui llms-llama-cpp embeddings-huggingface vector-stores-
```
In order for local LLM and embeddings to work, you need to download the models to the `models` folder. You can do so by running the `setup` script:
```bash
poetry run python scripts/setup
```
@ -277,6 +329,7 @@ To do that, you need to install `llama.cpp` python's binding `llama-cpp-python`
that activate `METAL`: you have to pass `-DLLAMA_METAL=on` to the CMake command that `pip` runs for you (see below).
In other words, one should simply run:
```bash
CMAKE_ARGS="-DLLAMA_METAL=on" pip install --force-reinstall --no-cache-dir llama-cpp-python
```
@ -285,9 +338,10 @@ The above command will force the re-installation of `llama-cpp-python` with `MET
`llama.cpp` locally with your `METAL` libraries (shipped by default with your macOS).
More information is available in the documentation of the libraries themselves:
- [llama-cpp-python](https://github.com/abetlen/llama-cpp-python#installation-with-hardware-acceleration)
- [llama-cpp-python's documentation](https://llama-cpp-python.readthedocs.io/en/latest/#installation-with-hardware-acceleration)
- [llama.cpp](https://github.com/ggerganov/llama.cpp#build)
##### Llama-CPP Windows NVIDIA GPU support
@ -297,11 +351,11 @@ dependencies.
Some tips to get it working with an NVIDIA card and CUDA (Tested on Windows 10 with CUDA 11.5 RTX 3070):
- Install latest VS2022 (and build tools) https://visualstudio.microsoft.com/vs/community/
- Install CUDA toolkit https://developer.nvidia.com/cuda-downloads
- Verify your installation is correct by running `nvcc --version` and `nvidia-smi`, ensure your CUDA version is up to date and your GPU is detected.
- [Optional] Install CMake to troubleshoot building issues by compiling llama.cpp directly https://cmake.org/download/
If you have all required dependencies properly configured, running the following powershell command should succeed.
@ -332,9 +386,9 @@ dependencies.
Some tips:
- Make sure you have an up-to-date C++ compiler
- Install CUDA toolkit https://developer.nvidia.com/cuda-downloads
- Verify your installation is correct by running `nvcc --version` and `nvidia-smi`, ensure your CUDA version is up to date and your GPU is detected.
After that, running the following command in the repository will install llama.cpp with GPU support:
@ -356,13 +410,17 @@ AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 |
Linux GPU support is done through ROCm.
Some tips:
- Install ROCm from [quick-start install guide](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/quick-start.html)
- [Install PyTorch for ROCm](https://rocm.docs.amd.com/projects/radeon/en/latest/docs/install/install-pytorch.html)
```bash
wget https://repo.radeon.com/rocm/manylinux/rocm-rel-6.0/torch-2.1.1%2Brocm6.0-cp311-cp311-linux_x86_64.whl
poetry run pip install --force-reinstall --no-cache-dir torch-2.1.1+rocm6.0-cp311-cp311-linux_x86_64.whl
```
- Install bitsandbytes for ROCm
```bash
PYTORCH_ROCM_ARCH=gfx900,gfx906,gfx908,gfx90a,gfx1030,gfx1100,gfx1101,gfx940,gfx941,gfx942
BITSANDBYTES_VERSION=62353b0200b8557026c176e74ac48b84b953a854
@ -374,6 +432,7 @@ pip install . --extra-index-url https://download.pytorch.org/whl/nightly
```
After that, running the following command in the repository will install llama.cpp with GPU support:
```bash
LLAMA_CPP_PYTHON_VERSION=0.2.56
DAMDGPU_TARGETS=gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx940;gfx941;gfx942
@ -391,15 +450,15 @@ AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI =
Execution of LLMs locally still has a lot of sharp edges, especially when running on non-Linux platforms.
You might encounter several issues:
- Performance: RAM or VRAM usage is very high, your computer might experience slowdowns or even crashes.
- GPU Virtualization on Windows and OSX: Simply not possible with docker desktop, you have to run the server directly on the host.
- Building errors: Some of PrivateGPT's dependencies need to build native code, and they might fail on some platforms. Most likely you are missing some dev tools on your machine (updated C++ compiler, CUDA is not on PATH, etc.).
If you encounter any of these issues, please open an issue and we'll try to help.
One of the first reflexes to adopt is: get more information.
If, during your installation, something does not go as planned, retry in _verbose_ mode, and see what goes wrong.
For example, when installing packages with `pip install`, you can add the option `-vvv` to show the details of the installation.
@ -414,8 +473,8 @@ To install a C++ compiler on Windows 10/11, follow these steps:
1. Install Visual Studio 2022.
2. Make sure the following components are selected:
   - Universal Windows Platform development
   - C++ CMake tools for Windows
3. Download the MinGW installer from the [MinGW website](https://sourceforge.net/projects/mingw/).
4. Run the installer and select the `gcc` component.

poetry.lock (generated, 39 changed lines)
View File

@ -2685,6 +2685,21 @@ llama-index-core = ">=0.11.0,<0.12.0"
llama-index-embeddings-openai = ">=0.2.3,<0.3.0"
llama-index-llms-azure-openai = ">=0.2.0,<0.3.0"
[[package]]
name = "llama-index-embeddings-fireworks"
version = "0.2.0"
description = "llama-index embeddings fireworks integration"
optional = true
python-versions = "<3.12,>=3.8.1"
files = [
{file = "llama_index_embeddings_fireworks-0.2.0-py3-none-any.whl", hash = "sha256:44958479691f55005bd3bbf773316c556e5b1428c6ec174a4f443016e79e48ea"},
{file = "llama_index_embeddings_fireworks-0.2.0.tar.gz", hash = "sha256:0085a8fd5b4d4f71f797cfef11a85c4c3fbe763a3680edeae8f410184fa2d266"},
]
[package.dependencies]
llama-index-core = ">=0.11.0,<0.12.0"
llama-index-llms-openai = ">=0.2.0,<0.3.0"
[[package]]
name = "llama-index-embeddings-gemini"
version = "0.2.0"
@ -2778,6 +2793,21 @@ httpx = "*"
llama-index-core = ">=0.11.0,<0.12.0"
llama-index-llms-openai = ">=0.2.1,<0.3.0"
[[package]]
name = "llama-index-llms-fireworks"
version = "0.2.0"
description = "llama-index llms fireworks integration"
optional = true
python-versions = "<4.0,>=3.8.1"
files = [
{file = "llama_index_llms_fireworks-0.2.0-py3-none-any.whl", hash = "sha256:65a604f8cf622f7ce695c458d375cd7dac6e27f4596ba90e5464b2594b0688a0"},
{file = "llama_index_llms_fireworks-0.2.0.tar.gz", hash = "sha256:cfdd07b6bc01890e55a4dfc3af2e62fe82e5a08b362d52314d024728ebcf7c5b"},
]
[package.dependencies]
llama-index-core = ">=0.11.0,<0.12.0"
llama-index-llms-openai = ">=0.2.0,<0.3.0"
[[package]]
name = "llama-index-llms-gemini"
version = "0.3.5"
@ -6242,11 +6272,6 @@ files = [
{file = "triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb"}, {file = "triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb"},
{file = "triton-3.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bcbf3b1c48af6a28011a5c40a5b3b9b5330530c3827716b5fbf6d7adcc1e53e9"}, {file = "triton-3.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bcbf3b1c48af6a28011a5c40a5b3b9b5330530c3827716b5fbf6d7adcc1e53e9"},
{file = "triton-3.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6e5727202f7078c56f91ff13ad0c1abab14a0e7f2c87e91b12b6f64f3e8ae609"}, {file = "triton-3.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6e5727202f7078c56f91ff13ad0c1abab14a0e7f2c87e91b12b6f64f3e8ae609"},
{file = "triton-3.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39b052da883351fdf6be3d93cedae6db3b8e3988d3b09ed221bccecfa9612230"},
{file = "triton-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd34f19a8582af96e6291d4afce25dac08cb2a5d218c599163761e8e0827208e"},
{file = "triton-3.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d5e10de8c011adeb7c878c6ce0dd6073b14367749e34467f1cff2bde1b78253"},
{file = "triton-3.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8903767951bf86ec960b4fe4e21bc970055afc65e9d57e916d79ae3c93665e3"},
{file = "triton-3.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41004fb1ae9a53fcb3e970745feb87f0e3c94c6ce1ba86e95fa3b8537894bef7"},
]
[package.dependencies]
@ -7082,6 +7107,7 @@ cffi = ["cffi (>=1.11)"]
[extras]
embeddings-azopenai = ["llama-index-embeddings-azure-openai"]
embeddings-fireworks = ["llama-index-embeddings-fireworks"]
embeddings-gemini = ["llama-index-embeddings-gemini"]
embeddings-huggingface = ["einops", "llama-index-embeddings-huggingface"]
embeddings-mistral = ["llama-index-embeddings-mistralai"]
@ -7089,6 +7115,7 @@ embeddings-ollama = ["llama-index-embeddings-ollama"]
embeddings-openai = ["llama-index-embeddings-openai"]
embeddings-sagemaker = ["boto3"]
llms-azopenai = ["llama-index-llms-azure-openai"]
llms-fireworks = ["llama-index-llms-fireworks"]
llms-gemini = ["llama-index-llms-gemini"]
llms-llama-cpp = ["llama-index-llms-llama-cpp"]
llms-ollama = ["llama-index-llms-ollama"]
@ -7107,4 +7134,4 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.11,<3.12"
content-hash = "f41ee2165df33fd6815114a9d6b01508e1e8726dd7a8baf99825514586f250f0"

View File

@ -67,6 +67,24 @@ class EmbeddingComponent:
api_key=api_key,
model=model,
)
case "fireworks":
try:
from llama_index.embeddings.fireworks import ( # type: ignore
FireworksEmbedding,
)
except ImportError as e:
raise ImportError(
"FireworksEmbedding dependencies not found, install with `poetry install --extras embeddings-fireworks`"
) from e
api_key = (
settings.fireworks.embedding_api_key or settings.fireworks.api_key
)
model = settings.openai.embedding_model
self.embedding_model = FireworksEmbedding(
api_key=api_key,
model=model,
)
case "ollama": case "ollama":
try: try:
from llama_index.embeddings.ollama import ( # type: ignore from llama_index.embeddings.ollama import ( # type: ignore

View File

@ -102,6 +102,19 @@ class LLMComponent:
api_key=openai_settings.api_key,
model=openai_settings.model,
)
case "fireworks":
try:
from llama_index.llms.fireworks import Fireworks # type: ignore
except ImportError as e:
raise ImportError(
"fireworks dependencies not found, install with `poetry install --extras llms-fireworks`"
) from e
fireworks_settings = settings.fireworks
self.llm = Fireworks(
model=fireworks_settings.model,
api_key=fireworks_settings.api_key,
)
case "openailike": case "openailike":
try: try:
from llama_index.llms.openai_like import OpenAILike # type: ignore from llama_index.llms.openai_like import OpenAILike # type: ignore

View File

@ -115,6 +115,7 @@ class LLMSettings(BaseModel):
"mock", "mock",
"ollama", "ollama",
"gemini", "gemini",
"fireworks",
] ]
max_new_tokens: int = Field( max_new_tokens: int = Field(
256, 256,
@ -205,6 +206,7 @@ class EmbeddingSettings(BaseModel):
"mock", "mock",
"gemini", "gemini",
"mistralai", "mistralai",
"fireworks",
] ]
ingest_mode: Literal["simple", "batch", "parallel", "pipeline"] = Field( ingest_mode: Literal["simple", "batch", "parallel", "pipeline"] = Field(
"simple", "simple",
@ -268,6 +270,23 @@ class OpenAISettings(BaseModel):
)

class FireWorksSettings(BaseModel):
    api_key: str
    model: str = Field(
        "accounts/fireworks/models/llama-v3p1-70b-instruct",
        description="FireWorks Model to use. Example: 'accounts/fireworks/models/llama-v3p1-70b-instruct'.",
    )
    embedding_api_base: str = Field(
        None,
        description="Base URL of FIREWORKS API. Example: 'https://api.fireworks.ai/inference/v1'.",
    )
    embedding_api_key: str
    embedding_model: str = Field(
        "nomic-ai/nomic-embed-text-v1.5",
        description="FIREWORKS embedding Model to use. Example: 'nomic-ai/nomic-embed-text-v1.5'.",
    )
class GeminiSettings(BaseModel):
api_key: str
model: str = Field(
@ -597,6 +616,7 @@ class Settings(BaseModel):
huggingface: HuggingFaceSettings
sagemaker: SagemakerSettings
openai: OpenAISettings
fireworks: FireWorksSettings
gemini: GeminiSettings
ollama: OllamaSettings
azopenai: AzureOpenAISettings

View File

@ -381,7 +381,7 @@ class PrivateGptUi:
".contain { display: flex !important; flex-direction: column !important; }" ".contain { display: flex !important; flex-direction: column !important; }"
"#component-0, #component-3, #component-10, #component-8 { height: 100% !important; }" "#component-0, #component-3, #component-10, #component-8 { height: 100% !important; }"
"#chatbot { flex-grow: 1 !important; overflow: auto !important;}" "#chatbot { flex-grow: 1 !important; overflow: auto !important;}"
"#col { height: calc(100vh - 112px - 16px) !important; }" "#col { min-height: calc(100vh - 112px - 16px) !important; }"
"hr { margin-top: 1em; margin-bottom: 1em; border: 0; border-top: 1px solid #FFF; }" "hr { margin-top: 1em; margin-bottom: 1em; border: 0; border-top: 1px solid #FFF; }"
".avatar-image { background-color: antiquewhite; border-radius: 2px; }" ".avatar-image { background-color: antiquewhite; border-radius: 2px; }"
".footer { text-align: center; margin-top: 20px; font-size: 14px; display: flex; align-items: center; justify-content: center; }" ".footer { text-align: center; margin-top: 20px; font-size: 14px; display: flex; align-items: center; justify-content: center; }"
@ -522,6 +522,7 @@ class PrivateGptUi:
model_mapping = {
"llamacpp": config_settings.llamacpp.llm_hf_model_file,
"openai": config_settings.openai.model,
"fireworks": config_settings.fireworks.model,
"openailike": config_settings.openai.model,
"azopenai": config_settings.azopenai.llm_model,
"sagemaker": config_settings.sagemaker.llm_endpoint_name,

View File

@ -38,6 +38,8 @@ llama-index-vector-stores-postgres = {version ="*", optional = true}
llama-index-vector-stores-clickhouse = {version ="*", optional = true}
llama-index-storage-docstore-postgres = {version ="*", optional = true}
llama-index-storage-index-store-postgres = {version ="*", optional = true}
llama-index-llms-fireworks = {version = "*", optional = true}
llama-index-embeddings-fireworks = {version = "*", optional = true}
# Postgres
psycopg2-binary = {version ="^2.9.9", optional = true}
asyncpg = {version="^0.29.0", optional = true}
@ -83,6 +85,8 @@ vector-stores-postgres = ["llama-index-vector-stores-postgres"]
vector-stores-milvus = ["llama-index-vector-stores-milvus"]
storage-nodestore-postgres = ["llama-index-storage-docstore-postgres","llama-index-storage-index-store-postgres","psycopg2-binary","asyncpg"]
rerank-sentence-transformers = ["torch", "sentence-transformers"]
llms-fireworks = ["llama-index-llms-fireworks"]
embeddings-fireworks = ["llama-index-embeddings-fireworks"]
[tool.poetry.group.dev.dependencies]
black = "^24"

settings-fireworks.yaml (new file, 13 lines)
View File

@ -0,0 +1,13 @@
server:
  env_name: ${APP_ENV:fireworks}

llm:
  mode: fireworks

embedding:
  mode: fireworks

fireworks:
  api_key: ${FIREWORKS_API_KEY:}
  model: "accounts/fireworks/models/llama-v3p1-70b-instruct"

# poetry install --extras "ui llms-fireworks embeddings-fireworks vector-stores-qdrant embeddings-openai"
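For reference, after installing the extras listed in the comment above, this profile is typically activated the same way the installation docs describe:

```bash
PGPT_PROFILES=fireworks make run
```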

View File

@ -128,6 +128,11 @@ openai:
  model: gpt-3.5-turbo
  embedding_api_key: ${OPENAI_API_KEY:}

fireworks:
  api_key: ${FIREWORKS_API_KEY:}
  model: "accounts/fireworks/models/llama-v3p1-70b-instruct"
  embedding_api_key: ${FIREWORKS_API_KEY:}

ollama:
  llm_model: llama3.1
  embedding_model: nomic-embed-text