Merge c4be3f8cd2 into b7ee43788d

2025-08-28 03:51:22 +00:00 · 2024-11-17 19:41:41 +00:00 · 2024-11-17 19:41:41 +00:00 · 078778c6af
commit 078778c6af
parent b7ee43788d c4be3f8cd2
11 changed files with 305 additions and 74 deletions
--- a/Dockerfile.fireworks
+++ b/Dockerfile.fireworks
@ -0,0 +1,54 @@
+FROM python:3.11.6-slim-bookworm AS base
+
+# Install poetry
+RUN pip install --no-cache-dir pipx
+RUN python3 -m pipx ensurepath
+RUN pipx install poetry==1.8.3
+ENV PATH="/root/.local/bin:$PATH"
+# Absolute venv path: a relative PATH entry only resolves from the app WORKDIR
+ENV PATH="/home/worker/app/.venv/bin:$PATH"
+
+RUN apt-get update && apt-get install -y --no-install-recommends build-essential \
+  && rm -rf /var/lib/apt/lists/*
+
+# https://python-poetry.org/docs/configuration/#virtualenvsin-project
+ENV POETRY_VIRTUALENVS_IN_PROJECT=true
+
+FROM base AS dependencies
+WORKDIR /home/worker/app
+COPY pyproject.toml poetry.lock ./
+
+ARG POETRY_EXTRAS="ui llms-fireworks embeddings-fireworks vector-stores-qdrant embeddings-openai"
+RUN poetry install --no-root --extras "${POETRY_EXTRAS}"
+
+FROM base AS app
+ENV PYTHONUNBUFFERED=1
+ENV PORT=8080
+ENV APP_ENV=prod
+ENV PYTHONPATH="$PYTHONPATH:/home/worker/app/private_gpt/"
+EXPOSE 8080
+
+# Prepare a non-root user
+# More info about how to configure UIDs and GIDs in Docker:
+# https://github.com/systemd/systemd/blob/main/docs/UIDS-GIDS.md
+
+# Define the User ID (UID) for the non-root user
+# UID 100 is chosen to avoid conflicts with existing system users
+ARG UID=100
+
+# Define the Group ID (GID) for the non-root user
+# GID 65534 is often used for the 'nogroup' or 'nobody' group
+ARG GID=65534
+
+RUN adduser --system --gid ${GID} --uid ${UID} --home /home/worker worker
+WORKDIR /home/worker/app
+
+RUN mkdir local_data models && chown worker /home/worker/app local_data models
+COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
+COPY --chown=worker private_gpt/ private_gpt
+COPY --chown=worker *.yaml .
+COPY --chown=worker scripts/ scripts
+
+USER worker
+# Exec form: python runs as PID 1 and receives SIGTERM from `docker stop`
+ENTRYPOINT ["python", "-m", "private_gpt"]
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@ -1,5 +1,4 @@
 services:
-
  #-----------------------------------
  #---- Private-GPT services ---------
  #-----------------------------------
@ -7,7 +6,7 @@ services:
  # Private-GPT service for the Ollama CPU and GPU modes
  # This service builds from an external Dockerfile and runs the Ollama mode.
  private-gpt-ollama:
-    image: ${PGPT_IMAGE:-zylonai/private-gpt}:${PGPT_TAG:-0.6.2}-ollama  # x-release-please-version
+    image: ${PGPT_IMAGE:-zylonai/private-gpt}:${PGPT_TAG:-0.6.2}-ollama # x-release-please-version
    user: root
    build:
      context: .
@ -93,7 +92,7 @@ services:
    ports:
      - "11434:11434"
    volumes:
-      - ./models:/root/.ollama
+      - ./local_data:/root/.ollama
    profiles:
      - ""
      - ollama-cpu
@ -114,3 +113,21 @@ services:
              capabilities: [gpu]
    profiles:
      - ollama-cuda
+
+  # Private-GPT service for the Fireworks mode; builds from Dockerfile.fireworks.
+  private-gpt-fireworks:
+    build:
+      context: .
+      dockerfile: Dockerfile.fireworks
+    volumes:
+      - ./local_data/:/home/worker/app/local_data
+    ports:
+      - "3001:8080"
+    environment:
+      PORT: 8080
+      PGPT_PROFILES: fireworks
+      FIREWORKS_API_KEY: ${FIREWORKS_API_KEY}
+    env_file:
+      - .env
+    profiles:
+      - fireworks
--- a/fern/docs/pages/installation/installation.mdx
+++ b/fern/docs/pages/installation/installation.mdx
@ -3,45 +3,63 @@ It is important that you review the [Main Concepts](../concepts) section to unde
 ## Base requirements to run PrivateGPT

 ### 1. Clone the PrivateGPT Repository
+
 Clone the repository and navigate to it:
+
 ```bash
 git clone https://github.com/zylon-ai/private-gpt
 cd private-gpt
 ```

 ### 2. Install Python 3.11
+
 If you do not have Python 3.11 installed, install it using a Python version manager like `pyenv`. Earlier Python versions are not supported.
+
 #### macOS/Linux
+
 Install and set Python 3.11 using [pyenv](https://github.com/pyenv/pyenv):
+
 ```bash
 pyenv install 3.11
 pyenv local 3.11
 ```
+
 #### Windows
+
 Install and set Python 3.11 using [pyenv-win](https://github.com/pyenv-win/pyenv-win):
+
 ```bash
 pyenv install 3.11
 pyenv local 3.11
 ```

 ### 3. Install `Poetry`
+
 Install [Poetry](https://python-poetry.org/docs/#installing-with-the-official-installer) for dependency management:
 Follow the instructions on the official Poetry website to install it.

 <Callout intent="warning">
-A bug exists in Poetry versions 1.7.0 and earlier. We strongly recommend upgrading to a tested version.
-To upgrade Poetry to latest tested version, run `poetry self update 1.8.3` after installing it.
+  A bug exists in Poetry versions 1.7.0 and earlier. We strongly recommend
+  upgrading to a tested version. To upgrade Poetry to latest tested version, run
+  `poetry self update 1.8.3` after installing it.
 </Callout>

 ### 4. Optional: Install `make`
+
 To run various scripts, you need to install `make`. Follow the instructions for your operating system:
+
 #### macOS
+
 (Using Homebrew):
+
 ```bash
 brew install make
 ```
+
 #### Windows
+
 (Using Chocolatey):
+
 ```bash
 choco install make
 ```
@ -53,6 +71,7 @@ PrivateGPT allows customization of the setup, from fully local to cloud-based, b
 ```bash
 poetry install --extras "<extra1> <extra2>..."
 ```
+
 Where `<extra>` can be any of the following options described below.

 ### Available Modules
@ -61,46 +80,49 @@ You need to choose one option per category (LLM, Embeddings, Vector Stores, UI).

 #### LLM

-| **Option**   | **Description**                                                        | **Extra**           |
-|--------------|------------------------------------------------------------------------|---------------------|
-| **ollama**   | Adds support for Ollama LLM, requires Ollama running locally           | llms-ollama         |
-| llama-cpp    | Adds support for local LLM using LlamaCPP                              | llms-llama-cpp      |
-| sagemaker    | Adds support for Amazon Sagemaker LLM, requires Sagemaker endpoints    | llms-sagemaker      |
-| openai       | Adds support for OpenAI LLM, requires OpenAI API key                   | llms-openai         |
-| openailike   | Adds support for 3rd party LLM providers compatible with OpenAI's API  | llms-openai-like    |
-| azopenai     | Adds support for Azure OpenAI LLM, requires Azure endpoints            | llms-azopenai       |
-| gemini       | Adds support for Gemini LLM, requires Gemini API key                   | llms-gemini         |
+| **Option** | **Description**                                                       | **Extra**        |
+| ---------- | --------------------------------------------------------------------- | ---------------- |
+| **ollama** | Adds support for Ollama LLM, requires Ollama running locally          | llms-ollama      |
+| llama-cpp  | Adds support for local LLM using LlamaCPP                             | llms-llama-cpp   |
+| sagemaker  | Adds support for Amazon Sagemaker LLM, requires Sagemaker endpoints   | llms-sagemaker   |
+| openai     | Adds support for OpenAI LLM, requires OpenAI API key                  | llms-openai      |
+| openailike | Adds support for 3rd party LLM providers compatible with OpenAI's API | llms-openai-like |
+| azopenai   | Adds support for Azure OpenAI LLM, requires Azure endpoints           | llms-azopenai    |
+| gemini     | Adds support for Gemini LLM, requires Gemini API key                  | llms-gemini      |

 #### Embeddings

-| **Option**       | **Description**                                                                | **Extra**               |
-|------------------|--------------------------------------------------------------------------------|-------------------------|
-| **ollama**       | Adds support for Ollama Embeddings, requires Ollama running locally            | embeddings-ollama       |
-| huggingface      | Adds support for local Embeddings using HuggingFace                            | embeddings-huggingface  |
-| openai           | Adds support for OpenAI Embeddings, requires OpenAI API key                    | embeddings-openai       |
-| sagemaker        | Adds support for Amazon Sagemaker Embeddings, requires Sagemaker endpoints     | embeddings-sagemaker    |
-| azopenai         | Adds support for Azure OpenAI Embeddings, requires Azure endpoints             | embeddings-azopenai     |
-| gemini           | Adds support for Gemini Embeddings, requires Gemini API key                    | embeddings-gemini       |
+| **Option**  | **Description**                                                            | **Extra**              |
+| ----------- | -------------------------------------------------------------------------- | ---------------------- |
+| **ollama**  | Adds support for Ollama Embeddings, requires Ollama running locally        | embeddings-ollama      |
+| huggingface | Adds support for local Embeddings using HuggingFace                        | embeddings-huggingface |
+| openai      | Adds support for OpenAI Embeddings, requires OpenAI API key                | embeddings-openai      |
+| sagemaker   | Adds support for Amazon Sagemaker Embeddings, requires Sagemaker endpoints | embeddings-sagemaker   |
+| azopenai    | Adds support for Azure OpenAI Embeddings, requires Azure endpoints         | embeddings-azopenai    |
+| gemini      | Adds support for Gemini Embeddings, requires Gemini API key                | embeddings-gemini      |

 #### Vector Stores

-| **Option**       | **Description**                         | **Extra**               |
-|------------------|-----------------------------------------|-------------------------|
-| **qdrant**       | Adds support for Qdrant vector store    | vector-stores-qdrant    |
-| milvus           | Adds support for Milvus vector store    | vector-stores-milvus    |
-| chroma           | Adds support for Chroma DB vector store | vector-stores-chroma    |
-| postgres         | Adds support for Postgres vector store  | vector-stores-postgres  |
-| clickhouse       | Adds support for Clickhouse vector store| vector-stores-clickhouse|
+| **Option** | **Description**                          | **Extra**                |
+| ---------- | ---------------------------------------- | ------------------------ |
+| **qdrant** | Adds support for Qdrant vector store     | vector-stores-qdrant     |
+| milvus     | Adds support for Milvus vector store     | vector-stores-milvus     |
+| chroma     | Adds support for Chroma DB vector store  | vector-stores-chroma     |
+| postgres   | Adds support for Postgres vector store   | vector-stores-postgres   |
+| clickhouse | Adds support for Clickhouse vector store | vector-stores-clickhouse |

 #### UI

-| **Option**   | **Description**                          | **Extra** |
-|--------------|------------------------------------------|-----------|
-| Gradio       | Adds support for UI using Gradio         | ui        |
+| **Option** | **Description**                  | **Extra** |
+| ---------- | -------------------------------- | --------- |
+| Gradio     | Adds support for UI using Gradio | ui        |

-<Callout intent = "warning">
-A working **Gradio UI client** is provided to test the API, together with a set of useful tools such as bulk
-model download script, ingestion script, documents folder watch, etc. Please refer to the [UI alternatives](/manual/user-interface/alternatives) page for more UI alternatives.
+<Callout intent="warning">
+  A working **Gradio UI client** is provided to test the API, together with a
+  set of useful tools such as bulk model download script, ingestion script,
+  documents folder watch, etc. Please refer to the [UI
+  alternatives](/manual/user-interface/alternatives) page for more UI
+  alternatives.
 </Callout>

 ## Recommended Setups
@ -109,7 +131,7 @@ There are just some examples of recommended setups. You can mix and match the di
 You'll find more information in the Manual section of the documentation.

 > **Important for Windows**: In the examples below or how to run PrivateGPT with `make run`, `PGPT_PROFILES` env var is being set inline following Unix command line syntax (works on MacOS and Linux).
-If you are using Windows, you'll need to set the env var in a different way, for example:
+> If you are using Windows, you'll need to set the env var in a different way, for example:

 ```powershell
 # Powershell
@ -136,6 +158,7 @@ Go to [ollama.ai](https://ollama.ai/) and follow the instructions to install Oll
 After the installation, make sure the Ollama desktop app is closed.

 Now, start Ollama service (it will start a local inference server, serving both the LLM and the Embeddings):
+
 ```bash
 ollama serve
 ```
@ -152,6 +175,7 @@ ollama pull nomic-embed-text
 ```

 Once done, on a different terminal, you can install PrivateGPT with the following command:
+
 ```bash
 poetry install --extras "ui llms-ollama embeddings-ollama vector-stores-qdrant"
 ```
@ -175,6 +199,7 @@ You need to have access to sagemaker inference endpoints for the LLM and / or th
 Edit the `settings-sagemaker.yaml` file to include the correct Sagemaker endpoints.

 Then, install PrivateGPT with the following command:
+
 ```bash
 poetry install --extras "ui llms-sagemaker embeddings-sagemaker vector-stores-qdrant"
 ```
@ -198,6 +223,7 @@ You need an OPENAI API key to run this setup.
 Edit the `settings-openai.yaml` file to include the correct API KEY. Never commit it! It's a secret! As an alternative to editing `settings-openai.yaml`, you can just set the env var OPENAI_API_KEY.

 Then, install PrivateGPT with the following command:
+
 ```bash
 poetry install --extras "ui llms-openai embeddings-openai vector-stores-qdrant"
 ```
@ -221,6 +247,7 @@ You need to have access to Azure OpenAI inference endpoints for the LLM and / or
 Edit the `settings-azopenai.yaml` file to include the correct Azure OpenAI endpoints.

 Then, install PrivateGPT with the following command:
+
 ```bash
 poetry install --extras "ui llms-azopenai embeddings-azopenai vector-stores-qdrant"
 ```
@ -235,6 +262,30 @@ PrivateGPT will use the already existing `settings-azopenai.yaml` settings file,

 The UI will be available at http://localhost:8001

+### Non-Private, FIREWORKS-powered test setup
+
+If you want to test PrivateGPT with FIREWORKS's LLM and Embeddings -taking into account your data is going to FIREWORKS!- follow the steps below.
+
+You need a FIREWORKS API key to run this setup.
+
+Edit the `settings-fireworks.yaml` file to include the correct API KEY. Never commit it! It's a secret! As an alternative to editing `settings-fireworks.yaml`, you can just set the env var FIREWORKS_API_KEY.
+
+Then, install PrivateGPT with the following command:
+
+```bash
+poetry install --extras "ui llms-fireworks embeddings-fireworks vector-stores-qdrant embeddings-openai"
+```
+
+Once installed, you can run PrivateGPT.
+
+```bash
+PGPT_PROFILES=fireworks make run
+```
+
+PrivateGPT will use the already existing `settings-fireworks.yaml` settings file, which is already configured to use FIREWORKS LLM and Embeddings endpoints, and Qdrant.
+
+The UI will be available at http://localhost:8001
+
 ### Local, Llama-CPP powered setup

 If you want to run PrivateGPT fully locally without relying on Ollama, you can run the following command:
@ -244,6 +295,7 @@ poetry install --extras "ui llms-llama-cpp embeddings-huggingface vector-stores-
 ```

 In order for local LLM and embeddings to work, you need to download the models to the `models` folder. You can do so by running the `setup` script:
+
 ```bash
 poetry run python scripts/setup
 ```
@ -277,6 +329,7 @@ To do that, you need to install `llama.cpp` python's binding `llama-cpp-python`
 that activate `METAL`: you have to pass `-DLLAMA_METAL=on` to the CMake command tha `pip` runs for you (see below).

 In other words, one should simply run:
+
 ```bash
 CMAKE_ARGS="-DLLAMA_METAL=on" pip install --force-reinstall --no-cache-dir llama-cpp-python
 ```
@ -285,9 +338,10 @@ The above command will force the re-installation of `llama-cpp-python` with `MET
 `llama.cpp` locally with your `METAL` libraries (shipped by default with your macOS).

 More information is available in the documentation of the libraries themselves:
-* [llama-cpp-python](https://github.com/abetlen/llama-cpp-python#installation-with-hardware-acceleration)
-* [llama-cpp-python's documentation](https://llama-cpp-python.readthedocs.io/en/latest/#installation-with-hardware-acceleration)
-* [llama.cpp](https://github.com/ggerganov/llama.cpp#build)
+
+- [llama-cpp-python](https://github.com/abetlen/llama-cpp-python#installation-with-hardware-acceleration)
+- [llama-cpp-python's documentation](https://llama-cpp-python.readthedocs.io/en/latest/#installation-with-hardware-acceleration)
+- [llama.cpp](https://github.com/ggerganov/llama.cpp#build)

 ##### Llama-CPP Windows NVIDIA GPU support

@ -297,11 +351,11 @@ dependencies.

 Some tips to get it working with an NVIDIA card and CUDA (Tested on Windows 10 with CUDA 11.5 RTX 3070):

-* Install latest VS2022 (and build tools) https://visualstudio.microsoft.com/vs/community/
-* Install CUDA toolkit https://developer.nvidia.com/cuda-downloads
-* Verify your installation is correct by running `nvcc --version` and `nvidia-smi`, ensure your CUDA version is up to
+- Install latest VS2022 (and build tools) https://visualstudio.microsoft.com/vs/community/
+- Install CUDA toolkit https://developer.nvidia.com/cuda-downloads
+- Verify your installation is correct by running `nvcc --version` and `nvidia-smi`, ensure your CUDA version is up to
  date and your GPU is detected.
-* [Optional] Install CMake to troubleshoot building issues by compiling llama.cpp directly https://cmake.org/download/
+- [Optional] Install CMake to troubleshoot building issues by compiling llama.cpp directly https://cmake.org/download/

 If you have all required dependencies properly configured running the
 following powershell command should succeed.
@ -332,9 +386,9 @@ dependencies.

 Some tips:

-* Make sure you have an up-to-date C++ compiler
-* Install CUDA toolkit https://developer.nvidia.com/cuda-downloads
-* Verify your installation is correct by running `nvcc --version` and `nvidia-smi`, ensure your CUDA version is up to
+- Make sure you have an up-to-date C++ compiler
+- Install CUDA toolkit https://developer.nvidia.com/cuda-downloads
+- Verify your installation is correct by running `nvcc --version` and `nvidia-smi`, ensure your CUDA version is up to
  date and your GPU is detected.

 After that running the following command in the repository will install llama.cpp with GPU support:
@ -356,13 +410,17 @@ AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 |

 Linux GPU support is done through ROCm.
 Some tips:
-* Install ROCm from [quick-start install guide](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/quick-start.html)
-* [Install PyTorch for ROCm](https://rocm.docs.amd.com/projects/radeon/en/latest/docs/install/install-pytorch.html)
+
+- Install ROCm from [quick-start install guide](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/quick-start.html)
+- [Install PyTorch for ROCm](https://rocm.docs.amd.com/projects/radeon/en/latest/docs/install/install-pytorch.html)
+
 ```bash
 wget https://repo.radeon.com/rocm/manylinux/rocm-rel-6.0/torch-2.1.1%2Brocm6.0-cp311-cp311-linux_x86_64.whl
 poetry run pip install --force-reinstall --no-cache-dir torch-2.1.1+rocm6.0-cp311-cp311-linux_x86_64.whl
 ```
-* Install bitsandbytes for ROCm
+
+- Install bitsandbytes for ROCm
+
 ```bash
 PYTORCH_ROCM_ARCH=gfx900,gfx906,gfx908,gfx90a,gfx1030,gfx1100,gfx1101,gfx940,gfx941,gfx942
 BITSANDBYTES_VERSION=62353b0200b8557026c176e74ac48b84b953a854
@ -374,6 +432,7 @@ pip install . --extra-index-url https://download.pytorch.org/whl/nightly
 ```

 After that running the following command in the repository will install llama.cpp with GPU support:
+
 ```bash
 LLAMA_CPP_PYTHON_VERSION=0.2.56
 DAMDGPU_TARGETS=gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx940;gfx941;gfx942
@ -391,15 +450,15 @@ AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI =
 Execution of LLMs locally still has a lot of sharp edges, specially when running on non Linux platforms.
 You might encounter several issues:

-* Performance: RAM or VRAM usage is very high, your computer might experience slowdowns or even crashes.
-* GPU Virtualization on Windows and OSX: Simply not possible with docker desktop, you have to run the server directly on
+- Performance: RAM or VRAM usage is very high, your computer might experience slowdowns or even crashes.
+- GPU Virtualization on Windows and OSX: Simply not possible with docker desktop, you have to run the server directly on
  the host.
-* Building errors: Some of PrivateGPT dependencies need to build native code, and they might fail on some platforms.
+- Building errors: Some of PrivateGPT dependencies need to build native code, and they might fail on some platforms.
  Most likely you are missing some dev tools in your machine (updated C++ compiler, CUDA is not on PATH, etc.).
  If you encounter any of these issues, please open an issue and we'll try to help.

 One of the first reflex to adopt is: get more information.
-If, during your installation, something does not go as planned, retry in *verbose* mode, and see what goes wrong.
+If, during your installation, something does not go as planned, retry in _verbose_ mode, and see what goes wrong.

 For example, when installing packages with `pip install`, you can add the option `-vvv` to show the details of the installation.

@ -414,8 +473,8 @@ To install a C++ compiler on Windows 10/11, follow these steps:

 1. Install Visual Studio 2022.
 2. Make sure the following components are selected:
-    * Universal Windows Platform development
-    * C++ CMake tools for Windows
+   - Universal Windows Platform development
+   - C++ CMake tools for Windows
 3. Download the MinGW installer from the [MinGW website](https://sourceforge.net/projects/mingw/).
 4. Run the installer and select the `gcc` component.

--- a/poetry.lock
+++ b/poetry.lock
@ -2685,6 +2685,21 @@ llama-index-core = ">=0.11.0,<0.12.0"
 llama-index-embeddings-openai = ">=0.2.3,<0.3.0"
 llama-index-llms-azure-openai = ">=0.2.0,<0.3.0"

+[[package]]
+name = "llama-index-embeddings-fireworks"
+version = "0.2.0"
+description = "llama-index embeddings fireworks integration"
+optional = true
+python-versions = "<3.12,>=3.8.1"
+files = [
+    {file = "llama_index_embeddings_fireworks-0.2.0-py3-none-any.whl", hash = "sha256:44958479691f55005bd3bbf773316c556e5b1428c6ec174a4f443016e79e48ea"},
+    {file = "llama_index_embeddings_fireworks-0.2.0.tar.gz", hash = "sha256:0085a8fd5b4d4f71f797cfef11a85c4c3fbe763a3680edeae8f410184fa2d266"},
+]
+
+[package.dependencies]
+llama-index-core = ">=0.11.0,<0.12.0"
+llama-index-llms-openai = ">=0.2.0,<0.3.0"
+
 [[package]]
 name = "llama-index-embeddings-gemini"
 version = "0.2.0"
@ -2778,6 +2793,21 @@ httpx = "*"
 llama-index-core = ">=0.11.0,<0.12.0"
 llama-index-llms-openai = ">=0.2.1,<0.3.0"

+[[package]]
+name = "llama-index-llms-fireworks"
+version = "0.2.0"
+description = "llama-index llms fireworks integration"
+optional = true
+python-versions = "<4.0,>=3.8.1"
+files = [
+    {file = "llama_index_llms_fireworks-0.2.0-py3-none-any.whl", hash = "sha256:65a604f8cf622f7ce695c458d375cd7dac6e27f4596ba90e5464b2594b0688a0"},
+    {file = "llama_index_llms_fireworks-0.2.0.tar.gz", hash = "sha256:cfdd07b6bc01890e55a4dfc3af2e62fe82e5a08b362d52314d024728ebcf7c5b"},
+]
+
+[package.dependencies]
+llama-index-core = ">=0.11.0,<0.12.0"
+llama-index-llms-openai = ">=0.2.0,<0.3.0"
+
 [[package]]
 name = "llama-index-llms-gemini"
 version = "0.3.5"
@ -6242,11 +6272,6 @@ files = [
    {file = "triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb"},
    {file = "triton-3.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bcbf3b1c48af6a28011a5c40a5b3b9b5330530c3827716b5fbf6d7adcc1e53e9"},
    {file = "triton-3.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6e5727202f7078c56f91ff13ad0c1abab14a0e7f2c87e91b12b6f64f3e8ae609"},
-    {file = "triton-3.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39b052da883351fdf6be3d93cedae6db3b8e3988d3b09ed221bccecfa9612230"},
-    {file = "triton-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd34f19a8582af96e6291d4afce25dac08cb2a5d218c599163761e8e0827208e"},
-    {file = "triton-3.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d5e10de8c011adeb7c878c6ce0dd6073b14367749e34467f1cff2bde1b78253"},
-    {file = "triton-3.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8903767951bf86ec960b4fe4e21bc970055afc65e9d57e916d79ae3c93665e3"},
-    {file = "triton-3.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41004fb1ae9a53fcb3e970745feb87f0e3c94c6ce1ba86e95fa3b8537894bef7"},
 ]

 [package.dependencies]
@ -7082,6 +7107,7 @@ cffi = ["cffi (>=1.11)"]

 [extras]
 embeddings-azopenai = ["llama-index-embeddings-azure-openai"]
+embeddings-fireworks = ["llama-index-embeddings-fireworks"]
 embeddings-gemini = ["llama-index-embeddings-gemini"]
 embeddings-huggingface = ["einops", "llama-index-embeddings-huggingface"]
 embeddings-mistral = ["llama-index-embeddings-mistralai"]
@ -7089,6 +7115,7 @@ embeddings-ollama = ["llama-index-embeddings-ollama"]
 embeddings-openai = ["llama-index-embeddings-openai"]
 embeddings-sagemaker = ["boto3"]
 llms-azopenai = ["llama-index-llms-azure-openai"]
+llms-fireworks = ["llama-index-llms-fireworks"]
 llms-gemini = ["llama-index-llms-gemini"]
 llms-llama-cpp = ["llama-index-llms-llama-cpp"]
 llms-ollama = ["llama-index-llms-ollama"]
@ -7107,4 +7134,4 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.11,<3.12"
-content-hash = "16e3be4521aa64c936ee8fb841655f15090b71cf8faaeed7e73a4bcdf3fbdea2"
+content-hash = "f41ee2165df33fd6815114a9d6b01508e1e8726dd7a8baf99825514586f250f0"
--- a/private_gpt/components/embedding/embedding_component.py
+++ b/private_gpt/components/embedding/embedding_component.py
@ -67,6 +67,24 @@ class EmbeddingComponent:
                    api_key=api_key,
                    model=model,
                )
+            case "fireworks":
+                try:
+                    from llama_index.embeddings.fireworks import (  # type: ignore
+                        FireworksEmbedding,
+                    )
+                except ImportError as e:
+                    raise ImportError(
+                        "FireworksEmbedding dependencies not found, install with `poetry install --extras embeddings-fireworks`"
+                    ) from e
+
+                api_key = (
+                    settings.fireworks.embedding_api_key or settings.fireworks.api_key
+                )
+                model = settings.fireworks.embedding_model  # Fireworks model, not OpenAI's
+                self.embedding_model = FireworksEmbedding(
+                    api_key=api_key,
+                    model_name=model,  # `model` is checked against OpenAI model names
+                )
            case "ollama":
                try:
                    from llama_index.embeddings.ollama import (  # type: ignore
--- a/private_gpt/components/llm/llm_component.py
+++ b/private_gpt/components/llm/llm_component.py
@ -102,6 +102,19 @@ class LLMComponent:
                    api_key=openai_settings.api_key,
                    model=openai_settings.model,
                )
+            case "fireworks":
+                try:
+                    from llama_index.llms.fireworks import Fireworks  # type: ignore
+                except ImportError as import_error:
+                    raise ImportError(
+                        "fireworks dependencies not found, install with `poetry install --extras llms-fireworks`"
+                    ) from import_error
+
+                # Model name and API key both come from the `fireworks` settings section.
+                self.llm = Fireworks(
+                    api_key=settings.fireworks.api_key,
+                    model=settings.fireworks.model,
+                )
            case "openailike":
                try:
                    from llama_index.llms.openai_like import OpenAILike  # type: ignore
--- a/private_gpt/settings/settings.py
+++ b/private_gpt/settings/settings.py
@ -115,6 +115,7 @@ class LLMSettings(BaseModel):
        "mock",
        "ollama",
        "gemini",
+        "fireworks",
    ]
    max_new_tokens: int = Field(
        256,
@ -205,6 +206,7 @@ class EmbeddingSettings(BaseModel):
        "mock",
        "gemini",
        "mistralai",
+        "fireworks",
    ]
    ingest_mode: Literal["simple", "batch", "parallel", "pipeline"] = Field(
        "simple",
@ -268,6 +270,23 @@ class OpenAISettings(BaseModel):
    )


+class FireWorksSettings(BaseModel):
+    api_key: str  # Fireworks API key; also the fallback key for embeddings
+    model: str = Field(
+        "accounts/fireworks/models/llama-v3p1-70b-instruct",
+        description="FireWorks Model to use. Example: 'accounts/fireworks/models/llama-v3p1-70b-instruct'.",
+    )
+    embedding_api_base: str | None = Field(
+        None,
+        description="Base URL of FIREWORKS API. Example: 'https://api.fireworks.ai/inference/v1'.",
+    )
+    embedding_api_key: str | None = None  # optional; api_key is used when unset
+    embedding_model: str = Field(
+        "nomic-ai/nomic-embed-text-v1.5",
+        description="FIREWORKS embedding Model to use. Example: 'nomic-ai/nomic-embed-text-v1.5'.",
+    )
+
+
 class GeminiSettings(BaseModel):
    api_key: str
    model: str = Field(
@ -597,6 +616,7 @@ class Settings(BaseModel):
    huggingface: HuggingFaceSettings
    sagemaker: SagemakerSettings
    openai: OpenAISettings
+    fireworks: FireWorksSettings
    gemini: GeminiSettings
    ollama: OllamaSettings
    azopenai: AzureOpenAISettings
--- a/private_gpt/ui/ui.py
+++ b/private_gpt/ui/ui.py
@ -381,7 +381,7 @@ class PrivateGptUi:
            ".contain { display: flex !important; flex-direction: column !important; }"
            "#component-0, #component-3, #component-10, #component-8  { height: 100% !important; }"
            "#chatbot { flex-grow: 1 !important; overflow: auto !important;}"
-            "#col { height: calc(100vh - 112px - 16px) !important; }"
+            "#col { min-height: calc(100vh - 112px - 16px) !important; }"
            "hr { margin-top: 1em; margin-bottom: 1em; border: 0; border-top: 1px solid #FFF; }"
            ".avatar-image { background-color: antiquewhite; border-radius: 2px; }"
            ".footer { text-align: center; margin-top: 20px; font-size: 14px; display: flex; align-items: center; justify-content: center; }"
@ -522,6 +522,7 @@ class PrivateGptUi:
                        model_mapping = {
                            "llamacpp": config_settings.llamacpp.llm_hf_model_file,
                            "openai": config_settings.openai.model,
+                            "fireworks": config_settings.fireworks.model,
                            "openailike": config_settings.openai.model,
                            "azopenai": config_settings.azopenai.llm_model,
                            "sagemaker": config_settings.sagemaker.llm_endpoint_name,
--- a/pyproject.toml
+++ b/pyproject.toml
@ -38,6 +38,8 @@ llama-index-vector-stores-postgres = {version ="*", optional = true}
 llama-index-vector-stores-clickhouse = {version ="*", optional = true}
 llama-index-storage-docstore-postgres = {version ="*", optional = true}
 llama-index-storage-index-store-postgres = {version ="*", optional = true}
+llama-index-llms-fireworks = {version = "*", optional = true}
+llama-index-embeddings-fireworks = {version = "*", optional = true}
 # Postgres
 psycopg2-binary = {version ="^2.9.9", optional = true}
 asyncpg = {version="^0.29.0", optional = true}
@ -83,6 +85,8 @@ vector-stores-postgres = ["llama-index-vector-stores-postgres"]
 vector-stores-milvus = ["llama-index-vector-stores-milvus"]
 storage-nodestore-postgres = ["llama-index-storage-docstore-postgres","llama-index-storage-index-store-postgres","psycopg2-binary","asyncpg"]
 rerank-sentence-transformers = ["torch", "sentence-transformers"]
+llms-fireworks = ["llama-index-llms-fireworks"]
+embeddings-fireworks = ["llama-index-embeddings-fireworks"]

 [tool.poetry.group.dev.dependencies]
 black = "^24"
--- a/settings-fireworks.yaml
+++ b/settings-fireworks.yaml
@ -0,0 +1,13 @@
+# Fireworks profile: sets both llm.mode and embedding.mode to "fireworks".
+server:
+  env_name: ${APP_ENV:fireworks}
+
+llm:
+  mode: fireworks
+
+embedding:
+  mode: fireworks
+
+fireworks:
+  # Presumably read from the FIREWORKS_API_KEY environment variable, empty when unset — confirm against the settings loader.
+  api_key: ${FIREWORKS_API_KEY:}
+  model: "accounts/fireworks/models/llama-v3p1-70b-instruct"
+# Install the extras used with this profile:
+#   poetry install --extras "ui llms-fireworks embeddings-fireworks vector-stores-qdrant embeddings-openai"
--- a/settings.yaml
+++ b/settings.yaml
@ -54,7 +54,7 @@ llm:
  context_window: 3900
  # Select your tokenizer. Llama-index tokenizer is the default.
  # tokenizer: meta-llama/Meta-Llama-3.1-8B-Instruct
-  temperature: 0.1      # The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1)
+  temperature: 0.1 # The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1)

 rag:
  similarity_top_k: 2
@ -70,19 +70,19 @@ summarize:
  use_async: true

 clickhouse:
-    host: localhost
-    port: 8443
-    username: admin
-    password: clickhouse
-    database: embeddings
+  host: localhost
+  port: 8443
+  username: admin
+  password: clickhouse
+  database: embeddings

 llamacpp:
  llm_hf_repo_id: lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF
  llm_hf_model_file: Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf
-  tfs_z: 1.0            # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
-  top_k: 40             # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
-  top_p: 1.0            # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
-  repeat_penalty: 1.1   # Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)
+  tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
+  top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
+  top_p: 1.0 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
+  repeat_penalty: 1.1 # Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)

 embedding:
  # Should be matching the value above in most cases
@ -128,11 +128,16 @@ openai:
  model: gpt-3.5-turbo
  embedding_api_key: ${OPENAI_API_KEY:}

+# Fireworks provider settings. api_key and embedding_api_key use the same
+# FIREWORKS_API_KEY placeholder; presumably one key serves both LLM and embedding calls — confirm.
+fireworks:
+  api_key: ${FIREWORKS_API_KEY:}
+  model: "accounts/fireworks/models/llama-v3p1-70b-instruct"
+  embedding_api_key: ${FIREWORKS_API_KEY:}
+
 ollama:
  llm_model: llama3.1
  embedding_model: nomic-embed-text
  api_base: http://localhost:11434
-  embedding_api_base: http://localhost:11434  # change if your embedding model runs on another ollama
+  embedding_api_base: http://localhost:11434 # change if your embedding model runs on another ollama
  keep_alive: 5m
  request_timeout: 120.0
  autopull_models: true