diff --git a/poetry.lock b/poetry.lock
index 8310e1ec..d36c756c 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -2450,6 +2450,21 @@ llama-index-core = ">=0.10.11.post1,<0.11.0"
 llama-index-embeddings-openai = ">=0.1.3,<0.2.0"
 llama-index-llms-azure-openai = ">=0.1.3,<0.2.0"
 
+[[package]]
+name = "llama-index-embeddings-fireworks"
+version = "0.1.2"
+description = "llama-index embeddings fireworks integration"
+optional = true
+python-versions = ">=3.8.1,<3.12"
+files = [
+    {file = "llama_index_embeddings_fireworks-0.1.2-py3-none-any.whl", hash = "sha256:33a6445e10c3f32dcdc4095f62590208c94f8d548de55712930050494bd6f6df"},
+    {file = "llama_index_embeddings_fireworks-0.1.2.tar.gz", hash = "sha256:643f13e5b9d7b002809d42c0029c9d784d434b0cd972d88ba40e24b83c10e27d"},
+]
+
+[package.dependencies]
+llama-index-core = ">=0.10.1,<0.11.0"
+llama-index-llms-openai = ">=0.1.1,<0.2.0"
+
 [[package]]
 name = "llama-index-embeddings-gemini"
 version = "0.1.8"
@@ -2526,6 +2541,21 @@ httpx = "*"
 llama-index-core = ">=0.10.11.post1,<0.11.0"
 llama-index-llms-openai = ">=0.1.1,<0.2.0"
 
+[[package]]
+name = "llama-index-llms-fireworks"
+version = "0.1.8"
+description = "llama-index llms fireworks integration"
+optional = true
+python-versions = "<4.0,>=3.8.1"
+files = [
+    {file = "llama_index_llms_fireworks-0.1.8-py3-none-any.whl", hash = "sha256:6cd5e54d8aebcc0475c938d0a88a0adec813192fb683be568115072924d62320"},
+    {file = "llama_index_llms_fireworks-0.1.8.tar.gz", hash = "sha256:c9f7c26efa8c2db756309d46884db53f3e158c39ccf2a43d297d263d7130cca0"},
+]
+
+[package.dependencies]
+llama-index-core = ">=0.10.1,<0.11.0"
+llama-index-llms-openai = ">=0.1.1,<0.2.0"
+
 [[package]]
 name = "llama-index-llms-gemini"
 version = "0.1.11"
@@ -3861,6 +3891,8 @@ files = [
     {file = "orjson-3.10.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:960db0e31c4e52fa0fc3ecbaea5b2d3b58f379e32a95ae6b0ebeaa25b93dfd34"},
     {file = "orjson-3.10.6-cp312-none-win32.whl", hash = "sha256:a6ea7afb5b30b2317e0bee03c8d34c8181bc5a36f2afd4d0952f378972c4efd5"},
     {file = "orjson-3.10.6-cp312-none-win_amd64.whl", hash = "sha256:874ce88264b7e655dde4aeaacdc8fd772a7962faadfb41abe63e2a4861abc3dc"},
+    {file = "orjson-3.10.6-cp313-none-win32.whl", hash = "sha256:efdf2c5cde290ae6b83095f03119bdc00303d7a03b42b16c54517baa3c4ca3d0"},
+    {file = "orjson-3.10.6-cp313-none-win_amd64.whl", hash = "sha256:8e190fe7888e2e4392f52cafb9626113ba135ef53aacc65cd13109eb9746c43e"},
     {file = "orjson-3.10.6-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:66680eae4c4e7fc193d91cfc1353ad6d01b4801ae9b5314f17e11ba55e934183"},
     {file = "orjson-3.10.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:caff75b425db5ef8e8f23af93c80f072f97b4fb3afd4af44482905c9f588da28"},
     {file = "orjson-3.10.6-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3722fddb821b6036fd2a3c814f6bd9b57a89dc6337b9924ecd614ebce3271394"},
@@ -5869,11 +5901,6 @@ files = [
     {file = "triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb"},
     {file = "triton-3.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bcbf3b1c48af6a28011a5c40a5b3b9b5330530c3827716b5fbf6d7adcc1e53e9"},
     {file = "triton-3.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6e5727202f7078c56f91ff13ad0c1abab14a0e7f2c87e91b12b6f64f3e8ae609"},
-    {file = "triton-3.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39b052da883351fdf6be3d93cedae6db3b8e3988d3b09ed221bccecfa9612230"},
-    {file = "triton-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd34f19a8582af96e6291d4afce25dac08cb2a5d218c599163761e8e0827208e"},
-    {file = "triton-3.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d5e10de8c011adeb7c878c6ce0dd6073b14367749e34467f1cff2bde1b78253"},
-    {file = "triton-3.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8903767951bf86ec960b4fe4e21bc970055afc65e9d57e916d79ae3c93665e3"},
-    {file = "triton-3.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41004fb1ae9a53fcb3e970745feb87f0e3c94c6ce1ba86e95fa3b8537894bef7"},
 ]
 
 [package.dependencies]
@@ -6690,12 +6717,14 @@ cffi = ["cffi (>=1.11)"]
 
 [extras]
 embeddings-azopenai = ["llama-index-embeddings-azure-openai"]
+embeddings-fireworks = ["llama-index-embeddings-fireworks"]
 embeddings-gemini = ["llama-index-embeddings-gemini"]
 embeddings-huggingface = ["einops", "llama-index-embeddings-huggingface"]
 embeddings-ollama = ["llama-index-embeddings-ollama", "ollama"]
 embeddings-openai = ["llama-index-embeddings-openai"]
 embeddings-sagemaker = ["boto3"]
 llms-azopenai = ["llama-index-llms-azure-openai"]
+llms-fireworks = ["llama-index-llms-fireworks"]
 llms-gemini = ["google-generativeai", "llama-index-llms-gemini"]
 llms-llama-cpp = ["llama-index-llms-llama-cpp"]
 llms-ollama = ["llama-index-llms-ollama", "ollama"]
@@ -6714,4 +6743,4 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.11,<3.12"
-content-hash = "45264d80672084e35ca0ea11b368a29001a3b9003822bddc67fb18489a8fe519"
+content-hash = "43d568d296fc82dfe94a8b13877f0424d5e7980a0f0ca9d93dace983e9623584"
diff --git a/private_gpt/components/embedding/embedding_component.py b/private_gpt/components/embedding/embedding_component.py
index 5d3e9974..49061a94 100644
--- a/private_gpt/components/embedding/embedding_component.py
+++ b/private_gpt/components/embedding/embedding_component.py
@@ -67,6 +67,20 @@ class EmbeddingComponent:
                     api_key=api_key,
                     model=model,
                 )
+            case "fireworks":
+                try:
+                    from llama_index.embeddings.fireworks import FireworksEmbedding
+                except ImportError as e:
+                    raise ImportError(
+                        "FireworksEmbedding dependencies not found, install with `poetry install --extras embeddings-fireworks`"
+                    ) from e
+
+                api_key = settings.fireworks.embedding_api_key or settings.fireworks.api_key
+
+                self.embedding_model = FireworksEmbedding(
+                    api_key=api_key,
+                )
+
             case "ollama":
                 try:
                     from llama_index.embeddings.ollama import (  # type: ignore
diff --git a/private_gpt/components/llm/llm_component.py b/private_gpt/components/llm/llm_component.py
index e3a02813..aad12c89 100644
--- a/private_gpt/components/llm/llm_component.py
+++ b/private_gpt/components/llm/llm_component.py
@@ -102,6 +102,19 @@ class LLMComponent:
                     api_key=openai_settings.api_key,
                     model=openai_settings.model,
                 )
+            case "fireworks":
+                try:
+                    from llama_index.llms.fireworks import Fireworks  # type: ignore
+                except ImportError as e:
+                    raise ImportError(
+                        "fireworks dependencies not found, install with `poetry install --extras llms-fireworks`"
+                    ) from e
+
+                fireworks_settings = settings.fireworks
+                self.llm = Fireworks(
+                    model=fireworks_settings.model,
+                    api_key=fireworks_settings.api_key,
+                )
             case "openailike":
                 try:
                     from llama_index.llms.openai_like import OpenAILike  # type: ignore
diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py
index 4cf192a3..ca15bc52 100644
--- a/private_gpt/settings/settings.py
+++ b/private_gpt/settings/settings.py
@@ -260,6 +260,17 @@ class OpenAISettings(BaseModel):
         description="OpenAI embedding Model to use. Example: 'text-embedding-3-large'.",
     )
 
+
+class FireworksSettings(BaseModel):
+    api_key: str
+    model: str = Field(
+        "accounts/fireworks/models/llama-v3p1-70b-instruct",
+        description="Fireworks model to use. Example: 'accounts/fireworks/models/llama-v3p1-70b-instruct'.",
+    )
+    embedding_api_key: str = Field(
+        description="API key for Fireworks embeddings; the component falls back to api_key when this is empty.",
+    )
+
 
 class GeminiSettings(BaseModel):
     api_key: str
@@ -586,6 +597,7 @@ class Settings(BaseModel):
     huggingface: HuggingFaceSettings
     sagemaker: SagemakerSettings
     openai: OpenAISettings
+    fireworks: FireworksSettings
     gemini: GeminiSettings
     ollama: OllamaSettings
     azopenai: AzureOpenAISettings
diff --git a/pyproject.toml b/pyproject.toml
index da9fab80..8aa88df5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,6 +37,9 @@ llama-index-vector-stores-postgres = {version ="^0.1.11", optional = true}
 llama-index-vector-stores-clickhouse = {version ="^0.1.3", optional = true}
 llama-index-storage-docstore-postgres = {version ="^0.1.3", optional = true}
 llama-index-storage-index-store-postgres = {version ="^0.1.4", optional = true}
+# Fireworks
+llama-index-llms-fireworks = {version = "^0.1.5", optional = true}
+llama-index-embeddings-fireworks = {version = "^0.1.2", optional = true}
 # Postgres
 psycopg2-binary = {version ="^2.9.9", optional = true}
 asyncpg = {version="^0.29.0", optional = true}
@@ -90,6 +93,8 @@ vector-stores-postgres = ["llama-index-vector-stores-postgres"]
 vector-stores-milvus = ["llama-index-vector-stores-milvus"]
 storage-nodestore-postgres = ["llama-index-storage-docstore-postgres","llama-index-storage-index-store-postgres","psycopg2-binary","asyncpg"]
 rerank-sentence-transformers = ["torch", "sentence-transformers"]
+llms-fireworks = ["llama-index-llms-fireworks"]
+embeddings-fireworks = ["llama-index-embeddings-fireworks"]
 
 [tool.poetry.group.dev.dependencies]
 black = "^22"
diff --git a/settings-fireworks.yaml b/settings-fireworks.yaml
new file mode 100644
index 00000000..45046b03
--- /dev/null
+++ b/settings-fireworks.yaml
@@ -0,0 +1,13 @@
+# Install with: poetry install --extras "ui llms-fireworks embeddings-fireworks vector-stores-qdrant"
+server:
+  env_name: ${APP_ENV:fireworks}
+
+llm:
+  mode: fireworks
+
+embedding:
+  mode: fireworks
+
+fireworks:
+  api_key: ${FIREWORKS_API_KEY:}
+  model: "accounts/fireworks/models/llama-v3p1-70b-instruct"
diff --git a/settings.yaml b/settings.yaml
index f030604a..b4278a8a 100644
--- a/settings.yaml
+++ b/settings.yaml
@@ -52,7 +52,7 @@ llm:
   context_window: 3900
   # Select your tokenizer. Llama-index tokenizer is the default.
   # tokenizer: meta-llama/Meta-Llama-3.1-8B-Instruct
-  temperature: 0.1      # The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1)
+  temperature: 0.1 # The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1)
 
 rag:
   similarity_top_k: 2
@@ -68,19 +68,19 @@ summarize:
   use_async: true
 
 clickhouse:
-    host: localhost
-    port: 8443
-    username: admin
-    password: clickhouse
-    database: embeddings
+  host: localhost
+  port: 8443
+  username: admin
+  password: clickhouse
+  database: embeddings
 
 llamacpp:
   llm_hf_repo_id: lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF
   llm_hf_model_file: Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf
-  tfs_z: 1.0      # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
-  top_k: 40       # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
-  top_p: 1.0      # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
-  repeat_penalty: 1.1   # Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)
+  tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
+  top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
+  top_p: 1.0 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
+  repeat_penalty: 1.1 # Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)
 
 embedding:
   # Should be matching the value above in most cases
@@ -126,11 +126,16 @@ openai:
   model: gpt-3.5-turbo
   embedding_api_key: ${OPENAI_API_KEY:}
 
+fireworks:
+  api_key: ${FIREWORKS_API_KEY:}
+  model: "accounts/fireworks/models/llama-v3p1-70b-instruct"
+  embedding_api_key: ${FIREWORKS_API_KEY:}
+
 ollama:
   llm_model: llama3.1
   embedding_model: nomic-embed-text
   api_base: http://localhost:11434
-  embedding_api_base: http://localhost:11434   # change if your embedding model runs on another ollama
+  embedding_api_base: http://localhost:11434 # change if your embedding model runs on another Ollama instance
   keep_alive: 5m
   request_timeout: 120.0
   autopull_models: true
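
Reviewer note: the wiring above can be smoke-tested outside PrivateGPT with a few lines of llama-index. The snippet below is a minimal sketch, not part of the diff; it assumes the `llms-fireworks` and `embeddings-fireworks` extras are installed and `FIREWORKS_API_KEY` is exported, and it makes the same `Fireworks` and `FireworksEmbedding` constructor calls the components do.

```python
# Minimal sketch: exercise the two Fireworks integrations directly,
# mirroring the constructor calls added in llm_component.py and
# embedding_component.py. Assumes FIREWORKS_API_KEY is set in the
# environment, matching settings-fireworks.yaml.
import os

from llama_index.embeddings.fireworks import FireworksEmbedding
from llama_index.llms.fireworks import Fireworks

api_key = os.environ["FIREWORKS_API_KEY"]

llm = Fireworks(
    model="accounts/fireworks/models/llama-v3p1-70b-instruct",
    api_key=api_key,
)
embedding_model = FireworksEmbedding(api_key=api_key)

# One completion and one embedding verify both API paths end to end.
print(llm.complete("Say hello in one short sentence."))
print(len(embedding_model.get_text_embedding("hello world")))
```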
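To exercise the new mode inside PrivateGPT itself, install the extras noted at the top of settings-fireworks.yaml (`poetry install --extras "ui llms-fireworks embeddings-fireworks vector-stores-qdrant"`), export `FIREWORKS_API_KEY`, and start the server with the new profile selected, e.g. `PGPT_PROFILES=fireworks make run` (PrivateGPT's standard mechanism for loading a `settings-<profile>.yaml` on top of settings.yaml).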