FEAT: Added Fireworks Integration

This commit is contained in:
Somashekar B R 2024-09-20 14:13:09 +05:30
parent 77461b96cf
commit f1ad995124
7 changed files with 112 additions and 17 deletions

poetry.lock (generated)

@@ -2450,6 +2450,21 @@ llama-index-core = ">=0.10.11.post1,<0.11.0"
llama-index-embeddings-openai = ">=0.1.3,<0.2.0"
llama-index-llms-azure-openai = ">=0.1.3,<0.2.0"
[[package]]
name = "llama-index-embeddings-fireworks"
version = "0.1.2"
description = "llama-index embeddings fireworks integration"
optional = true
python-versions = ">=3.8.1,<3.12"
files = [
{file = "llama_index_embeddings_fireworks-0.1.2-py3-none-any.whl", hash = "sha256:33a6445e10c3f32dcdc4095f62590208c94f8d548de55712930050494bd6f6df"},
{file = "llama_index_embeddings_fireworks-0.1.2.tar.gz", hash = "sha256:643f13e5b9d7b002809d42c0029c9d784d434b0cd972d88ba40e24b83c10e27d"},
]
[package.dependencies]
llama-index-core = ">=0.10.1,<0.11.0"
llama-index-llms-openai = ">=0.1.1,<0.2.0"
[[package]]
name = "llama-index-embeddings-gemini"
version = "0.1.8"
@@ -2526,6 +2541,21 @@ httpx = "*"
llama-index-core = ">=0.10.11.post1,<0.11.0"
llama-index-llms-openai = ">=0.1.1,<0.2.0"
[[package]]
name = "llama-index-llms-fireworks"
version = "0.1.8"
description = "llama-index llms fireworks integration"
optional = true
python-versions = "<4.0,>=3.8.1"
files = [
{file = "llama_index_llms_fireworks-0.1.8-py3-none-any.whl", hash = "sha256:6cd5e54d8aebcc0475c938d0a88a0adec813192fb683be568115072924d62320"},
{file = "llama_index_llms_fireworks-0.1.8.tar.gz", hash = "sha256:c9f7c26efa8c2db756309d46884db53f3e158c39ccf2a43d297d263d7130cca0"},
]
[package.dependencies]
llama-index-core = ">=0.10.1,<0.11.0"
llama-index-llms-openai = ">=0.1.1,<0.2.0"
[[package]]
name = "llama-index-llms-gemini"
version = "0.1.11"
@@ -3861,6 +3891,8 @@ files = [
{file = "orjson-3.10.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:960db0e31c4e52fa0fc3ecbaea5b2d3b58f379e32a95ae6b0ebeaa25b93dfd34"},
{file = "orjson-3.10.6-cp312-none-win32.whl", hash = "sha256:a6ea7afb5b30b2317e0bee03c8d34c8181bc5a36f2afd4d0952f378972c4efd5"},
{file = "orjson-3.10.6-cp312-none-win_amd64.whl", hash = "sha256:874ce88264b7e655dde4aeaacdc8fd772a7962faadfb41abe63e2a4861abc3dc"},
{file = "orjson-3.10.6-cp313-none-win32.whl", hash = "sha256:efdf2c5cde290ae6b83095f03119bdc00303d7a03b42b16c54517baa3c4ca3d0"},
{file = "orjson-3.10.6-cp313-none-win_amd64.whl", hash = "sha256:8e190fe7888e2e4392f52cafb9626113ba135ef53aacc65cd13109eb9746c43e"},
{file = "orjson-3.10.6-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:66680eae4c4e7fc193d91cfc1353ad6d01b4801ae9b5314f17e11ba55e934183"},
{file = "orjson-3.10.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:caff75b425db5ef8e8f23af93c80f072f97b4fb3afd4af44482905c9f588da28"},
{file = "orjson-3.10.6-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3722fddb821b6036fd2a3c814f6bd9b57a89dc6337b9924ecd614ebce3271394"},
@@ -5869,11 +5901,6 @@ files = [
{file = "triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb"},
{file = "triton-3.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bcbf3b1c48af6a28011a5c40a5b3b9b5330530c3827716b5fbf6d7adcc1e53e9"},
{file = "triton-3.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6e5727202f7078c56f91ff13ad0c1abab14a0e7f2c87e91b12b6f64f3e8ae609"},
{file = "triton-3.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39b052da883351fdf6be3d93cedae6db3b8e3988d3b09ed221bccecfa9612230"},
{file = "triton-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd34f19a8582af96e6291d4afce25dac08cb2a5d218c599163761e8e0827208e"},
{file = "triton-3.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d5e10de8c011adeb7c878c6ce0dd6073b14367749e34467f1cff2bde1b78253"},
{file = "triton-3.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8903767951bf86ec960b4fe4e21bc970055afc65e9d57e916d79ae3c93665e3"},
{file = "triton-3.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41004fb1ae9a53fcb3e970745feb87f0e3c94c6ce1ba86e95fa3b8537894bef7"},
]
[package.dependencies]
@@ -6690,12 +6717,14 @@ cffi = ["cffi (>=1.11)"]
[extras]
embeddings-azopenai = ["llama-index-embeddings-azure-openai"]
embeddings-fireworks = ["llama-index-embeddings-fireworks"]
embeddings-gemini = ["llama-index-embeddings-gemini"]
embeddings-huggingface = ["einops", "llama-index-embeddings-huggingface"]
embeddings-ollama = ["llama-index-embeddings-ollama", "ollama"]
embeddings-openai = ["llama-index-embeddings-openai"]
embeddings-sagemaker = ["boto3"]
llms-azopenai = ["llama-index-llms-azure-openai"]
llms-fireworks = ["llama-index-llms-fireworks"]
llms-gemini = ["google-generativeai", "llama-index-llms-gemini"]
llms-llama-cpp = ["llama-index-llms-llama-cpp"]
llms-ollama = ["llama-index-llms-ollama", "ollama"]
@@ -6714,4 +6743,4 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.11,<3.12"
content-hash = "45264d80672084e35ca0ea11b368a29001a3b9003822bddc67fb18489a8fe519"
content-hash = "43d568d296fc82dfe94a8b13877f0424d5e7980a0f0ca9d93dace983e9623584"


@@ -67,6 +67,20 @@ class EmbeddingComponent:
                    api_key=api_key,
                    model=model,
                )
            case "fireworks":
                try:
                    from llama_index.embeddings.fireworks import FireworksEmbedding
                except ImportError as e:
                    raise ImportError(
                        "FireworksEmbedding dependencies not found, install with `poetry install --extras embeddings-fireworks`"
                    ) from e
                api_key = settings.fireworks.embedding_api_key or settings.fireworks.api_key
                self.embedding_model = FireworksEmbedding(
                    api_key=api_key,
                )
            case "ollama":
                try:
                    from llama_index.embeddings.ollama import (  # type: ignore

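For reference, a minimal standalone sketch of what this branch wires up, assuming llama-index-embeddings-fireworks is installed and a key is present in the environment. The FIREWORKS_EMBEDDING_API_KEY name here is illustrative only; the component itself reads settings.fireworks.embedding_api_key or settings.fireworks.api_key.

import os

from llama_index.embeddings.fireworks import FireworksEmbedding

# Prefer a dedicated embedding key, falling back to the general key,
# mirroring the `embedding_api_key or api_key` logic in the component.
api_key = os.environ.get("FIREWORKS_EMBEDDING_API_KEY") or os.environ["FIREWORKS_API_KEY"]

embedding_model = FireworksEmbedding(api_key=api_key)
vector = embedding_model.get_text_embedding("hello fireworks")
print(len(vector))  # dimensionality of the returned embedding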

@@ -102,6 +102,19 @@ class LLMComponent:
                    api_key=openai_settings.api_key,
                    model=openai_settings.model,
                )
            case "fireworks":
                try:
                    from llama_index.llms.fireworks import Fireworks  # type: ignore
                except ImportError as e:
                    raise ImportError(
                        "Fireworks dependencies not found, install with `poetry install --extras llms-fireworks`"
                    ) from e
                fireworks_settings = settings.fireworks
                self.llm = Fireworks(
                    model=fireworks_settings.model,
                    api_key=fireworks_settings.api_key,
                )
            case "openailike":
                try:
                    from llama_index.llms.openai_like import OpenAILike  # type: ignore

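The LLM side can be exercised the same way outside the component; a sketch assuming FIREWORKS_API_KEY is set and reusing the default model id from the FireWorksSettings block below:

import os

from llama_index.llms.fireworks import Fireworks

llm = Fireworks(
    model="accounts/fireworks/models/llama-v3p1-70b-instruct",
    api_key=os.environ["FIREWORKS_API_KEY"],
)

# One-off completion; LLMComponent wires this same object into the RAG pipeline.
print(llm.complete("Say hello in one short sentence."))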

@@ -260,6 +260,21 @@ class OpenAISettings(BaseModel):
        description="OpenAI embedding Model to use. Example: 'text-embedding-3-large'.",
    )


class FireWorksSettings(BaseModel):
    api_key: str
    model: str = Field(
        "accounts/fireworks/models/llama-v3p1-70b-instruct",
        description="Fireworks model to use. Example: 'accounts/fireworks/models/llama-v3p1-70b-instruct'.",
    )
    embedding_api_key: str


class GeminiSettings(BaseModel):
    api_key: str
@@ -586,6 +601,7 @@ class Settings(BaseModel):
    huggingface: HuggingFaceSettings
    sagemaker: SagemakerSettings
    openai: OpenAISettings
    fireworks: FireWorksSettings
    gemini: GeminiSettings
    ollama: OllamaSettings
    azopenai: AzureOpenAISettings

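The fireworks block added to the YAML profiles maps onto FireWorksSettings one field at a time; a hedged sketch of that validation in isolation, assuming pydantic as the surrounding BaseModel/Field usage implies:

from pydantic import BaseModel, Field

class FireWorksSettings(BaseModel):
    api_key: str
    model: str = Field(
        "accounts/fireworks/models/llama-v3p1-70b-instruct",
        description="Fireworks model to use.",
    )
    embedding_api_key: str

# Normally populated from settings-fireworks.yaml / settings.yaml; built
# directly here only to show the default model id being applied.
s = FireWorksSettings(api_key="fw-demo", embedding_api_key="fw-demo")
print(s.model)  # -> accounts/fireworks/models/llama-v3p1-70b-instruct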

@@ -37,6 +37,9 @@ llama-index-vector-stores-postgres = {version ="^0.1.11", optional = true}
llama-index-vector-stores-clickhouse = {version ="^0.1.3", optional = true}
llama-index-storage-docstore-postgres = {version ="^0.1.3", optional = true}
llama-index-storage-index-store-postgres = {version ="^0.1.4", optional = true}
# fireworks
llama-index-llms-fireworks = {version = "^0.1.5", optional = true}
llama-index-embeddings-fireworks = {version = "^0.1.2", optional = true}
# Postgres
psycopg2-binary = {version ="^2.9.9", optional = true}
asyncpg = {version="^0.29.0", optional = true}
@@ -90,6 +93,8 @@ vector-stores-postgres = ["llama-index-vector-stores-postgres"]
vector-stores-milvus = ["llama-index-vector-stores-milvus"]
storage-nodestore-postgres = ["llama-index-storage-docstore-postgres","llama-index-storage-index-store-postgres","psycopg2-binary","asyncpg"]
rerank-sentence-transformers = ["torch", "sentence-transformers"]
llms-fireworks = ["llama-index-llms-fireworks"]
embeddings-fireworks = ["llama-index-embeddings-fireworks"]
[tool.poetry.group.dev.dependencies]
black = "^22"

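These extras keep the integration opt-in: neither Fireworks package is installed unless explicitly requested, e.g. with poetry install --extras "llms-fireworks embeddings-fireworks". The fuller command noted in settings-fireworks.yaml below also pulls in the UI and Qdrant extras.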
settings-fireworks.yaml (new file)

@@ -0,0 +1,13 @@
server:
  env_name: ${APP_ENV:fireworks}

llm:
  mode: fireworks

embedding:
  mode: fireworks

fireworks:
  api_key: ${FIREWORKS_API_KEY:}
  model: "accounts/fireworks/models/llama-v3p1-70b-instruct"

# poetry install --extras "ui llms-fireworks embeddings-fireworks vector-stores-qdrant"
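Usage note: with this profile in place and FIREWORKS_API_KEY exported, starting the server with the fireworks profile selected (via the APP_ENV/env_name mechanism shown above) should route both completion and embedding traffic through Fireworks; the commented poetry command lists the extras the profile depends on.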


@@ -52,7 +52,7 @@ llm:
  context_window: 3900
  # Select your tokenizer. Llama-index tokenizer is the default.
  # tokenizer: meta-llama/Meta-Llama-3.1-8B-Instruct
  temperature: 0.1 # The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1)

rag:
  similarity_top_k: 2
@@ -68,19 +68,19 @@ summarize:
  use_async: true

clickhouse:
  host: localhost
  port: 8443
  username: admin
  password: clickhouse
  database: embeddings

llamacpp:
  llm_hf_repo_id: lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF
  llm_hf_model_file: Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf
  tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
  top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
  top_p: 1.0 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
  repeat_penalty: 1.1 # Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)

embedding:
  # Should be matching the value above in most cases
@@ -126,11 +126,16 @@ openai:
  model: gpt-3.5-turbo
  embedding_api_key: ${OPENAI_API_KEY:}

fireworks:
  api_key: ${FIREWORKS_API_KEY:}
  model: "accounts/fireworks/models/llama-v3p1-70b-instruct"
  embedding_api_key: ${FIREWORKS_API_KEY:}

ollama:
  llm_model: llama3.1
  embedding_model: nomic-embed-text
  api_base: http://localhost:11434
  embedding_api_base: http://localhost:11434 # change if your embedding model runs on another ollama
  keep_alive: 5m
  request_timeout: 120.0
  autopull_models: true