diff --git a/poetry.lock b/poetry.lock
index 8310e1ec..d36c756c 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -2450,6 +2450,21 @@ llama-index-core = ">=0.10.11.post1,<0.11.0"
 llama-index-embeddings-openai = ">=0.1.3,<0.2.0"
 llama-index-llms-azure-openai = ">=0.1.3,<0.2.0"
 
+[[package]]
+name = "llama-index-embeddings-fireworks"
+version = "0.1.2"
+description = "llama-index embeddings fireworks integration"
+optional = true
+python-versions = ">=3.8.1,<3.12"
+files = [
+    {file = "llama_index_embeddings_fireworks-0.1.2-py3-none-any.whl", hash = "sha256:33a6445e10c3f32dcdc4095f62590208c94f8d548de55712930050494bd6f6df"},
+    {file = "llama_index_embeddings_fireworks-0.1.2.tar.gz", hash = "sha256:643f13e5b9d7b002809d42c0029c9d784d434b0cd972d88ba40e24b83c10e27d"},
+]
+
+[package.dependencies]
+llama-index-core = ">=0.10.1,<0.11.0"
+llama-index-llms-openai = ">=0.1.1,<0.2.0"
+
 [[package]]
 name = "llama-index-embeddings-gemini"
 version = "0.1.8"
@@ -2526,6 +2541,21 @@ httpx = "*"
 llama-index-core = ">=0.10.11.post1,<0.11.0"
 llama-index-llms-openai = ">=0.1.1,<0.2.0"
 
+[[package]]
+name = "llama-index-llms-fireworks"
+version = "0.1.8"
+description = "llama-index llms fireworks integration"
+optional = true
+python-versions = "<4.0,>=3.8.1"
+files = [
+    {file = "llama_index_llms_fireworks-0.1.8-py3-none-any.whl", hash = "sha256:6cd5e54d8aebcc0475c938d0a88a0adec813192fb683be568115072924d62320"},
+    {file = "llama_index_llms_fireworks-0.1.8.tar.gz", hash = "sha256:c9f7c26efa8c2db756309d46884db53f3e158c39ccf2a43d297d263d7130cca0"},
+]
+
+[package.dependencies]
+llama-index-core = ">=0.10.1,<0.11.0"
+llama-index-llms-openai = ">=0.1.1,<0.2.0"
+
 [[package]]
 name = "llama-index-llms-gemini"
 version = "0.1.11"
@@ -3861,6 +3891,8 @@ files = [
     {file = "orjson-3.10.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:960db0e31c4e52fa0fc3ecbaea5b2d3b58f379e32a95ae6b0ebeaa25b93dfd34"},
     {file = "orjson-3.10.6-cp312-none-win32.whl", hash = "sha256:a6ea7afb5b30b2317e0bee03c8d34c8181bc5a36f2afd4d0952f378972c4efd5"},
     {file = "orjson-3.10.6-cp312-none-win_amd64.whl", hash = "sha256:874ce88264b7e655dde4aeaacdc8fd772a7962faadfb41abe63e2a4861abc3dc"},
+    {file = "orjson-3.10.6-cp313-none-win32.whl", hash = "sha256:efdf2c5cde290ae6b83095f03119bdc00303d7a03b42b16c54517baa3c4ca3d0"},
+    {file = "orjson-3.10.6-cp313-none-win_amd64.whl", hash = "sha256:8e190fe7888e2e4392f52cafb9626113ba135ef53aacc65cd13109eb9746c43e"},
     {file = "orjson-3.10.6-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:66680eae4c4e7fc193d91cfc1353ad6d01b4801ae9b5314f17e11ba55e934183"},
     {file = "orjson-3.10.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:caff75b425db5ef8e8f23af93c80f072f97b4fb3afd4af44482905c9f588da28"},
     {file = "orjson-3.10.6-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3722fddb821b6036fd2a3c814f6bd9b57a89dc6337b9924ecd614ebce3271394"},
@@ -5869,11 +5901,6 @@ files = [
     {file = "triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb"},
     {file = "triton-3.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bcbf3b1c48af6a28011a5c40a5b3b9b5330530c3827716b5fbf6d7adcc1e53e9"},
     {file = "triton-3.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6e5727202f7078c56f91ff13ad0c1abab14a0e7f2c87e91b12b6f64f3e8ae609"},
-    {file = "triton-3.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39b052da883351fdf6be3d93cedae6db3b8e3988d3b09ed221bccecfa9612230"},
-    {file = "triton-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd34f19a8582af96e6291d4afce25dac08cb2a5d218c599163761e8e0827208e"},
-    {file = "triton-3.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d5e10de8c011adeb7c878c6ce0dd6073b14367749e34467f1cff2bde1b78253"},
-    {file = "triton-3.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8903767951bf86ec960b4fe4e21bc970055afc65e9d57e916d79ae3c93665e3"},
-    {file = "triton-3.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41004fb1ae9a53fcb3e970745feb87f0e3c94c6ce1ba86e95fa3b8537894bef7"},
 ]
 
 [package.dependencies]
@@ -6690,12 +6717,14 @@ cffi = ["cffi (>=1.11)"]
 
 [extras]
 embeddings-azopenai = ["llama-index-embeddings-azure-openai"]
+embeddings-fireworks = ["llama-index-embeddings-fireworks"]
 embeddings-gemini = ["llama-index-embeddings-gemini"]
 embeddings-huggingface = ["einops", "llama-index-embeddings-huggingface"]
 embeddings-ollama = ["llama-index-embeddings-ollama", "ollama"]
 embeddings-openai = ["llama-index-embeddings-openai"]
 embeddings-sagemaker = ["boto3"]
 llms-azopenai = ["llama-index-llms-azure-openai"]
+llms-fireworks = ["llama-index-llms-fireworks"]
 llms-gemini = ["google-generativeai", "llama-index-llms-gemini"]
 llms-llama-cpp = ["llama-index-llms-llama-cpp"]
 llms-ollama = ["llama-index-llms-ollama", "ollama"]
@@ -6714,4 +6743,4 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.11,<3.12"
-content-hash = "45264d80672084e35ca0ea11b368a29001a3b9003822bddc67fb18489a8fe519"
+content-hash = "43d568d296fc82dfe94a8b13877f0424d5e7980a0f0ca9d93dace983e9623584"
diff --git a/private_gpt/components/embedding/embedding_component.py b/private_gpt/components/embedding/embedding_component.py
index 5d3e9974..49061a94 100644
--- a/private_gpt/components/embedding/embedding_component.py
+++ b/private_gpt/components/embedding/embedding_component.py
@@ -67,6 +67,20 @@ class EmbeddingComponent:
                     api_key=api_key,
                     model=model,
                 )
+            case "fireworks":
+                try:
+                    from llama_index.embeddings.fireworks import FireworksEmbedding
+                except ImportError as e:
+                    raise ImportError(
+                        "FireworksEmbedding dependencies not found, install with `poetry install --extras embeddings-fireworks`"
+                    ) from e
+
+                api_key = settings.fireworks.embedding_api_key or settings.fireworks.api_key
+
+                self.embedding_model = FireworksEmbedding(
+                    api_key=api_key,
+                )
+
             case "ollama":
                 try:
                     from llama_index.embeddings.ollama import (  # type: ignore
diff --git a/private_gpt/components/llm/llm_component.py b/private_gpt/components/llm/llm_component.py
index e3a02813..aad12c89 100644
--- a/private_gpt/components/llm/llm_component.py
+++ b/private_gpt/components/llm/llm_component.py
@@ -102,6 +102,19 @@ class LLMComponent:
                     api_key=openai_settings.api_key,
                     model=openai_settings.model,
                 )
+            case "fireworks":
+                try:
+                    from llama_index.llms.fireworks import Fireworks  # type: ignore
+                except ImportError as e:
+                    raise ImportError(
+                        "fireworks dependencies not found, install with `poetry install --extras llms-fireworks`"
+                    ) from e
+
+                fireworks_settings = settings.fireworks
+                self.llm = Fireworks(
+                    model=fireworks_settings.model,
+                    api_key=fireworks_settings.api_key,
+                )
             case "openailike":
                 try:
                     from llama_index.llms.openai_like import OpenAILike  # type: ignore
diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py
index 4cf192a3..ca15bc52 100644
--- a/private_gpt/settings/settings.py
+++ b/private_gpt/settings/settings.py
@@ -260,6 +260,17 @@ class OpenAISettings(BaseModel):
         description="OpenAI embedding Model to use. Example: 'text-embedding-3-large'.",
     )
 
+
+class FireworksSettings(BaseModel):
+    api_key: str
+    model: str = Field(
+        "accounts/fireworks/models/llama-v3p1-70b-instruct",
+        description="Fireworks model to use. Example: 'accounts/fireworks/models/llama-v3p1-70b-instruct'.",
+    )
+    embedding_api_key: str = Field(
+        description="API key for Fireworks embeddings; the component falls back to api_key when this is empty.",
+    )
+
 
 class GeminiSettings(BaseModel):
     api_key: str
@@ -586,6 +597,7 @@ class Settings(BaseModel):
     huggingface: HuggingFaceSettings
     sagemaker: SagemakerSettings
     openai: OpenAISettings
+    fireworks: FireworksSettings
     gemini: GeminiSettings
     ollama: OllamaSettings
     azopenai: AzureOpenAISettings
diff --git a/pyproject.toml b/pyproject.toml
index da9fab80..8aa88df5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,6 +37,9 @@ llama-index-vector-stores-postgres = {version ="^0.1.11", optional = true}
 llama-index-vector-stores-clickhouse = {version ="^0.1.3", optional = true}
 llama-index-storage-docstore-postgres = {version ="^0.1.3", optional = true}
 llama-index-storage-index-store-postgres = {version ="^0.1.4", optional = true}
+# Fireworks
+llama-index-llms-fireworks = {version = "^0.1.5", optional = true}
+llama-index-embeddings-fireworks = {version = "^0.1.2", optional = true}
 # Postgres
 psycopg2-binary = {version ="^2.9.9", optional = true}
 asyncpg = {version="^0.29.0", optional = true}
@@ -90,6 +93,8 @@ vector-stores-postgres = ["llama-index-vector-stores-postgres"]
 vector-stores-milvus = ["llama-index-vector-stores-milvus"]
 storage-nodestore-postgres = ["llama-index-storage-docstore-postgres","llama-index-storage-index-store-postgres","psycopg2-binary","asyncpg"]
 rerank-sentence-transformers = ["torch", "sentence-transformers"]
+llms-fireworks = ["llama-index-llms-fireworks"]
+embeddings-fireworks = ["llama-index-embeddings-fireworks"]
 
 [tool.poetry.group.dev.dependencies]
 black = "^22"
diff --git a/settings-fireworks.yaml b/settings-fireworks.yaml
new file mode 100644
index 00000000..45046b03
--- /dev/null
+++ b/settings-fireworks.yaml
@@ -0,0 +1,13 @@
+# Install with: poetry install --extras "ui llms-fireworks embeddings-fireworks vector-stores-qdrant"
+server:
+  env_name: ${APP_ENV:fireworks}
+
+llm:
+  mode: fireworks
+
+embedding:
+  mode: fireworks
+
+fireworks:
+  api_key: ${FIREWORKS_API_KEY:}
+  model: "accounts/fireworks/models/llama-v3p1-70b-instruct"
diff --git a/settings.yaml b/settings.yaml
index f030604a..b4278a8a 100644
--- a/settings.yaml
+++ b/settings.yaml
@@ -52,7 +52,7 @@ llm:
   context_window: 3900
   # Select your tokenizer. Llama-index tokenizer is the default.
   # tokenizer: meta-llama/Meta-Llama-3.1-8B-Instruct
-  temperature: 0.1      # The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1)
+  temperature: 0.1 # The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1)
 
 rag:
   similarity_top_k: 2
@@ -68,19 +68,19 @@ summarize:
   use_async: true
 
 clickhouse:
-    host: localhost
-    port: 8443
-    username: admin
-    password: clickhouse
-    database: embeddings
+  host: localhost
+  port: 8443
+  username: admin
+  password: clickhouse
+  database: embeddings
 
 llamacpp:
   llm_hf_repo_id: lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF
   llm_hf_model_file: Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf
-  tfs_z: 1.0      # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
-  top_k: 40       # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
-  top_p: 1.0      # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
-  repeat_penalty: 1.1   # Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)
+  tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
+  top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
+  top_p: 1.0 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
+  repeat_penalty: 1.1 # Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)
 
 embedding:
   # Should be matching the value above in most cases
@@ -126,11 +126,16 @@ openai:
   model: gpt-3.5-turbo
   embedding_api_key: ${OPENAI_API_KEY:}
 
+fireworks:
+  api_key: ${FIREWORKS_API_KEY:}
+  model: "accounts/fireworks/models/llama-v3p1-70b-instruct"
+  embedding_api_key: ${FIREWORKS_API_KEY:}
+
 ollama:
   llm_model: llama3.1
   embedding_model: nomic-embed-text
   api_base: http://localhost:11434
-  embedding_api_base: http://localhost:11434   # change if your embedding model runs on another ollama
+  embedding_api_base: http://localhost:11434 # change if your embedding model runs on another Ollama instance
   keep_alive: 5m
   request_timeout: 120.0
   autopull_models: true
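
Reviewer note: the wiring above can be smoke-tested outside PrivateGPT with a few lines of llama-index. The snippet below is a minimal sketch, not part of the diff; it assumes the `llms-fireworks` and `embeddings-fireworks` extras are installed and `FIREWORKS_API_KEY` is exported, and it makes the same `Fireworks` and `FireworksEmbedding` constructor calls the components do.

```python
# Minimal sketch: exercise the two Fireworks integrations directly,
# mirroring the constructor calls added in llm_component.py and
# embedding_component.py. Assumes FIREWORKS_API_KEY is set in the
# environment, matching settings-fireworks.yaml.
import os

from llama_index.embeddings.fireworks import FireworksEmbedding
from llama_index.llms.fireworks import Fireworks

api_key = os.environ["FIREWORKS_API_KEY"]

llm = Fireworks(
    model="accounts/fireworks/models/llama-v3p1-70b-instruct",
    api_key=api_key,
)
embedding_model = FireworksEmbedding(api_key=api_key)

# One completion and one embedding verify both API paths end to end.
print(llm.complete("Say hello in one short sentence."))
print(len(embedding_model.get_text_embedding("hello world")))
```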
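To exercise the new mode inside PrivateGPT itself, install the extras noted at the top of settings-fireworks.yaml (`poetry install --extras "ui llms-fireworks embeddings-fireworks vector-stores-qdrant"`), export `FIREWORKS_API_KEY`, and start the server with the new profile selected, e.g. `PGPT_PROFILES=fireworks make run` (PrivateGPT's standard mechanism for loading a `settings-<profile>.yaml` on top of settings.yaml).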