FEAT: Added Fireworks Integration

This commit is contained in:
Somashekar B R 2024-09-20 14:13:09 +05:30
parent 77461b96cf
commit f1ad995124
7 changed files with 112 additions and 17 deletions

poetry.lock (generated)

@@ -2450,6 +2450,21 @@ llama-index-core = ">=0.10.11.post1,<0.11.0"
llama-index-embeddings-openai = ">=0.1.3,<0.2.0"
llama-index-llms-azure-openai = ">=0.1.3,<0.2.0"
[[package]]
name = "llama-index-embeddings-fireworks"
version = "0.1.2"
description = "llama-index embeddings fireworks integration"
optional = true
python-versions = ">=3.8.1,<3.12"
files = [
{file = "llama_index_embeddings_fireworks-0.1.2-py3-none-any.whl", hash = "sha256:33a6445e10c3f32dcdc4095f62590208c94f8d548de55712930050494bd6f6df"},
{file = "llama_index_embeddings_fireworks-0.1.2.tar.gz", hash = "sha256:643f13e5b9d7b002809d42c0029c9d784d434b0cd972d88ba40e24b83c10e27d"},
]
[package.dependencies]
llama-index-core = ">=0.10.1,<0.11.0"
llama-index-llms-openai = ">=0.1.1,<0.2.0"
[[package]]
name = "llama-index-embeddings-gemini"
version = "0.1.8"
@@ -2526,6 +2541,21 @@ httpx = "*"
llama-index-core = ">=0.10.11.post1,<0.11.0"
llama-index-llms-openai = ">=0.1.1,<0.2.0"
[[package]]
name = "llama-index-llms-fireworks"
version = "0.1.8"
description = "llama-index llms fireworks integration"
optional = true
python-versions = "<4.0,>=3.8.1"
files = [
{file = "llama_index_llms_fireworks-0.1.8-py3-none-any.whl", hash = "sha256:6cd5e54d8aebcc0475c938d0a88a0adec813192fb683be568115072924d62320"},
{file = "llama_index_llms_fireworks-0.1.8.tar.gz", hash = "sha256:c9f7c26efa8c2db756309d46884db53f3e158c39ccf2a43d297d263d7130cca0"},
]
[package.dependencies]
llama-index-core = ">=0.10.1,<0.11.0"
llama-index-llms-openai = ">=0.1.1,<0.2.0"
[[package]]
name = "llama-index-llms-gemini"
version = "0.1.11"
@@ -3861,6 +3891,8 @@ files = [
{file = "orjson-3.10.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:960db0e31c4e52fa0fc3ecbaea5b2d3b58f379e32a95ae6b0ebeaa25b93dfd34"},
{file = "orjson-3.10.6-cp312-none-win32.whl", hash = "sha256:a6ea7afb5b30b2317e0bee03c8d34c8181bc5a36f2afd4d0952f378972c4efd5"},
{file = "orjson-3.10.6-cp312-none-win_amd64.whl", hash = "sha256:874ce88264b7e655dde4aeaacdc8fd772a7962faadfb41abe63e2a4861abc3dc"},
{file = "orjson-3.10.6-cp313-none-win32.whl", hash = "sha256:efdf2c5cde290ae6b83095f03119bdc00303d7a03b42b16c54517baa3c4ca3d0"},
{file = "orjson-3.10.6-cp313-none-win_amd64.whl", hash = "sha256:8e190fe7888e2e4392f52cafb9626113ba135ef53aacc65cd13109eb9746c43e"},
{file = "orjson-3.10.6-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:66680eae4c4e7fc193d91cfc1353ad6d01b4801ae9b5314f17e11ba55e934183"},
{file = "orjson-3.10.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:caff75b425db5ef8e8f23af93c80f072f97b4fb3afd4af44482905c9f588da28"},
{file = "orjson-3.10.6-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3722fddb821b6036fd2a3c814f6bd9b57a89dc6337b9924ecd614ebce3271394"},
@@ -5869,11 +5901,6 @@ files = [
{file = "triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb"},
{file = "triton-3.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bcbf3b1c48af6a28011a5c40a5b3b9b5330530c3827716b5fbf6d7adcc1e53e9"},
{file = "triton-3.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6e5727202f7078c56f91ff13ad0c1abab14a0e7f2c87e91b12b6f64f3e8ae609"},
{file = "triton-3.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39b052da883351fdf6be3d93cedae6db3b8e3988d3b09ed221bccecfa9612230"},
{file = "triton-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd34f19a8582af96e6291d4afce25dac08cb2a5d218c599163761e8e0827208e"},
{file = "triton-3.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d5e10de8c011adeb7c878c6ce0dd6073b14367749e34467f1cff2bde1b78253"},
{file = "triton-3.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8903767951bf86ec960b4fe4e21bc970055afc65e9d57e916d79ae3c93665e3"},
{file = "triton-3.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41004fb1ae9a53fcb3e970745feb87f0e3c94c6ce1ba86e95fa3b8537894bef7"},
]
[package.dependencies]
@@ -6690,12 +6717,14 @@ cffi = ["cffi (>=1.11)"]
[extras]
embeddings-azopenai = ["llama-index-embeddings-azure-openai"]
embeddings-fireworks = ["llama-index-embeddings-fireworks"]
embeddings-gemini = ["llama-index-embeddings-gemini"]
embeddings-huggingface = ["einops", "llama-index-embeddings-huggingface"]
embeddings-ollama = ["llama-index-embeddings-ollama", "ollama"]
embeddings-openai = ["llama-index-embeddings-openai"]
embeddings-sagemaker = ["boto3"]
llms-azopenai = ["llama-index-llms-azure-openai"]
llms-fireworks = ["llama-index-llms-fireworks"]
llms-gemini = ["google-generativeai", "llama-index-llms-gemini"]
llms-llama-cpp = ["llama-index-llms-llama-cpp"]
llms-ollama = ["llama-index-llms-ollama", "ollama"]
@@ -6714,4 +6743,4 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.11,<3.12"
content-hash = "45264d80672084e35ca0ea11b368a29001a3b9003822bddc67fb18489a8fe519"
content-hash = "43d568d296fc82dfe94a8b13877f0424d5e7980a0f0ca9d93dace983e9623584"


@@ -67,6 +67,20 @@ class EmbeddingComponent:
                    api_key=api_key,
                    model=model,
                )
            case "fireworks":
                try:
                    from llama_index.embeddings.fireworks import FireworksEmbedding
                except ImportError as e:
                    raise ImportError(
                        "FireworksEmbedding dependencies not found, install with `poetry install --extras embeddings-fireworks`"
                    ) from e
                api_key = settings.fireworks.embedding_api_key or settings.fireworks.api_key
                self.embedding_model = FireworksEmbedding(
                    api_key=api_key,
                )
            case "ollama":
                try:
                    from llama_index.embeddings.ollama import (  # type: ignore

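For reference, a minimal standalone sketch of what this branch wires up, assuming llama-index-embeddings-fireworks is installed and a key is present in the environment. The FIREWORKS_EMBEDDING_API_KEY name here is illustrative only; the component itself reads settings.fireworks.embedding_api_key or settings.fireworks.api_key.

import os

from llama_index.embeddings.fireworks import FireworksEmbedding

# Prefer a dedicated embedding key, falling back to the general key,
# mirroring the `embedding_api_key or api_key` logic in the component.
api_key = os.environ.get("FIREWORKS_EMBEDDING_API_KEY") or os.environ["FIREWORKS_API_KEY"]

embedding_model = FireworksEmbedding(api_key=api_key)
vector = embedding_model.get_text_embedding("hello fireworks")
print(len(vector))  # dimensionality of the returned embedding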

@@ -102,6 +102,19 @@ class LLMComponent:
                    api_key=openai_settings.api_key,
                    model=openai_settings.model,
                )
            case "fireworks":
                try:
                    from llama_index.llms.fireworks import Fireworks  # type: ignore
                except ImportError as e:
                    raise ImportError(
                        "Fireworks dependencies not found, install with `poetry install --extras llms-fireworks`"
                    ) from e
                fireworks_settings = settings.fireworks
                self.llm = Fireworks(
                    model=fireworks_settings.model,
                    api_key=fireworks_settings.api_key,
                )
            case "openailike":
                try:
                    from llama_index.llms.openai_like import OpenAILike  # type: ignore

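The LLM side can be exercised the same way outside the component; a sketch assuming FIREWORKS_API_KEY is set and reusing the default model id from the FireWorksSettings block below:

import os

from llama_index.llms.fireworks import Fireworks

llm = Fireworks(
    model="accounts/fireworks/models/llama-v3p1-70b-instruct",
    api_key=os.environ["FIREWORKS_API_KEY"],
)

# One-off completion; LLMComponent wires this same object into the RAG pipeline.
print(llm.complete("Say hello in one short sentence."))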

@@ -260,6 +260,21 @@ class OpenAISettings(BaseModel):
        description="OpenAI embedding Model to use. Example: 'text-embedding-3-large'.",
    )


class FireWorksSettings(BaseModel):
    api_key: str
    model: str = Field(
        "accounts/fireworks/models/llama-v3p1-70b-instruct",
        description="Fireworks model to use. Example: 'accounts/fireworks/models/llama-v3p1-70b-instruct'.",
    )
    embedding_api_key: str


class GeminiSettings(BaseModel):
    api_key: str
@@ -586,6 +601,7 @@ class Settings(BaseModel):
    huggingface: HuggingFaceSettings
    sagemaker: SagemakerSettings
    openai: OpenAISettings
    fireworks: FireWorksSettings
    gemini: GeminiSettings
    ollama: OllamaSettings
    azopenai: AzureOpenAISettings

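The fireworks block added to the YAML profiles maps onto FireWorksSettings one field at a time; a hedged sketch of that validation in isolation, assuming pydantic as the surrounding BaseModel/Field usage implies:

from pydantic import BaseModel, Field

class FireWorksSettings(BaseModel):
    api_key: str
    model: str = Field(
        "accounts/fireworks/models/llama-v3p1-70b-instruct",
        description="Fireworks model to use.",
    )
    embedding_api_key: str

# Normally populated from settings-fireworks.yaml / settings.yaml; built
# directly here only to show the default model id being applied.
s = FireWorksSettings(api_key="fw-demo", embedding_api_key="fw-demo")
print(s.model)  # -> accounts/fireworks/models/llama-v3p1-70b-instruct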

@@ -37,6 +37,9 @@ llama-index-vector-stores-postgres = {version ="^0.1.11", optional = true}
llama-index-vector-stores-clickhouse = {version ="^0.1.3", optional = true}
llama-index-storage-docstore-postgres = {version ="^0.1.3", optional = true}
llama-index-storage-index-store-postgres = {version ="^0.1.4", optional = true}
# fireworks
llama-index-llms-fireworks = {version = "^0.1.5", optional = true}
llama-index-embeddings-fireworks = {version = "^0.1.2", optional = true}
# Postgres
psycopg2-binary = {version ="^2.9.9", optional = true}
asyncpg = {version="^0.29.0", optional = true}
@@ -90,6 +93,8 @@ vector-stores-postgres = ["llama-index-vector-stores-postgres"]
vector-stores-milvus = ["llama-index-vector-stores-milvus"]
storage-nodestore-postgres = ["llama-index-storage-docstore-postgres","llama-index-storage-index-store-postgres","psycopg2-binary","asyncpg"]
rerank-sentence-transformers = ["torch", "sentence-transformers"]
llms-fireworks = ["llama-index-llms-fireworks"]
embeddings-fireworks = ["llama-index-embeddings-fireworks"]
[tool.poetry.group.dev.dependencies]
black = "^22"

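These extras keep the integration opt-in: neither Fireworks package is installed unless explicitly requested, e.g. with poetry install --extras "llms-fireworks embeddings-fireworks". The fuller command noted in settings-fireworks.yaml below also pulls in the UI and Qdrant extras.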
settings-fireworks.yaml (new file)

@@ -0,0 +1,13 @@
server:
  env_name: ${APP_ENV:fireworks}

llm:
  mode: fireworks

embedding:
  mode: fireworks

fireworks:
  api_key: ${FIREWORKS_API_KEY:}
  model: "accounts/fireworks/models/llama-v3p1-70b-instruct"

# poetry install --extras "ui llms-fireworks embeddings-fireworks vector-stores-qdrant"
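Usage note: with this profile in place and FIREWORKS_API_KEY exported, starting the server with the fireworks profile selected (via the APP_ENV/env_name mechanism shown above) should route both completion and embedding traffic through Fireworks; the commented poetry command lists the extras the profile depends on.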


@@ -52,7 +52,7 @@ llm:
  context_window: 3900
  # Select your tokenizer. Llama-index tokenizer is the default.
  # tokenizer: meta-llama/Meta-Llama-3.1-8B-Instruct
  temperature: 0.1 # The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1)

rag:
  similarity_top_k: 2
@@ -68,19 +68,19 @@ summarize:
  use_async: true

clickhouse:
  host: localhost
  port: 8443
  username: admin
  password: clickhouse
  database: embeddings

llamacpp:
  llm_hf_repo_id: lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF
  llm_hf_model_file: Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf
  tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
  top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
  top_p: 1.0 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
  repeat_penalty: 1.1 # Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)

embedding:
  # Should be matching the value above in most cases
@@ -126,11 +126,16 @@ openai:
  model: gpt-3.5-turbo
  embedding_api_key: ${OPENAI_API_KEY:}

fireworks:
  api_key: ${FIREWORKS_API_KEY:}
  model: "accounts/fireworks/models/llama-v3p1-70b-instruct"
  embedding_api_key: ${FIREWORKS_API_KEY:}

ollama:
  llm_model: llama3.1
  embedding_model: nomic-embed-text
  api_base: http://localhost:11434
  embedding_api_base: http://localhost:11434 # change if your embedding model runs on another ollama
  keep_alive: 5m
  request_timeout: 120.0
  autopull_models: true