import logging

from injector import inject, singleton
from llama_index.core.embeddings import BaseEmbedding, MockEmbedding

from private_gpt.paths import models_cache_path
from private_gpt.settings.settings import Settings

logger = logging.getLogger(__name__)

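# The embedding backend is chosen by `settings.embedding.mode`, and each branch
# below reads its own settings section. As a rough sketch (key names are taken
# from the attribute accesses in this module; the exact YAML layout and the
# model value are assumptions), an Ollama-backed configuration might look like:
#
#   embedding:
#     mode: ollama
#   ollama:
#     embedding_model: nomic-embed-text
#     embedding_api_base: http://localhost:11434
#     autopull_models: true
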
@singleton
class EmbeddingComponent:
    embedding_model: BaseEmbedding

    @inject
    def __init__(self, settings: Settings) -> None:
        embedding_mode = settings.embedding.mode
        logger.info("Initializing the embedding model in mode=%s", embedding_mode)
        match embedding_mode:
            case "huggingface":
                try:
                    from llama_index.embeddings.huggingface import (  # type: ignore
                        HuggingFaceEmbedding,
                    )
                except ImportError as e:
                    raise ImportError(
                        "Local dependencies not found, install with `poetry install --extras embeddings-huggingface`"
                    ) from e

                self.embedding_model = HuggingFaceEmbedding(
                    model_name=settings.huggingface.embedding_hf_model_name,
                    cache_folder=str(models_cache_path),
                    trust_remote_code=settings.huggingface.trust_remote_code,
                )
case "sagemaker":
|
|
try:
|
|
from private_gpt.components.embedding.custom.sagemaker import (
|
|
SagemakerEmbedding,
|
|
)
|
|
except ImportError as e:
|
|
raise ImportError(
|
|
"Sagemaker dependencies not found, install with `poetry install --extras embeddings-sagemaker`"
|
|
) from e
|
|
|
|
self.embedding_model = SagemakerEmbedding(
|
|
endpoint_name=settings.sagemaker.embedding_endpoint_name,
|
|
)
|
|
case "openai":
|
|
try:
|
|
from llama_index.embeddings.openai import ( # type: ignore
|
|
OpenAIEmbedding,
|
|
)
|
|
except ImportError as e:
|
|
raise ImportError(
|
|
"OpenAI dependencies not found, install with `poetry install --extras embeddings-openai`"
|
|
) from e
|
|
|
|
api_base = (
|
|
settings.openai.embedding_api_base or settings.openai.api_base
|
|
)
|
|
api_key = settings.openai.embedding_api_key or settings.openai.api_key
|
|
model = settings.openai.embedding_model
|
|
|
|
self.embedding_model = OpenAIEmbedding(
|
|
api_base=api_base,
|
|
api_key=api_key,
|
|
model=model,
|
|
)
|
|
case "fireworks":
|
|
try:
|
|
from llama_index.embeddings.fireworks import FireworksEmbedding
|
|
except ImportError as e:
|
|
raise ImportError(
|
|
"FireworksEmbedding dependencies not found, install with `poetry install --extras embeddings-fireworks`"
|
|
) from e
|
|
|
|
api_key = settings.fireworks.embedding_api_key or settings.fireworks.api_key
|
|
|
|
self.embedding_model = FireworksEmbedding(
|
|
api_key=api_key,
|
|
)
|
|
case "ollama":
|
|
try:
|
|
from llama_index.embeddings.ollama import ( # type: ignore
|
|
OllamaEmbedding,
|
|
)
|
|
from ollama import Client # type: ignore
|
|
except ImportError as e:
|
|
raise ImportError(
|
|
"Local dependencies not found, install with `poetry install --extras embeddings-ollama`"
|
|
) from e
|
|
|
|
ollama_settings = settings.ollama
|
|
|
|
# Calculate embedding model. If not provided tag, it will be use latest
|
|
model_name = (
|
|
ollama_settings.embedding_model + ":latest"
|
|
if ":" not in ollama_settings.embedding_model
|
|
else ollama_settings.embedding_model
|
|
)
|
|
|
|
self.embedding_model = OllamaEmbedding(
|
|
model_name=model_name,
|
|
base_url=ollama_settings.embedding_api_base,
|
|
)
|
|
|
|
if ollama_settings.autopull_models:
|
|
if ollama_settings.autopull_models:
|
|
from private_gpt.utils.ollama import (
|
|
check_connection,
|
|
pull_model,
|
|
)
|
|
|
|
# TODO: Reuse llama-index client when llama-index is updated
|
|
client = Client(
|
|
host=ollama_settings.embedding_api_base,
|
|
timeout=ollama_settings.request_timeout,
|
|
)
|
|
|
|
if not check_connection(client):
|
|
raise ValueError(
|
|
f"Failed to connect to Ollama, "
|
|
f"check if Ollama server is running on {ollama_settings.api_base}"
|
|
)
|
|
pull_model(client, model_name)
|
|
|
|
case "azopenai":
|
|
try:
|
|
from llama_index.embeddings.azure_openai import ( # type: ignore
|
|
AzureOpenAIEmbedding,
|
|
)
|
|
except ImportError as e:
|
|
raise ImportError(
|
|
"Azure OpenAI dependencies not found, install with `poetry install --extras embeddings-azopenai`"
|
|
) from e
|
|
|
|
azopenai_settings = settings.azopenai
|
|
self.embedding_model = AzureOpenAIEmbedding(
|
|
model=azopenai_settings.embedding_model,
|
|
deployment_name=azopenai_settings.embedding_deployment_name,
|
|
api_key=azopenai_settings.api_key,
|
|
azure_endpoint=azopenai_settings.azure_endpoint,
|
|
api_version=azopenai_settings.api_version,
|
|
)
|
|
case "gemini":
|
|
try:
|
|
from llama_index.embeddings.gemini import ( # type: ignore
|
|
GeminiEmbedding,
|
|
)
|
|
except ImportError as e:
|
|
raise ImportError(
|
|
"Gemini dependencies not found, install with `poetry install --extras embeddings-gemini`"
|
|
) from e
|
|
|
|
self.embedding_model = GeminiEmbedding(
|
|
api_key=settings.gemini.api_key,
|
|
model_name=settings.gemini.embedding_model,
|
|
)
|
|
case "mock":
|
|
# Not a random number, is the dimensionality used by
|
|
# the default embedding model
|
|
self.embedding_model = MockEmbedding(384)
|
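
# A minimal usage sketch (not part of the upstream module). It assumes the
# application exposes a configured injector, e.g. `private_gpt.di.global_injector`,
# and relies on llama-index's `BaseEmbedding.get_text_embedding` API; guarded so
# it only runs when this file is executed directly.
if __name__ == "__main__":
    from private_gpt.di import global_injector  # assumed injector entry point

    component = global_injector.get(EmbeddingComponent)
    # Embed a single string and report the vector dimensionality.
    vector = component.embedding_model.get_text_embedding("hello world")
    print(f"Embedding dimensionality: {len(vector)}")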