Mirror of https://github.com/imartinez/privateGPT.git
feat: Upgrade to LlamaIndex to 0.10 (#1663)
* Extract optional dependencies
* Separate local mode into llms-llama-cpp and embeddings-huggingface for clarity
* Support Ollama embeddings
* Upgrade to llamaindex 0.10.14. Remove legacy use of ServiceContext in ContextChatEngine
* Fix vector retriever filters
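Most of the diff below follows one mechanical pattern introduced by LlamaIndex 0.10: core abstractions move from the monolithic `llama_index` package to `llama_index.core`, while provider-specific classes move to separately installed integration packages guarded by optional Poetry extras. A minimal sketch of the pattern (illustrative only; it assumes the relevant extras are installed):

# Before (LlamaIndex 0.9.x): everything importable from the monolithic package.
# from llama_index import VectorStoreIndex, ServiceContext
# from llama_index.llms import OpenAI

# After (LlamaIndex 0.10.x): core abstractions vs. optional integration packages.
from llama_index.core import VectorStoreIndex      # always available with llama-index-core
from llama_index.llms.openai import OpenAI         # only with the matching integration extra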
@@ -3,7 +3,7 @@ import json
 from typing import Any

 import boto3
-from llama_index.embeddings.base import BaseEmbedding
+from llama_index.core.base.embeddings.base import BaseEmbedding
 from pydantic import Field, PrivateAttr

@@ -1,8 +1,7 @@
 import logging

 from injector import inject, singleton
-from llama_index import MockEmbedding
-from llama_index.embeddings.base import BaseEmbedding
+from llama_index.core.embeddings import BaseEmbedding, MockEmbedding

 from private_gpt.paths import models_cache_path
 from private_gpt.settings.settings import Settings
@@ -19,27 +18,60 @@ class EmbeddingComponent:
         embedding_mode = settings.embedding.mode
         logger.info("Initializing the embedding model in mode=%s", embedding_mode)
         match embedding_mode:
-            case "local":
-                from llama_index.embeddings import HuggingFaceEmbedding
+            case "huggingface":
+                try:
+                    from llama_index.embeddings.huggingface import (  # type: ignore
+                        HuggingFaceEmbedding,
+                    )
+                except ImportError as e:
+                    raise ImportError(
+                        "Local dependencies not found, install with `poetry install --extras embeddings-huggingface`"
+                    ) from e

                 self.embedding_model = HuggingFaceEmbedding(
-                    model_name=settings.local.embedding_hf_model_name,
+                    model_name=settings.huggingface.embedding_hf_model_name,
                     cache_folder=str(models_cache_path),
                 )
             case "sagemaker":
-
-                from private_gpt.components.embedding.custom.sagemaker import (
-                    SagemakerEmbedding,
-                )
+                try:
+                    from private_gpt.components.embedding.custom.sagemaker import (
+                        SagemakerEmbedding,
+                    )
+                except ImportError as e:
+                    raise ImportError(
+                        "Sagemaker dependencies not found, install with `poetry install --extras embeddings-sagemaker`"
+                    ) from e

                 self.embedding_model = SagemakerEmbedding(
                     endpoint_name=settings.sagemaker.embedding_endpoint_name,
                 )
             case "openai":
-                from llama_index import OpenAIEmbedding
+                try:
+                    from llama_index.embeddings.openai import (  # type: ignore
+                        OpenAIEmbedding,
+                    )
+                except ImportError as e:
+                    raise ImportError(
+                        "OpenAI dependencies not found, install with `poetry install --extras embeddings-openai`"
+                    ) from e

                 openai_settings = settings.openai.api_key
                 self.embedding_model = OpenAIEmbedding(api_key=openai_settings)
+            case "ollama":
+                try:
+                    from llama_index.embeddings.ollama import (  # type: ignore
+                        OllamaEmbedding,
+                    )
+                except ImportError as e:
+                    raise ImportError(
+                        "Local dependencies not found, install with `poetry install --extras embeddings-ollama`"
+                    ) from e
+
+                ollama_settings = settings.ollama
+                self.embedding_model = OllamaEmbedding(
+                    model_name=ollama_settings.embedding_model,
+                    base_url=ollama_settings.api_base,
+                )
             case "mock":
                 # Not a random number, is the dimensionality used by
                 # the default embedding model
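For illustration only (not part of the commit): once the `embeddings-ollama` extra is installed, the new `ollama` embedding mode boils down to the following direct usage; the model name and URL mirror the `ollama.embedding_model` and `ollama.api_base` settings referenced above, and the values here are just examples.

from llama_index.embeddings.ollama import OllamaEmbedding

embedding = OllamaEmbedding(
    model_name="nomic-embed-text",      # example model, as in the Ollama settings hunk below
    base_url="http://localhost:11434",  # default Ollama API base
)
vector = embedding.get_text_embedding("PrivateGPT now supports Ollama embeddings.")
print(len(vector))  # dimensionality of the chosen embedding model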
@@ -8,16 +8,13 @@ import threading
 from pathlib import Path
 from typing import Any

-from llama_index import (
-    Document,
-    ServiceContext,
-    StorageContext,
-    VectorStoreIndex,
-    load_index_from_storage,
-)
-from llama_index.data_structs import IndexDict
-from llama_index.indices.base import BaseIndex
-from llama_index.ingestion import run_transformations
+from llama_index.core.data_structs import IndexDict
+from llama_index.core.embeddings.utils import EmbedType
+from llama_index.core.indices import VectorStoreIndex, load_index_from_storage
+from llama_index.core.indices.base import BaseIndex
+from llama_index.core.ingestion import run_transformations
+from llama_index.core.schema import Document, TransformComponent
+from llama_index.core.storage import StorageContext

 from private_gpt.components.ingest.ingest_helper import IngestionHelper
 from private_gpt.paths import local_data_path
@@ -30,13 +27,15 @@ class BaseIngestComponent(abc.ABC):
     def __init__(
         self,
         storage_context: StorageContext,
-        service_context: ServiceContext,
+        embed_model: EmbedType,
+        transformations: list[TransformComponent],
         *args: Any,
         **kwargs: Any,
     ) -> None:
         logger.debug("Initializing base ingest component type=%s", type(self).__name__)
         self.storage_context = storage_context
-        self.service_context = service_context
+        self.embed_model = embed_model
+        self.transformations = transformations

     @abc.abstractmethod
     def ingest(self, file_name: str, file_data: Path) -> list[Document]:
@@ -55,11 +54,12 @@ class BaseIngestComponentWithIndex(BaseIngestComponent, abc.ABC):
     def __init__(
         self,
         storage_context: StorageContext,
-        service_context: ServiceContext,
+        embed_model: EmbedType,
+        transformations: list[TransformComponent],
         *args: Any,
         **kwargs: Any,
     ) -> None:
-        super().__init__(storage_context, service_context, *args, **kwargs)
+        super().__init__(storage_context, embed_model, transformations, *args, **kwargs)

         self.show_progress = True
         self._index_thread_lock = (
@@ -73,9 +73,10 @@ class BaseIngestComponentWithIndex(BaseIngestComponent, abc.ABC):
             # Load the index with store_nodes_override=True to be able to delete them
             index = load_index_from_storage(
                 storage_context=self.storage_context,
-                service_context=self.service_context,
                 store_nodes_override=True,  # Force store nodes in index and document stores
                 show_progress=self.show_progress,
+                embed_model=self.embed_model,
+                transformations=self.transformations,
             )
         except ValueError:
             # There are no index in the storage context, creating a new one
@@ -83,9 +84,10 @@ class BaseIngestComponentWithIndex(BaseIngestComponent, abc.ABC):
            index = VectorStoreIndex.from_documents(
                 [],
                 storage_context=self.storage_context,
-                service_context=self.service_context,
                 store_nodes_override=True,  # Force store nodes in index and document stores
                 show_progress=self.show_progress,
+                embed_model=self.embed_model,
+                transformations=self.transformations,
             )
             index.storage_context.persist(persist_dir=local_data_path)
         return index
@@ -106,11 +108,12 @@ class SimpleIngestComponent(BaseIngestComponentWithIndex):
     def __init__(
         self,
         storage_context: StorageContext,
-        service_context: ServiceContext,
+        embed_model: EmbedType,
+        transformations: list[TransformComponent],
         *args: Any,
         **kwargs: Any,
     ) -> None:
-        super().__init__(storage_context, service_context, *args, **kwargs)
+        super().__init__(storage_context, embed_model, transformations, *args, **kwargs)

     def ingest(self, file_name: str, file_data: Path) -> list[Document]:
         logger.info("Ingesting file_name=%s", file_name)
@@ -151,16 +154,17 @@ class BatchIngestComponent(BaseIngestComponentWithIndex):
     def __init__(
         self,
         storage_context: StorageContext,
-        service_context: ServiceContext,
+        embed_model: EmbedType,
+        transformations: list[TransformComponent],
         count_workers: int,
         *args: Any,
         **kwargs: Any,
     ) -> None:
-        super().__init__(storage_context, service_context, *args, **kwargs)
+        super().__init__(storage_context, embed_model, transformations, *args, **kwargs)
         # Make an efficient use of the CPU and GPU, the embedding
         # must be in the transformations
         assert (
-            len(self.service_context.transformations) >= 2
+            len(self.transformations) >= 2
         ), "Embeddings must be in the transformations"
         assert count_workers > 0, "count_workers must be > 0"
         self.count_workers = count_workers
@@ -197,7 +201,7 @@ class BatchIngestComponent(BaseIngestComponentWithIndex):
         logger.debug("Transforming count=%s documents into nodes", len(documents))
         nodes = run_transformations(
             documents,  # type: ignore[arg-type]
-            self.service_context.transformations,
+            self.transformations,
             show_progress=self.show_progress,
         )
         # Locking the index to avoid concurrent writes
@@ -225,16 +229,17 @@ class ParallelizedIngestComponent(BaseIngestComponentWithIndex):
     def __init__(
         self,
         storage_context: StorageContext,
-        service_context: ServiceContext,
+        embed_model: EmbedType,
+        transformations: list[TransformComponent],
         count_workers: int,
         *args: Any,
         **kwargs: Any,
     ) -> None:
-        super().__init__(storage_context, service_context, *args, **kwargs)
+        super().__init__(storage_context, embed_model, transformations, *args, **kwargs)
         # To make an efficient use of the CPU and GPU, the embeddings
         # must be in the transformations (to be computed in batches)
         assert (
-            len(self.service_context.transformations) >= 2
+            len(self.transformations) >= 2
         ), "Embeddings must be in the transformations"
         assert count_workers > 0, "count_workers must be > 0"
         self.count_workers = count_workers
@@ -278,7 +283,7 @@ class ParallelizedIngestComponent(BaseIngestComponentWithIndex):
         logger.debug("Transforming count=%s documents into nodes", len(documents))
         nodes = run_transformations(
             documents,  # type: ignore[arg-type]
-            self.service_context.transformations,
+            self.transformations,
             show_progress=self.show_progress,
         )
         # Locking the index to avoid concurrent writes
@@ -311,18 +316,29 @@ class ParallelizedIngestComponent(BaseIngestComponentWithIndex):

 def get_ingestion_component(
     storage_context: StorageContext,
-    service_context: ServiceContext,
+    embed_model: EmbedType,
+    transformations: list[TransformComponent],
     settings: Settings,
 ) -> BaseIngestComponent:
     """Get the ingestion component for the given configuration."""
     ingest_mode = settings.embedding.ingest_mode
     if ingest_mode == "batch":
         return BatchIngestComponent(
-            storage_context, service_context, settings.embedding.count_workers
+            storage_context=storage_context,
+            embed_model=embed_model,
+            transformations=transformations,
+            count_workers=settings.embedding.count_workers,
         )
     elif ingest_mode == "parallel":
         return ParallelizedIngestComponent(
-            storage_context, service_context, settings.embedding.count_workers
+            storage_context=storage_context,
+            embed_model=embed_model,
+            transformations=transformations,
+            count_workers=settings.embedding.count_workers,
         )
     else:
-        return SimpleIngestComponent(storage_context, service_context)
+        return SimpleIngestComponent(
+            storage_context=storage_context,
+            embed_model=embed_model,
+            transformations=transformations,
+        )
@@ -1,14 +1,58 @@
 import logging
 from pathlib import Path

-from llama_index import Document
-from llama_index.readers import JSONReader, StringIterableReader
-from llama_index.readers.file.base import DEFAULT_FILE_READER_CLS
+from llama_index.core.readers import StringIterableReader
+from llama_index.core.readers.base import BaseReader
+from llama_index.core.readers.json import JSONReader
+from llama_index.core.schema import Document

 logger = logging.getLogger(__name__)

+
+# Inspired by the `llama_index.core.readers.file.base` module
+def _try_loading_included_file_formats() -> dict[str, type[BaseReader]]:
+    try:
+        from llama_index.readers.file.docs import (  # type: ignore
+            DocxReader,
+            HWPReader,
+            PDFReader,
+        )
+        from llama_index.readers.file.epub import EpubReader  # type: ignore
+        from llama_index.readers.file.image import ImageReader  # type: ignore
+        from llama_index.readers.file.ipynb import IPYNBReader  # type: ignore
+        from llama_index.readers.file.markdown import MarkdownReader  # type: ignore
+        from llama_index.readers.file.mbox import MboxReader  # type: ignore
+        from llama_index.readers.file.slides import PptxReader  # type: ignore
+        from llama_index.readers.file.tabular import PandasCSVReader  # type: ignore
+        from llama_index.readers.file.video_audio import (  # type: ignore
+            VideoAudioReader,
+        )
+    except ImportError as e:
+        raise ImportError("`llama-index-readers-file` package not found") from e
+
+    default_file_reader_cls: dict[str, type[BaseReader]] = {
+        ".hwp": HWPReader,
+        ".pdf": PDFReader,
+        ".docx": DocxReader,
+        ".pptx": PptxReader,
+        ".ppt": PptxReader,
+        ".pptm": PptxReader,
+        ".jpg": ImageReader,
+        ".png": ImageReader,
+        ".jpeg": ImageReader,
+        ".mp3": VideoAudioReader,
+        ".mp4": VideoAudioReader,
+        ".csv": PandasCSVReader,
+        ".epub": EpubReader,
+        ".md": MarkdownReader,
+        ".mbox": MboxReader,
+        ".ipynb": IPYNBReader,
+    }
+    return default_file_reader_cls
+
+
 # Patching the default file reader to support other file types
-FILE_READER_CLS = DEFAULT_FILE_READER_CLS.copy()
+FILE_READER_CLS = _try_loading_included_file_formats()
 FILE_READER_CLS.update(
     {
         ".json": JSONReader,
@@ -7,26 +7,20 @@ import logging
 from typing import TYPE_CHECKING, Any

 import boto3  # type: ignore
-from llama_index.bridge.pydantic import Field
-from llama_index.llms import (
+from llama_index.core.base.llms.generic_utils import (
+    completion_response_to_chat_response,
+    stream_completion_response_to_chat_response,
+)
+from llama_index.core.bridge.pydantic import Field
+from llama_index.core.llms import (
     CompletionResponse,
     CustomLLM,
     LLMMetadata,
 )
-from llama_index.llms.base import (
+from llama_index.core.llms.callbacks import (
     llm_chat_callback,
     llm_completion_callback,
 )
-from llama_index.llms.generic_utils import (
-    completion_response_to_chat_response,
-    stream_completion_response_to_chat_response,
-)
-from llama_index.llms.llama_utils import (
-    completion_to_prompt as generic_completion_to_prompt,
-)
-from llama_index.llms.llama_utils import (
-    messages_to_prompt as generic_messages_to_prompt,
-)

 if TYPE_CHECKING:
     from collections.abc import Sequence
@@ -161,8 +155,8 @@ class SagemakerLLM(CustomLLM):
         model_kwargs = model_kwargs or {}
         model_kwargs.update({"n_ctx": context_window, "verbose": verbose})

-        messages_to_prompt = messages_to_prompt or generic_messages_to_prompt
-        completion_to_prompt = completion_to_prompt or generic_completion_to_prompt
+        messages_to_prompt = messages_to_prompt or {}
+        completion_to_prompt = completion_to_prompt or {}

         generate_kwargs = generate_kwargs or {}
         generate_kwargs.update(
@@ -1,9 +1,9 @@
 import logging

 from injector import inject, singleton
-from llama_index import set_global_tokenizer
-from llama_index.llms import MockLLM
-from llama_index.llms.base import LLM
+from llama_index.core.llms import LLM, MockLLM
+from llama_index.core.settings import Settings as LlamaIndexSettings
+from llama_index.core.utils import set_global_tokenizer
 from transformers import AutoTokenizer  # type: ignore

 from private_gpt.components.llm.prompt_helper import get_prompt_style
@@ -30,17 +30,23 @@ class LLMComponent:

         logger.info("Initializing the LLM in mode=%s", llm_mode)
         match settings.llm.mode:
-            case "local":
-                from llama_index.llms import LlamaCPP
+            case "llamacpp":
+                try:
+                    from llama_index.llms.llama_cpp import LlamaCPP  # type: ignore
+                except ImportError as e:
+                    raise ImportError(
+                        "Local dependencies not found, install with `poetry install --extras llms-llama-cpp`"
+                    ) from e

-                prompt_style = get_prompt_style(settings.local.prompt_style)
+                prompt_style = get_prompt_style(settings.llamacpp.prompt_style)

                 self.llm = LlamaCPP(
-                    model_path=str(models_path / settings.local.llm_hf_model_file),
+                    model_path=str(models_path / settings.llamacpp.llm_hf_model_file),
                     temperature=0.1,
                     max_new_tokens=settings.llm.max_new_tokens,
                     context_window=settings.llm.context_window,
                     generate_kwargs={},
+                    callback_manager=LlamaIndexSettings.callback_manager,
                     # All to GPU
                     model_kwargs={"n_gpu_layers": -1, "offload_kqv": True},
                     # transform inputs into Llama2 format
@@ -50,7 +56,12 @@ class LLMComponent:
                 )

             case "sagemaker":
-                from private_gpt.components.llm.custom.sagemaker import SagemakerLLM
+                try:
+                    from private_gpt.components.llm.custom.sagemaker import SagemakerLLM
+                except ImportError as e:
+                    raise ImportError(
+                        "Sagemaker dependencies not found, install with `poetry install --extras llms-sagemaker`"
+                    ) from e

                 self.llm = SagemakerLLM(
                     endpoint_name=settings.sagemaker.llm_endpoint_name,
@@ -58,7 +69,12 @@ class LLMComponent:
                     context_window=settings.llm.context_window,
                 )
             case "openai":
-                from llama_index.llms import OpenAI
+                try:
+                    from llama_index.llms.openai import OpenAI  # type: ignore
+                except ImportError as e:
+                    raise ImportError(
+                        "OpenAI dependencies not found, install with `poetry install --extras llms-openai`"
+                    ) from e

                 openai_settings = settings.openai
                 self.llm = OpenAI(
@@ -67,7 +83,12 @@ class LLMComponent:
                     model=openai_settings.model,
                 )
             case "openailike":
-                from llama_index.llms import OpenAILike
+                try:
+                    from llama_index.llms.openai_like import OpenAILike  # type: ignore
+                except ImportError as e:
+                    raise ImportError(
+                        "OpenAILike dependencies not found, install with `poetry install --extras llms-openai-like`"
+                    ) from e

                 openai_settings = settings.openai
                 self.llm = OpenAILike(
@@ -78,12 +99,17 @@ class LLMComponent:
                     max_tokens=None,
                     api_version="",
                 )
-            case "mock":
-                self.llm = MockLLM()
             case "ollama":
-                from llama_index.llms import Ollama
+                try:
+                    from llama_index.llms.ollama import Ollama  # type: ignore
+                except ImportError as e:
+                    raise ImportError(
+                        "Ollama dependencies not found, install with `poetry install --extras llms-ollama`"
+                    ) from e

                 ollama_settings = settings.ollama
                 self.llm = Ollama(
-                    model=ollama_settings.model, base_url=ollama_settings.api_base
+                    model=ollama_settings.llm_model, base_url=ollama_settings.api_base
                 )
+            case "mock":
+                self.llm = MockLLM()
@@ -3,11 +3,7 @@ import logging
 from collections.abc import Sequence
 from typing import Any, Literal

-from llama_index.llms import ChatMessage, MessageRole
-from llama_index.llms.llama_utils import (
-    completion_to_prompt,
-    messages_to_prompt,
-)
+from llama_index.core.llms import ChatMessage, MessageRole

 logger = logging.getLogger(__name__)

@@ -73,7 +69,9 @@ class DefaultPromptStyle(AbstractPromptStyle):


 class Llama2PromptStyle(AbstractPromptStyle):
-    """Simple prompt style that just uses the default llama_utils functions.
+    """Simple prompt style that uses llama 2 prompt style.
+
+    Inspired by llama_index/legacy/llms/llama_utils.py

     It transforms the sequence of messages into a prompt that should look like:
     ```text
@@ -83,11 +81,61 @@ class Llama2PromptStyle(AbstractPromptStyle):
     ```
     """

+    BOS, EOS = "<s>", "</s>"
+    B_INST, E_INST = "[INST]", "[/INST]"
+    B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
+    DEFAULT_SYSTEM_PROMPT = """\
+You are a helpful, respectful and honest assistant. \
+Always answer as helpfully as possible and follow ALL given instructions. \
+Do not speculate or make up information. \
+Do not reference any given instructions or context. \
+"""
+
     def _messages_to_prompt(self, messages: Sequence[ChatMessage]) -> str:
-        return messages_to_prompt(messages)
+        string_messages: list[str] = []
+        if messages[0].role == MessageRole.SYSTEM:
+            # pull out the system message (if it exists in messages)
+            system_message_str = messages[0].content or ""
+            messages = messages[1:]
+        else:
+            system_message_str = self.DEFAULT_SYSTEM_PROMPT
+
+        system_message_str = f"{self.B_SYS} {system_message_str.strip()} {self.E_SYS}"
+
+        for i in range(0, len(messages), 2):
+            # first message should always be a user
+            user_message = messages[i]
+            assert user_message.role == MessageRole.USER
+
+            if i == 0:
+                # make sure system prompt is included at the start
+                str_message = f"{self.BOS} {self.B_INST} {system_message_str} "
+            else:
+                # end previous user-assistant interaction
+                string_messages[-1] += f" {self.EOS}"
+                # no need to include system prompt
+                str_message = f"{self.BOS} {self.B_INST} "
+
+            # include user message content
+            str_message += f"{user_message.content} {self.E_INST}"
+
+            if len(messages) > (i + 1):
+                # if assistant message exists, add to str_message
+                assistant_message = messages[i + 1]
+                assert assistant_message.role == MessageRole.ASSISTANT
+                str_message += f" {assistant_message.content}"
+
+            string_messages.append(str_message)
+
+        return "".join(string_messages)

     def _completion_to_prompt(self, completion: str) -> str:
-        return completion_to_prompt(completion)
+        system_prompt_str = self.DEFAULT_SYSTEM_PROMPT
+
+        return (
+            f"{self.BOS} {self.B_INST} {self.B_SYS} {system_prompt_str.strip()} {self.E_SYS} "
+            f"{completion.strip()} {self.E_INST}"
+        )


 class TagPromptStyle(AbstractPromptStyle):
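As an illustration (not part of the commit), the reimplemented `_messages_to_prompt` shown above turns a system plus user exchange into the classic Llama 2 chat format; a minimal sketch:

from llama_index.core.llms import ChatMessage, MessageRole

from private_gpt.components.llm.prompt_helper import Llama2PromptStyle

style = Llama2PromptStyle()
prompt = style._messages_to_prompt(  # the method added in the hunk above
    [
        ChatMessage(role=MessageRole.SYSTEM, content="You are a terse assistant."),
        ChatMessage(role=MessageRole.USER, content="Hello"),
    ]
)
# prompt is roughly (whitespace approximate):
# <s> [INST] <<SYS>>
#  You are a terse assistant.
# <</SYS>>
#
#  Hello [/INST]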
@@ -1,9 +1,9 @@
 import logging

 from injector import inject, singleton
-from llama_index.storage.docstore import BaseDocumentStore, SimpleDocumentStore
-from llama_index.storage.index_store import SimpleIndexStore
-from llama_index.storage.index_store.types import BaseIndexStore
+from llama_index.core.storage.docstore import BaseDocumentStore, SimpleDocumentStore
+from llama_index.core.storage.index_store import SimpleIndexStore
+from llama_index.core.storage.index_store.types import BaseIndexStore

 from private_gpt.paths import local_data_path

@@ -1,12 +1,28 @@
+from collections.abc import Generator
 from typing import Any

-from llama_index.schema import BaseNode, MetadataMode
-from llama_index.vector_stores import ChromaVectorStore
-from llama_index.vector_stores.chroma import chunk_list
-from llama_index.vector_stores.utils import node_to_metadata_dict
+from llama_index.core.schema import BaseNode, MetadataMode
+from llama_index.core.vector_stores.utils import node_to_metadata_dict
+from llama_index.vector_stores.chroma import ChromaVectorStore  # type: ignore


-class BatchedChromaVectorStore(ChromaVectorStore):
+def chunk_list(
+    lst: list[BaseNode], max_chunk_size: int
+) -> Generator[list[BaseNode], None, None]:
+    """Yield successive max_chunk_size-sized chunks from lst.
+
+    Args:
+        lst (List[BaseNode]): list of nodes with embeddings
+        max_chunk_size (int): max chunk size
+
+    Yields:
+        Generator[List[BaseNode], None, None]: list of nodes with embeddings
+    """
+    for i in range(0, len(lst), max_chunk_size):
+        yield lst[i : i + max_chunk_size]
+
+
+class BatchedChromaVectorStore(ChromaVectorStore):  # type: ignore
     """Chroma vector store, batching additions to avoid reaching the max batch limit.

     In this vector store, embeddings are stored within a ChromaDB collection.
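Because `chunk_list` is no longer imported from `llama_index.vector_stores.chroma`, it is redefined locally above; for illustration, it simply slices a node list into ChromaDB-sized batches:

nodes = list(range(10))  # stand-in for BaseNode objects
print(list(chunk_list(nodes, max_chunk_size=4)))
# [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]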
@@ -2,11 +2,14 @@ import logging
 import typing

 from injector import inject, singleton
-from llama_index import VectorStoreIndex
-from llama_index.indices.vector_store import VectorIndexRetriever
-from llama_index.vector_stores.types import VectorStore
+from llama_index.core.indices.vector_store import VectorIndexRetriever, VectorStoreIndex
+from llama_index.core.vector_stores.types import (
+    FilterCondition,
+    MetadataFilter,
+    MetadataFilters,
+    VectorStore,
+)

-from private_gpt.components.vector_store.batched_chroma import BatchedChromaVectorStore
 from private_gpt.open_ai.extensions.context_filter import ContextFilter
 from private_gpt.paths import local_data_path
 from private_gpt.settings.settings import Settings
@@ -14,34 +17,36 @@ from private_gpt.settings.settings import Settings
 logger = logging.getLogger(__name__)


-@typing.no_type_check
-def _chromadb_doc_id_metadata_filter(
+def _doc_id_metadata_filter(
     context_filter: ContextFilter | None,
-) -> dict | None:
-    if context_filter is None or context_filter.docs_ids is None:
-        return {}  # No filter
-    elif len(context_filter.docs_ids) < 1:
-        return {"doc_id": "-"}  # Effectively filtering out all docs
-    else:
-        doc_filter_items = []
-        if len(context_filter.docs_ids) > 1:
-            doc_filter = {"$or": doc_filter_items}
-            for doc_id in context_filter.docs_ids:
-                doc_filter_items.append({"doc_id": doc_id})
-        else:
-            doc_filter = {"doc_id": context_filter.docs_ids[0]}
-        return doc_filter
+) -> MetadataFilters:
+    filters = MetadataFilters(filters=[], condition=FilterCondition.OR)
+
+    if context_filter is not None and context_filter.docs_ids is not None:
+        for doc_id in context_filter.docs_ids:
+            filters.filters.append(MetadataFilter(key="doc_id", value=doc_id))
+
+    return filters


 @singleton
 class VectorStoreComponent:
+    settings: Settings
     vector_store: VectorStore

     @inject
     def __init__(self, settings: Settings) -> None:
+        self.settings = settings
         match settings.vectorstore.database:
             case "pgvector":
-                from llama_index.vector_stores import PGVectorStore
+                try:
+                    from llama_index.vector_stores.postgres import (  # type: ignore
+                        PGVectorStore,
+                    )
+                except ImportError as e:
+                    raise ImportError(
+                        "Postgres dependencies not found, install with `poetry install --extras vector-stores-postgres`"
+                    ) from e

                 if settings.pgvector is None:
                     raise ValueError(
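For illustration only: given a context filter with two ingested document ids (example ids below), the new helper builds an OR-combined `MetadataFilters` object instead of a raw ChromaDB `where` clause, so any llama-index vector store can apply it.

from private_gpt.open_ai.extensions.context_filter import ContextFilter

filters = _doc_id_metadata_filter(ContextFilter(docs_ids=["doc-a", "doc-b"]))
# filters.condition == FilterCondition.OR
# filters.filters == [
#     MetadataFilter(key="doc_id", value="doc-a"),
#     MetadataFilter(key="doc_id", value="doc-b"),
# ]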
@@ -61,11 +66,13 @@ class VectorStoreComponent:
                     from chromadb.config import (  # type: ignore
                         Settings as ChromaSettings,
                     )
+
+                    from private_gpt.components.vector_store.batched_chroma import (
+                        BatchedChromaVectorStore,
+                    )
                 except ImportError as e:
                     raise ImportError(
-                        "'chromadb' is not installed."
-                        "To use PrivateGPT with Chroma, install the 'chroma' extra."
-                        "`poetry install --extras chroma`"
+                        "ChromaDB dependencies not found, install with `poetry install --extras vector-stores-chroma`"
                     ) from e

                 chroma_settings = ChromaSettings(anonymized_telemetry=False)
@@ -85,8 +92,15 @@ class VectorStoreComponent:
                 )

             case "qdrant":
-                from llama_index.vector_stores.qdrant import QdrantVectorStore
-                from qdrant_client import QdrantClient
+                try:
+                    from llama_index.vector_stores.qdrant import (  # type: ignore
+                        QdrantVectorStore,
+                    )
+                    from qdrant_client import QdrantClient  # type: ignore
+                except ImportError as e:
+                    raise ImportError(
+                        "Qdrant dependencies not found, install with `poetry install --extras vector-stores-qdrant`"
+                    ) from e

                 if settings.qdrant is None:
                     logger.info(
@@ -112,20 +126,20 @@ class VectorStoreComponent:
                     f"Vectorstore database {settings.vectorstore.database} not supported"
                 )

-    @staticmethod
     def get_retriever(
+        self,
         index: VectorStoreIndex,
         context_filter: ContextFilter | None = None,
         similarity_top_k: int = 2,
     ) -> VectorIndexRetriever:
-        # This way we support qdrant (using doc_ids) and chroma (using where clause)
+        # This way we support qdrant (using doc_ids) and the rest (using filters)
         return VectorIndexRetriever(
             index=index,
             similarity_top_k=similarity_top_k,
             doc_ids=context_filter.docs_ids if context_filter else None,
-            vector_store_kwargs={
-                "where": _chromadb_doc_id_metadata_filter(context_filter)
-            },
+            filters=_doc_id_metadata_filter(context_filter)
+            if self.settings.vectorstore.database != "qdrant"
+            else None,
         )

     def close(self) -> None:
@@ -4,6 +4,9 @@ import logging
 from fastapi import Depends, FastAPI, Request
 from fastapi.middleware.cors import CORSMiddleware
 from injector import Injector
+from llama_index.core.callbacks import CallbackManager
+from llama_index.core.callbacks.global_handlers import create_global_handler
+from llama_index.core.settings import Settings as LlamaIndexSettings

 from private_gpt.server.chat.chat_router import chat_router
 from private_gpt.server.chunks.chunks_router import chunks_router
@@ -31,6 +34,10 @@ def create_app(root_injector: Injector) -> FastAPI:
     app.include_router(embeddings_router)
     app.include_router(health_router)

+    # Add LlamaIndex simple observability
+    global_handler = create_global_handler("simple")
+    LlamaIndexSettings.callback_manager = CallbackManager([global_handler])
+
     settings = root_injector.get(Settings)
     if settings.server.cors.enabled:
         logger.debug("Setting up CORS middleware")
@@ -45,7 +52,12 @@ def create_app(root_injector: Injector) -> FastAPI:

     if settings.ui.enabled:
         logger.debug("Importing the UI module")
-        from private_gpt.ui.ui import PrivateGptUi
+        try:
+            from private_gpt.ui.ui import PrivateGptUi
+        except ImportError as e:
+            raise ImportError(
+                "UI dependencies not found, install with `poetry install --extras ui`"
+            ) from e

         ui = root_injector.get(PrivateGptUi)
         ui.mount_in_app(app, settings.ui.path)
@@ -1,11 +1,6 @@
 """FastAPI app creation, logger configuration and main API routes."""

-import llama_index
-
 from private_gpt.di import global_injector
 from private_gpt.launcher import create_app

-# Add LlamaIndex simple observability
-llama_index.set_global_handler("simple")
-
 app = create_app(global_injector)
@@ -3,7 +3,7 @@ import uuid
 from collections.abc import Iterator
 from typing import Literal

-from llama_index.llms import ChatResponse, CompletionResponse
+from llama_index.core.llms import ChatResponse, CompletionResponse
 from pydantic import BaseModel, Field

 from private_gpt.server.chunks.chunks_service import Chunk
@@ -1,5 +1,5 @@
 from fastapi import APIRouter, Depends, Request
-from llama_index.llms import ChatMessage, MessageRole
+from llama_index.core.llms import ChatMessage, MessageRole
 from pydantic import BaseModel
 from starlette.responses import StreamingResponse

@@ -1,14 +1,15 @@
 from dataclasses import dataclass

 from injector import inject, singleton
-from llama_index import ServiceContext, StorageContext, VectorStoreIndex
-from llama_index.chat_engine import ContextChatEngine, SimpleChatEngine
-from llama_index.chat_engine.types import (
+from llama_index.core.chat_engine import ContextChatEngine, SimpleChatEngine
+from llama_index.core.chat_engine.types import (
     BaseChatEngine,
 )
-from llama_index.indices.postprocessor import MetadataReplacementPostProcessor
-from llama_index.llms import ChatMessage, MessageRole
-from llama_index.types import TokenGen
+from llama_index.core.indices import VectorStoreIndex
+from llama_index.core.indices.postprocessor import MetadataReplacementPostProcessor
+from llama_index.core.llms import ChatMessage, MessageRole
+from llama_index.core.storage import StorageContext
+from llama_index.core.types import TokenGen
 from pydantic import BaseModel

 from private_gpt.components.embedding.embedding_component import EmbeddingComponent
@@ -75,20 +76,19 @@ class ChatService:
         embedding_component: EmbeddingComponent,
         node_store_component: NodeStoreComponent,
     ) -> None:
-        self.llm_service = llm_component
+        self.llm_component = llm_component
+        self.embedding_component = embedding_component
         self.vector_store_component = vector_store_component
         self.storage_context = StorageContext.from_defaults(
             vector_store=vector_store_component.vector_store,
             docstore=node_store_component.doc_store,
             index_store=node_store_component.index_store,
         )
-        self.service_context = ServiceContext.from_defaults(
-            llm=llm_component.llm, embed_model=embedding_component.embedding_model
-        )
         self.index = VectorStoreIndex.from_vector_store(
             vector_store_component.vector_store,
             storage_context=self.storage_context,
-            service_context=self.service_context,
+            llm=llm_component.llm,
+            embed_model=embedding_component.embedding_model,
             show_progress=True,
         )

@@ -105,7 +105,7 @@ class ChatService:
             return ContextChatEngine.from_defaults(
                 system_prompt=system_prompt,
                 retriever=vector_index_retriever,
-                service_context=self.service_context,
+                llm=self.llm_component.llm,  # Takes no effect at the moment
                 node_postprocessors=[
                     MetadataReplacementPostProcessor(target_metadata_key="window"),
                 ],
@@ -113,7 +113,7 @@ class ChatService:
         else:
             return SimpleChatEngine.from_defaults(
                 system_prompt=system_prompt,
-                service_context=self.service_context,
+                llm=self.llm_component.llm,
             )

     def stream_chat(
@@ -1,8 +1,9 @@
 from typing import TYPE_CHECKING, Literal

 from injector import inject, singleton
-from llama_index import ServiceContext, StorageContext, VectorStoreIndex
-from llama_index.schema import NodeWithScore
+from llama_index.core.indices import VectorStoreIndex
+from llama_index.core.schema import NodeWithScore
+from llama_index.core.storage import StorageContext
 from pydantic import BaseModel, Field

 from private_gpt.components.embedding.embedding_component import EmbeddingComponent
@@ -15,7 +16,7 @@ from private_gpt.open_ai.extensions.context_filter import ContextFilter
 from private_gpt.server.ingest.model import IngestedDoc

 if TYPE_CHECKING:
-    from llama_index.schema import RelatedNodeInfo
+    from llama_index.core.schema import RelatedNodeInfo


 class Chunk(BaseModel):
@@ -63,14 +64,13 @@ class ChunksService:
         node_store_component: NodeStoreComponent,
     ) -> None:
         self.vector_store_component = vector_store_component
+        self.llm_component = llm_component
+        self.embedding_component = embedding_component
         self.storage_context = StorageContext.from_defaults(
             vector_store=vector_store_component.vector_store,
             docstore=node_store_component.doc_store,
             index_store=node_store_component.index_store,
         )
-        self.query_service_context = ServiceContext.from_defaults(
-            llm=llm_component.llm, embed_model=embedding_component.embedding_model
-        )

     def _get_sibling_nodes_text(
         self, node_with_score: NodeWithScore, related_number: int, forward: bool = True
@@ -103,7 +103,8 @@ class ChunksService:
         index = VectorStoreIndex.from_vector_store(
             self.vector_store_component.vector_store,
             storage_context=self.storage_context,
-            service_context=self.query_service_context,
+            llm=self.llm_component.llm,
+            embed_model=self.embedding_component.embedding_model,
             show_progress=True,
         )
         vector_index_retriever = self.vector_store_component.get_retriever(
@@ -4,11 +4,8 @@ from pathlib import Path
 from typing import AnyStr, BinaryIO

 from injector import inject, singleton
-from llama_index import (
-    ServiceContext,
-    StorageContext,
-)
-from llama_index.node_parser import SentenceWindowNodeParser
+from llama_index.core.node_parser import SentenceWindowNodeParser
+from llama_index.core.storage import StorageContext

 from private_gpt.components.embedding.embedding_component import EmbeddingComponent
 from private_gpt.components.ingest.ingest_component import get_ingestion_component
@@ -40,17 +37,12 @@ class IngestService:
             index_store=node_store_component.index_store,
         )
         node_parser = SentenceWindowNodeParser.from_defaults()
-        self.ingest_service_context = ServiceContext.from_defaults(
-            llm=self.llm_service.llm,
-            embed_model=embedding_component.embedding_model,
-            node_parser=node_parser,
-            # Embeddings done early in the pipeline of node transformations, right
-            # after the node parsing
-            transformations=[node_parser, embedding_component.embedding_model],
-        )

         self.ingest_component = get_ingestion_component(
-            self.storage_context, self.ingest_service_context, settings=settings()
+            self.storage_context,
+            embed_model=embedding_component.embedding_model,
+            transformations=[node_parser, embedding_component.embedding_model],
+            settings=settings(),
         )

     def _ingest_data(self, file_name: str, file_data: AnyStr) -> list[IngestedDoc]:
@@ -3,10 +3,9 @@ from pathlib import Path
 from typing import Any

 from watchdog.events import (
-    DirCreatedEvent,
-    DirModifiedEvent,
     FileCreatedEvent,
     FileModifiedEvent,
+    FileSystemEvent,
     FileSystemEventHandler,
 )
 from watchdog.observers import Observer
@@ -20,11 +19,11 @@ class IngestWatcher:
         self.on_file_changed = on_file_changed

         class Handler(FileSystemEventHandler):
-            def on_modified(self, event: DirModifiedEvent | FileModifiedEvent) -> None:
+            def on_modified(self, event: FileSystemEvent) -> None:
                 if isinstance(event, FileModifiedEvent):
                     on_file_changed(Path(event.src_path))

-            def on_created(self, event: DirCreatedEvent | FileCreatedEvent) -> None:
+            def on_created(self, event: FileSystemEvent) -> None:
                 if isinstance(event, FileCreatedEvent):
                     on_file_changed(Path(event.src_path))

@@ -1,6 +1,6 @@
 from typing import Any, Literal

-from llama_index import Document
+from llama_index.core.schema import Document
 from pydantic import BaseModel, Field


@@ -81,7 +81,7 @@ class DataSettings(BaseModel):


 class LLMSettings(BaseModel):
-    mode: Literal["local", "openai", "openailike", "sagemaker", "mock", "ollama"]
+    mode: Literal["llamacpp", "openai", "openailike", "sagemaker", "mock", "ollama"]
     max_new_tokens: int = Field(
         256,
         description="The maximum number of token that the LLM is authorized to generate in one completion.",
@@ -104,12 +104,9 @@ class VectorstoreSettings(BaseModel):
     database: Literal["chroma", "qdrant", "pgvector"]


-class LocalSettings(BaseModel):
+class LlamaCPPSettings(BaseModel):
     llm_hf_repo_id: str
     llm_hf_model_file: str
-    embedding_hf_model_name: str = Field(
-        description="Name of the HuggingFace model to use for embeddings"
-    )
     prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field(
         "llama2",
         description=(
@@ -123,8 +120,14 @@ class LocalSettings(BaseModel):
     )


+class HuggingFaceSettings(BaseModel):
+    embedding_hf_model_name: str = Field(
+        description="Name of the HuggingFace model to use for embeddings"
+    )
+
+
 class EmbeddingSettings(BaseModel):
-    mode: Literal["local", "openai", "sagemaker", "mock"]
+    mode: Literal["huggingface", "openai", "sagemaker", "ollama", "mock"]
     ingest_mode: Literal["simple", "batch", "parallel"] = Field(
         "simple",
         description=(
@@ -173,10 +176,14 @@ class OllamaSettings(BaseModel):
         "http://localhost:11434",
         description="Base URL of Ollama API. Example: 'https://localhost:11434'.",
     )
-    model: str = Field(
+    llm_model: str = Field(
         None,
         description="Model to use. Example: 'llama2-uncensored'.",
     )
+    embedding_model: str = Field(
+        None,
+        description="Model to use. Example: 'nomic-embed-text'.",
+    )


 class UISettings(BaseModel):
@@ -292,7 +299,8 @@ class Settings(BaseModel):
     ui: UISettings
     llm: LLMSettings
     embedding: EmbeddingSettings
-    local: LocalSettings
+    llamacpp: LlamaCPPSettings
+    huggingface: HuggingFaceSettings
     sagemaker: SagemakerSettings
     openai: OpenAISettings
     ollama: OllamaSettings
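Correspondingly, configuration that used to live under a single `local` settings section is now split across the new sections; a minimal sketch with placeholder values (field names are taken from the diff above, the concrete model ids and file names are only examples):

from private_gpt.settings.settings import HuggingFaceSettings, LlamaCPPSettings, OllamaSettings

llamacpp = LlamaCPPSettings(
    llm_hf_repo_id="TheBloke/Mistral-7B-Instruct-v0.2-GGUF",   # placeholder repo id
    llm_hf_model_file="mistral-7b-instruct-v0.2.Q4_K_M.gguf",  # placeholder file name
    prompt_style="llama2",
)
huggingface = HuggingFaceSettings(embedding_hf_model_name="BAAI/bge-small-en-v1.5")  # placeholder
ollama = OllamaSettings(llm_model="llama2-uncensored", embedding_model="nomic-embed-text")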
@@ -10,7 +10,7 @@ import gradio as gr  # type: ignore
 from fastapi import FastAPI
 from gradio.themes.utils.colors import slate  # type: ignore
 from injector import inject, singleton
-from llama_index.llms import ChatMessage, ChatResponse, MessageRole
+from llama_index.core.llms import ChatMessage, ChatResponse, MessageRole
 from pydantic import BaseModel

 from private_gpt.constants import PROJECT_ROOT_PATH