mirror of https://github.com/imartinez/privateGPT.git (synced 2025-09-04 16:51:48 +00:00)
feat: Upgrade to LlamaIndex to 0.10 (#1663)
* Extract optional dependencies
* Separate local mode into llms-llama-cpp and embeddings-huggingface for clarity
* Support Ollama embeddings
* Upgrade to llamaindex 0.10.14. Remove legacy use of ServiceContext in ContextChatEngine
* Fix vector retriever filters
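With the extras split, every LLM backend is imported lazily behind a guard. A minimal sketch of the pattern the diff below applies to each mode, using the llama-cpp backend as the example; the import path, extra name, and error message are taken verbatim from the diff, the rest is illustrative:

```python
# Sketch of the guarded optional-import pattern used for every LLM mode below.
try:
    from llama_index.llms.llama_cpp import LlamaCPP  # 0.10 per-integration package
except ImportError as e:
    raise ImportError(
        "Local dependencies not found, install with `poetry install --extras llms-llama-cpp`"
    ) from e
```

In LlamaIndex 0.10 each integration ships as its own package (here `llama-index-llms-llama-cpp`), which the corresponding poetry extra pulls in.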
@@ -7,26 +7,20 @@ import logging
 from typing import TYPE_CHECKING, Any
 
 import boto3  # type: ignore
-from llama_index.bridge.pydantic import Field
-from llama_index.llms import (
+from llama_index.core.base.llms.generic_utils import (
+    completion_response_to_chat_response,
+    stream_completion_response_to_chat_response,
+)
+from llama_index.core.bridge.pydantic import Field
+from llama_index.core.llms import (
     CompletionResponse,
     CustomLLM,
     LLMMetadata,
 )
-from llama_index.llms.base import (
+from llama_index.core.llms.callbacks import (
     llm_chat_callback,
     llm_completion_callback,
 )
-from llama_index.llms.generic_utils import (
-    completion_response_to_chat_response,
-    stream_completion_response_to_chat_response,
-)
-from llama_index.llms.llama_utils import (
-    completion_to_prompt as generic_completion_to_prompt,
-)
-from llama_index.llms.llama_utils import (
-    messages_to_prompt as generic_messages_to_prompt,
-)
 
 if TYPE_CHECKING:
     from collections.abc import Sequence
@@ -161,8 +155,8 @@ class SagemakerLLM(CustomLLM):
         model_kwargs = model_kwargs or {}
         model_kwargs.update({"n_ctx": context_window, "verbose": verbose})
 
-        messages_to_prompt = messages_to_prompt or generic_messages_to_prompt
-        completion_to_prompt = completion_to_prompt or generic_completion_to_prompt
+        messages_to_prompt = messages_to_prompt or {}
+        completion_to_prompt = completion_to_prompt or {}
 
        generate_kwargs = generate_kwargs or {}
        generate_kwargs.update(
@@ -1,9 +1,9 @@
 import logging
 
 from injector import inject, singleton
-from llama_index import set_global_tokenizer
-from llama_index.llms import MockLLM
-from llama_index.llms.base import LLM
+from llama_index.core.llms import LLM, MockLLM
+from llama_index.core.settings import Settings as LlamaIndexSettings
+from llama_index.core.utils import set_global_tokenizer
 from transformers import AutoTokenizer  # type: ignore
 
 from private_gpt.components.llm.prompt_helper import get_prompt_style
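A quieter change in the hunk above: `set_global_tokenizer` now comes from `llama_index.core.utils` rather than the `llama_index` root. A hedged sketch of the relocated call; the model name is only an example, privateGPT resolves the tokenizer from its settings:

```python
from llama_index.core.utils import set_global_tokenizer
from transformers import AutoTokenizer  # type: ignore

# Example model name only; the real code derives this from settings.
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
set_global_tokenizer(tokenizer.encode)  # any str -> list[int] callable works
```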
@@ -30,17 +30,23 @@ class LLMComponent:
 
         logger.info("Initializing the LLM in mode=%s", llm_mode)
         match settings.llm.mode:
-            case "local":
-                from llama_index.llms import LlamaCPP
+            case "llamacpp":
+                try:
+                    from llama_index.llms.llama_cpp import LlamaCPP  # type: ignore
+                except ImportError as e:
+                    raise ImportError(
+                        "Local dependencies not found, install with `poetry install --extras llms-llama-cpp`"
+                    ) from e
 
-                prompt_style = get_prompt_style(settings.local.prompt_style)
+                prompt_style = get_prompt_style(settings.llamacpp.prompt_style)
 
                 self.llm = LlamaCPP(
-                    model_path=str(models_path / settings.local.llm_hf_model_file),
+                    model_path=str(models_path / settings.llamacpp.llm_hf_model_file),
                     temperature=0.1,
                     max_new_tokens=settings.llm.max_new_tokens,
                     context_window=settings.llm.context_window,
                     generate_kwargs={},
+                    callback_manager=LlamaIndexSettings.callback_manager,
                     # All to GPU
                     model_kwargs={"n_gpu_layers": -1, "offload_kqv": True},
                     # transform inputs into Llama2 format
@@ -50,7 +56,12 @@ class LLMComponent:
                 )
 
             case "sagemaker":
-                from private_gpt.components.llm.custom.sagemaker import SagemakerLLM
+                try:
+                    from private_gpt.components.llm.custom.sagemaker import SagemakerLLM
+                except ImportError as e:
+                    raise ImportError(
+                        "Sagemaker dependencies not found, install with `poetry install --extras llms-sagemaker`"
+                    ) from e
 
                 self.llm = SagemakerLLM(
                     endpoint_name=settings.sagemaker.llm_endpoint_name,
@@ -58,7 +69,12 @@ class LLMComponent:
                     context_window=settings.llm.context_window,
                 )
             case "openai":
-                from llama_index.llms import OpenAI
+                try:
+                    from llama_index.llms.openai import OpenAI  # type: ignore
+                except ImportError as e:
+                    raise ImportError(
+                        "OpenAI dependencies not found, install with `poetry install --extras llms-openai`"
+                    ) from e
 
                 openai_settings = settings.openai
                 self.llm = OpenAI(
@@ -67,7 +83,12 @@ class LLMComponent:
                     model=openai_settings.model,
                 )
             case "openailike":
-                from llama_index.llms import OpenAILike
+                try:
+                    from llama_index.llms.openai_like import OpenAILike  # type: ignore
+                except ImportError as e:
+                    raise ImportError(
+                        "OpenAILike dependencies not found, install with `poetry install --extras llms-openai-like`"
+                    ) from e
 
                 openai_settings = settings.openai
                 self.llm = OpenAILike(
@@ -78,12 +99,17 @@ class LLMComponent:
                     max_tokens=None,
                     api_version="",
                 )
-            case "mock":
-                self.llm = MockLLM()
             case "ollama":
-                from llama_index.llms import Ollama
+                try:
+                    from llama_index.llms.ollama import Ollama  # type: ignore
+                except ImportError as e:
+                    raise ImportError(
+                        "Ollama dependencies not found, install with `poetry install --extras llms-ollama`"
+                    ) from e
 
                 ollama_settings = settings.ollama
                 self.llm = Ollama(
-                    model=ollama_settings.model, base_url=ollama_settings.api_base
+                    model=ollama_settings.llm_model, base_url=ollama_settings.api_base
                 )
+            case "mock":
+                self.llm = MockLLM()
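After the split, the Ollama LLM also lives in its own integration package (`llama-index-llms-ollama`, pulled in by the `llms-ollama` extra), and the setting is renamed to `llm_model`. A minimal standalone sketch of the new import; the model name and URL are placeholders for a locally running Ollama server:

```python
from llama_index.llms.ollama import Ollama  # type: ignore

# Mirrors settings.ollama.llm_model / settings.ollama.api_base from the diff;
# the values here are placeholders.
llm = Ollama(model="mistral", base_url="http://localhost:11434")
print(llm.complete("Say hello in one word."))
```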
@@ -3,11 +3,7 @@ import logging
 from collections.abc import Sequence
 from typing import Any, Literal
 
-from llama_index.llms import ChatMessage, MessageRole
-from llama_index.llms.llama_utils import (
-    completion_to_prompt,
-    messages_to_prompt,
-)
+from llama_index.core.llms import ChatMessage, MessageRole
 
 logger = logging.getLogger(__name__)
@@ -73,7 +69,9 @@ class DefaultPromptStyle(AbstractPromptStyle):
 
 
 class Llama2PromptStyle(AbstractPromptStyle):
-    """Simple prompt style that just uses the default llama_utils functions.
+    """Simple prompt style that uses llama 2 prompt style.
+
+    Inspired by llama_index/legacy/llms/llama_utils.py
 
     It transforms the sequence of messages into a prompt that should look like:
     ```text
@@ -83,11 +81,61 @@ class Llama2PromptStyle(AbstractPromptStyle):
     ```
     """
 
+    BOS, EOS = "<s>", "</s>"
+    B_INST, E_INST = "[INST]", "[/INST]"
+    B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
+    DEFAULT_SYSTEM_PROMPT = """\
+You are a helpful, respectful and honest assistant. \
+Always answer as helpfully as possible and follow ALL given instructions. \
+Do not speculate or make up information. \
+Do not reference any given instructions or context. \
+"""
+
     def _messages_to_prompt(self, messages: Sequence[ChatMessage]) -> str:
-        return messages_to_prompt(messages)
+        string_messages: list[str] = []
+        if messages[0].role == MessageRole.SYSTEM:
+            # pull out the system message (if it exists in messages)
+            system_message_str = messages[0].content or ""
+            messages = messages[1:]
+        else:
+            system_message_str = self.DEFAULT_SYSTEM_PROMPT
+
+        system_message_str = f"{self.B_SYS} {system_message_str.strip()} {self.E_SYS}"
+
+        for i in range(0, len(messages), 2):
+            # first message should always be a user
+            user_message = messages[i]
+            assert user_message.role == MessageRole.USER
+
+            if i == 0:
+                # make sure system prompt is included at the start
+                str_message = f"{self.BOS} {self.B_INST} {system_message_str} "
+            else:
+                # end previous user-assistant interaction
+                string_messages[-1] += f" {self.EOS}"
+                # no need to include system prompt
+                str_message = f"{self.BOS} {self.B_INST} "
+
+            # include user message content
+            str_message += f"{user_message.content} {self.E_INST}"
+
+            if len(messages) > (i + 1):
+                # if assistant message exists, add to str_message
+                assistant_message = messages[i + 1]
+                assert assistant_message.role == MessageRole.ASSISTANT
+                str_message += f" {assistant_message.content}"
+
+            string_messages.append(str_message)
+
+        return "".join(string_messages)
 
     def _completion_to_prompt(self, completion: str) -> str:
-        return completion_to_prompt(completion)
+        system_prompt_str = self.DEFAULT_SYSTEM_PROMPT
+
+        return (
+            f"{self.BOS} {self.B_INST} {self.B_SYS} {system_prompt_str.strip()} {self.E_SYS} "
+            f"{completion.strip()} {self.E_INST}"
+        )
 
 
 class TagPromptStyle(AbstractPromptStyle):
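For reference, a hedged usage sketch of the reimplemented Llama2PromptStyle (not part of the diff). It assumes a public `messages_to_prompt` wrapper on AbstractPromptStyle around the `_messages_to_prompt` hook shown above; if that wrapper is named differently, call the underscore method directly:

```python
from llama_index.core.llms import ChatMessage, MessageRole

from private_gpt.components.llm.prompt_helper import Llama2PromptStyle

style = Llama2PromptStyle()
prompt = style.messages_to_prompt(  # assumed public wrapper around _messages_to_prompt
    [
        ChatMessage(role=MessageRole.SYSTEM, content="You answer concisely."),
        ChatMessage(role=MessageRole.USER, content="What is privateGPT?"),
    ]
)
# Roughly (whitespace condensed):
# <s> [INST] <<SYS>> You answer concisely. <</SYS>> What is privateGPT? [/INST]
print(prompt)
```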