mirror of https://github.com/imartinez/privateGPT.git (synced 2025-09-04 16:51:48 +00:00)
feat: Upgrade to LlamaIndex to 0.10 (#1663)
* Extract optional dependencies
* Separate local mode into llms-llama-cpp and embeddings-huggingface for clarity
* Support Ollama embeddings
* Upgrade to llamaindex 0.10.14. Remove legacy use of ServiceContext in ContextChatEngine
* Fix vector retriever filters
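With the extras split, every LLM backend is imported lazily behind a guard. A minimal sketch of the pattern the diff below applies to each mode, using the llama-cpp backend as the example; the import path, extra name, and error message are taken verbatim from the diff, the rest is illustrative:

```python
# Sketch of the guarded optional-import pattern used for every LLM mode below.
try:
    from llama_index.llms.llama_cpp import LlamaCPP  # 0.10 per-integration package
except ImportError as e:
    raise ImportError(
        "Local dependencies not found, install with `poetry install --extras llms-llama-cpp`"
    ) from e
```

In LlamaIndex 0.10 each integration ships as its own package (here `llama-index-llms-llama-cpp`), which the corresponding poetry extra pulls in.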
@@ -7,26 +7,20 @@ import logging
 from typing import TYPE_CHECKING, Any
 
 import boto3  # type: ignore
-from llama_index.bridge.pydantic import Field
-from llama_index.llms import (
+from llama_index.core.base.llms.generic_utils import (
+    completion_response_to_chat_response,
+    stream_completion_response_to_chat_response,
+)
+from llama_index.core.bridge.pydantic import Field
+from llama_index.core.llms import (
     CompletionResponse,
     CustomLLM,
     LLMMetadata,
 )
-from llama_index.llms.base import (
+from llama_index.core.llms.callbacks import (
     llm_chat_callback,
     llm_completion_callback,
 )
-from llama_index.llms.generic_utils import (
-    completion_response_to_chat_response,
-    stream_completion_response_to_chat_response,
-)
-from llama_index.llms.llama_utils import (
-    completion_to_prompt as generic_completion_to_prompt,
-)
-from llama_index.llms.llama_utils import (
-    messages_to_prompt as generic_messages_to_prompt,
-)
 
 if TYPE_CHECKING:
     from collections.abc import Sequence
@@ -161,8 +155,8 @@ class SagemakerLLM(CustomLLM):
         model_kwargs = model_kwargs or {}
         model_kwargs.update({"n_ctx": context_window, "verbose": verbose})
 
-        messages_to_prompt = messages_to_prompt or generic_messages_to_prompt
-        completion_to_prompt = completion_to_prompt or generic_completion_to_prompt
+        messages_to_prompt = messages_to_prompt or {}
+        completion_to_prompt = completion_to_prompt or {}
 
        generate_kwargs = generate_kwargs or {}
        generate_kwargs.update(
@@ -1,9 +1,9 @@
 import logging
 
 from injector import inject, singleton
-from llama_index import set_global_tokenizer
-from llama_index.llms import MockLLM
-from llama_index.llms.base import LLM
+from llama_index.core.llms import LLM, MockLLM
+from llama_index.core.settings import Settings as LlamaIndexSettings
+from llama_index.core.utils import set_global_tokenizer
 from transformers import AutoTokenizer  # type: ignore
 
 from private_gpt.components.llm.prompt_helper import get_prompt_style
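A quieter change in the hunk above: `set_global_tokenizer` now comes from `llama_index.core.utils` rather than the `llama_index` root. A hedged sketch of the relocated call; the model name is only an example, privateGPT resolves the tokenizer from its settings:

```python
from llama_index.core.utils import set_global_tokenizer
from transformers import AutoTokenizer  # type: ignore

# Example model name only; the real code derives this from settings.
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
set_global_tokenizer(tokenizer.encode)  # any str -> list[int] callable works
```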
@@ -30,17 +30,23 @@ class LLMComponent:
 
         logger.info("Initializing the LLM in mode=%s", llm_mode)
         match settings.llm.mode:
-            case "local":
-                from llama_index.llms import LlamaCPP
+            case "llamacpp":
+                try:
+                    from llama_index.llms.llama_cpp import LlamaCPP  # type: ignore
+                except ImportError as e:
+                    raise ImportError(
+                        "Local dependencies not found, install with `poetry install --extras llms-llama-cpp`"
+                    ) from e
 
-                prompt_style = get_prompt_style(settings.local.prompt_style)
+                prompt_style = get_prompt_style(settings.llamacpp.prompt_style)
 
                 self.llm = LlamaCPP(
-                    model_path=str(models_path / settings.local.llm_hf_model_file),
+                    model_path=str(models_path / settings.llamacpp.llm_hf_model_file),
                     temperature=0.1,
                     max_new_tokens=settings.llm.max_new_tokens,
                     context_window=settings.llm.context_window,
                     generate_kwargs={},
+                    callback_manager=LlamaIndexSettings.callback_manager,
                     # All to GPU
                     model_kwargs={"n_gpu_layers": -1, "offload_kqv": True},
                     # transform inputs into Llama2 format
@@ -50,7 +56,12 @@ class LLMComponent:
                 )
 
             case "sagemaker":
-                from private_gpt.components.llm.custom.sagemaker import SagemakerLLM
+                try:
+                    from private_gpt.components.llm.custom.sagemaker import SagemakerLLM
+                except ImportError as e:
+                    raise ImportError(
+                        "Sagemaker dependencies not found, install with `poetry install --extras llms-sagemaker`"
+                    ) from e
 
                 self.llm = SagemakerLLM(
                     endpoint_name=settings.sagemaker.llm_endpoint_name,
@@ -58,7 +69,12 @@ class LLMComponent:
                     context_window=settings.llm.context_window,
                 )
             case "openai":
-                from llama_index.llms import OpenAI
+                try:
+                    from llama_index.llms.openai import OpenAI  # type: ignore
+                except ImportError as e:
+                    raise ImportError(
+                        "OpenAI dependencies not found, install with `poetry install --extras llms-openai`"
+                    ) from e
 
                 openai_settings = settings.openai
                 self.llm = OpenAI(
@@ -67,7 +83,12 @@ class LLMComponent:
                     model=openai_settings.model,
                 )
             case "openailike":
-                from llama_index.llms import OpenAILike
+                try:
+                    from llama_index.llms.openai_like import OpenAILike  # type: ignore
+                except ImportError as e:
+                    raise ImportError(
+                        "OpenAILike dependencies not found, install with `poetry install --extras llms-openai-like`"
+                    ) from e
 
                 openai_settings = settings.openai
                 self.llm = OpenAILike(
@@ -78,12 +99,17 @@ class LLMComponent:
                     max_tokens=None,
                     api_version="",
                 )
-            case "mock":
-                self.llm = MockLLM()
             case "ollama":
-                from llama_index.llms import Ollama
+                try:
+                    from llama_index.llms.ollama import Ollama  # type: ignore
+                except ImportError as e:
+                    raise ImportError(
+                        "Ollama dependencies not found, install with `poetry install --extras llms-ollama`"
+                    ) from e
 
                 ollama_settings = settings.ollama
                 self.llm = Ollama(
-                    model=ollama_settings.model, base_url=ollama_settings.api_base
+                    model=ollama_settings.llm_model, base_url=ollama_settings.api_base
                 )
+            case "mock":
+                self.llm = MockLLM()
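After the split, the Ollama LLM also lives in its own integration package (`llama-index-llms-ollama`, pulled in by the `llms-ollama` extra), and the setting is renamed to `llm_model`. A minimal standalone sketch of the new import; the model name and URL are placeholders for a locally running Ollama server:

```python
from llama_index.llms.ollama import Ollama  # type: ignore

# Mirrors settings.ollama.llm_model / settings.ollama.api_base from the diff;
# the values here are placeholders.
llm = Ollama(model="mistral", base_url="http://localhost:11434")
print(llm.complete("Say hello in one word."))
```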
@@ -3,11 +3,7 @@ import logging
 from collections.abc import Sequence
 from typing import Any, Literal
 
-from llama_index.llms import ChatMessage, MessageRole
-from llama_index.llms.llama_utils import (
-    completion_to_prompt,
-    messages_to_prompt,
-)
+from llama_index.core.llms import ChatMessage, MessageRole
 
 logger = logging.getLogger(__name__)
@@ -73,7 +69,9 @@ class DefaultPromptStyle(AbstractPromptStyle):
 
 
 class Llama2PromptStyle(AbstractPromptStyle):
-    """Simple prompt style that just uses the default llama_utils functions.
+    """Simple prompt style that uses llama 2 prompt style.
+
+    Inspired by llama_index/legacy/llms/llama_utils.py
 
     It transforms the sequence of messages into a prompt that should look like:
     ```text
@@ -83,11 +81,61 @@ class Llama2PromptStyle(AbstractPromptStyle):
     ```
     """
 
+    BOS, EOS = "<s>", "</s>"
+    B_INST, E_INST = "[INST]", "[/INST]"
+    B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
+    DEFAULT_SYSTEM_PROMPT = """\
+You are a helpful, respectful and honest assistant. \
+Always answer as helpfully as possible and follow ALL given instructions. \
+Do not speculate or make up information. \
+Do not reference any given instructions or context. \
+"""
+
     def _messages_to_prompt(self, messages: Sequence[ChatMessage]) -> str:
-        return messages_to_prompt(messages)
+        string_messages: list[str] = []
+        if messages[0].role == MessageRole.SYSTEM:
+            # pull out the system message (if it exists in messages)
+            system_message_str = messages[0].content or ""
+            messages = messages[1:]
+        else:
+            system_message_str = self.DEFAULT_SYSTEM_PROMPT
+
+        system_message_str = f"{self.B_SYS} {system_message_str.strip()} {self.E_SYS}"
+
+        for i in range(0, len(messages), 2):
+            # first message should always be a user
+            user_message = messages[i]
+            assert user_message.role == MessageRole.USER
+
+            if i == 0:
+                # make sure system prompt is included at the start
+                str_message = f"{self.BOS} {self.B_INST} {system_message_str} "
+            else:
+                # end previous user-assistant interaction
+                string_messages[-1] += f" {self.EOS}"
+                # no need to include system prompt
+                str_message = f"{self.BOS} {self.B_INST} "
+
+            # include user message content
+            str_message += f"{user_message.content} {self.E_INST}"
+
+            if len(messages) > (i + 1):
+                # if assistant message exists, add to str_message
+                assistant_message = messages[i + 1]
+                assert assistant_message.role == MessageRole.ASSISTANT
+                str_message += f" {assistant_message.content}"
+
+            string_messages.append(str_message)
+
+        return "".join(string_messages)
 
     def _completion_to_prompt(self, completion: str) -> str:
-        return completion_to_prompt(completion)
+        system_prompt_str = self.DEFAULT_SYSTEM_PROMPT
+
+        return (
+            f"{self.BOS} {self.B_INST} {self.B_SYS} {system_prompt_str.strip()} {self.E_SYS} "
+            f"{completion.strip()} {self.E_INST}"
+        )
 
 
 class TagPromptStyle(AbstractPromptStyle):
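For reference, a hedged usage sketch of the reimplemented Llama2PromptStyle (not part of the diff). It assumes a public `messages_to_prompt` wrapper on AbstractPromptStyle around the `_messages_to_prompt` hook shown above; if that wrapper is named differently, call the underscore method directly:

```python
from llama_index.core.llms import ChatMessage, MessageRole

from private_gpt.components.llm.prompt_helper import Llama2PromptStyle

style = Llama2PromptStyle()
prompt = style.messages_to_prompt(  # assumed public wrapper around _messages_to_prompt
    [
        ChatMessage(role=MessageRole.SYSTEM, content="You answer concisely."),
        ChatMessage(role=MessageRole.USER, content="What is privateGPT?"),
    ]
)
# Roughly (whitespace condensed):
# <s> [INST] <<SYS>> You answer concisely. <</SYS>> What is privateGPT? [/INST]
print(prompt)
```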