feat: update llama-index + dependencies (#2092)

* chore: update libraries

* fix: mypy

* chore: more updates

* fix: mypy/black

* chore: fix docker warnings

* fix: mypy

* fix: black
Author: Javier Martinez, 2024-09-26 16:29:52 +02:00, committed by GitHub
parent 5fbb402477
commit 5851b02378
16 changed files with 2773 additions and 2420 deletions

View File

@@ -61,7 +61,7 @@ services:
   ollama:
     image: traefik:v2.10
     ports:
-      - "11434:11434"
+      - "8080:8080"
     command:
       - "--providers.file.filename=/etc/router.yml"
       - "--log.level=ERROR"
@@ -83,6 +83,8 @@ services:
   # Ollama service for the CPU mode
   ollama-cpu:
     image: ollama/ollama:latest
+    ports:
+      - "11434:11434"
     volumes:
       - ./models:/root/.ollama
     profiles:
@@ -92,6 +94,8 @@ services:
   # Ollama service for the CUDA mode
   ollama-cuda:
     image: ollama/ollama:latest
+    ports:
+      - "11434:11434"
    volumes:
      - ./models:/root/.ollama
    deploy:
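
With this change the CPU and CUDA Ollama containers publish port 11434 directly, while the traefik entrypoint in the `ollama` proxy service now listens on 8080. A quick way to confirm which endpoint answers is to probe both ports; a minimal sketch, assuming the compose stack is running on localhost with the mappings shown above (URL and timeout values are illustrative):

import urllib.request

# Ollama replies to a plain GET on "/" with "Ollama is running";
# 11434 is the direct container port added here, 8080 the traefik entrypoint.
for port in (11434, 8080):
    url = f"http://localhost:{port}/"
    try:
        with urllib.request.urlopen(url, timeout=2) as resp:
            print(port, resp.status, resp.read(64).decode(errors="replace").strip())
    except OSError as exc:
        print(port, "unreachable:", exc)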

poetry.lock (generated): 4997 changed lines

File diff suppressed because it is too large.

View File

@@ -403,7 +403,7 @@ class PipelineIngestComponent(BaseIngestComponentWithIndex):
                     self.transformations,
                     show_progress=self.show_progress,
                 )
-                self.node_q.put(("process", file_name, documents, nodes))
+                self.node_q.put(("process", file_name, documents, list(nodes)))
             finally:
                 self.doc_semaphore.release()
                 self.doc_q.task_done()  # unblock Q joins
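
The only functional change here is wrapping `nodes` in `list(...)` before queueing, presumably because the upgraded llama-index types this result as a `Sequence` rather than a `list`; materializing it keeps the queue payload a concrete, re-iterable list and satisfies mypy. A self-contained sketch of the pattern (the helper name and payload shape are illustrative, not the project's exact code):

from collections.abc import Sequence
from queue import Queue
from typing import Any

def enqueue_for_processing(
    node_q: "Queue[tuple[str, str, list[Any]]]", file_name: str, nodes: Sequence[Any]
) -> None:
    # list(...) materializes whatever Sequence the pipeline returned, so the
    # consumer thread always receives a plain list it can index and re-iterate.
    node_q.put(("process", file_name, list(nodes)))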

View File

@@ -120,7 +120,6 @@ class LLMComponent:
                 api_version="",
                 temperature=settings.llm.temperature,
                 context_window=settings.llm.context_window,
-                max_new_tokens=settings.llm.max_new_tokens,
                 messages_to_prompt=prompt_style.messages_to_prompt,
                 completion_to_prompt=prompt_style.completion_to_prompt,
                 tokenizer=settings.llm.tokenizer,
@@ -184,10 +183,10 @@ class LLMComponent:
                     return wrapper

-                Ollama.chat = add_keep_alive(Ollama.chat)
-                Ollama.stream_chat = add_keep_alive(Ollama.stream_chat)
-                Ollama.complete = add_keep_alive(Ollama.complete)
-                Ollama.stream_complete = add_keep_alive(Ollama.stream_complete)
+                Ollama.chat = add_keep_alive(Ollama.chat)  # type: ignore
+                Ollama.stream_chat = add_keep_alive(Ollama.stream_chat)  # type: ignore
+                Ollama.complete = add_keep_alive(Ollama.complete)  # type: ignore
+                Ollama.stream_complete = add_keep_alive(Ollama.stream_complete)  # type: ignore

                 self.llm = llm
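
Two unrelated fixes share this file: the `max_new_tokens` kwarg is dropped from this constructor call, presumably because the updated integration no longer accepts it there, and the monkey-patched `Ollama` methods gain `# type: ignore` because reassigning methods on the class trips mypy under the new stubs. The wrapper being applied is not shown in this hunk; a sketch of what a keep-alive injector of this shape could look like, where the default value and the function body are assumptions for illustration rather than the project's exact implementation:

import functools
from collections.abc import Callable
from typing import Any

def add_keep_alive(func: Callable[..., Any], keep_alive: str = "5m") -> Callable[..., Any]:
    """Inject a keep_alive kwarg into every call unless the caller set one."""

    @functools.wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        kwargs.setdefault("keep_alive", keep_alive)
        return func(*args, **kwargs)

    return wrapper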

View File

@@ -40,7 +40,8 @@ class AbstractPromptStyle(abc.ABC):
         logger.debug("Got for messages='%s' the prompt='%s'", messages, prompt)
         return prompt

-    def completion_to_prompt(self, completion: str) -> str:
+    def completion_to_prompt(self, prompt: str) -> str:
+        completion = prompt  # Fix: Llama-index parameter has to be named as prompt
         prompt = self._completion_to_prompt(completion)
         logger.debug("Got for completion='%s' the prompt='%s'", completion, prompt)
         return prompt
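
The rename matters because llama-index calls this hook with a keyword argument named `prompt` (as the inline comment notes); keeping the old parameter name would fail at call time, while the local alias keeps the method body unchanged. A self-contained illustration, with the two functions below as stand-ins rather than the project's classes:

def old_signature(completion: str) -> str:
    return f"<s>{completion}</s>"

def new_signature(prompt: str) -> str:
    completion = prompt  # keep the old local name so the body stays identical
    return f"<s>{completion}</s>"

new_signature(prompt="hello")    # works when invoked by keyword
# old_signature(prompt="hello")  # TypeError: unexpected keyword argument 'prompt'
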
@@ -285,8 +286,9 @@ class ChatMLPromptStyle(AbstractPromptStyle):
 def get_prompt_style(
-    prompt_style: Literal["default", "llama2", "llama3", "tag", "mistral", "chatml"]
-    | None
+    prompt_style: (
+        Literal["default", "llama2", "llama3", "tag", "mistral", "chatml"] | None
+    )
 ) -> AbstractPromptStyle:
     """Get the prompt style to use from the given string.

View File

@@ -38,10 +38,10 @@ class NodeStoreComponent:
             case "postgres":
                 try:
-                    from llama_index.core.storage.docstore.postgres_docstore import (
+                    from llama_index.storage.docstore.postgres import (  # type: ignore
                         PostgresDocumentStore,
                     )
-                    from llama_index.core.storage.index_store.postgres_index_store import (
+                    from llama_index.storage.index_store.postgres import (  # type: ignore
                         PostgresIndexStore,
                     )
                 except ImportError:
@@ -55,6 +55,7 @@ class NodeStoreComponent:
                 self.index_store = PostgresIndexStore.from_params(
                     **settings.postgres.model_dump(exclude_none=True)
                 )
+
                 self.doc_store = PostgresDocumentStore.from_params(
                     **settings.postgres.model_dump(exclude_none=True)
                 )
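
The Postgres docstore and index store now live in their own integration packages rather than in `llama_index.core`, which is why the import paths change (and why `# type: ignore` is added for the untyped packages). A minimal sketch of the guarded optional import; the error message is illustrative, and the extra name comes from the `storage-nodestore-postgres` extra in pyproject.toml below:

try:
    from llama_index.storage.docstore.postgres import PostgresDocumentStore
    from llama_index.storage.index_store.postgres import PostgresIndexStore
except ImportError as exc:
    # Both packages ship with the optional extra; fail with an actionable hint.
    raise ImportError(
        "Postgres node store requires the 'storage-nodestore-postgres' extra: "
        "poetry install --extras storage-nodestore-postgres"
    ) from exc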

View File

@@ -1,14 +1,17 @@
-from collections.abc import Generator
-from typing import Any
+from collections.abc import Generator, Sequence
+from typing import TYPE_CHECKING, Any

 from llama_index.core.schema import BaseNode, MetadataMode
 from llama_index.core.vector_stores.utils import node_to_metadata_dict
 from llama_index.vector_stores.chroma import ChromaVectorStore  # type: ignore

+if TYPE_CHECKING:
+    from collections.abc import Mapping
+

 def chunk_list(
-    lst: list[BaseNode], max_chunk_size: int
-) -> Generator[list[BaseNode], None, None]:
+    lst: Sequence[BaseNode], max_chunk_size: int
+) -> Generator[Sequence[BaseNode], None, None]:
     """Yield successive max_chunk_size-sized chunks from lst.

     Args:
@@ -60,7 +63,7 @@ class BatchedChromaVectorStore(ChromaVectorStore):  # type: ignore
         )
         self.chroma_client = chroma_client

-    def add(self, nodes: list[BaseNode], **add_kwargs: Any) -> list[str]:
+    def add(self, nodes: Sequence[BaseNode], **add_kwargs: Any) -> list[str]:
         """Add nodes to index, batching the insertion to avoid issues.

         Args:
@@ -78,8 +81,8 @@ class BatchedChromaVectorStore(ChromaVectorStore):  # type: ignore
         all_ids = []
         for node_chunk in node_chunks:
-            embeddings = []
-            metadatas = []
+            embeddings: list[Sequence[float]] = []
+            metadatas: list[Mapping[str, Any]] = []
             ids = []
             documents = []
             for node in node_chunk:
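
Switching the annotations from `list[BaseNode]` to `Sequence[BaseNode]` lets the batched store accept whatever sequence type the updated llama-index passes in without copying. The body of `chunk_list` is outside this hunk; a self-contained sketch of a generator matching the new signature, with `int` standing in for `BaseNode` so it runs on its own:

from collections.abc import Generator, Sequence

def chunk_list(
    lst: Sequence[int], max_chunk_size: int
) -> Generator[Sequence[int], None, None]:
    """Yield successive max_chunk_size-sized slices of lst."""
    for i in range(0, len(lst), max_chunk_size):
        yield lst[i : i + max_chunk_size]

print(list(chunk_list(list(range(7)), 3)))  # [[0, 1, 2], [3, 4, 5], [6]]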

View File

@@ -1,4 +1,5 @@
 from dataclasses import dataclass
+from typing import TYPE_CHECKING

 from injector import inject, singleton
 from llama_index.core.chat_engine import ContextChatEngine, SimpleChatEngine
@@ -26,6 +27,9 @@ from private_gpt.open_ai.extensions.context_filter import ContextFilter
 from private_gpt.server.chunks.chunks_service import Chunk
 from private_gpt.settings.settings import Settings

+if TYPE_CHECKING:
+    from llama_index.core.postprocessor.types import BaseNodePostprocessor
+

 class Completion(BaseModel):
     response: str
@@ -114,12 +118,15 @@ class ChatService:
                 context_filter=context_filter,
                 similarity_top_k=self.settings.rag.similarity_top_k,
             )
-            node_postprocessors = [
+            node_postprocessors: list[BaseNodePostprocessor] = [
                 MetadataReplacementPostProcessor(target_metadata_key="window"),
-                SimilarityPostprocessor(
-                    similarity_cutoff=settings.rag.similarity_value
-                ),
             ]
+
+            if settings.rag.similarity_value:
+                node_postprocessors.append(
+                    SimilarityPostprocessor(
+                        similarity_cutoff=settings.rag.similarity_value
+                    )
+                )

             if settings.rag.rerank.enabled:
                 rerank_postprocessor = SentenceTransformerRerank(
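
Besides the type annotation (the reason `BaseNodePostprocessor` is now imported under `TYPE_CHECKING`), the `SimilarityPostprocessor` is only appended when `rag.similarity_value` is actually configured, so no similarity filter is installed when there is no cutoff to apply. A small usage sketch of the cutoff behaviour, assuming the llama-index-core APIs pinned by this PR; scores and texts are made up:

from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.schema import NodeWithScore, TextNode

nodes = [
    NodeWithScore(node=TextNode(text="relevant chunk"), score=0.82),
    NodeWithScore(node=TextNode(text="barely related"), score=0.15),
]
kept = SimilarityPostprocessor(similarity_cutoff=0.5).postprocess_nodes(nodes)
print([n.node.get_content() for n in kept])  # expected: ['relevant chunk']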

View File

@@ -90,9 +90,9 @@ class SummarizeService:
         # Add context documents to summarize
         if use_context:
             # 1. Recover all ref docs
-            ref_docs: dict[
-                str, RefDocInfo
-            ] | None = self.storage_context.docstore.get_all_ref_doc_info()
+            ref_docs: dict[str, RefDocInfo] | None = (
+                self.storage_context.docstore.get_all_ref_doc_info()
+            )
             if ref_docs is None:
                 raise ValueError("No documents have been ingested yet.")

View File

@@ -136,9 +136,8 @@ class LLMSettings(BaseModel):
         0.1,
         description="The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual.",
     )
-    prompt_style: Literal[
-        "default", "llama2", "llama3", "tag", "mistral", "chatml"
-    ] = Field(
+    prompt_style: Literal["default", "llama2", "llama3", "tag", "mistral", "chatml"] = (
+        Field(
         "llama2",
         description=(
             "The prompt style to use for the chat engine. "
@@ -150,6 +149,7 @@ class LLMSettings(BaseModel):
             "`llama2` is the historic behaviour. `default` might work better with your custom models."
         ),
     )
+    )


 class VectorstoreSettings(BaseModel):

View File

@@ -1,4 +1,5 @@
 """This file should be imported if and only if you want to run the UI locally."""
+
 import base64
 import logging
 import time

View File

@@ -7,63 +7,54 @@ authors = ["Zylon <hi@zylon.ai>"]
 [tool.poetry.dependencies]
 python = ">=3.11,<3.12"
 # PrivateGPT
-fastapi = { extras = ["all"], version = "^0.111.0" }
+fastapi = { extras = ["all"], version = "^0.115.0" }
-python-multipart = "^0.0.9"
+python-multipart = "^0.0.10"
-injector = "^0.21.0"
+injector = "^0.22.0"
-pyyaml = "^6.0.1"
+pyyaml = "^6.0.2"
 watchdog = "^4.0.1"
-transformers = "^4.42.3"
+transformers = "^4.44.2"
 docx2txt = "^0.8"
 cryptography = "^3.1"
 # LlamaIndex core libs
-llama-index-core = "^0.10.52"
+llama-index-core = ">=0.11.2,<0.12.0"
-llama-index-readers-file = "^0.1.27"
+llama-index-readers-file = "*"
 # Optional LlamaIndex integration libs
-llama-index-llms-llama-cpp = {version = "^0.1.4", optional = true}
+llama-index-llms-llama-cpp = {version = "*", optional = true}
-llama-index-llms-openai = {version = "^0.1.25", optional = true}
+llama-index-llms-openai = {version ="*", optional = true}
-llama-index-llms-openai-like = {version ="^0.1.3", optional = true}
+llama-index-llms-openai-like = {version ="*", optional = true}
-llama-index-llms-ollama = {version ="^0.2.2", optional = true}
+llama-index-llms-ollama = {version ="*", optional = true}
-llama-index-llms-azure-openai = {version ="^0.1.8", optional = true}
+llama-index-llms-azure-openai = {version ="*", optional = true}
-llama-index-llms-gemini = {version ="^0.1.11", optional = true}
+llama-index-llms-gemini = {version ="*", optional = true}
-llama-index-embeddings-ollama = {version ="^0.1.2", optional = true}
+llama-index-embeddings-ollama = {version ="*", optional = true}
-llama-index-embeddings-huggingface = {version ="^0.2.2", optional = true}
+llama-index-embeddings-huggingface = {version ="*", optional = true}
-llama-index-embeddings-openai = {version ="^0.1.10", optional = true}
+llama-index-embeddings-openai = {version ="*", optional = true}
-llama-index-embeddings-azure-openai = {version ="^0.1.10", optional = true}
+llama-index-embeddings-azure-openai = {version ="*", optional = true}
-llama-index-embeddings-gemini = {version ="^0.1.8", optional = true}
+llama-index-embeddings-gemini = {version ="*", optional = true}
-llama-index-embeddings-mistralai = {version ="^0.1.6", optional = true}
+llama-index-embeddings-mistralai = {version ="*", optional = true}
-llama-index-vector-stores-qdrant = {version ="^0.2.10", optional = true}
+llama-index-vector-stores-qdrant = {version ="*", optional = true}
-llama-index-vector-stores-milvus = {version ="^0.1.20", optional = true}
+llama-index-vector-stores-milvus = {version ="*", optional = true}
-llama-index-vector-stores-chroma = {version ="^0.1.10", optional = true}
+llama-index-vector-stores-chroma = {version ="*", optional = true}
-llama-index-vector-stores-postgres = {version ="^0.1.11", optional = true}
+llama-index-vector-stores-postgres = {version ="*", optional = true}
-llama-index-vector-stores-clickhouse = {version ="^0.1.3", optional = true}
+llama-index-vector-stores-clickhouse = {version ="*", optional = true}
-llama-index-storage-docstore-postgres = {version ="^0.1.3", optional = true}
+llama-index-storage-docstore-postgres = {version ="*", optional = true}
-llama-index-storage-index-store-postgres = {version ="^0.1.4", optional = true}
+llama-index-storage-index-store-postgres = {version ="*", optional = true}
 # Postgres
 psycopg2-binary = {version ="^2.9.9", optional = true}
 asyncpg = {version="^0.29.0", optional = true}
 # ClickHouse
-clickhouse-connect = {version = "^0.7.15", optional = true}
+clickhouse-connect = {version = "^0.7.19", optional = true}
 # Optional Sagemaker dependency
-boto3 = {version ="^1.34.139", optional = true}
+boto3 = {version ="^1.35.26", optional = true}
-# Optional Qdrant client
-qdrant-client = {version ="^1.9.0", optional = true}
 # Optional Reranker dependencies
-torch = {version ="^2.3.1", optional = true}
+torch = {version ="^2.4.1", optional = true}
-sentence-transformers = {version ="^3.0.1", optional = true}
+sentence-transformers = {version ="^3.1.1", optional = true}
 # Optional UI
-gradio = {version ="^4.37.2", optional = true}
+gradio = {version ="^4.44.0", optional = true}
-ffmpy = "0.4.0"
+ffmpy = {version ="^0.4.0", optional = true}
-# Optional Google Gemini dependency
-google-generativeai = {version ="^0.5.4", optional = true}
-# Optional Ollama client
-ollama = {version ="^0.3.0", optional = true}
 # Optional HF Transformers
 einops = {version = "^0.8.0", optional = true}
@@ -74,11 +65,11 @@ ui = ["gradio", "ffmpy"]
 llms-llama-cpp = ["llama-index-llms-llama-cpp"]
 llms-openai = ["llama-index-llms-openai"]
 llms-openai-like = ["llama-index-llms-openai-like"]
-llms-ollama = ["llama-index-llms-ollama", "ollama"]
+llms-ollama = ["llama-index-llms-ollama"]
 llms-sagemaker = ["boto3"]
 llms-azopenai = ["llama-index-llms-azure-openai"]
-llms-gemini = ["llama-index-llms-gemini", "google-generativeai"]
+llms-gemini = ["llama-index-llms-gemini"]
-embeddings-ollama = ["llama-index-embeddings-ollama", "ollama"]
+embeddings-ollama = ["llama-index-embeddings-ollama"]
 embeddings-huggingface = ["llama-index-embeddings-huggingface", "einops"]
 embeddings-openai = ["llama-index-embeddings-openai"]
 embeddings-sagemaker = ["boto3"]
@@ -94,14 +85,14 @@ storage-nodestore-postgres = ["llama-index-storage-docstore-postgres","llama-ind
 rerank-sentence-transformers = ["torch", "sentence-transformers"]

 [tool.poetry.group.dev.dependencies]
-black = "^22"
+black = "^24"
-mypy = "^1.2"
+mypy = "^1.11"
-pre-commit = "^2"
+pre-commit = "^3"
-pytest = "^7"
+pytest = "^8"
-pytest-cov = "^3"
+pytest-cov = "^5"
 ruff = "^0"
-pytest-asyncio = "^0.21.1"
+pytest-asyncio = "^0.24.0"
-types-pyyaml = "^6.0.12.12"
+types-pyyaml = "^6.0.12.20240917"

 [build-system]
 requires = ["poetry-core>=1.0.0"]

View File

@@ -5,7 +5,7 @@ from private_gpt.launcher import create_app
 from tests.fixtures.mock_injector import MockInjector

-@pytest.fixture()
+@pytest.fixture
 def test_client(request: pytest.FixtureRequest, injector: MockInjector) -> TestClient:
     if request is not None and hasattr(request, "param"):
         injector.bind_settings(request.param or {})

View File

@@ -19,6 +19,6 @@ class IngestHelper:
         return ingest_result

-@pytest.fixture()
+@pytest.fixture
 def ingest_helper(test_client: TestClient) -> IngestHelper:
     return IngestHelper(test_client)

View File

@@ -37,6 +37,6 @@ class MockInjector:
         return self.test_injector.get(interface)

-@pytest.fixture()
+@pytest.fixture
 def injector() -> MockInjector:
     return MockInjector()

View File

@@ -6,7 +6,7 @@ import pytest
 from fastapi.testclient import TestClient

-@pytest.fixture()
+@pytest.fixture
 def file_path() -> str:
     return "test.txt"