feat: Upgrade LlamaIndex to 0.10 (#1663)

* Extract optional dependencies

* Separate local mode into llms-llama-cpp and embeddings-huggingface for clarity

* Support Ollama embeddings

* Upgrade to LlamaIndex 0.10.14. Remove legacy use of ServiceContext in ContextChatEngine

* Fix vector retriever filters
This commit is contained in:
Iván Martínez
2024-03-06 17:51:30 +01:00
committed by GitHub
parent 12f3a39e8a
commit 45f05711eb
43 changed files with 1474 additions and 1396 deletions

View File

@@ -4,11 +4,8 @@ from pathlib import Path
from typing import AnyStr, BinaryIO
from injector import inject, singleton
from llama_index import (
ServiceContext,
StorageContext,
)
from llama_index.node_parser import SentenceWindowNodeParser
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core.storage import StorageContext
from private_gpt.components.embedding.embedding_component import EmbeddingComponent
from private_gpt.components.ingest.ingest_component import get_ingestion_component
@@ -40,17 +37,12 @@ class IngestService:
index_store=node_store_component.index_store,
)
node_parser = SentenceWindowNodeParser.from_defaults()
self.ingest_service_context = ServiceContext.from_defaults(
llm=self.llm_service.llm,
embed_model=embedding_component.embedding_model,
node_parser=node_parser,
# Embeddings done early in the pipeline of node transformations, right
# after the node parsing
transformations=[node_parser, embedding_component.embedding_model],
)
self.ingest_component = get_ingestion_component(
self.storage_context, self.ingest_service_context, settings=settings()
self.storage_context,
embed_model=embedding_component.embedding_model,
transformations=[node_parser, embedding_component.embedding_model],
settings=settings(),
)
def _ingest_data(self, file_name: str, file_data: AnyStr) -> list[IngestedDoc]:

View File

@@ -3,10 +3,9 @@ from pathlib import Path
from typing import Any
from watchdog.events import (
DirCreatedEvent,
DirModifiedEvent,
FileCreatedEvent,
FileModifiedEvent,
FileSystemEvent,
FileSystemEventHandler,
)
from watchdog.observers import Observer
@@ -20,11 +19,11 @@ class IngestWatcher:
self.on_file_changed = on_file_changed
class Handler(FileSystemEventHandler):
def on_modified(self, event: DirModifiedEvent | FileModifiedEvent) -> None:
def on_modified(self, event: FileSystemEvent) -> None:
if isinstance(event, FileModifiedEvent):
on_file_changed(Path(event.src_path))
def on_created(self, event: DirCreatedEvent | FileCreatedEvent) -> None:
def on_created(self, event: FileSystemEvent) -> None:
if isinstance(event, FileCreatedEvent):
on_file_changed(Path(event.src_path))

View File

@@ -1,6 +1,6 @@
from typing import Any, Literal
from llama_index import Document
from llama_index.core.schema import Document
from pydantic import BaseModel, Field