Mirror of https://github.com/imartinez/privateGPT.git, synced 2025-04-27 11:21:34 +00:00.
* Extract optional dependencies.
* Separate local mode into llms-llama-cpp and embeddings-huggingface for clarity.
* Support Ollama embeddings.
* Upgrade to llama-index 0.10.14; remove legacy use of ServiceContext in ContextChatEngine.
* Fix vector retriever filters.
33 lines
981 B
Python
33 lines
981 B
Python
from typing import Any, Literal
|
|
|
|
from llama_index.core.schema import Document
|
|
from pydantic import BaseModel, Field
|
|
|
|
|
|
class IngestedDoc(BaseModel):
    """API model describing a document that has been ingested into the index."""

    # Discriminator tag identifying this payload type in API responses.
    object: Literal["ingest.document"]
    # Identifier assigned to the document at ingestion time.
    doc_id: str = Field(examples=["c202d5e6-7b69-4869-81cc-dd574ee8ee11"])
    # Curated source metadata (e.g. page label, file name), if any was attached.
    doc_metadata: dict[str, Any] | None = Field(
        examples=[
            {
                "page_label": "2",
                "file_name": "Sales Report Q3 2023.pdf",
            }
        ]
    )

    @staticmethod
    def curate_metadata(metadata: dict[str, Any]) -> dict[str, Any]:
        """Return a copy of *metadata* with unwanted internal keys removed.

        Unlike an in-place ``pop``, this leaves the input mapping untouched,
        so callers that share the dict (e.g. the index's own node metadata
        passed in by ``from_document``) are not silently mutated.
        """
        excluded = {"doc_id", "window", "original_text"}
        return {key: value for key, value in metadata.items() if key not in excluded}

    @staticmethod
    def from_document(document: Document) -> "IngestedDoc":
        """Build an ``IngestedDoc`` from a llama-index ``Document``."""
        return IngestedDoc(
            object="ingest.document",
            doc_id=document.doc_id,
            doc_metadata=IngestedDoc.curate_metadata(document.metadata),
        )
|