diff --git a/private_gpt/components/ingest/ingest_component.py b/private_gpt/components/ingest/ingest_component.py index 122679f9..674d770f 100644 --- a/private_gpt/components/ingest/ingest_component.py +++ b/private_gpt/components/ingest/ingest_component.py @@ -44,7 +44,7 @@ class BaseIngestComponent(abc.ABC): self, file_name: str, file_data: Path, - file_metadata: dict[str, str] | None = None, + file_metadata: dict[str, Any] | None = None, ) -> list[Document]: pass @@ -126,7 +126,7 @@ class SimpleIngestComponent(BaseIngestComponentWithIndex): self, file_name: str, file_data: Path, - file_metadata: dict[str, str] | None = None, + file_metadata: dict[str, Any] | None = None, ) -> list[Document]: logger.info("Ingesting file_name=%s", file_name) documents = IngestionHelper.transform_file_into_documents( @@ -191,7 +191,7 @@ class BatchIngestComponent(BaseIngestComponentWithIndex): self, file_name: str, file_data: Path, - file_metadata: dict[str, str] | None = None, + file_metadata: dict[str, Any] | None = None, ) -> list[Document]: logger.info("Ingesting file_name=%s", file_name) documents = IngestionHelper.transform_file_into_documents( @@ -281,7 +281,7 @@ class ParallelizedIngestComponent(BaseIngestComponentWithIndex): self, file_name: str, file_data: Path, - file_metadata: dict[str, str] | None = None, + file_metadata: dict[str, Any] | None = None, ) -> list[Document]: logger.info("Ingesting file_name=%s", file_name) # Running in a single (1) process to release the current @@ -489,7 +489,7 @@ class PipelineIngestComponent(BaseIngestComponentWithIndex): self, file_name: str, file_data: Path, - file_metadata: dict[str, str] | None = None, + file_metadata: dict[str, Any] | None = None, ) -> list[Document]: documents = IngestionHelper.transform_file_into_documents( file_name, file_data, file_metadata diff --git a/private_gpt/components/ingest/ingest_helper.py b/private_gpt/components/ingest/ingest_helper.py index 9ff2e685..4ac358a9 100644 --- a/private_gpt/components/ingest/ingest_helper.py +++ b/private_gpt/components/ingest/ingest_helper.py @@ -1,5 +1,6 @@ import logging from pathlib import Path +from typing import Any from llama_index.core.readers import StringIterableReader from llama_index.core.readers.base import BaseReader @@ -69,7 +70,7 @@ class IngestionHelper: @staticmethod def transform_file_into_documents( - file_name: str, file_data: Path, file_metadata: dict[str, str] | None = None + file_name: str, file_data: Path, file_metadata: dict[str, Any] | None = None ) -> list[Document]: documents = IngestionHelper._load_file_to_documents(file_name, file_data) for document in documents: diff --git a/private_gpt/server/ingest/ingest_router.py b/private_gpt/server/ingest/ingest_router.py index cda60ee7..d9136be7 100644 --- a/private_gpt/server/ingest/ingest_router.py +++ b/private_gpt/server/ingest/ingest_router.py @@ -1,4 +1,5 @@ -from typing import Literal +import json +from typing import Any, Literal from fastapi import APIRouter, Depends, Form, HTTPException, Request, UploadFile from pydantic import BaseModel, Field @@ -20,7 +21,7 @@ class IngestTextBody(BaseModel): "Chinese martial arts." ] ) - metadata: dict[str, str] = Field( + metadata: dict[str, Any] = Field( None, examples=[ { @@ -55,6 +56,7 @@ def ingest_file( metadata: Optional metadata to be associated with the file. You do not have to specify this field if not needed. + The metadata needs to be in JSON format. e.g. {"title": "Avatar: The Last Airbender", "year": "2005"} The context obtained from files is later used in @@ -74,7 +76,7 @@ def ingest_file( if file.filename is None: raise HTTPException(400, "No file name provided") - metadata_dict = None if metadata is None else eval(metadata) + metadata_dict = None if metadata is None else json.loads(metadata) ingested_documents = service.ingest_bin_data( file.filename, file.file, metadata_dict ) diff --git a/tests/fixtures/ingest_helper.py b/tests/fixtures/ingest_helper.py index 4e6155ae..ab794667 100644 --- a/tests/fixtures/ingest_helper.py +++ b/tests/fixtures/ingest_helper.py @@ -1,5 +1,6 @@ import json from pathlib import Path +from typing import Any import pytest from fastapi.testclient import TestClient @@ -19,7 +20,9 @@ class IngestHelper: ingest_result = IngestResponse.model_validate(response.json()) return ingest_result - def ingest_file_with_metadata(self, path: Path, metadata: dict) -> IngestResponse: + def ingest_file_with_metadata( + self, path: Path, metadata: dict[str, Any] + ) -> IngestResponse: files = { "file": (path.name, path.open("rb")), "metadata": (None, json.dumps(metadata)),