fix: parse metadata as json, allow metadata typing

2025-08-10 11:52:10 +00:00 · 2024-07-29 09:26:40 +02:00 · 2024-07-29 09:26:40 +02:00 · 9177a0ad73
commit 9177a0ad73
parent f47c05730d
4 changed files with 16 additions and 10 deletions
--- a/private_gpt/components/ingest/ingest_component.py
+++ b/private_gpt/components/ingest/ingest_component.py
@@ -44,7 +44,7 @@ class BaseIngestComponent(abc.ABC):
        self,
        file_name: str,
        file_data: Path,
-        file_metadata: dict[str, str] | None = None,
+        file_metadata: dict[str, Any] | None = None,
    ) -> list[Document]:
        pass
@@ -126,7 +126,7 @@ class SimpleIngestComponent(BaseIngestComponentWithIndex):
        self,
        file_name: str,
        file_data: Path,
-        file_metadata: dict[str, str] | None = None,
+        file_metadata: dict[str, Any] | None = None,
    ) -> list[Document]:
        logger.info("Ingesting file_name=%s", file_name)
        documents = IngestionHelper.transform_file_into_documents(
@@ -191,7 +191,7 @@ class BatchIngestComponent(BaseIngestComponentWithIndex):
        self,
        file_name: str,
        file_data: Path,
-        file_metadata: dict[str, str] | None = None,
+        file_metadata: dict[str, Any] | None = None,
    ) -> list[Document]:
        logger.info("Ingesting file_name=%s", file_name)
        documents = IngestionHelper.transform_file_into_documents(
@@ -281,7 +281,7 @@ class ParallelizedIngestComponent(BaseIngestComponentWithIndex):
        self,
        file_name: str,
        file_data: Path,
-        file_metadata: dict[str, str] | None = None,
+        file_metadata: dict[str, Any] | None = None,
    ) -> list[Document]:
        logger.info("Ingesting file_name=%s", file_name)
        # Running in a single (1) process to release the current
@@ -489,7 +489,7 @@ class PipelineIngestComponent(BaseIngestComponentWithIndex):
        self,
        file_name: str,
        file_data: Path,
-        file_metadata: dict[str, str] | None = None,
+        file_metadata: dict[str, Any] | None = None,
    ) -> list[Document]:
        documents = IngestionHelper.transform_file_into_documents(
            file_name, file_data, file_metadata
--- a/private_gpt/components/ingest/ingest_helper.py
+++ b/private_gpt/components/ingest/ingest_helper.py
@@ -1,5 +1,6 @@
 import logging
 from pathlib import Path
+from typing import Any
 from llama_index.core.readers import StringIterableReader
 from llama_index.core.readers.base import BaseReader
@@ -69,7 +70,7 @@ class IngestionHelper:
    @staticmethod
    def transform_file_into_documents(
-        file_name: str, file_data: Path, file_metadata: dict[str, str] | None = None
+        file_name: str, file_data: Path, file_metadata: dict[str, Any] | None = None
    ) -> list[Document]:
        documents = IngestionHelper._load_file_to_documents(file_name, file_data)
        for document in documents:
--- a/private_gpt/server/ingest/ingest_router.py
+++ b/private_gpt/server/ingest/ingest_router.py
@@ -1,4 +1,5 @@
-from typing import Literal
+import json
+from typing import Any, Literal
 from fastapi import APIRouter, Depends, Form, HTTPException, Request, UploadFile
 from pydantic import BaseModel, Field
@@ -20,7 +21,7 @@ class IngestTextBody(BaseModel):
            "Chinese martial arts."
        ]
    )
-    metadata: dict[str, str] = Field(
+    metadata: dict[str, Any] = Field(
        None,
        examples=[
            {
@@ -55,6 +56,7 @@ def ingest_file(
    metadata: Optional metadata to be associated with the file.
    You do not have to specify this field if not needed.
+    The metadata needs to be in JSON format.
    e.g. {"title": "Avatar: The Last Airbender", "year": "2005"}
    The context obtained from files is later used in
@@ -74,7 +76,7 @@ def ingest_file(
    if file.filename is None:
        raise HTTPException(400, "No file name provided")
-    metadata_dict = None if metadata is None else eval(metadata)
+    metadata_dict = None if metadata is None else json.loads(metadata)
    ingested_documents = service.ingest_bin_data(
        file.filename, file.file, metadata_dict
    )
--- a/tests/fixtures/ingest_helper.py
+++ b/tests/fixtures/ingest_helper.py
@@ -1,5 +1,6 @@
 import json
 from pathlib import Path
+from typing import Any
 import pytest
 from fastapi.testclient import TestClient
@@ -19,7 +20,9 @@ class IngestHelper:
        ingest_result = IngestResponse.model_validate(response.json())
        return ingest_result
-    def ingest_file_with_metadata(self, path: Path, metadata: dict) -> IngestResponse:
+    def ingest_file_with_metadata(
+        self, path: Path, metadata: dict[str, Any]
+    ) -> IngestResponse:
        files = {
            "file": (path.name, path.open("rb")),
            "metadata": (None, json.dumps(metadata)),