mirror of
https://github.com/imartinez/privateGPT.git
synced 2025-08-10 11:52:10 +00:00
fix: parse metadata as json, allow metadata typing
This commit is contained in:
parent
f47c05730d
commit
9177a0ad73
@ -44,7 +44,7 @@ class BaseIngestComponent(abc.ABC):
|
|||||||
self,
|
self,
|
||||||
file_name: str,
|
file_name: str,
|
||||||
file_data: Path,
|
file_data: Path,
|
||||||
file_metadata: dict[str, str] | None = None,
|
file_metadata: dict[str, Any] | None = None,
|
||||||
) -> list[Document]:
|
) -> list[Document]:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -126,7 +126,7 @@ class SimpleIngestComponent(BaseIngestComponentWithIndex):
|
|||||||
self,
|
self,
|
||||||
file_name: str,
|
file_name: str,
|
||||||
file_data: Path,
|
file_data: Path,
|
||||||
file_metadata: dict[str, str] | None = None,
|
file_metadata: dict[str, Any] | None = None,
|
||||||
) -> list[Document]:
|
) -> list[Document]:
|
||||||
logger.info("Ingesting file_name=%s", file_name)
|
logger.info("Ingesting file_name=%s", file_name)
|
||||||
documents = IngestionHelper.transform_file_into_documents(
|
documents = IngestionHelper.transform_file_into_documents(
|
||||||
@ -191,7 +191,7 @@ class BatchIngestComponent(BaseIngestComponentWithIndex):
|
|||||||
self,
|
self,
|
||||||
file_name: str,
|
file_name: str,
|
||||||
file_data: Path,
|
file_data: Path,
|
||||||
file_metadata: dict[str, str] | None = None,
|
file_metadata: dict[str, Any] | None = None,
|
||||||
) -> list[Document]:
|
) -> list[Document]:
|
||||||
logger.info("Ingesting file_name=%s", file_name)
|
logger.info("Ingesting file_name=%s", file_name)
|
||||||
documents = IngestionHelper.transform_file_into_documents(
|
documents = IngestionHelper.transform_file_into_documents(
|
||||||
@ -281,7 +281,7 @@ class ParallelizedIngestComponent(BaseIngestComponentWithIndex):
|
|||||||
self,
|
self,
|
||||||
file_name: str,
|
file_name: str,
|
||||||
file_data: Path,
|
file_data: Path,
|
||||||
file_metadata: dict[str, str] | None = None,
|
file_metadata: dict[str, Any] | None = None,
|
||||||
) -> list[Document]:
|
) -> list[Document]:
|
||||||
logger.info("Ingesting file_name=%s", file_name)
|
logger.info("Ingesting file_name=%s", file_name)
|
||||||
# Running in a single (1) process to release the current
|
# Running in a single (1) process to release the current
|
||||||
@ -489,7 +489,7 @@ class PipelineIngestComponent(BaseIngestComponentWithIndex):
|
|||||||
self,
|
self,
|
||||||
file_name: str,
|
file_name: str,
|
||||||
file_data: Path,
|
file_data: Path,
|
||||||
file_metadata: dict[str, str] | None = None,
|
file_metadata: dict[str, Any] | None = None,
|
||||||
) -> list[Document]:
|
) -> list[Document]:
|
||||||
documents = IngestionHelper.transform_file_into_documents(
|
documents = IngestionHelper.transform_file_into_documents(
|
||||||
file_name, file_data, file_metadata
|
file_name, file_data, file_metadata
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
import logging
|
import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
from llama_index.core.readers import StringIterableReader
|
from llama_index.core.readers import StringIterableReader
|
||||||
from llama_index.core.readers.base import BaseReader
|
from llama_index.core.readers.base import BaseReader
|
||||||
@ -69,7 +70,7 @@ class IngestionHelper:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def transform_file_into_documents(
|
def transform_file_into_documents(
|
||||||
file_name: str, file_data: Path, file_metadata: dict[str, str] | None = None
|
file_name: str, file_data: Path, file_metadata: dict[str, Any] | None = None
|
||||||
) -> list[Document]:
|
) -> list[Document]:
|
||||||
documents = IngestionHelper._load_file_to_documents(file_name, file_data)
|
documents = IngestionHelper._load_file_to_documents(file_name, file_data)
|
||||||
for document in documents:
|
for document in documents:
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
from typing import Literal
|
import json
|
||||||
|
from typing import Any, Literal
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends, Form, HTTPException, Request, UploadFile
|
from fastapi import APIRouter, Depends, Form, HTTPException, Request, UploadFile
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
@ -20,7 +21,7 @@ class IngestTextBody(BaseModel):
|
|||||||
"Chinese martial arts."
|
"Chinese martial arts."
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
metadata: dict[str, str] = Field(
|
metadata: dict[str, Any] = Field(
|
||||||
None,
|
None,
|
||||||
examples=[
|
examples=[
|
||||||
{
|
{
|
||||||
@ -55,6 +56,7 @@ def ingest_file(
|
|||||||
|
|
||||||
metadata: Optional metadata to be associated with the file.
|
metadata: Optional metadata to be associated with the file.
|
||||||
You do not have to specify this field if not needed.
|
You do not have to specify this field if not needed.
|
||||||
|
The metadata needs to be in JSON format.
|
||||||
e.g. {"title": "Avatar: The Last Airbender", "year": "2005"}
|
e.g. {"title": "Avatar: The Last Airbender", "year": "2005"}
|
||||||
|
|
||||||
The context obtained from files is later used in
|
The context obtained from files is later used in
|
||||||
@ -74,7 +76,7 @@ def ingest_file(
|
|||||||
if file.filename is None:
|
if file.filename is None:
|
||||||
raise HTTPException(400, "No file name provided")
|
raise HTTPException(400, "No file name provided")
|
||||||
|
|
||||||
metadata_dict = None if metadata is None else eval(metadata)
|
metadata_dict = None if metadata is None else json.loads(metadata)
|
||||||
ingested_documents = service.ingest_bin_data(
|
ingested_documents = service.ingest_bin_data(
|
||||||
file.filename, file.file, metadata_dict
|
file.filename, file.file, metadata_dict
|
||||||
)
|
)
|
||||||
|
5
tests/fixtures/ingest_helper.py
vendored
5
tests/fixtures/ingest_helper.py
vendored
@ -1,5 +1,6 @@
|
|||||||
import json
|
import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from fastapi.testclient import TestClient
|
from fastapi.testclient import TestClient
|
||||||
@ -19,7 +20,9 @@ class IngestHelper:
|
|||||||
ingest_result = IngestResponse.model_validate(response.json())
|
ingest_result = IngestResponse.model_validate(response.json())
|
||||||
return ingest_result
|
return ingest_result
|
||||||
|
|
||||||
def ingest_file_with_metadata(self, path: Path, metadata: dict) -> IngestResponse:
|
def ingest_file_with_metadata(
|
||||||
|
self, path: Path, metadata: dict[str, Any]
|
||||||
|
) -> IngestResponse:
|
||||||
files = {
|
files = {
|
||||||
"file": (path.name, path.open("rb")),
|
"file": (path.name, path.open("rb")),
|
||||||
"metadata": (None, json.dumps(metadata)),
|
"metadata": (None, json.dumps(metadata)),
|
||||||
|
Loading…
Reference in New Issue
Block a user