diff --git a/private_gpt/components/ingest/ingest_helper.py b/private_gpt/components/ingest/ingest_helper.py index a1109070..da62568b 100644 --- a/private_gpt/components/ingest/ingest_helper.py +++ b/private_gpt/components/ingest/ingest_helper.py @@ -92,7 +92,13 @@ class IngestionHelper: return string_reader.load_data([file_data.read_text()]) logger.debug("Specific reader found for extension=%s", extension) - return reader_cls().load_data(file_data) + documents = reader_cls().load_data(file_data) + + # Sanitize NUL bytes in text which can't be stored in Postgres + for i in range(len(documents)): + documents[i].text = documents[i].text.replace("\u0000", "") + + return documents @staticmethod def _exclude_metadata(documents: list[Document]) -> None: