diff --git a/private_gpt/components/ingest/ingest_helper.py b/private_gpt/components/ingest/ingest_helper.py index 054153e8..da62568b 100644 --- a/private_gpt/components/ingest/ingest_helper.py +++ b/private_gpt/components/ingest/ingest_helper.py @@ -94,6 +94,7 @@ class IngestionHelper: logger.debug("Specific reader found for extension=%s", extension) documents = reader_cls().load_data(file_data) + # Strip NUL characters from the text, since Postgres cannot store them for i in range(len(documents)): documents[i].text = documents[i].text.replace("\u0000", "")