mirror of
https://github.com/imartinez/privateGPT.git
synced 2025-06-03 12:39:37 +00:00
fix: Sanitize null bytes before ingestion (#2090)
* Sanitize null bytes before ingestion * Added comments
This commit is contained in:
parent
fa3c30661d
commit
5fbb402477
@@ -92,7 +92,13 @@ class IngestionHelper:
|
|||||||
return string_reader.load_data([file_data.read_text()])
|
return string_reader.load_data([file_data.read_text()])
|
||||||
|
|
||||||
logger.debug("Specific reader found for extension=%s", extension)
|
logger.debug("Specific reader found for extension=%s", extension)
|
||||||
return reader_cls().load_data(file_data)
|
documents = reader_cls().load_data(file_data)
|
||||||
|
|
||||||
|
# Sanitize NUL bytes in text which can't be stored in Postgres
|
||||||
|
for i in range(len(documents)):
|
||||||
|
documents[i].text = documents[i].text.replace("\u0000", "")
|
||||||
|
|
||||||
|
return documents
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _exclude_metadata(documents: list[Document]) -> None:
|
def _exclude_metadata(documents: list[Document]) -> None:
|
||||||
|
Loading…
Reference in New Issue
Block a user