mirror of
https://github.com/imartinez/privateGPT.git
synced 2025-04-27 11:21:34 +00:00
fix: Sanitize null bytes before ingestion (#2090)
* Sanitize null bytes before ingestion
* Added comments
This commit is contained in:
parent
fa3c30661d
commit
5fbb402477
@ -92,7 +92,13 @@ class IngestionHelper:
|
||||
return string_reader.load_data([file_data.read_text()])
|
||||
|
||||
logger.debug("Specific reader found for extension=%s", extension)
|
||||
return reader_cls().load_data(file_data)
|
||||
documents = reader_cls().load_data(file_data)
|
||||
|
||||
# Sanitize NUL bytes in text which can't be stored in Postgres
|
||||
for i in range(len(documents)):
|
||||
documents[i].text = documents[i].text.replace("\u0000", "")
|
||||
|
||||
return documents
|
||||
|
||||
@staticmethod
|
||||
def _exclude_metadata(documents: list[Document]) -> None:
|
||||
|
Loading…
Reference in New Issue
Block a user