mirror of
https://github.com/imartinez/privateGPT.git
synced 2025-06-28 16:26:56 +00:00
Sanitize null bytes before ingestion
This commit is contained in:
parent
77461b96cf
commit
43a9dbe21b
@ -92,7 +92,12 @@ class IngestionHelper:
|
||||
return string_reader.load_data([file_data.read_text()])
|
||||
|
||||
logger.debug("Specific reader found for extension=%s", extension)
|
||||
return reader_cls().load_data(file_data)
|
||||
documents = reader_cls().load_data(file_data)
|
||||
|
||||
for i in range(len(documents)):
|
||||
documents[i].text = documents[i].text.replace("\u0000", "")
|
||||
|
||||
return documents
|
||||
|
||||
@staticmethod
|
||||
def _exclude_metadata(documents: list[Document]) -> None:
|
||||
|
Loading…
Reference in New Issue
Block a user