mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-27 08:58:48 +00:00
Update and rename everynote.py to evernote.py (#1060)
Updating this base file as well as the .ipynb file of the example on the website: https://github.com/hwchase17/langchain/compare/master...akshayvkt:langchain:patch-1 https://langchain.readthedocs.io/en/latest/modules/document_loaders/examples/everynote.html
This commit is contained in:
parent
34cba2da32
commit
d8ed286200
@ -1,4 +1,4 @@
|
|||||||
"""Load documents from Everynote.
|
"""Load documents from Evernote.
|
||||||
|
|
||||||
https://gist.github.com/foxmask/7b29c43a161e001ff04afdb2f181e31c
|
https://gist.github.com/foxmask/7b29c43a161e001ff04afdb2f181e31c
|
||||||
"""
|
"""
|
||||||
@ -52,7 +52,7 @@ def _parse_note(note: List) -> dict:
|
|||||||
|
|
||||||
|
|
||||||
def _parse_note_xml(xml_file: str) -> str:
|
def _parse_note_xml(xml_file: str) -> str:
|
||||||
"""Parse everynote xml."""
|
"""Parse Evernote xml."""
|
||||||
# Without huge_tree set to True, parser may complain about huge text node
|
# Without huge_tree set to True, parser may complain about huge text node
|
||||||
# Try to recover, because there may be "&nbsp;", which will cause
|
# Try to recover, because there may be "&nbsp;", which will cause
|
||||||
# "XMLSyntaxError: Entity 'nbsp' not defined"
|
# "XMLSyntaxError: Entity 'nbsp' not defined"
|
||||||
@ -68,15 +68,15 @@ def _parse_note_xml(xml_file: str) -> str:
|
|||||||
return result_string
|
return result_string
|
||||||
|
|
||||||
|
|
||||||
class EveryNoteLoader(BaseLoader):
|
class EverNoteLoader(BaseLoader):
|
||||||
"""Loader to load in EverNnote files.."""
|
"""Loader to load in EverNote files.."""
|
||||||
|
|
||||||
def __init__(self, file_path: str):
|
def __init__(self, file_path: str):
|
||||||
"""Initialize with file path."""
|
"""Initialize with file path."""
|
||||||
self.file_path = file_path
|
self.file_path = file_path
|
||||||
|
|
||||||
def load(self) -> List[Document]:
|
def load(self) -> List[Document]:
|
||||||
"""Load document from EveryNote file."""
|
"""Load document from EverNote file."""
|
||||||
text = _parse_note_xml(self.file_path)
|
text = _parse_note_xml(self.file_path)
|
||||||
metadata = {"source": self.file_path}
|
metadata = {"source": self.file_path}
|
||||||
return [Document(page_content=text, metadata=metadata)]
|
return [Document(page_content=text, metadata=metadata)]
|
Loading…
Reference in New Issue
Block a user