Add PythonLoader which auto-detects encoding of Python files (#3311)

This PR contributes a `PythonLoader`, which inherits from
`TextLoader` but detects and sets the encoding automatically.
This commit is contained in:
Paul Garner
2023-04-21 18:47:57 +01:00
committed by GitHub
parent 1ecbeec24e
commit aa9d5707e0
7 changed files with 104 additions and 3 deletions

View File

@@ -0,0 +1,19 @@
from pathlib import Path
import pytest
from langchain.document_loaders.python import PythonLoader
@pytest.mark.parametrize(
    "filename",
    ["default-encoding.py", "non-utf8-encoding.py"],
)
def test_python_loader(filename: str) -> None:
    """Load an example file and expect one document tagged with its path."""
    # Example fixtures live in the "examples" directory two levels above
    # this test module.
    examples_dir = Path(__file__).parent.parent / "examples"
    source_path = str(examples_dir / filename)

    documents = PythonLoader(source_path).load()

    # The loader is expected to return the whole file as a single document.
    assert len(documents) == 1
    # The document's metadata must record the path it was loaded from.
    assert documents[0].metadata["source"] == source_path