mirror of
https://github.com/hwchase17/langchain.git
synced 2026-01-29 21:30:18 +00:00
# TextLoader auto-detect encoding and enhanced exception handling

- Add an option to enable encoding detection on `TextLoader`.
- The detection is done using `chardet`.
- Loading tries each detected encoding in order of confidence and raises an exception if none of them succeeds.

### New Dependencies:

- `chardet`

Fixes #4479

## Before submitting

<!-- If you're adding a new integration, include an integration test and an example notebook showing its use! -->

## Who can review?

Community members can review the PR once tests pass. Tag maintainers/contributors who might be interested:

- @eyurtsev

---------

Co-authored-by: blob42 <spike@w530>
This commit is contained in:
@@ -89,6 +89,8 @@ gql = {version = "^3.4.1", optional = true}
|
||||
pandas = {version = "^2.0.1", optional = true}
|
||||
telethon = {version = "^1.28.5", optional = true}
|
||||
zep-python = {version = "^0.25", optional = true}
|
||||
# Powers TextLoader's optional encoding auto-detection.
chardet = {version = "^5.1.0", optional = true}
|
||||
|
||||
|
||||
[tool.poetry.group.docs.dependencies]
|
||||
autodoc_pydantic = "^1.8.0"
|
||||
@@ -156,6 +158,7 @@ ruff = "^0.0.249"
|
||||
types-toml = "^0.10.8.1"
|
||||
types-redis = "^4.3.21.6"
|
||||
black = "^23.1.0"
|
||||
types-chardet = "^5.0.4.6"
|
||||
|
||||
[tool.poetry.group.typing.dependencies]
|
||||
mypy = "^0.991"
|
||||
@@ -174,6 +177,7 @@ setuptools = "^67.6.1"
|
||||
llms = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "manifest-ml", "torch", "transformers"]
|
||||
qdrant = ["qdrant-client"]
|
||||
openai = ["openai", "tiktoken"]
|
||||
text_helpers = ["chardet"]
|
||||
cohere = ["cohere"]
|
||||
in_memory_store = ["docarray"]
|
||||
hnswlib = ["docarray", "protobuf", "hnswlib"]
|
||||
@@ -185,6 +189,7 @@ all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "
|
||||
# merge-conflicts
|
||||
extended_testing = [
|
||||
"beautifulsoup4",
|
||||
"chardet",
|
||||
"jq",
|
||||
"pdfminer.six",
|
||||
"pypdf",
|
||||
|
||||
Reference in New Issue
Block a user