mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-06 21:20:33 +00:00
infra[patch]: Add unit tests for Huggingface dataset loader (#14053)
- **Description:** Add unit tests for the Hugging Face dataset loader, plus a sample Hugging Face dataset for future tests. Updates dependencies to add the `datasets` module.
- Adds coverage for [previous pull request](https://github.com/langchain-ai/langchain/pull/13864)
- **Tag maintainer:** @hwchase17

---------

Co-authored-by: Amy Han <amyhan@Amys-Air.lan>
Co-authored-by: Amy Han <amyhan@Amys-MacBook-Air.local>
Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
6eb40db353
commit
b6d26d3f9f
179
libs/langchain/poetry.lock
generated
179
libs/langchain/poetry.lock
generated
@ -1855,6 +1855,49 @@ files = [
|
|||||||
marshmallow = ">=3.18.0,<4.0.0"
|
marshmallow = ">=3.18.0,<4.0.0"
|
||||||
typing-inspect = ">=0.4.0,<1"
|
typing-inspect = ">=0.4.0,<1"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "datasets"
|
||||||
|
version = "2.15.0"
|
||||||
|
description = "HuggingFace community-driven open-source library of datasets"
|
||||||
|
optional = true
|
||||||
|
python-versions = ">=3.8.0"
|
||||||
|
files = [
|
||||||
|
{file = "datasets-2.15.0-py3-none-any.whl", hash = "sha256:6d658d23811393dfc982d026082e1650bdaaae28f6a86e651966cb072229a228"},
|
||||||
|
{file = "datasets-2.15.0.tar.gz", hash = "sha256:a26d059370bd7503bd60e9337977199a13117a83f72fb61eda7e66f0c4d50b2b"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
aiohttp = "*"
|
||||||
|
dill = ">=0.3.0,<0.3.8"
|
||||||
|
fsspec = {version = ">=2023.1.0,<=2023.10.0", extras = ["http"]}
|
||||||
|
huggingface-hub = ">=0.18.0"
|
||||||
|
multiprocess = "*"
|
||||||
|
numpy = ">=1.17"
|
||||||
|
packaging = "*"
|
||||||
|
pandas = "*"
|
||||||
|
pyarrow = ">=8.0.0"
|
||||||
|
pyarrow-hotfix = "*"
|
||||||
|
pyyaml = ">=5.1"
|
||||||
|
requests = ">=2.19.0"
|
||||||
|
tqdm = ">=4.62.1"
|
||||||
|
xxhash = "*"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
apache-beam = ["apache-beam (>=2.26.0,<2.44.0)"]
|
||||||
|
audio = ["librosa", "soundfile (>=0.12.1)"]
|
||||||
|
benchmarks = ["tensorflow (==2.12.0)", "torch (==2.0.1)", "transformers (==4.30.1)"]
|
||||||
|
dev = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0,<2.44.0)", "black (>=23.1,<24.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "pyyaml (>=5.3.1)", "rarfile (>=4.0)", "ruff (>=0.0.241)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy (<2.0.0)", "tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "tensorflow-macos", "tiktoken", "torch", "transformers", "typing-extensions (>=4.6.1)", "zstandard"]
|
||||||
|
docs = ["s3fs", "tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow-macos", "torch", "transformers"]
|
||||||
|
jax = ["jax (>=0.3.14)", "jaxlib (>=0.3.14)"]
|
||||||
|
metrics-tests = ["Werkzeug (>=1.0.1)", "accelerate", "bert-score (>=0.3.6)", "jiwer", "langdetect", "mauve-text", "nltk", "requests-file (>=1.5.1)", "rouge-score", "sacrebleu", "sacremoses", "scikit-learn", "scipy", "sentencepiece", "seqeval", "six (>=1.15.0,<1.16.0)", "spacy (>=3.0.0)", "texttable (>=1.6.3)", "tldextract", "tldextract (>=3.1.0)", "toml (>=0.10.1)", "typer (<0.5.0)"]
|
||||||
|
quality = ["black (>=23.1,<24.0)", "pyyaml (>=5.3.1)", "ruff (>=0.0.241)"]
|
||||||
|
s3 = ["s3fs"]
|
||||||
|
tensorflow = ["tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow-macos"]
|
||||||
|
tensorflow-gpu = ["tensorflow-gpu (>=2.2.0,!=2.6.0,!=2.6.1)"]
|
||||||
|
tests = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0,<2.44.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy (<2.0.0)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "tensorflow-macos", "tiktoken", "torch", "transformers", "typing-extensions (>=4.6.1)", "zstandard"]
|
||||||
|
torch = ["torch"]
|
||||||
|
vision = ["Pillow (>=6.2.1)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "debugpy"
|
name = "debugpy"
|
||||||
version = "1.8.0"
|
version = "1.8.0"
|
||||||
@ -2582,6 +2625,10 @@ files = [
|
|||||||
{file = "fsspec-2023.10.0.tar.gz", hash = "sha256:330c66757591df346ad3091a53bd907e15348c2ba17d63fd54f5c39c4457d2a5"},
|
{file = "fsspec-2023.10.0.tar.gz", hash = "sha256:330c66757591df346ad3091a53bd907e15348c2ba17d63fd54f5c39c4457d2a5"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
aiohttp = {version = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1", optional = true, markers = "extra == \"http\""}
|
||||||
|
requests = {version = "*", optional = true, markers = "extra == \"http\""}
|
||||||
|
|
||||||
[package.extras]
|
[package.extras]
|
||||||
abfs = ["adlfs"]
|
abfs = ["adlfs"]
|
||||||
adl = ["adlfs"]
|
adl = ["adlfs"]
|
||||||
@ -6867,6 +6914,17 @@ files = [
|
|||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
numpy = ">=1.16.6"
|
numpy = ">=1.16.6"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pyarrow-hotfix"
|
||||||
|
version = "0.6"
|
||||||
|
description = ""
|
||||||
|
optional = true
|
||||||
|
python-versions = ">=3.5"
|
||||||
|
files = [
|
||||||
|
{file = "pyarrow_hotfix-0.6-py3-none-any.whl", hash = "sha256:dcc9ae2d220dff0083be6a9aa8e0cdee5182ad358d4931fce825c545e5c89178"},
|
||||||
|
{file = "pyarrow_hotfix-0.6.tar.gz", hash = "sha256:79d3e030f7ff890d408a100ac16d6f00b14d44a502d7897cd9fc3e3a534e9945"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pyasn1"
|
name = "pyasn1"
|
||||||
version = "0.5.0"
|
version = "0.5.0"
|
||||||
@ -11095,6 +11153,123 @@ files = [
|
|||||||
{file = "xmltodict-0.13.0.tar.gz", hash = "sha256:341595a488e3e01a85a9d8911d8912fd922ede5fecc4dce437eb4b6c8d037e56"},
|
{file = "xmltodict-0.13.0.tar.gz", hash = "sha256:341595a488e3e01a85a9d8911d8912fd922ede5fecc4dce437eb4b6c8d037e56"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "xxhash"
|
||||||
|
version = "3.4.1"
|
||||||
|
description = "Python binding for xxHash"
|
||||||
|
optional = true
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
files = [
|
||||||
|
{file = "xxhash-3.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:91dbfa55346ad3e18e738742236554531a621042e419b70ad8f3c1d9c7a16e7f"},
|
||||||
|
{file = "xxhash-3.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:665a65c2a48a72068fcc4d21721510df5f51f1142541c890491afc80451636d2"},
|
||||||
|
{file = "xxhash-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb11628470a6004dc71a09fe90c2f459ff03d611376c1debeec2d648f44cb693"},
|
||||||
|
{file = "xxhash-3.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5bef2a7dc7b4f4beb45a1edbba9b9194c60a43a89598a87f1a0226d183764189"},
|
||||||
|
{file = "xxhash-3.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c0f7b2d547d72c7eda7aa817acf8791f0146b12b9eba1d4432c531fb0352228"},
|
||||||
|
{file = "xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00f2fdef6b41c9db3d2fc0e7f94cb3db86693e5c45d6de09625caad9a469635b"},
|
||||||
|
{file = "xxhash-3.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:23cfd9ca09acaf07a43e5a695143d9a21bf00f5b49b15c07d5388cadf1f9ce11"},
|
||||||
|
{file = "xxhash-3.4.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:6a9ff50a3cf88355ca4731682c168049af1ca222d1d2925ef7119c1a78e95b3b"},
|
||||||
|
{file = "xxhash-3.4.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:f1d7c69a1e9ca5faa75546fdd267f214f63f52f12692f9b3a2f6467c9e67d5e7"},
|
||||||
|
{file = "xxhash-3.4.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:672b273040d5d5a6864a36287f3514efcd1d4b1b6a7480f294c4b1d1ee1b8de0"},
|
||||||
|
{file = "xxhash-3.4.1-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:4178f78d70e88f1c4a89ff1ffe9f43147185930bb962ee3979dba15f2b1cc799"},
|
||||||
|
{file = "xxhash-3.4.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9804b9eb254d4b8cc83ab5a2002128f7d631dd427aa873c8727dba7f1f0d1c2b"},
|
||||||
|
{file = "xxhash-3.4.1-cp310-cp310-win32.whl", hash = "sha256:c09c49473212d9c87261d22c74370457cfff5db2ddfc7fd1e35c80c31a8c14ce"},
|
||||||
|
{file = "xxhash-3.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:ebbb1616435b4a194ce3466d7247df23499475c7ed4eb2681a1fa42ff766aff6"},
|
||||||
|
{file = "xxhash-3.4.1-cp310-cp310-win_arm64.whl", hash = "sha256:25dc66be3db54f8a2d136f695b00cfe88018e59ccff0f3b8f545869f376a8a46"},
|
||||||
|
{file = "xxhash-3.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:58c49083801885273e262c0f5bbeac23e520564b8357fbb18fb94ff09d3d3ea5"},
|
||||||
|
{file = "xxhash-3.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b526015a973bfbe81e804a586b703f163861da36d186627e27524f5427b0d520"},
|
||||||
|
{file = "xxhash-3.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36ad4457644c91a966f6fe137d7467636bdc51a6ce10a1d04f365c70d6a16d7e"},
|
||||||
|
{file = "xxhash-3.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:248d3e83d119770f96003271fe41e049dd4ae52da2feb8f832b7a20e791d2920"},
|
||||||
|
{file = "xxhash-3.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2070b6d5bbef5ee031666cf21d4953c16e92c2f8a24a94b5c240f8995ba3b1d0"},
|
||||||
|
{file = "xxhash-3.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2746035f518f0410915e247877f7df43ef3372bf36cfa52cc4bc33e85242641"},
|
||||||
|
{file = "xxhash-3.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a8ba6181514681c2591840d5632fcf7356ab287d4aff1c8dea20f3c78097088"},
|
||||||
|
{file = "xxhash-3.4.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0aac5010869240e95f740de43cd6a05eae180c59edd182ad93bf12ee289484fa"},
|
||||||
|
{file = "xxhash-3.4.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4cb11d8debab1626181633d184b2372aaa09825bde709bf927704ed72765bed1"},
|
||||||
|
{file = "xxhash-3.4.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:b29728cff2c12f3d9f1d940528ee83918d803c0567866e062683f300d1d2eff3"},
|
||||||
|
{file = "xxhash-3.4.1-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:a15cbf3a9c40672523bdb6ea97ff74b443406ba0ab9bca10ceccd9546414bd84"},
|
||||||
|
{file = "xxhash-3.4.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6e66df260fed01ed8ea790c2913271641c58481e807790d9fca8bfd5a3c13844"},
|
||||||
|
{file = "xxhash-3.4.1-cp311-cp311-win32.whl", hash = "sha256:e867f68a8f381ea12858e6d67378c05359d3a53a888913b5f7d35fbf68939d5f"},
|
||||||
|
{file = "xxhash-3.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:200a5a3ad9c7c0c02ed1484a1d838b63edcf92ff538770ea07456a3732c577f4"},
|
||||||
|
{file = "xxhash-3.4.1-cp311-cp311-win_arm64.whl", hash = "sha256:1d03f1c0d16d24ea032e99f61c552cb2b77d502e545187338bea461fde253583"},
|
||||||
|
{file = "xxhash-3.4.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c4bbba9b182697a52bc0c9f8ec0ba1acb914b4937cd4a877ad78a3b3eeabefb3"},
|
||||||
|
{file = "xxhash-3.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9fd28a9da300e64e434cfc96567a8387d9a96e824a9be1452a1e7248b7763b78"},
|
||||||
|
{file = "xxhash-3.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6066d88c9329ab230e18998daec53d819daeee99d003955c8db6fc4971b45ca3"},
|
||||||
|
{file = "xxhash-3.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:93805bc3233ad89abf51772f2ed3355097a5dc74e6080de19706fc447da99cd3"},
|
||||||
|
{file = "xxhash-3.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:64da57d5ed586ebb2ecdde1e997fa37c27fe32fe61a656b77fabbc58e6fbff6e"},
|
||||||
|
{file = "xxhash-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a97322e9a7440bf3c9805cbaac090358b43f650516486746f7fa482672593df"},
|
||||||
|
{file = "xxhash-3.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bbe750d512982ee7d831838a5dee9e9848f3fb440e4734cca3f298228cc957a6"},
|
||||||
|
{file = "xxhash-3.4.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:fd79d4087727daf4d5b8afe594b37d611ab95dc8e29fe1a7517320794837eb7d"},
|
||||||
|
{file = "xxhash-3.4.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:743612da4071ff9aa4d055f3f111ae5247342931dedb955268954ef7201a71ff"},
|
||||||
|
{file = "xxhash-3.4.1-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:b41edaf05734092f24f48c0958b3c6cbaaa5b7e024880692078c6b1f8247e2fc"},
|
||||||
|
{file = "xxhash-3.4.1-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:a90356ead70d715fe64c30cd0969072de1860e56b78adf7c69d954b43e29d9fa"},
|
||||||
|
{file = "xxhash-3.4.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ac56eebb364e44c85e1d9e9cc5f6031d78a34f0092fea7fc80478139369a8b4a"},
|
||||||
|
{file = "xxhash-3.4.1-cp312-cp312-win32.whl", hash = "sha256:911035345932a153c427107397c1518f8ce456f93c618dd1c5b54ebb22e73747"},
|
||||||
|
{file = "xxhash-3.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:f31ce76489f8601cc7b8713201ce94b4bd7b7ce90ba3353dccce7e9e1fee71fa"},
|
||||||
|
{file = "xxhash-3.4.1-cp312-cp312-win_arm64.whl", hash = "sha256:b5beb1c6a72fdc7584102f42c4d9df232ee018ddf806e8c90906547dfb43b2da"},
|
||||||
|
{file = "xxhash-3.4.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6d42b24d1496deb05dee5a24ed510b16de1d6c866c626c2beb11aebf3be278b9"},
|
||||||
|
{file = "xxhash-3.4.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b685fab18876b14a8f94813fa2ca80cfb5ab6a85d31d5539b7cd749ce9e3624"},
|
||||||
|
{file = "xxhash-3.4.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:419ffe34c17ae2df019a4685e8d3934d46b2e0bbe46221ab40b7e04ed9f11137"},
|
||||||
|
{file = "xxhash-3.4.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0e041ce5714f95251a88670c114b748bca3bf80cc72400e9f23e6d0d59cf2681"},
|
||||||
|
{file = "xxhash-3.4.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc860d887c5cb2f524899fb8338e1bb3d5789f75fac179101920d9afddef284b"},
|
||||||
|
{file = "xxhash-3.4.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:312eba88ffe0a05e332e3a6f9788b73883752be63f8588a6dc1261a3eaaaf2b2"},
|
||||||
|
{file = "xxhash-3.4.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:e01226b6b6a1ffe4e6bd6d08cfcb3ca708b16f02eb06dd44f3c6e53285f03e4f"},
|
||||||
|
{file = "xxhash-3.4.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:9f3025a0d5d8cf406a9313cd0d5789c77433ba2004b1c75439b67678e5136537"},
|
||||||
|
{file = "xxhash-3.4.1-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:6d3472fd4afef2a567d5f14411d94060099901cd8ce9788b22b8c6f13c606a93"},
|
||||||
|
{file = "xxhash-3.4.1-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:43984c0a92f06cac434ad181f329a1445017c33807b7ae4f033878d860a4b0f2"},
|
||||||
|
{file = "xxhash-3.4.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a55e0506fdb09640a82ec4f44171273eeabf6f371a4ec605633adb2837b5d9d5"},
|
||||||
|
{file = "xxhash-3.4.1-cp37-cp37m-win32.whl", hash = "sha256:faec30437919555b039a8bdbaba49c013043e8f76c999670aef146d33e05b3a0"},
|
||||||
|
{file = "xxhash-3.4.1-cp37-cp37m-win_amd64.whl", hash = "sha256:c9e1b646af61f1fc7083bb7b40536be944f1ac67ef5e360bca2d73430186971a"},
|
||||||
|
{file = "xxhash-3.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:961d948b7b1c1b6c08484bbce3d489cdf153e4122c3dfb07c2039621243d8795"},
|
||||||
|
{file = "xxhash-3.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:719a378930504ab159f7b8e20fa2aa1896cde050011af838af7e7e3518dd82de"},
|
||||||
|
{file = "xxhash-3.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74fb5cb9406ccd7c4dd917f16630d2e5e8cbbb02fc2fca4e559b2a47a64f4940"},
|
||||||
|
{file = "xxhash-3.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5dab508ac39e0ab988039bc7f962c6ad021acd81fd29145962b068df4148c476"},
|
||||||
|
{file = "xxhash-3.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8c59f3e46e7daf4c589e8e853d700ef6607afa037bfad32c390175da28127e8c"},
|
||||||
|
{file = "xxhash-3.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8cc07256eff0795e0f642df74ad096f8c5d23fe66bc138b83970b50fc7f7f6c5"},
|
||||||
|
{file = "xxhash-3.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e9f749999ed80f3955a4af0eb18bb43993f04939350b07b8dd2f44edc98ffee9"},
|
||||||
|
{file = "xxhash-3.4.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7688d7c02149a90a3d46d55b341ab7ad1b4a3f767be2357e211b4e893efbaaf6"},
|
||||||
|
{file = "xxhash-3.4.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a8b4977963926f60b0d4f830941c864bed16aa151206c01ad5c531636da5708e"},
|
||||||
|
{file = "xxhash-3.4.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:8106d88da330f6535a58a8195aa463ef5281a9aa23b04af1848ff715c4398fb4"},
|
||||||
|
{file = "xxhash-3.4.1-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:4c76a77dbd169450b61c06fd2d5d436189fc8ab7c1571d39265d4822da16df22"},
|
||||||
|
{file = "xxhash-3.4.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:11f11357c86d83e53719c592021fd524efa9cf024dc7cb1dfb57bbbd0d8713f2"},
|
||||||
|
{file = "xxhash-3.4.1-cp38-cp38-win32.whl", hash = "sha256:0c786a6cd74e8765c6809892a0d45886e7c3dc54de4985b4a5eb8b630f3b8e3b"},
|
||||||
|
{file = "xxhash-3.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:aabf37fb8fa27430d50507deeab2ee7b1bcce89910dd10657c38e71fee835594"},
|
||||||
|
{file = "xxhash-3.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6127813abc1477f3a83529b6bbcfeddc23162cece76fa69aee8f6a8a97720562"},
|
||||||
|
{file = "xxhash-3.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ef2e194262f5db16075caea7b3f7f49392242c688412f386d3c7b07c7733a70a"},
|
||||||
|
{file = "xxhash-3.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71be94265b6c6590f0018bbf73759d21a41c6bda20409782d8117e76cd0dfa8b"},
|
||||||
|
{file = "xxhash-3.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:10e0a619cdd1c0980e25eb04e30fe96cf8f4324758fa497080af9c21a6de573f"},
|
||||||
|
{file = "xxhash-3.4.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fa122124d2e3bd36581dd78c0efa5f429f5220313479fb1072858188bc2d5ff1"},
|
||||||
|
{file = "xxhash-3.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17032f5a4fea0a074717fe33477cb5ee723a5f428de7563e75af64bfc1b1e10"},
|
||||||
|
{file = "xxhash-3.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca7783b20e3e4f3f52f093538895863f21d18598f9a48211ad757680c3bd006f"},
|
||||||
|
{file = "xxhash-3.4.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d77d09a1113899fad5f354a1eb4f0a9afcf58cefff51082c8ad643ff890e30cf"},
|
||||||
|
{file = "xxhash-3.4.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:21287bcdd299fdc3328cc0fbbdeaa46838a1c05391264e51ddb38a3f5b09611f"},
|
||||||
|
{file = "xxhash-3.4.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:dfd7a6cc483e20b4ad90224aeb589e64ec0f31e5610ab9957ff4314270b2bf31"},
|
||||||
|
{file = "xxhash-3.4.1-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:543c7fcbc02bbb4840ea9915134e14dc3dc15cbd5a30873a7a5bf66039db97ec"},
|
||||||
|
{file = "xxhash-3.4.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:fe0a98d990e433013f41827b62be9ab43e3cf18e08b1483fcc343bda0d691182"},
|
||||||
|
{file = "xxhash-3.4.1-cp39-cp39-win32.whl", hash = "sha256:b9097af00ebf429cc7c0e7d2fdf28384e4e2e91008130ccda8d5ae653db71e54"},
|
||||||
|
{file = "xxhash-3.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:d699b921af0dcde50ab18be76c0d832f803034d80470703700cb7df0fbec2832"},
|
||||||
|
{file = "xxhash-3.4.1-cp39-cp39-win_arm64.whl", hash = "sha256:2be491723405e15cc099ade1280133ccfbf6322d2ef568494fb7d07d280e7eee"},
|
||||||
|
{file = "xxhash-3.4.1-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:431625fad7ab5649368c4849d2b49a83dc711b1f20e1f7f04955aab86cd307bc"},
|
||||||
|
{file = "xxhash-3.4.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc6dbd5fc3c9886a9e041848508b7fb65fd82f94cc793253990f81617b61fe49"},
|
||||||
|
{file = "xxhash-3.4.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3ff8dbd0ec97aec842476cb8ccc3e17dd288cd6ce3c8ef38bff83d6eb927817"},
|
||||||
|
{file = "xxhash-3.4.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef73a53fe90558a4096e3256752268a8bdc0322f4692ed928b6cd7ce06ad4fe3"},
|
||||||
|
{file = "xxhash-3.4.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:450401f42bbd274b519d3d8dcf3c57166913381a3d2664d6609004685039f9d3"},
|
||||||
|
{file = "xxhash-3.4.1-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a162840cf4de8a7cd8720ff3b4417fbc10001eefdd2d21541a8226bb5556e3bb"},
|
||||||
|
{file = "xxhash-3.4.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b736a2a2728ba45017cb67785e03125a79d246462dfa892d023b827007412c52"},
|
||||||
|
{file = "xxhash-3.4.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d0ae4c2e7698adef58710d6e7a32ff518b66b98854b1c68e70eee504ad061d8"},
|
||||||
|
{file = "xxhash-3.4.1-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6322c4291c3ff174dcd104fae41500e75dad12be6f3085d119c2c8a80956c51"},
|
||||||
|
{file = "xxhash-3.4.1-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:dd59ed668801c3fae282f8f4edadf6dc7784db6d18139b584b6d9677ddde1b6b"},
|
||||||
|
{file = "xxhash-3.4.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:92693c487e39523a80474b0394645b393f0ae781d8db3474ccdcead0559ccf45"},
|
||||||
|
{file = "xxhash-3.4.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4603a0f642a1e8d7f3ba5c4c25509aca6a9c1cc16f85091004a7028607ead663"},
|
||||||
|
{file = "xxhash-3.4.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fa45e8cbfbadb40a920fe9ca40c34b393e0b067082d94006f7f64e70c7490a6"},
|
||||||
|
{file = "xxhash-3.4.1-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:595b252943b3552de491ff51e5bb79660f84f033977f88f6ca1605846637b7c6"},
|
||||||
|
{file = "xxhash-3.4.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:562d8b8f783c6af969806aaacf95b6c7b776929ae26c0cd941d54644ea7ef51e"},
|
||||||
|
{file = "xxhash-3.4.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:41ddeae47cf2828335d8d991f2d2b03b0bdc89289dc64349d712ff8ce59d0647"},
|
||||||
|
{file = "xxhash-3.4.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c44d584afdf3c4dbb3277e32321d1a7b01d6071c1992524b6543025fb8f4206f"},
|
||||||
|
{file = "xxhash-3.4.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd7bddb3a5b86213cc3f2c61500c16945a1b80ecd572f3078ddbbe68f9dabdfb"},
|
||||||
|
{file = "xxhash-3.4.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9ecb6c987b62437c2f99c01e97caf8d25660bf541fe79a481d05732e5236719c"},
|
||||||
|
{file = "xxhash-3.4.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:696b4e18b7023527d5c50ed0626ac0520edac45a50ec7cf3fc265cd08b1f4c03"},
|
||||||
|
{file = "xxhash-3.4.1.tar.gz", hash = "sha256:0379d6cf1ff987cd421609a264ce025e74f346e3e145dd106c0cc2e3ec3f99a9"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "yarl"
|
name = "yarl"
|
||||||
version = "1.9.2"
|
version = "1.9.2"
|
||||||
@ -11263,7 +11438,7 @@ cli = ["typer"]
|
|||||||
cohere = ["cohere"]
|
cohere = ["cohere"]
|
||||||
docarray = ["docarray"]
|
docarray = ["docarray"]
|
||||||
embeddings = ["sentence-transformers"]
|
embeddings = ["sentence-transformers"]
|
||||||
extended-testing = ["aiosqlite", "aleph-alpha-client", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "cohere", "dashvector", "databricks-vectorsearch", "dgml-utils", "esprima", "faiss-cpu", "feedparser", "fireworks-ai", "geopandas", "gitpython", "google-cloud-documentai", "gql", "html2text", "javelin-sdk", "jinja2", "jq", "jsonschema", "lxml", "markdownify", "motor", "msal", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "openai", "openai", "openapi-pydantic", "pandas", "pdfminer-six", "pgvector", "praw", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "timescale-vector", "tqdm", "upstash-redis", "xata", "xmltodict"]
|
extended-testing = ["aiosqlite", "aleph-alpha-client", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "cohere", "dashvector", "databricks-vectorsearch", "datasets", "dgml-utils", "esprima", "faiss-cpu", "feedparser", "fireworks-ai", "geopandas", "gitpython", "google-cloud-documentai", "gql", "html2text", "javelin-sdk", "jinja2", "jq", "jsonschema", "lxml", "markdownify", "motor", "msal", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "openai", "openai", "openapi-pydantic", "pandas", "pdfminer-six", "pgvector", "praw", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "timescale-vector", "tqdm", "upstash-redis", "xata", "xmltodict"]
|
||||||
javascript = ["esprima"]
|
javascript = ["esprima"]
|
||||||
llms = ["clarifai", "cohere", "huggingface_hub", "manifest-ml", "nlpcloud", "openai", "openlm", "torch", "transformers"]
|
llms = ["clarifai", "cohere", "huggingface_hub", "manifest-ml", "nlpcloud", "openai", "openlm", "torch", "transformers"]
|
||||||
openai = ["openai", "tiktoken"]
|
openai = ["openai", "tiktoken"]
|
||||||
@ -11273,4 +11448,4 @@ text-helpers = ["chardet"]
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = ">=3.8.1,<4.0"
|
python-versions = ">=3.8.1,<4.0"
|
||||||
content-hash = "38defed5772071d7af43b3638c4b966755903a864037c001806ca0a65998bfe1"
|
content-hash = "20041f6228e201d2ebc881c62e7505259c029c20c1171f057cc67e4cf5691850"
|
||||||
|
@ -147,6 +147,7 @@ praw = {version = "^7.7.1", optional = true}
|
|||||||
msal = {version = "^1.25.0", optional = true}
|
msal = {version = "^1.25.0", optional = true}
|
||||||
databricks-vectorsearch = {version = "^0.21", optional = true}
|
databricks-vectorsearch = {version = "^0.21", optional = true}
|
||||||
dgml-utils = {version = "^0.3.0", optional = true}
|
dgml-utils = {version = "^0.3.0", optional = true}
|
||||||
|
datasets = {version = "^2.15.0", optional = true}
|
||||||
|
|
||||||
[tool.poetry.group.test.dependencies]
|
[tool.poetry.group.test.dependencies]
|
||||||
# The only dependencies that should be added are
|
# The only dependencies that should be added are
|
||||||
@ -332,6 +333,7 @@ extended_testing = [
|
|||||||
"bibtexparser",
|
"bibtexparser",
|
||||||
"cassio",
|
"cassio",
|
||||||
"chardet",
|
"chardet",
|
||||||
|
"datasets",
|
||||||
"google-cloud-documentai",
|
"google-cloud-documentai",
|
||||||
"esprima",
|
"esprima",
|
||||||
"jq",
|
"jq",
|
||||||
|
@ -0,0 +1,109 @@
|
|||||||
|
from typing import Any, Generator, List, Tuple
|
||||||
|
|
||||||
|
import datasets
|
||||||
|
|
||||||
|
|
||||||
|
class SampleHuggingface(datasets.GeneratorBasedBuilder):
    """Small hard-coded Hugging Face dataset used as test data.

    Two builder configurations are exposed, ``v1`` and ``v2``. For either
    one, the train split and the test split each produce a single example.
    """

    # One config per sample version; both are tagged with version 1.0.0.
    BUILDER_CONFIGS = [
        datasets.BuilderConfig(
            name=config_name,
            version=datasets.Version("1.0.0"),
            description=f"Sample {config_name} description",
        )
        for config_name in ("v1", "v2")
    ]

    def _info(self) -> datasets.DatasetInfo:
        """Return the dataset description and the schema of one example."""
        # Shape of each entry inside the "dict" sequence feature.
        nested_record = {
            "dict_text": datasets.Value("string"),
            "dict_int": datasets.Value("int32"),
        }
        schema = datasets.Features(
            {
                "split": datasets.Value("string"),
                "text": datasets.Value("string"),
                "list": datasets.features.Sequence(datasets.Value("string")),
                "dict": datasets.features.Sequence(nested_record),
            }
        )
        return datasets.DatasetInfo(
            description="Sample Huggingface dataset",
            features=schema,
        )

    def _split_generators(
        self, dl_manager: datasets.DownloadManager
    ) -> List[datasets.SplitGenerator]:
        """Declare the train and test splits of the dataset.

        Args:
            dl_manager (`DownloadManager`):
                Download helper supplied by the builder API. It is not used
                here because every example is hard-coded in this file.

        Returns:
            One ``SplitGenerator`` per split. Each forwards the split label
            and the active config name to ``_generate_examples``.
        """
        splits = (
            (datasets.Split.TRAIN, "train"),
            (datasets.Split.TEST, "test"),
        )
        return [
            datasets.SplitGenerator(
                name=split_name,
                gen_kwargs={"split": split_label, "name": self.config.name},
            )
            for split_name, split_label in splits
        ]

    def _generate_examples(
        self, split: str, name: str
    ) -> Generator[Tuple[int, object], Any, None]:
        """Yield the single ``(key, example)`` pair for the given config.

        Args:
            split (`string`):
                Label of the split being generated; copied into the example.
            name (`string`):
                Config name as defined in ``BUILDER_CONFIGS``. Any name other
                than ``"v1"`` or ``"v2"`` yields nothing.
        """
        if name == "v1":
            key = 1
            text = "This is text in version 1"
            items = ["List item 1", "List item 2", "List item 3"]
            # "dict_int" values are supplied as strings in this sample data.
            records = [("Object text 1", "1"), ("Object text 2", "0")]
        elif name == "v2":
            key = 2
            text = "This is text in version 2"
            items = ["Hello", "Bonjour", "Hola"]
            records = [("Hello world!", "2"), ("langchain is cool", "123")]
        else:
            return

        yield (
            key,
            {
                "split": split,
                "text": text,
                "list": items,
                "dict": [
                    {"dict_text": record_text, "dict_int": record_int}
                    for record_text, record_int in records
                ],
            },
        )
|
@ -0,0 +1,101 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from langchain.document_loaders import HuggingFaceDatasetLoader
|
||||||
|
|
||||||
|
# Path (as a string) to the sample dataset script stored in the
# "sample_documents" directory next to this test module.
HUGGING_FACE_EXAMPLE_DATASET = str(
    Path(__file__).parent.joinpath(
        "sample_documents", "sample_hugging_face_dataset.py"
    )
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.requires("datasets")
def test_load_string() -> None:
    """Load the ``v1`` config using the string column ``text`` as page content.

    Checks that one document is produced per split, that the page content is
    the sample text wrapped in double quotes, and that the remaining columns
    end up as metadata keys.
    """
    # NOTE: this must be a plain test function. Decorating it with
    # ``@pytest.fixture`` would register it as a fixture, so pytest would
    # never collect it and the assertions below would never run.
    page_content_column = "text"
    name = "v1"

    loader = HuggingFaceDatasetLoader(
        HUGGING_FACE_EXAMPLE_DATASET, page_content_column, name
    )
    docs = loader.load()

    # Length should be number of splits for specified `name`
    assert len(docs) == 2
    doc = docs[0]
    assert doc.page_content == '"This is text in version 1"'
    # Every column except the page-content column is expected in metadata.
    assert doc.metadata.keys() == {
        "split",
        "list",
        "dict",
    }
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.requires("datasets")
def test_load_list() -> None:
    """Load the ``v1`` config using the list column ``list`` as page content.

    Checks that the page content is the list rendered as a JSON-style array
    string and that the remaining columns end up as metadata keys.
    """
    # NOTE: this must be a plain test function. Decorating it with
    # ``@pytest.fixture`` would register it as a fixture, so pytest would
    # never collect it and the assertions below would never run.
    page_content_column = "list"
    name = "v1"

    loader = HuggingFaceDatasetLoader(
        HUGGING_FACE_EXAMPLE_DATASET, page_content_column, name
    )
    doc = loader.load()[0]
    assert doc.page_content == '["List item 1", "List item 2", "List item 3"]'
    # Every column except the page-content column is expected in metadata.
    assert doc.metadata.keys() == {
        "split",
        "text",
        "dict",
    }
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.requires("datasets")
def test_load_object() -> None:
    """Load the ``v2`` config using the object column ``dict`` as page content.

    Checks that the page content is the column rendered as a JSON-style
    object string (one list per nested field) and that the remaining columns
    end up as metadata keys.
    """
    # NOTE: this must be a plain test function. Decorating it with
    # ``@pytest.fixture`` would register it as a fixture, so pytest would
    # never collect it and the assertions below would never run.
    page_content_column = "dict"
    name = "v2"

    loader = HuggingFaceDatasetLoader(
        HUGGING_FACE_EXAMPLE_DATASET, page_content_column, name
    )
    doc = loader.load()[0]
    assert (
        doc.page_content
        == '{"dict_text": ["Hello world!", "langchain is cool"], "dict_int": [2, 123]}'
    )
    # Every column except the page-content column is expected in metadata.
    assert doc.metadata.keys() == {
        "split",
        "text",
        "list",
    }
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.requires("datasets")
def test_load_nonexistent_dataset() -> None:
    """Tests that ValueError is thrown for nonexistent dataset name.

    ``"v3"`` is used as a config name that the sample dataset is not
    expected to define; the error is expected when ``load()`` is called.
    """
    # NOTE: this must be a plain test function. Decorating it with
    # ``@pytest.fixture`` would register it as a fixture, so pytest would
    # never collect it and the check below would never run.
    page_content_column = "text"
    name = "v3"

    loader = HuggingFaceDatasetLoader(
        HUGGING_FACE_EXAMPLE_DATASET, page_content_column, name
    )
    with pytest.raises(ValueError):
        loader.load()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.requires("datasets")
def test_load_nonexistent_feature() -> None:
    """Tests that KeyError is thrown for nonexistent feature/key in dataset.

    ``"langchain"`` is used as a page-content column that the sample dataset
    is not expected to contain; the error is expected when ``load()`` is
    called.
    """
    # NOTE: this must be a plain test function. Decorating it with
    # ``@pytest.fixture`` would register it as a fixture, so pytest would
    # never collect it and the check below would never run.
    page_content_column = "langchain"
    name = "v2"

    loader = HuggingFaceDatasetLoader(
        HUGGING_FACE_EXAMPLE_DATASET, page_content_column, name
    )
    with pytest.raises(KeyError):
        loader.load()
|
Loading…
Reference in New Issue
Block a user