mirror of
https://github.com/hwchase17/langchain.git
synced 2026-01-15 15:02:33 +00:00
JSON loader (#4067)
This implements a loader of text passages in JSON format. The `jq` syntax is used to define a schema for accessing the relevant contents from the JSON file. This adds a dependency on the `jq` package: https://pypi.org/project/jq/. --------- Signed-off-by: Aivin V. Solatorio <avsolatorio@gmail.com>
This commit is contained in:
committed by
GitHub
parent
bb6d97c18c
commit
6567b73e1a
16
tests/integration_tests/document_loaders/test_json_loader.py
Normal file
16
tests/integration_tests/document_loaders/test_json_loader.py
Normal file
@@ -0,0 +1,16 @@
|
||||
from pathlib import Path
|
||||
|
||||
from langchain.document_loaders import JSONLoader
|
||||
|
||||
|
||||
def test_json_loader() -> None:
    """Test the JSON loader with a jq schema.

    Loads ``examples/example.json`` using the jq expression
    ``.messages[].content`` and checks the number of documents produced and
    how a message without a ``content`` field is represented.
    """
    file_path = Path(__file__).parent.parent / "examples/example.json"
    loader = JSONLoader(str(file_path), ".messages[].content")
    docs = loader.load()

    # Check that the correct number of documents is loaded: the fixture
    # contains three entries under "messages".
    assert len(docs) == 3

    # The last message in the fixture has no "content" key, so its content
    # is None; make sure it is converted to an empty string.
    assert docs[-1].page_content == ""
|
||||
25
tests/integration_tests/examples/example.json
Normal file
25
tests/integration_tests/examples/example.json
Normal file
@@ -0,0 +1,25 @@
|
||||
{
|
||||
"messages": [
|
||||
{
|
||||
"sender_name": "User 2",
|
||||
"timestamp_ms": 1675597571851,
|
||||
"content": "Bye!"
|
||||
},
|
||||
{
|
||||
"sender_name": "User 1",
|
||||
"timestamp_ms": 1675597435669,
|
||||
"content": "Oh no worries! Bye"
|
||||
},
|
||||
{
|
||||
"sender_name": "User 2",
|
||||
"timestamp_ms": 1675595060730,
|
||||
"photos": [
|
||||
{
|
||||
"uri": "url_of_some_picture.jpg",
|
||||
"creation_timestamp": 1675595059
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "User 1 and User 2 chat"
|
||||
}
|
||||
Reference in New Issue
Block a user