JSON loader (#4067)

This implements a loader for text passages stored in JSON files. A `jq`
expression is used as the schema that selects the relevant content
from the JSON file. This requires the `jq` package as a dependency:
https://pypi.org/project/jq/.

---------

Signed-off-by: Aivin V. Solatorio <avsolatorio@gmail.com>
This commit is contained in:
Aivin V. Solatorio
2023-05-05 17:48:13 -04:00
committed by GitHub
parent bb6d97c18c
commit 6567b73e1a
7 changed files with 584 additions and 4 deletions

View File

@@ -0,0 +1,16 @@
from pathlib import Path
from langchain.document_loaders import JSONLoader
def test_json_loader() -> None:
    """Test that JSONLoader extracts documents from the example JSON file.

    The jq schema ``.messages[].content`` is applied to
    ``examples/example.json``, located relative to this test module.
    """
    base_dir = Path(__file__).parent.parent
    json_path = base_dir / "examples/example.json"
    documents = JSONLoader(str(json_path), ".messages[].content").load()

    # The schema is expected to yield exactly three documents.
    assert len(documents) == 3

    # A missing (None) content value must be converted to an empty string.
    final_doc = documents[-1]
    assert final_doc.page_content == ""

View File

@@ -0,0 +1,25 @@
{
"messages": [
{
"sender_name": "User 2",
"timestamp_ms": 1675597571851,
"content": "Bye!"
},
{
"sender_name": "User 1",
"timestamp_ms": 1675597435669,
"content": "Oh no worries! Bye"
},
{
"sender_name": "User 2",
"timestamp_ms": 1675595060730,
"photos": [
{
"uri": "url_of_some_picture.jpg",
"creation_timestamp": 1675595059
}
]
}
],
"title": "User 1 and User 2 chat"
}