mirror of
https://github.com/hwchase17/langchain.git
synced 2025-04-28 11:55:21 +00:00
community: fixes json loader not getting texts with json standard (#27327)
This PR fixes JSONLoader._get_text not converting objects to JSON strings correctly. If an object is serializable but is not a dict, JSONLoader uses Python's built-in str() to convert it to a string. The resulting string may not follow the JSON standard. For example, a list is converted to a string with single quotes, and json.loads raises an error when it tries to load that string. --------- Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
parent
4149c0dd8d
commit
13c3c4a210
@ -188,7 +188,7 @@ class JSONLoader(BaseLoader):
|
||||
# In case the text is None, set it to an empty string
|
||||
elif isinstance(content, str):
|
||||
return content
|
||||
elif isinstance(content, dict):
|
||||
elif isinstance(content, (dict, list)):
|
||||
return json.dumps(content) if content else ""
|
||||
else:
|
||||
return str(content) if content is not None else ""
|
||||
|
@ -1,4 +1,5 @@
|
||||
import io
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict
|
||||
|
||||
import pytest
|
||||
@ -12,7 +13,7 @@ pytestmark = pytest.mark.requires("jq")
|
||||
|
||||
|
||||
def test_load_valid_string_content(mocker: MockerFixture) -> None:
|
||||
file_path = "/workspaces/langchain/test.json"
|
||||
file_path = str(Path("/workspaces/langchain/test.json").resolve())
|
||||
expected_docs = [
|
||||
Document(
|
||||
page_content="value1",
|
||||
@ -37,7 +38,7 @@ def test_load_valid_string_content(mocker: MockerFixture) -> None:
|
||||
|
||||
|
||||
def test_load_valid_dict_content(mocker: MockerFixture) -> None:
|
||||
file_path = "/workspaces/langchain/test.json"
|
||||
file_path = str(Path("/workspaces/langchain/test.json").resolve())
|
||||
expected_docs = [
|
||||
Document(
|
||||
page_content='{"text": "value1"}',
|
||||
@ -64,7 +65,7 @@ def test_load_valid_dict_content(mocker: MockerFixture) -> None:
|
||||
|
||||
|
||||
def test_load_valid_bool_content(mocker: MockerFixture) -> None:
|
||||
file_path = "/workspaces/langchain/test.json"
|
||||
file_path = str(Path("/workspaces/langchain/test.json").resolve())
|
||||
expected_docs = [
|
||||
Document(
|
||||
page_content="False",
|
||||
@ -93,7 +94,7 @@ def test_load_valid_bool_content(mocker: MockerFixture) -> None:
|
||||
|
||||
|
||||
def test_load_valid_numeric_content(mocker: MockerFixture) -> None:
|
||||
file_path = "/workspaces/langchain/test.json"
|
||||
file_path = str(Path("/workspaces/langchain/test.json").resolve())
|
||||
expected_docs = [
|
||||
Document(
|
||||
page_content="99",
|
||||
@ -122,7 +123,7 @@ def test_load_valid_numeric_content(mocker: MockerFixture) -> None:
|
||||
|
||||
|
||||
def test_load_invalid_test_content(mocker: MockerFixture) -> None:
|
||||
file_path = "/workspaces/langchain/test.json"
|
||||
file_path = str(Path("/workspaces/langchain/test.json").resolve())
|
||||
|
||||
mocker.patch("builtins.open", mocker.mock_open())
|
||||
mocker.patch(
|
||||
@ -139,7 +140,7 @@ def test_load_invalid_test_content(mocker: MockerFixture) -> None:
|
||||
|
||||
|
||||
def test_load_jsonlines(mocker: MockerFixture) -> None:
|
||||
file_path = "/workspaces/langchain/test.json"
|
||||
file_path = str(Path("/workspaces/langchain/test.json").resolve())
|
||||
expected_docs = [
|
||||
Document(
|
||||
page_content="value1",
|
||||
@ -177,7 +178,7 @@ def test_load_jsonlines(mocker: MockerFixture) -> None:
|
||||
),
|
||||
)
|
||||
def test_load_jsonlines_list(params: Dict, mocker: MockerFixture) -> None:
|
||||
file_path = "/workspaces/langchain/test.json"
|
||||
file_path = str(Path("/workspaces/langchain/test.json").resolve())
|
||||
expected_docs = [
|
||||
Document(
|
||||
page_content="value1",
|
||||
@ -250,7 +251,7 @@ def test_json_meta_01(
|
||||
mocker.patch("builtins.open", mocker.mock_open())
|
||||
mocker.patch(patch_func, return_value=patch_func_value)
|
||||
|
||||
file_path = "/workspaces/langchain/test.json"
|
||||
file_path = str(Path("/workspaces/langchain/test.json").resolve())
|
||||
expected_docs = [
|
||||
Document(
|
||||
page_content="value1",
|
||||
@ -300,7 +301,7 @@ def test_json_meta_02(
|
||||
mocker.patch("builtins.open", mocker.mock_open())
|
||||
mocker.patch(patch_func, return_value=patch_func_value)
|
||||
|
||||
file_path = "/workspaces/langchain/test.json"
|
||||
file_path = str(Path("/workspaces/langchain/test.json").resolve())
|
||||
expected_docs = [
|
||||
Document(
|
||||
page_content="value1",
|
||||
@ -336,7 +337,7 @@ def test_json_meta_02(
|
||||
def test_load_json_with_jq_parsable_content_key(
|
||||
params: Dict, mocker: MockerFixture
|
||||
) -> None:
|
||||
file_path = "/workspaces/langchain/test.json"
|
||||
file_path = str(Path("/workspaces/langchain/test.json").resolve())
|
||||
expected_docs = [
|
||||
Document(
|
||||
page_content="value1",
|
||||
@ -364,7 +365,7 @@ def test_load_json_with_jq_parsable_content_key(
|
||||
|
||||
|
||||
def test_load_json_with_nested_jq_parsable_content_key(mocker: MockerFixture) -> None:
|
||||
file_path = "/workspaces/langchain/test.json"
|
||||
file_path = str(Path("/workspaces/langchain/test.json").resolve())
|
||||
expected_docs = [
|
||||
Document(
|
||||
page_content="message1",
|
||||
@ -401,7 +402,7 @@ def test_load_json_with_nested_jq_parsable_content_key(mocker: MockerFixture) ->
|
||||
def test_load_json_with_nested_jq_parsable_content_key_with_metadata(
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
file_path = "/workspaces/langchain/test.json"
|
||||
file_path = str(Path("/workspaces/langchain/test.json").resolve())
|
||||
expected_docs = [
|
||||
Document(
|
||||
page_content="message1",
|
||||
|
Loading…
Reference in New Issue
Block a user