mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-07-30 23:28:35 +00:00
fix(json_utils): fix function find_json_objects (#2289)
Co-authored-by: dongzhancai1 <dongzhancai1@jd.com> Co-authored-by: aries_ckt <916701291@qq.com>
This commit is contained in:
parent
ddfb435fd4
commit
d5213004d2
@ -48,6 +48,7 @@ def find_json_objects(text):
|
|||||||
escape_character = False
|
escape_character = False
|
||||||
stack = []
|
stack = []
|
||||||
start_index = -1
|
start_index = -1
|
||||||
|
modified_text = list(text) # Convert text to a list for easy modification
|
||||||
|
|
||||||
for i, char in enumerate(text):
|
for i, char in enumerate(text):
|
||||||
# Handle escape characters
|
# Handle escape characters
|
||||||
@ -59,12 +60,12 @@ def find_json_objects(text):
|
|||||||
if char == '"' and not escape_character:
|
if char == '"' and not escape_character:
|
||||||
inside_string = not inside_string
|
inside_string = not inside_string
|
||||||
|
|
||||||
if not inside_string and char == "\n":
|
# Replace newline and tab characters inside strings
|
||||||
continue
|
if inside_string:
|
||||||
if inside_string and char == "\n":
|
if char == "\n":
|
||||||
char = "\\n"
|
modified_text[i] = "\\n"
|
||||||
if inside_string and char == "\t":
|
elif char == "\t":
|
||||||
char = "\\t"
|
modified_text[i] = "\\t"
|
||||||
|
|
||||||
# Handle opening brackets
|
# Handle opening brackets
|
||||||
if char in "{[" and not inside_string:
|
if char in "{[" and not inside_string:
|
||||||
@ -78,7 +79,8 @@ def find_json_objects(text):
|
|||||||
if not stack:
|
if not stack:
|
||||||
end_index = i + 1
|
end_index = i + 1
|
||||||
try:
|
try:
|
||||||
json_obj = json.loads(text[start_index:end_index])
|
json_str = "".join(modified_text[start_index:end_index])
|
||||||
|
json_obj = json.loads(json_str)
|
||||||
json_objects.append(json_obj)
|
json_objects.append(json_obj)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
pass
|
pass
|
||||||
|
64
dbgpt/util/tests/test_json_utils.py
Normal file
64
dbgpt/util/tests/test_json_utils.py
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
import pytest
|
||||||
|
|
||||||
|
from dbgpt.util.json_utils import find_json_objects
|
||||||
|
|
||||||
|
# Define the parameterized test data
|
||||||
|
# Each entry is a tuple of (input text, expected list of parsed objects,
# human-readable description used in the failure message).
#
# Multi-line inputs are written one source line per string literal with
# explicit "\n" escapes instead of triple-quoted blocks, so that significant
# whitespace (notably the trailing space after "select *") is visible in the
# source and cannot be silently stripped by an editor or formatter.
test_data = [
    (
        # A JSON object inside a Markdown fence whose "content" value itself
        # contains raw (unescaped) newlines and a nested code fence.
        "\n"
        "```json\n"
        "\n"
        "{\n"
        '"serial_number": "1",\n'
        '"agent": "CodeOptimizer",\n'
        '"content": "```json\n'
        "select * \n"
        "from table\n"
        "where column = 'value'\n"
        '``` optimize the code above.",\n'
        '"rely": ""\n'
        "}\n"
        "```\n",
        [
            {
                "serial_number": "1",
                "agent": "CodeOptimizer",
                "content": "```json\nselect * \nfrom table\nwhere column = 'value'\n``` optimize the code above.",
                "rely": "",
            }
        ],
        "Test case with nested code block",
    ),
    (
        "\n"
        "{\n"
        '"key": "value"\n'
        "}\n",
        [{"key": "value"}],
        "Test case with simple JSON",
    ),
    (
        # Two independent top-level objects back to back.
        "\n"
        "{\n"
        '"key1": "value1"\n'
        "}\n"
        "{\n"
        '"key2": "value2"\n'
        "}\n",
        [{"key1": "value1"}, {"key2": "value2"}],
        "Test case with multiple JSON objects",
    ),
    ("", [], "Test case with empty input"),
    ("This is not a JSON string", [], "Test case with non-JSON input"),
]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("text, expected, description", test_data)
def test_find_json_objects(text, expected, description):
    """Check that find_json_objects(text) returns exactly ``expected``.

    Args:
        text: Raw input string handed to ``find_json_objects``.
        expected: The list the call is required to return.
        description: Label for the case, included in the failure message.
    """
    result = find_json_objects(text)
    is_match = result == expected
    # The message is only rendered when the comparison fails.
    assert is_match, f"Test failed: {description}\nExpected: {expected}\nGot: {result}"
|
Loading…
Reference in New Issue
Block a user