mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-19 03:01:29 +00:00
upstage[patch] : fix error handling in Layout Analysis parser (#22054)
This pull request fixes exception handling in `UpstageLayoutAnalysisParser` and extends test coverage by adding error-path tests for the document loader. These improvements ensure robust error handling and increase the reliability of the system when dealing with external API calls and JSON responses.

### Changes Made

1. Fix Request Exception Handling:
   - Issue: The existing implementation of `UpstageLayoutAnalysisParser` did not properly handle exceptions thrown by the `requests` library, which could lead to unhandled exceptions and potential crashes.
   - Solution: Added comprehensive exception handling for `requests.RequestException` to catch any request-related errors. This includes logging the error details and raising a `ValueError` with a meaningful error message.
2. Add Error Exception Tests for Document Loader:
   - New Tests: Introduced new test cases to verify the robustness of `UpstageLayoutAnalysisLoader` against various error scenarios. The tests ensure that the loader gracefully handles:
     - `RequestException`: Simulates network issues or invalid API requests to ensure appropriate error handling and user feedback.
     - `JSONDecodeError`: Simulates scenarios where the API response is not valid JSON, ensuring the system does not crash and provides clear error messaging.
This commit is contained in:
parent
d9eff44400
commit
4fda7bf4f2
@ -181,19 +181,22 @@ class UpstageLayoutAnalysisParser(BaseBlobParser):
|
|||||||
|
|
||||||
result = response.json().get("elements", [])
|
result = response.json().get("elements", [])
|
||||||
|
|
||||||
|
elements = [
|
||||||
|
element for element in result if element["category"] not in self.exclude
|
||||||
|
]
|
||||||
|
|
||||||
|
return elements
|
||||||
|
|
||||||
except requests.RequestException as req_err:
|
except requests.RequestException as req_err:
|
||||||
# Handle any request-related exceptions
|
# Handle any request-related exceptions
|
||||||
print(f"Request Exception: {req_err}")
|
print(f"Request Exception: {req_err}")
|
||||||
|
raise ValueError(f"Failed to send request: {req_err}")
|
||||||
except json.JSONDecodeError as json_err:
|
except json.JSONDecodeError as json_err:
|
||||||
# Handle JSON decode errors
|
# Handle JSON decode errors
|
||||||
print(f"JSON Decode Error: {json_err}")
|
print(f"JSON Decode Error: {json_err}")
|
||||||
raise ValueError(f"Failed to decode JSON response: {json_err}")
|
raise ValueError(f"Failed to decode JSON response: {json_err}")
|
||||||
|
|
||||||
elements = [
|
return []
|
||||||
element for element in result if element["category"] not in self.exclude
|
|
||||||
]
|
|
||||||
|
|
||||||
return elements
|
|
||||||
|
|
||||||
def _split_and_request(
|
def _split_and_request(
|
||||||
self,
|
self,
|
||||||
|
@ -1,7 +1,11 @@
|
|||||||
|
import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, get_args
|
from typing import Any, Dict, get_args
|
||||||
|
from unittest import TestCase
|
||||||
from unittest.mock import MagicMock, Mock, patch
|
from unittest.mock import MagicMock, Mock, patch
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
from langchain_upstage import UpstageLayoutAnalysisLoader
|
from langchain_upstage import UpstageLayoutAnalysisLoader
|
||||||
from langchain_upstage.layout_analysis import OutputType, SplitType
|
from langchain_upstage.layout_analysis import OutputType, SplitType
|
||||||
|
|
||||||
@ -205,3 +209,45 @@ def test_page_split_html_output(mock_post: Mock) -> None:
|
|||||||
assert document.metadata["page"] == MOCK_RESPONSE_JSON["elements"][i]["page"]
|
assert document.metadata["page"] == MOCK_RESPONSE_JSON["elements"][i]["page"]
|
||||||
assert document.metadata["type"] == "html"
|
assert document.metadata["type"] == "html"
|
||||||
assert document.metadata["split"] == "page"
|
assert document.metadata["split"] == "page"
|
||||||
|
|
||||||
|
|
||||||
|
@patch("requests.post")
def test_request_exception(mock_post: Mock) -> None:
    """Check that a failing HTTP request surfaces from ``load()`` as ``ValueError``.

    ``requests.post`` is patched so that every call raises
    ``requests.RequestException``; the loader is then expected to raise a
    ``ValueError`` whose message embeds the original exception text.
    """
    # Make the patched ``requests.post`` raise instead of returning a response.
    mock_post.side_effect = requests.RequestException("Mocked request exception")
    expected_message = "Failed to send request: Mocked request exception"

    loader = UpstageLayoutAnalysisLoader(
        file_path=EXAMPLE_PDF_PATH,
        output_type="html",
        split="page",
        api_key="valid_api_key",
        exclude=[],
    )

    # A throwaway TestCase instance is used only for its assertRaises context.
    with TestCase().assertRaises(ValueError) as raised:
        loader.load()

    assert str(raised.exception) == expected_message
|
||||||
|
|
||||||
|
|
||||||
|
@patch("requests.post")
def test_json_decode_error(mock_post: Mock) -> None:
    """Check that an undecodable response body surfaces from ``load()`` as ``ValueError``.

    ``requests.post`` is patched to return a mock response with status code
    200 whose ``json()`` call raises ``json.JSONDecodeError``; the loader is
    then expected to raise a ``ValueError`` carrying the decoder's message.
    """
    # Response mock: status 200, but decoding the body always fails.
    bad_response = Mock(status_code=200)
    bad_response.json.side_effect = json.JSONDecodeError("Expecting value", "", 0)
    mock_post.return_value = bad_response

    expected_message = (
        "Failed to decode JSON response: Expecting value: line 1 column 1 (char 0)"
    )

    loader = UpstageLayoutAnalysisLoader(
        file_path=EXAMPLE_PDF_PATH,
        output_type="html",
        split="page",
        api_key="valid_api_key",
        exclude=[],
    )

    # A throwaway TestCase instance is used only for its assertRaises context.
    with TestCase().assertRaises(ValueError) as raised:
        loader.load()

    assert str(raised.exception) == expected_message
|
||||||
|
Loading…
Reference in New Issue
Block a user