upstage[patch] : fix error handling in Layout Analysis parser (#22054)

This pull request fixes exception handling in the
UpstageLayoutAnalysisParser and improves test coverage by adding
error-handling tests for the document loader. These changes make error
handling more robust and increase the reliability of the system when
dealing with external API calls and JSON responses.

### Changes Made
1. Fix Request Exception Handling:

- Issue: The existing implementation of UpstageLayoutAnalysisParser did
not properly handle exceptions thrown by the requests library, which
could lead to unhandled exceptions and potential crashes.
- Solution: Added exception handling for
requests.RequestException to catch any request-related errors. The
handler prints the error details and raises a ValueError with a
meaningful error message.

2. Add Error-Handling Tests for Document Loader:

- New Tests: Introduced new test cases to verify the robustness of the
UpstageLayoutAnalysisLoader against various error scenarios. The tests
ensure that the loader gracefully handles:
- RequestException: Simulates network issues or invalid API requests to
ensure appropriate error handling and user feedback.
- JSONDecodeError: Simulates scenarios where the API response is not a
valid JSON, ensuring the system does not crash and provides clear error
messaging.
This commit is contained in:
junkeon 2024-05-24 00:45:34 +09:00 committed by GitHub
parent d9eff44400
commit 4fda7bf4f2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 54 additions and 5 deletions

View File

@ -181,19 +181,22 @@ class UpstageLayoutAnalysisParser(BaseBlobParser):
result = response.json().get("elements", [])
elements = [
element for element in result if element["category"] not in self.exclude
]
return elements
except requests.RequestException as req_err:
# Handle any request-related exceptions
print(f"Request Exception: {req_err}")
raise ValueError(f"Failed to send request: {req_err}")
except json.JSONDecodeError as json_err:
# Handle JSON decode errors
print(f"JSON Decode Error: {json_err}")
raise ValueError(f"Failed to decode JSON response: {json_err}")
elements = [
element for element in result if element["category"] not in self.exclude
]
return elements
return []
def _split_and_request(
self,

View File

@ -1,7 +1,11 @@
import json
from pathlib import Path
from typing import Any, Dict, get_args
from unittest import TestCase
from unittest.mock import MagicMock, Mock, patch
import requests
from langchain_upstage import UpstageLayoutAnalysisLoader
from langchain_upstage.layout_analysis import OutputType, SplitType
@ -205,3 +209,45 @@ def test_page_split_html_output(mock_post: Mock) -> None:
assert document.metadata["page"] == MOCK_RESPONSE_JSON["elements"][i]["page"]
assert document.metadata["type"] == "html"
assert document.metadata["split"] == "page"
@patch("requests.post")
def test_request_exception(mock_post: Mock) -> None:
    """Check that a failing HTTP request is reported as a ``ValueError``.

    ``requests.post`` is patched to raise ``requests.RequestException``; the
    loader's ``load()`` call is then expected to raise ``ValueError`` whose
    message matches the string asserted at the end of the test.
    """
    # Every call to the patched ``requests.post`` raises this exception.
    mock_post.side_effect = requests.RequestException("Mocked request exception")

    layout_loader = UpstageLayoutAnalysisLoader(
        file_path=EXAMPLE_PDF_PATH,
        output_type="html",
        split="page",
        api_key="valid_api_key",
        exclude=[],
    )

    # A throwaway TestCase instance is used only for its assertRaises
    # context manager, which records the raised exception.
    with TestCase().assertRaises(ValueError) as raised:
        layout_loader.load()

    assert str(raised.exception) == "Failed to send request: Mocked request exception"
@patch("requests.post")
def test_json_decode_error(mock_post: Mock) -> None:
    """Check that an undecodable response body is reported as a ``ValueError``.

    ``requests.post`` is patched to return a mock response with status code
    200 whose ``json()`` raises ``json.JSONDecodeError``; the loader's
    ``load()`` call is then expected to raise ``ValueError`` whose message
    matches the string asserted at the end of the test.
    """
    # Response object whose body cannot be parsed as JSON.
    bad_json_response = Mock(status_code=200)
    bad_json_response.json.side_effect = json.JSONDecodeError("Expecting value", "", 0)
    mock_post.return_value = bad_json_response

    layout_loader = UpstageLayoutAnalysisLoader(
        file_path=EXAMPLE_PDF_PATH,
        output_type="html",
        split="page",
        api_key="valid_api_key",
        exclude=[],
    )

    # A throwaway TestCase instance is used only for its assertRaises
    # context manager, which records the raised exception.
    with TestCase().assertRaises(ValueError) as raised:
        layout_loader.load()

    expected_message = (
        "Failed to decode JSON response: Expecting value: line 1 column 1 (char 0)"
    )
    assert str(raised.exception) == expected_message