mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-13 08:27:03 +00:00
langchain[patch]: Mathpix PDF loader supports arbitrary extra params (#13950)
- **Description:** Support passing arbitrary extra parameters to the Mathpix PDF loader API request.
- **Issue:** #12773
- **Dependencies:** None

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
9e2ae866c4
commit
1c4bfb8c5f
@ -372,6 +372,7 @@ class MathpixPDFLoader(BasePDFLoader):
|
||||
processed_file_format: str = "md",
|
||||
max_wait_time_seconds: int = 500,
|
||||
should_clean_pdf: bool = False,
|
||||
extra_request_data: Optional[Dict[str, Any]] = None,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Initialize with a file path.
|
||||
@ -382,6 +383,7 @@ class MathpixPDFLoader(BasePDFLoader):
|
||||
max_wait_time_seconds: a maximum time to wait for the response from
|
||||
the server. Default is 500.
|
||||
should_clean_pdf: a flag to clean the PDF file. Default is False.
|
||||
extra_request_data: Additional request data.
|
||||
**kwargs: additional keyword arguments.
|
||||
"""
|
||||
self.mathpix_api_key = get_from_dict_or_env(
|
||||
@ -392,6 +394,9 @@ class MathpixPDFLoader(BasePDFLoader):
|
||||
)
|
||||
super().__init__(file_path, **kwargs)
|
||||
self.processed_file_format = processed_file_format
|
||||
self.extra_request_data = (
|
||||
extra_request_data if extra_request_data is not None else {}
|
||||
)
|
||||
self.max_wait_time_seconds = max_wait_time_seconds
|
||||
self.should_clean_pdf = should_clean_pdf
|
||||
|
||||
@ -405,7 +410,10 @@ class MathpixPDFLoader(BasePDFLoader):
|
||||
|
||||
@property
|
||||
def data(self) -> dict:
|
||||
options = {"conversion_formats": {self.processed_file_format: True}}
|
||||
options = {
|
||||
"conversion_formats": {self.processed_file_format: True},
|
||||
**self.extra_request_data,
|
||||
}
|
||||
return {"options_json": json.dumps(options)}
|
||||
|
||||
def send_pdf(self) -> str:
|
||||
|
Loading…
Reference in New Issue
Block a user