mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-14 08:56:27 +00:00
langchain[patch]: Mathpix PDF loader supports arbitrary extra params (#13950)
- **Description:** Support providing whatever extra parameters you want to the Mathpix PDF loader API request.
- **Issue:** #12773
- **Dependencies:** None

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
9e2ae866c4
commit
1c4bfb8c5f
@ -372,6 +372,7 @@ class MathpixPDFLoader(BasePDFLoader):
|
|||||||
processed_file_format: str = "md",
|
processed_file_format: str = "md",
|
||||||
max_wait_time_seconds: int = 500,
|
max_wait_time_seconds: int = 500,
|
||||||
should_clean_pdf: bool = False,
|
should_clean_pdf: bool = False,
|
||||||
|
extra_request_data: Optional[Dict[str, Any]] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Initialize with a file path.
|
"""Initialize with a file path.
|
||||||
@ -382,6 +383,7 @@ class MathpixPDFLoader(BasePDFLoader):
|
|||||||
max_wait_time_seconds: a maximum time to wait for the response from
|
max_wait_time_seconds: a maximum time to wait for the response from
|
||||||
the server. Default is 500.
|
the server. Default is 500.
|
||||||
should_clean_pdf: a flag to clean the PDF file. Default is False.
|
should_clean_pdf: a flag to clean the PDF file. Default is False.
|
||||||
|
extra_request_data: Additional request data.
|
||||||
**kwargs: additional keyword arguments.
|
**kwargs: additional keyword arguments.
|
||||||
"""
|
"""
|
||||||
self.mathpix_api_key = get_from_dict_or_env(
|
self.mathpix_api_key = get_from_dict_or_env(
|
||||||
@ -392,6 +394,9 @@ class MathpixPDFLoader(BasePDFLoader):
|
|||||||
)
|
)
|
||||||
super().__init__(file_path, **kwargs)
|
super().__init__(file_path, **kwargs)
|
||||||
self.processed_file_format = processed_file_format
|
self.processed_file_format = processed_file_format
|
||||||
|
self.extra_request_data = (
|
||||||
|
extra_request_data if extra_request_data is not None else {}
|
||||||
|
)
|
||||||
self.max_wait_time_seconds = max_wait_time_seconds
|
self.max_wait_time_seconds = max_wait_time_seconds
|
||||||
self.should_clean_pdf = should_clean_pdf
|
self.should_clean_pdf = should_clean_pdf
|
||||||
|
|
||||||
@ -405,7 +410,10 @@ class MathpixPDFLoader(BasePDFLoader):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def data(self) -> dict:
|
def data(self) -> dict:
|
||||||
options = {"conversion_formats": {self.processed_file_format: True}}
|
options = {
|
||||||
|
"conversion_formats": {self.processed_file_format: True},
|
||||||
|
**self.extra_request_data,
|
||||||
|
}
|
||||||
return {"options_json": json.dumps(options)}
|
return {"options_json": json.dumps(options)}
|
||||||
|
|
||||||
def send_pdf(self) -> str:
|
def send_pdf(self) -> str:
|
||||||
|
Loading…
Reference in New Issue
Block a user