mirror of
https://github.com/hwchase17/langchain.git
synced 2025-05-30 03:28:40 +00:00
Bagatur/arxiv kwargs (#10903)
support all arXiv api wrapper kwargs in loader
This commit is contained in:
parent
697efd9757
commit
9a858a9107
@ -1,4 +1,4 @@
|
||||
from typing import List, Optional
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.document_loaders.base import BaseLoader
|
||||
@ -9,25 +9,18 @@ class ArxivLoader(BaseLoader):
|
||||
"""Load a query result from `Arxiv`.
|
||||
|
||||
The loader converts the original PDF format into the text.
|
||||
|
||||
Args:
|
||||
Supports all arguments of `ArxivAPIWrapper`.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
query: str,
|
||||
load_max_docs: Optional[int] = 100,
|
||||
load_all_available_meta: Optional[bool] = False,
|
||||
self, query: str, doc_content_chars_max: Optional[int] = None, **kwargs: Any
|
||||
):
|
||||
self.query = query
|
||||
"""The query to be passed to the arxiv.org API."""
|
||||
self.load_max_docs = load_max_docs
|
||||
"""The maximum number of documents to load."""
|
||||
self.load_all_available_meta = load_all_available_meta
|
||||
"""Whether to load all available metadata."""
|
||||
self.client = ArxivAPIWrapper(
|
||||
doc_content_chars_max=doc_content_chars_max, **kwargs
|
||||
)
|
||||
|
||||
def load(self) -> List[Document]:
|
||||
arxiv_client = ArxivAPIWrapper(
|
||||
load_max_docs=self.load_max_docs,
|
||||
load_all_available_meta=self.load_all_available_meta,
|
||||
)
|
||||
docs = arxiv_client.load(self.query)
|
||||
return docs
|
||||
return self.client.load(self.query)
|
||||
|
Loading…
Reference in New Issue
Block a user