mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-20 05:43:55 +00:00
Bagatur/arxiv kwargs (#10903)
Support all `ArxivAPIWrapper` keyword arguments in `ArxivLoader`.
This commit is contained in:
parent
697efd9757
commit
9a858a9107
@ -1,4 +1,4 @@
|
|||||||
from typing import List, Optional
|
from typing import Any, List, Optional
|
||||||
|
|
||||||
from langchain.docstore.document import Document
|
from langchain.docstore.document import Document
|
||||||
from langchain.document_loaders.base import BaseLoader
|
from langchain.document_loaders.base import BaseLoader
|
||||||
@ -9,25 +9,18 @@ class ArxivLoader(BaseLoader):
|
|||||||
"""Load a query result from `Arxiv`.
|
"""Load a query result from `Arxiv`.
|
||||||
|
|
||||||
The loader converts the original PDF format into the text.
|
The loader converts the original PDF format into the text.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
Supports all arguments of `ArxivAPIWrapper`.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self, query: str, doc_content_chars_max: Optional[int] = None, **kwargs: Any
|
||||||
query: str,
|
|
||||||
load_max_docs: Optional[int] = 100,
|
|
||||||
load_all_available_meta: Optional[bool] = False,
|
|
||||||
):
|
):
|
||||||
self.query = query
|
self.query = query
|
||||||
"""The query to be passed to the arxiv.org API."""
|
self.client = ArxivAPIWrapper(
|
||||||
self.load_max_docs = load_max_docs
|
doc_content_chars_max=doc_content_chars_max, **kwargs
|
||||||
"""The maximum number of documents to load."""
|
)
|
||||||
self.load_all_available_meta = load_all_available_meta
|
|
||||||
"""Whether to load all available metadata."""
|
|
||||||
|
|
||||||
def load(self) -> List[Document]:
|
def load(self) -> List[Document]:
|
||||||
arxiv_client = ArxivAPIWrapper(
|
return self.client.load(self.query)
|
||||||
load_max_docs=self.load_max_docs,
|
|
||||||
load_all_available_meta=self.load_all_available_meta,
|
|
||||||
)
|
|
||||||
docs = arxiv_client.load(self.query)
|
|
||||||
return docs
|
|
||||||
|
Loading…
Reference in New Issue
Block a user