From affa3e755a9d28807b7e226685dc7b9cc151a211 Mon Sep 17 00:00:00 2001 From: Archan Ghosh Date: Sat, 23 Dec 2023 02:44:22 +0530 Subject: [PATCH] Update arxiv.py with get_summaries_as_docs inside of Arxivloader (#14953) Added the method get_summaries_as_docs to ArxivLoader - **Description:** Added a method that returns the documents from get_summaries_as_docs. The function is already available on the underlying client (self.client) but was never exposed through ArxivLoader; it can now be called on ArxivLoader itself just like .load(), as both have the same signature. - **Issue:** Reduces the time to load papers: no PDF is processed and only metadata is pulled from arXiv, giving users faster load times on bulk loads. Users can then choose one or more papers and pass their IDs directly to .load() to load the PDFs, thereby loading the full contents of each paper. --- libs/community/langchain_community/document_loaders/arxiv.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libs/community/langchain_community/document_loaders/arxiv.py b/libs/community/langchain_community/document_loaders/arxiv.py index 968d5dcfc34..811d2b605ff 100644 --- a/libs/community/langchain_community/document_loaders/arxiv.py +++ b/libs/community/langchain_community/document_loaders/arxiv.py @@ -25,3 +25,6 @@ class ArxivLoader(BaseLoader): def load(self) -> List[Document]: return self.client.load(self.query) + + def get_summaries_as_docs(self) -> List[Document]: + return self.client.get_summaries_as_docs(self.query)