diff --git a/libs/community/langchain_community/utilities/arxiv.py b/libs/community/langchain_community/utilities/arxiv.py
index a4c6cf5acea..5ed5c763611 100644
--- a/libs/community/langchain_community/utilities/arxiv.py
+++ b/libs/community/langchain_community/utilities/arxiv.py
@@ -94,6 +94,22 @@ class ArxivAPIWrapper(BaseModel):
             )
         return values
 
+    def _fetch_results(self, query: str, max_results: int) -> Any:
+        """Run an arxiv search for ``query`` and return its results.
+
+        Args:
+            query: whitespace-separated arxiv identifiers, or a plaintext
+                search query (truncated to ``ARXIV_MAX_QUERY_LENGTH``).
+            max_results: maximum number of results to request.
+        """
+        if self.is_arxiv_identifier(query):
+            return self.arxiv_search(
+                id_list=query.split(), max_results=max_results
+            ).results()
+        return self.arxiv_search(
+            query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=max_results
+        ).results()
+
     def get_summaries_as_docs(self, query: str) -> List[Document]:
         """
         Performs an arxiv search and returns list of
@@ -107,16 +123,9 @@ class ArxivAPIWrapper(BaseModel):
             query: a plaintext search query
         """
         try:
-            if self.is_arxiv_identifier(query):
-                results = self.arxiv_search(
-                    id_list=query.split(),
-                    max_results=self.top_k_results,
-                ).results()
-            else:
-                results = self.arxiv_search(  # type: ignore
-                    query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=self.top_k_results
-                ).results()
+            results = self._fetch_results(query, self.top_k_results)
         except self.arxiv_exceptions as ex:
+            logger.error("Arxiv exception: %s", ex)
             return [Document(page_content=f"Arxiv exception: {ex}")]
         docs = [
             Document(
@@ -146,16 +155,9 @@ class ArxivAPIWrapper(BaseModel):
             query: a plaintext search query
         """
         try:
-            if self.is_arxiv_identifier(query):
-                results = self.arxiv_search(
-                    id_list=query.split(),
-                    max_results=self.top_k_results,
-                ).results()
-            else:
-                results = self.arxiv_search(  # type: ignore
-                    query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=self.top_k_results
-                ).results()
+            results = self._fetch_results(query, self.top_k_results)
         except self.arxiv_exceptions as ex:
+            logger.error("Arxiv exception: %s", ex)
             return f"Arxiv exception: {ex}"
         docs = [
             f"Published: {result.updated.date()}\n"
@@ -208,15 +210,7 @@ class ArxivAPIWrapper(BaseModel):
         try:
             # Remove the ":" and "-" from the query, as they can cause search problems
             query = query.replace(":", "").replace("-", "")
-            if self.is_arxiv_identifier(query):
-                results = self.arxiv_search(
-                    id_list=query[: self.ARXIV_MAX_QUERY_LENGTH].split(),
-                    max_results=self.load_max_docs,
-                ).results()
-            else:
-                results = self.arxiv_search(  # type: ignore
-                    query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=self.load_max_docs
-                ).results()
+            results = self._fetch_results(query, self.load_max_docs)
         except self.arxiv_exceptions as ex:
             logger.debug("Error on arxiv: %s", ex)
             return