mirror of
				https://github.com/hwchase17/langchain.git
				synced 2025-11-04 10:10:09 +00:00 
			
		
		
		
	community: refactor Arxiv search logic (#27084)
PR message: **Description:** This PR refactors the Arxiv API wrapper by extracting the duplicated Arxiv search logic into a helper method, `_fetch_results`, to reduce code duplication and improve maintainability. The helper is used in `get_summaries_as_docs`, `run`, and `lazy_load`, streamlining the code and making it easier to maintain in the future. **Issue:** This is a minor refactor, so no specific issue is being fixed. **Dependencies:** No new dependencies are introduced with this change. **Add tests and docs:** No new integrations were added, so no additional tests or docs are necessary for this PR. **Lint and test:** I have run `make format`, `make lint`, and `make test` to ensure all checks pass successfully. --------- Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
		
				
					committed by
					
						
						GitHub
					
				
			
			
				
	
			
			
			
						parent
						
							57fbc6bdf1
						
					
				
				
					commit
					443b37403d
				
			@@ -94,6 +94,16 @@ class ArxivAPIWrapper(BaseModel):
 | 
			
		||||
            )
 | 
			
		||||
        return values
 | 
			
		||||
 | 
			
		||||
    def _fetch_results(
        self, query: str, *, max_results: "Optional[int]" = None
    ) -> Any:
        """Fetch arxiv search results for ``query``.

        When ``self.is_arxiv_identifier(query)`` is true, the query is split on
        whitespace and the tokens are passed to ``self.arxiv_search`` as
        ``id_list``. Otherwise the query is truncated to
        ``self.ARXIV_MAX_QUERY_LENGTH`` characters and passed as a plain-text
        search.

        Args:
            query: an arxiv identifier string or a plaintext search query.
            max_results: optional cap on the number of results. Defaults to
                ``self.top_k_results`` when omitted, which keeps existing
                callers unchanged. Callers that were written against a
                different limit (for example one based on ``load_max_docs``)
                should pass it explicitly so the helper does not silently
                replace their limit.

        Returns:
            Whatever ``self.arxiv_search(...).results()`` returns.

        Raises:
            Nothing is caught here; errors from the search propagate to the
            caller. Callers in this class handle ``self.arxiv_exceptions``.
        """
        # The annotation above is quoted so it is never evaluated when the
        # method is defined.
        limit = self.top_k_results if max_results is None else max_results

        if self.is_arxiv_identifier(query):
            search = self.arxiv_search(id_list=query.split(), max_results=limit)
        else:
            search = self.arxiv_search(
                query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=limit
            )
        return search.results()
 | 
			
		||||
 | 
			
		||||
    def get_summaries_as_docs(self, query: str) -> List[Document]:
 | 
			
		||||
        """
 | 
			
		||||
        Performs an arxiv search and returns list of
 | 
			
		||||
@@ -107,16 +117,11 @@ class ArxivAPIWrapper(BaseModel):
 | 
			
		||||
            query: a plaintext search query
 | 
			
		||||
        """
 | 
			
		||||
        try:
 | 
			
		||||
            if self.is_arxiv_identifier(query):
 | 
			
		||||
                results = self.arxiv_search(
 | 
			
		||||
                    id_list=query.split(),
 | 
			
		||||
                    max_results=self.top_k_results,
 | 
			
		||||
                ).results()
 | 
			
		||||
            else:
 | 
			
		||||
                results = self.arxiv_search(  # type: ignore
 | 
			
		||||
                    query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=self.top_k_results
 | 
			
		||||
                ).results()
 | 
			
		||||
            results = self._fetch_results(
 | 
			
		||||
                query
 | 
			
		||||
            )  # Using helper function to fetch results
 | 
			
		||||
        except self.arxiv_exceptions as ex:
 | 
			
		||||
            logger.error(f"Arxiv exception: {ex}")  # Added error logging
 | 
			
		||||
            return [Document(page_content=f"Arxiv exception: {ex}")]
 | 
			
		||||
        docs = [
 | 
			
		||||
            Document(
 | 
			
		||||
@@ -146,16 +151,11 @@ class ArxivAPIWrapper(BaseModel):
 | 
			
		||||
            query: a plaintext search query
 | 
			
		||||
        """
 | 
			
		||||
        try:
 | 
			
		||||
            if self.is_arxiv_identifier(query):
 | 
			
		||||
                results = self.arxiv_search(
 | 
			
		||||
                    id_list=query.split(),
 | 
			
		||||
                    max_results=self.top_k_results,
 | 
			
		||||
                ).results()
 | 
			
		||||
            else:
 | 
			
		||||
                results = self.arxiv_search(  # type: ignore
 | 
			
		||||
                    query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=self.top_k_results
 | 
			
		||||
                ).results()
 | 
			
		||||
            results = self._fetch_results(
 | 
			
		||||
                query
 | 
			
		||||
            )  # Using helper function to fetch results
 | 
			
		||||
        except self.arxiv_exceptions as ex:
 | 
			
		||||
            logger.error(f"Arxiv exception: {ex}")  # Added error logging
 | 
			
		||||
            return f"Arxiv exception: {ex}"
 | 
			
		||||
        docs = [
 | 
			
		||||
            f"Published: {result.updated.date()}\n"
 | 
			
		||||
@@ -208,15 +208,9 @@ class ArxivAPIWrapper(BaseModel):
 | 
			
		||||
        try:
 | 
			
		||||
            # Remove the ":" and "-" from the query, as they can cause search problems
 | 
			
		||||
            query = query.replace(":", "").replace("-", "")
 | 
			
		||||
            if self.is_arxiv_identifier(query):
 | 
			
		||||
                results = self.arxiv_search(
 | 
			
		||||
                    id_list=query[: self.ARXIV_MAX_QUERY_LENGTH].split(),
 | 
			
		||||
                    max_results=self.load_max_docs,
 | 
			
		||||
                ).results()
 | 
			
		||||
            else:
 | 
			
		||||
                results = self.arxiv_search(  # type: ignore
 | 
			
		||||
                    query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=self.load_max_docs
 | 
			
		||||
                ).results()
 | 
			
		||||
            results = self._fetch_results(
 | 
			
		||||
                query
 | 
			
		||||
            )  # Using helper function to fetch results
 | 
			
		||||
        except self.arxiv_exceptions as ex:
 | 
			
		||||
            logger.debug("Error on arxiv: %s", ex)
 | 
			
		||||
            return
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user