mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-10 08:56:42 +00:00
community: refactor Arxiv search logic (#27084)
PR message: Description: This PR refactors the Arxiv API wrapper by extracting the Arxiv search logic into a helper function (_fetch_results) to reduce code duplication and improve maintainability. The helper function is used in methods like get_summaries_as_docs, run, and lazy_load, streamlining the code and making it easier to maintain in the future. Issue: This is a minor refactor, so no specific issue is being fixed. Dependencies: No new dependencies are introduced with this change. Add tests and docs: No new integrations were added, so no additional tests or docs are necessary for this PR. Lint and test: I have run make format, make lint, and make test to ensure all checks pass successfully. --------- Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
parent
57fbc6bdf1
commit
443b37403d
@@ -94,6 +94,16 @@ class ArxivAPIWrapper(BaseModel):
|
|||||||
)
|
)
|
||||||
return values
|
return values
|
||||||
|
|
||||||
|
def _fetch_results(self, query: str) -> Any:
|
||||||
|
"""Helper function to fetch arxiv results based on query."""
|
||||||
|
if self.is_arxiv_identifier(query):
|
||||||
|
return self.arxiv_search(
|
||||||
|
id_list=query.split(), max_results=self.top_k_results
|
||||||
|
).results()
|
||||||
|
return self.arxiv_search(
|
||||||
|
query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=self.top_k_results
|
||||||
|
).results()
|
||||||
|
|
||||||
def get_summaries_as_docs(self, query: str) -> List[Document]:
|
def get_summaries_as_docs(self, query: str) -> List[Document]:
|
||||||
"""
|
"""
|
||||||
Performs an arxiv search and returns list of
|
Performs an arxiv search and returns list of
|
||||||
@@ -107,16 +117,11 @@ class ArxivAPIWrapper(BaseModel):
|
|||||||
query: a plaintext search query
|
query: a plaintext search query
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
if self.is_arxiv_identifier(query):
|
results = self._fetch_results(
|
||||||
results = self.arxiv_search(
|
query
|
||||||
id_list=query.split(),
|
) # Using helper function to fetch results
|
||||||
max_results=self.top_k_results,
|
|
||||||
).results()
|
|
||||||
else:
|
|
||||||
results = self.arxiv_search( # type: ignore
|
|
||||||
query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=self.top_k_results
|
|
||||||
).results()
|
|
||||||
except self.arxiv_exceptions as ex:
|
except self.arxiv_exceptions as ex:
|
||||||
|
logger.error(f"Arxiv exception: {ex}") # Added error logging
|
||||||
return [Document(page_content=f"Arxiv exception: {ex}")]
|
return [Document(page_content=f"Arxiv exception: {ex}")]
|
||||||
docs = [
|
docs = [
|
||||||
Document(
|
Document(
|
||||||
@@ -146,16 +151,11 @@ class ArxivAPIWrapper(BaseModel):
|
|||||||
query: a plaintext search query
|
query: a plaintext search query
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
if self.is_arxiv_identifier(query):
|
results = self._fetch_results(
|
||||||
results = self.arxiv_search(
|
query
|
||||||
id_list=query.split(),
|
) # Using helper function to fetch results
|
||||||
max_results=self.top_k_results,
|
|
||||||
).results()
|
|
||||||
else:
|
|
||||||
results = self.arxiv_search( # type: ignore
|
|
||||||
query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=self.top_k_results
|
|
||||||
).results()
|
|
||||||
except self.arxiv_exceptions as ex:
|
except self.arxiv_exceptions as ex:
|
||||||
|
logger.error(f"Arxiv exception: {ex}") # Added error logging
|
||||||
return f"Arxiv exception: {ex}"
|
return f"Arxiv exception: {ex}"
|
||||||
docs = [
|
docs = [
|
||||||
f"Published: {result.updated.date()}\n"
|
f"Published: {result.updated.date()}\n"
|
||||||
@@ -208,15 +208,9 @@ class ArxivAPIWrapper(BaseModel):
|
|||||||
try:
|
try:
|
||||||
# Remove the ":" and "-" from the query, as they can cause search problems
|
# Remove the ":" and "-" from the query, as they can cause search problems
|
||||||
query = query.replace(":", "").replace("-", "")
|
query = query.replace(":", "").replace("-", "")
|
||||||
if self.is_arxiv_identifier(query):
|
results = self._fetch_results(
|
||||||
results = self.arxiv_search(
|
query
|
||||||
id_list=query[: self.ARXIV_MAX_QUERY_LENGTH].split(),
|
) # Using helper function to fetch results
|
||||||
max_results=self.load_max_docs,
|
|
||||||
).results()
|
|
||||||
else:
|
|
||||||
results = self.arxiv_search( # type: ignore
|
|
||||||
query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=self.load_max_docs
|
|
||||||
).results()
|
|
||||||
except self.arxiv_exceptions as ex:
|
except self.arxiv_exceptions as ex:
|
||||||
logger.debug("Error on arxiv: %s", ex)
|
logger.debug("Error on arxiv: %s", ex)
|
||||||
return
|
return
|
||||||
|
Loading…
Reference in New Issue
Block a user