mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-20 13:54:48 +00:00
add more reasonable arxiv retriever (#13327)
This commit is contained in:
parent
4b7a85887e
commit
be854225c7
@ -12,7 +12,12 @@ class ArxivRetriever(BaseRetriever, ArxivAPIWrapper):
|
|||||||
It uses all ArxivAPIWrapper arguments without any change.
|
It uses all ArxivAPIWrapper arguments without any change.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# When True, _get_relevant_documents returns self.load(query=query);
# when False (the default) it returns self.get_summaries_as_docs(query).
get_full_documents: bool = False
|
||||||
|
|
||||||
def _get_relevant_documents(
    self, query: str, *, run_manager: CallbackManagerForRetrieverRun
) -> List[Document]:
    """Return the arxiv documents matching ``query``.

    The ``get_full_documents`` flag selects the source of the result:
    when it is falsy the output of ``self.get_summaries_as_docs(query)``
    is returned, otherwise the output of ``self.load(query=query)``.

    Args:
        query: the search text forwarded unchanged to the chosen method.
        run_manager: accepted as a keyword-only argument; not used in
            this method body.
    """
    # Guard clause: the summary path is taken unless full documents
    # were explicitly requested.
    if not self.get_full_documents:
        return self.get_summaries_as_docs(query)
    return self.load(query=query)
|
||||||
|
@ -90,6 +90,43 @@ class ArxivAPIWrapper(BaseModel):
|
|||||||
)
|
)
|
||||||
return values
|
return values
|
||||||
|
|
||||||
|
def get_summaries_as_docs(self, query: str) -> List[Document]:
    """
    Performs an arxiv search and returns list of
    documents, with summaries as the content.

    If one of ``self.arxiv_exceptions`` is raised while searching, a
    single-element list holding a document whose content is the error
    text is returned instead. If the search yields no results, an empty
    list is returned. Wrapper for
    https://lukasschwab.me/arxiv.py/index.html#Search

    Args:
        query: a plaintext search query, or arxiv identifiers separated
            by whitespace (decided by ``self.is_arxiv_identifier``)

    Returns:
        One document per search result. ``page_content`` is the result's
        summary; metadata carries "Published" (the date part of
        ``result.updated``), "Title" and "Authors" (names joined with
        ", ").
    """  # noqa: E501
    try:
        if self.is_arxiv_identifier(query):
            search = self.arxiv_search(
                id_list=query.split(),
                max_results=self.top_k_results,
            )
        else:
            # Free-text queries are truncated to the maximum length
            # configured on the wrapper.
            search = self.arxiv_search(  # type: ignore
                query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=self.top_k_results
            )
        # results() can be a lazy generator (it is in arxiv.py 1.x --
        # confirm against the installed version), in which case the
        # request only happens on iteration. Consume it inside the try
        # so fetch errors reach the handler below instead of escaping
        # from the list comprehension.
        results = list(search.results())
    except self.arxiv_exceptions as ex:
        return [Document(page_content=f"Arxiv exception: {ex}")]
    return [
        Document(
            page_content=result.summary,
            metadata={
                "Published": result.updated.date(),
                "Title": result.title,
                "Authors": ", ".join(a.name for a in result.authors),
            },
        )
        for result in results
    ]
|
||||||
|
|
||||||
def run(self, query: str) -> str:
|
def run(self, query: str) -> str:
|
||||||
"""
|
"""
|
||||||
Performs an arxiv search and A single string
|
Performs an arxiv search and A single string
|
||||||
|
Loading…
Reference in New Issue
Block a user