mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-03 05:34:01 +00:00
add more reasonable arxiv retriever (#13327)
This commit is contained in:
parent
4b7a85887e
commit
be854225c7
@ -12,7 +12,12 @@ class ArxivRetriever(BaseRetriever, ArxivAPIWrapper):
|
||||
It uses all ArxivAPIWrapper arguments without any change.
|
||||
"""
|
||||
|
||||
# Flag consulted by _get_relevant_documents: when true the retriever calls
# load(); otherwise it calls get_summaries_as_docs(). Off by default.
get_full_documents: bool = False
|
||||
|
||||
def _get_relevant_documents(
    self, query: str, *, run_manager: CallbackManagerForRetrieverRun
) -> List[Document]:
    """Return the Arxiv documents retrieved for ``query``.

    The ``get_full_documents`` flag picks the retrieval path: when it is
    set, the query is passed to ``self.load``; otherwise it is passed to
    ``self.get_summaries_as_docs``.

    Args:
        query: the search text forwarded unchanged to the selected method.
        run_manager: callback manager for this retriever run; it is
            accepted for interface compatibility and not used here.

    Returns:
        The list of documents produced by the selected method.
    """
    # No unconditional return may precede this dispatch, otherwise the
    # flag would never be consulted.
    if self.get_full_documents:
        return self.load(query=query)
    return self.get_summaries_as_docs(query)
|
||||
|
@ -90,6 +90,43 @@ class ArxivAPIWrapper(BaseModel):
|
||||
)
|
||||
return values
|
||||
|
||||
def get_summaries_as_docs(self, query: str) -> List[Document]:
    """Search arxiv and return one document per result, summary as content.

    Wrapper for https://lukasschwab.me/arxiv.py/index.html#Search

    When ``self.is_arxiv_identifier(query)`` is true, the query is split
    on whitespace and sent as an ``id_list``; otherwise it is truncated to
    ``self.ARXIV_MAX_QUERY_LENGTH`` characters and sent as a plaintext
    query. At most ``self.top_k_results`` results are requested.

    Args:
        query: a plaintext search query, or whitespace-separated
            identifiers.

    Returns:
        One ``Document`` per result, with the result's summary as
        ``page_content`` and ``Published``, ``Title`` and ``Authors``
        metadata. If one of ``self.arxiv_exceptions`` is raised, a
        single ``Document`` whose content is the error text is returned
        instead. If the search yields nothing, the list is empty.
    """
    try:
        if self.is_arxiv_identifier(query):
            search = self.arxiv_search(
                id_list=query.split(),
                max_results=self.top_k_results,
            )
        else:
            search = self.arxiv_search(  # type: ignore
                query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=self.top_k_results
            )
        # results() may hand back a lazy iterable; consume it inside the
        # try so errors raised while fetching are caught here too.
        results = list(search.results())
    except self.arxiv_exceptions as ex:
        return [Document(page_content=f"Arxiv exception: {ex}")]
    return [
        Document(
            page_content=result.summary,
            metadata={
                # "Published" is filled from the result's `updated` timestamp.
                "Published": result.updated.date(),
                "Title": result.title,
                "Authors": ", ".join(a.name for a in result.authors),
            },
        )
        for result in results
    ]
|
||||
|
||||
def run(self, query: str) -> str:
|
||||
"""
|
||||
Performs an arxiv search and A single string
|
||||
|
Loading…
Reference in New Issue
Block a user