From bc5ec63d67586d9c4a718b4ba6d49a88601c13ef Mon Sep 17 00:00:00 2001 From: peterdhp <36303607+peterdhp@users.noreply.github.com> Date: Wed, 4 Dec 2024 09:21:22 +0900 Subject: [PATCH] community : allow using apikey for PubMedAPIWrapper (#27246) **Description**: > Without an API key, any site (IP address) posting more than 3 requests per second to the E-utilities will receive an error message. By including an API key, a site can post up to 10 requests per second by default. Quoted from "A General Introduction to the E-utilities", NCBI: https://www.ncbi.nlm.nih.gov/books/NBK25497/ I have simply added an `api_key` parameter to the PubMedAPIWrapper that can be used to increase the number of requests per second from 3 to 10. **Twitter handle**: @KORmaori --------- Co-authored-by: Erick Friis --- libs/community/langchain_community/utilities/pubmed.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/libs/community/langchain_community/utilities/pubmed.py b/libs/community/langchain_community/utilities/pubmed.py index e3b23cfa0ad..20185d0c285 100644 --- a/libs/community/langchain_community/utilities/pubmed.py +++ b/libs/community/langchain_community/utilities/pubmed.py @@ -31,6 +31,7 @@ class PubMedAPIWrapper(BaseModel): sleep_time: time to wait between retries. Default is 0.2 seconds. email: email address to be used for the PubMed API. + api_key: API key to be used for the PubMed API. 
""" parse: Any #: :meta private: @@ -47,6 +48,7 @@ class PubMedAPIWrapper(BaseModel): MAX_QUERY_LENGTH: int = 300 doc_content_chars_max: int = 2000 email: str = "your_email@example.com" + api_key: str = "" @model_validator(mode="before") @classmethod @@ -101,6 +103,8 @@ class PubMedAPIWrapper(BaseModel): + str({urllib.parse.quote(query)}) + f"&retmode=json&retmax={self.top_k_results}&usehistory=y" ) + if self.api_key != "": + url += f"&api_key={self.api_key}" result = urllib.request.urlopen(url) text = result.read().decode("utf-8") json_text = json.loads(text) @@ -135,6 +139,8 @@ class PubMedAPIWrapper(BaseModel): + "&webenv=" + webenv ) + if self.api_key != "": + url += f"&api_key={self.api_key}" retry = 0 while True: