community[minor]: add mojeek search util (#20922)

**Description:** This pull request adds a new feature to the community
tools, extending their search capabilities by integrating the Mojeek
search engine.
**Dependencies:** None

---------

Co-authored-by: Igor Brai <igor@mojeek.com>
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
Co-authored-by: ccurme <chester.curme@gmail.com>
This commit is contained in:
Igor Brai
2024-04-29 18:49:53 +03:00
committed by GitHub
parent 4822beb298
commit b3e74f2b98
9 changed files with 179 additions and 0 deletions

View File

@@ -157,6 +157,9 @@ if TYPE_CHECKING:
from langchain_community.tools.metaphor_search import (
MetaphorSearchResults, # noqa: F401
)
from langchain_community.tools.mojeek_search.tool import (
MojeekSearch, # noqa: F401
)
from langchain_community.tools.nasa.tool import (
NasaAction, # noqa: F401
)
@@ -385,6 +388,7 @@ __all__ = [
"ListSparkSQLTool",
"MerriamWebsterQueryRun",
"MetaphorSearchResults",
"MojeekSearch",
"MoveFileTool",
"NasaAction",
"NavigateBackTool",
@@ -528,6 +532,7 @@ _module_lookup = {
"ListSparkSQLTool": "langchain_community.tools.spark_sql.tool",
"MerriamWebsterQueryRun": "langchain_community.tools.merriam_webster.tool",
"MetaphorSearchResults": "langchain_community.tools.metaphor_search",
"MojeekSearch": "langchain_community.tools.mojeek_search.tool",
"MoveFileTool": "langchain_community.tools.file_management",
"NasaAction": "langchain_community.tools.nasa.tool",
"NavigateBackTool": "langchain_community.tools.playwright",

View File

@@ -0,0 +1,45 @@
from __future__ import annotations
from typing import Any, Optional
from langchain_core.callbacks import (
AsyncCallbackManagerForToolRun,
CallbackManagerForToolRun,
)
from langchain_core.tools import BaseTool
from langchain_community.utilities.mojeek_search import MojeekSearchAPIWrapper
class MojeekSearch(BaseTool):
    """Tool that forwards a search query to a ``MojeekSearchAPIWrapper``."""

    name: str = "mojeek_search"
    description: str = (
        "A wrapper around Mojeek Search. "
        "Useful for when you need to web search results. "
        "Input should be a search query."
    )
    # Wrapper object that performs the actual search request.
    api_wrapper: MojeekSearchAPIWrapper

    @classmethod
    def config(
        cls, api_key: str, search_kwargs: Optional[dict] = None, **kwargs: Any
    ) -> MojeekSearch:
        """Build a tool from an API key and optional search parameters.

        Args:
            api_key: Key handed to the ``MojeekSearchAPIWrapper``.
            search_kwargs: Extra search parameters for the wrapper; a falsy
                value (``None`` or an empty dict) is replaced by ``{}``.
            **kwargs: Forwarded unchanged to the tool constructor.

        Returns:
            A tool instance whose ``api_wrapper`` is the newly created wrapper.
        """
        effective_kwargs = search_kwargs or {}
        api_wrapper = MojeekSearchAPIWrapper(
            api_key=api_key,
            search_kwargs=effective_kwargs,
        )
        return cls(api_wrapper=api_wrapper, **kwargs)

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Run the search synchronously and return the wrapper's output.

        ``run_manager`` is accepted but not used by this implementation.
        """
        output = self.api_wrapper.run(query)
        return output

    async def _arun(
        self,
        query: str,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool asynchronously.

        Raises:
            NotImplementedError: Always; no async implementation is provided.
        """
        raise NotImplementedError("MojeekSearch does not support async")

View File

@@ -82,6 +82,9 @@ if TYPE_CHECKING:
from langchain_community.utilities.metaphor_search import (
MetaphorSearchAPIWrapper, # noqa: F401
)
from langchain_community.utilities.mojeek_search import (
MojeekSearchAPIWrapper, # noqa: F401
)
from langchain_community.utilities.nasa import (
NasaAPIWrapper, # noqa: F401
)
@@ -189,6 +192,7 @@ __all__ = [
"MaxComputeAPIWrapper",
"MerriamWebsterAPIWrapper",
"MetaphorSearchAPIWrapper",
"MojeekSearchAPIWrapper",
"NVIDIARivaASR",
"NVIDIARivaStream",
"NVIDIARivaTTS",
@@ -249,6 +253,7 @@ _module_lookup = {
"MaxComputeAPIWrapper": "langchain_community.utilities.max_compute",
"MerriamWebsterAPIWrapper": "langchain_community.utilities.merriam_webster",
"MetaphorSearchAPIWrapper": "langchain_community.utilities.metaphor_search",
"MojeekSearchAPIWrapper": "langchain_community.utilities.mojeek_search",
"NVIDIARivaASR": "langchain_community.utilities.nvidia_riva",
"NVIDIARivaStream": "langchain_community.utilities.nvidia_riva",
"NVIDIARivaTTS": "langchain_community.utilities.nvidia_riva",

View File

@@ -0,0 +1,44 @@
import json
from typing import List
import requests
from langchain_core.pydantic_v1 import BaseModel, Field
class MojeekSearchAPIWrapper(BaseModel):
    """Thin client for the Mojeek search HTTP API.

    Sends a query to ``api_url`` and exposes the returned results as a JSON
    string containing only the ``title``, ``url`` and ``desc`` of each hit.
    """

    # API key sent with every request as the ``api_key`` query parameter.
    api_key: str
    # Extra query parameters merged into every request; the ``q``, ``fmt``
    # and ``api_key`` entries set by ``_search`` take precedence over them.
    search_kwargs: dict = Field(default_factory=dict)
    # Search endpoint. Annotated explicitly so it is declared the same way as
    # the other fields instead of relying on type inference from the default.
    api_url: str = "https://api.mojeek.com/search"

    def run(self, query: str) -> str:
        """Search for ``query`` and return the hits serialized as JSON.

        Args:
            query: The search query string.

        Returns:
            A JSON array of objects with the keys ``title``, ``url`` and
            ``desc``; a key missing from a hit is emitted as an empty string.

        Raises:
            Exception: If the HTTP response status is not OK.
        """
        results = [
            {
                "title": result.get("title", ""),
                "url": result.get("url", ""),
                "desc": result.get("desc", ""),
            }
            for result in self._search(query)
        ]
        return json.dumps(results)

    def _search(self, query: str) -> List[dict]:
        """Issue the HTTP request and return the raw list of result dicts.

        Args:
            query: The search query string.

        Returns:
            The list found under ``response.results`` in the JSON payload, or
            an empty list when either level is absent or null.

        Raises:
            Exception: If the HTTP response status is not OK.
        """
        headers = {
            "Accept": "application/json",
        }
        # Listed after ``search_kwargs`` so user-supplied extras can never
        # override the query, the response format or the API key.
        params = {
            **self.search_kwargs,
            "q": query,
            "fmt": "json",
            "api_key": self.api_key,
        }
        # Let requests encode the query string rather than pre-building the
        # URL by hand with a PreparedRequest.
        # NOTE(review): no timeout is passed, so a stalled connection blocks
        # the caller indefinitely - consider adding one.
        response = requests.get(self.api_url, params=params, headers=headers)
        if not response.ok:
            raise Exception(f"HTTP error {response.status_code}")
        payload = response.json()
        # ``or`` fallbacks also cover keys that are present but null, which
        # ``dict.get(key, default)`` alone would pass through as ``None``.
        return (payload.get("response") or {}).get("results") or []

View File

@@ -138,6 +138,7 @@ EXPECTED_ALL = [
"format_tool_to_openai_function",
"tool",
"MerriamWebsterQueryRun",
"MojeekSearch",
]

View File

@@ -139,6 +139,7 @@ _EXPECTED = [
"authenticate",
"format_tool_to_openai_function",
"tool",
"MojeekSearch",
]

View File

@@ -58,6 +58,7 @@ EXPECTED_ALL = [
"YouSearchAPIWrapper",
"ZapierNLAWrapper",
"MerriamWebsterAPIWrapper",
"MojeekSearchAPIWrapper",
]