From b3e74f2b98ce3cb2df7699d2cb53a4520a3cab5c Mon Sep 17 00:00:00 2001 From: Igor Brai <53081752+igorbrai@users.noreply.github.com> Date: Mon, 29 Apr 2024 18:49:53 +0300 Subject: [PATCH] community[minor]: add mojeek search util (#20922) **Description:** This pull request introduces a new feature to community tools, enhancing their search capabilities by integrating the Mojeek search engine. **Dependencies:** None --------- Co-authored-by: Igor Brai Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com> Co-authored-by: ccurme --- .../integrations/tools/mojeek_search.ipynb | 77 +++++++++++++++++++ .../langchain_community/tools/__init__.py | 5 ++ .../tools/mojeek_search/__init__.py | 0 .../tools/mojeek_search/tool.py | 45 +++++++++++ .../langchain_community/utilities/__init__.py | 5 ++ .../utilities/mojeek_search.py | 44 +++++++++++ .../tests/unit_tests/tools/test_imports.py | 1 + .../tests/unit_tests/tools/test_public_api.py | 1 + .../unit_tests/utilities/test_imports.py | 1 + 9 files changed, 179 insertions(+) create mode 100644 docs/docs/integrations/tools/mojeek_search.ipynb create mode 100644 libs/community/langchain_community/tools/mojeek_search/__init__.py create mode 100644 libs/community/langchain_community/tools/mojeek_search/tool.py create mode 100644 libs/community/langchain_community/utilities/mojeek_search.py diff --git a/docs/docs/integrations/tools/mojeek_search.ipynb b/docs/docs/integrations/tools/mojeek_search.ipynb new file mode 100644 index 00000000000..49406d443cd --- /dev/null +++ b/docs/docs/integrations/tools/mojeek_search.ipynb @@ -0,0 +1,77 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Mojeek Search\n", + "\n", + "This notebook explains how to get search results using Mojeek Search. Please visit the [Mojeek website](https://www.mojeek.com/services/search/web-search-api/) to obtain an API key." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.tools import MojeekSearch" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "api_key = \"KEY\" # obtained from Mojeek Website" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "search = MojeekSearch.config(api_key=api_key, search_kwargs={\"t\": 10})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In `search_kwargs` you can add any search parameter that you can find on [Mojeek Documentation](https://www.mojeek.com/support/api/search/request_parameters.html)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "search.run(\"mojeek\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/libs/community/langchain_community/tools/__init__.py b/libs/community/langchain_community/tools/__init__.py index 48db105fcd9..17260585e0d 100644 --- a/libs/community/langchain_community/tools/__init__.py +++ b/libs/community/langchain_community/tools/__init__.py @@ -157,6 +157,9 @@ if TYPE_CHECKING: from langchain_community.tools.metaphor_search import ( MetaphorSearchResults, # noqa: F401 ) + from langchain_community.tools.mojeek_search.tool import ( + MojeekSearch, # noqa: F401 + ) from langchain_community.tools.nasa.tool import ( NasaAction, # noqa: F401 ) @@ -385,6 +388,7 @@ __all__ = [ "ListSparkSQLTool", "MerriamWebsterQueryRun", 
"MetaphorSearchResults", + "MojeekSearch", "MoveFileTool", "NasaAction", "NavigateBackTool", @@ -528,6 +532,7 @@ _module_lookup = { "ListSparkSQLTool": "langchain_community.tools.spark_sql.tool", "MerriamWebsterQueryRun": "langchain_community.tools.merriam_webster.tool", "MetaphorSearchResults": "langchain_community.tools.metaphor_search", + "MojeekSearch": "langchain_community.tools.mojeek_search.tool", "MoveFileTool": "langchain_community.tools.file_management", "NasaAction": "langchain_community.tools.nasa.tool", "NavigateBackTool": "langchain_community.tools.playwright", diff --git a/libs/community/langchain_community/tools/mojeek_search/__init__.py b/libs/community/langchain_community/tools/mojeek_search/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/libs/community/langchain_community/tools/mojeek_search/tool.py b/libs/community/langchain_community/tools/mojeek_search/tool.py new file mode 100644 index 00000000000..9112e1afe65 --- /dev/null +++ b/libs/community/langchain_community/tools/mojeek_search/tool.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +from typing import Any, Optional + +from langchain_core.callbacks import ( + AsyncCallbackManagerForToolRun, + CallbackManagerForToolRun, +) +from langchain_core.tools import BaseTool + +from langchain_community.utilities.mojeek_search import MojeekSearchAPIWrapper + + +class MojeekSearch(BaseTool): + name: str = "mojeek_search" + description: str = ( + "A wrapper around Mojeek Search. " + "Useful for when you need to web search results. " + "Input should be a search query." 
+ ) + api_wrapper: MojeekSearchAPIWrapper + + @classmethod + def config( + cls, api_key: str, search_kwargs: Optional[dict] = None, **kwargs: Any + ) -> MojeekSearch: + wrapper = MojeekSearchAPIWrapper( + api_key=api_key, search_kwargs=search_kwargs or {} + ) + return cls(api_wrapper=wrapper, **kwargs) + + def _run( + self, + query: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + return self.api_wrapper.run(query) + + async def _arun( + self, + query: str, + run_manager: Optional[AsyncCallbackManagerForToolRun] = None, + ) -> str: + """Use the tool asynchronously.""" + raise NotImplementedError("MojeekSearch does not support async") diff --git a/libs/community/langchain_community/utilities/__init__.py b/libs/community/langchain_community/utilities/__init__.py index 148447c8446..330e95c9701 100644 --- a/libs/community/langchain_community/utilities/__init__.py +++ b/libs/community/langchain_community/utilities/__init__.py @@ -82,6 +82,9 @@ if TYPE_CHECKING: from langchain_community.utilities.metaphor_search import ( MetaphorSearchAPIWrapper, # noqa: F401 ) + from langchain_community.utilities.mojeek_search import ( + MojeekSearchAPIWrapper, # noqa: F401 + ) from langchain_community.utilities.nasa import ( NasaAPIWrapper, # noqa: F401 ) @@ -189,6 +192,7 @@ __all__ = [ "MaxComputeAPIWrapper", "MerriamWebsterAPIWrapper", "MetaphorSearchAPIWrapper", + "MojeekSearchAPIWrapper", "NVIDIARivaASR", "NVIDIARivaStream", "NVIDIARivaTTS", @@ -249,6 +253,7 @@ _module_lookup = { "MaxComputeAPIWrapper": "langchain_community.utilities.max_compute", "MerriamWebsterAPIWrapper": "langchain_community.utilities.merriam_webster", "MetaphorSearchAPIWrapper": "langchain_community.utilities.metaphor_search", + "MojeekSearchAPIWrapper": "langchain_community.utilities.mojeek_search", "NVIDIARivaASR": "langchain_community.utilities.nvidia_riva", "NVIDIARivaStream": "langchain_community.utilities.nvidia_riva", "NVIDIARivaTTS": 
"langchain_community.utilities.nvidia_riva", diff --git a/libs/community/langchain_community/utilities/mojeek_search.py b/libs/community/langchain_community/utilities/mojeek_search.py new file mode 100644 index 00000000000..4b4a7670221 --- /dev/null +++ b/libs/community/langchain_community/utilities/mojeek_search.py @@ -0,0 +1,44 @@ +import json +from typing import List + +import requests +from langchain_core.pydantic_v1 import BaseModel, Field + + +class MojeekSearchAPIWrapper(BaseModel): + api_key: str + search_kwargs: dict = Field(default_factory=dict) + api_url = "https://api.mojeek.com/search" + + def run(self, query: str) -> str: + search_results = self._search(query) + + results = [] + + for result in search_results: + title = result.get("title", "") + url = result.get("url", "") + desc = result.get("desc", "") + results.append({"title": title, "url": url, "desc": desc}) + + return json.dumps(results) + + def _search(self, query: str) -> List[dict]: + headers = { + "Accept": "application/json", + } + + req = requests.PreparedRequest() + request = { + **self.search_kwargs, + **{"q": query, "fmt": "json", "api_key": self.api_key}, + } + req.prepare_url(self.api_url, request) + if req.url is None: + raise ValueError("prepared url is None, this should not happen") + + response = requests.get(req.url, headers=headers) + if not response.ok: + raise Exception(f"HTTP error {response.status_code}") + + return response.json().get("response", {}).get("results", []) diff --git a/libs/community/tests/unit_tests/tools/test_imports.py b/libs/community/tests/unit_tests/tools/test_imports.py index c6ae50302d1..60e5c0b10a2 100644 --- a/libs/community/tests/unit_tests/tools/test_imports.py +++ b/libs/community/tests/unit_tests/tools/test_imports.py @@ -138,6 +138,7 @@ EXPECTED_ALL = [ "format_tool_to_openai_function", "tool", "MerriamWebsterQueryRun", + "MojeekSearch", ] diff --git a/libs/community/tests/unit_tests/tools/test_public_api.py 
b/libs/community/tests/unit_tests/tools/test_public_api.py index a4fe6e89a42..88d49d5bd22 100644 --- a/libs/community/tests/unit_tests/tools/test_public_api.py +++ b/libs/community/tests/unit_tests/tools/test_public_api.py @@ -139,6 +139,7 @@ _EXPECTED = [ "authenticate", "format_tool_to_openai_function", "tool", + "MojeekSearch", ] diff --git a/libs/community/tests/unit_tests/utilities/test_imports.py b/libs/community/tests/unit_tests/utilities/test_imports.py index c6e2951b9b5..f561e3ac26e 100644 --- a/libs/community/tests/unit_tests/utilities/test_imports.py +++ b/libs/community/tests/unit_tests/utilities/test_imports.py @@ -58,6 +58,7 @@ EXPECTED_ALL = [ "YouSearchAPIWrapper", "ZapierNLAWrapper", "MerriamWebsterAPIWrapper", + "MojeekSearchAPIWrapper", ]