From e84a350791f5c7d1e6133ff93224755375f88eba Mon Sep 17 00:00:00 2001
From: Bagatur <22008038+baskaryan@users.noreply.github.com>
Date: Tue, 12 Dec 2023 15:46:15 -0800
Subject: [PATCH] infra: rm community split scripts (#14633)

---
 .../community/langchain_community/__init__.py | 9 -
 .../agent_toolkits/__init__.py | 79 --
 .../agent_toolkits/json/base.py | 53 --
 .../agent_toolkits/nla/tool.py | 57 --
 .../agent_toolkits/openapi/base.py | 77 --
 .../agent_toolkits/openapi/planner.py | 370 ---------
 .../agent_toolkits/openapi/toolkit.py | 90 ---
 .../agent_toolkits/powerbi/base.py | 68 --
 .../agent_toolkits/powerbi/chat_base.py | 69 --
 .../agent_toolkits/powerbi/toolkit.py | 106 ---
 .../agent_toolkits/spark_sql/base.py | 64 --
 .../agent_toolkits/sql/base.py | 102 ---
 .../langchain_community/callbacks/__init__.py | 66 --
 .../langchain_community/callbacks/manager.py | 69 --
 .../callbacks/tracers/__init__.py | 18 -
 .../document_loaders/base.py | 101 ---
 .../blob_loaders/file_system.py | 147 ----
 .../document_loaders/generic.py | 190 -----
 .../document_loaders/parsers/generic.py | 70 --
 .../parsers/language/language_parser.py | 157 ----
 .../document_loaders/telegram.py | 262 -------
 .../beautiful_soup_transformer.py | 149 ----
 .../document_transformers/openai_functions.py | 140 ----
 .../embeddings/__init__.py | 161 ----
 .../embeddings/huggingface.py | 343 ---------
 .../embeddings/johnsnowlabs.py | 92 ---
 .../embeddings/self_hosted_hugging_face.py | 168 ----
 .../langchain_community/llms/anthropic.py | 351 ---------
 .../llms/cloudflare_workersai.py | 126 ---
 .../retrievers/__init__.py | 106 ---
 .../langchain_community/storage/__init__.py | 19 -
 .../tools/amadeus/closest_airport.py | 50 --
 .../langchain_community/tools/clickup/tool.py | 42 -
 .../langchain_community/tools/jira/tool.py | 44 --
 .../langchain_community/tools/powerbi/tool.py | 276 -------
 .../tools/spark_sql/tool.py | 130 ----
 .../tools/sql_database/tool.py | 134 ----
 .../langchain_community/tools/zapier/tool.py | 215 ------
 .../callbacks/test_langchain_tracer.py | 283 -------
 .../callbacks/test_openai_callback.py | 68 --
 .../callbacks/test_streamlit_callback.py | 30 -
 .../callbacks/test_wandb_tracer.py | 118 ---
 .../chat_models/test_openai.py | 333 --------
 .../chat_models/test_qianfan_endpoint.py | 219 ------
 .../document_loaders/parsers/test_language.py | 182 -----
 .../integration_tests/llms/test_fireworks.py | 136 ----
 .../llms/test_opaqueprompts.py | 77 --
 .../llms/test_symblai_nebula.py | 42 -
 .../integration_tests/llms/test_vertexai.py | 151 ----
 .../integration_tests/utilities/test_arxiv.py | 171 ----
 .../utilities/test_pubmed.py | 164 ----
 .../vectorstores/conftest.py | 44 --
 .../callbacks/test_callback_manager.py | 85 --
 .../unit_tests/callbacks/test_imports.py | 31 -
 .../unit_tests/chat_loaders/test_slack.py | 23 -
 .../unit_tests/chat_models/test_bedrock.py | 54 --
 .../parsers/test_pdf_parsers.py | 96 ---
 .../unit_tests/embeddings/test_imports.py | 60 --
 .../tests/unit_tests/llms/test_openai.py | 56 --
 .../unit_tests/retrievers/test_imports.py | 42 -
 .../tests/unit_tests/storage/test_imports.py | 11 -
 .../tests/unit_tests/tools/test_exported.py | 40 -
 .../unit_tests/vectorstores/test_faiss.py | 728 ------------------
 .../unit_tests/vectorstores/test_imports.py | 13 -
 .../libs/core/langchain_core/load/load.py | 144 ----
 .../core/langchain_core/utils/__init__.py | 49 --
 .../libs/core/langchain_core/utils/env.py | 45 --
 .../tests/unit_tests/utils/test_imports.py | 28 -
 .../langchain/langchain/callbacks/__init__.py | 83 --
 .../langchain/langchain/callbacks/manager.py | 68 --
 .../unit_tests/callbacks/test_manager.py | 36 -
 .../tests/unit_tests/chains/test_llm.py | 75 --
 .../unit_tests/load/test_serializable.py | 57 --
 .../tests/unit_tests/test_dependencies.py | 113 ---
 .../community_split/script_integrations.sh | 313 --------
 .scripts/community_split/update_imports.py | 85 --
 76 files changed, 9123 deletions(-)
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/__init__.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/agent_toolkits/__init__.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/agent_toolkits/json/base.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/agent_toolkits/nla/tool.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/agent_toolkits/openapi/base.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/agent_toolkits/openapi/planner.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/agent_toolkits/openapi/toolkit.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/agent_toolkits/powerbi/base.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/agent_toolkits/powerbi/chat_base.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/agent_toolkits/powerbi/toolkit.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/agent_toolkits/spark_sql/base.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/agent_toolkits/sql/base.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/callbacks/__init__.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/callbacks/manager.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/callbacks/tracers/__init__.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/document_loaders/base.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/document_loaders/blob_loaders/file_system.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/document_loaders/generic.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/document_loaders/parsers/generic.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/document_loaders/parsers/language/language_parser.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/document_loaders/telegram.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/document_transformers/beautiful_soup_transformer.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/document_transformers/openai_functions.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/embeddings/__init__.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/embeddings/huggingface.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/embeddings/johnsnowlabs.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/embeddings/self_hosted_hugging_face.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/llms/anthropic.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/llms/cloudflare_workersai.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/retrievers/__init__.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/storage/__init__.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/tools/amadeus/closest_airport.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/tools/clickup/tool.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/tools/jira/tool.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/tools/powerbi/tool.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/tools/spark_sql/tool.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/tools/sql_database/tool.py
 delete mode 100644 .scripts/community_split/libs/community/langchain_community/tools/zapier/tool.py
 delete mode 100644 .scripts/community_split/libs/community/tests/integration_tests/callbacks/test_langchain_tracer.py
 delete mode 100644 .scripts/community_split/libs/community/tests/integration_tests/callbacks/test_openai_callback.py
 delete mode 100644 .scripts/community_split/libs/community/tests/integration_tests/callbacks/test_streamlit_callback.py
 delete mode 100644 .scripts/community_split/libs/community/tests/integration_tests/callbacks/test_wandb_tracer.py
 delete mode 100644 .scripts/community_split/libs/community/tests/integration_tests/chat_models/test_openai.py
 delete mode 100644 .scripts/community_split/libs/community/tests/integration_tests/chat_models/test_qianfan_endpoint.py
 delete mode 100644 .scripts/community_split/libs/community/tests/integration_tests/document_loaders/parsers/test_language.py
 delete mode 100644 .scripts/community_split/libs/community/tests/integration_tests/llms/test_fireworks.py
 delete mode 100644 .scripts/community_split/libs/community/tests/integration_tests/llms/test_opaqueprompts.py
 delete mode 100644 .scripts/community_split/libs/community/tests/integration_tests/llms/test_symblai_nebula.py
 delete mode 100644 .scripts/community_split/libs/community/tests/integration_tests/llms/test_vertexai.py
 delete mode 100644 .scripts/community_split/libs/community/tests/integration_tests/utilities/test_arxiv.py
 delete mode 100644 .scripts/community_split/libs/community/tests/integration_tests/utilities/test_pubmed.py
 delete mode 100644 .scripts/community_split/libs/community/tests/integration_tests/vectorstores/conftest.py
 delete mode 100644 .scripts/community_split/libs/community/tests/unit_tests/callbacks/test_callback_manager.py
 delete mode 100644 .scripts/community_split/libs/community/tests/unit_tests/callbacks/test_imports.py
 delete mode 100644 .scripts/community_split/libs/community/tests/unit_tests/chat_loaders/test_slack.py
 delete mode 100644 .scripts/community_split/libs/community/tests/unit_tests/chat_models/test_bedrock.py
 delete mode 100644 .scripts/community_split/libs/community/tests/unit_tests/document_loaders/parsers/test_pdf_parsers.py
 delete mode 100644 .scripts/community_split/libs/community/tests/unit_tests/embeddings/test_imports.py
 delete mode 100644 .scripts/community_split/libs/community/tests/unit_tests/llms/test_openai.py
 delete mode 100644 .scripts/community_split/libs/community/tests/unit_tests/retrievers/test_imports.py
 delete mode 100644 .scripts/community_split/libs/community/tests/unit_tests/storage/test_imports.py
 delete mode 100644 .scripts/community_split/libs/community/tests/unit_tests/tools/test_exported.py
 delete mode 100644 .scripts/community_split/libs/community/tests/unit_tests/vectorstores/test_faiss.py
 delete mode 100644 .scripts/community_split/libs/community/tests/unit_tests/vectorstores/test_imports.py
 delete mode 100644 .scripts/community_split/libs/core/langchain_core/load/load.py
 delete mode 100644 .scripts/community_split/libs/core/langchain_core/utils/__init__.py
 delete mode 100644 .scripts/community_split/libs/core/langchain_core/utils/env.py
 delete mode 100644 .scripts/community_split/libs/core/tests/unit_tests/utils/test_imports.py
 delete mode 100644 .scripts/community_split/libs/langchain/langchain/callbacks/__init__.py
 delete mode 100644 .scripts/community_split/libs/langchain/langchain/callbacks/manager.py
 delete mode 100644 .scripts/community_split/libs/langchain/tests/unit_tests/callbacks/test_manager.py
 delete mode 100644 .scripts/community_split/libs/langchain/tests/unit_tests/chains/test_llm.py
 delete mode 100644 .scripts/community_split/libs/langchain/tests/unit_tests/load/test_serializable.py
 delete mode 100644 .scripts/community_split/libs/langchain/tests/unit_tests/test_dependencies.py
 delete mode 100755 .scripts/community_split/script_integrations.sh
 delete mode 100644 .scripts/community_split/update_imports.py

diff --git a/.scripts/community_split/libs/community/langchain_community/__init__.py b/.scripts/community_split/libs/community/langchain_community/__init__.py
deleted file mode 100644
index ac7aeef6faf..00000000000
--- a/.scripts/community_split/libs/community/langchain_community/__init__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-"""Main entrypoint into package."""
-from importlib import metadata
-
-try:
-    __version__ = metadata.version(__package__)
-except metadata.PackageNotFoundError:
-    # Case where package metadata is not available.
-    __version__ = ""
-del metadata  # optional, avoids polluting the results of dir(__package__)
diff --git a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/__init__.py b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/__init__.py
deleted file mode 100644
index f23fb6a7e82..00000000000
--- a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/__init__.py
+++ /dev/null
@@ -1,79 +0,0 @@
-"""Agent toolkits contain integrations with various resources and services.
-
-LangChain has a large ecosystem of integrations with various external resources
-like local and remote file systems, APIs and databases.
-
-These integrations allow developers to create versatile applications that combine the
-power of LLMs with the ability to access, interact with and manipulate external
-resources.
-
-When developing an application, developers should inspect the capabilities and
-permissions of the tools that underlie the given agent toolkit, and determine
-whether permissions of the given toolkit are appropriate for the application.
-
-See [Security](https://python.langchain.com/docs/security) for more information.
-""" -from langchain_community.agent_toolkits.ainetwork.toolkit import AINetworkToolkit -from langchain_community.agent_toolkits.amadeus.toolkit import AmadeusToolkit -from langchain_community.agent_toolkits.azure_cognitive_services import ( - AzureCognitiveServicesToolkit, -) -from langchain_community.agent_toolkits.conversational_retrieval.openai_functions import ( # noqa: E501 - create_conversational_retrieval_agent, -) -from langchain_community.agent_toolkits.file_management.toolkit import ( - FileManagementToolkit, -) -from langchain_community.agent_toolkits.gmail.toolkit import GmailToolkit -from langchain_community.agent_toolkits.jira.toolkit import JiraToolkit -from langchain_community.agent_toolkits.json.base import create_json_agent -from langchain_community.agent_toolkits.json.toolkit import JsonToolkit -from langchain_community.agent_toolkits.multion.toolkit import MultionToolkit -from langchain_community.agent_toolkits.nasa.toolkit import NasaToolkit -from langchain_community.agent_toolkits.nla.toolkit import NLAToolkit -from langchain_community.agent_toolkits.office365.toolkit import O365Toolkit -from langchain_community.agent_toolkits.openapi.base import create_openapi_agent -from langchain_community.agent_toolkits.openapi.toolkit import OpenAPIToolkit -from langchain_community.agent_toolkits.playwright.toolkit import ( - PlayWrightBrowserToolkit, -) -from langchain_community.agent_toolkits.powerbi.base import create_pbi_agent -from langchain_community.agent_toolkits.powerbi.chat_base import create_pbi_chat_agent -from langchain_community.agent_toolkits.powerbi.toolkit import PowerBIToolkit -from langchain_community.agent_toolkits.slack.toolkit import SlackToolkit -from langchain_community.agent_toolkits.spark_sql.base import create_spark_sql_agent -from langchain_community.agent_toolkits.spark_sql.toolkit import SparkSQLToolkit -from langchain_community.agent_toolkits.sql.base import create_sql_agent -from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit -from langchain_community.agent_toolkits.steam.toolkit import SteamToolkit -from langchain_community.agent_toolkits.zapier.toolkit import ZapierToolkit - - -__all__ = [ - "AINetworkToolkit", - "AmadeusToolkit", - "AzureCognitiveServicesToolkit", - "FileManagementToolkit", - "GmailToolkit", - "JiraToolkit", - "JsonToolkit", - "MultionToolkit", - "NasaToolkit", - "NLAToolkit", - "O365Toolkit", - "OpenAPIToolkit", - "PlayWrightBrowserToolkit", - "PowerBIToolkit", - "SlackToolkit", - "SteamToolkit", - "SQLDatabaseToolkit", - "SparkSQLToolkit", - "ZapierToolkit", - "create_json_agent", - "create_openapi_agent", - "create_pbi_agent", - "create_pbi_chat_agent", - "create_spark_sql_agent", - "create_sql_agent", - "create_conversational_retrieval_agent", -] diff --git a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/json/base.py b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/json/base.py deleted file mode 100644 index 0d9f10d14c6..00000000000 --- a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/json/base.py +++ /dev/null @@ -1,53 +0,0 @@ -"""Json agent.""" -from __future__ import annotations -from typing import Any, Dict, List, Optional, TYPE_CHECKING - -from langchain_core.callbacks import BaseCallbackManager -from langchain_core.language_models import BaseLanguageModel - -from langchain_community.agent_toolkits.json.prompt import JSON_PREFIX, JSON_SUFFIX -from langchain_community.agent_toolkits.json.toolkit import 
diff --git a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/json/base.py b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/json/base.py
deleted file mode 100644
index 0d9f10d14c6..00000000000
--- a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/json/base.py
+++ /dev/null
@@ -1,53 +0,0 @@
-"""Json agent."""
-from __future__ import annotations
-from typing import Any, Dict, List, Optional, TYPE_CHECKING
-
-from langchain_core.callbacks import BaseCallbackManager
-from langchain_core.language_models import BaseLanguageModel
-
-from langchain_community.agent_toolkits.json.prompt import JSON_PREFIX, JSON_SUFFIX
-from langchain_community.agent_toolkits.json.toolkit import JsonToolkit
-
-if TYPE_CHECKING:
-    from langchain.agents.agent import AgentExecutor
-
-
-def create_json_agent(
-    llm: BaseLanguageModel,
-    toolkit: JsonToolkit,
-    callback_manager: Optional[BaseCallbackManager] = None,
-    prefix: str = JSON_PREFIX,
-    suffix: str = JSON_SUFFIX,
-    format_instructions: Optional[str] = None,
-    input_variables: Optional[List[str]] = None,
-    verbose: bool = False,
-    agent_executor_kwargs: Optional[Dict[str, Any]] = None,
-    **kwargs: Any,
-) -> AgentExecutor:
-    """Construct a json agent from an LLM and tools."""
-    from langchain.agents.agent import AgentExecutor
-    from langchain.agents.mrkl.base import ZeroShotAgent
-    from langchain.chains.llm import LLMChain
-    tools = toolkit.get_tools()
-    prompt_params = {"format_instructions": format_instructions} if format_instructions is not None else {}
-    prompt = ZeroShotAgent.create_prompt(
-        tools,
-        prefix=prefix,
-        suffix=suffix,
-        input_variables=input_variables,
-        **prompt_params,
-    )
-    llm_chain = LLMChain(
-        llm=llm,
-        prompt=prompt,
-        callback_manager=callback_manager,
-    )
-    tool_names = [tool.name for tool in tools]
-    agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs)
-    return AgentExecutor.from_agent_and_tools(
-        agent=agent,
-        tools=tools,
-        callback_manager=callback_manager,
-        verbose=verbose,
-        **(agent_executor_kwargs or {}),
-    )
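For reference, a minimal usage sketch of the create_json_agent deleted above, assuming an OpenAI key is configured; the dict passed to JsonSpec is a toy example:

    from langchain_community.agent_toolkits import JsonToolkit, create_json_agent
    from langchain_community.llms import OpenAI
    from langchain_community.tools.json.tool import JsonSpec

    # Any nested dict works; an OpenAPI spec loaded from YAML is typical.
    spec = JsonSpec(dict_={"openapi": "3.0.0", "paths": {"/health": {}}}, max_value_length=4000)
    agent = create_json_agent(llm=OpenAI(temperature=0), toolkit=JsonToolkit(spec=spec), verbose=True)
    agent.run("What paths are defined in this spec?")
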
diff --git a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/nla/tool.py b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/nla/tool.py
deleted file mode 100644
index 7d94b2f9bca..00000000000
--- a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/nla/tool.py
+++ /dev/null
@@ -1,57 +0,0 @@
-"""Tool for interacting with a single API with natural language definition."""
-
-from __future__ import annotations
-from typing import Any, Optional, TYPE_CHECKING
-
-from langchain_core.language_models import BaseLanguageModel
-from langchain_core.tools import Tool
-
-from langchain_community.tools.openapi.utils.api_models import APIOperation
-from langchain_community.tools.openapi.utils.openapi_utils import OpenAPISpec
-from langchain_community.utilities.requests import Requests
-
-if TYPE_CHECKING:
-    from langchain.chains.api.openapi.chain import OpenAPIEndpointChain
-
-
-class NLATool(Tool):
-    """Natural Language API Tool."""
-
-    @classmethod
-    def from_open_api_endpoint_chain(
-        cls, chain: OpenAPIEndpointChain, api_title: str
-    ) -> "NLATool":
-        """Convert an endpoint chain to an API endpoint tool."""
-        expanded_name = (
-            f'{api_title.replace(" ", "_")}.{chain.api_operation.operation_id}'
-        )
-        description = (
-            f"I'm an AI from {api_title}. Instruct what you want,"
-            " and I'll assist via an API with description:"
-            f" {chain.api_operation.description}"
-        )
-        return cls(name=expanded_name, func=chain.run, description=description)
-
-    @classmethod
-    def from_llm_and_method(
-        cls,
-        llm: BaseLanguageModel,
-        path: str,
-        method: str,
-        spec: OpenAPISpec,
-        requests: Optional[Requests] = None,
-        verbose: bool = False,
-        return_intermediate_steps: bool = False,
-        **kwargs: Any,
-    ) -> "NLATool":
-        """Instantiate the tool from the specified path and method."""
-        api_operation = APIOperation.from_openapi_spec(spec, path, method)
-        chain = OpenAPIEndpointChain.from_api_operation(
-            api_operation,
-            llm,
-            requests=requests,
-            verbose=verbose,
-            return_intermediate_steps=return_intermediate_steps,
-            **kwargs,
-        )
-        return cls.from_open_api_endpoint_chain(chain, spec.info.title)
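A hedged sketch of how the from_llm_and_method constructor above was typically driven; the spec URL and path are placeholders, and the langchain package (for OpenAPIEndpointChain) must be installed:

    from langchain_community.agent_toolkits.nla.tool import NLATool
    from langchain_community.llms import OpenAI
    from langchain_community.tools.openapi.utils.openapi_utils import OpenAPISpec

    spec = OpenAPISpec.from_url("https://example.com/openapi.yaml")  # placeholder URL
    tool = NLATool.from_llm_and_method(
        llm=OpenAI(temperature=0),
        path="/v1/widgets",  # placeholder path taken from the spec
        method="get",
        spec=spec,
    )
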
- """ - from langchain.agents.agent import AgentExecutor - from langchain.agents.mrkl.base import ZeroShotAgent - from langchain.chains.llm import LLMChain - tools = toolkit.get_tools() - prompt_params = {"format_instructions": format_instructions} if format_instructions is not None else {} - prompt = ZeroShotAgent.create_prompt( - tools, - prefix=prefix, - suffix=suffix, - input_variables=input_variables, - **prompt_params - ) - llm_chain = LLMChain( - llm=llm, - prompt=prompt, - callback_manager=callback_manager, - ) - tool_names = [tool.name for tool in tools] - agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs) - return AgentExecutor.from_agent_and_tools( - agent=agent, - tools=tools, - callback_manager=callback_manager, - verbose=verbose, - return_intermediate_steps=return_intermediate_steps, - max_iterations=max_iterations, - max_execution_time=max_execution_time, - early_stopping_method=early_stopping_method, - **(agent_executor_kwargs or {}), - ) diff --git a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/openapi/planner.py b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/openapi/planner.py deleted file mode 100644 index 55702b3bb09..00000000000 --- a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/openapi/planner.py +++ /dev/null @@ -1,370 +0,0 @@ -"""Agent that interacts with OpenAPI APIs via a hierarchical planning approach.""" -import json -import re -from functools import partial -from typing import Any, Callable, Dict, List, Optional, TYPE_CHECKING - -import yaml -from langchain_core.callbacks import BaseCallbackManager -from langchain_core.language_models import BaseLanguageModel -from langchain_core.prompts import BasePromptTemplate, PromptTemplate -from langchain_core.pydantic_v1 import Field -from langchain_core.tools import BaseTool, Tool -from langchain_community.llms import OpenAI - -from langchain_community.agent_toolkits.openapi.planner_prompt import ( - API_CONTROLLER_PROMPT, - API_CONTROLLER_TOOL_DESCRIPTION, - API_CONTROLLER_TOOL_NAME, - API_ORCHESTRATOR_PROMPT, - API_PLANNER_PROMPT, - API_PLANNER_TOOL_DESCRIPTION, - API_PLANNER_TOOL_NAME, - PARSING_DELETE_PROMPT, - PARSING_GET_PROMPT, - PARSING_PATCH_PROMPT, - PARSING_POST_PROMPT, - PARSING_PUT_PROMPT, - REQUESTS_DELETE_TOOL_DESCRIPTION, - REQUESTS_GET_TOOL_DESCRIPTION, - REQUESTS_PATCH_TOOL_DESCRIPTION, - REQUESTS_POST_TOOL_DESCRIPTION, - REQUESTS_PUT_TOOL_DESCRIPTION, -) -from langchain_community.agent_toolkits.openapi.spec import ReducedOpenAPISpec -from langchain_community.tools.requests.tool import BaseRequestsTool -from langchain_community.utilities.requests import RequestsWrapper - -if TYPE_CHECKING: - from langchain.agents.agent import AgentExecutor - from langchain.chains.llm import LLMChain - from langchain.memory import ReadOnlySharedMemory - -# -# Requests tools with LLM-instructed extraction of truncated responses. -# -# Of course, truncating so bluntly may lose a lot of valuable -# information in the response. -# However, the goal for now is to have only a single inference step. 
-MAX_RESPONSE_LENGTH = 5000
-"""Maximum length of the response to be returned."""
-
-
-def _get_default_llm_chain(prompt: BasePromptTemplate) -> LLMChain:
-    from langchain.chains.llm import LLMChain
-    return LLMChain(
-        llm=OpenAI(),
-        prompt=prompt,
-    )
-
-
-def _get_default_llm_chain_factory(
-    prompt: BasePromptTemplate,
-) -> Callable[[], LLMChain]:
-    """Returns a default LLMChain factory."""
-    return partial(_get_default_llm_chain, prompt)
-
-
-class RequestsGetToolWithParsing(BaseRequestsTool, BaseTool):
-    """Requests GET tool with LLM-instructed extraction of truncated responses."""
-
-    name: str = "requests_get"
-    """Tool name."""
-    description = REQUESTS_GET_TOOL_DESCRIPTION
-    """Tool description."""
-    response_length: Optional[int] = MAX_RESPONSE_LENGTH
-    """Maximum length of the response to be returned."""
-    llm_chain: Any = Field(
-        default_factory=_get_default_llm_chain_factory(PARSING_GET_PROMPT)
-    )
-    """LLMChain used to extract the response."""
-
-    def _run(self, text: str) -> str:
-        from langchain.output_parsers.json import parse_json_markdown
-        try:
-            data = parse_json_markdown(text)
-        except json.JSONDecodeError as e:
-            raise e
-        data_params = data.get("params")
-        response = self.requests_wrapper.get(data["url"], params=data_params)
-        response = response[: self.response_length]
-        return self.llm_chain.predict(
-            response=response, instructions=data["output_instructions"]
-        ).strip()
-
-    async def _arun(self, text: str) -> str:
-        raise NotImplementedError()
-
-
-class RequestsPostToolWithParsing(BaseRequestsTool, BaseTool):
-    """Requests POST tool with LLM-instructed extraction of truncated responses."""
-
-    name: str = "requests_post"
-    """Tool name."""
-    description = REQUESTS_POST_TOOL_DESCRIPTION
-    """Tool description."""
-    response_length: Optional[int] = MAX_RESPONSE_LENGTH
-    """Maximum length of the response to be returned."""
-    llm_chain: Any = Field(
-        default_factory=_get_default_llm_chain_factory(PARSING_POST_PROMPT)
-    )
-    """LLMChain used to extract the response."""
-
-    def _run(self, text: str) -> str:
-        from langchain.output_parsers.json import parse_json_markdown
-        try:
-            data = parse_json_markdown(text)
-        except json.JSONDecodeError as e:
-            raise e
-        response = self.requests_wrapper.post(data["url"], data["data"])
-        response = response[: self.response_length]
-        return self.llm_chain.predict(
-            response=response, instructions=data["output_instructions"]
-        ).strip()
-
-    async def _arun(self, text: str) -> str:
-        raise NotImplementedError()
-
-
-class RequestsPatchToolWithParsing(BaseRequestsTool, BaseTool):
-    """Requests PATCH tool with LLM-instructed extraction of truncated responses."""
-
-    name: str = "requests_patch"
-    """Tool name."""
-    description = REQUESTS_PATCH_TOOL_DESCRIPTION
-    """Tool description."""
-    response_length: Optional[int] = MAX_RESPONSE_LENGTH
-    """Maximum length of the response to be returned."""
-    llm_chain: Any = Field(
-        default_factory=_get_default_llm_chain_factory(PARSING_PATCH_PROMPT)
-    )
-    """LLMChain used to extract the response."""
-
-    def _run(self, text: str) -> str:
-        from langchain.output_parsers.json import parse_json_markdown
-        try:
-            data = parse_json_markdown(text)
-        except json.JSONDecodeError as e:
-            raise e
-        response = self.requests_wrapper.patch(data["url"], data["data"])
-        response = response[: self.response_length]
-        return self.llm_chain.predict(
-            response=response, instructions=data["output_instructions"]
-        ).strip()
-
-    async def _arun(self, text: str) -> str:
-        raise NotImplementedError()
-
-
-class RequestsPutToolWithParsing(BaseRequestsTool, BaseTool):
-    """Requests PUT tool with LLM-instructed extraction of truncated responses."""
-
-    name: str = "requests_put"
-    """Tool name."""
-    description = REQUESTS_PUT_TOOL_DESCRIPTION
-    """Tool description."""
-    response_length: Optional[int] = MAX_RESPONSE_LENGTH
-    """Maximum length of the response to be returned."""
-    llm_chain: Any = Field(
-        default_factory=_get_default_llm_chain_factory(PARSING_PUT_PROMPT)
-    )
-    """LLMChain used to extract the response."""
-
-    def _run(self, text: str) -> str:
-        from langchain.output_parsers.json import parse_json_markdown
-        try:
-            data = parse_json_markdown(text)
-        except json.JSONDecodeError as e:
-            raise e
-        response = self.requests_wrapper.put(data["url"], data["data"])
-        response = response[: self.response_length]
-        return self.llm_chain.predict(
-            response=response, instructions=data["output_instructions"]
-        ).strip()
-
-    async def _arun(self, text: str) -> str:
-        raise NotImplementedError()
-
-
-class RequestsDeleteToolWithParsing(BaseRequestsTool, BaseTool):
-    """A tool that sends a DELETE request and parses the response."""
-
-    name: str = "requests_delete"
-    """The name of the tool."""
-    description = REQUESTS_DELETE_TOOL_DESCRIPTION
-    """The description of the tool."""
-
-    response_length: Optional[int] = MAX_RESPONSE_LENGTH
-    """The maximum length of the response."""
-    llm_chain: Any = Field(
-        default_factory=_get_default_llm_chain_factory(PARSING_DELETE_PROMPT)
-    )
-    """The LLM chain used to parse the response."""
-
-    def _run(self, text: str) -> str:
-        from langchain.output_parsers.json import parse_json_markdown
-        try:
-            data = parse_json_markdown(text)
-        except json.JSONDecodeError as e:
-            raise e
-        response = self.requests_wrapper.delete(data["url"])
-        response = response[: self.response_length]
-        return self.llm_chain.predict(
-            response=response, instructions=data["output_instructions"]
-        ).strip()
-
-    async def _arun(self, text: str) -> str:
-        raise NotImplementedError()
-
-
-#
-# Orchestrator, planner, controller.
-#
-def _create_api_planner_tool(
-    api_spec: ReducedOpenAPISpec, llm: BaseLanguageModel
-) -> Tool:
-    from langchain.chains.llm import LLMChain
-    endpoint_descriptions = [
-        f"{name} {description}" for name, description, _ in api_spec.endpoints
-    ]
-    prompt = PromptTemplate(
-        template=API_PLANNER_PROMPT,
-        input_variables=["query"],
-        partial_variables={"endpoints": "- " + "- ".join(endpoint_descriptions)},
-    )
-    chain = LLMChain(llm=llm, prompt=prompt)
-    tool = Tool(
-        name=API_PLANNER_TOOL_NAME,
-        description=API_PLANNER_TOOL_DESCRIPTION,
-        func=chain.run,
-    )
-    return tool
-
-
-def _create_api_controller_agent(
-    api_url: str,
-    api_docs: str,
-    requests_wrapper: RequestsWrapper,
-    llm: BaseLanguageModel,
-) -> AgentExecutor:
-    from langchain.agents.mrkl.base import ZeroShotAgent
-    from langchain.agents.agent import AgentExecutor
-    from langchain.chains.llm import LLMChain
-    get_llm_chain = LLMChain(llm=llm, prompt=PARSING_GET_PROMPT)
-    post_llm_chain = LLMChain(llm=llm, prompt=PARSING_POST_PROMPT)
-    tools: List[BaseTool] = [
-        RequestsGetToolWithParsing(
-            requests_wrapper=requests_wrapper, llm_chain=get_llm_chain
-        ),
-        RequestsPostToolWithParsing(
-            requests_wrapper=requests_wrapper, llm_chain=post_llm_chain
-        ),
-    ]
-    prompt = PromptTemplate(
-        template=API_CONTROLLER_PROMPT,
-        input_variables=["input", "agent_scratchpad"],
-        partial_variables={
-            "api_url": api_url,
-            "api_docs": api_docs,
-            "tool_names": ", ".join([tool.name for tool in tools]),
-            "tool_descriptions": "\n".join(
-                [f"{tool.name}: {tool.description}" for tool in tools]
-            ),
-        },
-    )
-    agent = ZeroShotAgent(
-        llm_chain=LLMChain(llm=llm, prompt=prompt),
-        allowed_tools=[tool.name for tool in tools],
-    )
-    return AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)
-
-
-def _create_api_controller_tool(
-    api_spec: ReducedOpenAPISpec,
-    requests_wrapper: RequestsWrapper,
-    llm: BaseLanguageModel,
-) -> Tool:
-    """Expose controller as a tool.
-
-    The tool is invoked with a plan from the planner, and dynamically
-    creates a controller agent with relevant documentation only to
-    constrain the context.
-    """
-
-    base_url = api_spec.servers[0]["url"]  # TODO: do better.
-
-    def _create_and_run_api_controller_agent(plan_str: str) -> str:
-        pattern = r"\b(GET|POST|PATCH|DELETE)\s+(/\S+)*"
-        matches = re.findall(pattern, plan_str)
-        endpoint_names = [
-            "{method} {route}".format(method=method, route=route.split("?")[0])
-            for method, route in matches
-        ]
-        docs_str = ""
-        for endpoint_name in endpoint_names:
-            found_match = False
-            for name, _, docs in api_spec.endpoints:
-                regex_name = re.compile(re.sub("\{.*?\}", ".*", name))
-                if regex_name.match(endpoint_name):
-                    found_match = True
-                    docs_str += f"== Docs for {endpoint_name} == \n{yaml.dump(docs)}\n"
-            if not found_match:
-                raise ValueError(f"{endpoint_name} endpoint does not exist.")
-
-        agent = _create_api_controller_agent(base_url, docs_str, requests_wrapper, llm)
-        return agent.run(plan_str)
-
-    return Tool(
-        name=API_CONTROLLER_TOOL_NAME,
-        func=_create_and_run_api_controller_agent,
-        description=API_CONTROLLER_TOOL_DESCRIPTION,
-    )
-
-
-def create_openapi_agent(
-    api_spec: ReducedOpenAPISpec,
-    requests_wrapper: RequestsWrapper,
-    llm: BaseLanguageModel,
-    shared_memory: Optional[ReadOnlySharedMemory] = None,
-    callback_manager: Optional[BaseCallbackManager] = None,
-    verbose: bool = True,
-    agent_executor_kwargs: Optional[Dict[str, Any]] = None,
-    **kwargs: Any,
-) -> AgentExecutor:
-    """Instantiate OpenAI API planner and controller for a given spec.
-
-    Inject credentials via requests_wrapper.
-
-    We use a top-level "orchestrator" agent to invoke the planner and controller,
-    rather than a top-level planner
-    that invokes a controller with its plan. This is to keep the planner simple.
-    """
-    from langchain.agents.mrkl.base import ZeroShotAgent
-    from langchain.agents.agent import AgentExecutor
-    from langchain.chains.llm import LLMChain
-    tools = [
-        _create_api_planner_tool(api_spec, llm),
-        _create_api_controller_tool(api_spec, requests_wrapper, llm),
-    ]
-    prompt = PromptTemplate(
-        template=API_ORCHESTRATOR_PROMPT,
-        input_variables=["input", "agent_scratchpad"],
-        partial_variables={
-            "tool_names": ", ".join([tool.name for tool in tools]),
-            "tool_descriptions": "\n".join(
-                [f"{tool.name}: {tool.description}" for tool in tools]
-            ),
-        },
-    )
-    agent = ZeroShotAgent(
-        llm_chain=LLMChain(llm=llm, prompt=prompt, memory=shared_memory),
-        allowed_tools=[tool.name for tool in tools],
-        **kwargs,
-    )
-    return AgentExecutor.from_agent_and_tools(
-        agent=agent,
-        tools=tools,
-        callback_manager=callback_manager,
-        verbose=verbose,
-        **(agent_executor_kwargs or {}),
-    )
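The planner variant of create_openapi_agent above is driven from a reduced spec; a sketch, assuming a spec file and an auth header of your own (the token is a placeholder):

    import yaml

    from langchain_community.agent_toolkits.openapi import planner
    from langchain_community.agent_toolkits.openapi.spec import reduce_openapi_spec
    from langchain_community.llms import OpenAI
    from langchain_community.utilities.requests import RequestsWrapper

    with open("openapi.yaml") as f:
        raw_spec = yaml.safe_load(f)
    api_spec = reduce_openapi_spec(raw_spec)  # trims the spec to endpoint summaries
    requests_wrapper = RequestsWrapper(headers={"Authorization": "Bearer <token>"})
    agent = planner.create_openapi_agent(api_spec, requests_wrapper, OpenAI(temperature=0))
    agent.run("Make a plan to list the available resources, then execute it.")
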
diff --git a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/openapi/toolkit.py b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/openapi/toolkit.py
deleted file mode 100644
index 5b7f3fcbd52..00000000000
--- a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/openapi/toolkit.py
+++ /dev/null
@@ -1,90 +0,0 @@
-"""Requests toolkit."""
-from __future__ import annotations
-
-from typing import Any, List
-
-from langchain_core.language_models import BaseLanguageModel
-from langchain_core.tools import Tool
-
-from langchain_community.agent_toolkits.base import BaseToolkit
-from langchain_community.agent_toolkits.json.base import create_json_agent
-from langchain_community.agent_toolkits.json.toolkit import JsonToolkit
-from langchain_community.agent_toolkits.openapi.prompt import DESCRIPTION
-from langchain_community.tools import BaseTool
-from langchain_community.tools.json.tool import JsonSpec
-from langchain_community.tools.requests.tool import (
-    RequestsDeleteTool,
-    RequestsGetTool,
-    RequestsPatchTool,
-    RequestsPostTool,
-    RequestsPutTool,
-)
-from langchain_community.utilities.requests import TextRequestsWrapper
-
-
-class RequestsToolkit(BaseToolkit):
-    """Toolkit for making REST requests.
-
-    *Security Note*: This toolkit contains tools to make GET, POST, PATCH, PUT,
-        and DELETE requests to an API.
-
-        Exercise care in who is allowed to use this toolkit. If exposing
-        to end users, consider that users will be able to make arbitrary
-        requests on behalf of the server hosting the code. For example,
-        users could ask the server to make a request to a private API
-        that is only accessible from the server.
-
-        Control access to who can submit issue requests using this toolkit and
-        what network access it has.
-
-        See https://python.langchain.com/docs/security for more information.
-    """
-
-    requests_wrapper: TextRequestsWrapper
-
-    def get_tools(self) -> List[BaseTool]:
-        """Return a list of tools."""
-        return [
-            RequestsGetTool(requests_wrapper=self.requests_wrapper),
-            RequestsPostTool(requests_wrapper=self.requests_wrapper),
-            RequestsPatchTool(requests_wrapper=self.requests_wrapper),
-            RequestsPutTool(requests_wrapper=self.requests_wrapper),
-            RequestsDeleteTool(requests_wrapper=self.requests_wrapper),
-        ]
-
-
-class OpenAPIToolkit(BaseToolkit):
-    """Toolkit for interacting with an OpenAPI API.
-
-    *Security Note*: This toolkit contains tools that can read and modify
-        the state of a service; e.g., by creating, deleting, or updating,
-        reading underlying data.
-
-        For example, this toolkit can be used to delete data exposed via
-        an OpenAPI compliant API.
-    """
-
-    json_agent: Any
-    requests_wrapper: TextRequestsWrapper
-
-    def get_tools(self) -> List[BaseTool]:
-        """Get the tools in the toolkit."""
-        json_agent_tool = Tool(
-            name="json_explorer",
-            func=self.json_agent.run,
-            description=DESCRIPTION,
-        )
-        request_toolkit = RequestsToolkit(requests_wrapper=self.requests_wrapper)
-        return [*request_toolkit.get_tools(), json_agent_tool]
-
-    @classmethod
-    def from_llm(
-        cls,
-        llm: BaseLanguageModel,
-        json_spec: JsonSpec,
-        requests_wrapper: TextRequestsWrapper,
-        **kwargs: Any,
-    ) -> OpenAPIToolkit:
-        """Create json agent from llm, then initialize."""
-        json_agent = create_json_agent(llm, JsonToolkit(spec=json_spec), **kwargs)
-        return cls(json_agent=json_agent, requests_wrapper=requests_wrapper)
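The security note on RequestsToolkit above is easiest to audit by enumerating the tools it grants before handing them to an agent; a minimal sketch:

    from langchain_community.agent_toolkits.openapi.toolkit import RequestsToolkit
    from langchain_community.utilities.requests import TextRequestsWrapper

    toolkit = RequestsToolkit(requests_wrapper=TextRequestsWrapper(headers={}))
    for tool in toolkit.get_tools():  # GET, POST, PATCH, PUT, DELETE
        print(tool.name)
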
diff --git a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/powerbi/base.py b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/powerbi/base.py
deleted file mode 100644
index ef70ee7b43e..00000000000
--- a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/powerbi/base.py
+++ /dev/null
@@ -1,68 +0,0 @@
-"""Power BI agent."""
-from __future__ import annotations
-
-from typing import Any, Dict, List, Optional, TYPE_CHECKING
-
-from langchain_core.callbacks import BaseCallbackManager
-from langchain_core.language_models import BaseLanguageModel
-
-from langchain_community.agent_toolkits.powerbi.prompt import (
-    POWERBI_PREFIX,
-    POWERBI_SUFFIX,
-)
-from langchain_community.agent_toolkits.powerbi.toolkit import PowerBIToolkit
-from langchain_community.utilities.powerbi import PowerBIDataset
-
-if TYPE_CHECKING:
-    from langchain.agents import AgentExecutor
-
-
-def create_pbi_agent(
-    llm: BaseLanguageModel,
-    toolkit: Optional[PowerBIToolkit] = None,
-    powerbi: Optional[PowerBIDataset] = None,
-    callback_manager: Optional[BaseCallbackManager] = None,
-    prefix: str = POWERBI_PREFIX,
-    suffix: str = POWERBI_SUFFIX,
-    format_instructions: Optional[str] = None,
-    examples: Optional[str] = None,
-    input_variables: Optional[List[str]] = None,
-    top_k: int = 10,
-    verbose: bool = False,
-    agent_executor_kwargs: Optional[Dict[str, Any]] = None,
-    **kwargs: Any,
-) -> AgentExecutor:
-    """Construct a Power BI agent from an LLM and tools."""
-    from langchain.agents.mrkl.base import ZeroShotAgent
-    from langchain.agents import AgentExecutor
-    from langchain.chains.llm import LLMChain
-    if toolkit is None:
-        if powerbi is None:
-            raise ValueError("Must provide either a toolkit or powerbi dataset")
-        toolkit = PowerBIToolkit(powerbi=powerbi, llm=llm, examples=examples)
-    tools = toolkit.get_tools()
-    tables = powerbi.table_names if powerbi else toolkit.powerbi.table_names
-    prompt_params = {"format_instructions": format_instructions} if format_instructions is not None else {}
-    agent = ZeroShotAgent(
-        llm_chain=LLMChain(
-            llm=llm,
-            prompt=ZeroShotAgent.create_prompt(
-                tools,
-                prefix=prefix.format(top_k=top_k).format(tables=tables),
-                suffix=suffix,
-                input_variables=input_variables,
-                **prompt_params,
-            ),
-            callback_manager=callback_manager,  # type: ignore
-            verbose=verbose,
-        ),
-        allowed_tools=[tool.name for tool in tools],
-        **kwargs,
-    )
-    return AgentExecutor.from_agent_and_tools(
-        agent=agent,
-        tools=tools,
-        callback_manager=callback_manager,
-        verbose=verbose,
-        **(agent_executor_kwargs or {}),
-    )
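A sketch of calling create_pbi_agent above with a dataset rather than a prebuilt toolkit; the dataset id and table names are placeholders, and the azure-identity package is assumed for credentials:

    from azure.identity import DefaultAzureCredential

    from langchain_community.agent_toolkits import create_pbi_agent
    from langchain_community.llms import OpenAI
    from langchain_community.utilities.powerbi import PowerBIDataset

    powerbi = PowerBIDataset(
        dataset_id="<dataset-id>",  # placeholder
        table_names=["Sales"],  # placeholder
        credential=DefaultAzureCredential(),
    )
    agent = create_pbi_agent(llm=OpenAI(temperature=0), powerbi=powerbi, verbose=True)
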
- """ - from langchain.agents import AgentExecutor - from langchain.agents.conversational_chat.base import ConversationalChatAgent - from langchain.memory import ConversationBufferMemory - if toolkit is None: - if powerbi is None: - raise ValueError("Must provide either a toolkit or powerbi dataset") - toolkit = PowerBIToolkit(powerbi=powerbi, llm=llm, examples=examples) - tools = toolkit.get_tools() - tables = powerbi.table_names if powerbi else toolkit.powerbi.table_names - agent = ConversationalChatAgent.from_llm_and_tools( - llm=llm, - tools=tools, - system_message=prefix.format(top_k=top_k).format(tables=tables), - human_message=suffix, - input_variables=input_variables, - callback_manager=callback_manager, - output_parser=output_parser, - verbose=verbose, - **kwargs, - ) - return AgentExecutor.from_agent_and_tools( - agent=agent, - tools=tools, - callback_manager=callback_manager, - memory=memory - or ConversationBufferMemory(memory_key="chat_history", return_messages=True), - verbose=verbose, - **(agent_executor_kwargs or {}), - ) diff --git a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/powerbi/toolkit.py b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/powerbi/toolkit.py deleted file mode 100644 index bf4969b889c..00000000000 --- a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/powerbi/toolkit.py +++ /dev/null @@ -1,106 +0,0 @@ -"""Toolkit for interacting with a Power BI dataset.""" -from __future__ import annotations -from typing import List, Optional, Union, TYPE_CHECKING - -from langchain_core.callbacks import BaseCallbackManager -from langchain_core.language_models import BaseLanguageModel -from langchain_core.language_models.chat_models import BaseChatModel -from langchain_core.prompts import PromptTemplate -from langchain_core.prompts.chat import ( - ChatPromptTemplate, - HumanMessagePromptTemplate, - SystemMessagePromptTemplate, -) -from langchain_core.pydantic_v1 import Field - -from langchain_community.agent_toolkits.base import BaseToolkit -from langchain_community.tools import BaseTool -from langchain_community.tools.powerbi.prompt import ( - QUESTION_TO_QUERY_BASE, - SINGLE_QUESTION_TO_QUERY, - USER_INPUT, -) -from langchain_community.tools.powerbi.tool import ( - InfoPowerBITool, - ListPowerBITool, - QueryPowerBITool, -) -from langchain_community.utilities.powerbi import PowerBIDataset - -if TYPE_CHECKING: - from langchain.chains.llm import LLMChain - - -class PowerBIToolkit(BaseToolkit): - """Toolkit for interacting with Power BI dataset. - - *Security Note*: This toolkit interacts with an external service. - - Control access to who can use this toolkit. - - Make sure that the capabilities given by this toolkit to the calling - code are appropriately scoped to the application. - - See https://python.langchain.com/docs/security for more information. 
- """ - - powerbi: PowerBIDataset = Field(exclude=True) - llm: Union[BaseLanguageModel, BaseChatModel] = Field(exclude=True) - examples: Optional[str] = None - max_iterations: int = 5 - callback_manager: Optional[BaseCallbackManager] = None - output_token_limit: Optional[int] = None - tiktoken_model_name: Optional[str] = None - - class Config: - """Configuration for this pydantic object.""" - - arbitrary_types_allowed = True - - def get_tools(self) -> List[BaseTool]: - """Get the tools in the toolkit.""" - return [ - QueryPowerBITool( - llm_chain=self._get_chain(), - powerbi=self.powerbi, - examples=self.examples, - max_iterations=self.max_iterations, - output_token_limit=self.output_token_limit, - tiktoken_model_name=self.tiktoken_model_name, - ), - InfoPowerBITool(powerbi=self.powerbi), - ListPowerBITool(powerbi=self.powerbi), - ] - - def _get_chain(self) -> LLMChain: - """Construct the chain based on the callback manager and model type.""" - from langchain.chains.llm import LLMChain - if isinstance(self.llm, BaseLanguageModel): - return LLMChain( - llm=self.llm, - callback_manager=self.callback_manager - if self.callback_manager - else None, - prompt=PromptTemplate( - template=SINGLE_QUESTION_TO_QUERY, - input_variables=["tool_input", "tables", "schemas", "examples"], - ), - ) - - system_prompt = SystemMessagePromptTemplate( - prompt=PromptTemplate( - template=QUESTION_TO_QUERY_BASE, - input_variables=["tables", "schemas", "examples"], - ) - ) - human_prompt = HumanMessagePromptTemplate( - prompt=PromptTemplate( - template=USER_INPUT, - input_variables=["tool_input"], - ) - ) - return LLMChain( - llm=self.llm, - callback_manager=self.callback_manager if self.callback_manager else None, - prompt=ChatPromptTemplate.from_messages([system_prompt, human_prompt]), - ) diff --git a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/spark_sql/base.py b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/spark_sql/base.py deleted file mode 100644 index 8b5cd9d9078..00000000000 --- a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/spark_sql/base.py +++ /dev/null @@ -1,64 +0,0 @@ -"""Spark SQL agent.""" -from __future__ import annotations -from typing import Any, Dict, List, Optional, TYPE_CHECKING - -from langchain_core.callbacks import BaseCallbackManager, Callbacks -from langchain_core.language_models import BaseLanguageModel - -from langchain_community.agent_toolkits.spark_sql.prompt import SQL_PREFIX, SQL_SUFFIX -from langchain_community.agent_toolkits.spark_sql.toolkit import SparkSQLToolkit - -if TYPE_CHECKING: - from langchain.agents.agent import AgentExecutor - - -def create_spark_sql_agent( - llm: BaseLanguageModel, - toolkit: SparkSQLToolkit, - callback_manager: Optional[BaseCallbackManager] = None, - callbacks: Callbacks = None, - prefix: str = SQL_PREFIX, - suffix: str = SQL_SUFFIX, - format_instructions: Optional[str] = None, - input_variables: Optional[List[str]] = None, - top_k: int = 10, - max_iterations: Optional[int] = 15, - max_execution_time: Optional[float] = None, - early_stopping_method: str = "force", - verbose: bool = False, - agent_executor_kwargs: Optional[Dict[str, Any]] = None, - **kwargs: Any, -) -> AgentExecutor: - """Construct a Spark SQL agent from an LLM and tools.""" - from langchain.agents.agent import AgentExecutor - from langchain.agents.mrkl.base import ZeroShotAgent - from langchain.chains.llm import LLMChain - tools = toolkit.get_tools() - prefix = prefix.format(top_k=top_k) - 
diff --git a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/spark_sql/base.py b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/spark_sql/base.py
deleted file mode 100644
index 8b5cd9d9078..00000000000
--- a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/spark_sql/base.py
+++ /dev/null
@@ -1,64 +0,0 @@
-"""Spark SQL agent."""
-from __future__ import annotations
-from typing import Any, Dict, List, Optional, TYPE_CHECKING
-
-from langchain_core.callbacks import BaseCallbackManager, Callbacks
-from langchain_core.language_models import BaseLanguageModel
-
-from langchain_community.agent_toolkits.spark_sql.prompt import SQL_PREFIX, SQL_SUFFIX
-from langchain_community.agent_toolkits.spark_sql.toolkit import SparkSQLToolkit
-
-if TYPE_CHECKING:
-    from langchain.agents.agent import AgentExecutor
-
-
-def create_spark_sql_agent(
-    llm: BaseLanguageModel,
-    toolkit: SparkSQLToolkit,
-    callback_manager: Optional[BaseCallbackManager] = None,
-    callbacks: Callbacks = None,
-    prefix: str = SQL_PREFIX,
-    suffix: str = SQL_SUFFIX,
-    format_instructions: Optional[str] = None,
-    input_variables: Optional[List[str]] = None,
-    top_k: int = 10,
-    max_iterations: Optional[int] = 15,
-    max_execution_time: Optional[float] = None,
-    early_stopping_method: str = "force",
-    verbose: bool = False,
-    agent_executor_kwargs: Optional[Dict[str, Any]] = None,
-    **kwargs: Any,
-) -> AgentExecutor:
-    """Construct a Spark SQL agent from an LLM and tools."""
-    from langchain.agents.agent import AgentExecutor
-    from langchain.agents.mrkl.base import ZeroShotAgent
-    from langchain.chains.llm import LLMChain
-    tools = toolkit.get_tools()
-    prefix = prefix.format(top_k=top_k)
-    prompt_params = {"format_instructions": format_instructions} if format_instructions is not None else {}
-    prompt = ZeroShotAgent.create_prompt(
-        tools,
-        prefix=prefix,
-        suffix=suffix,
-        input_variables=input_variables,
-        **prompt_params,
-    )
-    llm_chain = LLMChain(
-        llm=llm,
-        prompt=prompt,
-        callback_manager=callback_manager,
-        callbacks=callbacks,
-    )
-    tool_names = [tool.name for tool in tools]
-    agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs)
-    return AgentExecutor.from_agent_and_tools(
-        agent=agent,
-        tools=tools,
-        callback_manager=callback_manager,
-        callbacks=callbacks,
-        verbose=verbose,
-        max_iterations=max_iterations,
-        max_execution_time=max_execution_time,
-        early_stopping_method=early_stopping_method,
-        **(agent_executor_kwargs or {}),
-    )
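A usage sketch for create_spark_sql_agent above, assuming a running Spark session; the schema name is an assumption:

    from langchain_community.agent_toolkits import SparkSQLToolkit, create_spark_sql_agent
    from langchain_community.llms import OpenAI
    from langchain_community.utilities.spark_sql import SparkSQL

    spark_sql = SparkSQL(schema="langchain_example")  # schema name is illustrative
    llm = OpenAI(temperature=0)
    toolkit = SparkSQLToolkit(db=spark_sql, llm=llm)
    agent = create_spark_sql_agent(llm=llm, toolkit=toolkit, verbose=True)
    agent.run("How many tables are in this schema?")
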
diff --git a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/sql/base.py b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/sql/base.py
deleted file mode 100644
index bab14e6497a..00000000000
--- a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/sql/base.py
+++ /dev/null
@@ -1,102 +0,0 @@
-"""SQL agent."""
-from __future__ import annotations
-from typing import Any, Dict, List, Optional, Sequence, TYPE_CHECKING
-
-from langchain_core.callbacks import BaseCallbackManager
-from langchain_core.language_models import BaseLanguageModel
-from langchain_core.messages import AIMessage, SystemMessage
-from langchain_core.prompts.chat import (
-    ChatPromptTemplate,
-    HumanMessagePromptTemplate,
-    MessagesPlaceholder,
-)
-
-from langchain_community.agent_toolkits.sql.prompt import (
-    SQL_FUNCTIONS_SUFFIX,
-    SQL_PREFIX,
-    SQL_SUFFIX,
-)
-from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
-from langchain_community.tools import BaseTool
-
-if TYPE_CHECKING:
-    from langchain.agents.agent import AgentExecutor
-    from langchain.agents.agent_types import AgentType
-
-
-def create_sql_agent(
-    llm: BaseLanguageModel,
-    toolkit: SQLDatabaseToolkit,
-    agent_type: Optional[AgentType] = None,
-    callback_manager: Optional[BaseCallbackManager] = None,
-    prefix: str = SQL_PREFIX,
-    suffix: Optional[str] = None,
-    format_instructions: Optional[str] = None,
-    input_variables: Optional[List[str]] = None,
-    top_k: int = 10,
-    max_iterations: Optional[int] = 15,
-    max_execution_time: Optional[float] = None,
-    early_stopping_method: str = "force",
-    verbose: bool = False,
-    agent_executor_kwargs: Optional[Dict[str, Any]] = None,
-    extra_tools: Sequence[BaseTool] = (),
-    **kwargs: Any,
-) -> AgentExecutor:
-    """Construct an SQL agent from an LLM and tools."""
-    from langchain.agents.agent import AgentExecutor, BaseSingleActionAgent
-    from langchain.agents.agent_types import AgentType
-    from langchain.agents.mrkl.base import ZeroShotAgent
-    from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
-    from langchain.chains.llm import LLMChain
-    agent_type = agent_type or AgentType.ZERO_SHOT_REACT_DESCRIPTION
-    tools = toolkit.get_tools() + list(extra_tools)
-    prefix = prefix.format(dialect=toolkit.dialect, top_k=top_k)
-    agent: BaseSingleActionAgent
-
-    if agent_type == AgentType.ZERO_SHOT_REACT_DESCRIPTION:
-        prompt_params = {"format_instructions": format_instructions} if format_instructions is not None else {}
-        prompt = ZeroShotAgent.create_prompt(
-            tools,
-            prefix=prefix,
-            suffix=suffix or SQL_SUFFIX,
-            input_variables=input_variables,
-            **prompt_params,
-        )
-        llm_chain = LLMChain(
-            llm=llm,
-            prompt=prompt,
-            callback_manager=callback_manager,
-        )
-        tool_names = [tool.name for tool in tools]
-        agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs)
-
-    elif agent_type == AgentType.OPENAI_FUNCTIONS:
-        messages = [
-            SystemMessage(content=prefix),
-            HumanMessagePromptTemplate.from_template("{input}"),
-            AIMessage(content=suffix or SQL_FUNCTIONS_SUFFIX),
-            MessagesPlaceholder(variable_name="agent_scratchpad"),
-        ]
-        input_variables = ["input", "agent_scratchpad"]
-        _prompt = ChatPromptTemplate(input_variables=input_variables, messages=messages)
-
-        agent = OpenAIFunctionsAgent(
-            llm=llm,
-            prompt=_prompt,
-            tools=tools,
-            callback_manager=callback_manager,
-            **kwargs,
-        )
-    else:
-        raise ValueError(f"Agent type {agent_type} not supported at the moment.")
-
-    return AgentExecutor.from_agent_and_tools(
-        agent=agent,
-        tools=tools,
-        callback_manager=callback_manager,
-        verbose=verbose,
-        max_iterations=max_iterations,
-        max_execution_time=max_execution_time,
-        early_stopping_method=early_stopping_method,
-        **(agent_executor_kwargs or {}),
-    )
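A usage sketch for create_sql_agent above, using a local SQLite file as a stand-in database (the file name is an example):

    from langchain_community.agent_toolkits import SQLDatabaseToolkit, create_sql_agent
    from langchain_community.llms import OpenAI
    from langchain_community.utilities.sql_database import SQLDatabase

    db = SQLDatabase.from_uri("sqlite:///Chinook.db")  # example database file
    llm = OpenAI(temperature=0)
    toolkit = SQLDatabaseToolkit(db=db, llm=llm)
    agent = create_sql_agent(llm=llm, toolkit=toolkit, verbose=True)
    agent.run("Which table has the most rows?")
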
"ArthurCallbackHandler", - "ClearMLCallbackHandler", - "CometCallbackHandler", - "ContextCallbackHandler", - "HumanApprovalCallbackHandler", - "InfinoCallbackHandler", - "MlflowCallbackHandler", - "LLMonitorCallbackHandler", - "OpenAICallbackHandler", - "LLMThoughtLabeler", - "StreamlitCallbackHandler", - "WandbCallbackHandler", - "WhyLabsCallbackHandler", - "get_openai_callback", - "wandb_tracing_enabled", - "FlyteCallbackHandler", - "SageMakerCallbackHandler", - "LabelStudioCallbackHandler", - "TrubricsCallbackHandler", -] diff --git a/.scripts/community_split/libs/community/langchain_community/callbacks/manager.py b/.scripts/community_split/libs/community/langchain_community/callbacks/manager.py deleted file mode 100644 index 196afed3d0f..00000000000 --- a/.scripts/community_split/libs/community/langchain_community/callbacks/manager.py +++ /dev/null @@ -1,69 +0,0 @@ -from __future__ import annotations - -import logging -from contextlib import contextmanager -from contextvars import ContextVar -from typing import ( - Generator, - Optional, -) - -from langchain_core.tracers.context import register_configure_hook - -from langchain_community.callbacks.openai_info import OpenAICallbackHandler -from langchain_community.callbacks.tracers.wandb import WandbTracer - -logger = logging.getLogger(__name__) - -openai_callback_var: ContextVar[Optional[OpenAICallbackHandler]] = ContextVar( - "openai_callback", default=None -) -wandb_tracing_callback_var: ContextVar[Optional[WandbTracer]] = ContextVar( # noqa: E501 - "tracing_wandb_callback", default=None -) - -register_configure_hook(openai_callback_var, True) -register_configure_hook( - wandb_tracing_callback_var, True, WandbTracer, "LANGCHAIN_WANDB_TRACING" -) - - -@contextmanager -def get_openai_callback() -> Generator[OpenAICallbackHandler, None, None]: - """Get the OpenAI callback handler in a context manager. - which conveniently exposes token and cost information. - - Returns: - OpenAICallbackHandler: The OpenAI callback handler. - - Example: - >>> with get_openai_callback() as cb: - ... # Use the OpenAI callback handler - """ - cb = OpenAICallbackHandler() - openai_callback_var.set(cb) - yield cb - openai_callback_var.set(None) - - -@contextmanager -def wandb_tracing_enabled( - session_name: str = "default", -) -> Generator[None, None, None]: - """Get the WandbTracer in a context manager. - - Args: - session_name (str, optional): The name of the session. - Defaults to "default". - - Returns: - None - - Example: - >>> with wandb_tracing_enabled() as session: - ... 
# Use the WandbTracer session - """ - cb = WandbTracer() - wandb_tracing_callback_var.set(cb) - yield None - wandb_tracing_callback_var.set(None) diff --git a/.scripts/community_split/libs/community/langchain_community/callbacks/tracers/__init__.py b/.scripts/community_split/libs/community/langchain_community/callbacks/tracers/__init__.py deleted file mode 100644 index 8af691585a6..00000000000 --- a/.scripts/community_split/libs/community/langchain_community/callbacks/tracers/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -"""Tracers that record execution of LangChain runs.""" - -from langchain_core.tracers.langchain import LangChainTracer -from langchain_core.tracers.langchain_v1 import LangChainTracerV1 -from langchain_core.tracers.stdout import ( - ConsoleCallbackHandler, - FunctionCallbackHandler, -) - -from langchain_community.callbacks.tracers.wandb import WandbTracer - -__all__ = [ - "ConsoleCallbackHandler", - "FunctionCallbackHandler", - "LangChainTracer", - "LangChainTracerV1", - "WandbTracer", -] diff --git a/.scripts/community_split/libs/community/langchain_community/document_loaders/base.py b/.scripts/community_split/libs/community/langchain_community/document_loaders/base.py deleted file mode 100644 index 770cae74429..00000000000 --- a/.scripts/community_split/libs/community/langchain_community/document_loaders/base.py +++ /dev/null @@ -1,101 +0,0 @@ -"""Abstract interface for document loader implementations.""" -from __future__ import annotations -from abc import ABC, abstractmethod -from typing import Iterator, List, Optional, TYPE_CHECKING - -from langchain_core.documents import Document - -from langchain_community.document_loaders.blob_loaders import Blob - -if TYPE_CHECKING: - from langchain.text_splitter import TextSplitter - - -class BaseLoader(ABC): - """Interface for Document Loader. - - Implementations should implement the lazy-loading method using generators - to avoid loading all Documents into memory at once. - - The `load` method will remain as is for backwards compatibility, but its - implementation should be just `list(self.lazy_load())`. - """ - - # Sub-classes should implement this method - # as return list(self.lazy_load()). - # This method returns a List which is materialized in memory. - @abstractmethod - def load(self) -> List[Document]: - """Load data into Document objects.""" - - def load_and_split( - self, text_splitter: Optional[TextSplitter] = None - ) -> List[Document]: - """Load Documents and split into chunks. Chunks are returned as Documents. - - Args: - text_splitter: TextSplitter instance to use for splitting documents. - Defaults to RecursiveCharacterTextSplitter. - - Returns: - List of Documents. - """ - from langchain.text_splitter import RecursiveCharacterTextSplitter - - if text_splitter is None: - _text_splitter: TextSplitter = RecursiveCharacterTextSplitter() - else: - _text_splitter = text_splitter - docs = self.load() - return _text_splitter.split_documents(docs) - - # Attention: This method will be upgraded into an abstractmethod once it's - # implemented in all the existing subclasses. - def lazy_load( - self, - ) -> Iterator[Document]: - """A lazy loader for Documents.""" - raise NotImplementedError( - f"{self.__class__.__name__} does not implement lazy_load()" - ) - - -class BaseBlobParser(ABC): - """Abstract interface for blob parsers. - - A blob parser provides a way to parse raw data stored in a blob into one - or more documents. 
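-
-    For example, a minimal parser (an illustrative sketch only; ``MyParser``
-    is a hypothetical name) might emit one document per blob:
-
-    .. code-block:: python
-
-        from typing import Iterator
-
-        from langchain_core.documents import Document
-
-        class MyParser(BaseBlobParser):
-            def lazy_parse(self, blob: Blob) -> Iterator[Document]:
-                # A real parser may yield several documents per blob.
-                yield Document(
-                    page_content=blob.as_string(),
-                    metadata={"source": blob.source},
-                )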
-
-    The parser can be composed with blob loaders, making it easy to reuse
-    a parser independent of how the blob was originally loaded.
-    """
-
-    @abstractmethod
-    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
-        """Lazy parsing interface.
-
-        Subclasses are required to implement this method.
-
-        Args:
-            blob: Blob instance
-
-        Returns:
-            Generator of documents
-        """
-
-    def parse(self, blob: Blob) -> List[Document]:
-        """Eagerly parse the blob into a document or documents.
-
-        This is a convenience method for interactive development environments.
-
-        Production applications should favor the lazy_parse method instead.
-
-        Subclasses should generally not override this parse method.
-
-        Args:
-            blob: Blob instance
-
-        Returns:
-            List of documents
-        """
-        return list(self.lazy_parse(blob))
diff --git a/.scripts/community_split/libs/community/langchain_community/document_loaders/blob_loaders/file_system.py b/.scripts/community_split/libs/community/langchain_community/document_loaders/blob_loaders/file_system.py
deleted file mode 100644
index 0fcdd4438ee..00000000000
--- a/.scripts/community_split/libs/community/langchain_community/document_loaders/blob_loaders/file_system.py
+++ /dev/null
@@ -1,147 +0,0 @@
-"""Use to load blobs from the local file system."""
-from pathlib import Path
-from typing import Callable, Iterable, Iterator, Optional, Sequence, TypeVar, Union
-
-from langchain_community.document_loaders.blob_loaders.schema import Blob, BlobLoader
-
-T = TypeVar("T")
-
-
-def _make_iterator(
-    length_func: Callable[[], int], show_progress: bool = False
-) -> Callable[[Iterable[T]], Iterator[T]]:
-    """Create a function that optionally wraps an iterable in tqdm."""
-    if show_progress:
-        try:
-            from tqdm.auto import tqdm
-        except ImportError:
-            raise ImportError(
-                "You must install tqdm to use show_progress=True. "
-                "You can install tqdm with `pip install tqdm`."
-            )
-
-        # Make sure to provide `total` here so that tqdm can show
-        # a progress bar that takes into account the total number of files.
-        def _with_tqdm(iterable: Iterable[T]) -> Iterator[T]:
-            """Wrap an iterable in a tqdm progress bar."""
-            return tqdm(iterable, total=length_func())
-
-        iterator = _with_tqdm
-    else:
-        iterator = iter  # type: ignore
-
-    return iterator
-
-
-# PUBLIC API
-
-
-class FileSystemBlobLoader(BlobLoader):
-    """Load blobs in the local file system.
-
-    Example:
-
-    .. code-block:: python
-
-        from langchain_community.document_loaders.blob_loaders import FileSystemBlobLoader
-        loader = FileSystemBlobLoader("/path/to/directory")
-        for blob in loader.yield_blobs():
-            print(blob)
-    """  # noqa: E501
-
-    def __init__(
-        self,
-        path: Union[str, Path],
-        *,
-        glob: str = "**/[!.]*",
-        exclude: Sequence[str] = (),
-        suffixes: Optional[Sequence[str]] = None,
-        show_progress: bool = False,
-    ) -> None:
-        """Initialize with a path to directory and how to glob over it.
-
-        Args:
-            path: Path to directory to load from or path to file to load.
-                  If a path to a file is provided, glob/exclude/suffixes are ignored.
-            glob: Glob pattern relative to the specified path,
-                  by default set to pick up all non-hidden files.
-            exclude: Patterns to exclude from results, using glob syntax.
-            suffixes: Provide to keep only files with these suffixes.
-                      Useful when wanting to keep files with different suffixes.
-                      Suffixes must include the dot, e.g. ".txt"
-            show_progress: If true, will show a progress bar as the files are loaded.
-                           This forces an iteration through all matching files
-                           to count them prior to loading them.
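-
-        The same counting pass is exposed directly as
-        ``count_matching_files`` (a usage sketch; paths are placeholders):
-
-        .. code-block:: python
-
-            loader = FileSystemBlobLoader("/path/to/directory", suffixes=[".txt"])
-            # Counts matching files without reading their contents.
-            n = loader.count_matching_files()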
-
-        Examples:
-
-        .. code-block:: python
-
-            from langchain_community.document_loaders.blob_loaders import FileSystemBlobLoader
-
-            # Load a single file.
-            loader = FileSystemBlobLoader("/path/to/file.txt")
-
-            # Recursively load all text files in a directory.
-            loader = FileSystemBlobLoader("/path/to/directory", glob="**/*.txt")
-
-            # Recursively load all non-hidden files in a directory.
-            loader = FileSystemBlobLoader("/path/to/directory", glob="**/[!.]*")
-
-            # Load all files in a directory without recursion.
-            loader = FileSystemBlobLoader("/path/to/directory", glob="*")
-
-            # Recursively load all files in a directory, except for py or pyc files.
-            loader = FileSystemBlobLoader(
-                "/path/to/directory",
-                glob="**/*",
-                exclude=["**/*.py", "**/*.pyc"]
-            )
-        """  # noqa: E501
-        if isinstance(path, Path):
-            _path = path
-        elif isinstance(path, str):
-            _path = Path(path)
-        else:
-            raise TypeError(f"Expected str or Path, got {type(path)}")
-
-        self.path = _path.expanduser()  # Expand user to handle ~
-        self.glob = glob
-        self.suffixes = set(suffixes or [])
-        self.show_progress = show_progress
-        self.exclude = exclude
-
-    def yield_blobs(
-        self,
-    ) -> Iterable[Blob]:
-        """Yield blobs that match the requested pattern."""
-        iterator = _make_iterator(
-            length_func=self.count_matching_files, show_progress=self.show_progress
-        )
-
-        for path in iterator(self._yield_paths()):
-            yield Blob.from_path(path)
-
-    def _yield_paths(self) -> Iterable[Path]:
-        """Yield paths that match the requested pattern."""
-        if self.path.is_file():
-            yield self.path
-            return
-
-        paths = self.path.glob(self.glob)
-        for path in paths:
-            if self.exclude:
-                if any(path.match(glob) for glob in self.exclude):
-                    continue
-            if path.is_file():
-                if self.suffixes and path.suffix not in self.suffixes:
-                    continue
-                yield path
-
-    def count_matching_files(self) -> int:
-        """Count files that match the pattern without loading them."""
-        # Carry out a full iteration to count the files without
-        # materializing anything expensive in memory.
-        num = 0
-        for _ in self._yield_paths():
-            num += 1
-        return num
diff --git a/.scripts/community_split/libs/community/langchain_community/document_loaders/generic.py b/.scripts/community_split/libs/community/langchain_community/document_loaders/generic.py
deleted file mode 100644
index 0ec6ca60bdf..00000000000
--- a/.scripts/community_split/libs/community/langchain_community/document_loaders/generic.py
+++ /dev/null
@@ -1,190 +0,0 @@
-from __future__ import annotations
-
-from pathlib import Path
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Iterator,
-    List,
-    Literal,
-    Optional,
-    Sequence,
-    Union,
-)
-
-from langchain_core.documents import Document
-
-from langchain_community.document_loaders.base import BaseBlobParser, BaseLoader
-from langchain_community.document_loaders.blob_loaders import (
-    BlobLoader,
-    FileSystemBlobLoader,
-)
-from langchain_community.document_loaders.parsers.registry import get_parser
-
-if TYPE_CHECKING:
-    from langchain.text_splitter import TextSplitter
-
-_PathLike = Union[str, Path]
-
-DEFAULT = Literal["default"]
-
-
-class GenericLoader(BaseLoader):
-    """Generic Document Loader.
-
-    A generic document loader that allows combining an arbitrary blob loader with
-    a blob parser.
-
-    Examples:
-
-       Parse a specific PDF file:
-
-        .. code-block:: python
-
-            from langchain_community.document_loaders import GenericLoader
-            from langchain_community.document_loaders.parsers.pdf import PyPDFParser
-
-            # Parse a specific PDF file.
-            loader = GenericLoader.from_filesystem(
-                "my_lovely_pdf.pdf",
-                parser=PyPDFParser()
-            )
-
-        .. code-block:: python
-
-            from langchain_community.document_loaders import GenericLoader
-            from langchain_community.document_loaders.blob_loaders import FileSystemBlobLoader
-
-
-            loader = GenericLoader.from_filesystem(
-                path="path/to/directory",
-                glob="**/[!.]*",
-                suffixes=[".pdf"],
-                show_progress=True,
-            )
-
-            docs = loader.lazy_load()
-            next(docs)
-
-    Example instantiations to change which files are loaded:
-
-        .. code-block:: python
-
-            # Recursively load all text files in a directory.
-            loader = GenericLoader.from_filesystem("/path/to/dir", glob="**/*.txt")
-
-            # Recursively load all non-hidden files in a directory.
-            loader = GenericLoader.from_filesystem("/path/to/dir", glob="**/[!.]*")
-
-            # Load all files in a directory without recursion.
-            loader = GenericLoader.from_filesystem("/path/to/dir", glob="*")
-
-    Example instantiations to change which parser is used:
-
-        .. code-block:: python
-
-            from langchain_community.document_loaders.parsers.pdf import PyPDFParser
-
-            # Recursively load all PDF files in a directory.
-            loader = GenericLoader.from_filesystem(
-                "/path/to/dir",
-                glob="**/*.pdf",
-                parser=PyPDFParser()
-            )
-
-    """  # noqa: E501
-
-    def __init__(
-        self,
-        blob_loader: BlobLoader,
-        blob_parser: BaseBlobParser,
-    ) -> None:
-        """A generic document loader.
-
-        Args:
-            blob_loader: A blob loader which knows how to yield blobs
-            blob_parser: A blob parser which knows how to parse blobs into documents
-        """
-        self.blob_loader = blob_loader
-        self.blob_parser = blob_parser
-
-    def lazy_load(
-        self,
-    ) -> Iterator[Document]:
-        """Load documents lazily. Use this when working at a large scale."""
-        for blob in self.blob_loader.yield_blobs():
-            yield from self.blob_parser.lazy_parse(blob)
-
-    def load(self) -> List[Document]:
-        """Load all documents."""
-        return list(self.lazy_load())
-
-    def load_and_split(
-        self, text_splitter: Optional[TextSplitter] = None
-    ) -> List[Document]:
-        """Load all documents and split them into chunks."""
-        raise NotImplementedError(
-            "Loading and splitting is not yet implemented for generic loaders. "
-            "When implemented, it will be configurable via the initializer. "
-            "This method should not be used going forward."
-        )
-
-    @classmethod
-    def from_filesystem(
-        cls,
-        path: _PathLike,
-        *,
-        glob: str = "**/[!.]*",
-        exclude: Sequence[str] = (),
-        suffixes: Optional[Sequence[str]] = None,
-        show_progress: bool = False,
-        parser: Union[DEFAULT, BaseBlobParser] = "default",
-        parser_kwargs: Optional[dict] = None,
-    ) -> GenericLoader:
-        """Create a generic document loader using a filesystem blob loader.
-
-        Args:
-            path: The path to the directory to load documents from OR the path to a
-                  single file to load. If this is a file, glob, exclude, suffixes
-                  will be ignored.
-            glob: The glob pattern to use to find documents.
-            suffixes: The suffixes to use to filter documents. If None, all files
-                      matching the glob will be loaded.
-            exclude: A list of patterns to exclude from the loader.
-            show_progress: Whether to show a progress bar or not (requires tqdm).
-                           Proxies to the file system loader.
-            parser: A blob parser which knows how to parse blobs into documents,
-                    will instantiate a default parser if not provided.
-                    The default can be overridden by either passing a parser or
-                    setting the class attribute `blob_parser` (the latter
-                    should be used with inheritance).
-            parser_kwargs: Keyword arguments to pass to the parser.
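-
-            For example, a subclass can bind its own default parser by overriding
-            ``get_parser`` (a sketch; ``MyParser`` stands in for any concrete
-            ``BaseBlobParser`` implementation):
-
-            .. code-block:: python
-
-                class MyLoader(GenericLoader):
-                    @staticmethod
-                    def get_parser(**kwargs: Any) -> BaseBlobParser:
-                        # Used when parser="default" is requested.
-                        return MyParser(**kwargs)
-
-                loader = MyLoader.from_filesystem("/path/to/dir", glob="**/*.txt")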
-
-        Returns:
-            A generic document loader.
-        """
-        blob_loader = FileSystemBlobLoader(
-            path,
-            glob=glob,
-            exclude=exclude,
-            suffixes=suffixes,
-            show_progress=show_progress,
-        )
-        if isinstance(parser, str):
-            if parser == "default":
-                try:
-                    # If there is an implementation of get_parser on the class, use it.
-                    blob_parser = cls.get_parser(**(parser_kwargs or {}))
-                except NotImplementedError:
-                    # If not, then use the global registry.
-                    blob_parser = get_parser(parser)
-            else:
-                blob_parser = get_parser(parser)
-        else:
-            blob_parser = parser
-        return cls(blob_loader, blob_parser)
-
-    @staticmethod
-    def get_parser(**kwargs: Any) -> BaseBlobParser:
-        """Override this method to associate a default parser with the class."""
-        raise NotImplementedError()
diff --git a/.scripts/community_split/libs/community/langchain_community/document_loaders/parsers/generic.py b/.scripts/community_split/libs/community/langchain_community/document_loaders/parsers/generic.py
deleted file mode 100644
index 6b6b91b93ee..00000000000
--- a/.scripts/community_split/libs/community/langchain_community/document_loaders/parsers/generic.py
+++ /dev/null
@@ -1,70 +0,0 @@
-"""Code for generic / auxiliary parsers.
-
-This module contains some logic to help assemble more sophisticated parsers.
-"""
-from typing import Iterator, Mapping, Optional
-
-from langchain_core.documents import Document
-
-from langchain_community.document_loaders.base import BaseBlobParser
-from langchain_community.document_loaders.blob_loaders.schema import Blob
-
-
-class MimeTypeBasedParser(BaseBlobParser):
-    """Parser that uses `mime`-types to parse a blob.
-
-    This parser is useful for simple pipelines where the mime-type is sufficient
-    to determine how to parse a blob.
-
-    To use, configure handlers based on mime-types and pass them to the initializer.
-
-    Example:
-
-    .. code-block:: python
-
-        from langchain_community.document_loaders.parsers.generic import MimeTypeBasedParser
-
-        parser = MimeTypeBasedParser(
-            handlers={
-                "application/pdf": ...,
-            },
-            fallback_parser=...,
-        )
-    """  # noqa: E501
-
-    def __init__(
-        self,
-        handlers: Mapping[str, BaseBlobParser],
-        *,
-        fallback_parser: Optional[BaseBlobParser] = None,
-    ) -> None:
-        """Define a parser that uses mime-types to determine how to parse a blob.
-
-        Args:
-            handlers: A mapping from mime-types to functions that take a blob, parse it
-                      and return a document.
-            fallback_parser: A fallback parser to use if the mime-type is not
-                             found in the handlers. If provided, this parser will be
-                             used to parse blobs with all mime-types not found in
-                             the handlers.
-                             If not provided, a ValueError will be raised if the
-                             mime-type is not found in the handlers.
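-
-        For instance (a sketch; it assumes ``TextParser`` is available from
-        ``langchain_community.document_loaders.parsers.txt``):
-
-        .. code-block:: python
-
-            from langchain_community.document_loaders.parsers.txt import TextParser
-
-            parser = MimeTypeBasedParser(handlers={"text/plain": TextParser()})
-            # Without a fallback_parser, any other mime-type raises ValueError.
-            docs = parser.parse(Blob.from_data(b"hello", mime_type="text/plain"))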
- """ - self.handlers = handlers - self.fallback_parser = fallback_parser - - def lazy_parse(self, blob: Blob) -> Iterator[Document]: - """Load documents from a blob.""" - mimetype = blob.mimetype - - if mimetype is None: - raise ValueError(f"{blob} does not have a mimetype.") - - if mimetype in self.handlers: - handler = self.handlers[mimetype] - yield from handler.lazy_parse(blob) - else: - if self.fallback_parser is not None: - yield from self.fallback_parser.lazy_parse(blob) - else: - raise ValueError(f"Unsupported mime type: {mimetype}") diff --git a/.scripts/community_split/libs/community/langchain_community/document_loaders/parsers/language/language_parser.py b/.scripts/community_split/libs/community/langchain_community/document_loaders/parsers/language/language_parser.py deleted file mode 100644 index b87ca900657..00000000000 --- a/.scripts/community_split/libs/community/langchain_community/document_loaders/parsers/language/language_parser.py +++ /dev/null @@ -1,157 +0,0 @@ -from __future__ import annotations - -from typing import Any, Dict, Iterator, Optional, TYPE_CHECKING - -from langchain_core.documents import Document - -from langchain_community.document_loaders.base import BaseBlobParser -from langchain_community.document_loaders.blob_loaders import Blob -from langchain_community.document_loaders.parsers.language.cobol import CobolSegmenter -from langchain_community.document_loaders.parsers.language.javascript import ( - JavaScriptSegmenter, -) -from langchain_community.document_loaders.parsers.language.python import PythonSegmenter - -if TYPE_CHECKING: - from langchain.text_splitter import Language - -try: - from langchain.text_splitter import Language - LANGUAGE_EXTENSIONS: Dict[str, str] = { - "py": Language.PYTHON, - "js": Language.JS, - "cobol": Language.COBOL, - } - - LANGUAGE_SEGMENTERS: Dict[str, Any] = { - Language.PYTHON: PythonSegmenter, - Language.JS: JavaScriptSegmenter, - Language.COBOL: CobolSegmenter, - } -except ImportError: - LANGUAGE_EXTENSIONS = {} - LANGUAGE_SEGMENTERS = {} - - -class LanguageParser(BaseBlobParser): - """Parse using the respective programming language syntax. - - Each top-level function and class in the code is loaded into separate documents. - Furthermore, an extra document is generated, containing the remaining top-level code - that excludes the already segmented functions and classes. - - This approach can potentially improve the accuracy of QA models over source code. - - Currently, the supported languages for code parsing are Python and JavaScript. - - The language used for parsing can be configured, along with the minimum number of - lines required to activate the splitting based on syntax. - - Examples: - - .. code-block:: python - - from langchain.text_splitter.Language - from langchain_community.document_loaders.generic import GenericLoader - from langchain_community.document_loaders.parsers import LanguageParser - - loader = GenericLoader.from_filesystem( - "./code", - glob="**/*", - suffixes=[".py", ".js"], - parser=LanguageParser() - ) - docs = loader.load() - - Example instantiations to manually select the language: - - .. code-block:: python - - from langchain.text_splitter import Language - - loader = GenericLoader.from_filesystem( - "./code", - glob="**/*", - suffixes=[".py"], - parser=LanguageParser(language=Language.PYTHON) - ) - - Example instantiations to set number of lines threshold: - - .. 
-        .. code-block:: python
-
-            loader = GenericLoader.from_filesystem(
-                "./code",
-                glob="**/*",
-                suffixes=[".py"],
-                parser=LanguageParser(parser_threshold=200)
-            )
-    """
-
-    def __init__(self, language: Optional[Language] = None, parser_threshold: int = 0):
-        """
-        Language parser that splits code using the respective language syntax.
-
-        Args:
-            language: If None (default), it will try to infer language from source.
-            parser_threshold: Minimum lines needed to activate parsing (0 by default).
-        """
-        self.language = language
-        self.parser_threshold = parser_threshold
-
-    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
-        code = blob.as_string()
-
-        language = self.language or (
-            LANGUAGE_EXTENSIONS.get(blob.source.rsplit(".", 1)[-1])
-            if isinstance(blob.source, str)
-            else None
-        )
-
-        if language is None:
-            yield Document(
-                page_content=code,
-                metadata={
-                    "source": blob.source,
-                },
-            )
-            return
-
-        if self.parser_threshold >= len(code.splitlines()):
-            yield Document(
-                page_content=code,
-                metadata={
-                    "source": blob.source,
-                    "language": language,
-                },
-            )
-            return
-
-        segmenter = LANGUAGE_SEGMENTERS[language](code)
-        if not segmenter.is_valid():
-            yield Document(
-                page_content=code,
-                metadata={
-                    "source": blob.source,
-                },
-            )
-            return
-
-        for functions_classes in segmenter.extract_functions_classes():
-            yield Document(
-                page_content=functions_classes,
-                metadata={
-                    "source": blob.source,
-                    "content_type": "functions_classes",
-                    "language": language,
-                },
-            )
-        yield Document(
-            page_content=segmenter.simplify_code(),
-            metadata={
-                "source": blob.source,
-                "content_type": "simplified_code",
-                "language": language,
-            },
-        )
diff --git a/.scripts/community_split/libs/community/langchain_community/document_loaders/telegram.py b/.scripts/community_split/libs/community/langchain_community/document_loaders/telegram.py
deleted file mode 100644
index b4b796b6abc..00000000000
--- a/.scripts/community_split/libs/community/langchain_community/document_loaders/telegram.py
+++ /dev/null
@@ -1,262 +0,0 @@
-from __future__ import annotations
-
-import asyncio
-import json
-from pathlib import Path
-from typing import TYPE_CHECKING, Dict, List, Optional, Union
-
-from langchain_core.documents import Document
-
-from langchain_community.document_loaders.base import BaseLoader
-
-if TYPE_CHECKING:
-    import pandas as pd
-    from telethon.hints import EntityLike
-
-
-def concatenate_rows(row: dict) -> str:
-    """Combine message information in a readable format ready to be used."""
-    date = row["date"]
-    sender = row["from"]
-    text = row["text"]
-    return f"{sender} on {date}: {text}\n\n"
-
-
-class TelegramChatFileLoader(BaseLoader):
-    """Load from `Telegram chat` dump."""
-
-    def __init__(self, path: str):
-        """Initialize with a path."""
-        self.file_path = path
-
-    def load(self) -> List[Document]:
-        """Load documents."""
-        p = Path(self.file_path)
-
-        with open(p, encoding="utf8") as f:
-            d = json.load(f)
-
-        text = "".join(
-            concatenate_rows(message)
-            for message in d["messages"]
-            if message["type"] == "message" and isinstance(message["text"], str)
-        )
-        metadata = {"source": str(p)}
-
-        return [Document(page_content=text, metadata=metadata)]
-
-
-def text_to_docs(text: Union[str, List[str]]) -> List[Document]:
-    """Convert a string or list of strings to a list of Documents with metadata."""
-    from langchain.text_splitter import RecursiveCharacterTextSplitter
-
-    if isinstance(text, str):
-        # Take a single string as one page
-        text = [text]
-
-    page_docs = [Document(page_content=page) for page in text]
-
-    # Add page numbers as metadata
-    for i, doc in enumerate(page_docs):
-        doc.metadata["page"] = i + 1
-
-    # Split pages into chunks
-    doc_chunks = []
-
-    for doc in page_docs:
-        text_splitter = RecursiveCharacterTextSplitter(
-            chunk_size=800,
-            separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""],
-            chunk_overlap=20,
-        )
-        chunks = text_splitter.split_text(doc.page_content)
-        for i, chunk in enumerate(chunks):
-            chunk_doc = Document(
-                page_content=chunk, metadata={"page": doc.metadata["page"], "chunk": i}
-            )
-            # Add source as metadata
-            chunk_doc.metadata["source"] = (
-                f"{chunk_doc.metadata['page']}-{chunk_doc.metadata['chunk']}"
-            )
-            doc_chunks.append(chunk_doc)
-    return doc_chunks
-
-
-class TelegramChatApiLoader(BaseLoader):
-    """Load `Telegram` chat json directory dump."""
-
-    def __init__(
-        self,
-        chat_entity: Optional[EntityLike] = None,
-        api_id: Optional[int] = None,
-        api_hash: Optional[str] = None,
-        username: Optional[str] = None,
-        file_path: str = "telegram_data.json",
-    ):
-        """Initialize with API parameters.
-
-        Args:
-            chat_entity: The chat entity to fetch data from.
-            api_id: The API ID.
-            api_hash: The API hash.
-            username: The username.
-            file_path: The file path to save the data to. Defaults to
-                "telegram_data.json".
-        """
-        self.chat_entity = chat_entity
-        self.api_id = api_id
-        self.api_hash = api_hash
-        self.username = username
-        self.file_path = file_path
-
-    async def fetch_data_from_telegram(self) -> None:
-        """Fetch data from Telegram API and save it as a JSON file."""
-        from telethon.sync import TelegramClient
-
-        data = []
-        async with TelegramClient(self.username, self.api_id, self.api_hash) as client:
-            async for message in client.iter_messages(self.chat_entity):
-                is_reply = message.reply_to is not None
-                reply_to_id = message.reply_to.reply_to_msg_id if is_reply else None
-                data.append(
-                    {
-                        "sender_id": message.sender_id,
-                        "text": message.text,
-                        "date": message.date.isoformat(),
-                        "message.id": message.id,
-                        "is_reply": is_reply,
-                        "reply_to_id": reply_to_id,
-                    }
-                )
-
-        with open(self.file_path, "w", encoding="utf-8") as f:
-            json.dump(data, f, ensure_ascii=False, indent=4)
-
-    def _get_message_threads(self, data: pd.DataFrame) -> dict:
-        """Create a dictionary of message threads from the given data.
-
-        Args:
-            data (pd.DataFrame): A DataFrame containing the conversation \
-                data with columns:
-                - message.sender_id
-                - text
-                - date
-                - message.id
-                - is_reply
-                - reply_to_id
-
-        Returns:
-            dict: A dictionary where the key is the parent message ID and \
-                the value is a list of message IDs in ascending order.
-        """
-
-        def find_replies(parent_id: int, reply_data: pd.DataFrame) -> List[int]:
-            """
-            Recursively find all replies to a given parent message ID.
-
-            Args:
-                parent_id (int): The parent message ID.
-                reply_data (pd.DataFrame): A DataFrame containing reply messages.
-
-            Returns:
-                list: A list of message IDs that are replies to the parent message ID.
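-
-            For example (illustrative data only), with message 2 replying to
-            message 1 and message 3 replying to message 2:
-
-            .. code-block:: python
-
-                reply_data = pd.DataFrame(
-                    {"message.id": [2, 3], "reply_to_id": [1, 2]}
-                )
-                # Message 2 is a direct reply to 1; message 3 is found by
-                # recursing into message 2's replies.
-                find_replies(1, reply_data)  # -> [2, 3]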
- """ - # Find direct replies to the parent message ID - direct_replies = reply_data[reply_data["reply_to_id"] == parent_id][ - "message.id" - ].tolist() - - # Recursively find replies to the direct replies - all_replies = [] - for reply_id in direct_replies: - all_replies += [reply_id] + find_replies(reply_id, reply_data) - - return all_replies - - # Filter out parent messages - parent_messages = data[~data["is_reply"]] - - # Filter out reply messages and drop rows with NaN in 'reply_to_id' - reply_messages = data[data["is_reply"]].dropna(subset=["reply_to_id"]) - - # Convert 'reply_to_id' to integer - reply_messages["reply_to_id"] = reply_messages["reply_to_id"].astype(int) - - # Create a dictionary of message threads with parent message IDs as keys and \ - # lists of reply message IDs as values - message_threads = { - parent_id: [parent_id] + find_replies(parent_id, reply_messages) - for parent_id in parent_messages["message.id"] - } - - return message_threads - - def _combine_message_texts( - self, message_threads: Dict[int, List[int]], data: pd.DataFrame - ) -> str: - """ - Combine the message texts for each parent message ID based \ - on the list of message threads. - - Args: - message_threads (dict): A dictionary where the key is the parent message \ - ID and the value is a list of message IDs in ascending order. - data (pd.DataFrame): A DataFrame containing the conversation data: - - message.sender_id - - text - - date - - message.id - - is_reply - - reply_to_id - - Returns: - str: A combined string of message texts sorted by date. - """ - combined_text = "" - - # Iterate through sorted parent message IDs - for parent_id, message_ids in message_threads.items(): - # Get the message texts for the message IDs and sort them by date - message_texts = ( - data[data["message.id"].isin(message_ids)] - .sort_values(by="date")["text"] - .tolist() - ) - message_texts = [str(elem) for elem in message_texts] - - # Combine the message texts - combined_text += " ".join(message_texts) + ".\n" - - return combined_text.strip() - - def load(self) -> List[Document]: - """Load documents.""" - - if self.chat_entity is not None: - try: - import nest_asyncio - - nest_asyncio.apply() - asyncio.run(self.fetch_data_from_telegram()) - except ImportError: - raise ImportError( - """`nest_asyncio` package not found. - please install with `pip install nest_asyncio` - """ - ) - - p = Path(self.file_path) - - with open(p, encoding="utf8") as f: - d = json.load(f) - try: - import pandas as pd - except ImportError: - raise ImportError( - """`pandas` package not found. 
- please install with `pip install pandas` - """ - ) - normalized_messages = pd.json_normalize(d) - df = pd.DataFrame(normalized_messages) - - message_threads = self._get_message_threads(df) - combined_texts = self._combine_message_texts(message_threads, df) - - return text_to_docs(combined_texts) diff --git a/.scripts/community_split/libs/community/langchain_community/document_transformers/beautiful_soup_transformer.py b/.scripts/community_split/libs/community/langchain_community/document_transformers/beautiful_soup_transformer.py deleted file mode 100644 index 0e2b5d394c2..00000000000 --- a/.scripts/community_split/libs/community/langchain_community/document_transformers/beautiful_soup_transformer.py +++ /dev/null @@ -1,149 +0,0 @@ -from typing import Any, Iterator, List, Sequence, cast - -from langchain_core.documents import BaseDocumentTransformer, Document - - -class BeautifulSoupTransformer(BaseDocumentTransformer): - """Transform HTML content by extracting specific tags and removing unwanted ones. - - Example: - .. code-block:: python - - from langchain_community.document_transformers import BeautifulSoupTransformer - - bs4_transformer = BeautifulSoupTransformer() - docs_transformed = bs4_transformer.transform_documents(docs) - """ # noqa: E501 - - def __init__(self) -> None: - """ - Initialize the transformer. - - This checks if the BeautifulSoup4 package is installed. - If not, it raises an ImportError. - """ - try: - import bs4 # noqa:F401 - except ImportError: - raise ImportError( - "BeautifulSoup4 is required for BeautifulSoupTransformer. " - "Please install it with `pip install beautifulsoup4`." - ) - - def transform_documents( - self, - documents: Sequence[Document], - unwanted_tags: List[str] = ["script", "style"], - tags_to_extract: List[str] = ["p", "li", "div", "a"], - remove_lines: bool = True, - **kwargs: Any, - ) -> Sequence[Document]: - """ - Transform a list of Document objects by cleaning their HTML content. - - Args: - documents: A sequence of Document objects containing HTML content. - unwanted_tags: A list of tags to be removed from the HTML. - tags_to_extract: A list of tags whose content will be extracted. - remove_lines: If set to True, unnecessary lines will be - removed from the HTML content. - - Returns: - A sequence of Document objects with transformed content. - """ - for doc in documents: - cleaned_content = doc.page_content - - cleaned_content = self.remove_unwanted_tags(cleaned_content, unwanted_tags) - - cleaned_content = self.extract_tags(cleaned_content, tags_to_extract) - - if remove_lines: - cleaned_content = self.remove_unnecessary_lines(cleaned_content) - - doc.page_content = cleaned_content - - return documents - - @staticmethod - def remove_unwanted_tags(html_content: str, unwanted_tags: List[str]) -> str: - """ - Remove unwanted tags from a given HTML content. - - Args: - html_content: The original HTML content string. - unwanted_tags: A list of tags to be removed from the HTML. - - Returns: - A cleaned HTML string with unwanted tags removed. - """ - from bs4 import BeautifulSoup - - soup = BeautifulSoup(html_content, "html.parser") - for tag in unwanted_tags: - for element in soup.find_all(tag): - element.decompose() - return str(soup) - - @staticmethod - def extract_tags(html_content: str, tags: List[str]) -> str: - """ - Extract specific tags from a given HTML content. - - Args: - html_content: The original HTML content string. - tags: A list of tags to be extracted from the HTML. 
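-
-        For example (a small illustrative call):
-
-        .. code-block:: python
-
-            html = "<p>Hello</p><script>alert(1)</script>"
-            # Only the <p> text survives; <script> is not in `tags`.
-            BeautifulSoupTransformer.extract_tags(html, ["p"])  # -> "Hello"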
-
-        Returns:
-            A string combining the content of the extracted tags.
-        """
-        from bs4 import BeautifulSoup
-
-        soup = BeautifulSoup(html_content, "html.parser")
-        text_parts: List[str] = []
-        for element in soup.find_all():
-            if element.name in tags:
-                # Extract all navigable strings recursively from this element.
-                text_parts += get_navigable_strings(element)
-
-                # To avoid duplicate text, remove this element and all its
-                # descendants from the soup.
-                element.decompose()
-
-        return " ".join(text_parts)
-
-    @staticmethod
-    def remove_unnecessary_lines(content: str) -> str:
-        """
-        Clean up the content by removing unnecessary lines.
-
-        Args:
-            content: A string, which may contain unnecessary lines or spaces.
-
-        Returns:
-            A cleaned string with unnecessary lines removed.
-        """
-        lines = content.split("\n")
-        stripped_lines = [line.strip() for line in lines]
-        non_empty_lines = [line for line in stripped_lines if line]
-        cleaned_content = " ".join(non_empty_lines)
-        return cleaned_content
-
-    async def atransform_documents(
-        self,
-        documents: Sequence[Document],
-        **kwargs: Any,
-    ) -> Sequence[Document]:
-        raise NotImplementedError
-
-
-def get_navigable_strings(element: Any) -> Iterator[str]:
-    """Recursively yield the navigable strings beneath an element,
-    appending the link target to the text of ``<a>`` tags."""
-    from bs4 import NavigableString, Tag
-
-    for child in cast(Tag, element).children:
-        if isinstance(child, Tag):
-            yield from get_navigable_strings(child)
-        elif isinstance(child, NavigableString):
-            if (element.name == "a") and (href := element.get("href")):
-                yield f"{child.strip()} ({href})"
-            else:
-                yield child.strip()
diff --git a/.scripts/community_split/libs/community/langchain_community/document_transformers/openai_functions.py b/.scripts/community_split/libs/community/langchain_community/document_transformers/openai_functions.py
deleted file mode 100644
index b796e6db87b..00000000000
--- a/.scripts/community_split/libs/community/langchain_community/document_transformers/openai_functions.py
+++ /dev/null
@@ -1,140 +0,0 @@
-"""Document transformers that use OpenAI Functions models"""
-from typing import Any, Dict, Optional, Sequence, Type, Union
-
-from langchain_core.documents import BaseDocumentTransformer, Document
-from langchain_core.language_models import BaseLanguageModel
-from langchain_core.prompts import ChatPromptTemplate
-from langchain_core.pydantic_v1 import BaseModel
-
-
-class OpenAIMetadataTagger(BaseDocumentTransformer, BaseModel):
-    """Extract metadata tags from document contents using OpenAI functions.
-
-    Example:
-        .. code-block:: python
-
-            from langchain_community.chat_models import ChatOpenAI
-            from langchain_community.document_transformers import OpenAIMetadataTagger
-            from langchain_core.documents import Document
-
-            schema = {
-                "properties": {
-                    "movie_title": { "type": "string" },
-                    "critic": { "type": "string" },
-                    "tone": {
-                        "type": "string",
-                        "enum": ["positive", "negative"]
-                    },
-                    "rating": {
-                        "type": "integer",
-                        "description": "The number of stars the critic rated the movie"
-                    }
-                },
-                "required": ["movie_title", "critic", "tone"]
-            }
-
-            # Must be an OpenAI model that supports functions
-            llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613")
-            tagging_chain = create_tagging_chain(schema, llm)
-            document_transformer = OpenAIMetadataTagger(tagging_chain=tagging_chain)
-            original_documents = [
1 out of 5 stars.", metadata={"reliable": False}), - ] - - enhanced_documents = document_transformer.transform_documents(original_documents) - """ # noqa: E501 - - tagging_chain: Any - """The chain used to extract metadata from each document.""" - - def transform_documents( - self, documents: Sequence[Document], **kwargs: Any - ) -> Sequence[Document]: - """Automatically extract and populate metadata - for each document according to the provided schema.""" - - new_documents = [] - - for document in documents: - extracted_metadata: Dict = self.tagging_chain.run(document.page_content) # type: ignore[assignment] # noqa: E501 - new_document = Document( - page_content=document.page_content, - metadata={**extracted_metadata, **document.metadata}, - ) - new_documents.append(new_document) - return new_documents - - async def atransform_documents( - self, documents: Sequence[Document], **kwargs: Any - ) -> Sequence[Document]: - raise NotImplementedError - - -def create_metadata_tagger( - metadata_schema: Union[Dict[str, Any], Type[BaseModel]], - llm: BaseLanguageModel, - prompt: Optional[ChatPromptTemplate] = None, - *, - tagging_chain_kwargs: Optional[Dict] = None, -) -> OpenAIMetadataTagger: - """Create a DocumentTransformer that uses an OpenAI function chain to automatically - tag documents with metadata based on their content and an input schema. - - Args: - metadata_schema: Either a dictionary or pydantic.BaseModel class. If a dictionary - is passed in, it's assumed to already be a valid JsonSchema. - For best results, pydantic.BaseModels should have docstrings describing what - the schema represents and descriptions for the parameters. - llm: Language model to use, assumed to support the OpenAI function-calling API. - Defaults to use "gpt-3.5-turbo-0613" - prompt: BasePromptTemplate to pass to the model. - - Returns: - An LLMChain that will pass the given function to the model. - - Example: - .. code-block:: python - - from langchain_community.chat_models import ChatOpenAI - from langchain_community.document_transformers import create_metadata_tagger - from langchain_core.documents import Document - - schema = { - "properties": { - "movie_title": { "type": "string" }, - "critic": { "type": "string" }, - "tone": { - "type": "string", - "enum": ["positive", "negative"] - }, - "rating": { - "type": "integer", - "description": "The number of stars the critic rated the movie" - } - }, - "required": ["movie_title", "critic", "tone"] - } - - # Must be an OpenAI model that supports functions - llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613") - - document_transformer = create_metadata_tagger(schema, llm) - original_documents = [ - Document(page_content="Review of The Bee Movie\nBy Roger Ebert\n\nThis is the greatest movie ever made. 4 out of 5 stars."), - Document(page_content="Review of The Godfather\nBy Anonymous\n\nThis movie was super boring. 
1 out of 5 stars.", metadata={"reliable": False}), - ] - - enhanced_documents = document_transformer.transform_documents(original_documents) - """ # noqa: E501 - from langchain.chains.openai_functions import create_tagging_chain - metadata_schema = ( - metadata_schema - if isinstance(metadata_schema, dict) - else metadata_schema.schema() - ) - _tagging_chain_kwargs = tagging_chain_kwargs or {} - tagging_chain = create_tagging_chain( - metadata_schema, llm, prompt=prompt, **_tagging_chain_kwargs - ) - return OpenAIMetadataTagger(tagging_chain=tagging_chain) diff --git a/.scripts/community_split/libs/community/langchain_community/embeddings/__init__.py b/.scripts/community_split/libs/community/langchain_community/embeddings/__init__.py deleted file mode 100644 index ce9cfc7aa0b..00000000000 --- a/.scripts/community_split/libs/community/langchain_community/embeddings/__init__.py +++ /dev/null @@ -1,161 +0,0 @@ -"""**Embedding models** are wrappers around embedding models -from different APIs and services. - -**Embedding models** can be LLMs or not. - -**Class hierarchy:** - -.. code-block:: - - Embeddings --> Embeddings # Examples: OpenAIEmbeddings, HuggingFaceEmbeddings -""" - - -import logging -from typing import Any - -from langchain_community.embeddings.aleph_alpha import ( - AlephAlphaAsymmetricSemanticEmbedding, - AlephAlphaSymmetricSemanticEmbedding, -) -from langchain_community.embeddings.awa import AwaEmbeddings -from langchain_community.embeddings.azure_openai import AzureOpenAIEmbeddings -from langchain_community.embeddings.baidu_qianfan_endpoint import ( - QianfanEmbeddingsEndpoint, -) -from langchain_community.embeddings.bedrock import BedrockEmbeddings -from langchain_community.embeddings.bookend import BookendEmbeddings -from langchain_community.embeddings.clarifai import ClarifaiEmbeddings -from langchain_community.embeddings.cohere import CohereEmbeddings -from langchain_community.embeddings.dashscope import DashScopeEmbeddings -from langchain_community.embeddings.databricks import DatabricksEmbeddings -from langchain_community.embeddings.deepinfra import DeepInfraEmbeddings -from langchain_community.embeddings.edenai import EdenAiEmbeddings -from langchain_community.embeddings.elasticsearch import ElasticsearchEmbeddings -from langchain_community.embeddings.embaas import EmbaasEmbeddings -from langchain_community.embeddings.ernie import ErnieEmbeddings -from langchain_community.embeddings.fake import ( - DeterministicFakeEmbedding, - FakeEmbeddings, -) -from langchain_community.embeddings.fastembed import FastEmbedEmbeddings -from langchain_community.embeddings.google_palm import GooglePalmEmbeddings -from langchain_community.embeddings.gpt4all import GPT4AllEmbeddings -from langchain_community.embeddings.gradient_ai import GradientEmbeddings -from langchain_community.embeddings.huggingface import ( - HuggingFaceBgeEmbeddings, - HuggingFaceEmbeddings, - HuggingFaceInferenceAPIEmbeddings, - HuggingFaceInstructEmbeddings, -) -from langchain_community.embeddings.huggingface_hub import HuggingFaceHubEmbeddings -from langchain_community.embeddings.infinity import InfinityEmbeddings -from langchain_community.embeddings.javelin_ai_gateway import JavelinAIGatewayEmbeddings -from langchain_community.embeddings.jina import JinaEmbeddings -from langchain_community.embeddings.johnsnowlabs import JohnSnowLabsEmbeddings -from langchain_community.embeddings.llamacpp import LlamaCppEmbeddings -from langchain_community.embeddings.localai import LocalAIEmbeddings -from 
langchain_community.embeddings.minimax import MiniMaxEmbeddings -from langchain_community.embeddings.mlflow import MlflowEmbeddings -from langchain_community.embeddings.mlflow_gateway import MlflowAIGatewayEmbeddings -from langchain_community.embeddings.modelscope_hub import ModelScopeEmbeddings -from langchain_community.embeddings.mosaicml import MosaicMLInstructorEmbeddings -from langchain_community.embeddings.nlpcloud import NLPCloudEmbeddings -from langchain_community.embeddings.octoai_embeddings import OctoAIEmbeddings -from langchain_community.embeddings.ollama import OllamaEmbeddings -from langchain_community.embeddings.openai import OpenAIEmbeddings -from langchain_community.embeddings.sagemaker_endpoint import ( - SagemakerEndpointEmbeddings, -) -from langchain_community.embeddings.self_hosted import SelfHostedEmbeddings -from langchain_community.embeddings.self_hosted_hugging_face import ( - SelfHostedHuggingFaceEmbeddings, - SelfHostedHuggingFaceInstructEmbeddings, -) -from langchain_community.embeddings.sentence_transformer import ( - SentenceTransformerEmbeddings, -) -from langchain_community.embeddings.spacy_embeddings import SpacyEmbeddings -from langchain_community.embeddings.tensorflow_hub import TensorflowHubEmbeddings -from langchain_community.embeddings.vertexai import VertexAIEmbeddings -from langchain_community.embeddings.voyageai import VoyageEmbeddings -from langchain_community.embeddings.xinference import XinferenceEmbeddings - -logger = logging.getLogger(__name__) - -__all__ = [ - "OpenAIEmbeddings", - "AzureOpenAIEmbeddings", - "ClarifaiEmbeddings", - "CohereEmbeddings", - "DatabricksEmbeddings", - "ElasticsearchEmbeddings", - "FastEmbedEmbeddings", - "HuggingFaceEmbeddings", - "HuggingFaceInferenceAPIEmbeddings", - "InfinityEmbeddings", - "GradientEmbeddings", - "JinaEmbeddings", - "LlamaCppEmbeddings", - "HuggingFaceHubEmbeddings", - "MlflowEmbeddings", - "MlflowAIGatewayEmbeddings", - "ModelScopeEmbeddings", - "TensorflowHubEmbeddings", - "SagemakerEndpointEmbeddings", - "HuggingFaceInstructEmbeddings", - "MosaicMLInstructorEmbeddings", - "SelfHostedEmbeddings", - "SelfHostedHuggingFaceEmbeddings", - "SelfHostedHuggingFaceInstructEmbeddings", - "FakeEmbeddings", - "DeterministicFakeEmbedding", - "AlephAlphaAsymmetricSemanticEmbedding", - "AlephAlphaSymmetricSemanticEmbedding", - "SentenceTransformerEmbeddings", - "GooglePalmEmbeddings", - "MiniMaxEmbeddings", - "VertexAIEmbeddings", - "BedrockEmbeddings", - "DeepInfraEmbeddings", - "EdenAiEmbeddings", - "DashScopeEmbeddings", - "EmbaasEmbeddings", - "OctoAIEmbeddings", - "SpacyEmbeddings", - "NLPCloudEmbeddings", - "GPT4AllEmbeddings", - "XinferenceEmbeddings", - "LocalAIEmbeddings", - "AwaEmbeddings", - "HuggingFaceBgeEmbeddings", - "ErnieEmbeddings", - "JavelinAIGatewayEmbeddings", - "OllamaEmbeddings", - "QianfanEmbeddingsEndpoint", - "JohnSnowLabsEmbeddings", - "VoyageEmbeddings", - "BookendEmbeddings", -] - - -# TODO: this is in here to maintain backwards compatibility -class HypotheticalDocumentEmbedder: - def __init__(self, *args: Any, **kwargs: Any): - logger.warning( - "Using a deprecated class. Please use " - "`from langchain.chains import HypotheticalDocumentEmbedder` instead" - ) - from langchain.chains.hyde.base import HypotheticalDocumentEmbedder as H - - return H(*args, **kwargs) # type: ignore - - @classmethod - def from_llm(cls, *args: Any, **kwargs: Any) -> Any: - logger.warning( - "Using a deprecated class. 
Please use " - "`from langchain.chains import HypotheticalDocumentEmbedder` instead" - ) - from langchain.chains.hyde.base import HypotheticalDocumentEmbedder as H - - return H.from_llm(*args, **kwargs) diff --git a/.scripts/community_split/libs/community/langchain_community/embeddings/huggingface.py b/.scripts/community_split/libs/community/langchain_community/embeddings/huggingface.py deleted file mode 100644 index 84a568866f1..00000000000 --- a/.scripts/community_split/libs/community/langchain_community/embeddings/huggingface.py +++ /dev/null @@ -1,343 +0,0 @@ -from typing import Any, Dict, List, Optional - -import requests -from langchain_core.embeddings import Embeddings -from langchain_core.pydantic_v1 import BaseModel, Extra, Field - -DEFAULT_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2" -DEFAULT_INSTRUCT_MODEL = "hkunlp/instructor-large" -DEFAULT_BGE_MODEL = "BAAI/bge-large-en" -DEFAULT_EMBED_INSTRUCTION = "Represent the document for retrieval: " -DEFAULT_QUERY_INSTRUCTION = ( - "Represent the question for retrieving supporting documents: " -) -DEFAULT_QUERY_BGE_INSTRUCTION_EN = ( - "Represent this question for searching relevant passages: " -) -DEFAULT_QUERY_BGE_INSTRUCTION_ZH = "为这个句子生成表示以用于检索相关文章:" - - -class HuggingFaceEmbeddings(BaseModel, Embeddings): - """HuggingFace sentence_transformers embedding models. - - To use, you should have the ``sentence_transformers`` python package installed. - - Example: - .. code-block:: python - - from langchain_community.embeddings import HuggingFaceEmbeddings - - model_name = "sentence-transformers/all-mpnet-base-v2" - model_kwargs = {'device': 'cpu'} - encode_kwargs = {'normalize_embeddings': False} - hf = HuggingFaceEmbeddings( - model_name=model_name, - model_kwargs=model_kwargs, - encode_kwargs=encode_kwargs - ) - """ - - client: Any #: :meta private: - model_name: str = DEFAULT_MODEL_NAME - """Model name to use.""" - cache_folder: Optional[str] = None - """Path to store models. - Can be also set by SENTENCE_TRANSFORMERS_HOME environment variable.""" - model_kwargs: Dict[str, Any] = Field(default_factory=dict) - """Keyword arguments to pass to the model.""" - encode_kwargs: Dict[str, Any] = Field(default_factory=dict) - """Keyword arguments to pass when calling the `encode` method of the model.""" - multi_process: bool = False - """Run encode() on multiple GPUs.""" - - def __init__(self, **kwargs: Any): - """Initialize the sentence_transformer.""" - super().__init__(**kwargs) - try: - import sentence_transformers - - except ImportError as exc: - raise ImportError( - "Could not import sentence_transformers python package. " - "Please install it with `pip install sentence-transformers`." - ) from exc - - self.client = sentence_transformers.SentenceTransformer( - self.model_name, cache_folder=self.cache_folder, **self.model_kwargs - ) - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - - def embed_documents(self, texts: List[str]) -> List[List[float]]: - """Compute doc embeddings using a HuggingFace transformer model. - - Args: - texts: The list of texts to embed. - - Returns: - List of embeddings, one for each text. 
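-
-        Example (a sketch; assumes ``sentence-transformers`` is installed and
-        the default model can be downloaded):
-
-        .. code-block:: python
-
-            hf = HuggingFaceEmbeddings()
-            vectors = hf.embed_documents(["first document", "second document"])
-            # One embedding vector per input text.
-            assert len(vectors) == 2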
- """ - import sentence_transformers - - texts = list(map(lambda x: x.replace("\n", " "), texts)) - if self.multi_process: - pool = self.client.start_multi_process_pool() - embeddings = self.client.encode_multi_process(texts, pool) - sentence_transformers.SentenceTransformer.stop_multi_process_pool(pool) - else: - embeddings = self.client.encode(texts, **self.encode_kwargs) - - return embeddings.tolist() - - def embed_query(self, text: str) -> List[float]: - """Compute query embeddings using a HuggingFace transformer model. - - Args: - text: The text to embed. - - Returns: - Embeddings for the text. - """ - return self.embed_documents([text])[0] - - -class HuggingFaceInstructEmbeddings(BaseModel, Embeddings): - """Wrapper around sentence_transformers embedding models. - - To use, you should have the ``sentence_transformers`` - and ``InstructorEmbedding`` python packages installed. - - Example: - .. code-block:: python - - from langchain_community.embeddings import HuggingFaceInstructEmbeddings - - model_name = "hkunlp/instructor-large" - model_kwargs = {'device': 'cpu'} - encode_kwargs = {'normalize_embeddings': True} - hf = HuggingFaceInstructEmbeddings( - model_name=model_name, - model_kwargs=model_kwargs, - encode_kwargs=encode_kwargs - ) - """ - - client: Any #: :meta private: - model_name: str = DEFAULT_INSTRUCT_MODEL - """Model name to use.""" - cache_folder: Optional[str] = None - """Path to store models. - Can be also set by SENTENCE_TRANSFORMERS_HOME environment variable.""" - model_kwargs: Dict[str, Any] = Field(default_factory=dict) - """Keyword arguments to pass to the model.""" - encode_kwargs: Dict[str, Any] = Field(default_factory=dict) - """Keyword arguments to pass when calling the `encode` method of the model.""" - embed_instruction: str = DEFAULT_EMBED_INSTRUCTION - """Instruction to use for embedding documents.""" - query_instruction: str = DEFAULT_QUERY_INSTRUCTION - """Instruction to use for embedding query.""" - - def __init__(self, **kwargs: Any): - """Initialize the sentence_transformer.""" - super().__init__(**kwargs) - try: - from InstructorEmbedding import INSTRUCTOR - - self.client = INSTRUCTOR( - self.model_name, cache_folder=self.cache_folder, **self.model_kwargs - ) - except ImportError as e: - raise ImportError("Dependencies for InstructorEmbedding not found.") from e - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - - def embed_documents(self, texts: List[str]) -> List[List[float]]: - """Compute doc embeddings using a HuggingFace instruct model. - - Args: - texts: The list of texts to embed. - - Returns: - List of embeddings, one for each text. - """ - instruction_pairs = [[self.embed_instruction, text] for text in texts] - embeddings = self.client.encode(instruction_pairs, **self.encode_kwargs) - return embeddings.tolist() - - def embed_query(self, text: str) -> List[float]: - """Compute query embeddings using a HuggingFace instruct model. - - Args: - text: The text to embed. - - Returns: - Embeddings for the text. - """ - instruction_pair = [self.query_instruction, text] - embedding = self.client.encode([instruction_pair], **self.encode_kwargs)[0] - return embedding.tolist() - - -class HuggingFaceBgeEmbeddings(BaseModel, Embeddings): - """HuggingFace BGE sentence_transformers embedding models. - - To use, you should have the ``sentence_transformers`` python package installed. - - Example: - .. 
code-block:: python - - from langchain_community.embeddings import HuggingFaceBgeEmbeddings - - model_name = "BAAI/bge-large-en" - model_kwargs = {'device': 'cpu'} - encode_kwargs = {'normalize_embeddings': True} - hf = HuggingFaceBgeEmbeddings( - model_name=model_name, - model_kwargs=model_kwargs, - encode_kwargs=encode_kwargs - ) - """ - - client: Any #: :meta private: - model_name: str = DEFAULT_BGE_MODEL - """Model name to use.""" - cache_folder: Optional[str] = None - """Path to store models. - Can be also set by SENTENCE_TRANSFORMERS_HOME environment variable.""" - model_kwargs: Dict[str, Any] = Field(default_factory=dict) - """Keyword arguments to pass to the model.""" - encode_kwargs: Dict[str, Any] = Field(default_factory=dict) - """Keyword arguments to pass when calling the `encode` method of the model.""" - query_instruction: str = DEFAULT_QUERY_BGE_INSTRUCTION_EN - """Instruction to use for embedding query.""" - - def __init__(self, **kwargs: Any): - """Initialize the sentence_transformer.""" - super().__init__(**kwargs) - try: - import sentence_transformers - - except ImportError as exc: - raise ImportError( - "Could not import sentence_transformers python package. " - "Please install it with `pip install sentence_transformers`." - ) from exc - - self.client = sentence_transformers.SentenceTransformer( - self.model_name, cache_folder=self.cache_folder, **self.model_kwargs - ) - if "-zh" in self.model_name: - self.query_instruction = DEFAULT_QUERY_BGE_INSTRUCTION_ZH - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - - def embed_documents(self, texts: List[str]) -> List[List[float]]: - """Compute doc embeddings using a HuggingFace transformer model. - - Args: - texts: The list of texts to embed. - - Returns: - List of embeddings, one for each text. - """ - texts = [t.replace("\n", " ") for t in texts] - embeddings = self.client.encode(texts, **self.encode_kwargs) - return embeddings.tolist() - - def embed_query(self, text: str) -> List[float]: - """Compute query embeddings using a HuggingFace transformer model. - - Args: - text: The text to embed. - - Returns: - Embeddings for the text. - """ - text = text.replace("\n", " ") - embedding = self.client.encode( - self.query_instruction + text, **self.encode_kwargs - ) - return embedding.tolist() - - -class HuggingFaceInferenceAPIEmbeddings(BaseModel, Embeddings): - """Embed texts using the HuggingFace API. - - Requires a HuggingFace Inference API key and a model name. - """ - - api_key: str - """Your API key for the HuggingFace Inference API.""" - model_name: str = "sentence-transformers/all-MiniLM-L6-v2" - """The name of the model to use for text embeddings.""" - api_url: Optional[str] = None - """Custom inference endpoint url. None for using default public url.""" - - @property - def _api_url(self) -> str: - return self.api_url or self._default_api_url - - @property - def _default_api_url(self) -> str: - return ( - "https://api-inference.huggingface.co" - "/pipeline" - "/feature-extraction" - f"/{self.model_name}" - ) - - @property - def _headers(self) -> dict: - return {"Authorization": f"Bearer {self.api_key}"} - - def embed_documents(self, texts: List[str]) -> List[List[float]]: - """Get the embeddings for a list of texts. - - Args: - texts (Documents): A list of texts to get embeddings for. - - Returns: - Embedded texts as List[List[float]], where each inner List[float] - corresponds to a single input text. - - Example: - .. 
code-block:: python - - from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings - - hf_embeddings = HuggingFaceInferenceAPIEmbeddings( - api_key="your_api_key", - model_name="sentence-transformers/all-MiniLM-l6-v2" - ) - texts = ["Hello, world!", "How are you?"] - hf_embeddings.embed_documents(texts) - """ # noqa: E501 - response = requests.post( - self._api_url, - headers=self._headers, - json={ - "inputs": texts, - "options": {"wait_for_model": True, "use_cache": True}, - }, - ) - return response.json() - - def embed_query(self, text: str) -> List[float]: - """Compute query embeddings using a HuggingFace transformer model. - - Args: - text: The text to embed. - - Returns: - Embeddings for the text. - """ - return self.embed_documents([text])[0] diff --git a/.scripts/community_split/libs/community/langchain_community/embeddings/johnsnowlabs.py b/.scripts/community_split/libs/community/langchain_community/embeddings/johnsnowlabs.py deleted file mode 100644 index f183efe87b5..00000000000 --- a/.scripts/community_split/libs/community/langchain_community/embeddings/johnsnowlabs.py +++ /dev/null @@ -1,92 +0,0 @@ -import os -import sys -from typing import Any, List - -from langchain_core.embeddings import Embeddings -from langchain_core.pydantic_v1 import BaseModel, Extra - - -class JohnSnowLabsEmbeddings(BaseModel, Embeddings): - """JohnSnowLabs embedding models - - To use, you should have the ``johnsnowlabs`` python package installed. - Example: - .. code-block:: python - - from langchain_community.embeddings.johnsnowlabs import JohnSnowLabsEmbeddings - - embedding = JohnSnowLabsEmbeddings(model='embed_sentence.bert') - output = embedding.embed_query("foo bar") - """ # noqa: E501 - - model: Any = "embed_sentence.bert" - - def __init__( - self, - model: Any = "embed_sentence.bert", - hardware_target: str = "cpu", - **kwargs: Any, - ): - """Initialize the johnsnowlabs model.""" - super().__init__(**kwargs) - # 1) Check imports - try: - from johnsnowlabs import nlp - from nlu.pipe.pipeline import NLUPipeline - except ImportError as exc: - raise ImportError( - "Could not import johnsnowlabs python package. " - "Please install it with `pip install johnsnowlabs`." - ) from exc - - # 2) Start a Spark Session - try: - os.environ["PYSPARK_PYTHON"] = sys.executable - os.environ["PYSPARK_DRIVER_PYTHON"] = sys.executable - nlp.start(hardware_target=hardware_target) - except Exception as exc: - raise Exception("Failure starting Spark Session") from exc - - # 3) Load the model - try: - if isinstance(model, str): - self.model = nlp.load(model) - elif isinstance(model, NLUPipeline): - self.model = model - else: - self.model = nlp.to_nlu_pipe(model) - except Exception as exc: - raise Exception("Failure loading model") from exc - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - - def embed_documents(self, texts: List[str]) -> List[List[float]]: - """Compute doc embeddings using a JohnSnowLabs transformer model. - - Args: - texts: The list of texts to embed. - - Returns: - List of embeddings, one for each text. - """ - - df = self.model.predict(texts, output_level="document") - emb_col = None - for c in df.columns: - if "embedding" in c: - emb_col = c - return [vec.tolist() for vec in df[emb_col].tolist()] - - def embed_query(self, text: str) -> List[float]: - """Compute query embeddings using a JohnSnowLabs transformer model. - - Args: - text: The text to embed. - - Returns: - Embeddings for the text. 
- """ - return self.embed_documents([text])[0] diff --git a/.scripts/community_split/libs/community/langchain_community/embeddings/self_hosted_hugging_face.py b/.scripts/community_split/libs/community/langchain_community/embeddings/self_hosted_hugging_face.py deleted file mode 100644 index 0b706532cf2..00000000000 --- a/.scripts/community_split/libs/community/langchain_community/embeddings/self_hosted_hugging_face.py +++ /dev/null @@ -1,168 +0,0 @@ -import importlib -import logging -from typing import Any, Callable, List, Optional - -from langchain_community.embeddings.self_hosted import SelfHostedEmbeddings - -DEFAULT_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2" -DEFAULT_INSTRUCT_MODEL = "hkunlp/instructor-large" -DEFAULT_EMBED_INSTRUCTION = "Represent the document for retrieval: " -DEFAULT_QUERY_INSTRUCTION = ( - "Represent the question for retrieving supporting documents: " -) - -logger = logging.getLogger(__name__) - - -def _embed_documents(client: Any, *args: Any, **kwargs: Any) -> List[List[float]]: - """Inference function to send to the remote hardware. - - Accepts a sentence_transformer model_id and - returns a list of embeddings for each document in the batch. - """ - return client.encode(*args, **kwargs) - - -def load_embedding_model(model_id: str, instruct: bool = False, device: int = 0) -> Any: - """Load the embedding model.""" - if not instruct: - import sentence_transformers - - client = sentence_transformers.SentenceTransformer(model_id) - else: - from InstructorEmbedding import INSTRUCTOR - - client = INSTRUCTOR(model_id) - - if importlib.util.find_spec("torch") is not None: - import torch - - cuda_device_count = torch.cuda.device_count() - if device < -1 or (device >= cuda_device_count): - raise ValueError( - f"Got device=={device}, " - f"device is required to be within [-1, {cuda_device_count})" - ) - if device < 0 and cuda_device_count > 0: - logger.warning( - "Device has %d GPUs available. " - "Provide device={deviceId} to `from_model_id` to use available" - "GPUs for execution. deviceId is -1 for CPU and " - "can be a positive integer associated with CUDA device id.", - cuda_device_count, - ) - - client = client.to(device) - return client - - -class SelfHostedHuggingFaceEmbeddings(SelfHostedEmbeddings): - """HuggingFace embedding models on self-hosted remote hardware. - - Supported hardware includes auto-launched instances on AWS, GCP, Azure, - and Lambda, as well as servers specified - by IP address and SSH credentials (such as on-prem, or another cloud - like Paperspace, Coreweave, etc.). - - To use, you should have the ``runhouse`` python package installed. - - Example: - .. 
code-block:: python - - from langchain_community.embeddings import SelfHostedHuggingFaceEmbeddings - import runhouse as rh - model_name = "sentence-transformers/all-mpnet-base-v2" - gpu = rh.cluster(name="rh-a10x", instance_type="A100:1") - hf = SelfHostedHuggingFaceEmbeddings(model_name=model_name, hardware=gpu) - """ - - client: Any #: :meta private: - model_id: str = DEFAULT_MODEL_NAME - """Model name to use.""" - model_reqs: List[str] = ["./", "sentence_transformers", "torch"] - """Requirements to install on hardware to inference the model.""" - hardware: Any - """Remote hardware to send the inference function to.""" - model_load_fn: Callable = load_embedding_model - """Function to load the model remotely on the server.""" - load_fn_kwargs: Optional[dict] = None - """Keyword arguments to pass to the model load function.""" - inference_fn: Callable = _embed_documents - """Inference function to extract the embeddings.""" - - def __init__(self, **kwargs: Any): - """Initialize the remote inference function.""" - load_fn_kwargs = kwargs.pop("load_fn_kwargs", {}) - load_fn_kwargs["model_id"] = load_fn_kwargs.get("model_id", DEFAULT_MODEL_NAME) - load_fn_kwargs["instruct"] = load_fn_kwargs.get("instruct", False) - load_fn_kwargs["device"] = load_fn_kwargs.get("device", 0) - super().__init__(load_fn_kwargs=load_fn_kwargs, **kwargs) - - -class SelfHostedHuggingFaceInstructEmbeddings(SelfHostedHuggingFaceEmbeddings): - """HuggingFace InstructEmbedding models on self-hosted remote hardware. - - Supported hardware includes auto-launched instances on AWS, GCP, Azure, - and Lambda, as well as servers specified - by IP address and SSH credentials (such as on-prem, or another - cloud like Paperspace, Coreweave, etc.). - - To use, you should have the ``runhouse`` python package installed. - - Example: - .. code-block:: python - - from langchain_community.embeddings import SelfHostedHuggingFaceInstructEmbeddings - import runhouse as rh - model_name = "hkunlp/instructor-large" - gpu = rh.cluster(name='rh-a10x', instance_type='A100:1') - hf = SelfHostedHuggingFaceInstructEmbeddings( - model_name=model_name, hardware=gpu) - """ # noqa: E501 - - model_id: str = DEFAULT_INSTRUCT_MODEL - """Model name to use.""" - embed_instruction: str = DEFAULT_EMBED_INSTRUCTION - """Instruction to use for embedding documents.""" - query_instruction: str = DEFAULT_QUERY_INSTRUCTION - """Instruction to use for embedding query.""" - model_reqs: List[str] = ["./", "InstructorEmbedding", "torch"] - """Requirements to install on hardware to inference the model.""" - - def __init__(self, **kwargs: Any): - """Initialize the remote inference function.""" - load_fn_kwargs = kwargs.pop("load_fn_kwargs", {}) - load_fn_kwargs["model_id"] = load_fn_kwargs.get( - "model_id", DEFAULT_INSTRUCT_MODEL - ) - load_fn_kwargs["instruct"] = load_fn_kwargs.get("instruct", True) - load_fn_kwargs["device"] = load_fn_kwargs.get("device", 0) - super().__init__(load_fn_kwargs=load_fn_kwargs, **kwargs) - - def embed_documents(self, texts: List[str]) -> List[List[float]]: - """Compute doc embeddings using a HuggingFace instruct model. - - Args: - texts: The list of texts to embed. - - Returns: - List of embeddings, one for each text. - """ - instruction_pairs = [] - for text in texts: - instruction_pairs.append([self.embed_instruction, text]) - embeddings = self.client(self.pipeline_ref, instruction_pairs) - return embeddings.tolist() - - def embed_query(self, text: str) -> List[float]: - """Compute query embeddings using a HuggingFace instruct model. 
- - Args: - text: The text to embed. - - Returns: - Embeddings for the text. - """ - instruction_pair = [self.query_instruction, text] - embedding = self.client(self.pipeline_ref, [instruction_pair])[0] - return embedding.tolist() diff --git a/.scripts/community_split/libs/community/langchain_community/llms/anthropic.py b/.scripts/community_split/libs/community/langchain_community/llms/anthropic.py deleted file mode 100644 index be832cf1368..00000000000 --- a/.scripts/community_split/libs/community/langchain_community/llms/anthropic.py +++ /dev/null @@ -1,351 +0,0 @@ -import re -import warnings -from typing import ( - Any, - AsyncIterator, - Callable, - Dict, - Iterator, - List, - Mapping, - Optional, -) - -from langchain_core.callbacks import ( - AsyncCallbackManagerForLLMRun, - CallbackManagerForLLMRun, -) -from langchain_core.language_models import BaseLanguageModel -from langchain_core.language_models.llms import LLM -from langchain_core.outputs import GenerationChunk -from langchain_core.prompt_values import PromptValue -from langchain_core.pydantic_v1 import Field, SecretStr, root_validator -from langchain_core.utils import ( - check_package_version, - get_from_dict_or_env, - get_pydantic_field_names, -) -from langchain_core.utils.utils import build_extra_kwargs, convert_to_secret_str - - -class _AnthropicCommon(BaseLanguageModel): - client: Any = None #: :meta private: - async_client: Any = None #: :meta private: - model: str = Field(default="claude-2", alias="model_name") - """Model name to use.""" - - max_tokens_to_sample: int = Field(default=256, alias="max_tokens") - """Denotes the number of tokens to predict per generation.""" - - temperature: Optional[float] = None - """A non-negative float that tunes the degree of randomness in generation.""" - - top_k: Optional[int] = None - """Number of most likely tokens to consider at each step.""" - - top_p: Optional[float] = None - """Total probability mass of tokens to consider at each step.""" - - streaming: bool = False - """Whether to stream the results.""" - - default_request_timeout: Optional[float] = None - """Timeout for requests to Anthropic Completion API. Default is 600 seconds.""" - - anthropic_api_url: Optional[str] = None - - anthropic_api_key: Optional[SecretStr] = None - - HUMAN_PROMPT: Optional[str] = None - AI_PROMPT: Optional[str] = None - count_tokens: Optional[Callable[[str], int]] = None - model_kwargs: Dict[str, Any] = Field(default_factory=dict) - - @root_validator(pre=True) - def build_extra(cls, values: Dict) -> Dict: - extra = values.get("model_kwargs", {}) - all_required_field_names = get_pydantic_field_names(cls) - values["model_kwargs"] = build_extra_kwargs( - extra, values, all_required_field_names - ) - return values - - @root_validator() - def validate_environment(cls, values: Dict) -> Dict: - """Validate that api key and python package exists in environment.""" - values["anthropic_api_key"] = convert_to_secret_str( - get_from_dict_or_env(values, "anthropic_api_key", "ANTHROPIC_API_KEY") - ) - # Get custom api url from environment. 
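- # Falls back to the public endpoint, https://api.anthropic.com, when the variable is unset.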
- values["anthropic_api_url"] = get_from_dict_or_env( - values, - "anthropic_api_url", - "ANTHROPIC_API_URL", - default="https://api.anthropic.com", - ) - - try: - import anthropic - - check_package_version("anthropic", gte_version="0.3") - values["client"] = anthropic.Anthropic( - base_url=values["anthropic_api_url"], - api_key=values["anthropic_api_key"].get_secret_value(), - timeout=values["default_request_timeout"], - ) - values["async_client"] = anthropic.AsyncAnthropic( - base_url=values["anthropic_api_url"], - api_key=values["anthropic_api_key"].get_secret_value(), - timeout=values["default_request_timeout"], - ) - values["HUMAN_PROMPT"] = anthropic.HUMAN_PROMPT - values["AI_PROMPT"] = anthropic.AI_PROMPT - values["count_tokens"] = values["client"].count_tokens - - except ImportError: - raise ImportError( - "Could not import anthropic python package. " - "Please it install it with `pip install anthropic`." - ) - return values - - @property - def _default_params(self) -> Mapping[str, Any]: - """Get the default parameters for calling Anthropic API.""" - d = { - "max_tokens_to_sample": self.max_tokens_to_sample, - "model": self.model, - } - if self.temperature is not None: - d["temperature"] = self.temperature - if self.top_k is not None: - d["top_k"] = self.top_k - if self.top_p is not None: - d["top_p"] = self.top_p - return {**d, **self.model_kwargs} - - @property - def _identifying_params(self) -> Mapping[str, Any]: - """Get the identifying parameters.""" - return {**{}, **self._default_params} - - def _get_anthropic_stop(self, stop: Optional[List[str]] = None) -> List[str]: - if not self.HUMAN_PROMPT or not self.AI_PROMPT: - raise NameError("Please ensure the anthropic package is loaded") - - if stop is None: - stop = [] - - # Never want model to invent new turns of Human / Assistant dialog. - stop.extend([self.HUMAN_PROMPT]) - - return stop - - -class Anthropic(LLM, _AnthropicCommon): - """Anthropic large language models. - - To use, you should have the ``anthropic`` python package installed, and the - environment variable ``ANTHROPIC_API_KEY`` set with your API key, or pass - it as a named parameter to the constructor. - - Example: - .. code-block:: python - - import anthropic - from langchain_community.llms import Anthropic - - model = Anthropic(model="", anthropic_api_key="my-api-key") - - # Simplest invocation, automatically wrapped with HUMAN_PROMPT - # and AI_PROMPT. - response = model("What are the biggest risks facing humanity?") - - # Or if you want to use the chat mode, build a few-shot-prompt, or - # put words in the Assistant's mouth, use HUMAN_PROMPT and AI_PROMPT: - raw_prompt = "What are the biggest risks facing humanity?" - prompt = f"{anthropic.HUMAN_PROMPT} {prompt}{anthropic.AI_PROMPT}" - response = model(prompt) - """ - - class Config: - """Configuration for this pydantic object.""" - - allow_population_by_field_name = True - arbitrary_types_allowed = True - - @root_validator() - def raise_warning(cls, values: Dict) -> Dict: - """Raise warning that this class is deprecated.""" - warnings.warn( - "This Anthropic LLM is deprecated. 
" - "Please use `from langchain_community.chat_models import ChatAnthropic` " - "instead" - ) - return values - - @property - def _llm_type(self) -> str: - """Return type of llm.""" - return "anthropic-llm" - - def _wrap_prompt(self, prompt: str) -> str: - if not self.HUMAN_PROMPT or not self.AI_PROMPT: - raise NameError("Please ensure the anthropic package is loaded") - - if prompt.startswith(self.HUMAN_PROMPT): - return prompt # Already wrapped. - - # Guard against common errors in specifying wrong number of newlines. - corrected_prompt, n_subs = re.subn(r"^\n*Human:", self.HUMAN_PROMPT, prompt) - if n_subs == 1: - return corrected_prompt - - # As a last resort, wrap the prompt ourselves to emulate instruct-style. - return f"{self.HUMAN_PROMPT} {prompt}{self.AI_PROMPT} Sure, here you go:\n" - - def _call( - self, - prompt: str, - stop: Optional[List[str]] = None, - run_manager: Optional[CallbackManagerForLLMRun] = None, - **kwargs: Any, - ) -> str: - r"""Call out to Anthropic's completion endpoint. - - Args: - prompt: The prompt to pass into the model. - stop: Optional list of stop words to use when generating. - - Returns: - The string generated by the model. - - Example: - .. code-block:: python - - prompt = "What are the biggest risks facing humanity?" - prompt = f"\n\nHuman: {prompt}\n\nAssistant:" - response = model(prompt) - - """ - if self.streaming: - completion = "" - for chunk in self._stream( - prompt=prompt, stop=stop, run_manager=run_manager, **kwargs - ): - completion += chunk.text - return completion - - stop = self._get_anthropic_stop(stop) - params = {**self._default_params, **kwargs} - response = self.client.completions.create( - prompt=self._wrap_prompt(prompt), - stop_sequences=stop, - **params, - ) - return response.completion - - def convert_prompt(self, prompt: PromptValue) -> str: - return self._wrap_prompt(prompt.to_string()) - - async def _acall( - self, - prompt: str, - stop: Optional[List[str]] = None, - run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, - **kwargs: Any, - ) -> str: - """Call out to Anthropic's completion endpoint asynchronously.""" - if self.streaming: - completion = "" - async for chunk in self._astream( - prompt=prompt, stop=stop, run_manager=run_manager, **kwargs - ): - completion += chunk.text - return completion - - stop = self._get_anthropic_stop(stop) - params = {**self._default_params, **kwargs} - - response = await self.async_client.completions.create( - prompt=self._wrap_prompt(prompt), - stop_sequences=stop, - **params, - ) - return response.completion - - def _stream( - self, - prompt: str, - stop: Optional[List[str]] = None, - run_manager: Optional[CallbackManagerForLLMRun] = None, - **kwargs: Any, - ) -> Iterator[GenerationChunk]: - r"""Call Anthropic completion_stream and return the resulting generator. - - Args: - prompt: The prompt to pass into the model. - stop: Optional list of stop words to use when generating. - Returns: - A generator representing the stream of tokens from Anthropic. - Example: - .. code-block:: python - - prompt = "Write a poem about a stream." 
- prompt = f"\n\nHuman: {prompt}\n\nAssistant:" - generator = anthropic.stream(prompt) - for token in generator: - yield token - """ - stop = self._get_anthropic_stop(stop) - params = {**self._default_params, **kwargs} - - for token in self.client.completions.create( - prompt=self._wrap_prompt(prompt), stop_sequences=stop, stream=True, **params - ): - chunk = GenerationChunk(text=token.completion) - yield chunk - if run_manager: - run_manager.on_llm_new_token(chunk.text, chunk=chunk) - - async def _astream( - self, - prompt: str, - stop: Optional[List[str]] = None, - run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, - **kwargs: Any, - ) -> AsyncIterator[GenerationChunk]: - r"""Call Anthropic completion_stream and return the resulting generator. - - Args: - prompt: The prompt to pass into the model. - stop: Optional list of stop words to use when generating. - Returns: - A generator representing the stream of tokens from Anthropic. - Example: - .. code-block:: python - prompt = "Write a poem about a stream." - prompt = f"\n\nHuman: {prompt}\n\nAssistant:" - generator = anthropic.stream(prompt) - for token in generator: - yield token - """ - stop = self._get_anthropic_stop(stop) - params = {**self._default_params, **kwargs} - - async for token in await self.async_client.completions.create( - prompt=self._wrap_prompt(prompt), - stop_sequences=stop, - stream=True, - **params, - ): - chunk = GenerationChunk(text=token.completion) - yield chunk - if run_manager: - await run_manager.on_llm_new_token(chunk.text, chunk=chunk) - - def get_num_tokens(self, text: str) -> int: - """Calculate number of tokens.""" - if not self.count_tokens: - raise NameError("Please ensure the anthropic package is loaded") - return self.count_tokens(text) diff --git a/.scripts/community_split/libs/community/langchain_community/llms/cloudflare_workersai.py b/.scripts/community_split/libs/community/langchain_community/llms/cloudflare_workersai.py deleted file mode 100644 index 840acdbdb81..00000000000 --- a/.scripts/community_split/libs/community/langchain_community/llms/cloudflare_workersai.py +++ /dev/null @@ -1,126 +0,0 @@ -import json -import logging -from typing import Any, Dict, Iterator, List, Optional - -import requests -from langchain_core.callbacks import CallbackManagerForLLMRun -from langchain_core.language_models.llms import LLM -from langchain_core.outputs import GenerationChunk - -logger = logging.getLogger(__name__) - - -class CloudflareWorkersAI(LLM): - """Langchain LLM class to help to access Cloudflare Workers AI service. - - To use, you must provide an API token and - account ID to access Cloudflare Workers AI, and - pass it as a named parameter to the constructor. - - Example: - .. 
code-block:: python - - from langchain_community.llms.cloudflare_workersai import CloudflareWorkersAI - - my_account_id = "my_account_id" - my_api_token = "my_secret_api_token" - llm_model = "@cf/meta/llama-2-7b-chat-int8" - - cf_ai = CloudflareWorkersAI( - account_id=my_account_id, - api_token=my_api_token, - model=llm_model - ) - """ # noqa: E501 - - account_id: str - api_token: str - model: str = "@cf/meta/llama-2-7b-chat-int8" - base_url: str = "https://api.cloudflare.com/client/v4/accounts" - streaming: bool = False - endpoint_url: str = "" - - def __init__(self, **kwargs: Any) -> None: - """Initialize the Cloudflare Workers AI class.""" - super().__init__(**kwargs) - - self.endpoint_url = f"{self.base_url}/{self.account_id}/ai/run/{self.model}" - - @property - def _llm_type(self) -> str: - """Return type of LLM.""" - return "cloudflare" - - @property - def _default_params(self) -> Dict[str, Any]: - """Default parameters.""" - return {} - - @property - def _identifying_params(self) -> Dict[str, Any]: - """Identifying parameters.""" - return { - "account_id": self.account_id, - "api_token": self.api_token, - "model": self.model, - "base_url": self.base_url, - } - - def _call_api(self, prompt: str, params: Dict[str, Any]) -> requests.Response: - """Call the Cloudflare Workers AI API.""" - headers = {"Authorization": f"Bearer {self.api_token}"} - data = {"prompt": prompt, "stream": self.streaming, **params} - response = requests.post(self.endpoint_url, headers=headers, json=data) - return response - - def _process_response(self, response: requests.Response) -> str: - """Process the API response.""" - if response.ok: - data = response.json() - return data["result"]["response"] - else: - raise ValueError(f"Request failed with status {response.status_code}") - - def _stream( - self, - prompt: str, - stop: Optional[List[str]] = None, - run_manager: Optional[CallbackManagerForLLMRun] = None, - **kwargs: Any, - ) -> Iterator[GenerationChunk]: - """Streaming prediction.""" - original_streaming: bool = self.streaming - self.streaming = True - _response_prefix_count = len("data: ") - _response_stream_end = b"data: [DONE]" - for chunk in self._call_api(prompt, kwargs).iter_lines(): - if chunk == _response_stream_end: - break - if len(chunk) > _response_prefix_count: - try: - data = json.loads(chunk[_response_prefix_count:]) - except Exception as e: - logger.debug(chunk) - raise e - if data is not None and "response" in data: - yield GenerationChunk(text=data["response"]) - if run_manager: - run_manager.on_llm_new_token(data["response"]) - logger.debug("stream end") - self.streaming = original_streaming - - def _call( - self, - prompt: str, - stop: Optional[List[str]] = None, - run_manager: Optional[CallbackManagerForLLMRun] = None, - **kwargs: Any, - ) -> str: - """Regular prediction.""" - if self.streaming: - return "".join( - [c.text for c in self._stream(prompt, stop, run_manager, **kwargs)] - ) - else: - response = self._call_api(prompt, kwargs) - return self._process_response(response) diff --git a/.scripts/community_split/libs/community/langchain_community/retrievers/__init__.py b/.scripts/community_split/libs/community/langchain_community/retrievers/__init__.py deleted file mode 100644 index 75b4ef67536..00000000000 --- a/.scripts/community_split/libs/community/langchain_community/retrievers/__init__.py +++ /dev/null @@ -1,106 +0,0 @@ -"""**Retriever** class returns Documents given a text **query**. - -It is more general than a vector store. 
A retriever does not need to be able to -store documents, only to return (or retrieve) them. Vector stores can be used as -the backbone of a retriever, but there are other types of retrievers as well. - -**Class hierarchy:** - -.. code-block:: - - BaseRetriever --> Retriever # Examples: ArxivRetriever, MergerRetriever - -**Main helpers:** - -.. code-block:: - - Document, Serializable, Callbacks, - CallbackManagerForRetrieverRun, AsyncCallbackManagerForRetrieverRun -""" - -from langchain_community.retrievers.arcee import ArceeRetriever -from langchain_community.retrievers.arxiv import ArxivRetriever -from langchain_community.retrievers.azure_cognitive_search import ( - AzureCognitiveSearchRetriever, -) -from langchain_community.retrievers.bedrock import AmazonKnowledgeBasesRetriever -from langchain_community.retrievers.bm25 import BM25Retriever -from langchain_community.retrievers.chaindesk import ChaindeskRetriever -from langchain_community.retrievers.chatgpt_plugin_retriever import ( - ChatGPTPluginRetriever, -) -from langchain_community.retrievers.cohere_rag_retriever import CohereRagRetriever -from langchain_community.retrievers.docarray import DocArrayRetriever -from langchain_community.retrievers.elastic_search_bm25 import ( - ElasticSearchBM25Retriever, -) -from langchain_community.retrievers.embedchain import EmbedchainRetriever -from langchain_community.retrievers.google_cloud_documentai_warehouse import ( - GoogleDocumentAIWarehouseRetriever, -) -from langchain_community.retrievers.google_vertex_ai_search import ( - GoogleCloudEnterpriseSearchRetriever, - GoogleVertexAIMultiTurnSearchRetriever, - GoogleVertexAISearchRetriever, -) -from langchain_community.retrievers.kay import KayAiRetriever -from langchain_community.retrievers.kendra import AmazonKendraRetriever -from langchain_community.retrievers.knn import KNNRetriever -from langchain_community.retrievers.llama_index import ( - LlamaIndexGraphRetriever, - LlamaIndexRetriever, -) -from langchain_community.retrievers.metal import MetalRetriever -from langchain_community.retrievers.milvus import MilvusRetriever -from langchain_community.retrievers.outline import OutlineRetriever -from langchain_community.retrievers.pinecone_hybrid_search import ( - PineconeHybridSearchRetriever, -) -from langchain_community.retrievers.pubmed import PubMedRetriever -from langchain_community.retrievers.remote_retriever import RemoteLangChainRetriever -from langchain_community.retrievers.svm import SVMRetriever -from langchain_community.retrievers.tavily_search_api import TavilySearchAPIRetriever -from langchain_community.retrievers.tfidf import TFIDFRetriever -from langchain_community.retrievers.vespa_retriever import VespaRetriever -from langchain_community.retrievers.weaviate_hybrid_search import ( - WeaviateHybridSearchRetriever, -) -from langchain_community.retrievers.wikipedia import WikipediaRetriever -from langchain_community.retrievers.zep import ZepRetriever -from langchain_community.retrievers.zilliz import ZillizRetriever - -__all__ = [ - "AmazonKendraRetriever", - "AmazonKnowledgeBasesRetriever", - "ArceeRetriever", - "ArxivRetriever", - "AzureCognitiveSearchRetriever", - "ChatGPTPluginRetriever", - "ChaindeskRetriever", - "CohereRagRetriever", - "ElasticSearchBM25Retriever", - "EmbedchainRetriever", - "GoogleDocumentAIWarehouseRetriever", - "GoogleCloudEnterpriseSearchRetriever", - "GoogleVertexAIMultiTurnSearchRetriever", - "GoogleVertexAISearchRetriever", - "KayAiRetriever", - "KNNRetriever", - "LlamaIndexGraphRetriever", - "LlamaIndexRetriever", - "MetalRetriever", - "MilvusRetriever", - "OutlineRetriever",
- "PineconeHybridSearchRetriever", - "PubMedRetriever", - "RemoteLangChainRetriever", - "SVMRetriever", - "TavilySearchAPIRetriever", - "TFIDFRetriever", - "BM25Retriever", - "VespaRetriever", - "WeaviateHybridSearchRetriever", - "WikipediaRetriever", - "ZepRetriever", - "ZillizRetriever", - "DocArrayRetriever", -] diff --git a/.scripts/community_split/libs/community/langchain_community/storage/__init__.py b/.scripts/community_split/libs/community/langchain_community/storage/__init__.py deleted file mode 100644 index 7af3d5b3000..00000000000 --- a/.scripts/community_split/libs/community/langchain_community/storage/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -"""Implementations of key-value stores and storage helpers. - -Module provides implementations of various key-value stores that conform -to a simple key-value interface. - -The primary goal of these storages is to support implementation of caching. -""" - -from langchain_community.storage.redis import RedisStore -from langchain_community.storage.upstash_redis import ( - UpstashRedisByteStore, - UpstashRedisStore, -) - -__all__ = [ - "RedisStore", - "UpstashRedisByteStore", - "UpstashRedisStore", -] diff --git a/.scripts/community_split/libs/community/langchain_community/tools/amadeus/closest_airport.py b/.scripts/community_split/libs/community/langchain_community/tools/amadeus/closest_airport.py deleted file mode 100644 index 4e8b90a1b2a..00000000000 --- a/.scripts/community_split/libs/community/langchain_community/tools/amadeus/closest_airport.py +++ /dev/null @@ -1,50 +0,0 @@ -from typing import Optional, Type - -from langchain_core.callbacks import CallbackManagerForToolRun -from langchain_core.pydantic_v1 import BaseModel, Field - -from langchain_community.chat_models import ChatOpenAI -from langchain_community.tools.amadeus.base import AmadeusBaseTool - - -class ClosestAirportSchema(BaseModel): - """Schema for the AmadeusClosestAirport tool.""" - - location: str = Field( - description=( - " The location for which you would like to find the nearest airport " - " along with optional details such as country, state, region, or " - " province, allowing for easy processing and identification of " - " the closest airport. Examples of the format are the following:\n" - " Cali, Colombia\n " - " Lincoln, Nebraska, United States\n" - " New York, United States\n" - " Sydney, New South Wales, Australia\n" - " Rome, Lazio, Italy\n" - " Toronto, Ontario, Canada\n" - ) - ) - - -class AmadeusClosestAirport(AmadeusBaseTool): - """Tool for finding the closest airport to a particular location.""" - - name: str = "closest_airport" - description: str = ( - "Use this tool to find the closest airport to a particular location." - ) - args_schema: Type[ClosestAirportSchema] = ClosestAirportSchema - - def _run( - self, - location: str, - run_manager: Optional[CallbackManagerForToolRun] = None, - ) -> str: - content = ( - f" What is the nearest airport to {location}? Please respond with the " - " airport's International Air Transport Association (IATA) Location " - ' Identifier in the following JSON format. 
JSON: "iataCode": "IATA ' - ' Location Identifier" ' - ) - - return ChatOpenAI(temperature=0).predict(content) diff --git a/.scripts/community_split/libs/community/langchain_community/tools/clickup/tool.py b/.scripts/community_split/libs/community/langchain_community/tools/clickup/tool.py deleted file mode 100644 index 93988dd7d59..00000000000 --- a/.scripts/community_split/libs/community/langchain_community/tools/clickup/tool.py +++ /dev/null @@ -1,42 +0,0 @@ -""" -This tool allows agents to interact with the clickup library -and operate on a Clickup instance. -To use this tool, you must first set as environment variables: - client_secret - client_id - code - -Below is a sample script that uses the Clickup tool: - -```python -from langchain_community.agent_toolkits.clickup.toolkit import ClickupToolkit -from langchain_community.utilities.clickup import ClickupAPIWrapper - -clickup = ClickupAPIWrapper() -toolkit = ClickupToolkit.from_clickup_api_wrapper(clickup) -``` -""" -from typing import Optional - -from langchain_core.callbacks import CallbackManagerForToolRun -from langchain_core.pydantic_v1 import Field -from langchain_core.tools import BaseTool - -from langchain_community.utilities.clickup import ClickupAPIWrapper - - -class ClickupAction(BaseTool): - """Tool that queries the Clickup API.""" - - api_wrapper: ClickupAPIWrapper = Field(default_factory=ClickupAPIWrapper) - mode: str - name: str = "" - description: str = "" - - def _run( - self, - instructions: str, - run_manager: Optional[CallbackManagerForToolRun] = None, - ) -> str: - """Use the Clickup API to run an operation.""" - return self.api_wrapper.run(self.mode, instructions) diff --git a/.scripts/community_split/libs/community/langchain_community/tools/jira/tool.py b/.scripts/community_split/libs/community/langchain_community/tools/jira/tool.py deleted file mode 100644 index dc57b13dc20..00000000000 --- a/.scripts/community_split/libs/community/langchain_community/tools/jira/tool.py +++ /dev/null @@ -1,44 +0,0 @@ -""" -This tool allows agents to interact with the atlassian-python-api library -and operate on a Jira instance. 
For more information on the -atlassian-python-api library, see https://atlassian-python-api.readthedocs.io/jira.html - -To use this tool, you must first set as environment variables: - JIRA_API_TOKEN - JIRA_USERNAME - JIRA_INSTANCE_URL - -Below is a sample script that uses the Jira tool: - -```python -from langchain_community.agent_toolkits.jira.toolkit import JiraToolkit -from langchain_community.utilities.jira import JiraAPIWrapper - -jira = JiraAPIWrapper() -toolkit = JiraToolkit.from_jira_api_wrapper(jira) -``` -""" -from typing import Optional - -from langchain_core.callbacks import CallbackManagerForToolRun -from langchain_core.pydantic_v1 import Field -from langchain_core.tools import BaseTool - -from langchain_community.utilities.jira import JiraAPIWrapper - - -class JiraAction(BaseTool): - """Tool that queries the Atlassian Jira API.""" - - api_wrapper: JiraAPIWrapper = Field(default_factory=JiraAPIWrapper) - mode: str - name: str = "" - description: str = "" - - def _run( - self, - instructions: str, - run_manager: Optional[CallbackManagerForToolRun] = None, - ) -> str: - """Use the Atlassian Jira API to run an operation.""" - return self.api_wrapper.run(self.mode, instructions) diff --git a/.scripts/community_split/libs/community/langchain_community/tools/powerbi/tool.py b/.scripts/community_split/libs/community/langchain_community/tools/powerbi/tool.py deleted file mode 100644 index 2ee4e4f5129..00000000000 --- a/.scripts/community_split/libs/community/langchain_community/tools/powerbi/tool.py +++ /dev/null @@ -1,276 +0,0 @@ -"""Tools for interacting with a Power BI dataset.""" -import logging -from time import perf_counter -from typing import Any, Dict, Optional, Tuple - -from langchain_core.callbacks import ( - AsyncCallbackManagerForToolRun, - CallbackManagerForToolRun, -) -from langchain_core.pydantic_v1 import Field, validator -from langchain_core.tools import BaseTool -from langchain_community.chat_models.openai import _import_tiktoken - -from langchain_community.tools.powerbi.prompt import ( - BAD_REQUEST_RESPONSE, - DEFAULT_FEWSHOT_EXAMPLES, - RETRY_RESPONSE, -) -from langchain_community.utilities.powerbi import PowerBIDataset, json_to_md - -logger = logging.getLogger(__name__) - - -class QueryPowerBITool(BaseTool): - """Tool for querying a Power BI Dataset.""" - - name: str = "query_powerbi" - description: str = """ - Input to this tool is a detailed question about the dataset, output is a result from the dataset. It will try to answer the question using the dataset, and if it cannot, it will ask for clarification. - - Example Input: "How many rows are in table1?" 
- """ # noqa: E501 - llm_chain: Any - powerbi: PowerBIDataset = Field(exclude=True) - examples: Optional[str] = DEFAULT_FEWSHOT_EXAMPLES - session_cache: Dict[str, Any] = Field(default_factory=dict, exclude=True) - max_iterations: int = 5 - output_token_limit: int = 4000 - tiktoken_model_name: Optional[str] = None # "cl100k_base" - - class Config: - """Configuration for this pydantic object.""" - - arbitrary_types_allowed = True - - @validator("llm_chain") - def validate_llm_chain_input_variables( # pylint: disable=E0213 - cls, llm_chain: Any - ) -> Any: - """Make sure the LLM chain has the correct input variables.""" - for var in llm_chain.prompt.input_variables: - if var not in ["tool_input", "tables", "schemas", "examples"]: - raise ValueError( - "LLM chain for QueryPowerBITool must have input variables ['tool_input', 'tables', 'schemas', 'examples'], found %s", # noqa: C0301 E501 # pylint: disable=C0301 - llm_chain.prompt.input_variables, - ) - return llm_chain - - def _check_cache(self, tool_input: str) -> Optional[str]: - """Check if the input is present in the cache. - - If the value is a bad request, overwrite with the escalated version, - if not present return None.""" - if tool_input not in self.session_cache: - return None - return self.session_cache[tool_input] - - def _run( - self, - tool_input: str, - run_manager: Optional[CallbackManagerForToolRun] = None, - **kwargs: Any, - ) -> str: - """Execute the query, return the results or an error message.""" - if cache := self._check_cache(tool_input): - logger.debug("Found cached result for %s: %s", tool_input, cache) - return cache - - try: - logger.info("Running PBI Query Tool with input: %s", tool_input) - query = self.llm_chain.predict( - tool_input=tool_input, - tables=self.powerbi.get_table_names(), - schemas=self.powerbi.get_schemas(), - examples=self.examples, - callbacks=run_manager.get_child() if run_manager else None, - ) - except Exception as exc: # pylint: disable=broad-except - self.session_cache[tool_input] = f"Error on call to LLM: {exc}" - return self.session_cache[tool_input] - if query == "I cannot answer this": - self.session_cache[tool_input] = query - return self.session_cache[tool_input] - logger.info("PBI Query:\n%s", query) - start_time = perf_counter() - pbi_result = self.powerbi.run(command=query) - end_time = perf_counter() - logger.debug("PBI Result: %s", pbi_result) - logger.debug(f"PBI Query duration: {end_time - start_time:0.6f}") - result, error = self._parse_output(pbi_result) - if error is not None and "TokenExpired" in error: - self.session_cache[ - tool_input - ] = "Authentication token expired or invalid, please try reauthenticate." - return self.session_cache[tool_input] - - iterations = kwargs.get("iterations", 0) - if error and iterations < self.max_iterations: - return self._run( - tool_input=RETRY_RESPONSE.format( - tool_input=tool_input, query=query, error=error - ), - run_manager=run_manager, - iterations=iterations + 1, - ) - - self.session_cache[tool_input] = ( - result if result else BAD_REQUEST_RESPONSE.format(error=error) - ) - return self.session_cache[tool_input] - - async def _arun( - self, - tool_input: str, - run_manager: Optional[AsyncCallbackManagerForToolRun] = None, - **kwargs: Any, - ) -> str: - """Execute the query, return the results or an error message.""" - if cache := self._check_cache(tool_input): - logger.debug("Found cached result for %s: %s", tool_input, cache) - return f"{cache}, from cache, you have already asked this question." 
- try: - logger.info("Running PBI Query Tool with input: %s", tool_input) - query = await self.llm_chain.apredict( - tool_input=tool_input, - tables=self.powerbi.get_table_names(), - schemas=self.powerbi.get_schemas(), - examples=self.examples, - callbacks=run_manager.get_child() if run_manager else None, - ) - except Exception as exc: # pylint: disable=broad-except - self.session_cache[tool_input] = f"Error on call to LLM: {exc}" - return self.session_cache[tool_input] - - if query == "I cannot answer this": - self.session_cache[tool_input] = query - return self.session_cache[tool_input] - logger.info("PBI Query: %s", query) - start_time = perf_counter() - pbi_result = await self.powerbi.arun(command=query) - end_time = perf_counter() - logger.debug("PBI Result: %s", pbi_result) - logger.debug(f"PBI Query duration: {end_time - start_time:0.6f}") - result, error = self._parse_output(pbi_result) - if error is not None and ("TokenExpired" in error or "TokenError" in error): - self.session_cache[ - tool_input - ] = "Authentication token expired or invalid, please try to reauthenticate or check the scope of the credential." # noqa: E501 - return self.session_cache[tool_input] - - iterations = kwargs.get("iterations", 0) - if error and iterations < self.max_iterations: - return await self._arun( - tool_input=RETRY_RESPONSE.format( - tool_input=tool_input, query=query, error=error - ), - run_manager=run_manager, - iterations=iterations + 1, - ) - - self.session_cache[tool_input] = ( - result if result else BAD_REQUEST_RESPONSE.format(error=error) - ) - return self.session_cache[tool_input] - - def _parse_output( - self, pbi_result: Dict[str, Any] - ) -> Tuple[Optional[str], Optional[Any]]: - """Parse the output of the query to a markdown table.""" - if "results" in pbi_result: - rows = pbi_result["results"][0]["tables"][0]["rows"] - if len(rows) == 0: - logger.info("0 records in result, query was valid.") - return ( - None, - "0 rows returned, this might be correct, but please validate if all filter values were correct?", # noqa: E501 - ) - result = json_to_md(rows) - too_long, length = self._result_too_large(result) - if too_long: - return ( - f"Result too large, please try to be more specific or use the `TOPN` function. The result is {length} tokens long, the limit is {self.output_token_limit} tokens.", # noqa: E501 - None, - ) - return result, None - - if "error" in pbi_result: - if ( - "pbi.error" in pbi_result["error"] - and "details" in pbi_result["error"]["pbi.error"] - ): - return None, pbi_result["error"]["pbi.error"]["details"][0]["detail"] - return None, pbi_result["error"] - return None, pbi_result - - def _result_too_large(self, result: str) -> Tuple[bool, int]: - """Tokenize the output of the query.""" - if self.tiktoken_model_name: - tiktoken_ = _import_tiktoken() - encoding = tiktoken_.encoding_for_model(self.tiktoken_model_name) - length = len(encoding.encode(result)) - logger.info("Result length: %s", length) - return length > self.output_token_limit, length - return False, 0 - - -class InfoPowerBITool(BaseTool): - """Tool for getting metadata about a PowerBI Dataset.""" - - name: str = "schema_powerbi" - description: str = """ - Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables. - Be sure that the tables actually exist by calling list_tables_powerbi first! 
- - Example Input: "table1, table2, table3" - """ # noqa: E501 - powerbi: PowerBIDataset = Field(exclude=True) - - class Config: - """Configuration for this pydantic object.""" - - arbitrary_types_allowed = True - - def _run( - self, - tool_input: str, - run_manager: Optional[CallbackManagerForToolRun] = None, - ) -> str: - """Get the schema for tables in a comma-separated list.""" - return self.powerbi.get_table_info(tool_input.split(", ")) - - async def _arun( - self, - tool_input: str, - run_manager: Optional[AsyncCallbackManagerForToolRun] = None, - ) -> str: - return await self.powerbi.aget_table_info(tool_input.split(", ")) - - -class ListPowerBITool(BaseTool): - """Tool for getting tables names.""" - - name: str = "list_tables_powerbi" - description: str = "Input is an empty string, output is a comma separated list of tables in the database." # noqa: E501 # pylint: disable=C0301 - powerbi: PowerBIDataset = Field(exclude=True) - - class Config: - """Configuration for this pydantic object.""" - - arbitrary_types_allowed = True - - def _run( - self, - tool_input: Optional[str] = None, - run_manager: Optional[CallbackManagerForToolRun] = None, - ) -> str: - """Get the names of the tables.""" - return ", ".join(self.powerbi.get_table_names()) - - async def _arun( - self, - tool_input: Optional[str] = None, - run_manager: Optional[AsyncCallbackManagerForToolRun] = None, - ) -> str: - """Get the names of the tables.""" - return ", ".join(self.powerbi.get_table_names()) diff --git a/.scripts/community_split/libs/community/langchain_community/tools/spark_sql/tool.py b/.scripts/community_split/libs/community/langchain_community/tools/spark_sql/tool.py deleted file mode 100644 index 4a07000249d..00000000000 --- a/.scripts/community_split/libs/community/langchain_community/tools/spark_sql/tool.py +++ /dev/null @@ -1,130 +0,0 @@ -# flake8: noqa -"""Tools for interacting with Spark SQL.""" -from typing import Any, Dict, Optional - -from langchain_core.pydantic_v1 import BaseModel, Field, root_validator - -from langchain_core.language_models import BaseLanguageModel -from langchain_core.callbacks import ( - AsyncCallbackManagerForToolRun, - CallbackManagerForToolRun, -) -from langchain_core.prompts import PromptTemplate -from langchain_community.utilities.spark_sql import SparkSQL -from langchain_core.tools import BaseTool -from langchain_community.tools.spark_sql.prompt import QUERY_CHECKER - - -class BaseSparkSQLTool(BaseModel): - """Base tool for interacting with Spark SQL.""" - - db: SparkSQL = Field(exclude=True) - - class Config(BaseTool.Config): - pass - - -class QuerySparkSQLTool(BaseSparkSQLTool, BaseTool): - """Tool for querying a Spark SQL.""" - - name: str = "query_sql_db" - description: str = """ - Input to this tool is a detailed and correct SQL query, output is a result from the Spark SQL. - If the query is not correct, an error message will be returned. - If an error is returned, rewrite the query, check the query, and try again. - """ - - def _run( - self, - query: str, - run_manager: Optional[CallbackManagerForToolRun] = None, - ) -> str: - """Execute the query, return the results or an error message.""" - return self.db.run_no_throw(query) - - -class InfoSparkSQLTool(BaseSparkSQLTool, BaseTool): - """Tool for getting metadata about a Spark SQL.""" - - name: str = "schema_sql_db" - description: str = """ - Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables. 
- Be sure that the tables actually exist by calling list_tables_sql_db first! - - Example Input: "table1, table2, table3" - """ - - def _run( - self, - table_names: str, - run_manager: Optional[CallbackManagerForToolRun] = None, - ) -> str: - """Get the schema for tables in a comma-separated list.""" - return self.db.get_table_info_no_throw(table_names.split(", ")) - - -class ListSparkSQLTool(BaseSparkSQLTool, BaseTool): - """Tool for getting table names.""" - - name: str = "list_tables_sql_db" - description: str = "Input is an empty string, output is a comma-separated list of tables in Spark SQL." - - def _run( - self, - tool_input: str = "", - run_manager: Optional[CallbackManagerForToolRun] = None, - ) -> str: - """Return a comma-separated list of usable table names.""" - return ", ".join(self.db.get_usable_table_names()) - - -class QueryCheckerTool(BaseSparkSQLTool, BaseTool): - """Use an LLM to check if a query is correct. - Adapted from https://www.patterns.app/blog/2023/01/18/crunchbot-sql-analyst-gpt/""" - - template: str = QUERY_CHECKER - llm: BaseLanguageModel - llm_chain: Any = Field(init=False) - name: str = "query_checker_sql_db" - description: str = """ - Use this tool to double check if your query is correct before executing it. - Always use this tool before executing a query with query_sql_db! - """ - - @root_validator(pre=True) - def initialize_llm_chain(cls, values: Dict[str, Any]) -> Dict[str, Any]: - if "llm_chain" not in values: - from langchain.chains.llm import LLMChain - values["llm_chain"] = LLMChain( - llm=values.get("llm"), - prompt=PromptTemplate( - template=QUERY_CHECKER, input_variables=["query"] - ), - ) - - if values["llm_chain"].prompt.input_variables != ["query"]: - raise ValueError( - "LLM chain for QueryCheckerTool needs to use ['query'] as input_variables " - "for the embedded prompt" - ) - - return values - - def _run( - self, - query: str, - run_manager: Optional[CallbackManagerForToolRun] = None, - ) -> str: - """Use the LLM to check the query.""" - return self.llm_chain.predict( - query=query, callbacks=run_manager.get_child() if run_manager else None - ) - - async def _arun( - self, - query: str, - run_manager: Optional[AsyncCallbackManagerForToolRun] = None, - ) -> str: - return await self.llm_chain.apredict( - query=query, callbacks=run_manager.get_child() if run_manager else None - ) diff --git a/.scripts/community_split/libs/community/langchain_community/tools/sql_database/tool.py b/.scripts/community_split/libs/community/langchain_community/tools/sql_database/tool.py deleted file mode 100644 index 3e0d6509b99..00000000000 --- a/.scripts/community_split/libs/community/langchain_community/tools/sql_database/tool.py +++ /dev/null @@ -1,134 +0,0 @@ -# flake8: noqa -"""Tools for interacting with a SQL database.""" -from typing import Any, Dict, Optional - -from langchain_core.pydantic_v1 import BaseModel, Extra, Field, root_validator - -from langchain_core.language_models import BaseLanguageModel -from langchain_core.callbacks import ( - AsyncCallbackManagerForToolRun, - CallbackManagerForToolRun, -) -from langchain_core.prompts import PromptTemplate -from langchain_community.utilities.sql_database import SQLDatabase -from langchain_core.tools import BaseTool -from langchain_community.tools.sql_database.prompt import QUERY_CHECKER - - -class BaseSQLDatabaseTool(BaseModel): - """Base tool for interacting with a SQL database.""" - - db: SQLDatabase = Field(exclude=True) - - class Config(BaseTool.Config): - pass - - -class 
QuerySQLDataBaseTool(BaseSQLDatabaseTool, BaseTool): - """Tool for querying a SQL database.""" - - name: str = "sql_db_query" - description: str = """ - Input to this tool is a detailed and correct SQL query, output is a result from the database. - If the query is not correct, an error message will be returned. - If an error is returned, rewrite the query, check the query, and try again. - """ - - def _run( - self, - query: str, - run_manager: Optional[CallbackManagerForToolRun] = None, - ) -> str: - """Execute the query, return the results or an error message.""" - return self.db.run_no_throw(query) - - -class InfoSQLDatabaseTool(BaseSQLDatabaseTool, BaseTool): - """Tool for getting metadata about a SQL database.""" - - name: str = "sql_db_schema" - description: str = """ - Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables. - - Example Input: "table1, table2, table3" - """ - - def _run( - self, - table_names: str, - run_manager: Optional[CallbackManagerForToolRun] = None, - ) -> str: - """Get the schema for tables in a comma-separated list.""" - return self.db.get_table_info_no_throw( - [t.strip() for t in table_names.split(",")] - ) - - -class ListSQLDatabaseTool(BaseSQLDatabaseTool, BaseTool): - """Tool for getting table names.""" - - name: str = "sql_db_list_tables" - description: str = "Input is an empty string, output is a comma-separated list of tables in the database." - - def _run( - self, - tool_input: str = "", - run_manager: Optional[CallbackManagerForToolRun] = None, - ) -> str: - """Return a comma-separated list of usable table names.""" - return ", ".join(self.db.get_usable_table_names()) - - -class QuerySQLCheckerTool(BaseSQLDatabaseTool, BaseTool): - """Use an LLM to check if a query is correct. - Adapted from https://www.patterns.app/blog/2023/01/18/crunchbot-sql-analyst-gpt/""" - - template: str = QUERY_CHECKER - llm: BaseLanguageModel - llm_chain: Any = Field(init=False) - name: str = "sql_db_query_checker" - description: str = """ - Use this tool to double check if your query is correct before executing it. - Always use this tool before executing a query with sql_db_query!
- """ - - @root_validator(pre=True) - def initialize_llm_chain(cls, values: Dict[str, Any]) -> Dict[str, Any]: - if "llm_chain" not in values: - from langchain.chains.llm import LLMChain - values["llm_chain"] = LLMChain( - llm=values.get("llm"), - prompt=PromptTemplate( - template=QUERY_CHECKER, input_variables=["dialect", "query"] - ), - ) - - if values["llm_chain"].prompt.input_variables != ["dialect", "query"]: - raise ValueError( - "LLM chain for QueryCheckerTool must have input variables ['query', 'dialect']" - ) - - return values - - def _run( - self, - query: str, - run_manager: Optional[CallbackManagerForToolRun] = None, - ) -> str: - """Use the LLM to check the query.""" - return self.llm_chain.predict( - query=query, - dialect=self.db.dialect, - callbacks=run_manager.get_child() if run_manager else None, - ) - - async def _arun( - self, - query: str, - run_manager: Optional[AsyncCallbackManagerForToolRun] = None, - ) -> str: - return await self.llm_chain.apredict( - query=query, - dialect=self.db.dialect, - callbacks=run_manager.get_child() if run_manager else None, - ) diff --git a/.scripts/community_split/libs/community/langchain_community/tools/zapier/tool.py b/.scripts/community_split/libs/community/langchain_community/tools/zapier/tool.py deleted file mode 100644 index 3d5f3955546..00000000000 --- a/.scripts/community_split/libs/community/langchain_community/tools/zapier/tool.py +++ /dev/null @@ -1,215 +0,0 @@ -"""[DEPRECATED] - -## Zapier Natural Language Actions API -\ -Full docs here: https://nla.zapier.com/start/ - -**Zapier Natural Language Actions** gives you access to the 5k+ apps, 20k+ actions -on Zapier's platform through a natural language API interface. - -NLA supports apps like Gmail, Salesforce, Trello, Slack, Asana, HubSpot, Google Sheets, -Microsoft Teams, and thousands more apps: https://zapier.com/apps - -Zapier NLA handles ALL the underlying API auth and translation from -natural language --> underlying API call --> return simplified output for LLMs -The key idea is you, or your users, expose a set of actions via an oauth-like setup -window, which you can then query and execute via a REST API. - -NLA offers both API Key and OAuth for signing NLA API requests. - -1. Server-side (API Key): for quickly getting started, testing, and production scenarios - where LangChain will only use actions exposed in the developer's Zapier account - (and will use the developer's connected accounts on Zapier.com) - -2. User-facing (Oauth): for production scenarios where you are deploying an end-user - facing application and LangChain needs access to end-user's exposed actions and - connected accounts on Zapier.com - -This quick start will focus on the server-side use case for brevity. -Review [full docs](https://nla.zapier.com/start/) for user-facing oauth developer -support. - -Typically, you'd use SequentialChain, here's a basic example: - - 1. Use NLA to find an email in Gmail - 2. Use LLMChain to generate a draft reply to (1) - 3. Use NLA to send the draft reply (2) to someone in Slack via direct message - -In code, below: - -```python - -import os - -# get from https://platform.openai.com/ -os.environ["OPENAI_API_KEY"] = os.environ.get("OPENAI_API_KEY", "") - -# get from https://nla.zapier.com/docs/authentication/ -os.environ["ZAPIER_NLA_API_KEY"] = os.environ.get("ZAPIER_NLA_API_KEY", "") - -from langchain_community.agent_toolkits import ZapierToolkit -from langchain_community.utilities.zapier import ZapierNLAWrapper - -## step 0. 
expose gmail 'find email' and slack 'send channel message' actions - -# first go here, log in, expose (enable) the two actions: -# https://nla.zapier.com/demo/start -# -- for this example, can leave all fields "Have AI guess" -# in an oauth scenario, you'd get your own id (instead of 'demo') -# which you route your users through first - -zapier = ZapierNLAWrapper() -## To leverage OAuth you may pass the value `nla_oauth_access_token` to -## the ZapierNLAWrapper. If you do this there is no need to initialize -## the ZAPIER_NLA_API_KEY env variable -# zapier = ZapierNLAWrapper(zapier_nla_oauth_access_token="TOKEN_HERE") -toolkit = ZapierToolkit.from_zapier_nla_wrapper(zapier) -``` - -""" -from typing import Any, Dict, Optional - -from langchain_core._api import warn_deprecated -from langchain_core.callbacks import ( - AsyncCallbackManagerForToolRun, - CallbackManagerForToolRun, -) -from langchain_core.pydantic_v1 import Field, root_validator -from langchain_core.tools import BaseTool - -from langchain_community.tools.zapier.prompt import BASE_ZAPIER_TOOL_PROMPT -from langchain_community.utilities.zapier import ZapierNLAWrapper - - -class ZapierNLARunAction(BaseTool): - """ - Args: - action_id: a specific action ID (from list actions) of the action to execute - (the set api_key must be associated with the action owner) - instructions: a natural language instruction string for using the action - (eg. "get the latest email from Mike Knoop" for "Gmail: find email" action) - params: a dict, optional. Any params provided will *override* AI guesses - from `instructions` (see "understanding the AI guessing flow" here: - https://nla.zapier.com/docs/using-the-api#ai-guessing) - - """ - - api_wrapper: ZapierNLAWrapper = Field(default_factory=ZapierNLAWrapper) - action_id: str - params: Optional[dict] = None - base_prompt: str = BASE_ZAPIER_TOOL_PROMPT - zapier_description: str - params_schema: Dict[str, str] = Field(default_factory=dict) - name: str = "" - description: str = "" - - @root_validator - def set_name_description(cls, values: Dict[str, Any]) -> Dict[str, Any]: - zapier_description = values["zapier_description"] - params_schema = values["params_schema"] - if "instructions" in params_schema: - del params_schema["instructions"] - - # Ensure base prompt (if overridden) contains necessary input fields - necessary_fields = {"{zapier_description}", "{params}"} - if not all(field in values["base_prompt"] for field in necessary_fields): - raise ValueError( - "Your custom base Zapier prompt must contain input fields for " - "{zapier_description} and {params}." - ) - - values["name"] = zapier_description - values["description"] = values["base_prompt"].format( - zapier_description=zapier_description, - params=str(list(params_schema.keys())), - ) - return values - - def _run( - self, instructions: str, run_manager: Optional[CallbackManagerForToolRun] = None - ) -> str: - """Use the Zapier NLA tool to return a list of all exposed user actions.""" - warn_deprecated( - since="0.0.319", - message=( - "This tool will be deprecated on 2023-11-17. See " - "https://nla.zapier.com/sunset/ for details" - ), - ) - return self.api_wrapper.run_as_str(self.action_id, instructions, self.params) - - async def _arun( - self, - instructions: str, - run_manager: Optional[AsyncCallbackManagerForToolRun] = None, - ) -> str: - """Use the Zapier NLA tool to return a list of all exposed user actions.""" - warn_deprecated( - since="0.0.319", - message=( - "This tool will be deprecated on 2023-11-17. 
See " - "https://nla.zapier.com/sunset/ for details" - ), - ) - return await self.api_wrapper.arun_as_str( - self.action_id, - instructions, - self.params, - ) - - -ZapierNLARunAction.__doc__ = ( - ZapierNLAWrapper.run.__doc__ + ZapierNLARunAction.__doc__ # type: ignore -) - - -# other useful actions - - -class ZapierNLAListActions(BaseTool): - """ - Args: - None - - """ - - name: str = "ZapierNLA_list_actions" - description: str = BASE_ZAPIER_TOOL_PROMPT + ( - "This tool returns a list of the user's exposed actions." - ) - api_wrapper: ZapierNLAWrapper = Field(default_factory=ZapierNLAWrapper) - - def _run( - self, - _: str = "", - run_manager: Optional[CallbackManagerForToolRun] = None, - ) -> str: - """Use the Zapier NLA tool to return a list of all exposed user actions.""" - warn_deprecated( - since="0.0.319", - message=( - "This tool will be deprecated on 2023-11-17. See " - "https://nla.zapier.com/sunset/ for details" - ), - ) - return self.api_wrapper.list_as_str() - - async def _arun( - self, - _: str = "", - run_manager: Optional[AsyncCallbackManagerForToolRun] = None, - ) -> str: - """Use the Zapier NLA tool to return a list of all exposed user actions.""" - warn_deprecated( - since="0.0.319", - message=( - "This tool will be deprecated on 2023-11-17. See " - "https://nla.zapier.com/sunset/ for details" - ), - ) - return await self.api_wrapper.alist_as_str() - - -ZapierNLAListActions.__doc__ = ( - ZapierNLAWrapper.list.__doc__ + ZapierNLAListActions.__doc__ # type: ignore -) diff --git a/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_langchain_tracer.py b/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_langchain_tracer.py deleted file mode 100644 index 3dc6fcea19a..00000000000 --- a/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_langchain_tracer.py +++ /dev/null @@ -1,283 +0,0 @@ -"""Integration tests for the langchain tracer module.""" -import asyncio -import os - -from aiohttp import ClientSession -from langchain_core.callbacks.manager import atrace_as_chain_group, trace_as_chain_group -from langchain_core.tracers.context import tracing_v2_enabled, tracing_enabled -from langchain_core.prompts import PromptTemplate - -from langchain_community.chat_models import ChatOpenAI -from langchain_community.llms import OpenAI - -questions = [ - ( - "Who won the US Open men's final in 2019? " - "What is his age raised to the 0.334 power?" - ), - ( - "Who is Olivia Wilde's boyfriend? " - "What is his current age raised to the 0.23 power?" - ), - ( - "Who won the most recent formula 1 grand prix? " - "What is their age raised to the 0.23 power?" - ), - ( - "Who won the US Open women's final in 2019? " - "What is her age raised to the 0.34 power?" - ), - ("Who is Beyonce's husband? 
" "What is his age raised to the 0.19 power?"), -] - - -def test_tracing_sequential() -> None: - from langchain.agents import AgentType, initialize_agent, load_tools - os.environ["LANGCHAIN_TRACING"] = "true" - - for q in questions[:3]: - llm = OpenAI(temperature=0) - tools = load_tools(["llm-math", "serpapi"], llm=llm) - agent = initialize_agent( - tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True - ) - agent.run(q) - - -def test_tracing_session_env_var() -> None: - from langchain.agents import AgentType, initialize_agent, load_tools - os.environ["LANGCHAIN_TRACING"] = "true" - os.environ["LANGCHAIN_SESSION"] = "my_session" - - llm = OpenAI(temperature=0) - tools = load_tools(["llm-math", "serpapi"], llm=llm) - agent = initialize_agent( - tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True - ) - agent.run(questions[0]) - if "LANGCHAIN_SESSION" in os.environ: - del os.environ["LANGCHAIN_SESSION"] - - -async def test_tracing_concurrent() -> None: - from langchain.agents import AgentType, initialize_agent, load_tools - os.environ["LANGCHAIN_TRACING"] = "true" - aiosession = ClientSession() - llm = OpenAI(temperature=0) - async_tools = load_tools(["llm-math", "serpapi"], llm=llm, aiosession=aiosession) - agent = initialize_agent( - async_tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True - ) - tasks = [agent.arun(q) for q in questions[:3]] - await asyncio.gather(*tasks) - await aiosession.close() - - -async def test_tracing_concurrent_bw_compat_environ() -> None: - from langchain.agents import AgentType, initialize_agent, load_tools - os.environ["LANGCHAIN_HANDLER"] = "langchain" - if "LANGCHAIN_TRACING" in os.environ: - del os.environ["LANGCHAIN_TRACING"] - aiosession = ClientSession() - llm = OpenAI(temperature=0) - async_tools = load_tools(["llm-math", "serpapi"], llm=llm, aiosession=aiosession) - agent = initialize_agent( - async_tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True - ) - tasks = [agent.arun(q) for q in questions[:3]] - await asyncio.gather(*tasks) - await aiosession.close() - if "LANGCHAIN_HANDLER" in os.environ: - del os.environ["LANGCHAIN_HANDLER"] - - -def test_tracing_context_manager() -> None: - from langchain.agents import AgentType, initialize_agent, load_tools - llm = OpenAI(temperature=0) - tools = load_tools(["llm-math", "serpapi"], llm=llm) - agent = initialize_agent( - tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True - ) - if "LANGCHAIN_TRACING" in os.environ: - del os.environ["LANGCHAIN_TRACING"] - with tracing_enabled() as session: - assert session - agent.run(questions[0]) # this should be traced - - agent.run(questions[0]) # this should not be traced - - -async def test_tracing_context_manager_async() -> None: - from langchain.agents import AgentType, initialize_agent, load_tools - llm = OpenAI(temperature=0) - async_tools = load_tools(["llm-math", "serpapi"], llm=llm) - agent = initialize_agent( - async_tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True - ) - if "LANGCHAIN_TRACING" in os.environ: - del os.environ["LANGCHAIN_TRACING"] - - # start a background task - task = asyncio.create_task(agent.arun(questions[0])) # this should not be traced - with tracing_enabled() as session: - assert session - tasks = [agent.arun(q) for q in questions[1:4]] # these should be traced - await asyncio.gather(*tasks) - - await task - - -async def test_tracing_v2_environment_variable() -> None: - from langchain.agents import AgentType, initialize_agent, 
load_tools - os.environ["LANGCHAIN_TRACING_V2"] = "true" - - aiosession = ClientSession() - llm = OpenAI(temperature=0) - async_tools = load_tools(["llm-math", "serpapi"], llm=llm, aiosession=aiosession) - agent = initialize_agent( - async_tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True - ) - tasks = [agent.arun(q) for q in questions[:3]] - await asyncio.gather(*tasks) - await aiosession.close() - - -def test_tracing_v2_context_manager() -> None: - from langchain.agents import AgentType, initialize_agent, load_tools - llm = ChatOpenAI(temperature=0) - tools = load_tools(["llm-math", "serpapi"], llm=llm) - agent = initialize_agent( - tools, llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True - ) - if "LANGCHAIN_TRACING_V2" in os.environ: - del os.environ["LANGCHAIN_TRACING_V2"] - with tracing_v2_enabled(): - agent.run(questions[0]) # this should be traced - - agent.run(questions[0]) # this should not be traced - - -def test_tracing_v2_chain_with_tags() -> None: - from langchain.chains.llm import LLMChain - from langchain.chains.constitutional_ai.base import ConstitutionalChain - from langchain.chains.constitutional_ai.models import ConstitutionalPrinciple - llm = OpenAI(temperature=0) - chain = ConstitutionalChain.from_llm( - llm, - chain=LLMChain.from_string(llm, "Q: {question} A:"), - tags=["only-root"], - constitutional_principles=[ - ConstitutionalPrinciple( - critique_request="Tell if this answer is good.", - revision_request="Give a better answer.", - ) - ], - ) - if "LANGCHAIN_TRACING_V2" in os.environ: - del os.environ["LANGCHAIN_TRACING_V2"] - with tracing_v2_enabled(): - chain.run("what is the meaning of life", tags=["a-tag"]) - - -def test_tracing_v2_agent_with_metadata() -> None: - from langchain.agents import AgentType, initialize_agent, load_tools - os.environ["LANGCHAIN_TRACING_V2"] = "true" - llm = OpenAI(temperature=0) - chat = ChatOpenAI(temperature=0) - tools = load_tools(["llm-math", "serpapi"], llm=llm) - agent = initialize_agent( - tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True - ) - chat_agent = initialize_agent( - tools, chat, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True - ) - agent.run(questions[0], tags=["a-tag"], metadata={"a": "b", "c": "d"}) - chat_agent.run(questions[0], tags=["a-tag"], metadata={"a": "b", "c": "d"}) - - -async def test_tracing_v2_async_agent_with_metadata() -> None: - from langchain.agents import AgentType, initialize_agent, load_tools - os.environ["LANGCHAIN_TRACING_V2"] = "true" - llm = OpenAI(temperature=0, metadata={"f": "g", "h": "i"}) - chat = ChatOpenAI(temperature=0, metadata={"f": "g", "h": "i"}) - async_tools = load_tools(["llm-math", "serpapi"], llm=llm) - agent = initialize_agent( - async_tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True - ) - chat_agent = initialize_agent( - async_tools, - chat, - agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, - verbose=True, - ) - await agent.arun(questions[0], tags=["a-tag"], metadata={"a": "b", "c": "d"}) - await chat_agent.arun(questions[0], tags=["a-tag"], metadata={"a": "b", "c": "d"}) - - -def test_trace_as_group() -> None: - from langchain.chains.llm import LLMChain - llm = OpenAI(temperature=0.9) - prompt = PromptTemplate( - input_variables=["product"], - template="What is a good name for a company that makes {product}?", - ) - chain = LLMChain(llm=llm, prompt=prompt) - with trace_as_chain_group("my_group", inputs={"input": "cars"}) as group_manager: - chain.run(product="cars", 
callbacks=group_manager) - chain.run(product="computers", callbacks=group_manager) - final_res = chain.run(product="toys", callbacks=group_manager) - group_manager.on_chain_end({"output": final_res}) - - with trace_as_chain_group("my_group_2", inputs={"input": "toys"}) as group_manager: - final_res = chain.run(product="toys", callbacks=group_manager) - group_manager.on_chain_end({"output": final_res}) - - -def test_trace_as_group_with_env_set() -> None: - from langchain.chains.llm import LLMChain - os.environ["LANGCHAIN_TRACING_V2"] = "true" - llm = OpenAI(temperature=0.9) - prompt = PromptTemplate( - input_variables=["product"], - template="What is a good name for a company that makes {product}?", - ) - chain = LLMChain(llm=llm, prompt=prompt) - with trace_as_chain_group( - "my_group_env_set", inputs={"input": "cars"} - ) as group_manager: - chain.run(product="cars", callbacks=group_manager) - chain.run(product="computers", callbacks=group_manager) - final_res = chain.run(product="toys", callbacks=group_manager) - group_manager.on_chain_end({"output": final_res}) - - with trace_as_chain_group( - "my_group_2_env_set", inputs={"input": "toys"} - ) as group_manager: - final_res = chain.run(product="toys", callbacks=group_manager) - group_manager.on_chain_end({"output": final_res}) - - -async def test_trace_as_group_async() -> None: - from langchain.chains.llm import LLMChain - llm = OpenAI(temperature=0.9) - prompt = PromptTemplate( - input_variables=["product"], - template="What is a good name for a company that makes {product}?", - ) - chain = LLMChain(llm=llm, prompt=prompt) - async with atrace_as_chain_group("my_async_group") as group_manager: - await chain.arun(product="cars", callbacks=group_manager) - await chain.arun(product="computers", callbacks=group_manager) - await chain.arun(product="toys", callbacks=group_manager) - - async with atrace_as_chain_group( - "my_async_group_2", inputs={"input": "toys"} - ) as group_manager: - res = await asyncio.gather( - *[ - chain.arun(product="toys", callbacks=group_manager), - chain.arun(product="computers", callbacks=group_manager), - chain.arun(product="cars", callbacks=group_manager), - ] - ) - await group_manager.on_chain_end({"output": res}) diff --git a/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_openai_callback.py b/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_openai_callback.py deleted file mode 100644 index f0908bbf8d0..00000000000 --- a/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_openai_callback.py +++ /dev/null @@ -1,68 +0,0 @@ -"""Integration tests for the langchain tracer module.""" -import asyncio - - -from langchain_community.callbacks import get_openai_callback -from langchain_community.llms import OpenAI - - -async def test_openai_callback() -> None: - llm = OpenAI(temperature=0) - with get_openai_callback() as cb: - llm("What is the square root of 4?") - - total_tokens = cb.total_tokens - assert total_tokens > 0 - - with get_openai_callback() as cb: - llm("What is the square root of 4?") - llm("What is the square root of 4?") - - assert cb.total_tokens == total_tokens * 2 - - with get_openai_callback() as cb: - await asyncio.gather( - *[llm.agenerate(["What is the square root of 4?"]) for _ in range(3)] - ) - - assert cb.total_tokens == total_tokens * 3 - - task = asyncio.create_task(llm.agenerate(["What is the square root of 4?"])) - with get_openai_callback() as cb: - await llm.agenerate(["What is the square root of 4?"]) 
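- # NOTE: `task` was started before this callback context was entered, so its - # tokens should not be attributed to `cb`; only the call awaited inside the - # block counts toward `cb.total_tokens` (see the assertion below).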
- - await task - assert cb.total_tokens == total_tokens - - -def test_openai_callback_batch_llm() -> None: - llm = OpenAI(temperature=0) - with get_openai_callback() as cb: - llm.generate(["What is the square root of 4?", "What is the square root of 4?"]) - - assert cb.total_tokens > 0 - total_tokens = cb.total_tokens - - with get_openai_callback() as cb: - llm("What is the square root of 4?") - llm("What is the square root of 4?") - - assert cb.total_tokens == total_tokens - - -def test_openai_callback_agent() -> None: - from langchain.agents import AgentType, initialize_agent, load_tools - llm = OpenAI(temperature=0) - tools = load_tools(["serpapi", "llm-math"], llm=llm) - agent = initialize_agent( - tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True - ) - with get_openai_callback() as cb: - agent.run( - "Who is Olivia Wilde's boyfriend? " - "What is his current age raised to the 0.23 power?" - ) - print(f"Total Tokens: {cb.total_tokens}") - print(f"Prompt Tokens: {cb.prompt_tokens}") - print(f"Completion Tokens: {cb.completion_tokens}") - print(f"Total Cost (USD): ${cb.total_cost}") diff --git a/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_streamlit_callback.py b/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_streamlit_callback.py deleted file mode 100644 index 1ffe61dbdcf..00000000000 --- a/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_streamlit_callback.py +++ /dev/null @@ -1,30 +0,0 @@ -"""Integration tests for the StreamlitCallbackHandler module.""" - -import pytest - -# Import the internal StreamlitCallbackHandler from its module - and not from -# the `langchain_community.callbacks.streamlit` package - so that we don't end up using -# Streamlit's externally-provided callback handler. -from langchain_community.callbacks.streamlit.streamlit_callback_handler import ( - StreamlitCallbackHandler, -) -from langchain_community.llms import OpenAI - - -@pytest.mark.requires("streamlit") -def test_streamlit_callback_agent() -> None: - from langchain.agents import AgentType, initialize_agent, load_tools - import streamlit as st - - streamlit_callback = StreamlitCallbackHandler(st.container()) - - llm = OpenAI(temperature=0) - tools = load_tools(["serpapi", "llm-math"], llm=llm) - agent = initialize_agent( - tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True - ) - agent.run( - "Who is Olivia Wilde's boyfriend? " - "What is his current age raised to the 0.23 power?", - callbacks=[streamlit_callback], - ) diff --git a/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_wandb_tracer.py b/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_wandb_tracer.py deleted file mode 100644 index 02f022c62ad..00000000000 --- a/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_wandb_tracer.py +++ /dev/null @@ -1,118 +0,0 @@ -"""Integration tests for the langchain tracer module.""" -import asyncio -import os - -from aiohttp import ClientSession -from langchain_community.callbacks import wandb_tracing_enabled - -from langchain_community.llms import OpenAI - -questions = [ - ( - "Who won the US Open men's final in 2019? " - "What is his age raised to the 0.334 power?" - ), - ( - "Who is Olivia Wilde's boyfriend? " - "What is his current age raised to the 0.23 power?" - ), - ( - "Who won the most recent formula 1 grand prix? " - "What is their age raised to the 0.23 power?" 
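- # (this list mirrors the benchmark questions used in test_langchain_tracer.py above)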
- ), - ( - "Who won the US Open women's final in 2019? " - "What is her age raised to the 0.34 power?" - ), - ("Who is Beyonce's husband? " "What is his age raised to the 0.19 power?"), -] - - -def test_tracing_sequential() -> None: - from langchain.agents import AgentType, initialize_agent, load_tools - os.environ["LANGCHAIN_WANDB_TRACING"] = "true" - os.environ["WANDB_PROJECT"] = "langchain-tracing" - - for q in questions[:3]: - llm = OpenAI(temperature=0) - tools = load_tools( - ["llm-math", "serpapi"], - llm=llm, - ) - agent = initialize_agent( - tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True - ) - agent.run(q) - - -def test_tracing_session_env_var() -> None: - from langchain.agents import AgentType, initialize_agent, load_tools - os.environ["LANGCHAIN_WANDB_TRACING"] = "true" - - llm = OpenAI(temperature=0) - tools = load_tools( - ["llm-math", "serpapi"], - llm=llm, - ) - agent = initialize_agent( - tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True - ) - agent.run(questions[0]) - - -async def test_tracing_concurrent() -> None: - from langchain.agents import AgentType, initialize_agent, load_tools - os.environ["LANGCHAIN_WANDB_TRACING"] = "true" - aiosession = ClientSession() - llm = OpenAI(temperature=0) - async_tools = load_tools( - ["llm-math", "serpapi"], - llm=llm, - aiosession=aiosession, - ) - agent = initialize_agent( - async_tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True - ) - tasks = [agent.arun(q) for q in questions[:3]] - await asyncio.gather(*tasks) - await aiosession.close() - - -def test_tracing_context_manager() -> None: - from langchain.agents import AgentType, initialize_agent, load_tools - llm = OpenAI(temperature=0) - tools = load_tools( - ["llm-math", "serpapi"], - llm=llm, - ) - agent = initialize_agent( - tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True - ) - if "LANGCHAIN_WANDB_TRACING" in os.environ: - del os.environ["LANGCHAIN_WANDB_TRACING"] - with wandb_tracing_enabled(): - agent.run(questions[0]) # this should be traced - - agent.run(questions[0]) # this should not be traced - - -async def test_tracing_context_manager_async() -> None: - from langchain.agents import AgentType, initialize_agent, load_tools - llm = OpenAI(temperature=0) - async_tools = load_tools( - ["llm-math", "serpapi"], - llm=llm, - ) - agent = initialize_agent( - async_tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True - ) - if "LANGCHAIN_WANDB_TRACING" in os.environ: - del os.environ["LANGCHAIN_WANDB_TRACING"] - - # start a background task - task = asyncio.create_task(agent.arun(questions[0])) # this should not be traced - with wandb_tracing_enabled(): - tasks = [agent.arun(q) for q in questions[1:4]] # these should be traced - await asyncio.gather(*tasks) - - await task diff --git a/.scripts/community_split/libs/community/tests/integration_tests/chat_models/test_openai.py b/.scripts/community_split/libs/community/tests/integration_tests/chat_models/test_openai.py deleted file mode 100644 index 2ea93c6d9e2..00000000000 --- a/.scripts/community_split/libs/community/tests/integration_tests/chat_models/test_openai.py +++ /dev/null @@ -1,333 +0,0 @@ -"""Test ChatOpenAI wrapper.""" -from typing import Any, Optional - -import pytest -from langchain_core.callbacks import CallbackManager -from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage -from langchain_core.outputs import ( - ChatGeneration, - ChatResult, - LLMResult, -) -from langchain_core.prompts import
ChatPromptTemplate -from langchain_core.pydantic_v1 import BaseModel, Field - -from langchain_community.chat_models.openai import ChatOpenAI -from tests.unit_tests.callbacks.fake_callback_handler import FakeCallbackHandler - - -@pytest.mark.scheduled -def test_chat_openai() -> None: - """Test ChatOpenAI wrapper.""" - chat = ChatOpenAI( - temperature=0.7, - base_url=None, - organization=None, - openai_proxy=None, - timeout=10.0, - max_retries=3, - http_client=None, - n=1, - max_tokens=10, - default_headers=None, - default_query=None, - ) - message = HumanMessage(content="Hello") - response = chat([message]) - assert isinstance(response, BaseMessage) - assert isinstance(response.content, str) - - -def test_chat_openai_model() -> None: - """Test ChatOpenAI wrapper handles model_name.""" - chat = ChatOpenAI(model="foo") - assert chat.model_name == "foo" - chat = ChatOpenAI(model_name="bar") - assert chat.model_name == "bar" - - -def test_chat_openai_system_message() -> None: - """Test ChatOpenAI wrapper with system message.""" - chat = ChatOpenAI(max_tokens=10) - system_message = SystemMessage(content="You are to chat with the user.") - human_message = HumanMessage(content="Hello") - response = chat([system_message, human_message]) - assert isinstance(response, BaseMessage) - assert isinstance(response.content, str) - - -@pytest.mark.scheduled -def test_chat_openai_generate() -> None: - """Test ChatOpenAI wrapper with generate.""" - chat = ChatOpenAI(max_tokens=10, n=2) - message = HumanMessage(content="Hello") - response = chat.generate([[message], [message]]) - assert isinstance(response, LLMResult) - assert len(response.generations) == 2 - assert response.llm_output - for generations in response.generations: - assert len(generations) == 2 - for generation in generations: - assert isinstance(generation, ChatGeneration) - assert isinstance(generation.text, str) - assert generation.text == generation.message.content - - -@pytest.mark.scheduled -def test_chat_openai_multiple_completions() -> None: - """Test ChatOpenAI wrapper with multiple completions.""" - chat = ChatOpenAI(max_tokens=10, n=5) - message = HumanMessage(content="Hello") - response = chat._generate([message]) - assert isinstance(response, ChatResult) - assert len(response.generations) == 5 - for generation in response.generations: - assert isinstance(generation.message, BaseMessage) - assert isinstance(generation.message.content, str) - - -@pytest.mark.scheduled -def test_chat_openai_streaming() -> None: - """Test that streaming correctly invokes on_llm_new_token callback.""" - callback_handler = FakeCallbackHandler() - callback_manager = CallbackManager([callback_handler]) - chat = ChatOpenAI( - max_tokens=10, - streaming=True, - temperature=0, - callback_manager=callback_manager, - verbose=True, - ) - message = HumanMessage(content="Hello") - response = chat([message]) - assert callback_handler.llm_streams > 0 - assert isinstance(response, BaseMessage) - - -@pytest.mark.scheduled -def test_chat_openai_streaming_generation_info() -> None: - """Test that generation info is preserved when streaming.""" - - class _FakeCallback(FakeCallbackHandler): - saved_things: dict = {} - - def on_llm_end( - self, - *args: Any, - **kwargs: Any, - ) -> Any: - # Save the generation - self.saved_things["generation"] = args[0] - - callback = _FakeCallback() - callback_manager = CallbackManager([callback]) - chat = ChatOpenAI( - max_tokens=2, - temperature=0, - callback_manager=callback_manager, - ) - list(chat.stream("hi")) - generation = 
callback.saved_things["generation"] - # `Hello!` is two tokens, assert that that is what is returned - assert generation.generations[0][0].text == "Hello!" - - -def test_chat_openai_llm_output_contains_model_name() -> None: - """Test llm_output contains model_name.""" - chat = ChatOpenAI(max_tokens=10) - message = HumanMessage(content="Hello") - llm_result = chat.generate([[message]]) - assert llm_result.llm_output is not None - assert llm_result.llm_output["model_name"] == chat.model_name - - -def test_chat_openai_streaming_llm_output_contains_model_name() -> None: - """Test llm_output contains model_name.""" - chat = ChatOpenAI(max_tokens=10, streaming=True) - message = HumanMessage(content="Hello") - llm_result = chat.generate([[message]]) - assert llm_result.llm_output is not None - assert llm_result.llm_output["model_name"] == chat.model_name - - -def test_chat_openai_invalid_streaming_params() -> None: - """Test that streaming correctly invokes on_llm_new_token callback.""" - with pytest.raises(ValueError): - ChatOpenAI( - max_tokens=10, - streaming=True, - temperature=0, - n=5, - ) - - -@pytest.mark.scheduled -async def test_async_chat_openai() -> None: - """Test async generation.""" - chat = ChatOpenAI(max_tokens=10, n=2) - message = HumanMessage(content="Hello") - response = await chat.agenerate([[message], [message]]) - assert isinstance(response, LLMResult) - assert len(response.generations) == 2 - assert response.llm_output - for generations in response.generations: - assert len(generations) == 2 - for generation in generations: - assert isinstance(generation, ChatGeneration) - assert isinstance(generation.text, str) - assert generation.text == generation.message.content - - -@pytest.mark.scheduled -async def test_async_chat_openai_streaming() -> None: - """Test that streaming correctly invokes on_llm_new_token callback.""" - callback_handler = FakeCallbackHandler() - callback_manager = CallbackManager([callback_handler]) - chat = ChatOpenAI( - max_tokens=10, - streaming=True, - temperature=0, - callback_manager=callback_manager, - verbose=True, - ) - message = HumanMessage(content="Hello") - response = await chat.agenerate([[message], [message]]) - assert callback_handler.llm_streams > 0 - assert isinstance(response, LLMResult) - assert len(response.generations) == 2 - for generations in response.generations: - assert len(generations) == 1 - for generation in generations: - assert isinstance(generation, ChatGeneration) - assert isinstance(generation.text, str) - assert generation.text == generation.message.content - - - -@pytest.mark.scheduled -async def test_async_chat_openai_bind_functions() -> None: - """Test ChatOpenAI wrapper with multiple completions.""" - - class Person(BaseModel): - """Identifying information about a person.""" - - name: str = Field(..., title="Name", description="The person's name") - age: int = Field(..., title="Age", description="The person's age") - fav_food: Optional[str] = Field( - default=None, title="Fav Food", description="The person's favorite food" - ) - - chat = ChatOpenAI( - max_tokens=30, - n=1, - streaming=True, - ).bind_functions(functions=[Person], function_call="Person") - - prompt = ChatPromptTemplate.from_messages( - [ - ("system", "Use the provided Person function"), - ("user", "{input}"), - ] - ) - - chain = prompt | chat - - message = HumanMessage(content="Sally is 13 years old") - response = await chain.abatch([{"input": message}]) - - assert isinstance(response, list) - assert len(response) == 1 - for generation in response: - 
assert isinstance(generation, AIMessage) - - -def test_chat_openai_extra_kwargs() -> None: - """Test extra kwargs to chat openai.""" - # Check that foo is saved in extra_kwargs. - llm = ChatOpenAI(foo=3, max_tokens=10) - assert llm.max_tokens == 10 - assert llm.model_kwargs == {"foo": 3} - - # Test that if extra_kwargs are provided, they are added to it. - llm = ChatOpenAI(foo=3, model_kwargs={"bar": 2}) - assert llm.model_kwargs == {"foo": 3, "bar": 2} - - # Test that if provided twice it errors - with pytest.raises(ValueError): - ChatOpenAI(foo=3, model_kwargs={"foo": 2}) - - # Test that if explicit param is specified in kwargs it errors - with pytest.raises(ValueError): - ChatOpenAI(model_kwargs={"temperature": 0.2}) - - # Test that "model" cannot be specified in kwargs - with pytest.raises(ValueError): - ChatOpenAI(model_kwargs={"model": "text-davinci-003"}) - - -@pytest.mark.scheduled -def test_openai_streaming() -> None: - """Test streaming tokens from OpenAI.""" - llm = ChatOpenAI(max_tokens=10) - - for token in llm.stream("I'm Pickle Rick"): - assert isinstance(token.content, str) - - -@pytest.mark.scheduled -async def test_openai_astream() -> None: - """Test streaming tokens from OpenAI.""" - llm = ChatOpenAI(max_tokens=10) - - async for token in llm.astream("I'm Pickle Rick"): - assert isinstance(token.content, str) - - -@pytest.mark.scheduled -async def test_openai_abatch() -> None: - """Test streaming tokens from ChatOpenAI.""" - llm = ChatOpenAI(max_tokens=10) - - result = await llm.abatch(["I'm Pickle Rick", "I'm not Pickle Rick"]) - for token in result: - assert isinstance(token.content, str) - - -@pytest.mark.scheduled -async def test_openai_abatch_tags() -> None: - """Test batch tokens from ChatOpenAI.""" - llm = ChatOpenAI(max_tokens=10) - - result = await llm.abatch( - ["I'm Pickle Rick", "I'm not Pickle Rick"], config={"tags": ["foo"]} - ) - for token in result: - assert isinstance(token.content, str) - - -@pytest.mark.scheduled -def test_openai_batch() -> None: - """Test batch tokens from ChatOpenAI.""" - llm = ChatOpenAI(max_tokens=10) - - result = llm.batch(["I'm Pickle Rick", "I'm not Pickle Rick"]) - for token in result: - assert isinstance(token.content, str) - - -@pytest.mark.scheduled -async def test_openai_ainvoke() -> None: - """Test invoke tokens from ChatOpenAI.""" - llm = ChatOpenAI(max_tokens=10) - - result = await llm.ainvoke("I'm Pickle Rick", config={"tags": ["foo"]}) - assert isinstance(result.content, str) - - -@pytest.mark.scheduled -def test_openai_invoke() -> None: - """Test invoke tokens from ChatOpenAI.""" - llm = ChatOpenAI(max_tokens=10) - - result = llm.invoke("I'm Pickle Rick", config=dict(tags=["foo"])) - assert isinstance(result.content, str) diff --git a/.scripts/community_split/libs/community/tests/integration_tests/chat_models/test_qianfan_endpoint.py b/.scripts/community_split/libs/community/tests/integration_tests/chat_models/test_qianfan_endpoint.py deleted file mode 100644 index 88bfc66a382..00000000000 --- a/.scripts/community_split/libs/community/tests/integration_tests/chat_models/test_qianfan_endpoint.py +++ /dev/null @@ -1,219 +0,0 @@ -"""Test Baidu Qianfan Chat Endpoint.""" - -from typing import Any - -from langchain_core.callbacks import CallbackManager -from langchain_core.messages import ( - AIMessage, - BaseMessage, - FunctionMessage, - HumanMessage, -) -from langchain_core.outputs import ChatGeneration, LLMResult -from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate - -from 
langchain_community.chat_models.baidu_qianfan_endpoint import QianfanChatEndpoint -from tests.unit_tests.callbacks.fake_callback_handler import FakeCallbackHandler - -_FUNCTIONS: Any = [ - { - "name": "format_person_info", - "description": ( - "Output formatter. Should always be used to format your response to the" - " user." - ), - "parameters": { - "title": "Person", - "description": "Identifying information about a person.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "description": "The person's name", - "type": "string", - }, - "age": { - "title": "Age", - "description": "The person's age", - "type": "integer", - }, - "fav_food": { - "title": "Fav Food", - "description": "The person's favorite food", - "type": "string", - }, - }, - "required": ["name", "age"], - }, - }, - { - "name": "get_current_temperature", - "description": ("Used to get the location's temperature."), - "parameters": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "city name", - }, - "unit": { - "type": "string", - "enum": ["centigrade", "Fahrenheit"], - }, - }, - "required": ["location", "unit"], - }, - "responses": { - "type": "object", - "properties": { - "temperature": { - "type": "integer", - "description": "city temperature", - }, - "unit": { - "type": "string", - "enum": ["centigrade", "Fahrenheit"], - }, - }, - }, - }, -] - - -def test_default_call() -> None: - """Test default model(`ERNIE-Bot`) call.""" - chat = QianfanChatEndpoint() - response = chat(messages=[HumanMessage(content="Hello")]) - assert isinstance(response, BaseMessage) - assert isinstance(response.content, str) - - -def test_model() -> None: - """Test model kwarg works.""" - chat = QianfanChatEndpoint(model="BLOOMZ-7B") - response = chat(messages=[HumanMessage(content="Hello")]) - assert isinstance(response, BaseMessage) - assert isinstance(response.content, str) - - -def test_model_param() -> None: - """Test model params works.""" - chat = QianfanChatEndpoint() - response = chat(model="BLOOMZ-7B", messages=[HumanMessage(content="Hello")]) - assert isinstance(response, BaseMessage) - assert isinstance(response.content, str) - - -def test_endpoint() -> None: - """Test user custom model deployments like some open source models.""" - chat = QianfanChatEndpoint(endpoint="qianfan_bloomz_7b_compressed") - response = chat(messages=[HumanMessage(content="Hello")]) - assert isinstance(response, BaseMessage) - assert isinstance(response.content, str) - - -def test_endpoint_param() -> None: - """Test user custom model deployments like some open source models.""" - chat = QianfanChatEndpoint() - response = chat( - messages=[ - HumanMessage(endpoint="qianfan_bloomz_7b_compressed", content="Hello") - ] - ) - assert isinstance(response, BaseMessage) - assert isinstance(response.content, str) - - -def test_multiple_history() -> None: - """Tests multiple history works.""" - chat = QianfanChatEndpoint() - - response = chat( - messages=[ - HumanMessage(content="Hello."), - AIMessage(content="Hello!"), - HumanMessage(content="How are you doing?"), - ] - ) - assert isinstance(response, BaseMessage) - assert isinstance(response.content, str) - - -def test_stream() -> None: - """Test that stream works.""" - chat = QianfanChatEndpoint(streaming=True) - callback_handler = FakeCallbackHandler() - callback_manager = CallbackManager([callback_handler]) - response = chat( - messages=[ - HumanMessage(content="Hello."), - AIMessage(content="Hello!"), - HumanMessage(content="Who are you?"), - ], - 
stream=True, - callbacks=callback_manager, - ) - assert callback_handler.llm_streams > 0 - assert isinstance(response.content, str) - - -def test_multiple_messages() -> None: - """Tests that multiple messages work.""" - chat = QianfanChatEndpoint() - message = HumanMessage(content="Hi, how are you.") - response = chat.generate([[message], [message]]) - - assert isinstance(response, LLMResult) - assert len(response.generations) == 2 - for generations in response.generations: - assert len(generations) == 1 - for generation in generations: - assert isinstance(generation, ChatGeneration) - assert isinstance(generation.text, str) - assert generation.text == generation.message.content - - -def test_functions_call_thoughts() -> None: - chat = QianfanChatEndpoint(model="ERNIE-Bot") - - prompt_tmpl = "Use the given functions to answer following question: {input}" - prompt_msgs = [ - HumanMessagePromptTemplate.from_template(prompt_tmpl), - ] - prompt = ChatPromptTemplate(messages=prompt_msgs) - - chain = prompt | chat.bind(functions=_FUNCTIONS) - - message = HumanMessage(content="What's the temperature in Shanghai today?") - response = chain.batch([{"input": message}]) - assert isinstance(response[0], AIMessage) - assert "function_call" in response[0].additional_kwargs - - -def test_functions_call() -> None: - chat = QianfanChatEndpoint(model="ERNIE-Bot") - - prompt = ChatPromptTemplate( - messages=[ - HumanMessage(content="What's the temperature in Shanghai today?"), - AIMessage( - content="", - additional_kwargs={ - "function_call": { - "name": "get_current_temperature", - "thoughts": "i will use get_current_temperature " - "to resolve the questions", - "arguments": '{"location":"Shanghai","unit":"centigrade"}', - } - }, - ), - FunctionMessage( - name="get_current_temperature", - content='{"temperature": "25", \ - "unit": "摄氏度", "description": "晴朗"}', - ), - ] - ) - chain = prompt | chat.bind(functions=_FUNCTIONS) - resp = chain.invoke({}) - assert isinstance(resp, AIMessage) diff --git a/.scripts/community_split/libs/community/tests/integration_tests/document_loaders/parsers/test_language.py b/.scripts/community_split/libs/community/tests/integration_tests/document_loaders/parsers/test_language.py deleted file mode 100644 index c28789c7cd3..00000000000 --- a/.scripts/community_split/libs/community/tests/integration_tests/document_loaders/parsers/test_language.py +++ /dev/null @@ -1,182 +0,0 @@ -from pathlib import Path - -import pytest - -from langchain_community.document_loaders.concurrent import ConcurrentLoader -from langchain_community.document_loaders.generic import GenericLoader -from langchain_community.document_loaders.parsers import LanguageParser - - -def test_language_loader_for_python() -> None: - """Test Python loader with parser enabled.""" - file_path = Path(__file__).parent.parent.parent / "examples" - loader = GenericLoader.from_filesystem( - file_path, glob="hello_world.py", parser=LanguageParser(parser_threshold=5) - ) - docs = loader.load() - - assert len(docs) == 2 - - metadata = docs[0].metadata - assert metadata["source"] == str(file_path / "hello_world.py") - assert metadata["content_type"] == "functions_classes" - assert metadata["language"] == "python" - metadata = docs[1].metadata - assert metadata["source"] == str(file_path / "hello_world.py") - assert metadata["content_type"] == "simplified_code" - assert metadata["language"] == "python" - - assert ( - docs[0].page_content - == """def main(): - print("Hello World!") - - return 0""" - ) - assert ( - docs[1].page_content - ==
"""#!/usr/bin/env python3 - -import sys - - -# Code for: def main(): - - -if __name__ == "__main__": - sys.exit(main())""" - ) - - -def test_language_loader_for_python_with_parser_threshold() -> None: - """Test Python loader with parser enabled and below threshold.""" - file_path = Path(__file__).parent.parent.parent / "examples" - loader = GenericLoader.from_filesystem( - file_path, - glob="hello_world.py", - parser=LanguageParser(language="python", parser_threshold=1000), - ) - docs = loader.load() - - assert len(docs) == 1 - - -def esprima_installed() -> bool: - try: - import esprima # noqa: F401 - - return True - except Exception as e: - print(f"esprima not installed, skipping test {e}") - return False - - -@pytest.mark.skipif(not esprima_installed(), reason="requires esprima package") -def test_language_loader_for_javascript() -> None: - """Test JavaScript loader with parser enabled.""" - file_path = Path(__file__).parent.parent.parent / "examples" - loader = GenericLoader.from_filesystem( - file_path, glob="hello_world.js", parser=LanguageParser(parser_threshold=5) - ) - docs = loader.load() - - assert len(docs) == 3 - - metadata = docs[0].metadata - assert metadata["source"] == str(file_path / "hello_world.js") - assert metadata["content_type"] == "functions_classes" - assert metadata["language"] == "js" - metadata = docs[1].metadata - assert metadata["source"] == str(file_path / "hello_world.js") - assert metadata["content_type"] == "functions_classes" - assert metadata["language"] == "js" - metadata = docs[2].metadata - assert metadata["source"] == str(file_path / "hello_world.js") - assert metadata["content_type"] == "simplified_code" - assert metadata["language"] == "js" - - assert ( - docs[0].page_content - == """class HelloWorld { - sayHello() { - console.log("Hello World!"); - } -}""" - ) - assert ( - docs[1].page_content - == """function main() { - const hello = new HelloWorld(); - hello.sayHello(); -}""" - ) - assert ( - docs[2].page_content - == """// Code for: class HelloWorld { - -// Code for: function main() { - -main();""" - ) - - -def test_language_loader_for_javascript_with_parser_threshold() -> None: - """Test JavaScript loader with parser enabled and below threshold.""" - file_path = Path(__file__).parent.parent.parent / "examples" - loader = GenericLoader.from_filesystem( - file_path, - glob="hello_world.js", - parser=LanguageParser(language="js", parser_threshold=1000), - ) - docs = loader.load() - - assert len(docs) == 1 - - -def test_concurrent_language_loader_for_javascript_with_parser_threshold() -> None: - """Test JavaScript ConcurrentLoader with parser enabled and below threshold.""" - file_path = Path(__file__).parent.parent.parent / "examples" - loader = ConcurrentLoader.from_filesystem( - file_path, - glob="hello_world.js", - parser=LanguageParser(language="js", parser_threshold=1000), - ) - docs = loader.load() - - assert len(docs) == 1 - - -def test_concurrent_language_loader_for_python_with_parser_threshold() -> None: - """Test Python ConcurrentLoader with parser enabled and below threshold.""" - file_path = Path(__file__).parent.parent.parent / "examples" - loader = ConcurrentLoader.from_filesystem( - file_path, - glob="hello_world.py", - parser=LanguageParser(language="python", parser_threshold=1000), - ) - docs = loader.load() - - assert len(docs) == 1 - - -@pytest.mark.skipif(not esprima_installed(), reason="requires esprima package") -def test_concurrent_language_loader_for_javascript() -> None: - """Test JavaScript ConcurrentLoader with parser 
enabled.""" - file_path = Path(__file__).parent.parent.parent / "examples" - loader = ConcurrentLoader.from_filesystem( - file_path, glob="hello_world.js", parser=LanguageParser(parser_threshold=5) - ) - docs = loader.load() - - assert len(docs) == 3 - - -def test_concurrent_language_loader_for_python() -> None: - """Test Python ConcurrentLoader with parser enabled.""" - file_path = Path(__file__).parent.parent.parent / "examples" - loader = ConcurrentLoader.from_filesystem( - file_path, glob="hello_world.py", parser=LanguageParser(parser_threshold=5) - ) - docs = loader.load() - - assert len(docs) == 2 diff --git a/.scripts/community_split/libs/community/tests/integration_tests/llms/test_fireworks.py b/.scripts/community_split/libs/community/tests/integration_tests/llms/test_fireworks.py deleted file mode 100644 index d7839b50645..00000000000 --- a/.scripts/community_split/libs/community/tests/integration_tests/llms/test_fireworks.py +++ /dev/null @@ -1,136 +0,0 @@ -"""Test Fireworks AI API Wrapper.""" -from typing import Generator - -import pytest -from langchain_core.outputs import LLMResult - -from langchain_community.llms.fireworks import Fireworks - -@pytest.fixture -def llm() -> Fireworks: - return Fireworks(model_kwargs={"temperature": 0, "max_tokens": 512}) - - -@pytest.mark.scheduled -def test_fireworks_call(llm: Fireworks) -> None: - """Test valid call to Fireworks.""" - output = llm("How is the weather in New York today?") - assert isinstance(output, str) - - -@pytest.mark.scheduled -def test_fireworks_model_param() -> None: - """Tests model parameters for Fireworks""" - llm = Fireworks(model="foo") - assert llm.model == "foo" - - -@pytest.mark.scheduled -def test_fireworks_invoke(llm: Fireworks) -> None: - """Tests completion with invoke""" - output = llm.invoke("How is the weather in New York today?", stop=[","]) - assert isinstance(output, str) - assert output[-1] == "," - - -@pytest.mark.scheduled -async def test_fireworks_ainvoke(llm: Fireworks) -> None: - """Tests completion with ainvoke""" - output = await llm.ainvoke("How is the weather in New York today?", stop=[","]) - assert isinstance(output, str) - assert output[-1] == "," - - -@pytest.mark.scheduled -def test_fireworks_batch(llm: Fireworks) -> None: - """Tests completion with batch""" - output = llm.batch( - [ - "How is the weather in New York today?", - "How is the weather in New York today?", - ], - stop=[","], - ) - for token in output: - assert isinstance(token, str) - assert token[-1] == "," - - -@pytest.mark.scheduled -async def test_fireworks_abatch(llm: Fireworks) -> None: - """Tests completion with abatch""" - output = await llm.abatch( - [ - "How is the weather in New York today?", - "How is the weather in New York today?", - ], - stop=[","], - ) - for token in output: - assert isinstance(token, str) - assert token[-1] == "," - - -@pytest.mark.scheduled -def test_fireworks_multiple_prompts( - llm: Fireworks, -) -> None: - """Test completion with multiple prompts.""" - output = llm.generate(["How is the weather in New York today?", "I'm pickle rick"]) - assert isinstance(output, LLMResult) - assert isinstance(output.generations, list) - assert len(output.generations) == 2 - - -@pytest.mark.scheduled -def test_fireworks_streaming(llm: Fireworks) -> None: - """Test stream completion.""" - generator = llm.stream("Who's the best quarterback in the NFL?") - assert isinstance(generator, Generator) - - for token in generator: - assert isinstance(token, str) - - -@pytest.mark.scheduled -def
test_fireworks_streaming_stop_words(llm: Fireworks) -> None: - """Test stream completion with stop words.""" - generator = llm.stream("Who's the best quarterback in the NFL?", stop=[","]) - assert isinstance(generator, Generator) - - last_token = "" - for token in generator: - last_token = token - assert isinstance(token, str) - assert last_token[-1] == "," - - -@pytest.mark.scheduled -async def test_fireworks_streaming_async(llm: Fireworks) -> None: - """Test stream completion.""" - - last_token = "" - async for token in llm.astream( - "Who's the best quarterback in the NFL?", stop=[","] - ): - last_token = token - assert isinstance(token, str) - assert last_token[-1] == "," - - -@pytest.mark.scheduled -async def test_fireworks_async_agenerate(llm: Fireworks) -> None: - """Test async.""" - output = await llm.agenerate(["What is the best city to live in California?"]) - assert isinstance(output, LLMResult) - - -@pytest.mark.scheduled -async def test_fireworks_multiple_prompts_async_agenerate(llm: Fireworks) -> None: - output = await llm.agenerate( - ["How is the weather in New York today?", "I'm pickle rick"] - ) - assert isinstance(output, LLMResult) - assert isinstance(output.generations, list) - assert len(output.generations) == 2 diff --git a/.scripts/community_split/libs/community/tests/integration_tests/llms/test_opaqueprompts.py b/.scripts/community_split/libs/community/tests/integration_tests/llms/test_opaqueprompts.py deleted file mode 100644 index 69d851765db..00000000000 --- a/.scripts/community_split/libs/community/tests/integration_tests/llms/test_opaqueprompts.py +++ /dev/null @@ -1,77 +0,0 @@ -import langchain_community.utilities.opaqueprompts as op -from langchain_core.output_parsers import StrOutputParser -from langchain_core.prompts import PromptTemplate -from langchain_core.runnables import RunnableParallel - -from langchain_community.llms import OpenAI -from langchain_community.llms.opaqueprompts import OpaquePrompts - -prompt_template = """ -As an AI assistant, you will answer questions according to given context. - -Sensitive personal information in the question is masked for privacy. -For instance, if the original text says "Giana is good," it will be changed -to "PERSON_998 is good." - -Here's how to handle these changes: -* Consider these masked phrases just as placeholders, but still refer to -them in a relevant way when answering. -* It's possible that different masked terms might mean the same thing. -Stick with the given term and don't modify it. -* All masked terms follow the "TYPE_ID" pattern. -* Please don't invent new masked terms. For instance, if you see "PERSON_998," -don't come up with "PERSON_997" or "PERSON_999" unless they're already in the question. - -Conversation History: ```{history}``` -Context : ```During our recent meeting on February 23, 2023, at 10:30 AM, -John Doe provided me with his personal details. His email is johndoe@example.com -and his contact number is 650-456-7890. He lives in New York City, USA, and -belongs to the American nationality with Christian beliefs and a leaning towards -the Democratic party. He mentioned that he recently made a transaction using his -credit card 4111 1111 1111 1111 and transferred bitcoins to the wallet address -1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa. While discussing his European travels, he -noted down his IBAN as GB29 NWBK 6016 1331 9268 19. Additionally, he provided -his website as https://johndoeportfolio.com. John also discussed -some of his US-specific details. 
He said his bank account number is -1234567890123456 and his driver's license is Y12345678. His ITIN is 987-65-4321, -and he recently renewed his passport, -the number for which is 123456789. He emphasized not to share his SSN, which is -669-45-6789. Furthermore, he mentioned that he accesses his work files remotely -through the IP 192.168.1.1 and has a medical license number MED-123456. ``` -Question: ```{question}``` -""" - - -def test_opaqueprompts() -> None: - chain = PromptTemplate.from_template(prompt_template) | OpaquePrompts(llm=OpenAI()) - output = chain.invoke( - { - "question": "Write a text message to remind John to do password reset \ - for his website through his email to stay secure." - } - ) - assert isinstance(output, str) - - -def test_opaqueprompts_functions() -> None: - prompt = PromptTemplate.from_template(prompt_template) - llm = OpenAI() - pg_chain = ( - op.sanitize - | RunnableParallel( - secure_context=lambda x: x["secure_context"], # type: ignore - response=(lambda x: x["sanitized_input"]) # type: ignore - | prompt - | llm - | StrOutputParser(), - ) - | (lambda x: op.desanitize(x["response"], x["secure_context"])) - ) - - pg_chain.invoke( - { - "question": "Write a text message to remind John to do password reset\ - for his website through his email to stay secure.", - "history": "", - } - ) diff --git a/.scripts/community_split/libs/community/tests/integration_tests/llms/test_symblai_nebula.py b/.scripts/community_split/libs/community/tests/integration_tests/llms/test_symblai_nebula.py deleted file mode 100644 index b1b2eb3b535..00000000000 --- a/.scripts/community_split/libs/community/tests/integration_tests/llms/test_symblai_nebula.py +++ /dev/null @@ -1,42 +0,0 @@ -"""Test Nebula API wrapper.""" -from langchain_community.llms.symblai_nebula import Nebula - - -def test_symblai_nebula_call() -> None: - """Test valid call to Nebula.""" - conversation = """Sam: Good morning, team! Let's keep this standup concise. - We'll go in the usual order: what you did yesterday, - what you plan to do today, and any blockers. Alex, kick us off. -Alex: Morning! Yesterday, I wrapped up the UI for the user dashboard. -The new charts and widgets are now responsive. -I also had a sync with the design team to ensure the final touchups are in -line with the brand guidelines. Today, I'll start integrating the frontend with -the new API endpoints Rhea was working on. -The only blocker is waiting for some final API documentation, -but I guess Rhea can update on that. -Rhea: Hey, all! Yep, about the API documentation - I completed the majority of - the backend work for user data retrieval yesterday. - The endpoints are mostly set up, but I need to do a bit more testing today. - I'll finalize the API documentation by noon, so that should unblock Alex. - After that, I'll be working on optimizing the database queries - for faster data fetching. No other blockers on my end. -Sam: Great, thanks Rhea. Do reach out if you need any testing assistance - or if there are any hitches with the database. - Now, my update: Yesterday, I coordinated with the client to get clarity - on some feature requirements. Today, I'll be updating our project roadmap - and timelines based on their feedback. Additionally, I'll be sitting with - the QA team in the afternoon for preliminary testing. - Blocker: I might need both of you to be available for a quick call - in case the client wants to discuss the changes live. -Alex: Sounds good, Sam. Just let us know a little in advance for the call. -Rhea: Agreed.
We can make time for that. -Sam: Perfect! Let's keep the momentum going. Reach out if there are any -sudden issues or support needed. Have a productive day! -Alex: You too. -Rhea: Thanks, bye!""" - llm = Nebula(nebula_api_key="") - - instruction = """Identify the main objectives mentioned in this -conversation.""" - output = llm.invoke(f"{instruction}\n{conversation}") - assert isinstance(output, str) diff --git a/.scripts/community_split/libs/community/tests/integration_tests/llms/test_vertexai.py b/.scripts/community_split/libs/community/tests/integration_tests/llms/test_vertexai.py deleted file mode 100644 index ae5f776b4ba..00000000000 --- a/.scripts/community_split/libs/community/tests/integration_tests/llms/test_vertexai.py +++ /dev/null @@ -1,151 +0,0 @@ -"""Test Vertex AI API wrapper. -In order to run this test, you need to install VertexAI SDK: -pip install google-cloud-aiplatform>=1.36.0 - -Your end-user credentials would be used to make the calls (make sure you've run -`gcloud auth login` first). -""" -import os -from typing import Optional - -import pytest -from langchain_core.outputs import LLMResult - -from langchain_community.llms import VertexAI, VertexAIModelGarden - - -def test_vertex_initialization() -> None: - llm = VertexAI() - assert llm._llm_type == "vertexai" - assert llm.model_name == llm.client._model_id - - -def test_vertex_call() -> None: - llm = VertexAI(temperature=0) - output = llm("Say foo:") - assert isinstance(output, str) - - -@pytest.mark.scheduled -def test_vertex_generate() -> None: - llm = VertexAI(temperature=0.3, n=2, model_name="text-bison@001") - output = llm.generate(["Say foo:"]) - assert isinstance(output, LLMResult) - assert len(output.generations) == 1 - assert len(output.generations[0]) == 2 - - -@pytest.mark.scheduled -def test_vertex_generate_code() -> None: - llm = VertexAI(temperature=0.3, n=2, model_name="code-bison@001") - output = llm.generate(["generate a python method that says foo:"]) - assert isinstance(output, LLMResult) - assert len(output.generations) == 1 - assert len(output.generations[0]) == 2 - - -@pytest.mark.scheduled -async def test_vertex_agenerate() -> None: - llm = VertexAI(temperature=0) - output = await llm.agenerate(["Please say foo:"]) - assert isinstance(output, LLMResult) - - -@pytest.mark.scheduled -def test_vertex_stream() -> None: - llm = VertexAI(temperature=0) - outputs = list(llm.stream("Please say foo:")) - assert isinstance(outputs[0], str) - - -async def test_vertex_consistency() -> None: - llm = VertexAI(temperature=0) - output = llm.generate(["Please say foo:"]) - streaming_output = llm.generate(["Please say foo:"], stream=True) - async_output = await llm.agenerate(["Please say foo:"]) - assert output.generations[0][0].text == streaming_output.generations[0][0].text - assert output.generations[0][0].text == async_output.generations[0][0].text - - -@pytest.mark.parametrize( - "endpoint_os_variable_name,result_arg", - [("FALCON_ENDPOINT_ID", "generated_text"), ("LLAMA_ENDPOINT_ID", None)], -) -def test_model_garden( - endpoint_os_variable_name: str, result_arg: Optional[str] -) -> None: - """In order to run this test, you should provide endpoint names. - - Example: - export FALCON_ENDPOINT_ID=... - export LLAMA_ENDPOINT_ID=... - export PROJECT=... 
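- - Note: the endpoint IDs are assumed to point at Model Garden deployments in - the "europe-west4" region, since that is the location this test passes to - VertexAIModelGarden below.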
- """ - endpoint_id = os.environ[endpoint_os_variable_name] - project = os.environ["PROJECT"] - location = "europe-west4" - llm = VertexAIModelGarden( - endpoint_id=endpoint_id, - project=project, - result_arg=result_arg, - location=location, - ) - output = llm("What is the meaning of life?") - assert isinstance(output, str) - assert llm._llm_type == "vertexai_model_garden" - - -@pytest.mark.parametrize( - "endpoint_os_variable_name,result_arg", - [("FALCON_ENDPOINT_ID", "generated_text"), ("LLAMA_ENDPOINT_ID", None)], -) -def test_model_garden_generate( - endpoint_os_variable_name: str, result_arg: Optional[str] -) -> None: - """In order to run this test, you should provide endpoint names. - - Example: - export FALCON_ENDPOINT_ID=... - export LLAMA_ENDPOINT_ID=... - export PROJECT=... - """ - endpoint_id = os.environ[endpoint_os_variable_name] - project = os.environ["PROJECT"] - location = "europe-west4" - llm = VertexAIModelGarden( - endpoint_id=endpoint_id, - project=project, - result_arg=result_arg, - location=location, - ) - output = llm.generate(["What is the meaning of life?", "How much is 2+2"]) - assert isinstance(output, LLMResult) - assert len(output.generations) == 2 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "endpoint_os_variable_name,result_arg", - [("FALCON_ENDPOINT_ID", "generated_text"), ("LLAMA_ENDPOINT_ID", None)], -) -async def test_model_garden_agenerate( - endpoint_os_variable_name: str, result_arg: Optional[str] -) -> None: - endpoint_id = os.environ[endpoint_os_variable_name] - project = os.environ["PROJECT"] - location = "europe-west4" - llm = VertexAIModelGarden( - endpoint_id=endpoint_id, - project=project, - result_arg=result_arg, - location=location, - ) - output = await llm.agenerate(["What is the meaning of life?", "How much is 2+2"]) - assert isinstance(output, LLMResult) - assert len(output.generations) == 2 - - -def test_vertex_call_count_tokens() -> None: - llm = VertexAI() - output = llm.get_num_tokens("How are you?") - assert output == 4 diff --git a/.scripts/community_split/libs/community/tests/integration_tests/utilities/test_arxiv.py b/.scripts/community_split/libs/community/tests/integration_tests/utilities/test_arxiv.py deleted file mode 100644 index 536ba323bfb..00000000000 --- a/.scripts/community_split/libs/community/tests/integration_tests/utilities/test_arxiv.py +++ /dev/null @@ -1,171 +0,0 @@ -"""Integration test for Arxiv API Wrapper.""" -from typing import Any, List - -import pytest -from langchain_core.documents import Document -from langchain_core.tools import BaseTool - -from langchain_community.tools import ArxivQueryRun -from langchain_community.utilities import ArxivAPIWrapper - - -@pytest.fixture -def api_client() -> ArxivAPIWrapper: - return ArxivAPIWrapper() - - -def test_run_success_paper_name(api_client: ArxivAPIWrapper) -> None: - """Test a query of paper name that returns the correct answer""" - - output = api_client.run("Heat-bath random walks with Markov bases") - assert "Probability distributions for Markov chains based quantum walks" in output - assert ( - "Transformations of random walks on groups via Markov stopping times" in output - ) - assert ( - "Recurrence of Multidimensional Persistent Random Walks. 
Fourier and Series " - "Criteria" in output - ) - - -def test_run_success_arxiv_identifier(api_client: ArxivAPIWrapper) -> None: - """Test a query of an arxiv identifier returns the correct answer""" - - output = api_client.run("1605.08386v1") - assert "Heat-bath random walks with Markov bases" in output - - -def test_run_success_multiple_arxiv_identifiers(api_client: ArxivAPIWrapper) -> None: - """Test a query of multiple arxiv identifiers that returns the correct answer""" - - output = api_client.run("1605.08386v1 2212.00794v2 2308.07912") - assert "Heat-bath random walks with Markov bases" in output - assert "Scaling Language-Image Pre-training via Masking" in output - assert ( - "Ultra-low mass PBHs in the early universe can explain the PTA signal" in output - ) - - -def test_run_returns_several_docs(api_client: ArxivAPIWrapper) -> None: - """Test that returns several docs""" - - output = api_client.run("Caprice Stanley") - assert "On Mixing Behavior of a Family of Random Walks" in output - - -def test_run_returns_no_result(api_client: ArxivAPIWrapper) -> None: - """Test that gives no result.""" - - output = api_client.run("1605.08386WWW") - assert "No good Arxiv Result was found" == output - - -def assert_docs(docs: List[Document]) -> None: - for doc in docs: - assert doc.page_content - assert doc.metadata - assert set(doc.metadata) == {"Published", "Title", "Authors", "Summary"} - - -def test_load_success_paper_name(api_client: ArxivAPIWrapper) -> None: - """Test a query of paper name that returns one document""" - - docs = api_client.load("Heat-bath random walks with Markov bases") - assert len(docs) == 3 - assert_docs(docs) - - -def test_load_success_arxiv_identifier(api_client: ArxivAPIWrapper) -> None: - """Test a query of an arxiv identifier that returns one document""" - - docs = api_client.load("1605.08386v1") - assert len(docs) == 1 - assert_docs(docs) - - -def test_load_success_multiple_arxiv_identifiers(api_client: ArxivAPIWrapper) -> None: - """Test a query of arxiv identifiers that returns the correct answer""" - - docs = api_client.load("1605.08386v1 2212.00794v2 2308.07912") - assert len(docs) == 3 - assert_docs(docs) - - -def test_load_returns_no_result(api_client: ArxivAPIWrapper) -> None: - """Test that returns no docs""" - - docs = api_client.load("1605.08386WWW") - assert len(docs) == 0 - - -def test_load_returns_limited_docs() -> None: - """Test that returns several docs""" - expected_docs = 2 - api_client = ArxivAPIWrapper(load_max_docs=expected_docs) - docs = api_client.load("ChatGPT") - assert len(docs) == expected_docs - assert_docs(docs) - - -def test_load_returns_limited_doc_content_chars() -> None: - """Test that returns limited doc_content_chars_max""" - - doc_content_chars_max = 100 - api_client = ArxivAPIWrapper(doc_content_chars_max=doc_content_chars_max) - docs = api_client.load("1605.08386") - assert len(docs[0].page_content) == doc_content_chars_max - - -def test_load_returns_unlimited_doc_content_chars() -> None: - """Test that returns unlimited doc_content_chars_max""" - - doc_content_chars_max = None - api_client = ArxivAPIWrapper(doc_content_chars_max=doc_content_chars_max) - docs = api_client.load("1605.08386") - assert len(docs[0].page_content) == pytest.approx(54338, rel=1e-2) - - -def test_load_returns_full_set_of_metadata() -> None: - """Test that returns several docs""" - api_client = ArxivAPIWrapper(load_max_docs=1, load_all_available_meta=True) - docs = api_client.load("ChatGPT") - assert len(docs) == 1 - for doc in docs: - assert 
doc.page_content - assert doc.metadata - assert set(doc.metadata).issuperset( - {"Published", "Title", "Authors", "Summary"} - ) - print(doc.metadata) - assert len(set(doc.metadata)) > 4 - - -def _load_arxiv_from_universal_entry(**kwargs: Any) -> BaseTool: - from langchain.agents.load_tools import load_tools - tools = load_tools(["arxiv"], **kwargs) - assert len(tools) == 1, "loaded more than 1 tool" - return tools[0] - - -def test_load_arxiv_from_universal_entry() -> None: - arxiv_tool = _load_arxiv_from_universal_entry() - output = arxiv_tool("Caprice Stanley") - assert ( - "On Mixing Behavior of a Family of Random Walks" in output - ), "failed to fetch a valid result" - - -def test_load_arxiv_from_universal_entry_with_params() -> None: - params = { - "top_k_results": 1, - "load_max_docs": 10, - "load_all_available_meta": True, - } - arxiv_tool = _load_arxiv_from_universal_entry(**params) - assert isinstance(arxiv_tool, ArxivQueryRun) - wp = arxiv_tool.api_wrapper - assert wp.top_k_results == 1, "failed to assert top_k_results" - assert wp.load_max_docs == 10, "failed to assert load_max_docs" - assert ( - wp.load_all_available_meta is True - ), "failed to assert load_all_available_meta" diff --git a/.scripts/community_split/libs/community/tests/integration_tests/utilities/test_pubmed.py b/.scripts/community_split/libs/community/tests/integration_tests/utilities/test_pubmed.py deleted file mode 100644 index bed2681e681..00000000000 --- a/.scripts/community_split/libs/community/tests/integration_tests/utilities/test_pubmed.py +++ /dev/null @@ -1,164 +0,0 @@ -"""Integration test for PubMed API Wrapper.""" -from typing import Any, List - -import pytest -from langchain_core.documents import Document -from langchain_core.tools import BaseTool - -from langchain_community.tools import PubmedQueryRun -from langchain_community.utilities import PubMedAPIWrapper - -xmltodict = pytest.importorskip("xmltodict") - - -@pytest.fixture -def api_client() -> PubMedAPIWrapper: - return PubMedAPIWrapper() - - -def test_run_success(api_client: PubMedAPIWrapper) -> None: - """Test that returns the correct answer""" - - search_string = ( - "Examining the Validity of ChatGPT in Identifying " - "Relevant Nephrology Literature" - ) - output = api_client.run(search_string) - test_string = ( - "Examining the Validity of ChatGPT in Identifying " - "Relevant Nephrology Literature: Findings and Implications" - ) - assert test_string in output - assert len(output) == api_client.doc_content_chars_max - - -def test_run_returns_no_result(api_client: PubMedAPIWrapper) -> None: - """Test that gives no result.""" - - output = api_client.run("1605.08386WWW") - assert "No good PubMed Result was found" == output - - -def test_retrieve_article_returns_book_abstract(api_client: PubMedAPIWrapper) -> None: - """Test that returns the excerpt of a book.""" - - output_nolabel = api_client.retrieve_article("25905357", "") - output_withlabel = api_client.retrieve_article("29262144", "") - test_string_nolabel = ( - "Osteoporosis is a multifactorial disorder associated with low bone mass and " - "enhanced skeletal fragility. Although" - ) - assert test_string_nolabel in output_nolabel["Summary"] - assert ( - "Wallenberg syndrome was first described in 1808 by Gaspard Vieusseux. 
However," - in output_withlabel["Summary"] - ) - - -def test_retrieve_article_returns_article_abstract( - api_client: PubMedAPIWrapper, -) -> None: - """Test that returns the abstract of an article.""" - - output_nolabel = api_client.retrieve_article("37666905", "") - output_withlabel = api_client.retrieve_article("37666551", "") - test_string_nolabel = ( - "This work aims to: (1) Provide maximal hand force data on six different " - "grasp types for healthy subjects; (2) detect grasp types with maximal " - "force significantly affected by hand osteoarthritis (HOA) in women; (3) " - "look for predictors to detect HOA from the maximal forces using discriminant " - "analyses." - ) - assert test_string_nolabel in output_nolabel["Summary"] - test_string_withlabel = ( - "OBJECTIVES: To assess across seven hospitals from six different countries " - "the extent to which the COVID-19 pandemic affected the volumes of orthopaedic " - "hospital admissions and patient outcomes for non-COVID-19 patients admitted " - "for orthopaedic care." - ) - assert test_string_withlabel in output_withlabel["Summary"] - - -def test_retrieve_article_no_abstract_available(api_client: PubMedAPIWrapper) -> None: - """Test that returns 'No abstract available'.""" - - output = api_client.retrieve_article("10766884", "") - assert "No abstract available" == output["Summary"] - - -def assert_docs(docs: List[Document]) -> None: - for doc in docs: - assert doc.metadata - assert set(doc.metadata) == { - "Copyright Information", - "uid", - "Title", - "Published", - } - - -def test_load_success(api_client: PubMedAPIWrapper) -> None: - """Test that returns one document""" - - docs = api_client.load_docs("chatgpt") - assert len(docs) == api_client.top_k_results == 3 - assert_docs(docs) - - -def test_load_returns_no_result(api_client: PubMedAPIWrapper) -> None: - """Test that returns no docs""" - - docs = api_client.load_docs("1605.08386WWW") - assert len(docs) == 0 - - -def test_load_returns_limited_docs() -> None: - """Test that returns several docs""" - expected_docs = 2 - api_client = PubMedAPIWrapper(top_k_results=expected_docs) - docs = api_client.load_docs("ChatGPT") - assert len(docs) == expected_docs - assert_docs(docs) - - -def test_load_returns_full_set_of_metadata() -> None: - """Test that returns several docs""" - api_client = PubMedAPIWrapper(load_max_docs=1, load_all_available_meta=True) - docs = api_client.load_docs("ChatGPT") - assert len(docs) == 3 - for doc in docs: - assert doc.metadata - assert set(doc.metadata).issuperset( - {"Copyright Information", "Published", "Title", "uid"} - ) - - -def _load_pubmed_from_universal_entry(**kwargs: Any) -> BaseTool: - from langchain.agents.load_tools import load_tools - tools = load_tools(["pubmed"], **kwargs) - assert len(tools) == 1, "loaded more than 1 tool" - return tools[0] - - -def test_load_pupmed_from_universal_entry() -> None: - pubmed_tool = _load_pubmed_from_universal_entry() - search_string = ( - "Examining the Validity of ChatGPT in Identifying " - "Relevant Nephrology Literature" - ) - output = pubmed_tool(search_string) - test_string = ( - "Examining the Validity of ChatGPT in Identifying " - "Relevant Nephrology Literature: Findings and Implications" - ) - assert test_string in output - - -def test_load_pupmed_from_universal_entry_with_params() -> None: - params = { - "top_k_results": 1, - } - pubmed_tool = _load_pubmed_from_universal_entry(**params) - assert isinstance(pubmed_tool, PubmedQueryRun) - wp = pubmed_tool.api_wrapper - assert wp.top_k_results == 1, 
"failed to assert top_k_results" diff --git a/.scripts/community_split/libs/community/tests/integration_tests/vectorstores/conftest.py b/.scripts/community_split/libs/community/tests/integration_tests/vectorstores/conftest.py deleted file mode 100644 index a2fc9053128..00000000000 --- a/.scripts/community_split/libs/community/tests/integration_tests/vectorstores/conftest.py +++ /dev/null @@ -1,44 +0,0 @@ -import os -from typing import Union - -import pytest -from vcr.request import Request - -# Those environment variables turn on Deep Lake pytest mode. -# It significantly makes tests run much faster. -# Need to run before `import deeplake` -os.environ["BUGGER_OFF"] = "true" -os.environ["DEEPLAKE_DOWNLOAD_PATH"] = "./testing/local_storage" -os.environ["DEEPLAKE_PYTEST_ENABLED"] = "true" - - -# This fixture returns a dictionary containing filter_headers options -# for replacing certain headers with dummy values during cassette playback -# Specifically, it replaces the authorization header with a dummy value to -# prevent sensitive data from being recorded in the cassette. -# It also filters request to certain hosts (specified in the `ignored_hosts` list) -# to prevent data from being recorded in the cassette. -@pytest.fixture(scope="module") -def vcr_config() -> dict: - skipped_host = ["pinecone.io"] - - def before_record_response(response: dict) -> Union[dict, None]: - return response - - def before_record_request(request: Request) -> Union[Request, None]: - for host in skipped_host: - if request.host.startswith(host) or request.host.endswith(host): - return None - return request - - return { - "before_record_request": before_record_request, - "before_record_response": before_record_response, - "filter_headers": [ - ("authorization", "authorization-DUMMY"), - ("X-OpenAI-Client-User-Agent", "X-OpenAI-Client-User-Agent-DUMMY"), - ("Api-Key", "Api-Key-DUMMY"), - ("User-Agent", "User-Agent-DUMMY"), - ], - "ignore_localhost": True, - } diff --git a/.scripts/community_split/libs/community/tests/unit_tests/callbacks/test_callback_manager.py b/.scripts/community_split/libs/community/tests/unit_tests/callbacks/test_callback_manager.py deleted file mode 100644 index f63c0ab8cc7..00000000000 --- a/.scripts/community_split/libs/community/tests/unit_tests/callbacks/test_callback_manager.py +++ /dev/null @@ -1,85 +0,0 @@ -"""Test CallbackManager.""" -from unittest.mock import patch - -import pytest -from langchain_community.callbacks import get_openai_callback -from langchain_core.callbacks.manager import trace_as_chain_group, CallbackManager -from langchain_core.outputs import LLMResult -from langchain_core.tracers.langchain import LangChainTracer, wait_for_all_tracers -from langchain_community.llms.openai import BaseOpenAI - - -def test_callback_manager_configure_context_vars( - monkeypatch: pytest.MonkeyPatch, -) -> None: - """Test callback manager configuration.""" - monkeypatch.setenv("LANGCHAIN_TRACING_V2", "true") - monkeypatch.setenv("LANGCHAIN_TRACING", "false") - with patch.object(LangChainTracer, "_update_run_single"): - with patch.object(LangChainTracer, "_persist_run_single"): - with trace_as_chain_group("test") as group_manager: - assert len(group_manager.handlers) == 1 - tracer = group_manager.handlers[0] - assert isinstance(tracer, LangChainTracer) - - with get_openai_callback() as cb: - # This is a new empty callback handler - assert cb.successful_requests == 0 - assert cb.total_tokens == 0 - - # configure adds this openai cb but doesn't modify the group manager - mngr = 
CallbackManager.configure(group_manager) - assert mngr.handlers == [tracer, cb] - assert group_manager.handlers == [tracer] - - response = LLMResult( - generations=[], - llm_output={ - "token_usage": { - "prompt_tokens": 2, - "completion_tokens": 1, - "total_tokens": 3, - }, - "model_name": BaseOpenAI.__fields__["model_name"].default, - }, - ) - mngr.on_llm_start({}, ["prompt"])[0].on_llm_end(response) - - # The callback handler has been updated - assert cb.successful_requests == 1 - assert cb.total_tokens == 3 - assert cb.prompt_tokens == 2 - assert cb.completion_tokens == 1 - assert cb.total_cost > 0 - - with get_openai_callback() as cb: - # This is a new empty callback handler - assert cb.successful_requests == 0 - assert cb.total_tokens == 0 - - # configure adds this openai cb but doesn't modify the group manager - mngr = CallbackManager.configure(group_manager) - assert mngr.handlers == [tracer, cb] - assert group_manager.handlers == [tracer] - - response = LLMResult( - generations=[], - llm_output={ - "token_usage": { - "prompt_tokens": 2, - "completion_tokens": 1, - "total_tokens": 3, - }, - "model_name": BaseOpenAI.__fields__["model_name"].default, - }, - ) - mngr.on_llm_start({}, ["prompt"])[0].on_llm_end(response) - - # The callback handler has been updated - assert cb.successful_requests == 1 - assert cb.total_tokens == 3 - assert cb.prompt_tokens == 2 - assert cb.completion_tokens == 1 - assert cb.total_cost > 0 - wait_for_all_tracers() - assert LangChainTracer._persist_run_single.call_count == 1 # type: ignore diff --git a/.scripts/community_split/libs/community/tests/unit_tests/callbacks/test_imports.py b/.scripts/community_split/libs/community/tests/unit_tests/callbacks/test_imports.py deleted file mode 100644 index cfe420b3e82..00000000000 --- a/.scripts/community_split/libs/community/tests/unit_tests/callbacks/test_imports.py +++ /dev/null @@ -1,31 +0,0 @@ -from langchain_community.callbacks import __all__ - -EXPECTED_ALL = [ - "AimCallbackHandler", - "ArgillaCallbackHandler", - "ArizeCallbackHandler", - "PromptLayerCallbackHandler", - "ArthurCallbackHandler", - "ClearMLCallbackHandler", - "CometCallbackHandler", - "ContextCallbackHandler", - "HumanApprovalCallbackHandler", - "InfinoCallbackHandler", - "MlflowCallbackHandler", - "LLMonitorCallbackHandler", - "OpenAICallbackHandler", - "LLMThoughtLabeler", - "StreamlitCallbackHandler", - "WandbCallbackHandler", - "WhyLabsCallbackHandler", - "get_openai_callback", - "wandb_tracing_enabled", - "FlyteCallbackHandler", - "SageMakerCallbackHandler", - "LabelStudioCallbackHandler", - "TrubricsCallbackHandler", -] - - -def test_all_imports() -> None: - assert set(__all__) == set(EXPECTED_ALL) diff --git a/.scripts/community_split/libs/community/tests/unit_tests/chat_loaders/test_slack.py b/.scripts/community_split/libs/community/tests/unit_tests/chat_loaders/test_slack.py deleted file mode 100644 index afa4843ace4..00000000000 --- a/.scripts/community_split/libs/community/tests/unit_tests/chat_loaders/test_slack.py +++ /dev/null @@ -1,23 +0,0 @@ -import pathlib - -from langchain_community.chat_loaders import slack, utils - - -def test_slack_chat_loader() -> None: - chat_path = ( - pathlib.Path(__file__).parents[2] - / "examples" - / "slack_export.zip" - ) - loader = slack.SlackChatLoader(str(chat_path)) - - chat_sessions = list( - utils.map_ai_messages(loader.lazy_load(), sender="U0500003428") - ) - assert chat_sessions, "Chat sessions should not be empty" - - assert chat_sessions[1]["messages"], "Chat messages should not be 
empty" - - assert ( - "Example message" in chat_sessions[1]["messages"][0].content - ), "Chat content mismatch" diff --git a/.scripts/community_split/libs/community/tests/unit_tests/chat_models/test_bedrock.py b/.scripts/community_split/libs/community/tests/unit_tests/chat_models/test_bedrock.py deleted file mode 100644 index 6767bad3792..00000000000 --- a/.scripts/community_split/libs/community/tests/unit_tests/chat_models/test_bedrock.py +++ /dev/null @@ -1,54 +0,0 @@ -"""Test Anthropic Chat API wrapper.""" -from typing import List -from unittest.mock import MagicMock - -import pytest - -from langchain_core.messages import ( - AIMessage, - BaseMessage, - HumanMessage, - SystemMessage, -) - -from langchain_community.chat_models import BedrockChat -from langchain_community.chat_models.meta import convert_messages_to_prompt_llama - - -@pytest.mark.parametrize( - ("messages", "expected"), - [ - ([HumanMessage(content="Hello")], "[INST] Hello [/INST]"), - ( - [HumanMessage(content="Hello"), AIMessage(content="Answer:")], - "[INST] Hello [/INST]\nAnswer:", - ), - ( - [ - SystemMessage(content="You're an assistant"), - HumanMessage(content="Hello"), - AIMessage(content="Answer:"), - ], - "<> You're an assistant <>\n[INST] Hello [/INST]\nAnswer:", - ), - ], -) -def test_formatting(messages: List[BaseMessage], expected: str) -> None: - result = convert_messages_to_prompt_llama(messages) - assert result == expected - - -def test_anthropic_bedrock() -> None: - client = MagicMock() - respbody = MagicMock( - read=MagicMock( - return_value=MagicMock( - decode=MagicMock(return_value=b'{"completion":"Hi back"}') - ) - ) - ) - client.invoke_model.return_value = {"body": respbody} - model = BedrockChat(model_id="anthropic.claude-v2", client=client) - - # should not throw an error - model.invoke("hello there") diff --git a/.scripts/community_split/libs/community/tests/unit_tests/document_loaders/parsers/test_pdf_parsers.py b/.scripts/community_split/libs/community/tests/unit_tests/document_loaders/parsers/test_pdf_parsers.py deleted file mode 100644 index 84802d8bc12..00000000000 --- a/.scripts/community_split/libs/community/tests/unit_tests/document_loaders/parsers/test_pdf_parsers.py +++ /dev/null @@ -1,96 +0,0 @@ -"""Tests for the various PDF parsers.""" -from pathlib import Path -from typing import Iterator - -import pytest - -from langchain_community.document_loaders.base import BaseBlobParser -from langchain_community.document_loaders.blob_loaders import Blob -from langchain_community.document_loaders.parsers.pdf import ( - PDFMinerParser, - PyMuPDFParser, - PyPDFium2Parser, - PyPDFParser, -) - -_THIS_DIR = Path(__file__).parents[3] - -_EXAMPLES_DIR = _THIS_DIR / "examples" - -# Paths to test PDF files -HELLO_PDF = _EXAMPLES_DIR / "hello.pdf" -LAYOUT_PARSER_PAPER_PDF = _EXAMPLES_DIR / "layout-parser-paper.pdf" - - -def _assert_with_parser(parser: BaseBlobParser, splits_by_page: bool = True) -> None: - """Standard tests to verify that the given parser works. - - Args: - parser (BaseBlobParser): The parser to test. - splits_by_page (bool): Whether the parser splits by page or not by default. - """ - blob = Blob.from_path(HELLO_PDF) - doc_generator = parser.lazy_parse(blob) - assert isinstance(doc_generator, Iterator) - docs = list(doc_generator) - assert len(docs) == 1 - page_content = docs[0].page_content - assert isinstance(page_content, str) - # The different parsers return different amount of whitespace, so using - # startswith instead of equals. 
- assert docs[0].page_content.startswith("Hello world!") - - blob = Blob.from_path(LAYOUT_PARSER_PAPER_PDF) - doc_generator = parser.lazy_parse(blob) - assert isinstance(doc_generator, Iterator) - docs = list(doc_generator) - - if splits_by_page: - assert len(docs) == 16 - else: - assert len(docs) == 1 - # Test is imprecise since the parsers yield different parse information depending - # on configuration. Each parser seems to yield a slightly different result - # for this page! - assert "LayoutParser" in docs[0].page_content - metadata = docs[0].metadata - - assert metadata["source"] == str(LAYOUT_PARSER_PAPER_PDF) - - if splits_by_page: - assert int(metadata["page"]) == 0 - - -@pytest.mark.requires("pypdf") -def test_pypdf_parser() -> None: - """Test PyPDF parser.""" - _assert_with_parser(PyPDFParser()) - - -@pytest.mark.requires("pdfminer") -def test_pdfminer_parser() -> None: - """Test PDFMiner parser.""" - # Does not split by page by default. - _assert_with_parser(PDFMinerParser(), splits_by_page=False) - - -@pytest.mark.requires("fitz") # package is PyMuPDF -def test_pymupdf_loader() -> None: - """Test PyMuPDF loader.""" - _assert_with_parser(PyMuPDFParser()) - - -@pytest.mark.requires("pypdfium2") -def test_pypdfium2_parser() -> None: - """Test PyPDFium2 parser.""" - # Does not split by page by default. - _assert_with_parser(PyPDFium2Parser()) - - -@pytest.mark.requires("rapidocr_onnxruntime") -def test_extract_images_text_from_pdf() -> None: - """Test extracting images from a PDF and recognizing their text with RapidOCR.""" - _assert_with_parser(PyPDFParser(extract_images=True)) - _assert_with_parser(PDFMinerParser(extract_images=True)) - _assert_with_parser(PyMuPDFParser(extract_images=True)) - _assert_with_parser(PyPDFium2Parser(extract_images=True))
diff --git a/.scripts/community_split/libs/community/tests/unit_tests/embeddings/test_imports.py b/.scripts/community_split/libs/community/tests/unit_tests/embeddings/test_imports.py deleted file mode 100644 index d33d98e493b..00000000000 --- a/.scripts/community_split/libs/community/tests/unit_tests/embeddings/test_imports.py +++ /dev/null @@ -1,60 +0,0 @@ -from langchain_community.embeddings import __all__ - -EXPECTED_ALL = [ - "OpenAIEmbeddings", - "AzureOpenAIEmbeddings", - "ClarifaiEmbeddings", - "CohereEmbeddings", - "DatabricksEmbeddings", - "ElasticsearchEmbeddings", - "FastEmbedEmbeddings", - "HuggingFaceEmbeddings", - "HuggingFaceInferenceAPIEmbeddings", - "InfinityEmbeddings", - "GradientEmbeddings", - "JinaEmbeddings", - "LlamaCppEmbeddings", - "HuggingFaceHubEmbeddings", - "MlflowAIGatewayEmbeddings", - "MlflowEmbeddings", - "ModelScopeEmbeddings", - "TensorflowHubEmbeddings", - "SagemakerEndpointEmbeddings", - "HuggingFaceInstructEmbeddings", - "MosaicMLInstructorEmbeddings", - "SelfHostedEmbeddings", - "SelfHostedHuggingFaceEmbeddings", - "SelfHostedHuggingFaceInstructEmbeddings", - "FakeEmbeddings", - "DeterministicFakeEmbedding", - "AlephAlphaAsymmetricSemanticEmbedding", - "AlephAlphaSymmetricSemanticEmbedding", - "SentenceTransformerEmbeddings", - "GooglePalmEmbeddings", - "MiniMaxEmbeddings", - "VertexAIEmbeddings", - "BedrockEmbeddings", - "DeepInfraEmbeddings", - "EdenAiEmbeddings", - "DashScopeEmbeddings", - "EmbaasEmbeddings", - "OctoAIEmbeddings", - "SpacyEmbeddings", - "NLPCloudEmbeddings", - "GPT4AllEmbeddings", - "XinferenceEmbeddings", - "LocalAIEmbeddings", - "AwaEmbeddings", - "HuggingFaceBgeEmbeddings", - "ErnieEmbeddings", - "JavelinAIGatewayEmbeddings", - "OllamaEmbeddings", -
"QianfanEmbeddingsEndpoint", - "JohnSnowLabsEmbeddings", - "VoyageEmbeddings", - "BookendEmbeddings", -] - - -def test_all_imports() -> None: - assert set(__all__) == set(EXPECTED_ALL) diff --git a/.scripts/community_split/libs/community/tests/unit_tests/llms/test_openai.py b/.scripts/community_split/libs/community/tests/unit_tests/llms/test_openai.py deleted file mode 100644 index a14cc9651cb..00000000000 --- a/.scripts/community_split/libs/community/tests/unit_tests/llms/test_openai.py +++ /dev/null @@ -1,56 +0,0 @@ -import os - -import pytest - -from langchain_community.llms.openai import OpenAI -from langchain_community.utils.openai import is_openai_v1 - -os.environ["OPENAI_API_KEY"] = "foo" - - -def _openai_v1_installed() -> bool: - try: - return is_openai_v1() - except Exception as _: - return False - - -@pytest.mark.requires("openai") -def test_openai_model_param() -> None: - llm = OpenAI(model="foo") - assert llm.model_name == "foo" - llm = OpenAI(model_name="foo") - assert llm.model_name == "foo" - - -@pytest.mark.requires("openai") -def test_openai_model_kwargs() -> None: - llm = OpenAI(model_kwargs={"foo": "bar"}) - assert llm.model_kwargs == {"foo": "bar"} - - -@pytest.mark.requires("openai") -def test_openai_invalid_model_kwargs() -> None: - with pytest.raises(ValueError): - OpenAI(model_kwargs={"model_name": "foo"}) - - -@pytest.mark.requires("openai") -def test_openai_incorrect_field() -> None: - with pytest.warns(match="not default parameter"): - llm = OpenAI(foo="bar") - assert llm.model_kwargs == {"foo": "bar"} - - -@pytest.fixture -def mock_completion() -> dict: - return { - "id": "cmpl-3evkmQda5Hu7fcZavknQda3SQ", - "object": "text_completion", - "created": 1689989000, - "model": "text-davinci-003", - "choices": [ - {"text": "Bar Baz", "index": 0, "logprobs": None, "finish_reason": "length"} - ], - "usage": {"prompt_tokens": 1, "completion_tokens": 2, "total_tokens": 3}, - } diff --git a/.scripts/community_split/libs/community/tests/unit_tests/retrievers/test_imports.py b/.scripts/community_split/libs/community/tests/unit_tests/retrievers/test_imports.py deleted file mode 100644 index 04ebf72d5ea..00000000000 --- a/.scripts/community_split/libs/community/tests/unit_tests/retrievers/test_imports.py +++ /dev/null @@ -1,42 +0,0 @@ -from langchain_community.retrievers import __all__ - -EXPECTED_ALL = [ - "AmazonKendraRetriever", - "AmazonKnowledgeBasesRetriever", - "ArceeRetriever", - "ArxivRetriever", - "AzureCognitiveSearchRetriever", - "ChatGPTPluginRetriever", - "ChaindeskRetriever", - "CohereRagRetriever", - "ElasticSearchBM25Retriever", - "EmbedchainRetriever", - "GoogleDocumentAIWarehouseRetriever", - "GoogleCloudEnterpriseSearchRetriever", - "GoogleVertexAIMultiTurnSearchRetriever", - "GoogleVertexAISearchRetriever", - "KayAiRetriever", - "KNNRetriever", - "LlamaIndexGraphRetriever", - "LlamaIndexRetriever", - "MetalRetriever", - "MilvusRetriever", - "OutlineRetriever", - "PineconeHybridSearchRetriever", - "PubMedRetriever", - "RemoteLangChainRetriever", - "SVMRetriever", - "TavilySearchAPIRetriever", - "TFIDFRetriever", - "BM25Retriever", - "VespaRetriever", - "WeaviateHybridSearchRetriever", - "WikipediaRetriever", - "ZepRetriever", - "ZillizRetriever", - "DocArrayRetriever", -] - - -def test_all_imports() -> None: - assert set(__all__) == set(EXPECTED_ALL) diff --git a/.scripts/community_split/libs/community/tests/unit_tests/storage/test_imports.py b/.scripts/community_split/libs/community/tests/unit_tests/storage/test_imports.py deleted file mode 100644 index 
68c47b76d69..00000000000 --- a/.scripts/community_split/libs/community/tests/unit_tests/storage/test_imports.py +++ /dev/null @@ -1,11 +0,0 @@ -from langchain_community.storage import __all__ - -EXPECTED_ALL = [ - "RedisStore", - "UpstashRedisByteStore", - "UpstashRedisStore", -] - - -def test_all_imports() -> None: - assert set(__all__) == set(EXPECTED_ALL) diff --git a/.scripts/community_split/libs/community/tests/unit_tests/tools/test_exported.py b/.scripts/community_split/libs/community/tests/unit_tests/tools/test_exported.py deleted file mode 100644 index 9ca12287cb9..00000000000 --- a/.scripts/community_split/libs/community/tests/unit_tests/tools/test_exported.py +++ /dev/null @@ -1,40 +0,0 @@ -from typing import List, Type - -from langchain_core.tools import BaseTool, StructuredTool - -import langchain_community.tools -from langchain_community.tools import _DEPRECATED_TOOLS -from langchain_community.tools import __all__ as tools_all - -_EXCLUDE = { - BaseTool, - StructuredTool, -} - - -def _get_tool_classes(skip_tools_without_default_names: bool) -> List[Type[BaseTool]]: - results = [] - for tool_class_name in tools_all: - if tool_class_name in _DEPRECATED_TOOLS: - continue - # Resolve the str to the class - tool_class = getattr(langchain_community.tools, tool_class_name) - if isinstance(tool_class, type) and issubclass(tool_class, BaseTool): - if tool_class in _EXCLUDE: - continue - if ( - skip_tools_without_default_names - and tool_class.__fields__["name"].default # type: ignore - in [None, ""] - ): - continue - results.append(tool_class) - return results - - -def test_tool_names_unique() -> None: - """Test that the default names for our core tools are unique.""" - tool_classes = _get_tool_classes(skip_tools_without_default_names=True) - names = sorted([tool_cls.__fields__["name"].default for tool_cls in tool_classes]) - duplicated_names = [name for name in names if names.count(name) > 1] - assert not duplicated_names diff --git a/.scripts/community_split/libs/community/tests/unit_tests/vectorstores/test_faiss.py b/.scripts/community_split/libs/community/tests/unit_tests/vectorstores/test_faiss.py deleted file mode 100644 index c8ef467efe5..00000000000 --- a/.scripts/community_split/libs/community/tests/unit_tests/vectorstores/test_faiss.py +++ /dev/null @@ -1,728 +0,0 @@ -"""Test FAISS functionality.""" -import datetime -import math -import tempfile - -import pytest - -from typing import Union - -from langchain_core.documents import Document - -from langchain_community.docstore.base import Docstore -from langchain_community.docstore.in_memory import InMemoryDocstore -from langchain_community.vectorstores.faiss import FAISS -from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings - - -_PAGE_CONTENT = """This is a page about LangChain. - -It is a really cool framework. - -What isn't there to love about langchain? 
- -Made in 2022.""" - - -class FakeDocstore(Docstore): - """Fake docstore for testing purposes.""" - - def search(self, search: str) -> Union[str, Document]: - """Return the fake document.""" - document = Document(page_content=_PAGE_CONTENT) - return document - - - -@pytest.mark.requires("faiss") -def test_faiss() -> None: - """Test end to end construction and search.""" - texts = ["foo", "bar", "baz"] - docsearch = FAISS.from_texts(texts, FakeEmbeddings()) - index_to_id = docsearch.index_to_docstore_id - expected_docstore = InMemoryDocstore( - { - index_to_id[0]: Document(page_content="foo"), - index_to_id[1]: Document(page_content="bar"), - index_to_id[2]: Document(page_content="baz"), - } - ) - assert docsearch.docstore.__dict__ == expected_docstore.__dict__ - output = docsearch.similarity_search("foo", k=1) - assert output == [Document(page_content="foo")] - - -@pytest.mark.requires("faiss") -async def test_faiss_afrom_texts() -> None: - """Test end to end construction and search.""" - texts = ["foo", "bar", "baz"] - docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings()) - index_to_id = docsearch.index_to_docstore_id - expected_docstore = InMemoryDocstore( - { - index_to_id[0]: Document(page_content="foo"), - index_to_id[1]: Document(page_content="bar"), - index_to_id[2]: Document(page_content="baz"), - } - ) - assert docsearch.docstore.__dict__ == expected_docstore.__dict__ - output = await docsearch.asimilarity_search("foo", k=1) - assert output == [Document(page_content="foo")] - - -@pytest.mark.requires("faiss") -def test_faiss_vector_sim() -> None: - """Test vector similarity.""" - texts = ["foo", "bar", "baz"] - docsearch = FAISS.from_texts(texts, FakeEmbeddings()) - index_to_id = docsearch.index_to_docstore_id - expected_docstore = InMemoryDocstore( - { - index_to_id[0]: Document(page_content="foo"), - index_to_id[1]: Document(page_content="bar"), - index_to_id[2]: Document(page_content="baz"), - } - ) - assert docsearch.docstore.__dict__ == expected_docstore.__dict__ - query_vec = FakeEmbeddings().embed_query(text="foo") - output = docsearch.similarity_search_by_vector(query_vec, k=1) - assert output == [Document(page_content="foo")] - - -@pytest.mark.requires("faiss") -async def test_faiss_async_vector_sim() -> None: - """Test vector similarity.""" - texts = ["foo", "bar", "baz"] - docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings()) - index_to_id = docsearch.index_to_docstore_id - expected_docstore = InMemoryDocstore( - { - index_to_id[0]: Document(page_content="foo"), - index_to_id[1]: Document(page_content="bar"), - index_to_id[2]: Document(page_content="baz"), - } - ) - assert docsearch.docstore.__dict__ == expected_docstore.__dict__ - query_vec = await FakeEmbeddings().aembed_query(text="foo") - output = await docsearch.asimilarity_search_by_vector(query_vec, k=1) - assert output == [Document(page_content="foo")] - - -@pytest.mark.requires("faiss") -def test_faiss_vector_sim_with_score_threshold() -> None: - """Test vector similarity.""" - texts = ["foo", "bar", "baz"] - docsearch = FAISS.from_texts(texts, FakeEmbeddings()) - index_to_id = docsearch.index_to_docstore_id - expected_docstore = InMemoryDocstore( - { - index_to_id[0]: Document(page_content="foo"), - index_to_id[1]: Document(page_content="bar"), - index_to_id[2]: Document(page_content="baz"), - } - ) - assert docsearch.docstore.__dict__ == expected_docstore.__dict__ - query_vec = FakeEmbeddings().embed_query(text="foo") - output = docsearch.similarity_search_by_vector(query_vec, k=2, 
score_threshold=0.2) - assert output == [Document(page_content="foo")] - - -@pytest.mark.requires("faiss") -async def test_faiss_vector_async_sim_with_score_threshold() -> None: - """Test vector similarity.""" - texts = ["foo", "bar", "baz"] - docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings()) - index_to_id = docsearch.index_to_docstore_id - expected_docstore = InMemoryDocstore( - { - index_to_id[0]: Document(page_content="foo"), - index_to_id[1]: Document(page_content="bar"), - index_to_id[2]: Document(page_content="baz"), - } - ) - assert docsearch.docstore.__dict__ == expected_docstore.__dict__ - query_vec = await FakeEmbeddings().aembed_query(text="foo") - output = await docsearch.asimilarity_search_by_vector( - query_vec, k=2, score_threshold=0.2 - ) - assert output == [Document(page_content="foo")] - - -@pytest.mark.requires("faiss") -def test_similarity_search_with_score_by_vector() -> None: - """Test vector similarity with score by vector.""" - texts = ["foo", "bar", "baz"] - docsearch = FAISS.from_texts(texts, FakeEmbeddings()) - index_to_id = docsearch.index_to_docstore_id - expected_docstore = InMemoryDocstore( - { - index_to_id[0]: Document(page_content="foo"), - index_to_id[1]: Document(page_content="bar"), - index_to_id[2]: Document(page_content="baz"), - } - ) - assert docsearch.docstore.__dict__ == expected_docstore.__dict__ - query_vec = FakeEmbeddings().embed_query(text="foo") - output = docsearch.similarity_search_with_score_by_vector(query_vec, k=1) - assert len(output) == 1 - assert output[0][0] == Document(page_content="foo") - - -@pytest.mark.requires("faiss") -async def test_similarity_async_search_with_score_by_vector() -> None: - """Test vector similarity with score by vector.""" - texts = ["foo", "bar", "baz"] - docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings()) - index_to_id = docsearch.index_to_docstore_id - expected_docstore = InMemoryDocstore( - { - index_to_id[0]: Document(page_content="foo"), - index_to_id[1]: Document(page_content="bar"), - index_to_id[2]: Document(page_content="baz"), - } - ) - assert docsearch.docstore.__dict__ == expected_docstore.__dict__ - query_vec = await FakeEmbeddings().aembed_query(text="foo") - output = await docsearch.asimilarity_search_with_score_by_vector(query_vec, k=1) - assert len(output) == 1 - assert output[0][0] == Document(page_content="foo") - - -@pytest.mark.requires("faiss") -def test_similarity_search_with_score_by_vector_with_score_threshold() -> None: - """Test vector similarity with score by vector.""" - texts = ["foo", "bar", "baz"] - docsearch = FAISS.from_texts(texts, FakeEmbeddings()) - index_to_id = docsearch.index_to_docstore_id - expected_docstore = InMemoryDocstore( - { - index_to_id[0]: Document(page_content="foo"), - index_to_id[1]: Document(page_content="bar"), - index_to_id[2]: Document(page_content="baz"), - } - ) - assert docsearch.docstore.__dict__ == expected_docstore.__dict__ - query_vec = FakeEmbeddings().embed_query(text="foo") - output = docsearch.similarity_search_with_score_by_vector( - query_vec, - k=2, - score_threshold=0.2, - ) - assert len(output) == 1 - assert output[0][0] == Document(page_content="foo") - assert output[0][1] < 0.2 - - -@pytest.mark.requires("faiss") -async def test_sim_asearch_with_score_by_vector_with_score_threshold() -> None: - """Test vector similarity with score by vector.""" - texts = ["foo", "bar", "baz"] - docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings()) - index_to_id = docsearch.index_to_docstore_id - expected_docstore = 
InMemoryDocstore( - { - index_to_id[0]: Document(page_content="foo"), - index_to_id[1]: Document(page_content="bar"), - index_to_id[2]: Document(page_content="baz"), - } - ) - assert docsearch.docstore.__dict__ == expected_docstore.__dict__ - query_vec = await FakeEmbeddings().aembed_query(text="foo") - output = await docsearch.asimilarity_search_with_score_by_vector( - query_vec, - k=2, - score_threshold=0.2, - ) - assert len(output) == 1 - assert output[0][0] == Document(page_content="foo") - assert output[0][1] < 0.2 - - -@pytest.mark.requires("faiss") -def test_faiss_mmr() -> None: - texts = ["foo", "foo", "fou", "foy"] - docsearch = FAISS.from_texts(texts, FakeEmbeddings()) - query_vec = FakeEmbeddings().embed_query(text="foo") - # make sure we can have k > docstore size - output = docsearch.max_marginal_relevance_search_with_score_by_vector( - query_vec, k=10, lambda_mult=0.1 - ) - assert len(output) == len(texts) - assert output[0][0] == Document(page_content="foo") - assert output[0][1] == 0.0 - assert output[1][0] != Document(page_content="foo") - - -@pytest.mark.requires("faiss") -async def test_faiss_async_mmr() -> None: - texts = ["foo", "foo", "fou", "foy"] - docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings()) - query_vec = await FakeEmbeddings().aembed_query(text="foo") - # make sure we can have k > docstore size - output = await docsearch.amax_marginal_relevance_search_with_score_by_vector( - query_vec, k=10, lambda_mult=0.1 - ) - assert len(output) == len(texts) - assert output[0][0] == Document(page_content="foo") - assert output[0][1] == 0.0 - assert output[1][0] != Document(page_content="foo") - - -@pytest.mark.requires("faiss") -def test_faiss_mmr_with_metadatas() -> None: - texts = ["foo", "foo", "fou", "foy"] - metadatas = [{"page": i} for i in range(len(texts))] - docsearch = FAISS.from_texts(texts, FakeEmbeddings(), metadatas=metadatas) - query_vec = FakeEmbeddings().embed_query(text="foo") - output = docsearch.max_marginal_relevance_search_with_score_by_vector( - query_vec, k=10, lambda_mult=0.1 - ) - assert len(output) == len(texts) - assert output[0][0] == Document(page_content="foo", metadata={"page": 0}) - assert output[0][1] == 0.0 - assert output[1][0] != Document(page_content="foo", metadata={"page": 0}) - - -@pytest.mark.requires("faiss") -async def test_faiss_async_mmr_with_metadatas() -> None: - texts = ["foo", "foo", "fou", "foy"] - metadatas = [{"page": i} for i in range(len(texts))] - docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings(), metadatas=metadatas) - query_vec = await FakeEmbeddings().aembed_query(text="foo") - output = await docsearch.amax_marginal_relevance_search_with_score_by_vector( - query_vec, k=10, lambda_mult=0.1 - ) - assert len(output) == len(texts) - assert output[0][0] == Document(page_content="foo", metadata={"page": 0}) - assert output[0][1] == 0.0 - assert output[1][0] != Document(page_content="foo", metadata={"page": 0}) - - -@pytest.mark.requires("faiss") -def test_faiss_mmr_with_metadatas_and_filter() -> None: - texts = ["foo", "foo", "fou", "foy"] - metadatas = [{"page": i} for i in range(len(texts))] - docsearch = FAISS.from_texts(texts, FakeEmbeddings(), metadatas=metadatas) - query_vec = FakeEmbeddings().embed_query(text="foo") - output = docsearch.max_marginal_relevance_search_with_score_by_vector( - query_vec, k=10, lambda_mult=0.1, filter={"page": 1} - ) - assert len(output) == 1 - assert output[0][0] == Document(page_content="foo", metadata={"page": 1}) - assert output[0][1] == 0.0 - - 
-@pytest.mark.requires("faiss") -async def test_faiss_async_mmr_with_metadatas_and_filter() -> None: - texts = ["foo", "foo", "fou", "foy"] - metadatas = [{"page": i} for i in range(len(texts))] - docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings(), metadatas=metadatas) - query_vec = await FakeEmbeddings().aembed_query(text="foo") - output = await docsearch.amax_marginal_relevance_search_with_score_by_vector( - query_vec, k=10, lambda_mult=0.1, filter={"page": 1} - ) - assert len(output) == 1 - assert output[0][0] == Document(page_content="foo", metadata={"page": 1}) - assert output[0][1] == 0.0 - - -@pytest.mark.requires("faiss") -def test_faiss_mmr_with_metadatas_and_list_filter() -> None: - texts = ["foo", "foo", "fou", "foy"] - metadatas = [{"page": i} if i <= 3 else {"page": 3} for i in range(len(texts))] - docsearch = FAISS.from_texts(texts, FakeEmbeddings(), metadatas=metadatas) - query_vec = FakeEmbeddings().embed_query(text="foo") - output = docsearch.max_marginal_relevance_search_with_score_by_vector( - query_vec, k=10, lambda_mult=0.1, filter={"page": [0, 1, 2]} - ) - assert len(output) == 3 - assert output[0][0] == Document(page_content="foo", metadata={"page": 0}) - assert output[0][1] == 0.0 - assert output[1][0] != Document(page_content="foo", metadata={"page": 0}) - - -@pytest.mark.requires("faiss") -async def test_faiss_async_mmr_with_metadatas_and_list_filter() -> None: - texts = ["foo", "foo", "fou", "foy"] - metadatas = [{"page": i} if i <= 3 else {"page": 3} for i in range(len(texts))] - docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings(), metadatas=metadatas) - query_vec = await FakeEmbeddings().aembed_query(text="foo") - output = await docsearch.amax_marginal_relevance_search_with_score_by_vector( - query_vec, k=10, lambda_mult=0.1, filter={"page": [0, 1, 2]} - ) - assert len(output) == 3 - assert output[0][0] == Document(page_content="foo", metadata={"page": 0}) - assert output[0][1] == 0.0 - assert output[1][0] != Document(page_content="foo", metadata={"page": 0}) - - -@pytest.mark.requires("faiss") -def test_faiss_with_metadatas() -> None: - """Test end to end construction and search.""" - texts = ["foo", "bar", "baz"] - metadatas = [{"page": i} for i in range(len(texts))] - docsearch = FAISS.from_texts(texts, FakeEmbeddings(), metadatas=metadatas) - expected_docstore = InMemoryDocstore( - { - docsearch.index_to_docstore_id[0]: Document( - page_content="foo", metadata={"page": 0} - ), - docsearch.index_to_docstore_id[1]: Document( - page_content="bar", metadata={"page": 1} - ), - docsearch.index_to_docstore_id[2]: Document( - page_content="baz", metadata={"page": 2} - ), - } - ) - assert docsearch.docstore.__dict__ == expected_docstore.__dict__ - output = docsearch.similarity_search("foo", k=1) - assert output == [Document(page_content="foo", metadata={"page": 0})] - - -@pytest.mark.requires("faiss") -async def test_faiss_async_with_metadatas() -> None: - """Test end to end construction and search.""" - texts = ["foo", "bar", "baz"] - metadatas = [{"page": i} for i in range(len(texts))] - docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings(), metadatas=metadatas) - expected_docstore = InMemoryDocstore( - { - docsearch.index_to_docstore_id[0]: Document( - page_content="foo", metadata={"page": 0} - ), - docsearch.index_to_docstore_id[1]: Document( - page_content="bar", metadata={"page": 1} - ), - docsearch.index_to_docstore_id[2]: Document( - page_content="baz", metadata={"page": 2} - ), - } - ) - assert docsearch.docstore.__dict__ == 
expected_docstore.__dict__ - output = await docsearch.asimilarity_search("foo", k=1) - assert output == [Document(page_content="foo", metadata={"page": 0})] - - -@pytest.mark.requires("faiss") -def test_faiss_with_metadatas_and_filter() -> None: - texts = ["foo", "bar", "baz"] - metadatas = [{"page": i} for i in range(len(texts))] - docsearch = FAISS.from_texts(texts, FakeEmbeddings(), metadatas=metadatas) - expected_docstore = InMemoryDocstore( - { - docsearch.index_to_docstore_id[0]: Document( - page_content="foo", metadata={"page": 0} - ), - docsearch.index_to_docstore_id[1]: Document( - page_content="bar", metadata={"page": 1} - ), - docsearch.index_to_docstore_id[2]: Document( - page_content="baz", metadata={"page": 2} - ), - } - ) - assert docsearch.docstore.__dict__ == expected_docstore.__dict__ - output = docsearch.similarity_search("foo", k=1, filter={"page": 1}) - assert output == [Document(page_content="bar", metadata={"page": 1})] - - -@pytest.mark.requires("faiss") -async def test_faiss_async_with_metadatas_and_filter() -> None: - texts = ["foo", "bar", "baz"] - metadatas = [{"page": i} for i in range(len(texts))] - docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings(), metadatas=metadatas) - expected_docstore = InMemoryDocstore( - { - docsearch.index_to_docstore_id[0]: Document( - page_content="foo", metadata={"page": 0} - ), - docsearch.index_to_docstore_id[1]: Document( - page_content="bar", metadata={"page": 1} - ), - docsearch.index_to_docstore_id[2]: Document( - page_content="baz", metadata={"page": 2} - ), - } - ) - assert docsearch.docstore.__dict__ == expected_docstore.__dict__ - output = await docsearch.asimilarity_search("foo", k=1, filter={"page": 1}) - assert output == [Document(page_content="bar", metadata={"page": 1})] - - -@pytest.mark.requires("faiss") -def test_faiss_with_metadatas_and_list_filter() -> None: - texts = ["foo", "bar", "baz", "foo", "qux"] - metadatas = [{"page": i} if i <= 3 else {"page": 3} for i in range(len(texts))] - docsearch = FAISS.from_texts(texts, FakeEmbeddings(), metadatas=metadatas) - expected_docstore = InMemoryDocstore( - { - docsearch.index_to_docstore_id[0]: Document( - page_content="foo", metadata={"page": 0} - ), - docsearch.index_to_docstore_id[1]: Document( - page_content="bar", metadata={"page": 1} - ), - docsearch.index_to_docstore_id[2]: Document( - page_content="baz", metadata={"page": 2} - ), - docsearch.index_to_docstore_id[3]: Document( - page_content="foo", metadata={"page": 3} - ), - docsearch.index_to_docstore_id[4]: Document( - page_content="qux", metadata={"page": 3} - ), - } - ) - assert docsearch.docstore.__dict__ == expected_docstore.__dict__ - output = docsearch.similarity_search("foor", k=1, filter={"page": [0, 1, 2]}) - assert output == [Document(page_content="foo", metadata={"page": 0})] - - -@pytest.mark.requires("faiss") -async def test_faiss_async_with_metadatas_and_list_filter() -> None: - texts = ["foo", "bar", "baz", "foo", "qux"] - metadatas = [{"page": i} if i <= 3 else {"page": 3} for i in range(len(texts))] - docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings(), metadatas=metadatas) - expected_docstore = InMemoryDocstore( - { - docsearch.index_to_docstore_id[0]: Document( - page_content="foo", metadata={"page": 0} - ), - docsearch.index_to_docstore_id[1]: Document( - page_content="bar", metadata={"page": 1} - ), - docsearch.index_to_docstore_id[2]: Document( - page_content="baz", metadata={"page": 2} - ), - docsearch.index_to_docstore_id[3]: Document( - page_content="foo", 
metadata={"page": 3} - ), - docsearch.index_to_docstore_id[4]: Document( - page_content="qux", metadata={"page": 3} - ), - } - ) - assert docsearch.docstore.__dict__ == expected_docstore.__dict__ - output = await docsearch.asimilarity_search("foor", k=1, filter={"page": [0, 1, 2]}) - assert output == [Document(page_content="foo", metadata={"page": 0})] - - -@pytest.mark.requires("faiss") -def test_faiss_search_not_found() -> None: - """Test what happens when document is not found.""" - texts = ["foo", "bar", "baz"] - docsearch = FAISS.from_texts(texts, FakeEmbeddings()) - # Get rid of the docstore to purposefully induce errors. - docsearch.docstore = InMemoryDocstore({}) - with pytest.raises(ValueError): - docsearch.similarity_search("foo") - - -@pytest.mark.requires("faiss") -async def test_faiss_async_search_not_found() -> None: - """Test what happens when document is not found.""" - texts = ["foo", "bar", "baz"] - docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings()) - # Get rid of the docstore to purposefully induce errors. - docsearch.docstore = InMemoryDocstore({}) - with pytest.raises(ValueError): - await docsearch.asimilarity_search("foo") - - -@pytest.mark.requires("faiss") -def test_faiss_add_texts() -> None: - """Test end to end adding of texts.""" - # Create initial doc store. - texts = ["foo", "bar", "baz"] - docsearch = FAISS.from_texts(texts, FakeEmbeddings()) - # Test adding a similar document as before. - docsearch.add_texts(["foo"]) - output = docsearch.similarity_search("foo", k=2) - assert output == [Document(page_content="foo"), Document(page_content="foo")] - - -@pytest.mark.requires("faiss") -async def test_faiss_async_add_texts() -> None: - """Test end to end adding of texts.""" - # Create initial doc store. - texts = ["foo", "bar", "baz"] - docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings()) - # Test adding a similar document as before. 
- await docsearch.aadd_texts(["foo"]) - output = await docsearch.asimilarity_search("foo", k=2) - assert output == [Document(page_content="foo"), Document(page_content="foo")] - - -@pytest.mark.requires("faiss") -def test_faiss_add_texts_not_supported() -> None: - """Test adding of texts to a docstore that doesn't support it.""" - docsearch = FAISS(FakeEmbeddings(), None, FakeDocstore(), {}) - with pytest.raises(ValueError): - docsearch.add_texts(["foo"]) - - -@pytest.mark.requires("faiss") -async def test_faiss_async_add_texts_not_supported() -> None: - """Test adding of texts to a docstore that doesn't support it.""" - docsearch = FAISS(FakeEmbeddings(), None, FakeDocstore(), {}) - with pytest.raises(ValueError): - await docsearch.aadd_texts(["foo"]) - - -@pytest.mark.requires("faiss") -def test_faiss_local_save_load() -> None: - """Test end to end serialization.""" - texts = ["foo", "bar", "baz"] - docsearch = FAISS.from_texts(texts, FakeEmbeddings()) - temp_timestamp = datetime.datetime.utcnow().strftime("%Y%m%d-%H%M%S") - with tempfile.TemporaryDirectory(suffix="_" + temp_timestamp + "/") as temp_folder: - docsearch.save_local(temp_folder) - new_docsearch = FAISS.load_local(temp_folder, FakeEmbeddings()) - assert new_docsearch.index is not None - - -@pytest.mark.requires("faiss") -async def test_faiss_async_local_save_load() -> None: - """Test end to end serialization.""" - texts = ["foo", "bar", "baz"] - docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings()) - temp_timestamp = datetime.datetime.utcnow().strftime("%Y%m%d-%H%M%S") - with tempfile.TemporaryDirectory(suffix="_" + temp_timestamp + "/") as temp_folder: - docsearch.save_local(temp_folder) - new_docsearch = FAISS.load_local(temp_folder, FakeEmbeddings()) - assert new_docsearch.index is not None - - -@pytest.mark.requires("faiss") -def test_faiss_similarity_search_with_relevance_scores() -> None: - """Test the similarity search with normalized similarities.""" - texts = ["foo", "bar", "baz"] - docsearch = FAISS.from_texts( - texts, - FakeEmbeddings(), - relevance_score_fn=lambda score: 1.0 - score / math.sqrt(2), - ) - outputs = docsearch.similarity_search_with_relevance_scores("foo", k=1) - output, score = outputs[0] - assert output == Document(page_content="foo") - assert score == 1.0 - - -@pytest.mark.requires("faiss") -async def test_faiss_async_similarity_search_with_relevance_scores() -> None: - """Test the similarity search with normalized similarities.""" - texts = ["foo", "bar", "baz"] - docsearch = await FAISS.afrom_texts( - texts, - FakeEmbeddings(), - relevance_score_fn=lambda score: 1.0 - score / math.sqrt(2), - ) - outputs = await docsearch.asimilarity_search_with_relevance_scores("foo", k=1) - output, score = outputs[0] - assert output == Document(page_content="foo") - assert score == 1.0 - - -@pytest.mark.requires("faiss") -def test_faiss_similarity_search_with_relevance_scores_with_threshold() -> None: - """Test the similarity search with normalized similarities with score threshold.""" - texts = ["foo", "bar", "baz"] - docsearch = FAISS.from_texts( - texts, - FakeEmbeddings(), - relevance_score_fn=lambda score: 1.0 - score / math.sqrt(2), - ) - outputs = docsearch.similarity_search_with_relevance_scores( - "foo", k=2, score_threshold=0.5 - ) - assert len(outputs) == 1 - output, score = outputs[0] - assert output == Document(page_content="foo") - assert score == 1.0 - - -@pytest.mark.requires("faiss") -async def test_faiss_asimilarity_search_with_relevance_scores_with_threshold() -> None: - """Test the 
similarity search with normalized similarities with score threshold.""" - texts = ["foo", "bar", "baz"] - docsearch = await FAISS.afrom_texts( - texts, - FakeEmbeddings(), - relevance_score_fn=lambda score: 1.0 - score / math.sqrt(2), - ) - outputs = await docsearch.asimilarity_search_with_relevance_scores( - "foo", k=2, score_threshold=0.5 - ) - assert len(outputs) == 1 - output, score = outputs[0] - assert output == Document(page_content="foo") - assert score == 1.0 - - -@pytest.mark.requires("faiss") -def test_faiss_invalid_normalize_fn() -> None: - """Test the similarity search with normalized similarities.""" - texts = ["foo", "bar", "baz"] - docsearch = FAISS.from_texts( - texts, FakeEmbeddings(), relevance_score_fn=lambda _: 2.0 - ) - with pytest.warns(Warning, match="scores must be between"): - docsearch.similarity_search_with_relevance_scores("foo", k=1) - - -@pytest.mark.requires("faiss") -async def test_faiss_async_invalid_normalize_fn() -> None: - """Test the similarity search with normalized similarities.""" - texts = ["foo", "bar", "baz"] - docsearch = await FAISS.afrom_texts( - texts, FakeEmbeddings(), relevance_score_fn=lambda _: 2.0 - ) - with pytest.warns(Warning, match="scores must be between"): - await docsearch.asimilarity_search_with_relevance_scores("foo", k=1) - - -@pytest.mark.requires("faiss") -def test_missing_normalize_score_fn() -> None: - """Test doesn't perform similarity search without a valid distance strategy.""" - texts = ["foo", "bar", "baz"] - faiss_instance = FAISS.from_texts(texts, FakeEmbeddings(), distance_strategy="fake") - with pytest.raises(ValueError): - faiss_instance.similarity_search_with_relevance_scores("foo", k=2) - - -@pytest.mark.requires("faiss") -async def test_async_missing_normalize_score_fn() -> None: - """Test doesn't perform similarity search without a valid distance strategy.""" - texts = ["foo", "bar", "baz"] - faiss_instance = await FAISS.afrom_texts( - texts, FakeEmbeddings(), distance_strategy="fake" - ) - with pytest.raises(ValueError): - await faiss_instance.asimilarity_search_with_relevance_scores("foo", k=2) - - -@pytest.mark.requires("faiss") -def test_delete() -> None: - """Test the similarity search with normalized similarities.""" - ids = ["a", "b", "c"] - docsearch = FAISS.from_texts(["foo", "bar", "baz"], FakeEmbeddings(), ids=ids) - docsearch.delete(ids[1:2]) - - result = docsearch.similarity_search("bar", k=2) - assert sorted([d.page_content for d in result]) == ["baz", "foo"] - assert docsearch.index_to_docstore_id == {0: ids[0], 1: ids[2]} - - -@pytest.mark.requires("faiss") -async def test_async_delete() -> None: - """Test the similarity search with normalized similarities.""" - ids = ["a", "b", "c"] - docsearch = await FAISS.afrom_texts( - ["foo", "bar", "baz"], FakeEmbeddings(), ids=ids - ) - docsearch.delete(ids[1:2]) - - result = await docsearch.asimilarity_search("bar", k=2) - assert sorted([d.page_content for d in result]) == ["baz", "foo"] - assert docsearch.index_to_docstore_id == {0: ids[0], 1: ids[2]} diff --git a/.scripts/community_split/libs/community/tests/unit_tests/vectorstores/test_imports.py b/.scripts/community_split/libs/community/tests/unit_tests/vectorstores/test_imports.py deleted file mode 100644 index 95cbcc11d96..00000000000 --- a/.scripts/community_split/libs/community/tests/unit_tests/vectorstores/test_imports.py +++ /dev/null @@ -1,13 +0,0 @@ -from langchain_community import vectorstores -from langchain_core.vectorstores import VectorStore - - -def test_all_imports() -> None: - 
"""Simple test to make sure all things can be imported.""" - for cls in vectorstores.__all__: - if cls not in [ - "AlibabaCloudOpenSearchSettings", - "ClickhouseSettings", - "MyScaleSettings", - ]: - assert issubclass(getattr(vectorstores, cls), VectorStore) diff --git a/.scripts/community_split/libs/core/langchain_core/load/load.py b/.scripts/community_split/libs/core/langchain_core/load/load.py deleted file mode 100644 index 188aa45d61d..00000000000 --- a/.scripts/community_split/libs/core/langchain_core/load/load.py +++ /dev/null @@ -1,144 +0,0 @@ -import importlib -import json -import os -from typing import Any, Dict, List, Optional - -from langchain_core.load.mapping import SERIALIZABLE_MAPPING -from langchain_core.load.serializable import Serializable - -DEFAULT_NAMESPACES = ["langchain", "langchain_core", "langchain_community"] - - -class Reviver: - """Reviver for JSON objects.""" - - def __init__( - self, - secrets_map: Optional[Dict[str, str]] = None, - valid_namespaces: Optional[List[str]] = None, - ) -> None: - self.secrets_map = secrets_map or dict() - # By default only support langchain, but user can pass in additional namespaces - self.valid_namespaces = ( - [*DEFAULT_NAMESPACES, *valid_namespaces] - if valid_namespaces - else DEFAULT_NAMESPACES - ) - - def __call__(self, value: Dict[str, Any]) -> Any: - if ( - value.get("lc", None) == 1 - and value.get("type", None) == "secret" - and value.get("id", None) is not None - ): - [key] = value["id"] - if key in self.secrets_map: - return self.secrets_map[key] - else: - if key in os.environ and os.environ[key]: - return os.environ[key] - raise KeyError(f'Missing key "{key}" in load(secrets_map)') - - if ( - value.get("lc", None) == 1 - and value.get("type", None) == "not_implemented" - and value.get("id", None) is not None - ): - raise NotImplementedError( - "Trying to load an object that doesn't implement " - f"serialization: {value}" - ) - - if ( - value.get("lc", None) == 1 - and value.get("type", None) == "constructor" - and value.get("id", None) is not None - ): - [*namespace, name] = value["id"] - - if namespace[0] not in self.valid_namespaces: - raise ValueError(f"Invalid namespace: {value}") - - # The root namespace "langchain" is not a valid identifier. - if len(namespace) == 1 and namespace[0] == "langchain": - raise ValueError(f"Invalid namespace: {value}") - - # Get the importable path - key = tuple(namespace + [name]) - if key not in SERIALIZABLE_MAPPING: - raise ValueError( - "Trying to deserialize something that cannot " - "be deserialized in current version of langchain-core: " - f"{key}" - ) - import_path = SERIALIZABLE_MAPPING[key] - # Split into module and name - import_dir, import_obj = import_path[:-1], import_path[-1] - # Import module - mod = importlib.import_module(".".join(import_dir)) - # Import class - cls = getattr(mod, import_obj) - - # The class must be a subclass of Serializable. - if not issubclass(cls, Serializable): - raise ValueError(f"Invalid namespace: {value}") - - # We don't need to recurse on kwargs - # as json.loads will do that for us. - kwargs = value.get("kwargs", dict()) - return cls(**kwargs) - - return value - - -def loads( - text: str, - *, - secrets_map: Optional[Dict[str, str]] = None, - valid_namespaces: Optional[List[str]] = None, -) -> Any: - """Revive a LangChain class from a JSON string. - Equivalent to `load(json.loads(text))`. - - Args: - text: The string to load. - secrets_map: A map of secrets to load. 
- valid_namespaces: A list of additional namespaces (modules) - to allow to be deserialized. - - Returns: - Revived LangChain objects. - """ - return json.loads(text, object_hook=Reviver(secrets_map, valid_namespaces)) - - -def load( - obj: Any, - *, - secrets_map: Optional[Dict[str, str]] = None, - valid_namespaces: Optional[List[str]] = None, -) -> Any: - """Revive a LangChain class from a JSON object. Use this if you already - have a parsed JSON object, e.g. from `json.load` or `orjson.loads`. - - Args: - obj: The object to load. - secrets_map: A map of secrets to load. - valid_namespaces: A list of additional namespaces (modules) - to allow to be deserialized. - - Returns: - Revived LangChain objects. - """ - reviver = Reviver(secrets_map, valid_namespaces) - - def _load(obj: Any) -> Any: - if isinstance(obj, dict): - # Need to revive leaf nodes before reviving this node - loaded_obj = {k: _load(v) for k, v in obj.items()} - return reviver(loaded_obj) - if isinstance(obj, list): - return [_load(o) for o in obj] - return obj - - return _load(obj) diff --git a/.scripts/community_split/libs/core/langchain_core/utils/__init__.py b/.scripts/community_split/libs/core/langchain_core/utils/__init__.py deleted file mode 100644 index 6491a85f17f..00000000000 --- a/.scripts/community_split/libs/core/langchain_core/utils/__init__.py +++ /dev/null @@ -1,49 +0,0 @@ -""" -**Utility functions** for LangChain. - -These functions do not depend on any other LangChain module. -""" - -from langchain_core.utils.env import get_from_dict_or_env, get_from_env -from langchain_core.utils.formatting import StrictFormatter, formatter -from langchain_core.utils.input import ( - get_bolded_text, - get_color_mapping, - get_colored_text, - print_text, -) - -from langchain_core.utils.loading import try_load_from_hub -from langchain_core.utils.strings import comma_list, stringify_dict, stringify_value -from langchain_core.utils.utils import ( - build_extra_kwargs, - check_package_version, - convert_to_secret_str, - get_pydantic_field_names, - guard_import, - mock_now, - raise_for_status_with_text, - xor_args, -) - -__all__ = [ - "StrictFormatter", - "check_package_version", - "convert_to_secret_str", - "formatter", - "get_bolded_text", - "get_color_mapping", - "get_colored_text", - "get_pydantic_field_names", - "guard_import", - "mock_now", - "print_text", - "raise_for_status_with_text", - "xor_args", - "try_load_from_hub", - "build_extra_kwargs", - "get_from_env", - "get_from_dict_or_env", - "stringify_dict", - "comma_list", - "stringify_value", -] diff --git a/.scripts/community_split/libs/core/langchain_core/utils/env.py b/.scripts/community_split/libs/core/langchain_core/utils/env.py deleted file mode 100644 index b1579e07b7d..00000000000 --- a/.scripts/community_split/libs/core/langchain_core/utils/env.py +++ /dev/null @@ -1,45 +0,0 @@ -from __future__ import annotations - -import os -from typing import Any, Dict, Optional - - -def env_var_is_set(env_var: str) -> bool: - """Check if an environment variable is set. - - Args: - env_var (str): The name of the environment variable. - - Returns: - bool: True if the environment variable is set to a value other than - "", "0", "false", or "False"; False otherwise. - """ - return env_var in os.environ and os.environ[env_var] not in ( - "", - "0", - "false", - "False", - ) - - -def get_from_dict_or_env( - data: Dict[str, Any], key: str, env_key: str, default: Optional[str] = None -) -> str: - """Get a value from a dictionary or an environment variable.""" - if key in data and data[key]: - return data[key] - else: - return get_from_env(key, env_key, default=default) - - -def get_from_env(key: str, env_key: str, default: Optional[str] = None) -> str: - """Get a value from an environment variable.""" - if env_key in os.environ and os.environ[env_key]: - return os.environ[env_key] - elif default is not None: - return default - else: - raise ValueError( - f"Did not find {key}, please add an environment variable" - f" `{env_key}` which contains it, or pass" - f" `{key}` as a named parameter." - ) diff --git a/.scripts/community_split/libs/core/tests/unit_tests/utils/test_imports.py b/.scripts/community_split/libs/core/tests/unit_tests/utils/test_imports.py deleted file mode 100644 index 9ebbb7e1e21..00000000000 --- a/.scripts/community_split/libs/core/tests/unit_tests/utils/test_imports.py +++ /dev/null @@ -1,28 +0,0 @@ -from langchain_core.utils import __all__ - -EXPECTED_ALL = [ - "StrictFormatter", - "check_package_version", - "convert_to_secret_str", - "formatter", - "get_bolded_text", - "get_color_mapping", - "get_colored_text", - "get_pydantic_field_names", - "guard_import", - "mock_now", - "print_text", - "raise_for_status_with_text", - "xor_args", - "try_load_from_hub", - "build_extra_kwargs", - "get_from_dict_or_env", - "get_from_env", - "stringify_dict", - "comma_list", - "stringify_value", -] - - -def test_all_imports() -> None: - assert set(__all__) == set(EXPECTED_ALL) diff --git a/.scripts/community_split/libs/langchain/langchain/callbacks/__init__.py b/.scripts/community_split/libs/langchain/langchain/callbacks/__init__.py deleted file mode 100644 index 422c4b54b25..00000000000 --- a/.scripts/community_split/libs/langchain/langchain/callbacks/__init__.py +++ /dev/null @@ -1,83 +0,0 @@ -"""**Callback handlers** allow listening to events in LangChain. - -**Class hierarchy:** - -.. code-block:: - - BaseCallbackHandler --> <name>CallbackHandler # Example: AimCallbackHandler -""" - -from langchain_core.callbacks import StdOutCallbackHandler, StreamingStdOutCallbackHandler -from langchain_core.tracers.langchain import LangChainTracer -from langchain_core.tracers.context import ( - collect_runs, - tracing_enabled, - tracing_v2_enabled, -) - -from langchain_community.callbacks.aim_callback import AimCallbackHandler -from langchain_community.callbacks.argilla_callback import ArgillaCallbackHandler -from langchain_community.callbacks.arize_callback import ArizeCallbackHandler -from langchain_community.callbacks.arthur_callback import ArthurCallbackHandler -from langchain_community.callbacks.clearml_callback import ClearMLCallbackHandler -from langchain_community.callbacks.comet_ml_callback import CometCallbackHandler -from langchain_community.callbacks.context_callback import ContextCallbackHandler -from langchain.callbacks.file import FileCallbackHandler -from langchain_community.callbacks.flyte_callback import FlyteCallbackHandler -from langchain_community.callbacks.human import HumanApprovalCallbackHandler -from langchain_community.callbacks.infino_callback import InfinoCallbackHandler -from langchain_community.callbacks.labelstudio_callback import LabelStudioCallbackHandler -from langchain_community.callbacks.llmonitor_callback import LLMonitorCallbackHandler -from langchain_community.callbacks.mlflow_callback import MlflowCallbackHandler -from langchain_community.callbacks.openai_info import OpenAICallbackHandler -from langchain_community.callbacks.promptlayer_callback import PromptLayerCallbackHandler -from langchain_community.callbacks.sagemaker_callback import SageMakerCallbackHandler -from langchain.callbacks.streaming_aiter import AsyncIteratorCallbackHandler -from langchain.callbacks.streaming_stdout_final_only import ( - FinalStreamingStdOutCallbackHandler, -) -from langchain_community.callbacks.streamlit import LLMThoughtLabeler, StreamlitCallbackHandler -from langchain_community.callbacks.trubrics_callback import TrubricsCallbackHandler -from langchain_community.callbacks.wandb_callback import WandbCallbackHandler -from langchain_community.callbacks.whylabs_callback import WhyLabsCallbackHandler - -from langchain_community.callbacks.manager import ( - get_openai_callback, - wandb_tracing_enabled, -) - - -__all__ = [ - "AimCallbackHandler", - "ArgillaCallbackHandler", - "ArizeCallbackHandler", - "PromptLayerCallbackHandler", - "ArthurCallbackHandler", - "ClearMLCallbackHandler", - "CometCallbackHandler", - "ContextCallbackHandler", - "FileCallbackHandler", - "HumanApprovalCallbackHandler", - "InfinoCallbackHandler", - "MlflowCallbackHandler", - "LLMonitorCallbackHandler", - "OpenAICallbackHandler", - "StdOutCallbackHandler", - "AsyncIteratorCallbackHandler", - "StreamingStdOutCallbackHandler", - "FinalStreamingStdOutCallbackHandler", - "LLMThoughtLabeler", - "LangChainTracer", - "StreamlitCallbackHandler", - "WandbCallbackHandler", - "WhyLabsCallbackHandler", - "get_openai_callback", - "tracing_enabled", - "tracing_v2_enabled", - "collect_runs", - "wandb_tracing_enabled", - "FlyteCallbackHandler", - "SageMakerCallbackHandler", - "LabelStudioCallbackHandler", - "TrubricsCallbackHandler", -] diff --git a/.scripts/community_split/libs/langchain/langchain/callbacks/manager.py b/.scripts/community_split/libs/langchain/langchain/callbacks/manager.py deleted file mode 100644 index 6c61a4cdb6c..00000000000 --- 
a/.scripts/community_split/libs/langchain/langchain/callbacks/manager.py +++ /dev/null @@ -1,68 +0,0 @@ -from __future__ import annotations - -from langchain_core.callbacks.manager import ( - AsyncCallbackManager, - AsyncCallbackManagerForChainGroup, - AsyncCallbackManagerForChainRun, - AsyncCallbackManagerForLLMRun, - AsyncCallbackManagerForRetrieverRun, - AsyncCallbackManagerForToolRun, - AsyncParentRunManager, - AsyncRunManager, - BaseRunManager, - CallbackManager, - CallbackManagerForChainGroup, - CallbackManagerForChainRun, - CallbackManagerForLLMRun, - CallbackManagerForRetrieverRun, - CallbackManagerForToolRun, - Callbacks, - ParentRunManager, - RunManager, - ahandle_event, - atrace_as_chain_group, - handle_event, - trace_as_chain_group, -) -from langchain_core.tracers.context import ( - collect_runs, - tracing_enabled, - tracing_v2_enabled, -) -from langchain_core.utils.env import env_var_is_set -from langchain_community.callbacks.manager import ( - get_openai_callback, - wandb_tracing_enabled, -) - - -__all__ = [ - "BaseRunManager", - "RunManager", - "ParentRunManager", - "AsyncRunManager", - "AsyncParentRunManager", - "CallbackManagerForLLMRun", - "AsyncCallbackManagerForLLMRun", - "CallbackManagerForChainRun", - "AsyncCallbackManagerForChainRun", - "CallbackManagerForToolRun", - "AsyncCallbackManagerForToolRun", - "CallbackManagerForRetrieverRun", - "AsyncCallbackManagerForRetrieverRun", - "CallbackManager", - "CallbackManagerForChainGroup", - "AsyncCallbackManager", - "AsyncCallbackManagerForChainGroup", - "tracing_enabled", - "tracing_v2_enabled", - "collect_runs", - "atrace_as_chain_group", - "trace_as_chain_group", - "handle_event", - "ahandle_event", - "Callbacks", - "env_var_is_set", - "get_openai_callback", - "wandb_tracing_enabled", -] diff --git a/.scripts/community_split/libs/langchain/tests/unit_tests/callbacks/test_manager.py b/.scripts/community_split/libs/langchain/tests/unit_tests/callbacks/test_manager.py deleted file mode 100644 index 8ee369e81fd..00000000000 --- a/.scripts/community_split/libs/langchain/tests/unit_tests/callbacks/test_manager.py +++ /dev/null @@ -1,36 +0,0 @@ -from langchain.callbacks.manager import __all__ - -EXPECTED_ALL = [ - "BaseRunManager", - "RunManager", - "ParentRunManager", - "AsyncRunManager", - "AsyncParentRunManager", - "CallbackManagerForLLMRun", - "AsyncCallbackManagerForLLMRun", - "CallbackManagerForChainRun", - "AsyncCallbackManagerForChainRun", - "CallbackManagerForToolRun", - "AsyncCallbackManagerForToolRun", - "CallbackManagerForRetrieverRun", - "AsyncCallbackManagerForRetrieverRun", - "CallbackManager", - "CallbackManagerForChainGroup", - "AsyncCallbackManager", - "AsyncCallbackManagerForChainGroup", - "tracing_enabled", - "tracing_v2_enabled", - "collect_runs", - "atrace_as_chain_group", - "trace_as_chain_group", - "handle_event", - "ahandle_event", - "env_var_is_set", - "Callbacks", - "get_openai_callback", - "wandb_tracing_enabled", -] - - -def test_all_imports() -> None: - assert set(__all__) == set(EXPECTED_ALL) diff --git a/.scripts/community_split/libs/langchain/tests/unit_tests/chains/test_llm.py b/.scripts/community_split/libs/langchain/tests/unit_tests/chains/test_llm.py deleted file mode 100644 index 0179cd135f2..00000000000 --- a/.scripts/community_split/libs/langchain/tests/unit_tests/chains/test_llm.py +++ /dev/null @@ -1,75 +0,0 @@ -"""Test LLM chain.""" -from tempfile import TemporaryDirectory -from typing import Dict, List, Union -from unittest.mock import patch - -import pytest -from 
langchain_core.output_parsers import BaseOutputParser -from langchain_core.prompts import PromptTemplate - -from langchain.chains.llm import LLMChain -from tests.unit_tests.llms.fake_llm import FakeLLM - - -class FakeOutputParser(BaseOutputParser): - """Fake output parser class for testing.""" - - def parse(self, text: str) -> Union[str, List[str], Dict[str, str]]: - """Parse by splitting.""" - return text.split() - - -@pytest.fixture -def fake_llm_chain() -> LLMChain: - """Fake LLM chain for testing purposes.""" - prompt = PromptTemplate(input_variables=["bar"], template="This is a {bar}:") - return LLMChain(prompt=prompt, llm=FakeLLM(), output_key="text1") - - -@patch( - "langchain_community.llms.loading.get_type_to_cls_dict", - lambda: {"fake": lambda: FakeLLM}, -) -def test_serialization(fake_llm_chain: LLMChain) -> None: - """Test serialization.""" - from langchain.chains.loading import load_chain - - with TemporaryDirectory() as temp_dir: - file = temp_dir + "/llm.json" - fake_llm_chain.save(file) - loaded_chain = load_chain(file) - assert loaded_chain == fake_llm_chain - - -def test_missing_inputs(fake_llm_chain: LLMChain) -> None: - """Test error is raised if inputs are missing.""" - with pytest.raises(ValueError): - fake_llm_chain({"foo": "bar"}) - - -def test_valid_call(fake_llm_chain: LLMChain) -> None: - """Test valid call of LLM chain.""" - output = fake_llm_chain({"bar": "baz"}) - assert output == {"bar": "baz", "text1": "foo"} - - # Test with stop words. - output = fake_llm_chain({"bar": "baz", "stop": ["foo"]}) - # Response should be `bar` now. - assert output == {"bar": "baz", "stop": ["foo"], "text1": "bar"} - - -def test_predict_method(fake_llm_chain: LLMChain) -> None: - """Test predict method works.""" - output = fake_llm_chain.predict(bar="baz") - assert output == "foo" - - -def test_predict_and_parse() -> None: - """Test parsing ability.""" - prompt = PromptTemplate( - input_variables=["foo"], template="{foo}", output_parser=FakeOutputParser() - ) - llm = FakeLLM(queries={"foo": "foo bar"}) - chain = LLMChain(prompt=prompt, llm=llm) - output = chain.predict_and_parse(foo="foo") - assert output == ["foo", "bar"] diff --git a/.scripts/community_split/libs/langchain/tests/unit_tests/load/test_serializable.py b/.scripts/community_split/libs/langchain/tests/unit_tests/load/test_serializable.py deleted file mode 100644 index cb6c4f9ff4c..00000000000 --- a/.scripts/community_split/libs/langchain/tests/unit_tests/load/test_serializable.py +++ /dev/null @@ -1,57 +0,0 @@ -import importlib -import pkgutil - -from langchain_core.load.mapping import SERIALIZABLE_MAPPING - - -def import_all_modules(package_name: str) -> dict: - package = importlib.import_module(package_name) - classes: dict = {} - - for attribute_name in dir(package): - attribute = getattr(package, attribute_name) - if hasattr(attribute, "is_lc_serializable") and isinstance(attribute, type): - if ( - isinstance(attribute.is_lc_serializable(), bool) # type: ignore - and attribute.is_lc_serializable() # type: ignore - ): - key = tuple(attribute.lc_id()) # type: ignore - value = tuple(attribute.__module__.split(".") + [attribute.__name__]) - if key in classes and classes[key] != value: - raise ValueError - classes[key] = value - if hasattr(package, "__path__"): - for loader, module_name, is_pkg in pkgutil.walk_packages( - package.__path__, package_name + "." 
- ): - if module_name not in ( - "langchain.chains.llm_bash", - "langchain.chains.llm_symbolic_math", - "langchain.tools.python", - "langchain.vectorstores._pgvector_data_models", - # TODO: why does this error? - "langchain.agents.agent_toolkits.openapi.planner", - ): - importlib.import_module(module_name) - new_classes = import_all_modules(module_name) - for k, v in new_classes.items(): - if k in classes and classes[k] != v: - raise ValueError - classes[k] = v - return classes - - -def test_serializable_mapping() -> None: - serializable_modules = import_all_modules("langchain") - missing = set(SERIALIZABLE_MAPPING).difference(serializable_modules) - assert missing == set() - extra = set(serializable_modules).difference(SERIALIZABLE_MAPPING) - assert extra == set() - - for k, import_path in serializable_modules.items(): - import_dir, import_obj = import_path[:-1], import_path[-1] - # Import module - mod = importlib.import_module(".".join(import_dir)) - # Import class - cls = getattr(mod, import_obj) - assert list(k) == cls.lc_id() diff --git a/.scripts/community_split/libs/langchain/tests/unit_tests/test_dependencies.py b/.scripts/community_split/libs/langchain/tests/unit_tests/test_dependencies.py deleted file mode 100644 index 872b01d6213..00000000000 --- a/.scripts/community_split/libs/langchain/tests/unit_tests/test_dependencies.py +++ /dev/null @@ -1,113 +0,0 @@ -"""A unit test meant to catch accidental introduction of non-optional dependencies.""" -from pathlib import Path -from typing import Any, Dict, Mapping - -import pytest -import toml - -HERE = Path(__file__).parent - -PYPROJECT_TOML = HERE / "../../pyproject.toml" - - -@pytest.fixture() -def poetry_conf() -> Dict[str, Any]: - """Load the pyproject.toml file.""" - with open(PYPROJECT_TOML) as f: - return toml.load(f)["tool"]["poetry"] - - -def test_required_dependencies(poetry_conf: Mapping[str, Any]) -> None: - """A test that checks if a new non-optional dependency is being introduced. - - If this test is triggered, it means that a contributor is trying to introduce a new - required dependency. This should be avoided in most situations. - """ - # Get the dependencies from the [tool.poetry.dependencies] section - dependencies = poetry_conf["dependencies"] - - is_required = { - package_name: isinstance(requirements, str) - or not requirements.get("optional", False) - for package_name, requirements in dependencies.items() - } - required_dependencies = [ - package_name for package_name, required in is_required.items() if required - ] - - assert sorted(required_dependencies) == sorted( - [ - "PyYAML", - "SQLAlchemy", - "aiohttp", - "async-timeout", - "dataclasses-json", - "jsonpatch", - "langchain-core", - "langsmith", - "numpy", - "pydantic", - "python", - "requests", - "tenacity", - "langchain-community", - ] - ) - - unrequired_dependencies = [ - package_name for package_name, required in is_required.items() if not required - ] - in_extras = [dep for group in poetry_conf["extras"].values() for dep in group] - assert set(unrequired_dependencies) == set(in_extras) - - -def test_test_group_dependencies(poetry_conf: Mapping[str, Any]) -> None: - """Check if someone is attempting to add additional test dependencies. - - Only dependencies associated with test running infrastructure should be added - to the test group; e.g., pytest, pytest-cov etc. - - Examples of dependencies that should NOT be included: boto3, azure, postgres, etc. 
- """ - - test_group_deps = sorted(poetry_conf["group"]["test"]["dependencies"]) - - assert test_group_deps == sorted( - [ - "duckdb-engine", - "freezegun", - "langchain-core", - "lark", - "pandas", - "pytest", - "pytest-asyncio", - "pytest-cov", - "pytest-dotenv", - "pytest-mock", - "pytest-socket", - "pytest-watcher", - "responses", - "syrupy", - "requests-mock", - ] - ) - - -def test_imports() -> None: - """Test that you can import all top level things okay.""" - from langchain_core.prompts import BasePromptTemplate # noqa: F401 - - from langchain.agents import OpenAIFunctionsAgent # noqa: F401 - from langchain.callbacks import OpenAICallbackHandler # noqa: F401 - from langchain.chains import LLMChain # noqa: F401 - from langchain.chat_models import ChatOpenAI # noqa: F401 - from langchain.document_loaders import BSHTMLLoader # noqa: F401 - from langchain.embeddings import OpenAIEmbeddings # noqa: F401 - from langchain.llms import OpenAI # noqa: F401 - from langchain.retrievers import VespaRetriever # noqa: F401 - from langchain.tools import DuckDuckGoSearchResults # noqa: F401 - from langchain.utilities import ( - SearchApiAPIWrapper, # noqa: F401 - SerpAPIWrapper, # noqa: F401 - ) - from langchain.vectorstores import FAISS # noqa: F401 diff --git a/.scripts/community_split/script_integrations.sh b/.scripts/community_split/script_integrations.sh deleted file mode 100755 index 56e5222261f..00000000000 --- a/.scripts/community_split/script_integrations.sh +++ /dev/null @@ -1,313 +0,0 @@ -#!/bin/bash - -cd libs - -# cleanup anything existing -git checkout master -- langchain/{langchain,tests} -git checkout master -- core/{langchain_core,tests} -git checkout master -- experimental/{langchain_experimental,tests} -rm -rf community/{langchain_community,tests} - -# make new dirs -mkdir -p community/langchain_community -touch community/langchain_community/__init__.py -touch community/langchain_community/py.typed -touch community/README.md -mkdir -p community/tests -touch community/tests/__init__.py -mkdir community/tests/unit_tests -touch community/tests/unit_tests/__init__.py -mkdir community/tests/integration_tests/ -touch community/tests/integration_tests/__init__.py -mkdir -p community/langchain_community/utils -touch community/langchain_community/utils/__init__.py -mkdir -p community/tests/unit_tests/utils -touch community/tests/unit_tests/utils/__init__.py -mkdir -p community/langchain_community/indexes -touch community/langchain_community/indexes/__init__.py -mkdir community/tests/unit_tests/indexes -touch community/tests/unit_tests/indexes/__init__.py - -# import core stuff from core -cd langchain - -git grep -l 'from langchain.pydantic_v1' | xargs sed -i '' 's/from langchain.pydantic_v1/from langchain_core.pydantic_v1/g' -git grep -l 'from langchain.tools.base' | xargs sed -i '' 's/from langchain.tools.base/from langchain_core.tools/g' -git grep -l 'from langchain.chat_models.base' | xargs sed -i '' 's/from langchain.chat_models.base/from langchain_core.language_models.chat_models/g' -git grep -l 'from langchain.llms.base' | xargs sed -i '' 's/from langchain.llms.base/from langchain_core.language_models.llms/g' -git grep -l 'from langchain.embeddings.base' | xargs sed -i '' 's/from langchain.embeddings.base/from langchain_core.embeddings/g' -git grep -l 'from langchain.vectorstores.base' | xargs sed -i '' 's/from langchain.vectorstores.base/from langchain_core.vectorstores/g' -git grep -l 'from langchain.agents.tools' | xargs sed -i '' 's/from langchain.agents.tools/from 
langchain_core.tools/g' -git grep -l 'from langchain.schema.output' | xargs sed -i '' 's/from langchain.schema.output/from langchain_core.outputs/g' -git grep -l 'from langchain.schema.messages' | xargs sed -i '' 's/from langchain.schema.messages/from langchain_core.messages/g' -git grep -l 'from langchain.schema.embeddings' | xargs sed -i '' 's/from langchain.schema.embeddings/from langchain_core.embeddings/g' - -# mv stuff to community -cd .. - -mv langchain/langchain/adapters community/langchain_community -mv langchain/langchain/callbacks community/langchain_community/callbacks -mv langchain/langchain/chat_loaders community/langchain_community -mv langchain/langchain/chat_models community/langchain_community -mv langchain/langchain/document_loaders community/langchain_community -mv langchain/langchain/docstore community/langchain_community -mv langchain/langchain/document_transformers community/langchain_community -mv langchain/langchain/embeddings community/langchain_community -mv langchain/langchain/graphs community/langchain_community -mv langchain/langchain/llms community/langchain_community -mv langchain/langchain/memory/chat_message_histories community/langchain_community -mv langchain/langchain/retrievers community/langchain_community -mv langchain/langchain/storage community/langchain_community -mv langchain/langchain/tools community/langchain_community -mv langchain/langchain/utilities community/langchain_community -mv langchain/langchain/vectorstores community/langchain_community - -mv langchain/langchain/agents/agent_toolkits community/langchain_community -mv langchain/langchain/cache.py community/langchain_community -mv langchain/langchain/indexes/base.py community/langchain_community/indexes -mv langchain/langchain/indexes/_sql_record_manager.py community/langchain_community/indexes -mv langchain/langchain/utils/{math,openai,openai_functions}.py community/langchain_community/utils - -# mv stuff to core -mv langchain/langchain/utils/json_schema.py core/langchain_core/utils -mv langchain/langchain/utils/html.py core/langchain_core/utils -mv langchain/langchain/utils/strings.py core/langchain_core/utils -cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py -rm langchain/langchain/utils/env.py - -# mv unit tests to community -mv langchain/tests/unit_tests/chat_loaders community/tests/unit_tests -mv langchain/tests/unit_tests/document_loaders community/tests/unit_tests -mv langchain/tests/unit_tests/docstore community/tests/unit_tests -mv langchain/tests/unit_tests/document_transformers community/tests/unit_tests -mv langchain/tests/unit_tests/embeddings community/tests/unit_tests -mv langchain/tests/unit_tests/graphs community/tests/unit_tests -mv langchain/tests/unit_tests/llms community/tests/unit_tests -mv langchain/tests/unit_tests/chat_models community/tests/unit_tests -mv langchain/tests/unit_tests/memory/chat_message_histories community/tests/unit_tests -mv langchain/tests/unit_tests/storage community/tests/unit_tests -mv langchain/tests/unit_tests/tools community/tests/unit_tests -mv langchain/tests/unit_tests/utilities community/tests/unit_tests -mv langchain/tests/unit_tests/vectorstores community/tests/unit_tests -mv langchain/tests/unit_tests/retrievers community/tests/unit_tests -mv langchain/tests/unit_tests/callbacks community/tests/unit_tests -mv langchain/tests/unit_tests/indexes/test_sql_record_manager.py community/tests/unit_tests/indexes -mv langchain/tests/unit_tests/utils/test_math.py community/tests/unit_tests/utils - -# cp some test 
helpers back to langchain -mkdir -p langchain/tests/unit_tests/llms -cp {community,langchain}/tests/unit_tests/llms/fake_llm.py -cp {community,langchain}/tests/unit_tests/llms/fake_chat_model.py -mkdir -p langchain/tests/unit_tests/callbacks -cp {community,langchain}/tests/unit_tests/callbacks/fake_callback_handler.py - -# mv unit tests to core -mv langchain/tests/unit_tests/utils/test_json_schema.py core/tests/unit_tests/utils -mv langchain/tests/unit_tests/utils/test_html.py core/tests/unit_tests/utils - -# mv integration tests to community -mv langchain/tests/integration_tests/document_loaders community/tests/integration_tests -mv langchain/tests/integration_tests/embeddings community/tests/integration_tests -mv langchain/tests/integration_tests/graphs community/tests/integration_tests -mv langchain/tests/integration_tests/llms community/tests/integration_tests -mv langchain/tests/integration_tests/chat_models community/tests/integration_tests -mv langchain/tests/integration_tests/memory/chat_message_histories community/tests/integration_tests -mv langchain/tests/integration_tests/storage community/tests/integration_tests -mv langchain/tests/integration_tests/tools community/tests/integration_tests -mv langchain/tests/integration_tests/utilities community/tests/integration_tests -mv langchain/tests/integration_tests/vectorstores community/tests/integration_tests -mv langchain/tests/integration_tests/retrievers community/tests/integration_tests -mv langchain/tests/integration_tests/adapters community/tests/integration_tests -mv langchain/tests/integration_tests/callbacks community/tests/integration_tests -mv langchain/tests/integration_tests/{test_kuzu,test_nebulagraph}.py community/tests/integration_tests/graphs -touch community/tests/integration_tests/{chat_message_histories,tools}/__init__.py - -# import new core stuff from core (everywhere) -git grep -l 'from langchain.utils.json_schema' | xargs sed -i '' 's/from langchain.utils.json_schema/from langchain_core.utils.json_schema/g' -git grep -l 'from langchain.utils.html' | xargs sed -i '' 's/from langchain.utils.html/from langchain_core.utils.html/g' -git grep -l 'from langchain.utils.strings' | xargs sed -i '' 's/from langchain.utils.strings/from langchain_core.utils.strings/g' -git grep -l 'from langchain.utils.env' | xargs sed -i '' 's/from langchain.utils.env/from langchain_core.utils.env/g' - -git add community -cd community - -# import core stuff from core -git grep -l 'from langchain.pydantic_v1' | xargs sed -i '' 's/from langchain.pydantic_v1/from langchain_core.pydantic_v1/g' -git grep -l 'from langchain.callbacks.base' | xargs sed -i '' 's/from langchain.callbacks.base/from langchain_core.callbacks/g' -git grep -l 'from langchain.callbacks.stdout' | xargs sed -i '' 's/from langchain.callbacks.stdout/from langchain_core.callbacks/g' -git grep -l 'from langchain.callbacks.streaming_stdout' | xargs sed -i '' 's/from langchain.callbacks.streaming_stdout/from langchain_core.callbacks/g' -git grep -l 'from langchain.callbacks.manager' | xargs sed -i '' 's/from langchain.callbacks.manager/from langchain_core.callbacks/g' -git grep -l 'from langchain.callbacks.tracers.base' | xargs sed -i '' 's/from langchain.callbacks.tracers.base/from langchain_core.tracers/g' -git grep -l 'from langchain.tools.base' | xargs sed -i '' 's/from langchain.tools.base/from langchain_core.tools/g' -git grep -l 'from langchain.agents.tools' | xargs sed -i '' 's/from langchain.agents.tools/from langchain_core.tools/g' -git grep -l 'from 
langchain.schema.output' | xargs sed -i '' 's/from langchain.schema.output/from langchain_core.outputs/g' -git grep -l 'from langchain.schema.messages' | xargs sed -i '' 's/from langchain.schema.messages/from langchain_core.messages/g' -git grep -l 'from langchain.schema import BaseRetriever' | xargs sed -i '' 's/from langchain.schema\ import\ BaseRetriever/from langchain_core.retrievers import BaseRetriever/g' -git grep -l 'from langchain.schema import Document' | xargs sed -i '' 's/from langchain.schema\ import\ Document/from langchain_core.documents import Document/g' - -# import openai stuff from openai -git grep -l 'from langchain.utils.math' | xargs sed -i '' 's/from langchain.utils.math/from langchain_community.utils.math/g' -git grep -l 'from langchain.utils.openai_functions' | xargs sed -i '' 's/from langchain.utils.openai_functions/from langchain_community.utils.openai_functions/g' -git grep -l 'from langchain.utils.openai' | xargs sed -i '' 's/from langchain.utils.openai/from langchain_community.utils.openai/g' -git grep -l 'from langchain.utils' | xargs sed -i '' 's/from langchain.utils/from langchain_core.utils/g' -git grep -l 'from langchain\.' | xargs sed -i '' 's/from langchain\./from langchain_community./g' -git grep -l 'from langchain_community.memory.chat_message_histories' | xargs sed -i '' 's/from langchain_community.memory.chat_message_histories/from langchain_community.chat_message_histories/g' -git grep -l 'from langchain_community.agents.agent_toolkits' | xargs sed -i '' 's/from langchain_community.agents.agent_toolkits/from langchain_community.agent_toolkits/g' - -sed -i '' 's/from\ langchain.chat_models\ import\ ChatOpenAI/from langchain_openai.chat_models import ChatOpenAI/g' langchain_community/chat_models/promptlayer_openai.py - -git grep -l 'from langchain_community\.text_splitter' | xargs sed -i '' 's/from langchain_community\.text_splitter/from langchain.text_splitter/g' -git grep -l 'from langchain_community\.chains' | xargs sed -i '' 's/from langchain_community\.chains/from langchain.chains/g' -git grep -l 'from langchain_community\.agents' | xargs sed -i '' 's/from langchain_community\.agents/from langchain.agents/g' -git grep -l 'from langchain_community\.memory' | xargs sed -i '' 's/from langchain_community\.memory/from langchain.memory/g' -git grep -l 'langchain\.__version__' | xargs sed -i '' 's/langchain\.__version__/langchain_community.__version__/g' -git grep -l 'langchain\.document_loaders' | xargs sed -i '' 's/langchain\.document_loaders/langchain_community.document_loaders/g' -git grep -l 'langchain\.callbacks' | xargs sed -i '' 's/langchain\.callbacks/langchain_community.callbacks/g' -git grep -l 'langchain\.tools' | xargs sed -i '' 's/langchain\.tools/langchain_community.tools/g' -git grep -l 'langchain\.llms' | xargs sed -i '' 's/langchain\.llms/langchain_community.llms/g' -git grep -l 'import langchain$' | xargs sed -i '' 's/import\ langchain$/import\ langchain_community/g' -git grep -l 'from\ langchain\ ' | xargs sed -i '' 's/from\ langchain\ /from\ langchain_community\ /g' -git grep -l 'langchain_core.language_models.llmsten' | xargs sed -i '' 's/langchain_core.language_models.llmsten/langchain_community.llms.baseten/g' - -# update all moved langchain files to re-export classes and functions -cd ../langchain -git checkout master -- langchain - -python ../../.scripts/community_split/update_imports.py langchain/chat_loaders langchain_community.chat_loaders -python ../../.scripts/community_split/update_imports.py langchain/callbacks 
langchain_community.callbacks -python ../../.scripts/community_split/update_imports.py langchain/document_loaders langchain_community.document_loaders -python ../../.scripts/community_split/update_imports.py langchain/docstore langchain_community.docstore -python ../../.scripts/community_split/update_imports.py langchain/document_transformers langchain_community.document_transformers -python ../../.scripts/community_split/update_imports.py langchain/embeddings langchain_community.embeddings -python ../../.scripts/community_split/update_imports.py langchain/graphs langchain_community.graphs -python ../../.scripts/community_split/update_imports.py langchain/llms langchain_community.llms -python ../../.scripts/community_split/update_imports.py langchain/chat_models langchain_community.chat_models -python ../../.scripts/community_split/update_imports.py langchain/memory/chat_message_histories langchain_community.chat_message_histories -python ../../.scripts/community_split/update_imports.py langchain/storage langchain_community.storage -python ../../.scripts/community_split/update_imports.py langchain/tools langchain_community.tools -python ../../.scripts/community_split/update_imports.py langchain/utilities langchain_community.utilities -python ../../.scripts/community_split/update_imports.py langchain/vectorstores langchain_community.vectorstores -python ../../.scripts/community_split/update_imports.py langchain/retrievers langchain_community.retrievers -python ../../.scripts/community_split/update_imports.py langchain/adapters langchain_community.adapters -python ../../.scripts/community_split/update_imports.py langchain/agents/agent_toolkits langchain_community.agent_toolkits -python ../../.scripts/community_split/update_imports.py langchain/cache.py langchain_community.cache -python ../../.scripts/community_split/update_imports.py langchain/utils/math.py langchain_community.utils.math -python ../../.scripts/community_split/update_imports.py langchain/utils/json_schema.py langchain_core.utils.json_schema -python ../../.scripts/community_split/update_imports.py langchain/utils/html.py langchain_core.utils.html -python ../../.scripts/community_split/update_imports.py langchain/utils/env.py langchain_core.utils.env -python ../../.scripts/community_split/update_imports.py langchain/utils/strings.py langchain_core.utils.strings -python ../../.scripts/community_split/update_imports.py langchain/utils/openai.py langchain_community.utils.openai -python ../../.scripts/community_split/update_imports.py langchain/utils/openai_functions.py langchain_community.utils.openai_functions - -# update core and openai imports -git grep -l 'from langchain.llms.base ' | xargs sed -i '' 's/from langchain.llms.base /from langchain_core.language_models.llms /g' -git grep -l 'from langchain.chat_models.base ' | xargs sed -i '' 's/from langchain.chat_models.base /from langchain_core.language_models.chat_models /g' -git grep -l 'from langchain.tools.base' | xargs sed -i '' 's/from langchain.tools.base/from langchain_core.tools/g' - -git grep -l 'langchain_core.language_models.llmsten' | xargs sed -i '' 's/langchain_core.language_models.llmsten/langchain_community.llms.baseten/g' - -cd .. 
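-
-# Editor's sketch (illustrative, not part of the original script): each
-# update_imports.py call above rewrites the just-restored langchain module in
-# place into a thin re-export shim, so a module that previously defined
-# AimCallbackHandler ends up containing only the import from
-# langchain_community plus an __all__ entry. One way to spot-check a shim:
-head -n 5 langchain/langchain/callbacks/aim_callback.py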
- -mv community/langchain_community/utilities/loading.py langchain/langchain/utilities -mv community/langchain_community/utilities/asyncio.py langchain/langchain/utilities - -#git add partners -git add core - -# rm files from community that just export core classes -rm community/langchain_community/{chat_models,llms,tools,embeddings,vectorstores,callbacks}/base.py -rm community/tests/unit_tests/{chat_models,llms,tools,callbacks}/test_base.py -rm community/tests/unit_tests/callbacks/test_manager.py -rm community/langchain_community/callbacks/{stdout,streaming_stdout}.py -rm community/langchain_community/callbacks/tracers/{base,evaluation,langchain,langchain_v1,log_stream,root_listeners,run_collector,schemas,stdout}.py - -# keep export tests in langchain -git checkout master -- langchain/tests/unit_tests/{chat_models,llms,tools,callbacks,document_loaders}/test_base.py -git checkout master -- langchain/tests/unit_tests/{callbacks,docstore,document_loaders,document_transformers,embeddings,graphs,llms,chat_models,storage,tools,utilities,vectorstores}/test_imports.py -git checkout master -- langchain/tests/unit_tests/callbacks/test_manager.py -git checkout master -- langchain/tests/unit_tests/document_loaders/blob_loaders/test_public_api.py -git checkout master -- langchain/tests/unit_tests/document_loaders/parsers/test_public_api.py -git checkout master -- langchain/tests/unit_tests/vectorstores/test_public_api.py -git checkout master -- langchain/tests/unit_tests/schema - -# keep some non-integration stuff in langchain. rm from community and add back to langchain -rm community/langchain_community/retrievers/{multi_query,multi_vector,contextual_compression,ensemble,merger_retriever,parent_document_retriever,re_phraser,web_research,time_weighted_retriever}.py -rm -r community/langchain_community/retrievers/{self_query,document_compressors} -rm community/tests/unit_tests/retrievers/test_{ensemble,multi_query,multi_vector,parent_document,time_weighted_retriever,web_research}.py -rm community/tests/integration_tests/retrievers/test_{contextual_compression,merger_retriever}.py -rm -r community/tests/unit_tests/retrievers/{self_query,document_compressors} -rm -r community/tests/integration_tests/retrievers/document_compressors - -rm community/langchain_community/agent_toolkits/{pandas,python,spark}/__init__.py -rm community/langchain_community/tools/python/__init__.py - -rm -r community/langchain_community/agent_toolkits/conversational_retrieval/ -rm -r community/langchain_community/agent_toolkits/vectorstore/ -rm community/langchain_community/callbacks/tracers/logging.py -rm community/langchain_community/callbacks/{file,streaming_aiter_final_only,streaming_aiter,streaming_stdout_final_only}.py -rm community/langchain_community/embeddings/cache.py -rm community/langchain_community/storage/{encoder_backed,file_system,in_memory,_lc_store}.py -rm community/langchain_community/tools/retriever.py -rm community/tests/unit_tests/callbacks/tracers/test_logging.py -rm community/tests/unit_tests/embeddings/test_caching.py -rm community/tests/unit_tests/storage/test_{filesystem,in_memory,lc_store}.py - -git checkout master -- langchain/langchain/retrievers/{multi_query,multi_vector,self_query/base,contextual_compression,ensemble,merger_retriever,parent_document_retriever,re_phraser,web_research,time_weighted_retriever}.py -git checkout master -- langchain/langchain/retrievers/{self_query,document_compressors} -git checkout master -- 
langchain/tests/unit_tests/retrievers/test_{ensemble,multi_query,multi_vector,parent_document,time_weighted_retriever,web_research}.py -git checkout master -- langchain/tests/integration_tests/retrievers/test_{contextual_compression,merger_retriever}.py -git checkout master -- langchain/tests/unit_tests/retrievers/{self_query,document_compressors} -git checkout master -- langchain/tests/integration_tests/retrievers/document_compressors -touch langchain/tests/unit_tests/{llms,chat_models,tools,callbacks,runnables,document_loaders,docstore,document_transformers,embeddings,graphs,storage,utilities,vectorstores,retrievers}/__init__.py -touch langchain/tests/unit_tests/document_loaders/{blob_loaders,parsers}/__init__.py -mv {community,langchain}/tests/unit_tests/retrievers/sequential_retriever.py - -git checkout master -- langchain/langchain/agents/agent_toolkits/conversational_retrieval/ -git checkout master -- langchain/langchain/agents/agent_toolkits/vectorstore/ -git checkout master -- langchain/langchain/callbacks/tracers/logging.py -git checkout master -- langchain/langchain/callbacks/{file,streaming_aiter_final_only,streaming_aiter,streaming_stdout_final_only}.py -git checkout master -- langchain/langchain/embeddings/cache.py -git checkout master -- langchain/langchain/storage/{encoder_backed,file_system,in_memory,_lc_store}.py -git checkout master -- langchain/langchain/tools/retriever.py -git checkout master -- langchain/tests/unit_tests/callbacks/tracers/{test_logging,__init__}.py -git checkout master -- langchain/tests/unit_tests/embeddings/{__init__,test_caching}.py -git checkout master -- langchain/tests/unit_tests/storage/test_{filesystem,in_memory,lc_store}.py -git checkout master -- langchain/tests/unit_tests/storage/__init__.py - -# cp lint scripts -cp -r core/scripts community - -# cp test helpers -cp -r langchain/tests/integration_tests/examples community/tests -cp -r langchain/tests/integration_tests/examples community/tests/integration_tests -cp -r langchain/tests/unit_tests/examples community/tests/unit_tests -cp langchain/tests/unit_tests/conftest.py community/tests/unit_tests -cp community/tests/integration_tests/vectorstores/fake_embeddings.py langchain/tests/integration_tests/cache/ -cp langchain/tests/integration_tests/test_compile.py community/tests/integration_tests - -# cp manually changed files -cp -r ../.scripts/community_split/libs/* . - -# mv some tests to integrations -mv community/tests/{unit_tests,integration_tests}/document_loaders/test_telegram.py -mv community/tests/{unit_tests,integration_tests}/document_loaders/parsers/test_docai.py -mv community/tests/{unit_tests,integration_tests}/chat_message_histories/test_streamlit.py - -# fix some final tests -git grep -l 'integration_tests\.vectorstores\.fake_embeddings' langchain/tests | xargs sed -i '' 's/integration_tests\.vectorstores\.fake_embeddings/integration_tests.cache.fake_embeddings/g' -touch community/langchain_community/agent_toolkits/amadeus/__init__.py - -# format -cd core -make format -cd ../langchain -make format -cd ../experimental -make format -cd ../community -make format - -cd .. -sed -E -i '' '1 s/(.*)/\1\ \ \#\ noqa\:\ E501/g' langchain/langchain/agents/agent_toolkits/conversational_retrieval/openai_functions.py -sed -E -i '' 's/import\ importlib$/import importlib.util/g' experimental/langchain_experimental/prompts/load.py -git add . 
\ No newline at end of file diff --git a/.scripts/community_split/update_imports.py b/.scripts/community_split/update_imports.py deleted file mode 100644 index e555c42d9cf..00000000000 --- a/.scripts/community_split/update_imports.py +++ /dev/null @@ -1,85 +0,0 @@ -import ast -import os -import sys -from pathlib import Path - - -class ImportTransformer(ast.NodeTransformer): - def __init__(self, public_items, module_name): - self.public_items = public_items - self.module_name = module_name - - def visit_Module(self, node): - imports = [ - ast.ImportFrom( - module=self.module_name, - names=[ast.alias(name=item, asname=None)], - level=0, - ) - for item in self.public_items - ] - all_assignment = ast.Assign( - targets=[ast.Name(id="__all__", ctx=ast.Store())], - value=ast.List( - elts=[ast.Str(s=item) for item in self.public_items], ctx=ast.Load() - ), - ) - node.body = imports + [all_assignment] - return node - - -def find_public_classes_and_methods(file_path): - with open(file_path, "r") as file: - node = ast.parse(file.read(), filename=file_path) - - public_items = [] - for item in node.body: - if isinstance(item, ast.ClassDef) or isinstance(item, ast.FunctionDef): - public_items.append(item.name) - if ( - isinstance(item, ast.Assign) - and hasattr(item.targets[0], "id") - and item.targets[0].id not in ("__all__", "logger") - ): - public_items.append(item.targets[0].id) - - return public_items or None - - -def process_file(file_path, module_name): - public_items = find_public_classes_and_methods(file_path) - if public_items is None: - return - - with open(file_path, "r") as file: - contents = file.read() - tree = ast.parse(contents, filename=file_path) - - tree = ImportTransformer(public_items, module_name).visit(tree) - tree = ast.fix_missing_locations(tree) - - with open(file_path, "w") as file: - file.write(ast.unparse(tree)) - - -def process_directory(directory_path, base_module_name): - if Path(directory_path).is_file(): - process_file(directory_path, base_module_name) - else: - for root, dirs, files in os.walk(directory_path): - for filename in files: - if filename.endswith(".py") and not filename.startswith("_"): - file_path = os.path.join(root, filename) - relative_path = os.path.relpath(file_path, directory_path) - module_name = f"{base_module_name}.{os.path.splitext(relative_path)[0].replace(os.sep, '.')}" - process_file(file_path, module_name) - - -if __name__ == "__main__": - if len(sys.argv) != 3: - print("Usage: python script_name.py <directory_path> <base_module_name>") - sys.exit(1) - - directory_path = sys.argv[1] - base_module_name = sys.argv[2] - process_directory(directory_path, base_module_name)
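
For readers reconstructing what the deleted update_imports.py did, here is an editor's self-contained sketch (not part of the patch; the module body and target name are hypothetical, and ast.Constant stands in for the script's deprecated ast.Str) of the body-replacing transform it applied to each moved module:

    import ast

    # Hypothetical body of a module whose implementation moved to langchain_community.
    source = "class AimCallbackHandler:\n    pass\n"
    target_module = "langchain_community.callbacks.aim_callback"

    tree = ast.parse(source)
    # Collect public top-level names, as find_public_classes_and_methods did.
    public = [
        n.name for n in tree.body if isinstance(n, (ast.ClassDef, ast.FunctionDef))
    ]
    # Swap the module body for re-export imports plus an __all__ assignment,
    # mirroring ImportTransformer.visit_Module.
    tree.body = [
        ast.ImportFrom(
            module=target_module, names=[ast.alias(name=p, asname=None)], level=0
        )
        for p in public
    ] + [
        ast.Assign(
            targets=[ast.Name(id="__all__", ctx=ast.Store())],
            value=ast.List(
                elts=[ast.Constant(value=p) for p in public], ctx=ast.Load()
            ),
        )
    ]
    print(ast.unparse(ast.fix_missing_locations(tree)))
    # Prints:
    # from langchain_community.callbacks.aim_callback import AimCallbackHandler
    # __all__ = ['AimCallbackHandler']

The net effect is that old langchain import paths keep working as thin aliases while the real implementations live in langchain_community.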