diff --git a/.github/scripts/get_min_versions.py b/.github/scripts/get_min_versions.py
index dc173c2e0f8..653f6902a47 100644
--- a/.github/scripts/get_min_versions.py
+++ b/.github/scripts/get_min_versions.py
@@ -48,7 +48,7 @@ def get_pypi_versions(package_name: str) -> List[str]:
         KeyError: If package not found or response format unexpected
     """
     pypi_url = f"https://pypi.org/pypi/{package_name}/json"
-    response = requests.get(pypi_url)
+    response = requests.get(pypi_url, timeout=10.0)
     response.raise_for_status()
     return list(response.json()["releases"].keys())
 
diff --git a/libs/langchain/tests/unit_tests/test_pytest_config.py b/libs/langchain/tests/unit_tests/test_pytest_config.py
index c2b0af645fb..b02d3d3dd55 100644
--- a/libs/langchain/tests/unit_tests/test_pytest_config.py
+++ b/libs/langchain/tests/unit_tests/test_pytest_config.py
@@ -8,4 +8,2 @@ def test_socket_disabled() -> None:
     with pytest.raises(pytest_socket.SocketBlockedError):
-        # Ignore S113 since we don't need a timeout here as the request
-        # should fail immediately
-        requests.get("https://www.example.com")  # noqa: S113
+        requests.get("https://www.example.com", timeout=10.0)
diff --git a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py
index 8ef75edb455..6ab3b798fb5 100644
--- a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py
+++ b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py
@@ -1231,7 +1231,7 @@ def test_image_tool_calling() -> None:
         },
     ]
     image_url = "https://raw.githubusercontent.com/langchain-ai/docs/4d11d08b6b0e210bd456943f7a22febbd168b543/src/images/agentic-rag-output.png"
-    image_data = b64encode(httpx.get(image_url).content).decode("utf-8")
+    image_data = b64encode(httpx.get(image_url, timeout=10.0).content).decode("utf-8")
     human_content.append(
         {
             "type": "image",
diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_base.py b/libs/partners/openai/tests/integration_tests/chat_models/test_base.py
index 26a8bb787a2..28012204af2 100644
--- a/libs/partners/openai/tests/integration_tests/chat_models/test_base.py
+++ b/libs/partners/openai/tests/integration_tests/chat_models/test_base.py
@@ -718,7 +718,9 @@ def test_image_token_counting_jpeg() -> None:
     actual = model.get_num_tokens_from_messages([message])
     assert expected == actual
 
-    image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
+    image_data = base64.b64encode(
+        httpx.get(image_url, timeout=10.0).content
+    ).decode("utf-8")
     message = HumanMessage(
         content=[
             {"type": "text", "text": "describe the weather in this image"},
@@ -750,7 +752,9 @@ def test_image_token_counting_png() -> None:
     actual = model.get_num_tokens_from_messages([message])
     assert expected == actual
 
-    image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
+    image_data = base64.b64encode(
+        httpx.get(image_url, timeout=10.0).content
+    ).decode("utf-8")
     message = HumanMessage(
         content=[
             {"type": "text", "text": "how many dice are in this image"},
diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py b/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py
index c49d5a40e1f..c572a850b6b 100644
--- a/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py
+++ b/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py
@@ -92,7 +92,9 @@ class TestOpenAIStandard(ChatModelIntegrationTests):
     def test_openai_pdf_inputs(self, model: BaseChatModel) -> None:
         """Test that the model can process PDF inputs."""
         url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
-        pdf_data = base64.b64encode(httpx.get(url).content).decode("utf-8")
+        pdf_data = base64.b64encode(
+            httpx.get(url, timeout=10.0).content
+        ).decode("utf-8")
 
         message = HumanMessage(
             [
diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_standard.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_standard.py
index a2b63e1ab8c..37466928ab1 100644
--- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_standard.py
+++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_standard.py
@@ -87,7 +87,9 @@ class TestOpenAIResponses(TestOpenAIStandard):
     def test_openai_pdf_tool_messages(self, model: BaseChatModel) -> None:
         """Test that the model can process PDF inputs in `ToolMessage` objects."""
         url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
-        pdf_data = base64.b64encode(httpx.get(url).content).decode("utf-8")
+        pdf_data = base64.b64encode(
+            httpx.get(url, timeout=10.0).content
+        ).decode("utf-8")
 
         tool_message = ToolMessage(
             content_blocks=[
diff --git a/libs/standard-tests/langchain_tests/integration_tests/chat_models.py b/libs/standard-tests/langchain_tests/integration_tests/chat_models.py
index f014e37aae3..9191fd61d5a 100644
--- a/libs/standard-tests/langchain_tests/integration_tests/chat_models.py
+++ b/libs/standard-tests/langchain_tests/integration_tests/chat_models.py
@@ -151,7 +151,7 @@ def _get_base64_from_url(url: str) -> str:
         )
         warnings.warn(warning_message, stacklevel=2)
     headers = {"User-Agent": user_agent} if user_agent else {}
-    httpx_response = httpx.get(url, headers=headers).content
+    httpx_response = httpx.get(url, headers=headers, timeout=10.0).content
     return base64.b64encode(httpx_response).decode("utf-8")
 
 
@@ -2583,7 +2583,9 @@ class ChatModelIntegrationTests(ChatModelTests):
             pytest.skip("Model does not support PDF inputs.")
 
         url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
-        pdf_data = base64.b64encode(httpx.get(url).content).decode("utf-8")
+        pdf_data = base64.b64encode(
+            httpx.get(url, timeout=10.0).content
+        ).decode("utf-8")
 
         message = HumanMessage(
             [
@@ -2781,7 +2783,9 @@ class ChatModelIntegrationTests(ChatModelTests):
             pytest.skip("Model does not support image message.")
 
         image_url = "https://raw.githubusercontent.com/langchain-ai/docs/4d11d08b6b0e210bd456943f7a22febbd168b543/src/images/agentic-rag-output.png"
-        image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
+        image_data = base64.b64encode(
+            httpx.get(image_url, timeout=10.0).content
+        ).decode("utf-8")
 
         # OpenAI CC format, base64 data
         message = HumanMessage(
@@ -2887,7 +2891,9 @@ class ChatModelIntegrationTests(ChatModelTests):
             pytest.skip("Model does not support image tool message.")
 
         image_url = "https://raw.githubusercontent.com/langchain-ai/docs/4d11d08b6b0e210bd456943f7a22febbd168b543/src/images/agentic-rag-output.png"
-        image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
+        image_data = base64.b64encode(
+            httpx.get(image_url, timeout=10.0).content
+        ).decode("utf-8")
 
         # OpenAI CC format, base64 data
         oai_format_message = ToolMessage(
@@ -2986,7 +2992,9 @@ class ChatModelIntegrationTests(ChatModelTests):
             pytest.skip("Model does not support PDF tool message.")
 
         url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
-        pdf_data = base64.b64encode(httpx.get(url).content).decode("utf-8")
+        pdf_data = base64.b64encode(
+            httpx.get(url, timeout=10.0).content
+        ).decode("utf-8")
 
         tool_message = ToolMessage(
             content_blocks=[
@@ -3122,7 +3130,9 @@ class ChatModelIntegrationTests(ChatModelTests):
         ]
         if self.supports_image_inputs:
             image_url = "https://raw.githubusercontent.com/langchain-ai/docs/4d11d08b6b0e210bd456943f7a22febbd168b543/src/images/agentic-rag-output.png"
-            image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
+            image_data = base64.b64encode(
+                httpx.get(image_url, timeout=10.0).content
+            ).decode("utf-8")
             human_content.append(
                 {
                     "type": "image",
diff --git a/libs/text-splitters/langchain_text_splitters/base.py b/libs/text-splitters/langchain_text_splitters/base.py
index 19114b253df..fa94ee2d464 100644
--- a/libs/text-splitters/langchain_text_splitters/base.py
+++ b/libs/text-splitters/langchain_text_splitters/base.py
@@ -227,7 +227,7 @@ class TextSplitter(BaseDocumentTransformer, ABC):
         cls,
         encoding_name: str = "gpt2",
         model_name: str | None = None,
-        allowed_special: Literal["all"] | AbstractSet[str] = set(),
+        allowed_special: Literal["all"] | AbstractSet[str] | None = None,
         disallowed_special: Literal["all"] | Collection[str] = "all",
         **kwargs: Any,
     ) -> Self:
@@ -247,6 +247,8 @@ class TextSplitter(BaseDocumentTransformer, ABC):
         Raises:
             ImportError: If the tiktoken package is not installed.
         """
+        if allowed_special is None:
+            allowed_special = set()
         if not _HAS_TIKTOKEN:
             msg = (
                 "Could not import tiktoken python package. "
@@ -302,7 +304,7 @@ class TokenTextSplitter(TextSplitter):
         self,
         encoding_name: str = "gpt2",
         model_name: str | None = None,
-        allowed_special: Literal["all"] | AbstractSet[str] = set(),
+        allowed_special: Literal["all"] | AbstractSet[str] | None = None,
         disallowed_special: Literal["all"] | Collection[str] = "all",
         **kwargs: Any,
     ) -> None:
@@ -319,6 +321,8 @@ class TokenTextSplitter(TextSplitter):
         Raises:
             ImportError: If the tiktoken package is not installed.
         """
+        if allowed_special is None:
+            allowed_special = set()
         super().__init__(**kwargs)
         if not _HAS_TIKTOKEN:
             msg = (