From 633596823763c9b9088da26fa4b989be6a50f5e4 Mon Sep 17 00:00:00 2001 From: John Kennedy <65985482+jkennedyvz@users.noreply.github.com> Date: Sun, 1 Mar 2026 09:14:48 -0800 Subject: [PATCH] fix(deepseek): use proper URL parsing for azure endpoint detection (#35455) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary - Fixes [CodeQL alert #43](https://github.com/langchain-ai/langchain/security/code-scanning/43) (CWE-20: incomplete URL substring sanitization) - Replaces `"azure.com" in url` substring check with `urlparse`-based hostname validation to prevent bypass via crafted URLs (e.g., `https://evil-azure.com`, `https://example.com/azure.com`) - Adds bypass-attempt test cases to the existing Azure endpoint detection tests ## Why The substring check `"azure.com" in url` matches URLs where `azure.com` appears anywhere in the string, not just in the hostname. An attacker-controlled endpoint like `https://evil-azure.com` or `https://example.com/azure.com` would incorrectly trigger the Azure code path. Using `urlparse` to extract and validate the hostname is the standard fix per CodeQL guidance. ## Test plan - [x] Existing Azure endpoint detection tests pass - [x] New negative test cases for bypass attempts pass - [x] `uv run pytest tests/unit_tests/test_chat_models.py -k azure` — 6/6 passing > [!NOTE] > This PR was authored with assistance from an AI agent (Claude Code). 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude Opus 4.6 --- libs/partners/deepseek/langchain_deepseek/chat_models.py | 4 +++- libs/partners/deepseek/tests/unit_tests/test_chat_models.py | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/libs/partners/deepseek/langchain_deepseek/chat_models.py b/libs/partners/deepseek/langchain_deepseek/chat_models.py index ad5e77b6d02..e4618c06baa 100644 --- a/libs/partners/deepseek/langchain_deepseek/chat_models.py +++ b/libs/partners/deepseek/langchain_deepseek/chat_models.py @@ -6,6 +6,7 @@ import json from collections.abc import Callable, Iterator, Sequence from json import JSONDecodeError from typing import Any, Literal, TypeAlias, cast +from urllib.parse import urlparse import openai from langchain_core.callbacks import ( @@ -197,7 +198,8 @@ class ChatDeepSeek(BaseChatOpenAI): @property def _is_azure_endpoint(self) -> bool: """Check if the configured endpoint is an Azure deployment.""" - return "azure.com" in (self.api_base or "").lower() + hostname = urlparse(self.api_base or "").hostname or "" + return hostname == "azure.com" or hostname.endswith(".azure.com") @property def _llm_type(self) -> str: diff --git a/libs/partners/deepseek/tests/unit_tests/test_chat_models.py b/libs/partners/deepseek/tests/unit_tests/test_chat_models.py index b1129552f8f..7822017a244 100644 --- a/libs/partners/deepseek/tests/unit_tests/test_chat_models.py +++ b/libs/partners/deepseek/tests/unit_tests/test_chat_models.py @@ -348,6 +348,9 @@ class TestChatDeepSeekAzureToolChoice: DEFAULT_API_BASE, "https://api.openai.com/v1", "https://custom-endpoint.com/api", + "https://evil-azure.com/v1", # hostname bypass attempt + "https://notazure.com.evil.com/", # subdomain bypass attempt + "https://example.com/azure.com", # path bypass attempt ] for endpoint in non_azure_endpoints: llm = ChatDeepSeek(