mirror of
https://github.com/hwchase17/langchain.git
synced 2026-03-18 02:53:16 +00:00
fix(deepseek): use proper URL parsing for azure endpoint detection (#35455)
## Summary

- Fixes [CodeQL alert #43](https://github.com/langchain-ai/langchain/security/code-scanning/43) (CWE-20: incomplete URL substring sanitization)
- Replaces `"azure.com" in url` substring check with `urlparse`-based hostname validation to prevent bypass via crafted URLs (e.g., `https://evil-azure.com`, `https://example.com/azure.com`)
- Adds bypass-attempt test cases to the existing Azure endpoint detection tests

## Why

The substring check `"azure.com" in url` matches URLs where `azure.com` appears anywhere in the string, not just in the hostname. An attacker-controlled endpoint like `https://evil-azure.com` or `https://example.com/azure.com` would incorrectly trigger the Azure code path. Using `urlparse` to extract and validate the hostname is the standard fix per CodeQL guidance.

## Test plan

- [x] Existing Azure endpoint detection tests pass
- [x] New negative test cases for bypass attempts pass
- [x] `uv run pytest tests/unit_tests/test_chat_models.py -k azure` — 6/6 passing

> [!NOTE]
> This PR was authored with assistance from an AI agent (Claude Code).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -6,6 +6,7 @@ import json
|
||||
from collections.abc import Callable, Iterator, Sequence
|
||||
from json import JSONDecodeError
|
||||
from typing import Any, Literal, TypeAlias, cast
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import openai
|
||||
from langchain_core.callbacks import (
|
||||
@@ -197,7 +198,8 @@ class ChatDeepSeek(BaseChatOpenAI):
|
||||
@property
|
||||
def _is_azure_endpoint(self) -> bool:
|
||||
"""Check if the configured endpoint is an Azure deployment."""
|
||||
return "azure.com" in (self.api_base or "").lower()
|
||||
hostname = urlparse(self.api_base or "").hostname or ""
|
||||
return hostname == "azure.com" or hostname.endswith(".azure.com")
|
||||
|
||||
@property
|
||||
def _llm_type(self) -> str:
|
||||
|
||||
@@ -348,6 +348,9 @@ class TestChatDeepSeekAzureToolChoice:
|
||||
DEFAULT_API_BASE,
|
||||
"https://api.openai.com/v1",
|
||||
"https://custom-endpoint.com/api",
|
||||
"https://evil-azure.com/v1", # hostname bypass attempt
|
||||
"https://notazure.com.evil.com/", # subdomain bypass attempt
|
||||
"https://example.com/azure.com", # path bypass attempt
|
||||
]
|
||||
for endpoint in non_azure_endpoints:
|
||||
llm = ChatDeepSeek(
|
||||
|
||||
Reference in New Issue
Block a user