fix(deepseek): use proper URL parsing for azure endpoint detection (#35455)

## Summary

- Fixes [CodeQL alert
#43](https://github.com/langchain-ai/langchain/security/code-scanning/43)
(CWE-20: incomplete URL substring sanitization)
- Replaces `"azure.com" in url` substring check with `urlparse`-based
hostname validation to prevent bypass via crafted URLs (e.g.,
`https://evil-azure.com`, `https://example.com/azure.com`)
- Adds bypass-attempt test cases to the existing Azure endpoint
detection tests

## Why

The substring check `"azure.com" in url` matches URLs where `azure.com`
appears anywhere in the string, not just in the hostname. An
attacker-controlled endpoint like `https://evil-azure.com` or
`https://example.com/azure.com` would incorrectly trigger the Azure code
path. Using `urlparse` to extract the hostname, and requiring it to be
exactly `azure.com` or to end in `.azure.com`, is the standard fix per
CodeQL guidance.

## Test plan

- [x] Existing Azure endpoint detection tests pass
- [x] New negative test cases for bypass attempts pass
- [x] `uv run pytest tests/unit_tests/test_chat_models.py -k azure` —
6/6 passing

> [!NOTE]
> This PR was authored with assistance from an AI agent (Claude Code).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
John Kennedy
2026-03-01 09:14:48 -08:00
committed by GitHub
parent dac22ced14
commit 6335968237
2 changed files with 6 additions and 1 deletion

View File

@@ -6,6 +6,7 @@ import json
from collections.abc import Callable, Iterator, Sequence
from json import JSONDecodeError
from typing import Any, Literal, TypeAlias, cast
from urllib.parse import urlparse
import openai
from langchain_core.callbacks import (
@@ -197,7 +198,8 @@ class ChatDeepSeek(BaseChatOpenAI):
@property
def _is_azure_endpoint(self) -> bool:
    """Check if the configured endpoint is an Azure deployment.

    The decision is made on the parsed hostname of `self.api_base`, not on
    the raw URL string. A plain substring test (`"azure.com" in url`) also
    matches look-alike domains such as `evil-azure.com` and URLs that merely
    contain `azure.com` in their path, so it must not be used here.

    Returns:
        `True` when the hostname is `azure.com` or a subdomain of it.
        `False` otherwise, including when `api_base` is unset, has no
        hostname, or cannot be parsed as a URL.
    """
    try:
        # `.hostname` is already lower-cased by `urlparse`; it is `None`
        # when the URL carries no network location.
        hostname = urlparse(self.api_base or "").hostname or ""
    except ValueError:
        # Malformed network locations (e.g. unbalanced IPv6 brackets) make
        # `urlparse` raise; such an endpoint is not an Azure endpoint.
        return False
    # A fully-qualified name may end with the DNS root dot.
    hostname = hostname.removesuffix(".")
    return hostname == "azure.com" or hostname.endswith(".azure.com")
@property
def _llm_type(self) -> str:

View File

@@ -348,6 +348,9 @@ class TestChatDeepSeekAzureToolChoice:
DEFAULT_API_BASE,
"https://api.openai.com/v1",
"https://custom-endpoint.com/api",
"https://evil-azure.com/v1", # hostname bypass attempt
"https://notazure.com.evil.com/", # subdomain bypass attempt
"https://example.com/azure.com", # path bypass attempt
]
for endpoint in non_azure_endpoints:
llm = ChatDeepSeek(