feat(openai): support automatic server-side compaction (#35212)

This commit is contained in:
ccurme
2026-02-17 10:48:52 -05:00
committed by GitHub
parent 32c6ab3033
commit 8f1bc0d3ae
8 changed files with 113 additions and 7 deletions

View File

@@ -147,6 +147,7 @@ def test_configurable() -> None:
"reasoning_effort": None,
"verbosity": None,
"frequency_penalty": None,
"context_management": None,
"include": None,
"seed": None,
"service_tier": None,

View File

@@ -192,6 +192,7 @@ def test_configurable() -> None:
"reasoning_effort": None,
"verbosity": None,
"frequency_penalty": None,
"context_management": None,
"include": None,
"seed": None,
"service_tier": None,

View File

@@ -799,6 +799,11 @@ class BaseChatOpenAI(BaseChatModel):
passed in the parameter during invocation.
"""
context_management: list[dict[str, Any]] | None = None
"""Configuration for
[context management](https://developers.openai.com/api/docs/guides/compaction).
"""
include: list[str] | None = None
"""Additional fields to include in generations from Responses API.
@@ -1096,6 +1101,7 @@ class BaseChatOpenAI(BaseChatModel):
"reasoning_effort": self.reasoning_effort,
"reasoning": self.reasoning,
"verbosity": self.verbosity,
"context_management": self.context_management,
"include": self.include,
"service_tier": self.service_tier,
"truncation": self.truncation,
@@ -1482,6 +1488,7 @@ class BaseChatOpenAI(BaseChatModel):
return self.use_responses_api
if (
self.output_version == "responses/v1"
or self.context_management is not None
or self.include is not None
or self.reasoning is not None
or self.truncation is not None
@@ -3894,6 +3901,7 @@ def _use_responses_api(payload: dict) -> bool:
_is_builtin_tool(tool) for tool in payload["tools"]
)
responses_only_args = {
"context_management",
"include",
"previous_response_id",
"reasoning",
@@ -4273,6 +4281,7 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list:
)
elif block_type in (
"reasoning",
"compaction",
"web_search_call",
"file_search_call",
"function_call",
@@ -4475,6 +4484,7 @@ def _construct_lc_result_from_responses_api(
tool_calls.append(tool_call)
elif output.type in (
"reasoning",
"compaction",
"web_search_call",
"file_search_call",
"computer_call",
@@ -4683,6 +4693,7 @@ def _convert_responses_chunk_to_generation_chunk(
}
)
elif chunk.type == "response.output_item.done" and chunk.item.type in (
"compaction",
"web_search_call",
"file_search_call",
"computer_call",

View File

@@ -24,7 +24,7 @@ version = "1.1.9"
requires-python = ">=3.10.0,<4.0.0"
dependencies = [
"langchain-core>=1.2.11,<2.0.0",
"openai>=1.109.1,<3.0.0",
"openai>=2.20.0,<3.0.0",
"tiktoken>=0.7.0,<1.0.0",
]

View File

@@ -1107,3 +1107,96 @@ def test_custom_tool(output_version: Literal["responses/v1", "v1"]) -> None:
full = chunk if full is None else full + chunk
assert isinstance(full, AIMessageChunk)
assert len(full.tool_calls) == 1
@pytest.mark.default_cassette("test_compaction.yaml.gz")
@pytest.mark.vcr
@pytest.mark.parametrize("output_version", ["responses/v1", "v1"])
def test_compaction(output_version: Literal["responses/v1", "v1"]) -> None:
    """Test the server-side compaction (context management) beta feature.

    Sends two oversized prompts so the configured ``compact_threshold`` is
    exceeded, then checks that the second response carries a ``non_standard``
    content block wrapping a ``compaction`` item, and that a third turn using
    the compacted history still produces text output. Runs against a recorded
    VCR cassette for both supported ``output_version`` formats.
    """
    llm = ChatOpenAI(
        model="gpt-5.2",
        context_management=[{"type": "compaction", "compact_threshold": 10_000}],
        output_version=output_version,
    )
    # ~50k characters of payload to push the conversation past the threshold.
    input_message = {
        "role": "user",
        "content": f"Generate a one-sentence summary of this:\n\n{'a' * 50000}",
    }
    messages: list = [input_message]
    first_response = llm.invoke(messages)
    messages.append(first_response)
    # A second large turn triggers server-side compaction of the history.
    second_message = {
        "role": "user",
        "content": f"Generate a one-sentence summary of this:\n\n{'b' * 50000}",
    }
    messages.append(second_message)
    second_response = llm.invoke(messages)
    messages.append(second_response)
    content_blocks = second_response.content_blocks
    # Compaction items surface as "non_standard" blocks in standard content.
    compaction_block = next(
        (block for block in content_blocks if block["type"] == "non_standard"),
        None,
    )
    assert compaction_block
    assert compaction_block["value"].get("type") == "compaction"
    # A follow-up turn must still work when the compacted history is re-sent.
    third_message = {
        "role": "user",
        "content": "What are we talking about?",
    }
    messages.append(third_message)
    third_response = llm.invoke(messages)
    assert third_response.text
@pytest.mark.default_cassette("test_compaction_streaming.yaml.gz")
@pytest.mark.vcr
@pytest.mark.parametrize("output_version", ["responses/v1", "v1"])
def test_compaction_streaming(output_version: Literal["responses/v1", "v1"]) -> None:
    """Test the server-side compaction beta feature with streaming enabled.

    Same scenario as ``test_compaction`` — two oversized prompts exceed the
    ``compact_threshold``, the second response must contain a ``non_standard``
    block wrapping a ``compaction`` item, and a third turn must still yield
    text — but with ``streaming=True`` so ``invoke`` aggregates streamed
    chunks, exercising the streaming chunk-conversion path.
    """
    llm = ChatOpenAI(
        model="gpt-5.2",
        context_management=[{"type": "compaction", "compact_threshold": 10_000}],
        output_version=output_version,
        streaming=True,
    )
    # ~50k characters of payload to push the conversation past the threshold.
    input_message = {
        "role": "user",
        "content": f"Generate a one-sentence summary of this:\n\n{'a' * 50000}",
    }
    messages: list = [input_message]
    first_response = llm.invoke(messages)
    messages.append(first_response)
    # A second large turn triggers server-side compaction of the history.
    second_message = {
        "role": "user",
        "content": f"Generate a one-sentence summary of this:\n\n{'b' * 50000}",
    }
    messages.append(second_message)
    second_response = llm.invoke(messages)
    messages.append(second_response)
    content_blocks = second_response.content_blocks
    # Compaction items surface as "non_standard" blocks in standard content.
    compaction_block = next(
        (block for block in content_blocks if block["type"] == "non_standard"),
        None,
    )
    assert compaction_block
    assert compaction_block["value"].get("type") == "compaction"
    # A follow-up turn must still work when the compacted history is re-sent.
    third_message = {
        "role": "user",
        "content": "What are we talking about?",
    }
    messages.append(third_message)
    third_response = llm.invoke(messages)
    assert third_response.text

View File

@@ -1,5 +1,5 @@
version = 1
revision = 3
revision = 2
requires-python = ">=3.10.0, <4.0.0"
resolution-markers = [
"python_full_version >= '3.13' and platform_python_implementation == 'PyPy'",
@@ -580,7 +580,7 @@ requires-dist = [
provides-extras = ["community", "anthropic", "openai", "azure-ai", "google-vertexai", "google-genai", "fireworks", "ollama", "together", "mistralai", "huggingface", "groq", "aws", "deepseek", "xai", "perplexity"]
[package.metadata.requires-dev]
lint = [{ name = "ruff", specifier = ">=0.14.11,<0.16.0" }]
lint = [{ name = "ruff", specifier = ">=0.15.0,<0.16.0" }]
test = [
{ name = "blockbuster", specifier = ">=1.5.26,<1.6.0" },
{ name = "langchain-openai", editable = "." },
@@ -610,7 +610,7 @@ typing = [
[[package]]
name = "langchain-core"
version = "1.2.11"
version = "1.2.12"
source = { editable = "../../core" }
dependencies = [
{ name = "jsonpatch" },
@@ -641,7 +641,7 @@ dev = [
{ name = "jupyter", specifier = ">=1.0.0,<2.0.0" },
{ name = "setuptools", specifier = ">=67.6.1,<83.0.0" },
]
lint = [{ name = "ruff", specifier = ">=0.14.11,<0.16.0" }]
lint = [{ name = "ruff", specifier = ">=0.15.0,<0.16.0" }]
test = [
{ name = "blockbuster", specifier = ">=1.5.18,<1.6.0" },
{ name = "freezegun", specifier = ">=1.2.2,<2.0.0" },
@@ -718,7 +718,7 @@ typing = [
[package.metadata]
requires-dist = [
{ name = "langchain-core", editable = "../../core" },
{ name = "openai", specifier = ">=1.109.1,<3.0.0" },
{ name = "openai", specifier = ">=2.20.0,<3.0.0" },
{ name = "tiktoken", specifier = ">=0.7.0,<1.0.0" },
]
@@ -791,7 +791,7 @@ requires-dist = [
]
[package.metadata.requires-dev]
lint = [{ name = "ruff", specifier = ">=0.14.11,<0.16.0" }]
lint = [{ name = "ruff", specifier = ">=0.15.0,<0.16.0" }]
test = [{ name = "langchain-core", editable = "../../core" }]
test-integration = []
typing = [