fix(core): _parse_google_docstring mishandling continuation lines with colons (#35680)

## Description

`_parse_google_docstring` incorrectly parses multi-line argument
descriptions when a continuation line contains a colon. The continuation
line is treated as a new argument definition instead of being appended
to the current argument's description.

### Example

```python
def search(query: str, top_k: int = 5) -> str:
    """Search the knowledge base.

    Args:
        query: The search query to use
            for finding things: important ones
        top_k: Number of results to return
    """
```

- **Before (broken):** The parser creates 3 args: `query`,
  `for finding things`, `top_k`
- **After (fixed):** The parser correctly creates 2 args: `query` (with
  the full description, including "for finding things: important ones")
  and `top_k`

### Root Cause

The parser used `if ":" in line` to detect new argument lines without
considering indentation. In Google-style docstrings, continuation lines
have deeper indentation than argument definition lines.

### Fix

Detect the base indentation level from the first argument line and treat
any line with deeper indentation as a continuation of the current
argument's description, regardless of whether it contains a colon.

## Issue

Fixes #35679

## Dependencies

None.

## Testing

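Added 8 unit tests in
`test_function_calling.py::TestParseGoogleDocstring`:

- `test_continuation_line_with_colon` — the core bug scenario
- `test_simple_args_still_work` — regression check for basic args
- `test_continuation_line_without_colon` — multi-line descriptions
  without colons
- `test_multiple_continuation_lines_with_colons` — multiple continuation
  lines each containing colons
- `test_annotated_arg_with_colon_continuation` — a `(type)` annotation
  combined with a colon-bearing continuation line
- `test_returns_section_after_args_excluded` — a `Returns:` section after
  `Args:` does not leak in as an argument
- `test_same_indent_colon_line_is_new_arg` — a colon line at the base
  indentation starts a new argument
- `test_more_indented_second_arg_folds_into_previous` — pins the
  trade-off for non-uniformly indented argument blocks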

All tests pass locally with Python 3.12.

---------

Co-authored-by: gambletan <ethanchang32@gmail.com>
Co-authored-by: Mason Daugherty <github@mdrxy.com>
This commit is contained in:
Alvin Tang
2026-06-23 12:34:02 +08:00
committed by GitHub
parent 64177b6fc5
commit 95fe150ad2
2 changed files with 140 additions and 3 deletions

View File

@@ -781,11 +781,27 @@ def _parse_google_docstring(
raise ValueError(msg)
description = ""
args_block = None
arg_descriptions = {}
arg_descriptions: dict[str, str] = {}
if args_block:
arg = None
arg: str | None = None
# Base indentation, latched once from the first argument line, lets us
# distinguish new argument definitions from continuation lines. This
# assumes Google-style uniform indentation of argument names: a line
# indented deeper than the first argument is treated as a continuation
# (even if it contains a colon), so a more-indented later `name:` line
# in a malformed, non-uniformly-indented block folds into the previous
# argument rather than starting a new one.
arg_indent: int | None = None
for line in args_block.split("\n")[1:]:
if ":" in line:
if not line.strip():
continue
current_indent = len(line) - len(line.lstrip())
if arg_indent is None and ":" in line:
arg_indent = current_indent
is_continuation = arg_indent is not None and current_indent > arg_indent
if arg is not None and is_continuation:
arg_descriptions[arg] += " " + line.strip()
elif ":" in line:
arg, desc = line.split(":", maxsplit=1)
arg = arg.strip()
arg_name, _, annotations_ = arg.partition(" ")

View File

@@ -30,6 +30,7 @@ from langchain_core.runnables import RunnableLambda
from langchain_core.tools import BaseTool, StructuredTool, Tool, tool
from langchain_core.utils.function_calling import (
_convert_typed_dict_to_openai_function,
_parse_google_docstring,
convert_to_json_schema,
convert_to_openai_function,
convert_to_openai_tool,
@@ -1250,6 +1251,126 @@ def test_convert_to_openai_function_json_schema_missing_title_includes_schema()
convert_to_openai_function(schema_without_title)
class TestParseGoogleDocstring:
    """Tests for _parse_google_docstring continuation-line handling.

    Every fixture uses the dedented Google-style layout: argument names are
    indented four spaces under ``Args:`` and continuation lines eight spaces.
    The parser tells the two apart by indentation depth, so the exact leading
    whitespace inside each fixture string is significant.
    """

    def test_continuation_line_with_colon(self) -> None:
        """Continuation lines containing colons should not be treated as new args."""
        # inspect.getdoc() returns dedented docstrings, so match that format
        docstring = (
            "Search the knowledge base.\n"
            "\n"
            "Args:\n"
            "    query: The search query to use\n"
            "        for finding things: important ones\n"
            "    top_k: Number of results to return"
        )
        _desc, args = _parse_google_docstring(docstring, ["query", "top_k"])
        assert "query" in args
        assert "top_k" in args
        assert len(args) == 2
        assert "for finding things: important ones" in args["query"]

    def test_simple_args_still_work(self) -> None:
        """Basic single-line argument descriptions should still parse correctly."""
        docstring = (
            "Do something.\n"
            "\n"
            "Args:\n"
            "    x: The x value\n"
            "    y: The y value"
        )
        _desc, args = _parse_google_docstring(docstring, ["x", "y"])
        assert args == {"x": "The x value", "y": "The y value"}

    def test_continuation_line_without_colon(self) -> None:
        """Colon-free continuation lines append to the current arg.

        Documents preserved behavior: this case parsed correctly before the
        continuation-detection fix (via the colon-free fallback branch) and
        must continue to.
        """
        docstring = (
            "Do something.\n"
            "\n"
            "Args:\n"
            "    name: A very long description that\n"
            "        spans multiple lines\n"
            "    age: The age"
        )
        _desc, args = _parse_google_docstring(docstring, ["name", "age"])
        assert "spans multiple lines" in args["name"]
        assert args["age"] == "The age"

    def test_multiple_continuation_lines_with_colons(self) -> None:
        """Multiple continuation lines with colons should all be appended."""
        docstring = (
            "Process data.\n"
            "\n"
            "Args:\n"
            "    config: Configuration string in format\n"
            "        key1: value1\n"
            "        key2: value2\n"
            "    verbose: Enable verbose output"
        )
        _desc, args = _parse_google_docstring(docstring, ["config", "verbose"])
        assert "key1: value1" in args["config"]
        assert "key2: value2" in args["config"]
        assert args["verbose"] == "Enable verbose output"

    def test_annotated_arg_with_colon_continuation(self) -> None:
        """A `(type)` annotation strips correctly alongside a colon continuation.

        Exercises both code paths the fix touches at once: the parenthesized
        type annotation is stripped from the arg name, and the colon-bearing
        continuation line folds into that arg rather than creating a phantom
        key (the original bug).
        """
        docstring = (
            "Run a query.\n"
            "\n"
            "Args:\n"
            "    query (str): The query to run\n"
            "        details: extra info\n"
            "    k (int): Number of results"
        )
        _desc, args = _parse_google_docstring(docstring, ["query", "k"])
        assert set(args) == {"query", "k"}
        assert "details: extra info" in args["query"]
        assert args["k"] == "Number of results"

    def test_returns_section_after_args_excluded(self) -> None:
        """A well-formed Returns: block after Args: must not leak in as an arg.

        The blank line separating the sections terminates the Args block, so
        `Returns`/`Raises` and their indented bodies stay out of
        `arg_descriptions`.
        """
        docstring = (
            "Do work.\n"
            "\n"
            "Args:\n"
            "    x: The x value\n"
            "\n"
            "Returns:\n"
            "    result: yes\n"
        )
        _desc, args = _parse_google_docstring(docstring, ["x"])
        assert args == {"x": "The x value"}

    def test_same_indent_colon_line_is_new_arg(self) -> None:
        """A colon line at the base arg indent starts a new arg, not a continuation.

        Pins the `current_indent > arg_indent` boundary: only deeper-indented
        lines are continuations.
        """
        docstring = (
            "Do work.\n"
            "\n"
            "Args:\n"
            "    a: first\n"
            "    b: second"
        )
        _desc, args = _parse_google_docstring(docstring, ["a", "b"])
        assert args == {"a": "first", "b": "second"}

    def test_more_indented_second_arg_folds_into_previous(self) -> None:
        """Non-uniform indentation: a deeper second arg folds into the previous one.

        Documents the intentional trade-off of indentation-based detection.
        Google style requires uniform argument indentation; when a later arg is
        indented deeper than the first, it is indistinguishable from a
        colon-bearing continuation and is merged into the prior arg. This pins
        that behavior so it stays intentional rather than incidental.
        """
        # `y` is deliberately indented deeper than `x` (8 vs. 4 spaces).
        docstring = (
            "Do work.\n"
            "\n"
            "Args:\n"
            "    x: the x value\n"
            "        y: the y value"
        )
        _desc, args = _parse_google_docstring(docstring, ["x", "y"])
        assert set(args) == {"x"}
        assert "y: the y value" in args["x"]
def test_convert_to_openai_tool_apply_patch_passthrough() -> None:
"""Test apply_patch is passed through as an OpenAI built-in tool."""
tool = {"type": "apply_patch"}