From 95fe150ad2863ef0ee54acfa4a177bd0acc137dd Mon Sep 17 00:00:00 2001 From: Alvin Tang Date: Tue, 23 Jun 2026 12:34:02 +0800 Subject: [PATCH] fix(core): `_parse_google_docstring` mishandling continuation lines with colons (#35680) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Description `_parse_google_docstring` incorrectly parses multi-line argument descriptions when a continuation line contains a colon. The continuation line is treated as a new argument definition instead of being appended to the current argument's description. ### Example ```python def search(query: str, top_k: int = 5) -> str: """Search the knowledge base. Args: query: The search query to use for finding things: important ones top_k: Number of results to return """ ``` **Before (broken):** The parser creates 3 args: `query`, `for finding things`, `top_k` **After (fixed):** The parser correctly creates 2 args: `query` (with full description including "for finding things: important ones"), `top_k` ### Root Cause The parser used `if ":" in line` to detect new argument lines without considering indentation. In Google-style docstrings, continuation lines have deeper indentation than argument definition lines. ### Fix Detect the base indentation level from the first argument line and treat any line with deeper indentation as a continuation of the current argument's description, regardless of whether it contains a colon. ## Issue Fixes #35679 ## Dependencies None. ## Testing Added 8 unit tests in `test_function_calling.py::TestParseGoogleDocstring`: - `test_continuation_line_with_colon` — the core bug scenario - `test_simple_args_still_work` — regression check for basic args - `test_continuation_line_without_colon` — multi-line descriptions without colons - `test_multiple_continuation_lines_with_colons` — multiple continuation lines each containing colons - `test_annotated_arg_with_colon_continuation` — a `(type)`-annotated arg followed by a colon-bearing continuation line - `test_returns_section_after_args_excluded` — a `Returns:` section after `Args:` is not parsed as an argument - `test_same_indent_colon_line_is_new_arg` — a colon line at the base argument indent starts a new arg - `test_more_indented_second_arg_folds_into_previous` — documents that a later arg indented deeper than the first folds into the previous arg All tests pass locally with Python 3.12. 
--------- Co-authored-by: gambletan Co-authored-by: Mason Daugherty --- .../langchain_core/utils/function_calling.py | 22 +++- .../unit_tests/utils/test_function_calling.py | 121 ++++++++++++++++++ 2 files changed, 140 insertions(+), 3 deletions(-) diff --git a/libs/core/langchain_core/utils/function_calling.py b/libs/core/langchain_core/utils/function_calling.py index 748162e3e06..1e63802a5dc 100644 --- a/libs/core/langchain_core/utils/function_calling.py +++ b/libs/core/langchain_core/utils/function_calling.py @@ -781,11 +781,27 @@ def _parse_google_docstring( raise ValueError(msg) description = "" args_block = None - arg_descriptions = {} + arg_descriptions: dict[str, str] = {} if args_block: - arg = None + arg: str | None = None + # Base indentation, latched once from the first argument line, lets us + # distinguish new argument definitions from continuation lines. This + # assumes Google-style uniform indentation of argument names: a line + # indented deeper than the first argument is treated as a continuation + # (even if it contains a colon), so a more-indented later `name:` line + # in a malformed, non-uniformly-indented block folds into the previous + # argument rather than starting a new one. 
+ arg_indent: int | None = None for line in args_block.split("\n")[1:]: - if ":" in line: + if not line.strip(): + continue + current_indent = len(line) - len(line.lstrip()) + if arg_indent is None and ":" in line: + arg_indent = current_indent + is_continuation = arg_indent is not None and current_indent > arg_indent + if arg is not None and is_continuation: + arg_descriptions[arg] += " " + line.strip() + elif ":" in line: arg, desc = line.split(":", maxsplit=1) arg = arg.strip() arg_name, _, annotations_ = arg.partition(" ") diff --git a/libs/core/tests/unit_tests/utils/test_function_calling.py b/libs/core/tests/unit_tests/utils/test_function_calling.py index e6e46fd4506..395ecbc4e43 100644 --- a/libs/core/tests/unit_tests/utils/test_function_calling.py +++ b/libs/core/tests/unit_tests/utils/test_function_calling.py @@ -30,6 +30,7 @@ from langchain_core.runnables import RunnableLambda from langchain_core.tools import BaseTool, StructuredTool, Tool, tool from langchain_core.utils.function_calling import ( _convert_typed_dict_to_openai_function, + _parse_google_docstring, convert_to_json_schema, convert_to_openai_function, convert_to_openai_tool, @@ -1250,6 +1251,126 @@ def test_convert_to_openai_function_json_schema_missing_title_includes_schema() convert_to_openai_function(schema_without_title) +class TestParseGoogleDocstring: + """Tests for _parse_google_docstring continuation-line handling.""" + + def test_continuation_line_with_colon(self) -> None: + """Continuation lines containing colons should not be treated as new args.""" + # inspect.getdoc() returns dedented docstrings, so match that format + docstring = ( + "Search the knowledge base.\n" + "\n" + "Args:\n" + " query: The search query to use\n" + " for finding things: important ones\n" + " top_k: Number of results to return" + ) + _desc, args = _parse_google_docstring(docstring, ["query", "top_k"]) + assert "query" in args + assert "top_k" in args + assert len(args) == 2 + assert "for finding things: 
important ones" in args["query"] + + def test_simple_args_still_work(self) -> None: + """Basic single-line argument descriptions should still parse correctly.""" + docstring = "Do something.\n\nArgs:\n x: The x value\n y: The y value" + _desc, args = _parse_google_docstring(docstring, ["x", "y"]) + assert args == {"x": "The x value", "y": "The y value"} + + def test_continuation_line_without_colon(self) -> None: + """Colon-free continuation lines append to the current arg. + + Documents preserved behavior: this case parsed correctly before the + continuation-detection fix (via the colon-free fallback branch) and + must continue to. + """ + docstring = ( + "Do something.\n" + "\n" + "Args:\n" + " name: A very long description that\n" + " spans multiple lines\n" + " age: The age" + ) + _desc, args = _parse_google_docstring(docstring, ["name", "age"]) + assert "spans multiple lines" in args["name"] + assert args["age"] == "The age" + + def test_multiple_continuation_lines_with_colons(self) -> None: + """Multiple continuation lines with colons should all be appended.""" + docstring = ( + "Process data.\n" + "\n" + "Args:\n" + " config: Configuration string in format\n" + " key1: value1\n" + " key2: value2\n" + " verbose: Enable verbose output" + ) + _desc, args = _parse_google_docstring(docstring, ["config", "verbose"]) + assert "key1: value1" in args["config"] + assert "key2: value2" in args["config"] + assert args["verbose"] == "Enable verbose output" + + def test_annotated_arg_with_colon_continuation(self) -> None: + """A `(type)` annotation strips correctly alongside a colon continuation. + + Exercises both code paths the fix touches at once: the parenthesized + type annotation is stripped from the arg name, and the colon-bearing + continuation line folds into that arg rather than creating a phantom + key (the original bug). 
+ """ + docstring = ( + "Run a query.\n" + "\n" + "Args:\n" + " query (str): The query to run\n" + " details: extra info\n" + " k (int): Number of results" + ) + _desc, args = _parse_google_docstring(docstring, ["query", "k"]) + assert set(args) == {"query", "k"} + assert "details: extra info" in args["query"] + assert args["k"] == "Number of results" + + def test_returns_section_after_args_excluded(self) -> None: + """A well-formed Returns: block after Args: must not leak in as an arg. + + The blank line separating the sections terminates the Args block, so + `Returns`/`Raises` and their indented bodies stay out of + `arg_descriptions`. + """ + docstring = ( + "Do work.\n\nArgs:\n x: The x value\n\nReturns:\n result: yes\n" + ) + _desc, args = _parse_google_docstring(docstring, ["x"]) + assert args == {"x": "The x value"} + + def test_same_indent_colon_line_is_new_arg(self) -> None: + """A colon line at the base arg indent starts a new arg, not a continuation. + + Pins the `current_indent > arg_indent` boundary: only deeper-indented + lines are continuations. + """ + docstring = "Do work.\n\nArgs:\n a: first\n b: second" + _desc, args = _parse_google_docstring(docstring, ["a", "b"]) + assert args == {"a": "first", "b": "second"} + + def test_more_indented_second_arg_folds_into_previous(self) -> None: + """Non-uniform indentation: a deeper second arg folds into the previous one. + + Documents the intentional trade-off of indentation-based detection. + Google style requires uniform argument indentation; when a later arg is + indented deeper than the first, it is indistinguishable from a + colon-bearing continuation and is merged into the prior arg. This pins + that behavior so it stays intentional rather than incidental. 
+ """ + docstring = "Do work.\n\nArgs:\n x: the x value\n y: the y value" + _desc, args = _parse_google_docstring(docstring, ["x", "y"]) + assert set(args) == {"x"} + assert "y: the y value" in args["x"] + + def test_convert_to_openai_tool_apply_patch_passthrough() -> None: """Test apply_patch is passed through as an OpenAI built-in tool.""" tool = {"type": "apply_patch"}