mirror of
https://github.com/hwchase17/langchain.git
synced 2026-05-19 14:20:55 +00:00
fix(langchain): sort glob_search results by mtime (newest first) (#37462)
Closes #37369

---

The `glob_search` tool in `FilesystemFileSearchMiddleware` documents that results are "sorted by modification time (most recently modified first)", but the implementation was returning files in the arbitrary order provided by `Path.glob()`.

This change adds a sort by modification timestamp (`modified_at`), in descending order, immediately before extracting the file paths for the return value. No public API changes.

---------

Co-authored-by: Mason Daugherty <github@mdrxy.com>
This commit is contained in:
@@ -170,6 +170,7 @@ class FilesystemFileSearchMiddleware(AgentMiddleware[AgentState[ResponseT], Cont
|
||||
if not matching:
|
||||
return "No files found"
|
||||
|
||||
matching.sort(key=lambda item: item[1], reverse=True)
|
||||
file_paths = [p for p, _ in matching]
|
||||
return "\n".join(file_paths)
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
"""Unit tests for file search middleware."""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
@@ -210,6 +211,33 @@ class TestFilesystemGlobSearch:
|
||||
|
||||
assert result == "No files found"
|
||||
|
||||
def test_glob_results_sorted_by_mtime_desc(self, tmp_path: Path) -> None:
    """Check that `glob_search` lists matches most recently modified first."""
    # File names sort alphabetically from oldest to newest, which is the
    # reverse of the expected newest-first output, so a name-ordered
    # listing cannot satisfy the assertion by coincidence.
    mtimes_by_name = {
        "a_oldest.txt": 1_000_000,
        "b_middle.txt": 2_000_000,
        "c_newest.txt": 3_000_000,
    }
    for file_name in mtimes_by_name:
        (tmp_path / file_name).write_text("x", encoding="utf-8")

    # Pin access/modification times explicitly (seconds since the epoch)
    # rather than sleeping between writes, so the ordering is deterministic.
    for file_name, stamp in mtimes_by_name.items():
        os.utime(tmp_path / file_name, (stamp, stamp))

    middleware = FilesystemFileSearchMiddleware(root_path=str(tmp_path))
    search_tool = middleware.glob_search

    assert isinstance(search_tool, StructuredTool)
    assert search_tool.func is not None
    listing = search_tool.func(pattern="*.txt")

    expected_newest_first = ["/c_newest.txt", "/b_middle.txt", "/a_oldest.txt"]
    assert listing.splitlines() == expected_newest_first
|
||||
|
||||
|
||||
class TestPathTraversalSecurity:
|
||||
"""Security tests for path traversal protection."""
|
||||
|
||||
Reference in New Issue
Block a user