diff --git a/libs/langchain/langchain/storage/file_system.py b/libs/langchain/langchain/storage/file_system.py
index 3437684758a..477395ce2e3 100644
--- a/libs/langchain/langchain/storage/file_system.py
+++ b/libs/langchain/langchain/storage/file_system.py
@@ -1,5 +1,6 @@
 import os
 import re
+import time
 from pathlib import Path
 from typing import Iterator, List, Optional, Sequence, Tuple, Union
 
@@ -42,6 +43,7 @@ class LocalFileStore(ByteStore):
         *,
         chmod_file: Optional[int] = None,
         chmod_dir: Optional[int] = None,
+        update_atime: bool = False,
     ) -> None:
         """Implement the BaseStore interface for the local file system.
 
@@ -52,10 +54,15 @@ class LocalFileStore(ByteStore):
                 for newly created files, overriding the current `umask` if needed.
             chmod_dir: (optional, defaults to `None`) If specified, sets permissions
                 for newly created dirs, overriding the current `umask` if needed.
+            update_atime: (optional, defaults to `False`) If `True`, updates the
+                filesystem access time (but not the modified time) when a file is read.
+                This allows MRU/LRU cache policies to be implemented for filesystems
+                where access time updates are disabled.
         """
         self.root_path = Path(root_path).absolute()
         self.chmod_file = chmod_file
         self.chmod_dir = chmod_dir
+        self.update_atime = update_atime
 
     def _get_full_path(self, key: str) -> Path:
         """Get the full path for a given key relative to the root path.
@@ -112,6 +119,11 @@ class LocalFileStore(ByteStore):
             if full_path.exists():
                 value = full_path.read_bytes()
                 values.append(value)
+                if self.update_atime:
+                    # update access time only; use nanosecond fields so the modified
+                    # time is preserved exactly (float seconds would round it)
+                    mtime_ns = os.stat(full_path).st_mtime_ns
+                    os.utime(full_path, ns=(time.time_ns(), mtime_ns))
             else:
                 values.append(None)
         return values
diff --git a/libs/langchain/tests/unit_tests/storage/test_filesystem.py b/libs/langchain/tests/unit_tests/storage/test_filesystem.py
index 455d39e7dd0..c878bd6f191 100644
--- a/libs/langchain/tests/unit_tests/storage/test_filesystem.py
+++ b/libs/langchain/tests/unit_tests/storage/test_filesystem.py
@@ -57,6 +57,33 @@ def test_mset_chmod(chmod_dir_s: str, chmod_file_s: str) -> None:
         assert (os.stat(file_path).st_mode & 0o777) == chmod_file
 
 
+def test_mget_update_atime() -> None:
+    # Create a temporary directory for testing
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Instantiate the LocalFileStore with a directory inside the temporary directory
+        # as the root path
+        temp_dir = os.path.join(temp_dir, "store_dir")
+        file_store = LocalFileStore(temp_dir, update_atime=True)
+
+        # Set values for keys
+        key_value_pairs = [("key1", b"value1"), ("key2", b"value2")]
+        file_store.mset(key_value_pairs)
+
+        # Get original access and modified times
+        dir_path = str(file_store.root_path)
+        file_path = os.path.join(dir_path, "key1")
+        stat1 = os.stat(file_path)
+
+        # Get values for keys
+        _ = file_store.mget(["key1", "key2"])
+
+        # Make sure the access time has been updated and the modified time
+        # has been preserved exactly
+        stat2 = os.stat(file_path)
+        assert stat2.st_atime != stat1.st_atime
+        assert stat2.st_mtime_ns == stat1.st_mtime_ns
+
+
 def test_mdelete(file_store: LocalFileStore) -> None:
     # Set values for keys
     key_value_pairs = [("key1", b"value1"), ("key2", b"value2")]