diff --git a/libs/langchain/langchain/storage/file_system.py b/libs/langchain/langchain/storage/file_system.py index bd0aca4875b..3437684758a 100644 --- a/libs/langchain/langchain/storage/file_system.py +++ b/libs/langchain/langchain/storage/file_system.py @@ -36,14 +36,26 @@ class LocalFileStore(ByteStore): """ - def __init__(self, root_path: Union[str, Path]) -> None: + def __init__( + self, + root_path: Union[str, Path], + *, + chmod_file: Optional[int] = None, + chmod_dir: Optional[int] = None, + ) -> None: """Implement the BaseStore interface for the local file system. Args: root_path (Union[str, Path]): The root path of the file store. All keys are interpreted as paths relative to this root. + chmod_file: (optional, defaults to `None`) If specified, sets permissions + for newly created files, overriding the current `umask` if needed. + chmod_dir: (optional, defaults to `None`) If specified, sets permissions + for newly created dirs, overriding the current `umask` if needed. """ self.root_path = Path(root_path).absolute() + self.chmod_file = chmod_file + self.chmod_dir = chmod_dir def _get_full_path(self, key: str) -> Path: """Get the full path for a given key relative to the root path. @@ -66,6 +78,24 @@ class LocalFileStore(ByteStore): return Path(full_path) + def _mkdir_for_store(self, dir: Path) -> None: + """Makes a store directory path (including parents) with specified permissions + + This is needed because `Path.mkdir()` is restricted by the current `umask`, + whereas the explicit `os.chmod()` used here is not. + + Args: + dir: (Path) The store directory to make + + Returns: + None + """ + if not dir.exists(): + self._mkdir_for_store(dir.parent) + dir.mkdir(exist_ok=True) + if self.chmod_dir is not None: + os.chmod(dir, self.chmod_dir) + def mget(self, keys: Sequence[str]) -> List[Optional[bytes]]: """Get the values associated with the given keys. @@ -97,8 +127,10 @@ class LocalFileStore(ByteStore): """ for key, value in key_value_pairs: full_path = self._get_full_path(key) - full_path.parent.mkdir(parents=True, exist_ok=True) + self._mkdir_for_store(full_path.parent) full_path.write_bytes(value) + if self.chmod_file is not None: + os.chmod(full_path, self.chmod_file) def mdelete(self, keys: Sequence[str]) -> None: """Delete the given keys and their associated values. diff --git a/libs/langchain/tests/unit_tests/storage/test_filesystem.py b/libs/langchain/tests/unit_tests/storage/test_filesystem.py index 4213006bf25..26d5cccd683 100644 --- a/libs/langchain/tests/unit_tests/storage/test_filesystem.py +++ b/libs/langchain/tests/unit_tests/storage/test_filesystem.py @@ -29,6 +29,34 @@ def test_mset_and_mget(file_store: LocalFileStore) -> None: assert values == [b"value1", b"value2"] +@pytest.mark.parametrize( + "chmod_dir_s, chmod_file_s", [("777", "666"), ("770", "660"), ("700", "600")] +) +def test_mset_chmod(chmod_dir_s: str, chmod_file_s: str) -> None: + chmod_dir = int(chmod_dir_s, base=8) + chmod_file = int(chmod_file_s, base=8) + + # Create a temporary directory for testing + with tempfile.TemporaryDirectory() as temp_dir: + # Instantiate the LocalFileStore with a directory inside the temporary directory + # as the root path + temp_dir = os.path.join(temp_dir, "store_dir") + file_store = LocalFileStore( + temp_dir, chmod_dir=chmod_dir, chmod_file=chmod_file + ) + + # Set values for keys + key_value_pairs = [("key1", b"value1"), ("key2", b"value2")] + file_store.mset(key_value_pairs) + + # verify the permissions are set correctly + # (test only the standard user/group/other bits) + dir_path = str(file_store.root_path) + file_path = os.path.join(dir_path, "key1") + assert (os.stat(dir_path).st_mode & 0o777) == chmod_dir + assert (os.stat(file_path).st_mode & 0o777) == chmod_file + + def test_mdelete(file_store: LocalFileStore) -> None: # Set values for keys key_value_pairs = [("key1", b"value1"), ("key2", b"value2")]