chore(langchain_v1): clean anything uncertain (#32228)

Further cleanup of the namespace:

- Remove prompts (we'll re-add them in a separate commit)
- Remove LocalFileStore until we can review whether all the
implementation details are necessary
- Remove message processing logic from memory (we'll figure out where to
expose it)
- Remove `Tool` primitive (should be sufficient to use `BaseTool` for
typing purposes; see the sketch after this list)
- Remove utilities to create kv stores; it's unclear whether they've had much
usage outside MultiparentRetriever
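
As noted in the `Tool` bullet above, typing against `BaseTool` covers tools built with the `@tool` decorator. A minimal sketch (the `add`/`describe` names are illustrative, not part of this commit):

from langchain_core.tools import BaseTool, tool

@tool
def add(a: int, b: int) -> int:
    """Add two integers."""
    return a + b

def describe(t: BaseTool) -> str:
    # Annotate with BaseTool: @tool returns a BaseTool subclass, so the
    # concrete Tool class is not needed for type hints.
    return f"{t.name}: {t.description}"

print(describe(add))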
Eugene Yurtsev 2025-07-24 10:41:05 -04:00 committed by GitHub
parent bdf1cd383c
commit 7995c719c5
18 changed files with 8 additions and 577 deletions

View File

@@ -81,6 +81,7 @@ jobs:
core
cli
langchain
langchain_v1
standard-tests
text-splitters
docs

View File

@@ -1,6 +0,0 @@
.venv
.github
.git
.mypy_cache
.pytest_cache
Dockerfile

View File

@@ -1,24 +1,5 @@
"""**Chat Models** are a variation on language models.
While Chat Models use language models under the hood, the interface they expose
is a bit different. Rather than expose a "text in, text out" API, they expose
an interface where "chat messages" are the inputs and outputs.
**Class hierarchy:**
.. code-block::
BaseLanguageModel --> BaseChatModel --> <name> # Examples: ChatOpenAI, ChatGooglePalm
**Main helpers:**
.. code-block::
AIMessage, BaseMessage, HumanMessage
""" # noqa: E501
from langchain_core.language_models import BaseChatModel
from langchain.chat_models.base import init_chat_model
__all__ = [
"init_chat_model",
]
__all__ = ["BaseChatModel", "init_chat_model"]

View File

@@ -1,7 +1,10 @@
from langchain_core.embeddings import Embeddings
from langchain.embeddings.base import init_embeddings
from langchain.embeddings.cache import CacheBackedEmbeddings
__all__ = [
"CacheBackedEmbeddings",
"Embeddings",
"init_embeddings",
]

View File

@@ -1,16 +0,0 @@
"""TBD: This module should provide high level building blocks for memory management.
We may want to wait until we combine:
1. langmem
2. some basic functions for message summarization
"""
from langchain_core.messages import filter_messages, trim_messages
from langchain_core.messages.utils import count_tokens_approximately
__all__ = [
"count_tokens_approximately",
"filter_messages",
"trim_messages",
]
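
Although this placeholder module is removed, the helpers it re-exported remain importable from langchain_core. A sketch of trimming a conversation to a token budget with them:

from langchain_core.messages import AIMessage, HumanMessage, trim_messages
from langchain_core.messages.utils import count_tokens_approximately

history = [
    HumanMessage("hi"),
    AIMessage("hello"),
    HumanMessage("tell me a story"),
]
trimmed = trim_messages(
    history,
    max_tokens=50,                             # token budget for the kept messages
    token_counter=count_tokens_approximately,  # rough, model-agnostic counter
    strategy="last",                           # prefer the most recent messages
)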

View File

@@ -1,39 +0,0 @@
from langchain_core.example_selectors import (
LengthBasedExampleSelector,
MaxMarginalRelevanceExampleSelector,
SemanticSimilarityExampleSelector,
)
from langchain_core.prompts import (
AIMessagePromptTemplate,
BaseChatPromptTemplate,
BasePromptTemplate,
ChatMessagePromptTemplate,
ChatPromptTemplate,
FewShotChatMessagePromptTemplate,
FewShotPromptTemplate,
FewShotPromptWithTemplates,
HumanMessagePromptTemplate,
MessagesPlaceholder,
PromptTemplate,
StringPromptTemplate,
SystemMessagePromptTemplate,
)
__all__ = [
"AIMessagePromptTemplate",
"BaseChatPromptTemplate",
"BasePromptTemplate",
"ChatMessagePromptTemplate",
"ChatPromptTemplate",
"FewShotChatMessagePromptTemplate",
"FewShotPromptTemplate",
"FewShotPromptWithTemplates",
"HumanMessagePromptTemplate",
"LengthBasedExampleSelector",
"MaxMarginalRelevanceExampleSelector",
"MessagesPlaceholder",
"PromptTemplate",
"SemanticSimilarityExampleSelector",
"StringPromptTemplate",
"SystemMessagePromptTemplate",
]

View File

@@ -12,16 +12,11 @@ from langchain_core.stores import (
InvalidKeyException,
)
from langchain.storage._lc_store import create_kv_docstore, create_lc_store
from langchain.storage.encoder_backed import EncoderBackedStore
from langchain.storage.file_system import LocalFileStore
__all__ = [
"EncoderBackedStore",
"InMemoryByteStore",
"InMemoryStore",
"InvalidKeyException",
"LocalFileStore",
"create_kv_docstore",
"create_lc_store",
]

View File

@@ -1,91 +0,0 @@
"""Create a key-value store for any langchain serializable object."""
from typing import Callable, Optional
from langchain_core.documents import Document
from langchain_core.load import Serializable, dumps, loads
from langchain_core.stores import BaseStore, ByteStore
from langchain.storage.encoder_backed import EncoderBackedStore
def _dump_as_bytes(obj: Serializable) -> bytes:
"""Return a bytes representation of a document."""
return dumps(obj).encode("utf-8")
def _dump_document_as_bytes(obj: Document) -> bytes:
"""Return a bytes representation of a document."""
if not isinstance(obj, Document):
msg = "Expected a Document instance"
raise TypeError(msg)
return dumps(obj).encode("utf-8")
def _load_document_from_bytes(serialized: bytes) -> Document:
"""Return a document from a bytes representation."""
obj = loads(serialized.decode("utf-8"))
if not isinstance(obj, Document):
msg = f"Expected a Document instance. Got {type(obj)}"
raise TypeError(msg)
return obj
def _load_from_bytes(serialized: bytes) -> Serializable:
"""Return a document from a bytes representation."""
return loads(serialized.decode("utf-8"))
def _identity(x: str) -> str:
"""Return the same object."""
return x
# PUBLIC API
def create_lc_store(
store: ByteStore,
*,
key_encoder: Optional[Callable[[str], str]] = None,
) -> BaseStore[str, Serializable]:
"""Create a store for langchain serializable objects from a bytes store.
Args:
store: A bytes store to use as the underlying store.
key_encoder: A function to encode keys; if None uses identity function.
Returns:
A key-value store for documents.
"""
return EncoderBackedStore(
store,
key_encoder or _identity,
_dump_as_bytes,
_load_from_bytes,
)
def create_kv_docstore(
store: ByteStore,
*,
key_encoder: Optional[Callable[[str], str]] = None,
) -> BaseStore[str, Document]:
"""Create a store for langchain Document objects from a bytes store.
This store does run time type checking to ensure that the values are
Document objects.
Args:
store: A bytes store to use as the underlying store.
key_encoder: A function to encode keys; if None uses identity function.
Returns:
A key-value store for documents.
"""
return EncoderBackedStore(
store,
key_encoder or _identity,
_dump_document_as_bytes,
_load_document_from_bytes,
)
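
With create_lc_store/create_kv_docstore removed, an equivalent Document store can still be assembled from EncoderBackedStore directly. A sketch, assuming the same dumps/loads serialization is acceptable (InMemoryByteStore is used purely for illustration):

from langchain_core.documents import Document
from langchain_core.load import dumps, loads
from langchain_core.stores import InMemoryByteStore

from langchain.storage import EncoderBackedStore

docstore = EncoderBackedStore(
    InMemoryByteStore(),                       # any ByteStore works as the backend
    lambda key: key,                           # identity key encoder
    lambda doc: dumps(doc).encode("utf-8"),    # Document -> bytes
    lambda data: loads(data.decode("utf-8")),  # bytes -> Document
)
docstore.mset([("key1", Document(page_content="hello"))])
doc = docstore.mget(["key1"])[0]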

View File

@@ -1,176 +0,0 @@
import os
import re
import time
from collections.abc import Iterator, Sequence
from pathlib import Path
from typing import Optional, Union
from langchain_core.stores import ByteStore
from langchain.storage.exceptions import InvalidKeyException
class LocalFileStore(ByteStore):
"""BaseStore interface that works on the local file system.
Examples:
Create a LocalFileStore instance and perform operations on it:
.. code-block:: python
from langchain.storage import LocalFileStore
# Instantiate the LocalFileStore with the root path
file_store = LocalFileStore("/path/to/root")
# Set values for keys
file_store.mset([("key1", b"value1"), ("key2", b"value2")])
# Get values for keys
values = file_store.mget(["key1", "key2"]) # Returns [b"value1", b"value2"]
# Delete keys
file_store.mdelete(["key1"])
# Iterate over keys
for key in file_store.yield_keys():
print(key) # noqa: T201
"""
def __init__(
self,
root_path: Union[str, Path],
*,
chmod_file: Optional[int] = None,
chmod_dir: Optional[int] = None,
update_atime: bool = False,
) -> None:
"""Implement the BaseStore interface for the local file system.
Args:
root_path (Union[str, Path]): The root path of the file store. All keys are
interpreted as paths relative to this root.
chmod_file: (optional, defaults to `None`) If specified, sets permissions
for newly created files, overriding the current `umask` if needed.
chmod_dir: (optional, defaults to `None`) If specified, sets permissions
for newly created dirs, overriding the current `umask` if needed.
update_atime: (optional, defaults to `False`) If `True`, updates the
filesystem access time (but not the modified time) when a file is read.
This allows MRU/LRU cache policies to be implemented for filesystems
where access time updates are disabled.
"""
self.root_path = Path(root_path).absolute()
self.chmod_file = chmod_file
self.chmod_dir = chmod_dir
self.update_atime = update_atime
def _get_full_path(self, key: str) -> Path:
"""Get the full path for a given key relative to the root path.
Args:
key (str): The key relative to the root path.
Returns:
Path: The full path for the given key.
"""
if not re.match(r"^[a-zA-Z0-9_.\-/]+$", key):
msg = f"Invalid characters in key: {key}"
raise InvalidKeyException(msg)
full_path = (self.root_path / key).resolve()
root_path = self.root_path.resolve()
common_path = os.path.commonpath([root_path, full_path])
if common_path != str(root_path):
msg = (
f"Invalid key: {key}. Key should be relative to the full path. "
f"{root_path} vs. {common_path} and full path of {full_path}"
)
raise InvalidKeyException(msg)
return full_path
def _mkdir_for_store(self, dir_path: Path) -> None:
"""Makes a store directory path (including parents) with specified permissions.
This is needed because `Path.mkdir()` is restricted by the current `umask`,
whereas the explicit `os.chmod()` used here is not.
Args:
dir_path: (Path) The store directory to make
Returns:
None
"""
if not dir_path.exists():
self._mkdir_for_store(dir_path.parent)
dir_path.mkdir(exist_ok=True)
if self.chmod_dir is not None:
dir_path.chmod(self.chmod_dir)
def mget(self, keys: Sequence[str]) -> list[Optional[bytes]]:
"""Get the values associated with the given keys.
Args:
keys: A sequence of keys.
Returns:
A sequence of optional values associated with the keys.
If a key is not found, the corresponding value will be None.
"""
values: list[Optional[bytes]] = []
for key in keys:
full_path = self._get_full_path(key)
if full_path.exists():
value = full_path.read_bytes()
values.append(value)
if self.update_atime:
# update access time only; preserve modified time
os.utime(full_path, (time.time(), full_path.stat().st_mtime))
else:
values.append(None)
return values
def mset(self, key_value_pairs: Sequence[tuple[str, bytes]]) -> None:
"""Set the values for the given keys.
Args:
key_value_pairs: A sequence of key-value pairs.
Returns:
None
"""
for key, value in key_value_pairs:
full_path = self._get_full_path(key)
self._mkdir_for_store(full_path.parent)
full_path.write_bytes(value)
if self.chmod_file is not None:
full_path.chmod(self.chmod_file)
def mdelete(self, keys: Sequence[str]) -> None:
"""Delete the given keys and their associated values.
Args:
keys (Sequence[str]): A sequence of keys to delete.
Returns:
None
"""
for key in keys:
full_path = self._get_full_path(key)
if full_path.exists():
full_path.unlink()
def yield_keys(self, prefix: Optional[str] = None) -> Iterator[str]:
"""Get an iterator over keys that match the given prefix.
Args:
prefix (Optional[str]): The prefix to match.
Returns:
Iterator[str]: An iterator over keys that match the given prefix.
"""
prefix_path = self._get_full_path(prefix) if prefix else self.root_path
for file in prefix_path.rglob("*"):
if file.is_file():
relative_path = file.relative_to(self.root_path)
yield str(relative_path)
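
For reference while the removal is reviewed, a sketch of how the constructor options documented above fit together (the root path is illustrative, and in langchain_v1 this import no longer resolves after this commit):

from langchain.storage import LocalFileStore

store = LocalFileStore(
    "/tmp/langchain_store",  # root path; keys are resolved beneath it
    chmod_file=0o600,        # new files readable/writable by owner only
    chmod_dir=0o700,         # new directories restricted to owner
    update_atime=True,       # mget() refreshes atime, enabling LRU/MRU policies
)
store.mset([("cache/key1", b"value1")])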

View File

@@ -2,7 +2,6 @@ from langchain_core.tools import (
BaseTool,
InjectedToolArg,
InjectedToolCallId,
Tool,
ToolException,
tool,
)
@@ -11,7 +10,6 @@ __all__ = [
"BaseTool",
"InjectedToolArg",
"InjectedToolCallId",
"Tool",
"ToolException",
"tool",
]

View File

@@ -14,6 +14,7 @@ if TYPE_CHECKING:
EXPECTED_ALL = [
"init_chat_model",
"BaseChatModel",
]

View File

@@ -2,6 +2,7 @@ from langchain import embeddings
EXPECTED_ALL = [
"CacheBackedEmbeddings",
"Embeddings",
"init_embeddings",
]

View File

@@ -1 +0,0 @@
"""Test prompt functionality."""

View File

@@ -1,24 +0,0 @@
from langchain import prompts
EXPECTED_ALL = [
"AIMessagePromptTemplate",
"BaseChatPromptTemplate",
"BasePromptTemplate",
"ChatMessagePromptTemplate",
"ChatPromptTemplate",
"FewShotPromptTemplate",
"FewShotPromptWithTemplates",
"HumanMessagePromptTemplate",
"LengthBasedExampleSelector",
"MaxMarginalRelevanceExampleSelector",
"MessagesPlaceholder",
"PromptTemplate",
"SemanticSimilarityExampleSelector",
"StringPromptTemplate",
"SystemMessagePromptTemplate",
"FewShotChatMessagePromptTemplate",
]
def test_all_imports() -> None:
assert set(prompts.__all__) == set(EXPECTED_ALL)

View File

@@ -1,155 +0,0 @@
import tempfile
from collections.abc import Generator
from pathlib import Path
import pytest
from langchain_core.stores import InvalidKeyException
from langchain.storage.file_system import LocalFileStore
@pytest.fixture
def file_store() -> Generator[LocalFileStore, None, None]:
# Create a temporary directory for testing
with tempfile.TemporaryDirectory() as temp_dir:
# Instantiate the LocalFileStore with the temporary directory as the root path
store = LocalFileStore(temp_dir)
yield store
def test_mset_and_mget(file_store: LocalFileStore) -> None:
# Set values for keys
key_value_pairs = [("key1", b"value1"), ("key2", b"value2")]
file_store.mset(key_value_pairs)
# Get values for keys
values = file_store.mget(["key1", "key2"])
# Assert that the retrieved values match the original values
assert values == [b"value1", b"value2"]
@pytest.mark.parametrize(
("chmod_dir_s", "chmod_file_s"),
[("777", "666"), ("770", "660"), ("700", "600")],
)
def test_mset_chmod(chmod_dir_s: str, chmod_file_s: str) -> None:
chmod_dir = int(chmod_dir_s, base=8)
chmod_file = int(chmod_file_s, base=8)
# Create a temporary directory for testing
with tempfile.TemporaryDirectory() as temp_dir:
# Instantiate the LocalFileStore with a directory inside the temporary directory
# as the root path
file_store = LocalFileStore(
Path(temp_dir) / "store_dir",
chmod_dir=chmod_dir,
chmod_file=chmod_file,
)
# Set values for keys
key_value_pairs = [("key1", b"value1"), ("key2", b"value2")]
file_store.mset(key_value_pairs)
# verify the permissions are set correctly
# (test only the standard user/group/other bits)
dir_path = file_store.root_path
file_path = file_store.root_path / "key1"
assert (dir_path.stat().st_mode & 0o777) == chmod_dir
assert (file_path.stat().st_mode & 0o777) == chmod_file
def test_mget_update_atime() -> None:
# Create a temporary directory for testing
with tempfile.TemporaryDirectory() as temp_dir:
# Instantiate the LocalFileStore with a directory inside the temporary directory
# as the root path
file_store = LocalFileStore(Path(temp_dir) / "store_dir", update_atime=True)
# Set values for keys
key_value_pairs = [("key1", b"value1"), ("key2", b"value2")]
file_store.mset(key_value_pairs)
# Get original access time
file_path = file_store.root_path / "key1"
atime1 = file_path.stat().st_atime
# Get values for keys
_ = file_store.mget(["key1", "key2"])
# Make sure the filesystem access time has been updated
atime2 = file_path.stat().st_atime
assert atime2 != atime1
def test_mdelete(file_store: LocalFileStore) -> None:
# Set values for keys
key_value_pairs = [("key1", b"value1"), ("key2", b"value2")]
file_store.mset(key_value_pairs)
# Delete keys
file_store.mdelete(["key1"])
# Check if the deleted key is present
values = file_store.mget(["key1"])
# Assert that the value is None after deletion
assert values == [None]
def test_set_invalid_key(file_store: LocalFileStore) -> None:
"""Test that an exception is raised when an invalid key is set."""
# Set a key-value pair
key = "crying-cat/😿"
value = b"This is a test value"
with pytest.raises(InvalidKeyException):
file_store.mset([(key, value)])
def test_set_key_and_verify_content(file_store: LocalFileStore) -> None:
"""Test that the content of the file is the same as the value set."""
# Set a key-value pair
key = "test_key"
value = b"This is a test value"
file_store.mset([(key, value)])
# Verify the content of the actual file
full_path = file_store._get_full_path(key)
assert full_path.exists()
assert full_path.read_bytes() == b"This is a test value"
def test_yield_keys(file_store: LocalFileStore) -> None:
# Set values for keys
key_value_pairs = [("key1", b"value1"), ("subdir/key2", b"value2")]
file_store.mset(key_value_pairs)
# Iterate over keys
keys = list(file_store.yield_keys())
# Assert that the yielded keys match the expected keys
expected_keys = ["key1", str(Path("subdir") / "key2")]
assert keys == expected_keys
def test_catches_forbidden_keys(file_store: LocalFileStore) -> None:
"""Make sure we raise exception on keys that are not allowed; e.g., absolute path"""
with pytest.raises(InvalidKeyException):
file_store.mset([("/etc", b"value1")])
with pytest.raises(InvalidKeyException):
list(file_store.yield_keys("/etc/passwd"))
with pytest.raises(InvalidKeyException):
file_store.mget(["/etc/passwd"])
# check relative paths
with pytest.raises(InvalidKeyException):
list(file_store.yield_keys(".."))
with pytest.raises(InvalidKeyException):
file_store.mget(["../etc/passwd"])
with pytest.raises(InvalidKeyException):
file_store.mset([("../etc", b"value1")])
with pytest.raises(InvalidKeyException):
list(file_store.yield_keys("../etc/passwd"))

View File

@@ -4,10 +4,7 @@ EXPECTED_ALL = [
"EncoderBackedStore",
"InMemoryStore",
"InMemoryByteStore",
"LocalFileStore",
"InvalidKeyException",
"create_lc_store",
"create_kv_docstore",
]

View File

@@ -1,37 +0,0 @@
import tempfile
from collections.abc import Generator
from typing import cast
import pytest
from langchain_core.documents import Document
from langchain.storage._lc_store import create_kv_docstore, create_lc_store
from langchain.storage.file_system import LocalFileStore
@pytest.fixture
def file_store() -> Generator[LocalFileStore, None, None]:
# Create a temporary directory for testing
with tempfile.TemporaryDirectory() as temp_dir:
# Instantiate the LocalFileStore with the temporary directory as the root path
store = LocalFileStore(temp_dir)
yield store
def test_create_lc_store(file_store: LocalFileStore) -> None:
"""Test that a docstore is created from a base store."""
docstore = create_lc_store(file_store)
docstore.mset([("key1", Document(page_content="hello", metadata={"key": "value"}))])
fetched_doc = cast("Document", docstore.mget(["key1"])[0])
assert fetched_doc.page_content == "hello"
assert fetched_doc.metadata == {"key": "value"}
def test_create_kv_store(file_store: LocalFileStore) -> None:
"""Test that a docstore is created from a base store."""
docstore = create_kv_docstore(file_store)
docstore.mset([("key1", Document(page_content="hello", metadata={"key": "value"}))])
fetched_doc = docstore.mget(["key1"])[0]
assert isinstance(fetched_doc, Document)
assert fetched_doc.page_content == "hello"
assert fetched_doc.metadata == {"key": "value"}

View File

@@ -4,7 +4,6 @@ EXPECTED_ALL = {
"BaseTool",
"InjectedToolArg",
"InjectedToolCallId",
"Tool",
"ToolException",
"tool",
}