Mirror of https://github.com/hwchase17/langchain.git, synced 2026-02-10 03:00:59 +00:00
Compare commits (3 commits): langchain= ... bagatur/de
| Author | SHA1 | Date |
|---|---|---|
|  | abab93b5ec |  |
|  | 37017c0e3f |  |
|  | 40ea8a5402 |  |
@@ -19,6 +19,8 @@ import importlib
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    from langchain_core.blob_loaders.file_system import FileSystemBlobLoader

    from langchain_community.document_loaders.acreom import (
        AcreomLoader,
    )

@@ -87,7 +89,6 @@ if TYPE_CHECKING:
    from langchain_community.document_loaders.blob_loaders import (
        Blob,
        BlobLoader,
        FileSystemBlobLoader,
        YoutubeAudioLoader,
    )
    from langchain_community.document_loaders.blockchain import (

@@ -800,7 +801,6 @@ __all__ = [
    "FaunaLoader",
    "FigmaFileLoader",
    "FireCrawlLoader",
    "FileSystemBlobLoader",
    "GCSDirectoryLoader",
    "GlueCatalogLoader",
    "GCSFileLoader",
@@ -10,6 +10,7 @@ from enum import Enum
from pathlib import Path, PurePath
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Sequence, Union

from langchain_core.blob_loaders.file_system import FileSystemBlobLoader
from langchain_core.pydantic_v1 import (
    BaseModel,
    BaseSettings,
@@ -19,9 +20,6 @@ from langchain_core.pydantic_v1 import (
)

from langchain_community.document_loaders.base import BaseLoader
from langchain_community.document_loaders.blob_loaders.file_system import (
    FileSystemBlobLoader,
)
from langchain_community.document_loaders.blob_loaders.schema import Blob

if TYPE_CHECKING:
@@ -4,12 +4,11 @@ from typing import TYPE_CHECKING, Any
from langchain_core.document_loaders import Blob, BlobLoader

if TYPE_CHECKING:
    from langchain_core.blob_loaders.file_system import FileSystemBlobLoader

    from langchain_community.document_loaders.blob_loaders.cloud_blob_loader import (
        CloudBlobLoader,
    )
    from langchain_community.document_loaders.blob_loaders.file_system import (
        FileSystemBlobLoader,
    )
    from langchain_community.document_loaders.blob_loaders.youtube_audio import (
        YoutubeAudioLoader,
    )
@@ -39,6 +38,5 @@ __all__ = [
    "BlobLoader",
    "Blob",
    "CloudBlobLoader",
    "FileSystemBlobLoader",
    "YoutubeAudioLoader",
]
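Editorial note: the net effect of the re-export changes above is that the community import path keeps working while the implementation moves to core. A minimal sanity check, as a sketch (assumes this branch is installed):

from langchain_core.blob_loaders.file_system import FileSystemBlobLoader as CoreLoader
from langchain_community.document_loaders.blob_loaders import (
    FileSystemBlobLoader as CommunityLoader,
)

# Both import paths should now resolve to the single class defined in core.
assert CoreLoader is CommunityLoader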
@@ -1,149 +1,3 @@
"""Use to load blobs from the local file system."""
from langchain_core.blob_loaders.file_system import FileSystemBlobLoader

from pathlib import Path
from typing import Callable, Iterable, Iterator, Optional, Sequence, TypeVar, Union

from langchain_community.document_loaders.blob_loaders.schema import Blob, BlobLoader

T = TypeVar("T")


def _make_iterator(
    length_func: Callable[[], int], show_progress: bool = False
) -> Callable[[Iterable[T]], Iterator[T]]:
    """Create a function that optionally wraps an iterable in tqdm."""
    iterator: Callable[[Iterable[T]], Iterator[T]]
    if show_progress:
        try:
            from tqdm.auto import tqdm
        except ImportError:
            raise ImportError(
                "You must install tqdm to use show_progress=True. "
                "You can install tqdm with `pip install tqdm`."
            )

        # Make sure to provide `total` here so that tqdm can show
        # a progress bar that takes into account the total number of files.
        def _with_tqdm(iterable: Iterable[T]) -> Iterator[T]:
            """Wrap an iterable in a tqdm progress bar."""
            return tqdm(iterable, total=length_func())

        iterator = _with_tqdm
    else:
        iterator = iter

    return iterator


# PUBLIC API


class FileSystemBlobLoader(BlobLoader):
    """Load blobs in the local file system.

    Example:

    .. code-block:: python

        from langchain_community.document_loaders.blob_loaders import FileSystemBlobLoader
        loader = FileSystemBlobLoader("/path/to/directory")
        for blob in loader.yield_blobs():
            print(blob)  # noqa: T201
    """  # noqa: E501

    def __init__(
        self,
        path: Union[str, Path],
        *,
        glob: str = "**/[!.]*",
        exclude: Sequence[str] = (),
        suffixes: Optional[Sequence[str]] = None,
        show_progress: bool = False,
    ) -> None:
        """Initialize with a path to directory and how to glob over it.

        Args:
            path: Path to directory to load from or path to file to load.
                If a path to a file is provided, glob/exclude/suffixes are ignored.
            glob: Glob pattern relative to the specified path
                by default set to pick up all non-hidden files
            exclude: patterns to exclude from results, use glob syntax
            suffixes: Provide to keep only files with these suffixes
                Useful when wanting to keep files with different suffixes
                Suffixes must include the dot, e.g. ".txt"
            show_progress: If true, will show a progress bar as the files are loaded.
                This forces an iteration through all matching files
                to count them prior to loading them.

        Examples:

        .. code-block:: python

            from langchain_community.document_loaders.blob_loaders import FileSystemBlobLoader

            # Load a single file.
            loader = FileSystemBlobLoader("/path/to/file.txt")

            # Recursively load all text files in a directory.
            loader = FileSystemBlobLoader("/path/to/directory", glob="**/*.txt")

            # Recursively load all non-hidden files in a directory.
            loader = FileSystemBlobLoader("/path/to/directory", glob="**/[!.]*")

            # Load all files in a directory without recursion.
            loader = FileSystemBlobLoader("/path/to/directory", glob="*")

            # Recursively load all files in a directory, except for py or pyc files.
            loader = FileSystemBlobLoader(
                "/path/to/directory",
                glob="**/*.txt",
                exclude=["**/*.py", "**/*.pyc"]
            )
        """  # noqa: E501
        if isinstance(path, Path):
            _path = path
        elif isinstance(path, str):
            _path = Path(path)
        else:
            raise TypeError(f"Expected str or Path, got {type(path)}")

        self.path = _path.expanduser()  # Expand user to handle ~
        self.glob = glob
        self.suffixes = set(suffixes or [])
        self.show_progress = show_progress
        self.exclude = exclude

    def yield_blobs(
        self,
    ) -> Iterable[Blob]:
        """Yield blobs that match the requested pattern."""
        iterator = _make_iterator(
            length_func=self.count_matching_files, show_progress=self.show_progress
        )

        for path in iterator(self._yield_paths()):
            yield Blob.from_path(path)

    def _yield_paths(self) -> Iterable[Path]:
        """Yield paths that match the requested pattern."""
        if self.path.is_file():
            yield self.path
            return

        paths = self.path.glob(self.glob)
        for path in paths:
            if self.exclude:
                if any(path.match(glob) for glob in self.exclude):
                    continue
            if path.is_file():
                if self.suffixes and path.suffix not in self.suffixes:
                    continue
                yield path

    def count_matching_files(self) -> int:
        """Count files that match the pattern without loading them."""
        # Carry out a full iteration to count the files without
        # materializing anything expensive in memory.
        num = 0
        for _ in self._yield_paths():
            num += 1
        return num

__all__ = ["FileSystemBlobLoader"]
@@ -4,12 +4,12 @@ import concurrent.futures
from pathlib import Path
from typing import Iterator, Literal, Optional, Sequence, Union

from langchain_core.blob_loaders.file_system import FileSystemBlobLoader
from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseBlobParser
from langchain_community.document_loaders.blob_loaders import (
    BlobLoader,
    FileSystemBlobLoader,
)
from langchain_community.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.parsers.registry import get_parser
@@ -3,69 +3,6 @@
This module contains some logic to help assemble more sophisticated parsers.
"""
from typing import Iterator, Mapping, Optional
from langchain_core.blob_parsers.mime_type import MimeTypeBasedParser

from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseBlobParser
from langchain_community.document_loaders.blob_loaders.schema import Blob


class MimeTypeBasedParser(BaseBlobParser):
    """Parser that uses `mime`-types to parse a blob.

    This parser is useful for simple pipelines where the mime-type is sufficient
    to determine how to parse a blob.

    To use, configure handlers based on mime-types and pass them to the initializer.

    Example:

    .. code-block:: python

        from langchain_community.document_loaders.parsers.generic import MimeTypeBasedParser

        parser = MimeTypeBasedParser(
            handlers={
                "application/pdf": ...,
            },
            fallback_parser=...,
        )
    """  # noqa: E501

    def __init__(
        self,
        handlers: Mapping[str, BaseBlobParser],
        *,
        fallback_parser: Optional[BaseBlobParser] = None,
    ) -> None:
        """Define a parser that uses mime-types to determine how to parse a blob.

        Args:
            handlers: A mapping from mime-types to functions that take a blob, parse it
                and return a document.
            fallback_parser: A fallback parser to use if the mime-type is not
                found in the handlers. If provided, this parser will be
                used to parse blobs with all mime-types not found in
                the handlers.
                If not provided, a ValueError will be raised if the
                mime-type is not found in the handlers.
        """
        self.handlers = handlers
        self.fallback_parser = fallback_parser

    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
        """Load documents from a blob."""
        mimetype = blob.mimetype

        if mimetype is None:
            raise ValueError(f"{blob} does not have a mimetype.")

        if mimetype in self.handlers:
            handler = self.handlers[mimetype]
            yield from handler.lazy_parse(blob)
        else:
            if self.fallback_parser is not None:
                yield from self.fallback_parser.lazy_parse(blob)
            else:
                raise ValueError(f"Unsupported mime type: {mimetype}")

__all__ = ["MimeTypeBasedParser"]
@@ -1,10 +1,11 @@
"""Module includes a registry of default parser configurations."""
from langchain_community.document_loaders.base import BaseBlobParser
from langchain_community.document_loaders.parsers.generic import MimeTypeBasedParser
from langchain_core.blob_parsers.mime_type import MimeTypeBasedParser
from langchain_core.blob_parsers.txt import TextParser
from langchain_core.document_loaders.base import BaseBlobParser

from langchain_community.document_loaders.parsers.msword import MsWordParser
from langchain_community.document_loaders.parsers.pdf import PyMuPDFParser
from langchain_community.document_loaders.parsers.txt import TextParser


def _get_default_parser() -> BaseBlobParser:
@@ -17,6 +18,7 @@ def _get_default_parser() -> BaseBlobParser:
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document": (
                MsWordParser()
            ),
            "text/x-python": TextParser(),
        },
        fallback_parser=None,
    )
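For context, the registry's "default" entry wires mime types to these parsers. A sketch of consulting it (assumes get_parser keeps its community entry point on this branch):

from langchain_community.document_loaders.parsers.registry import get_parser

# Resolves to a MimeTypeBasedParser covering the pdf, docx, and text/x-python handlers.
parser = get_parser("default")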
@@ -1,16 +1,5 @@
"""Module for parsing text files."""
from typing import Iterator
from langchain_core.blob_parsers.txt import TextParser

from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseBlobParser
from langchain_community.document_loaders.blob_loaders import Blob


class TextParser(BaseBlobParser):
    """Parser for text blobs."""

    def lazy_parse(self, blob: Blob) -> Iterator[Document]:  # type: ignore[valid-type]
        """Lazily parse the blob."""
        yield Document(page_content=blob.as_string(), metadata={"source": blob.source})  # type: ignore[attr-defined]

__all__ = ["TextParser"]
@@ -3,11 +3,11 @@
from typing import Iterator

import pytest
from langchain_core.blob_parsers.mime_type import MimeTypeBasedParser
from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseBlobParser
from langchain_community.document_loaders.blob_loaders import Blob
from langchain_community.document_loaders.parsers.generic import MimeTypeBasedParser


class TestMimeBasedParser:
@@ -6,12 +6,12 @@ from pathlib import Path
from typing import Any, Generator, Iterator

import pytest
from langchain_core.blob_parsers.txt import TextParser
from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseBlobParser
from langchain_community.document_loaders.blob_loaders import Blob, FileSystemBlobLoader
from langchain_community.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.parsers.txt import TextParser


@pytest.fixture
libs/core/langchain_core/blob_loaders/__init__.py (new file, 4 lines)
@@ -0,0 +1,4 @@
from langchain_core.blob_loaders.file_system import FileSystemBlobLoader
from langchain_core.document_loaders.blob_loaders import BlobLoader

__all__ = ["BlobLoader", "FileSystemBlobLoader"]
libs/core/langchain_core/blob_loaders/file_system.py (new file, 150 lines)
@@ -0,0 +1,150 @@
"""Use to load blobs from the local file system."""

from pathlib import Path
from typing import Callable, Iterable, Iterator, Optional, Sequence, TypeVar, Union

from langchain_core.document_loaders import BlobLoader
from langchain_core.documents.base import Blob

T = TypeVar("T")


def _make_iterator(
    length_func: Callable[[], int], show_progress: bool = False
) -> Callable[[Iterable[T]], Iterator[T]]:
    """Create a function that optionally wraps an iterable in tqdm."""
    iterator: Callable[[Iterable[T]], Iterator[T]]
    if show_progress:
        try:
            from tqdm.auto import tqdm
        except ImportError:
            raise ImportError(
                "You must install tqdm to use show_progress=True. "
                "You can install tqdm with `pip install tqdm`."
            )

        # Make sure to provide `total` here so that tqdm can show
        # a progress bar that takes into account the total number of files.
        def _with_tqdm(iterable: Iterable[T]) -> Iterator[T]:
            """Wrap an iterable in a tqdm progress bar."""
            return tqdm(iterable, total=length_func())

        iterator = _with_tqdm
    else:
        iterator = iter

    return iterator


# PUBLIC API


class FileSystemBlobLoader(BlobLoader):
    """Load blobs in the local file system.

    Example:

    .. code-block:: python

        from langchain_community.document_loaders.blob_loaders import FileSystemBlobLoader
        loader = FileSystemBlobLoader("/path/to/directory")
        for blob in loader.yield_blobs():
            print(blob)  # noqa: T201
    """  # noqa: E501

    def __init__(
        self,
        path: Union[str, Path],
        *,
        glob: str = "**/[!.]*",
        exclude: Sequence[str] = (),
        suffixes: Optional[Sequence[str]] = None,
        show_progress: bool = False,
    ) -> None:
        """Initialize with a path to directory and how to glob over it.

        Args:
            path: Path to directory to load from or path to file to load.
                If a path to a file is provided, glob/exclude/suffixes are ignored.
            glob: Glob pattern relative to the specified path
                by default set to pick up all non-hidden files
            exclude: patterns to exclude from results, use glob syntax
            suffixes: Provide to keep only files with these suffixes
                Useful when wanting to keep files with different suffixes
                Suffixes must include the dot, e.g. ".txt"
            show_progress: If true, will show a progress bar as the files are loaded.
                This forces an iteration through all matching files
                to count them prior to loading them.

        Examples:

        .. code-block:: python

            from langchain_community.document_loaders.blob_loaders import FileSystemBlobLoader

            # Load a single file.
            loader = FileSystemBlobLoader("/path/to/file.txt")

            # Recursively load all text files in a directory.
            loader = FileSystemBlobLoader("/path/to/directory", glob="**/*.txt")

            # Recursively load all non-hidden files in a directory.
            loader = FileSystemBlobLoader("/path/to/directory", glob="**/[!.]*")

            # Load all files in a directory without recursion.
            loader = FileSystemBlobLoader("/path/to/directory", glob="*")

            # Recursively load all files in a directory, except for py or pyc files.
            loader = FileSystemBlobLoader(
                "/path/to/directory",
                glob="**/*.txt",
                exclude=["**/*.py", "**/*.pyc"]
            )
        """  # noqa: E501
        if isinstance(path, Path):
            _path = path
        elif isinstance(path, str):
            _path = Path(path)
        else:
            raise TypeError(f"Expected str or Path, got {type(path)}")

        self.path = _path.expanduser()  # Expand user to handle ~
        self.glob = glob
        self.suffixes = set(suffixes or [])
        self.show_progress = show_progress
        self.exclude = exclude if not isinstance(exclude, str) else (exclude,)

    def yield_blobs(
        self,
    ) -> Iterable[Blob]:
        """Yield blobs that match the requested pattern."""
        iterator = _make_iterator(
            length_func=self.count_matching_files, show_progress=self.show_progress
        )

        for path in iterator(self._yield_paths()):
            yield Blob.from_path(path)

    def _yield_paths(self) -> Iterable[Path]:
        """Yield paths that match the requested pattern."""
        if self.path.is_file():
            yield self.path
            return

        paths = self.path.glob(self.glob)
        for path in paths:
            if self.exclude:
                if any(path.match(glob) for glob in self.exclude):
                    continue
            if path.is_file():
                if self.suffixes and path.suffix not in self.suffixes:
                    continue
                yield path

    def count_matching_files(self) -> int:
        """Count files that match the pattern without loading them."""
        # Carry out a full iteration to count the files without
        # materializing anything expensive in memory.
        num = 0
        for _ in self._yield_paths():
            num += 1
        return num
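A quick way to exercise the loader now living in core; a sketch (the temporary directory and file names are illustrative):

import tempfile
from pathlib import Path

from langchain_core.blob_loaders.file_system import FileSystemBlobLoader

with tempfile.TemporaryDirectory() as d:
    (Path(d) / "a.txt").write_text("hello")
    (Path(d) / "b.py").write_text("print('hi')")
    loader = FileSystemBlobLoader(d, suffixes=[".txt"])
    blobs = list(loader.yield_blobs())
    assert len(blobs) == 1  # b.py is dropped by the suffix filter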
libs/core/langchain_core/blob_parsers/__init__.py (new file, 18 lines)
@@ -0,0 +1,18 @@
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from langchain_core.blob_parsers.mime_type import MimeTypeBasedParser


def __getattr__(name):
    if name == "MimeTypeBasedParser":
        from langchain_core.blob_parsers.mime_type import MimeTypeBasedParser

        return MimeTypeBasedParser
    else:
        raise AttributeError(
            f"No {name} attribute in module langchain_core.blob_parsers."
        )


__all__ = ["MimeTypeBasedParser"]
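The module-level __getattr__ above is the PEP 562 lazy-import pattern: the submodule is only imported on first attribute access. A sketch of the observable behavior (assumes this branch):

import langchain_core.blob_parsers as blob_parsers

# First attribute access triggers __getattr__, which imports mime_type lazily.
parser_cls = blob_parsers.MimeTypeBasedParser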
libs/core/langchain_core/blob_parsers/mime_type.py (new file, 65 lines)
@@ -0,0 +1,65 @@
from typing import Iterator, Mapping, Optional

from langchain_core.document_loaders import BaseBlobParser
from langchain_core.documents import Document
from langchain_core.documents.base import Blob


class MimeTypeBasedParser(BaseBlobParser):
    """Parser that uses `mime`-types to parse a blob.

    This parser is useful for simple pipelines where the mime-type is sufficient
    to determine how to parse a blob.

    To use, configure handlers based on mime-types and pass them to the initializer.

    Example:

    .. code-block:: python

        from langchain_community.document_loaders.parsers.generic import MimeTypeBasedParser

        parser = MimeTypeBasedParser(
            handlers={
                "application/pdf": ...,
            },
            fallback_parser=...,
        )
    """  # noqa: E501

    def __init__(
        self,
        handlers: Mapping[str, BaseBlobParser],
        *,
        fallback_parser: Optional[BaseBlobParser] = None,
    ) -> None:
        """Define a parser that uses mime-types to determine how to parse a blob.

        Args:
            handlers: A mapping from mime-types to functions that take a blob, parse it
                and return a document.
            fallback_parser: A fallback parser to use if the mime-type is not
                found in the handlers. If provided, this parser will be
                used to parse blobs with all mime-types not found in
                the handlers.
                If not provided, a ValueError will be raised if the
                mime-type is not found in the handlers.
        """
        self.handlers = handlers
        self.fallback_parser = fallback_parser

    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
        """Load documents from a blob."""
        mimetype = blob.mimetype

        if mimetype is None:
            raise ValueError(f"{blob} does not have a mimetype.")

        if mimetype in self.handlers:
            handler = self.handlers[mimetype]
            yield from handler.lazy_parse(blob)
        else:
            if self.fallback_parser is not None:
                yield from self.fallback_parser.lazy_parse(blob)
            else:
                raise ValueError(f"Unsupported mime type: {mimetype}")
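A minimal end-to-end sketch combining the two new core parsers (Blob.from_data is the existing langchain_core constructor; import paths assume this branch):

from langchain_core.blob_parsers.mime_type import MimeTypeBasedParser
from langchain_core.blob_parsers.txt import TextParser
from langchain_core.documents.base import Blob

parser = MimeTypeBasedParser(handlers={"text/plain": TextParser()})
blob = Blob.from_data(b"hello world", mime_type="text/plain")
docs = list(parser.lazy_parse(blob))  # one Document with page_content "hello world"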
libs/core/langchain_core/blob_parsers/txt.py (new file, 13 lines)
@@ -0,0 +1,13 @@
from typing import Iterator

from langchain_core.document_loaders import BaseBlobParser
from langchain_core.documents import Document
from langchain_core.documents.base import Blob


class TextParser(BaseBlobParser):
    """Parser for text blobs."""

    def lazy_parse(self, blob: Blob) -> Iterator[Document]:  # type: ignore[valid-type]
        """Lazily parse the blob."""
        yield Document(page_content=blob.as_string(), metadata={"source": blob.source})  # type: ignore[attr-defined]
@@ -18,7 +18,8 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import List, Sequence, Union
from typing import List, Optional, Sequence, Union
from uuid import UUID

from langchain_core.messages import (
    AIMessage,
@@ -206,38 +207,27 @@ class InMemoryChatMessageHistory(BaseChatMessageHistory, BaseModel):
    messages: List[BaseMessage] = Field(default_factory=list)
    """A list of messages stored in memory."""

    async def aget_messages(self) -> List[BaseMessage]:
        """Async version of getting messages.

        Can override this method to provide an efficient async implementation.
        In general, fetching messages may involve IO to the underlying
        persistence layer.

        Returns:
            List of messages.
        """
        return self.messages

    def add_message(self, message: BaseMessage) -> None:
        """Add a self-created message to the store.

        Args:
            message: The message to add.
        """
        self.messages.append(message)

    async def aadd_messages(self, messages: Sequence[BaseMessage]) -> None:
        """Async add messages to the store.

        Args:
            messages: The messages to add.
        """
        self.add_messages(messages)

    def clear(self) -> None:
        """Clear all messages from the store."""
        self.messages = []

    async def aclear(self) -> None:
        """Async clear all messages from the store."""
        self.clear()


class BaseHistoryManager(ABC):
    @abstractmethod
    def get_session(self, session_id: Union[str, UUID]) -> BaseChatMessageHistory:
        """Return the chat message history for the given session id."""


class InMemManager(BaseHistoryManager):
    """In-memory history manager: one chat history per session id."""

    def __init__(self, sessions: Optional[dict] = None) -> None:
        self.sessions = sessions or {}

    def get_session(self, session_id: Union[str, UUID]) -> InMemoryChatMessageHistory:
        if session_id not in self.sessions:
            self.sessions[session_id] = InMemoryChatMessageHistory()
        return self.sessions[session_id]
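A sketch of the intended use of the new InMemManager (assumes this branch): histories are created on demand and reused on repeat lookups for the same session id:

from langchain_core.chat_history import InMemManager
from langchain_core.messages import HumanMessage

manager = InMemManager()
history = manager.get_session("session-1")
history.add_message(HumanMessage(content="hi"))
assert manager.get_session("session-1") is history  # same history object reused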
@@ -12,15 +12,11 @@ from typing import (
    Union,
)

from langchain_core.blob_loaders import FileSystemBlobLoader
from langchain_core.document_loaders.base import BaseBlobParser, BaseLoader
from langchain_core.document_loaders.blob_loaders import BlobLoader
from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseBlobParser, BaseLoader
from langchain_community.document_loaders.blob_loaders import (
    BlobLoader,
    FileSystemBlobLoader,
)
from langchain_community.document_loaders.parsers.registry import get_parser

if TYPE_CHECKING:
    from langchain_text_splitters import TextSplitter
@@ -172,10 +168,24 @@ class GenericLoader(BaseLoader):
                # If there is an implementation of get_parser on the class, use it.
                blob_parser = cls.get_parser(**(parser_kwargs or {}))
            except NotImplementedError:
                # if not then use the global registry.
                blob_parser = get_parser(parser)
                # if not then try to use the global registry.
                try:
                    from langchain_community.document_loaders.parsers.registry import (
                        get_parser,
                    )
                except ImportError as e:
                    raise ValueError("") from e
                else:
                    blob_parser = get_parser(parser)
        else:
            blob_parser = get_parser(parser)
            try:
                from langchain_community.document_loaders.parsers.registry import (
                    get_parser,
                )
            except ImportError as e:
                raise ValueError("") from e
            else:
                blob_parser = get_parser(parser)
        else:
            blob_parser = parser
        return cls(blob_loader, blob_parser)
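Note the empty-string ValueError above is still a placeholder in this WIP branch. For context, a sketch of constructing the moved loader directly, which sidesteps registry resolution entirely (import paths assume this branch, and that GenericLoader keeps its community (blob_loader, blob_parser) constructor):

from langchain_core.blob_loaders import FileSystemBlobLoader
from langchain_core.blob_parsers.txt import TextParser
from langchain_core.document_loaders.generic import GenericLoader

loader = GenericLoader(
    FileSystemBlobLoader("/path/to/docs", glob="**/*.txt"),
    TextParser(),
)
docs = loader.load()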
@@ -293,3 +293,6 @@ class Document(BaseMedia):
            return f"page_content='{self.page_content}' metadata={self.metadata}"
        else:
            return f"page_content='{self.page_content}'"

    def __len__(self) -> int:
        return len(self.page_content)
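The added __len__ delegates to the page content, so len() now works on a Document:

from langchain_core.documents import Document

doc = Document(page_content="hello")
assert len(doc) == 5  # length of page_content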
@@ -35,7 +35,9 @@ from langchain_core.utils import get_pydantic_field_names
if TYPE_CHECKING:
    from langchain_core.caches import BaseCache
    from langchain_core.callbacks import Callbacks
    from langchain_core.chat_history import BaseHistoryManager
    from langchain_core.outputs import LLMResult
    from langchain_core.runnables.history import RunnableWithMessageHistory


class LangSmithParams(TypedDict, total=False):
@@ -363,7 +365,9 @@ class BaseLanguageModel(
        """
        return len(self.get_token_ids(text))

    def get_num_tokens_from_messages(self, messages: List[BaseMessage]) -> int:
    def get_num_tokens_from_messages(
        self, messages: List[MessageLikeRepresentation]
    ) -> int:
        """Get the number of tokens in the messages.

        Useful for checking if an input fits in a model's context window.
@@ -374,6 +378,7 @@ class BaseLanguageModel(
        Returns:
            The sum of the number of tokens across the messages.
        """
        messages = convert_to_messages(messages)
        return sum([self.get_num_tokens(get_buffer_string([m])) for m in messages])

    @classmethod
@@ -383,3 +388,14 @@ class BaseLanguageModel(
        Use get_pydantic_field_names.
        """
        return get_pydantic_field_names(cls)

    def with_history(
        self, get_session_history: Union[Callable, BaseHistoryManager]
    ) -> RunnableWithMessageHistory:
        from langchain_core.chat_history import BaseHistoryManager
        from langchain_core.runnables.history import RunnableWithMessageHistory

        if isinstance(get_session_history, BaseHistoryManager):
            get_session_history = get_session_history.get_session

        return RunnableWithMessageHistory(self, get_session_history)
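A sketch of the new with_history convenience (the model class is a hypothetical stand-in; "session_id" is the default config key RunnableWithMessageHistory reads). Widening the parameter to MessageLikeRepresentation also lets token counting accept tuples and dicts, normalized by convert_to_messages:

from langchain_core.chat_history import InMemManager

chat = SomeChatModel()  # hypothetical BaseLanguageModel subclass
chain = chat.with_history(InMemManager())
chain.invoke("hello", config={"configurable": {"session_id": "s1"}})

n = chat.get_num_tokens_from_messages([("human", "hello"), ("ai", "hi there")])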
@@ -6,6 +6,13 @@ from importlib import metadata
from typing import Any, Optional

from langchain_core._api.deprecation import surface_langchain_deprecation_warnings
from langchain_core.chat_history import InMemManager
from langchain_core.document_loaders.generic import GenericLoader
from langchain_core.messages import AIMessage, AnyMessage, HumanMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.tools import tool

from langchain.chat_models import init_chat_model as chat_model

try:
    __version__ = metadata.version(__package__)
@@ -436,4 +443,13 @@ __all__ = [
    "QAWithSourcesChain",
    "LlamaCpp",
    "HuggingFaceTextGenInference",
    "chat_model",
    "InMemManager",
    "GenericLoader",
    "ChatPromptTemplate",
    "tool",
    "AnyMessage",
    "AIMessage",
    "SystemMessage",
    "HumanMessage",
]
@@ -74,7 +74,6 @@ if TYPE_CHECKING:
        FacebookChatLoader,
        FaunaLoader,
        FigmaFileLoader,
        FileSystemBlobLoader,
        GCSDirectoryLoader,
        GCSFileLoader,
        GeoDataFrameLoader,
@@ -188,6 +187,7 @@ if TYPE_CHECKING:
        YoutubeLoader,
        YuqueLoader,
    )
    from langchain_core.blob_loaders.file_system import FileSystemBlobLoader

    from langchain_core.document_loaders import Blob, BlobLoader

@@ -437,7 +437,6 @@ __all__ = [
    "FacebookChatLoader",
    "FaunaLoader",
    "FigmaFileLoader",
    "FileSystemBlobLoader",
    "GCSDirectoryLoader",
    "GCSFileLoader",
    "GeoDataFrameLoader",
@@ -6,9 +6,9 @@ from langchain._api import create_importer

if TYPE_CHECKING:
    from langchain_community.document_loaders import (
        FileSystemBlobLoader,
        YoutubeAudioLoader,
    )
    from langchain_core.blob_loaders.file_system import FileSystemBlobLoader

# Create a way to dynamically look up deprecated imports.
# Used to consolidate logic for raising deprecation warnings and
@@ -31,6 +31,5 @@ def __getattr__(name: str) -> Any:
__all__ = [
    "BlobLoader",
    "Blob",
    "FileSystemBlobLoader",
    "YoutubeAudioLoader",
]
@@ -3,7 +3,7 @@ from typing import TYPE_CHECKING, Any
from langchain._api import create_importer

if TYPE_CHECKING:
    from langchain_community.document_loaders.parsers.generic import MimeTypeBasedParser
    from langchain_core.blob_parsers.mime_type import MimeTypeBasedParser

# Create a way to dynamically look up deprecated imports.
# Used to consolidate logic for raising deprecation warnings and
@@ -20,6 +20,4 @@ def __getattr__(name: str) -> Any:
    return _import_attribute(name)


__all__ = [
    "MimeTypeBasedParser",
]
__all__ = []
@@ -3,7 +3,7 @@ from typing import TYPE_CHECKING, Any
from langchain._api import create_importer

if TYPE_CHECKING:
    from langchain_community.document_loaders.parsers.txt import TextParser
    from langchain_core.blob_parsers.txt import TextParser

# Create a way to dynamically look up deprecated imports.
# Used to consolidate logic for raising deprecation warnings and
@@ -18,6 +18,4 @@ def __getattr__(name: str) -> Any:
    return _import_attribute(name)


__all__ = [
    "TextParser",
]
__all__ = []
@@ -61,6 +61,7 @@ from langchain_core.messages import (
    ToolCall,
    ToolMessage,
    ToolMessageChunk,
    convert_to_messages,
)
from langchain_core.messages.ai import UsageMetadata
from langchain_core.messages.tool import tool_call_chunk
@@ -878,7 +879,7 @@ class BaseChatOpenAI(BaseChatModel):
        return encoding_model.encode(text)

    # TODO: Count bound tools as part of input.
    def get_num_tokens_from_messages(self, messages: List[BaseMessage]) -> int:
    def get_num_tokens_from_messages(self, messages: List[MessageLikeRepresentation]) -> int:
        """Calculate num tokens for gpt-3.5-turbo and gpt-4 with tiktoken package.

        **Requirements**: You must have the ``pillow`` package installed if you want to count
@@ -891,6 +892,7 @@ class BaseChatOpenAI(BaseChatModel):
        main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb"""
        if sys.version_info[1] <= 7:
            return super().get_num_tokens_from_messages(messages)
        messages = convert_to_messages(messages)
        model, encoding = self._get_encoding_model()
        if model.startswith("gpt-3.5-turbo-0301"):
            # every message follows <im_start>{role/name}\n{content}<im_end>\n
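With the widened signature here, OpenAI token counting accepts message-like inputs directly; a sketch (assumes this branch; constructing ChatOpenAI needs an OpenAI API key in the environment):

from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-3.5-turbo")
# Tuples are normalized via convert_to_messages before tiktoken counting.
n = llm.get_num_tokens_from_messages([("human", "hello world")])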