Compare commits


3 Commits

Author   SHA1        Message                                      Date
Bagatur  abab93b5ec  v fun                                        2024-08-27 12:43:02 -07:00
Bagatur  37017c0e3f  Merge branch 'master' into bagatur/delight   2024-08-25 16:53:43 -07:00
Bagatur  40ea8a5402  fun                                          2024-08-24 16:07:00 -07:00
25 changed files with 353 additions and 294 deletions

View File

@@ -19,6 +19,8 @@ import importlib
from typing import TYPE_CHECKING, Any
if TYPE_CHECKING:
from langchain_core.blob_loaders.file_system import FileSystemBlobLoader
from langchain_community.document_loaders.acreom import (
AcreomLoader,
)
@@ -87,7 +89,6 @@ if TYPE_CHECKING:
from langchain_community.document_loaders.blob_loaders import (
Blob,
BlobLoader,
FileSystemBlobLoader,
YoutubeAudioLoader,
)
from langchain_community.document_loaders.blockchain import (
@@ -800,7 +801,6 @@ __all__ = [
"FaunaLoader",
"FigmaFileLoader",
"FireCrawlLoader",
"FileSystemBlobLoader",
"GCSDirectoryLoader",
"GlueCatalogLoader",
"GCSFileLoader",

View File

@@ -10,6 +10,7 @@ from enum import Enum
from pathlib import Path, PurePath
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Sequence, Union
from langchain_core.blob_loaders.file_system import FileSystemBlobLoader
from langchain_core.pydantic_v1 import (
BaseModel,
BaseSettings,
@@ -19,9 +20,6 @@ from langchain_core.pydantic_v1 import (
)
from langchain_community.document_loaders.base import BaseLoader
from langchain_community.document_loaders.blob_loaders.file_system import (
FileSystemBlobLoader,
)
from langchain_community.document_loaders.blob_loaders.schema import Blob
if TYPE_CHECKING:

View File

@@ -4,12 +4,11 @@ from typing import TYPE_CHECKING, Any
from langchain_core.document_loaders import Blob, BlobLoader
if TYPE_CHECKING:
from langchain_core.blob_loaders.file_system import FileSystemBlobLoader
from langchain_community.document_loaders.blob_loaders.cloud_blob_loader import (
CloudBlobLoader,
)
from langchain_community.document_loaders.blob_loaders.file_system import (
FileSystemBlobLoader,
)
from langchain_community.document_loaders.blob_loaders.youtube_audio import (
YoutubeAudioLoader,
)
@@ -39,6 +38,5 @@ __all__ = [
"BlobLoader",
"Blob",
"CloudBlobLoader",
"FileSystemBlobLoader",
"YoutubeAudioLoader",
]

View File

@@ -1,149 +1,3 @@
"""Use to load blobs from the local file system."""
from langchain_core.blob_loaders.file_system import FileSystemBlobLoader
from pathlib import Path
from typing import Callable, Iterable, Iterator, Optional, Sequence, TypeVar, Union
from langchain_community.document_loaders.blob_loaders.schema import Blob, BlobLoader
T = TypeVar("T")
def _make_iterator(
length_func: Callable[[], int], show_progress: bool = False
) -> Callable[[Iterable[T]], Iterator[T]]:
"""Create a function that optionally wraps an iterable in tqdm."""
iterator: Callable[[Iterable[T]], Iterator[T]]
if show_progress:
try:
from tqdm.auto import tqdm
except ImportError:
raise ImportError(
"You must install tqdm to use show_progress=True."
"You can install tqdm with `pip install tqdm`."
)
# Make sure to provide `total` here so that tqdm can show
# a progress bar that takes into account the total number of files.
def _with_tqdm(iterable: Iterable[T]) -> Iterator[T]:
"""Wrap an iterable in a tqdm progress bar."""
return tqdm(iterable, total=length_func())
iterator = _with_tqdm
else:
iterator = iter
return iterator
# PUBLIC API
class FileSystemBlobLoader(BlobLoader):
"""Load blobs in the local file system.
Example:
.. code-block:: python
from langchain_community.document_loaders.blob_loaders import FileSystemBlobLoader
loader = FileSystemBlobLoader("/path/to/directory")
for blob in loader.yield_blobs():
print(blob) # noqa: T201
""" # noqa: E501
def __init__(
self,
path: Union[str, Path],
*,
glob: str = "**/[!.]*",
exclude: Sequence[str] = (),
suffixes: Optional[Sequence[str]] = None,
show_progress: bool = False,
) -> None:
"""Initialize with a path to directory and how to glob over it.
Args:
path: Path to directory to load from or path to file to load.
If a path to a file is provided, glob/exclude/suffixes are ignored.
glob: Glob pattern relative to the specified path
by default set to pick up all non-hidden files
exclude: patterns to exclude from results, use glob syntax
suffixes: Provide to keep only files with these suffixes
Useful when wanting to keep files with different suffixes
Suffixes must include the dot, e.g. ".txt"
show_progress: If true, will show a progress bar as the files are loaded.
This forces an iteration through all matching files
to count them prior to loading them.
Examples:
.. code-block:: python
from langchain_community.document_loaders.blob_loaders import FileSystemBlobLoader
# Load a single file.
loader = FileSystemBlobLoader("/path/to/file.txt")
# Recursively load all text files in a directory.
loader = FileSystemBlobLoader("/path/to/directory", glob="**/*.txt")
# Recursively load all non-hidden files in a directory.
loader = FileSystemBlobLoader("/path/to/directory", glob="**/[!.]*")
# Load all files in a directory without recursion.
loader = FileSystemBlobLoader("/path/to/directory", glob="*")
# Recursively load all files in a directory, except for py or pyc files.
loader = FileSystemBlobLoader(
"/path/to/directory",
glob="**/*.txt",
exclude=["**/*.py", "**/*.pyc"]
)
""" # noqa: E501
if isinstance(path, Path):
_path = path
elif isinstance(path, str):
_path = Path(path)
else:
raise TypeError(f"Expected str or Path, got {type(path)}")
self.path = _path.expanduser() # Expand user to handle ~
self.glob = glob
self.suffixes = set(suffixes or [])
self.show_progress = show_progress
self.exclude = exclude
def yield_blobs(
self,
) -> Iterable[Blob]:
"""Yield blobs that match the requested pattern."""
iterator = _make_iterator(
length_func=self.count_matching_files, show_progress=self.show_progress
)
for path in iterator(self._yield_paths()):
yield Blob.from_path(path)
def _yield_paths(self) -> Iterable[Path]:
"""Yield paths that match the requested pattern."""
if self.path.is_file():
yield self.path
return
paths = self.path.glob(self.glob)
for path in paths:
if self.exclude:
if any(path.match(glob) for glob in self.exclude):
continue
if path.is_file():
if self.suffixes and path.suffix not in self.suffixes:
continue
yield path
def count_matching_files(self) -> int:
"""Count files that match the pattern without loading them."""
# Carry out a full iteration to count the files without
# materializing anything expensive in memory.
num = 0
for _ in self._yield_paths():
num += 1
return num
__all__ = ["FileSystemBlobLoader"]

View File

@@ -4,12 +4,12 @@ import concurrent.futures
from pathlib import Path
from typing import Iterator, Literal, Optional, Sequence, Union
from langchain_core.blob_loaders.file_system import FileSystemBlobLoader
from langchain_core.documents import Document
from langchain_community.document_loaders.base import BaseBlobParser
from langchain_community.document_loaders.blob_loaders import (
BlobLoader,
FileSystemBlobLoader,
)
from langchain_community.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.parsers.registry import get_parser

View File

@@ -3,69 +3,6 @@
This module contains some logic to help assemble more sophisticated parsers.
"""
from typing import Iterator, Mapping, Optional
from langchain_core.blob_parsers.mime_type import MimeTypeBasedParser
from langchain_core.documents import Document
from langchain_community.document_loaders.base import BaseBlobParser
from langchain_community.document_loaders.blob_loaders.schema import Blob
class MimeTypeBasedParser(BaseBlobParser):
"""Parser that uses `mime`-types to parse a blob.
This parser is useful for simple pipelines where the mime-type is sufficient
to determine how to parse a blob.
To use, configure handlers based on mime-types and pass them to the initializer.
Example:
.. code-block:: python
from langchain_community.document_loaders.parsers.generic import MimeTypeBasedParser
parser = MimeTypeBasedParser(
handlers={
"application/pdf": ...,
},
fallback_parser=...,
)
""" # noqa: E501
def __init__(
self,
handlers: Mapping[str, BaseBlobParser],
*,
fallback_parser: Optional[BaseBlobParser] = None,
) -> None:
"""Define a parser that uses mime-types to determine how to parse a blob.
Args:
handlers: A mapping from mime-types to functions that take a blob, parse it
and return a document.
fallback_parser: A fallback_parser parser to use if the mime-type is not
found in the handlers. If provided, this parser will be
used to parse blobs with all mime-types not found in
the handlers.
If not provided, a ValueError will be raised if the
mime-type is not found in the handlers.
"""
self.handlers = handlers
self.fallback_parser = fallback_parser
def lazy_parse(self, blob: Blob) -> Iterator[Document]:
"""Load documents from a blob."""
mimetype = blob.mimetype
if mimetype is None:
raise ValueError(f"{blob} does not have a mimetype.")
if mimetype in self.handlers:
handler = self.handlers[mimetype]
yield from handler.lazy_parse(blob)
else:
if self.fallback_parser is not None:
yield from self.fallback_parser.lazy_parse(blob)
else:
raise ValueError(f"Unsupported mime type: {mimetype}")
__all__ = ["MimeTypeBasedParser"]

View File

@@ -1,10 +1,11 @@
"""Module includes a registry of default parser configurations."""
from langchain_community.document_loaders.base import BaseBlobParser
from langchain_community.document_loaders.parsers.generic import MimeTypeBasedParser
from langchain_core.blob_parsers.mime_type import MimeTypeBasedParser
from langchain_core.blob_parsers.txt import TextParser
from langchain_core.document_loaders.base import BaseBlobParser
from langchain_community.document_loaders.parsers.msword import MsWordParser
from langchain_community.document_loaders.parsers.pdf import PyMuPDFParser
from langchain_community.document_loaders.parsers.txt import TextParser
def _get_default_parser() -> BaseBlobParser:
@@ -17,6 +18,7 @@ def _get_default_parser() -> BaseBlobParser:
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": (
MsWordParser()
),
"text/x-python": TextParser(),
},
fallback_parser=None,
)
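With "text/x-python" added to the default handler map, the registry's default parser should now treat Python source blobs as plain text. A hedged sketch of exercising that mapping through get_parser (the "default" key is an assumption based on the registry this file builds):

from langchain_community.document_loaders.parsers.registry import get_parser
from langchain_core.documents.base import Blob

parser = get_parser("default")  # MimeTypeBasedParser assembled by _get_default_parser above
blob = Blob.from_data("print('hello')", mime_type="text/x-python")
docs = list(parser.lazy_parse(blob))  # handled by TextParser via the new text/x-python entry
print(docs[0].page_content)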

View File

@@ -1,16 +1,5 @@
"""Module for parsing text files.."""
from typing import Iterator
from langchain_core.blob_parsers.txt import TextParser
from langchain_core.documents import Document
from langchain_community.document_loaders.base import BaseBlobParser
from langchain_community.document_loaders.blob_loaders import Blob
class TextParser(BaseBlobParser):
"""Parser for text blobs."""
def lazy_parse(self, blob: Blob) -> Iterator[Document]: # type: ignore[valid-type]
"""Lazily parse the blob."""
yield Document(page_content=blob.as_string(), metadata={"source": blob.source}) # type: ignore[attr-defined]
__all__ = ["TextParser"]

View File

@@ -3,11 +3,11 @@
from typing import Iterator
import pytest
from langchain_core.blob_parsers.mime_type import MimeTypeBasedParser
from langchain_core.documents import Document
from langchain_community.document_loaders.base import BaseBlobParser
from langchain_community.document_loaders.blob_loaders import Blob
from langchain_community.document_loaders.parsers.generic import MimeTypeBasedParser
class TestMimeBasedParser:

View File

@@ -6,12 +6,12 @@ from pathlib import Path
from typing import Any, Generator, Iterator
import pytest
from langchain_core.blob_parsers.txt import TextParser
from langchain_core.documents import Document
from langchain_community.document_loaders.base import BaseBlobParser
from langchain_community.document_loaders.blob_loaders import Blob, FileSystemBlobLoader
from langchain_community.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.parsers.txt import TextParser
@pytest.fixture

View File

@@ -0,0 +1,4 @@
from langchain_core.blob_loaders.file_system import FileSystemBlobLoader
from langchain_core.document_loaders.blob_loaders import BlobLoader
__all__ = ["BlobLoader", "FileSystemBlobLoader"]

View File

@@ -0,0 +1,150 @@
"""Use to load blobs from the local file system."""
from pathlib import Path
from typing import Callable, Iterable, Iterator, Optional, Sequence, TypeVar, Union
from langchain_core.document_loaders import BlobLoader
from langchain_core.documents.base import Blob
T = TypeVar("T")
def _make_iterator(
length_func: Callable[[], int], show_progress: bool = False
) -> Callable[[Iterable[T]], Iterator[T]]:
"""Create a function that optionally wraps an iterable in tqdm."""
iterator: Callable[[Iterable[T]], Iterator[T]]
if show_progress:
try:
from tqdm.auto import tqdm
except ImportError:
raise ImportError(
"You must install tqdm to use show_progress=True."
"You can install tqdm with `pip install tqdm`."
)
# Make sure to provide `total` here so that tqdm can show
# a progress bar that takes into account the total number of files.
def _with_tqdm(iterable: Iterable[T]) -> Iterator[T]:
"""Wrap an iterable in a tqdm progress bar."""
return tqdm(iterable, total=length_func())
iterator = _with_tqdm
else:
iterator = iter
return iterator
# PUBLIC API
class FileSystemBlobLoader(BlobLoader):
"""Load blobs in the local file system.
Example:
.. code-block:: python
from langchain_community.document_loaders.blob_loaders import FileSystemBlobLoader
loader = FileSystemBlobLoader("/path/to/directory")
for blob in loader.yield_blobs():
print(blob) # noqa: T201
""" # noqa: E501
def __init__(
self,
path: Union[str, Path],
*,
glob: str = "**/[!.]*",
exclude: Sequence[str] = (),
suffixes: Optional[Sequence[str]] = None,
show_progress: bool = False,
) -> None:
"""Initialize with a path to directory and how to glob over it.
Args:
path: Path to directory to load from or path to file to load.
If a path to a file is provided, glob/exclude/suffixes are ignored.
glob: Glob pattern relative to the specified path
by default set to pick up all non-hidden files
exclude: patterns to exclude from results, use glob syntax
suffixes: Provide to keep only files with these suffixes
Useful when wanting to keep files with different suffixes
Suffixes must include the dot, e.g. ".txt"
show_progress: If true, will show a progress bar as the files are loaded.
This forces an iteration through all matching files
to count them prior to loading them.
Examples:
.. code-block:: python
from langchain_community.document_loaders.blob_loaders import FileSystemBlobLoader
# Load a single file.
loader = FileSystemBlobLoader("/path/to/file.txt")
# Recursively load all text files in a directory.
loader = FileSystemBlobLoader("/path/to/directory", glob="**/*.txt")
# Recursively load all non-hidden files in a directory.
loader = FileSystemBlobLoader("/path/to/directory", glob="**/[!.]*")
# Load all files in a directory without recursion.
loader = FileSystemBlobLoader("/path/to/directory", glob="*")
# Recursively load all files in a directory, except for py or pyc files.
loader = FileSystemBlobLoader(
"/path/to/directory",
glob="**/*.txt",
exclude=["**/*.py", "**/*.pyc"]
)
""" # noqa: E501
if isinstance(path, Path):
_path = path
elif isinstance(path, str):
_path = Path(path)
else:
raise TypeError(f"Expected str or Path, got {type(path)}")
self.path = _path.expanduser() # Expand user to handle ~
self.glob = glob
self.suffixes = set(suffixes or [])
self.show_progress = show_progress
self.exclude = exclude if not isinstance(exclude, str) else (exclude,)
def yield_blobs(
self,
) -> Iterable[Blob]:
"""Yield blobs that match the requested pattern."""
iterator = _make_iterator(
length_func=self.count_matching_files, show_progress=self.show_progress
)
for path in iterator(self._yield_paths()):
yield Blob.from_path(path)
def _yield_paths(self) -> Iterable[Path]:
"""Yield paths that match the requested pattern."""
if self.path.is_file():
yield self.path
return
paths = self.path.glob(self.glob)
for path in paths:
if self.exclude:
if any(path.match(glob) for glob in self.exclude):
continue
if path.is_file():
if self.suffixes and path.suffix not in self.suffixes:
continue
yield path
def count_matching_files(self) -> int:
"""Count files that match the pattern without loading them."""
# Carry out a full iteration to count the files without
# materializing anything expensive in memory.
num = 0
for _ in self._yield_paths():
num += 1
return num
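A minimal usage sketch against the relocated loader; the langchain_core.blob_loaders.file_system import path is new on this branch, and the directory path is a placeholder:

from langchain_core.blob_loaders.file_system import FileSystemBlobLoader

# Recursively load Markdown files, skipping anything under a virtualenv directory.
loader = FileSystemBlobLoader(
    "/path/to/docs",
    glob="**/*.md",
    exclude=["**/.venv/**"],
)
for blob in loader.yield_blobs():
    print(blob.path, len(blob.as_string()))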

View File

@@ -0,0 +1,18 @@
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from langchain_core.blob_parsers.mime_type import MimeTypeBasedParser
def __getattr__(name):
if name == "MimeTypeBasedParser":
from langchain_core.blob_parsers.mime_type import MimeTypeBasedParser
return MimeTypeBasedParser
else:
raise AttributeError(
f"No {name} attribute in module langchain_core.blob_parsers."
)
__all__ = ["MimeTypeBasedParser"]

View File

@@ -0,0 +1,65 @@
from typing import Iterator, Mapping, Optional
from langchain_core.document_loaders import BaseBlobParser
from langchain_core.documents import Document
from langchain_core.documents.base import Blob
class MimeTypeBasedParser(BaseBlobParser):
"""Parser that uses `mime`-types to parse a blob.
This parser is useful for simple pipelines where the mime-type is sufficient
to determine how to parse a blob.
To use, configure handlers based on mime-types and pass them to the initializer.
Example:
.. code-block:: python
from langchain_community.document_loaders.parsers.generic import MimeTypeBasedParser
parser = MimeTypeBasedParser(
handlers={
"application/pdf": ...,
},
fallback_parser=...,
)
""" # noqa: E501
def __init__(
self,
handlers: Mapping[str, BaseBlobParser],
*,
fallback_parser: Optional[BaseBlobParser] = None,
) -> None:
"""Define a parser that uses mime-types to determine how to parse a blob.
Args:
handlers: A mapping from mime-types to functions that take a blob, parse it
and return a document.
fallback_parser: A fallback_parser parser to use if the mime-type is not
found in the handlers. If provided, this parser will be
used to parse blobs with all mime-types not found in
the handlers.
If not provided, a ValueError will be raised if the
mime-type is not found in the handlers.
"""
self.handlers = handlers
self.fallback_parser = fallback_parser
def lazy_parse(self, blob: Blob) -> Iterator[Document]:
"""Load documents from a blob."""
mimetype = blob.mimetype
if mimetype is None:
raise ValueError(f"{blob} does not have a mimetype.")
if mimetype in self.handlers:
handler = self.handlers[mimetype]
yield from handler.lazy_parse(blob)
else:
if self.fallback_parser is not None:
yield from self.fallback_parser.lazy_parse(blob)
else:
raise ValueError(f"Unsupported mime type: {mimetype}")
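A hedged end-to-end sketch of the parser in its new core location, dispatching plain text to TextParser and everything else to a fallback (the handlers here are placeholders, not part of this diff):

from langchain_core.blob_parsers.mime_type import MimeTypeBasedParser
from langchain_core.blob_parsers.txt import TextParser
from langchain_core.documents.base import Blob

parser = MimeTypeBasedParser(
    handlers={"text/plain": TextParser()},
    fallback_parser=TextParser(),  # without a fallback, unknown mime types raise ValueError
)
docs = list(parser.lazy_parse(Blob.from_data("hello world", mime_type="text/plain")))
print(docs[0].page_content)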

View File

@@ -0,0 +1,13 @@
from typing import Iterator
from langchain_core.document_loaders import BaseBlobParser
from langchain_core.documents import Document
from langchain_core.documents.base import Blob
class TextParser(BaseBlobParser):
"""Parser for text blobs."""
def lazy_parse(self, blob: Blob) -> Iterator[Document]: # type: ignore[valid-type]
"""Lazily parse the blob."""
yield Document(page_content=blob.as_string(), metadata={"source": blob.source}) # type: ignore[attr-defined]
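And the relocated TextParser on its own (import path assumed from this branch):

from langchain_core.blob_parsers.txt import TextParser
from langchain_core.documents.base import Blob

doc = next(TextParser().lazy_parse(Blob.from_data("some text", mime_type="text/plain")))
print(doc.page_content)  # -> some text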

View File

@@ -18,7 +18,8 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import List, Sequence, Union
from typing import List, Optional, Sequence, Union
from uuid import UUID
from langchain_core.messages import (
AIMessage,
@@ -206,38 +207,27 @@ class InMemoryChatMessageHistory(BaseChatMessageHistory, BaseModel):
messages: List[BaseMessage] = Field(default_factory=list)
"""A list of messages stored in memory."""
async def aget_messages(self) -> List[BaseMessage]:
"""Async version of getting messages.
Can over-ride this method to provide an efficient async implementation.
In general, fetching messages may involve IO to the underlying
persistence layer.
Returns:
List of messages.
"""
return self.messages
def add_message(self, message: BaseMessage) -> None:
"""Add a self-created message to the store.
Args:
message: The message to add.
"""
self.messages.append(message)
async def aadd_messages(self, messages: Sequence[BaseMessage]) -> None:
"""Async add messages to the store.
Args:
messages: The messages to add.
"""
self.add_messages(messages)
def clear(self) -> None:
"""Clear all messages from the store."""
self.messages = []
async def aclear(self) -> None:
"""Async clear all messages from the store."""
self.clear()
class BaseHistoryManager(ABC):
@abstractmethod
def get_session(self, session_id: Union[str, UUID]) -> BaseChatMessageHistory:
""""""
class InMemManager(BaseHistoryManager):
""""""
def __init__(self, sessions: Optional[dict] = None) -> None:
self.sessions = sessions or {}
def get_session(self, session_id: Union[str, UUID]) -> InMemoryChatMessageHistory:
if session_id not in self.sessions:
self.sessions[session_id] = InMemoryChatMessageHistory()
return self.sessions[session_id]
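InMemManager keeps one InMemoryChatMessageHistory per session id and creates it lazily on first lookup. A usage sketch built only from the hunk above:

from uuid import uuid4
from langchain_core.chat_history import InMemManager
from langchain_core.messages import HumanMessage

manager = InMemManager()
session_id = uuid4()
history = manager.get_session(session_id)           # created on first access
history.add_message(HumanMessage(content="hi"))
assert manager.get_session(session_id) is history   # repeat lookups return the same store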

View File

@@ -12,15 +12,11 @@ from typing import (
Union,
)
from langchain_core.blob_loaders import FileSystemBlobLoader
from langchain_core.document_loaders.base import BaseBlobParser, BaseLoader
from langchain_core.document_loaders.blob_loaders import BlobLoader
from langchain_core.documents import Document
from langchain_community.document_loaders.base import BaseBlobParser, BaseLoader
from langchain_community.document_loaders.blob_loaders import (
BlobLoader,
FileSystemBlobLoader,
)
from langchain_community.document_loaders.parsers.registry import get_parser
if TYPE_CHECKING:
from langchain_text_splitters import TextSplitter
@@ -172,10 +168,24 @@ class GenericLoader(BaseLoader):
# If there is an implementation of get_parser on the class, use it.
blob_parser = cls.get_parser(**(parser_kwargs or {}))
except NotImplementedError:
# if not then use the global registry.
blob_parser = get_parser(parser)
# if not then try to use the global registry.
try:
from langchain_community.document_loaders.parsers.registry import (
get_parser,
)
except ImportError as e:
raise ValueError("") from e
else:
blob_parser = get_parser(parser)
else:
blob_parser = get_parser(parser)
try:
from langchain_community.document_loaders.parsers.registry import (
get_parser,
)
except ImportError as e:
raise ValueError("") from e
else:
blob_parser = get_parser(parser)
else:
blob_parser = parser
return cls(blob_loader, blob_parser)
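The upshot is that the community parser registry becomes an optional dependency: it is only imported when a named parser such as "default" is requested, and a missing langchain_community surfaces as a ValueError instead of an import-time failure. Passing an explicit parser sidesteps the registry entirely; a hedged sketch (the langchain_core.document_loaders.generic path is taken from the langchain/__init__ hunk later in this diff):

from langchain_core.blob_loaders import FileSystemBlobLoader
from langchain_core.blob_parsers.txt import TextParser
from langchain_core.document_loaders.generic import GenericLoader

# Explicit parser: no lookup in langchain_community's registry is needed.
loader = GenericLoader(FileSystemBlobLoader("/path/to/notes", glob="**/*.txt"), TextParser())
for doc in loader.lazy_load():
    print(doc.metadata["source"])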

View File

@@ -293,3 +293,6 @@ class Document(BaseMedia):
return f"page_content='{self.page_content}' metadata={self.metadata}"
else:
return f"page_content='{self.page_content}'"
def __len__(self) -> int:
return len(self.page_content)
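With __len__ defined, len() on a Document reports the length of its page_content:

from langchain_core.documents import Document

assert len(Document(page_content="hello")) == 5  # counts page_content characters, ignores metadata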

View File

@@ -35,7 +35,9 @@ from langchain_core.utils import get_pydantic_field_names
if TYPE_CHECKING:
from langchain_core.caches import BaseCache
from langchain_core.callbacks import Callbacks
from langchain_core.chat_history import BaseHistoryManager
from langchain_core.outputs import LLMResult
from langchain_core.runnables.history import RunnableWithMessageHistory
class LangSmithParams(TypedDict, total=False):
@@ -363,7 +365,9 @@ class BaseLanguageModel(
"""
return len(self.get_token_ids(text))
def get_num_tokens_from_messages(self, messages: List[BaseMessage]) -> int:
def get_num_tokens_from_messages(
self, messages: List[MessageLikeRepresentation]
) -> int:
"""Get the number of tokens in the messages.
Useful for checking if an input fits in a model's context window.
@@ -374,6 +378,7 @@ class BaseLanguageModel(
Returns:
The sum of the number of tokens across the messages.
"""
messages = convert_to_messages(messages)
return sum([self.get_num_tokens(get_buffer_string([m])) for m in messages])
@classmethod
@@ -383,3 +388,14 @@ class BaseLanguageModel(
Use get_pydantic_field_names.
"""
return get_pydantic_field_names(cls)
def with_history(
self, get_session_history: Union[Callable, BaseHistoryManager]
) -> RunnableWithMessageHistory:
from langchain_core.chat_history import BaseHistoryManager
from langchain_core.runnables.history import RunnableWithMessageHistory
if isinstance(get_session_history, BaseHistoryManager):
get_session_history = get_session_history.get_session
return RunnableWithMessageHistory(self, get_session_history)
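with_history wraps the model in RunnableWithMessageHistory and accepts either a session-lookup callable or a BaseHistoryManager, whose get_session method is used. A hedged sketch of the intended flow, using the in-memory manager and a stand-in chat model so nothing external is required:

from langchain_core.chat_history import InMemManager
from langchain_core.language_models.fake_chat_models import FakeListChatModel
from langchain_core.messages import HumanMessage

model = FakeListChatModel(responses=["hello!"])       # placeholder; any chat model would do
chatty = model.with_history(InMemManager())           # same as passing manager.get_session

chatty.invoke(
    [HumanMessage(content="hi there")],
    config={"configurable": {"session_id": "demo"}},  # RunnableWithMessageHistory's default key
)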

View File

@@ -6,6 +6,13 @@ from importlib import metadata
from typing import Any, Optional
from langchain_core._api.deprecation import surface_langchain_deprecation_warnings
from langchain_core.chat_history import InMemManager
from langchain_core.document_loaders.generic import GenericLoader
from langchain_core.messages import AIMessage, AnyMessage, HumanMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.tools import tool
from langchain.chat_models import init_chat_model as chat_model
try:
__version__ = metadata.version(__package__)
@@ -436,4 +443,13 @@ __all__ = [
"QAWithSourcesChain",
"LlamaCpp",
"HuggingFaceTextGenInference",
"chat_model",
"InMemManager",
"GenericLoader",
"ChatPromptTemplate",
"tool",
"AnyMessage",
"AIMessage",
"SystemMessage",
"HumanMessage",
]
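Together with the with_history addition in langchain_core, these new top-level exports make for a shorter quickstart: chat model, prompt, messages, tools and chat history are all importable from langchain directly. A hedged sketch of that surface on this branch (chat_model is the init_chat_model alias imported above and still needs the matching provider package and credentials):

from langchain import ChatPromptTemplate, HumanMessage, InMemManager, chat_model

llm = chat_model("gpt-4o-mini", model_provider="openai")  # requires langchain-openai and an API key
with_memory = llm.with_history(InMemManager())
with_memory.invoke(
    [HumanMessage(content="Remember that my name is Ada.")],
    config={"configurable": {"session_id": "demo"}},
)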

View File

@@ -74,7 +74,6 @@ if TYPE_CHECKING:
FacebookChatLoader,
FaunaLoader,
FigmaFileLoader,
FileSystemBlobLoader,
GCSDirectoryLoader,
GCSFileLoader,
GeoDataFrameLoader,
@@ -188,6 +187,7 @@ if TYPE_CHECKING:
YoutubeLoader,
YuqueLoader,
)
from langchain_core.blob_loaders.file_system import FileSystemBlobLoader
from langchain_core.document_loaders import Blob, BlobLoader
@@ -437,7 +437,6 @@ __all__ = [
"FacebookChatLoader",
"FaunaLoader",
"FigmaFileLoader",
"FileSystemBlobLoader",
"GCSDirectoryLoader",
"GCSFileLoader",
"GeoDataFrameLoader",

View File

@@ -6,9 +6,9 @@ from langchain._api import create_importer
if TYPE_CHECKING:
from langchain_community.document_loaders import (
FileSystemBlobLoader,
YoutubeAudioLoader,
)
from langchain_core.blob_loaders.file_system import FileSystemBlobLoader
# Create a way to dynamically look up deprecated imports.
# Used to consolidate logic for raising deprecation warnings and
@@ -31,6 +31,5 @@ def __getattr__(name: str) -> Any:
__all__ = [
"BlobLoader",
"Blob",
"FileSystemBlobLoader",
"YoutubeAudioLoader",
]

View File

@@ -3,7 +3,7 @@ from typing import TYPE_CHECKING, Any
from langchain._api import create_importer
if TYPE_CHECKING:
from langchain_community.document_loaders.parsers.generic import MimeTypeBasedParser
from langchain_core.blob_parsers.mime_type import MimeTypeBasedParser
# Create a way to dynamically look up deprecated imports.
# Used to consolidate logic for raising deprecation warnings and
@@ -20,6 +20,4 @@ def __getattr__(name: str) -> Any:
return _import_attribute(name)
__all__ = [
"MimeTypeBasedParser",
]
__all__ = []

View File

@@ -3,7 +3,7 @@ from typing import TYPE_CHECKING, Any
from langchain._api import create_importer
if TYPE_CHECKING:
from langchain_community.document_loaders.parsers.txt import TextParser
from langchain_core.blob_parsers.txt import TextParser
# Create a way to dynamically look up deprecated imports.
# Used to consolidate logic for raising deprecation warnings and
@@ -18,6 +18,4 @@ def __getattr__(name: str) -> Any:
return _import_attribute(name)
__all__ = [
"TextParser",
]
__all__ = []

View File

@@ -61,6 +61,7 @@ from langchain_core.messages import (
ToolCall,
ToolMessage,
ToolMessageChunk,
convert_to_messages,
)
from langchain_core.messages.ai import UsageMetadata
from langchain_core.messages.tool import tool_call_chunk
@@ -878,7 +879,7 @@ class BaseChatOpenAI(BaseChatModel):
return encoding_model.encode(text)
# TODO: Count bound tools as part of input.
def get_num_tokens_from_messages(self, messages: List[BaseMessage]) -> int:
def get_num_tokens_from_messages(self, messages: List[MessageLikeRepresentation]) -> int:
"""Calculate num tokens for gpt-3.5-turbo and gpt-4 with tiktoken package.
**Requirements**: You must have the ``pillow`` installed if you want to count
@@ -891,6 +892,7 @@ class BaseChatOpenAI(BaseChatModel):
main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb"""
if sys.version_info[1] <= 7:
return super().get_num_tokens_from_messages(messages)
messages = convert_to_messages(messages)
model, encoding = self._get_encoding_model()
if model.startswith("gpt-3.5-turbo-0301"):
# every message follows <im_start>{role/name}\n{content}<im_end>\n
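Because the inputs are now run through convert_to_messages, dict- and tuple-style message representations can be token-counted directly, without building BaseMessage objects first. A hedged sketch (needs langchain-openai, tiktoken, and an OPENAI_API_KEY for client construction; the counting itself is local):

from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini")
n = llm.get_num_tokens_from_messages(
    [
        {"role": "system", "content": "You are terse."},
        ("human", "What is 2 + 2?"),
    ]
)
print(n)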