mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-13 08:27:03 +00:00
chat_loaders
refactoring (#10381)
Replaced the unnecessary module alias `from langchain.chat_loaders import base as chat_loaders` with the direct import `from langchain.chat_loaders.base import BaseChatLoader, ChatSession`, and simplified the corresponding type annotations. @eyurtsev
This commit is contained in:
parent
40d9191955
commit
90504fc499
@ -4,13 +4,13 @@ from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Iterator, List, Optional, Union
|
||||
|
||||
from langchain import schema
|
||||
from langchain.chat_loaders import base as chat_loaders
|
||||
from langchain.chat_loaders.base import BaseChatLoader, ChatSession
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import sqlite3
|
||||
|
||||
|
||||
class IMessageChatLoader(chat_loaders.BaseChatLoader):
|
||||
class IMessageChatLoader(BaseChatLoader):
|
||||
"""Load chat sessions from the `iMessage` chat.db SQLite file.
|
||||
|
||||
It only works on macOS when you have iMessage enabled and have the chat.db file.
|
||||
@ -18,8 +18,8 @@ class IMessageChatLoader(chat_loaders.BaseChatLoader):
|
||||
The chat.db file is likely located at ~/Library/Messages/chat.db. However, your
|
||||
terminal may not have permission to access this file. To resolve this, you can
|
||||
copy the file to a different location, change the permissions of the file, or
|
||||
grant full disk access for your terminal emulator in System Settings > Security
|
||||
and Privacy > Full Disk Access.
|
||||
grant full disk access for your terminal emulator
|
||||
in System Settings > Security and Privacy > Full Disk Access.
|
||||
"""
|
||||
|
||||
def __init__(self, path: Optional[Union[str, Path]] = None):
|
||||
@ -46,7 +46,7 @@ class IMessageChatLoader(chat_loaders.BaseChatLoader):
|
||||
|
||||
def _load_single_chat_session(
|
||||
self, cursor: "sqlite3.Cursor", chat_id: int
|
||||
) -> chat_loaders.ChatSession:
|
||||
) -> ChatSession:
|
||||
"""
|
||||
Load a single chat session from the iMessage chat.db.
|
||||
|
||||
@ -83,9 +83,9 @@ class IMessageChatLoader(chat_loaders.BaseChatLoader):
|
||||
)
|
||||
)
|
||||
|
||||
return chat_loaders.ChatSession(messages=results)
|
||||
return ChatSession(messages=results)
|
||||
|
||||
def lazy_load(self) -> Iterator[chat_loaders.ChatSession]:
|
||||
def lazy_load(self) -> Iterator[ChatSession]:
|
||||
"""
|
||||
Lazy load the chat sessions from the iMessage chat.db
|
||||
and yield them in the required format.
|
||||
|
@ -6,12 +6,12 @@ from pathlib import Path
|
||||
from typing import Dict, Iterator, List, Union
|
||||
|
||||
from langchain import schema
|
||||
from langchain.chat_loaders import base as chat_loaders
|
||||
from langchain.chat_loaders.base import BaseChatLoader, ChatSession
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SlackChatLoader(chat_loaders.BaseChatLoader):
|
||||
class SlackChatLoader(BaseChatLoader):
|
||||
"""Load `Slack` conversations from a dump zip file."""
|
||||
|
||||
def __init__(
|
||||
@ -27,9 +27,7 @@ class SlackChatLoader(chat_loaders.BaseChatLoader):
|
||||
if not self.zip_path.exists():
|
||||
raise FileNotFoundError(f"File {self.zip_path} not found")
|
||||
|
||||
def _load_single_chat_session(
|
||||
self, messages: List[Dict]
|
||||
) -> chat_loaders.ChatSession:
|
||||
def _load_single_chat_session(self, messages: List[Dict]) -> ChatSession:
|
||||
results: List[Union[schema.AIMessage, schema.HumanMessage]] = []
|
||||
previous_sender = None
|
||||
for message in messages:
|
||||
@ -62,7 +60,7 @@ class SlackChatLoader(chat_loaders.BaseChatLoader):
|
||||
)
|
||||
)
|
||||
previous_sender = sender
|
||||
return chat_loaders.ChatSession(messages=results)
|
||||
return ChatSession(messages=results)
|
||||
|
||||
def _read_json(self, zip_file: zipfile.ZipFile, file_path: str) -> List[dict]:
|
||||
"""Read JSON data from a zip subfile."""
|
||||
@ -72,7 +70,7 @@ class SlackChatLoader(chat_loaders.BaseChatLoader):
|
||||
raise ValueError(f"Expected list of dictionaries, got {type(data)}")
|
||||
return data
|
||||
|
||||
def lazy_load(self) -> Iterator[chat_loaders.ChatSession]:
|
||||
def lazy_load(self) -> Iterator[ChatSession]:
|
||||
"""
|
||||
Lazy load the chat sessions from the Slack dump file and yield them
|
||||
in the required format.
|
||||
|
@ -7,12 +7,12 @@ from pathlib import Path
|
||||
from typing import Iterator, List, Union
|
||||
|
||||
from langchain import schema
|
||||
from langchain.chat_loaders import base as chat_loaders
|
||||
from langchain.chat_loaders.base import BaseChatLoader, ChatSession
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TelegramChatLoader(chat_loaders.BaseChatLoader):
|
||||
class TelegramChatLoader(BaseChatLoader):
|
||||
"""Load `telegram` conversations to LangChain chat messages.
|
||||
|
||||
To export, use the Telegram Desktop app from
|
||||
@ -35,16 +35,14 @@ class TelegramChatLoader(chat_loaders.BaseChatLoader):
|
||||
"""
|
||||
self.path = path if isinstance(path, str) else str(path)
|
||||
|
||||
def _load_single_chat_session_html(
|
||||
self, file_path: str
|
||||
) -> chat_loaders.ChatSession:
|
||||
def _load_single_chat_session_html(self, file_path: str) -> ChatSession:
|
||||
"""Load a single chat session from an HTML file.
|
||||
|
||||
Args:
|
||||
file_path (str): Path to the HTML file.
|
||||
|
||||
Returns:
|
||||
chat_loaders.ChatSession: The loaded chat session.
|
||||
ChatSession: The loaded chat session.
|
||||
"""
|
||||
try:
|
||||
from bs4 import BeautifulSoup
|
||||
@ -81,18 +79,16 @@ class TelegramChatLoader(chat_loaders.BaseChatLoader):
|
||||
)
|
||||
previous_sender = from_name
|
||||
|
||||
return chat_loaders.ChatSession(messages=results)
|
||||
return ChatSession(messages=results)
|
||||
|
||||
def _load_single_chat_session_json(
|
||||
self, file_path: str
|
||||
) -> chat_loaders.ChatSession:
|
||||
def _load_single_chat_session_json(self, file_path: str) -> ChatSession:
|
||||
"""Load a single chat session from a JSON file.
|
||||
|
||||
Args:
|
||||
file_path (str): Path to the JSON file.
|
||||
|
||||
Returns:
|
||||
chat_loaders.ChatSession: The loaded chat session.
|
||||
ChatSession: The loaded chat session.
|
||||
"""
|
||||
with open(file_path, "r", encoding="utf-8") as file:
|
||||
data = json.load(file)
|
||||
@ -114,7 +110,7 @@ class TelegramChatLoader(chat_loaders.BaseChatLoader):
|
||||
)
|
||||
)
|
||||
|
||||
return chat_loaders.ChatSession(messages=results)
|
||||
return ChatSession(messages=results)
|
||||
|
||||
def _iterate_files(self, path: str) -> Iterator[str]:
|
||||
"""Iterate over files in a directory or zip file.
|
||||
@ -139,12 +135,12 @@ class TelegramChatLoader(chat_loaders.BaseChatLoader):
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
yield zip_file.extract(file, path=temp_dir)
|
||||
|
||||
def lazy_load(self) -> Iterator[chat_loaders.ChatSession]:
|
||||
def lazy_load(self) -> Iterator[ChatSession]:
|
||||
"""Lazy load the messages from the chat file and yield them
|
||||
in as chat sessions.
|
||||
|
||||
Yields:
|
||||
chat_loaders.ChatSession: The loaded chat session.
|
||||
ChatSession: The loaded chat session.
|
||||
"""
|
||||
for file_path in self._iterate_files(self.path):
|
||||
if file_path.endswith(".html"):
|
||||
|
@ -5,13 +5,13 @@ import zipfile
|
||||
from typing import Iterator, List, Union
|
||||
|
||||
from langchain import schema
|
||||
from langchain.chat_loaders import base as chat_loaders
|
||||
from langchain.chat_loaders.base import BaseChatLoader, ChatSession
|
||||
from langchain.schema import messages
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class WhatsAppChatLoader(chat_loaders.BaseChatLoader):
|
||||
class WhatsAppChatLoader(BaseChatLoader):
|
||||
"""Load `WhatsApp` conversations from a dump zip file or directory."""
|
||||
|
||||
def __init__(self, path: str):
|
||||
@ -42,7 +42,7 @@ class WhatsAppChatLoader(chat_loaders.BaseChatLoader):
|
||||
flags=re.IGNORECASE,
|
||||
)
|
||||
|
||||
def _load_single_chat_session(self, file_path: str) -> chat_loaders.ChatSession:
|
||||
def _load_single_chat_session(self, file_path: str) -> ChatSession:
|
||||
"""Load a single chat session from a file.
|
||||
|
||||
Args:
|
||||
@ -84,7 +84,7 @@ class WhatsAppChatLoader(chat_loaders.BaseChatLoader):
|
||||
)
|
||||
else:
|
||||
logger.debug(f"Could not parse line: {line}")
|
||||
return chat_loaders.ChatSession(messages=results)
|
||||
return ChatSession(messages=results)
|
||||
|
||||
def _iterate_files(self, path: str) -> Iterator[str]:
|
||||
"""Iterate over the files in a directory or zip file.
|
||||
@ -108,7 +108,7 @@ class WhatsAppChatLoader(chat_loaders.BaseChatLoader):
|
||||
if file.endswith(".txt"):
|
||||
yield zip_file.extract(file)
|
||||
|
||||
def lazy_load(self) -> Iterator[chat_loaders.ChatSession]:
|
||||
def lazy_load(self) -> Iterator[ChatSession]:
|
||||
"""Lazy load the messages from the chat file and yield
|
||||
them as chat sessions.
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user