mirror of
				https://github.com/hwchase17/langchain.git
				synced 2025-11-03 17:54:10 +00:00 
			
		
		
		
	Moved the following modules to new package langchain-community in a backwards compatible fashion: ``` mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories 
community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community ``` Moved the following to core ``` mv langchain/langchain/utils/json_schema.py core/langchain_core/utils mv langchain/langchain/utils/html.py core/langchain_core/utils mv langchain/langchain/utils/strings.py core/langchain_core/utils cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py rm langchain/langchain/utils/env.py ``` See .scripts/community_split/script_integrations.sh for all changes
		
			
				
	
	
		
			97 lines
		
	
	
		
			3.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			97 lines
		
	
	
		
			3.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
import json
 | 
						|
import urllib
 | 
						|
from datetime import datetime
 | 
						|
from typing import Iterator, List, Optional
 | 
						|
 | 
						|
from langchain_core.documents import Document
 | 
						|
from langchain_core.utils import get_from_env
 | 
						|
 | 
						|
from langchain_community.document_loaders.base import BaseLoader
 | 
						|
 | 
						|
# URL template stored as each document's "source" metadata; ``{id}`` is
# replaced with the note's id.
LINK_NOTE_TEMPLATE = "joplin://x-callback-url/openNote?id={id}"
 | 
						|
 | 
						|
 | 
						|
class JoplinLoader(BaseLoader):
    """Load notes from `Joplin`.

    In order to use this loader, you need to have Joplin running with the
    Web Clipper enabled (look for "Web Clipper" in the app settings).

    To get the access token, you need to go to the Web Clipper options and
    under "Advanced Options" you will find the access token.

    You can find more information about the Web Clipper service here:
    https://joplinapp.org/clipper/

    Each note is turned into one ``Document`` whose ``page_content`` is the
    note body and whose metadata carries ``source``, ``folder``, ``tags``,
    ``title``, ``created_time`` and ``updated_time``.
    """

    def __init__(
        self,
        access_token: Optional[str] = None,
        port: int = 41184,
        host: str = "localhost",
    ) -> None:
        """Build the URL templates used to query the Web Clipper service.

        Args:
            access_token: The access token to use. When not given (or empty),
                it is resolved through ``get_from_env`` using the
                ``JOPLIN_ACCESS_TOKEN`` environment variable name.
            port: The port where the Web Clipper service is running. Default is 41184.
            host: The host where the Web Clipper service is running.
                Default is localhost.
        """
        access_token = access_token or get_from_env(
            "access_token", "JOPLIN_ACCESS_TOKEN"
        )
        base_url = f"http://{host}:{port}"
        # The doubled braces survive the f-string as ``{page}`` / ``{id}``
        # placeholders, which are filled in later with ``str.format``.
        self._get_note_url = (
            f"{base_url}/notes?token={access_token}"
            f"&fields=id,parent_id,title,body,created_time,updated_time&page={{page}}"
        )
        self._get_folder_url = (
            f"{base_url}/folders/{{id}}?token={access_token}&fields=title"
        )
        self._get_tag_url = (
            f"{base_url}/notes/{{id}}/tags?token={access_token}&fields=title"
        )

    def _fetch_json(self, url: str) -> dict:
        """Request ``url`` and return the response body parsed as JSON.

        Args:
            url: Fully formatted URL to request.

        Returns:
            The decoded JSON payload.
        """
        # A bare ``import urllib`` does not load the ``urllib.request``
        # submodule, so import it explicitly here instead of relying on some
        # other module having imported it first.
        import urllib.request

        request = urllib.request.Request(url)
        with urllib.request.urlopen(request) as response:
            return json.loads(response.read().decode())

    def _get_notes(self) -> Iterator[Document]:
        """Yield one ``Document`` per note, page by page.

        Pages are requested starting at 1 until the response's ``has_more``
        field is falsy. The folder title and tag titles of every note are
        fetched with one extra request each.
        """
        page = 1
        has_more = True
        while has_more:
            json_data = self._fetch_json(self._get_note_url.format(page=page))
            for note in json_data["items"]:
                metadata = {
                    "source": LINK_NOTE_TEMPLATE.format(id=note["id"]),
                    "folder": self._get_folder(note["parent_id"]),
                    "tags": self._get_tags(note["id"]),
                    "title": note["title"],
                    "created_time": self._convert_date(note["created_time"]),
                    "updated_time": self._convert_date(note["updated_time"]),
                }
                yield Document(page_content=note["body"], metadata=metadata)

            has_more = json_data["has_more"]
            page += 1

    def _get_folder(self, folder_id: str) -> str:
        """Return the title of the folder identified by ``folder_id``."""
        json_data = self._fetch_json(self._get_folder_url.format(id=folder_id))
        return json_data["title"]

    def _get_tags(self, note_id: str) -> List[str]:
        """Return the tag titles listed in the tags response for ``note_id``."""
        json_data = self._fetch_json(self._get_tag_url.format(id=note_id))
        return [tag["title"] for tag in json_data["items"]]

    def _convert_date(self, date: int) -> str:
        """Format a millisecond timestamp as ``YYYY-MM-DD HH:MM:SS``.

        The value is divided by 1000 to get seconds and rendered in the local
        timezone of the running process (``datetime.fromtimestamp`` without a
        ``tz`` argument).
        """
        return datetime.fromtimestamp(date / 1000).strftime("%Y-%m-%d %H:%M:%S")

    def lazy_load(self) -> Iterator[Document]:
        """Lazily yield every note as a ``Document``."""
        yield from self._get_notes()

    def load(self) -> List[Document]:
        """Load every note eagerly and return them as a list."""
        return list(self.lazy_load())
 |