community[major], core[patch], langchain[patch], experimental[patch]: Create langchain-community (#14463)
Moved the following modules to the new package langchain-community in a backwards compatible fashion:

```
mv langchain/langchain/adapters community/langchain_community
mv langchain/langchain/callbacks community/langchain_community/callbacks
mv langchain/langchain/chat_loaders community/langchain_community
mv langchain/langchain/chat_models community/langchain_community
mv langchain/langchain/document_loaders community/langchain_community
mv langchain/langchain/docstore community/langchain_community
mv langchain/langchain/document_transformers community/langchain_community
mv langchain/langchain/embeddings community/langchain_community
mv langchain/langchain/graphs community/langchain_community
mv langchain/langchain/llms community/langchain_community
mv langchain/langchain/memory/chat_message_histories community/langchain_community
mv langchain/langchain/retrievers community/langchain_community
mv langchain/langchain/storage community/langchain_community
mv langchain/langchain/tools community/langchain_community
mv langchain/langchain/utilities community/langchain_community
mv langchain/langchain/vectorstores community/langchain_community
mv langchain/langchain/agents/agent_toolkits community/langchain_community
mv langchain/langchain/cache.py community/langchain_community
```

Moved the following to core:

```
mv langchain/langchain/utils/json_schema.py core/langchain_core/utils
mv langchain/langchain/utils/html.py core/langchain_core/utils
mv langchain/langchain/utils/strings.py core/langchain_core/utils
cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py
rm langchain/langchain/utils/env.py
```

See .scripts/community_split/script_integrations.sh for all changes.
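Backwards compatibility relies on the old `langchain` import paths continuing to resolve. A minimal sketch of that re-export pattern is below; the module path and symbol are illustrative only, and the shims actually generated by the split script may differ (for example, by emitting deprecation warnings):

```python
# Illustrative shim left at the old path (e.g. langchain/llms/openai.py).
# It re-exports the implementation that now lives in langchain-community,
# so existing `from langchain.llms.openai import OpenAI` imports keep working.
from langchain_community.llms.openai import OpenAI

__all__ = ["OpenAI"]
```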
@@ -0,0 +1,390 @@
import os
import warnings
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional, Tuple, Union
from uuid import UUID

from langchain_core.agents import AgentAction, AgentFinish
from langchain_core.callbacks import BaseCallbackHandler
from langchain_core.messages import BaseMessage, ChatMessage
from langchain_core.outputs import Generation, LLMResult


class LabelStudioMode(Enum):
    """Label Studio mode enumerator."""

    PROMPT = "prompt"
    CHAT = "chat"


def get_default_label_configs(
    mode: Union[str, LabelStudioMode],
) -> Tuple[str, LabelStudioMode]:
    """Get default Label Studio configs for the given mode.

    Parameters:
        mode: Label Studio mode ("prompt" or "chat")

    Returns: Tuple of Label Studio config and mode
    """
    _default_label_configs = {
        LabelStudioMode.PROMPT.value: """
<View>
<Style>
    .prompt-box {
        background-color: white;
        border-radius: 10px;
        box-shadow: 0px 4px 6px rgba(0, 0, 0, 0.1);
        padding: 20px;
    }
</Style>
<View className="root">
    <View className="prompt-box">
        <Text name="prompt" value="$prompt"/>
    </View>
    <TextArea name="response" toName="prompt"
              maxSubmissions="1" editable="true"
              required="true"/>
</View>
<Header value="Rate the response:"/>
<Rating name="rating" toName="prompt"/>
</View>""",
        LabelStudioMode.CHAT.value: """
<View>
<View className="root">
    <Paragraphs name="dialogue"
                value="$prompt"
                layout="dialogue"
                textKey="content"
                nameKey="role"
                granularity="sentence"/>
    <Header value="Final response:"/>
    <TextArea name="response" toName="dialogue"
              maxSubmissions="1" editable="true"
              required="true"/>
</View>
<Header value="Rate the response:"/>
<Rating name="rating" toName="dialogue"/>
</View>""",
    }

    if isinstance(mode, str):
        mode = LabelStudioMode(mode)

    return _default_label_configs[mode.value], mode


class LabelStudioCallbackHandler(BaseCallbackHandler):
    """Label Studio callback handler.
    Provides the ability to send predictions to Label Studio
    for human evaluation, feedback and annotation.

    Parameters:
        api_key: Label Studio API key
        url: Label Studio URL
        project_id: Label Studio project ID
        project_name: Label Studio project name
        project_config: Label Studio project config (XML)
        mode: Label Studio mode ("prompt" or "chat")

    Examples:
        >>> from langchain_community.llms import OpenAI
        >>> from langchain_community.callbacks import LabelStudioCallbackHandler
        >>> handler = LabelStudioCallbackHandler(
        ...             api_key='<your_key_here>',
        ...             url='http://localhost:8080',
        ...             project_name='LangChain-%Y-%m-%d',
        ...             mode='prompt'
        ... )
        >>> llm = OpenAI(callbacks=[handler])
        >>> llm.predict('Tell me a story about a dog.')
    """

    DEFAULT_PROJECT_NAME: str = "LangChain-%Y-%m-%d"

    def __init__(
        self,
        api_key: Optional[str] = None,
        url: Optional[str] = None,
        project_id: Optional[int] = None,
        project_name: str = DEFAULT_PROJECT_NAME,
        project_config: Optional[str] = None,
        mode: Union[str, LabelStudioMode] = LabelStudioMode.PROMPT,
    ):
        super().__init__()

        # Import LabelStudio SDK
        try:
            import label_studio_sdk as ls
        except ImportError:
            raise ImportError(
                f"You're using {self.__class__.__name__} in your code,"
                f" but you don't have the LabelStudio SDK "
                f"Python package installed or upgraded to the latest version. "
                f"Please run `pip install -U label-studio-sdk`"
                f" before using this callback."
            )

        # Check if Label Studio API key is provided
        if not api_key:
            if os.getenv("LABEL_STUDIO_API_KEY"):
                api_key = str(os.getenv("LABEL_STUDIO_API_KEY"))
            else:
                raise ValueError(
                    f"You're using {self.__class__.__name__} in your code,"
                    f" but the Label Studio API key is not provided. "
                    f"Please provide Label Studio API key: "
                    f"go to the Label Studio instance, navigate to "
                    f"Account & Settings -> Access Token and copy the key. "
                    f"Use the key as a parameter for the callback: "
                    f"{self.__class__.__name__}"
                    f"(label_studio_api_key='<your_key_here>', ...) or "
                    f"set the environment variable LABEL_STUDIO_API_KEY=<your_key_here>"
                )
        self.api_key = api_key

        if not url:
            if os.getenv("LABEL_STUDIO_URL"):
                url = os.getenv("LABEL_STUDIO_URL")
            else:
                warnings.warn(
                    f"Label Studio URL is not provided, "
                    f"using default URL: {ls.LABEL_STUDIO_DEFAULT_URL}. "
                    f"If you want to provide your own URL, use the parameter: "
                    f"{self.__class__.__name__}"
                    f"(label_studio_url='<your_url_here>', ...) "
                    f"or set the environment variable LABEL_STUDIO_URL=<your_url_here>"
                )
                url = ls.LABEL_STUDIO_DEFAULT_URL
        self.url = url

        # Maps run_id to prompts
        self.payload: Dict[str, Dict] = {}

        self.ls_client = ls.Client(url=self.url, api_key=self.api_key)
        self.project_name = project_name
        if project_config:
            self.project_config = project_config
            self.mode = None
        else:
            self.project_config, self.mode = get_default_label_configs(mode)

        self.project_id = project_id or os.getenv("LABEL_STUDIO_PROJECT_ID")
        if self.project_id is not None:
            self.ls_project = self.ls_client.get_project(int(self.project_id))
        else:
            project_title = datetime.today().strftime(self.project_name)
            existing_projects = self.ls_client.get_projects(title=project_title)
            if existing_projects:
                self.ls_project = existing_projects[0]
                self.project_id = self.ls_project.id
            else:
                self.ls_project = self.ls_client.create_project(
                    title=project_title, label_config=self.project_config
                )
                self.project_id = self.ls_project.id
        self.parsed_label_config = self.ls_project.parsed_label_config

        # Find the first TextArea tag
        # "from_name", "to_name", "value" will be used to create predictions
        self.from_name, self.to_name, self.value, self.input_type = (
            None,
            None,
            None,
            None,
        )
        for tag_name, tag_info in self.parsed_label_config.items():
            if tag_info["type"] == "TextArea":
                self.from_name = tag_name
                self.to_name = tag_info["to_name"][0]
                self.value = tag_info["inputs"][0]["value"]
                self.input_type = tag_info["inputs"][0]["type"]
                break
        if not self.from_name:
            error_message = (
                f'Label Studio project "{self.project_name}" '
                f"does not have a TextArea tag. "
                f"Please add a TextArea tag to the project."
            )
            if self.mode == LabelStudioMode.PROMPT:
                error_message += (
                    "\nHINT: go to project Settings -> "
                    "Labeling Interface -> Browse Templates"
                    ' and select "Generative AI -> '
                    'Supervised Language Model Fine-tuning" template.'
                )
            else:
                error_message += (
                    "\nHINT: go to project Settings -> "
                    "Labeling Interface -> Browse Templates"
                    " and check available templates under "
                    '"Generative AI" section.'
                )
            raise ValueError(error_message)

    def add_prompts_generations(
        self, run_id: str, generations: List[List[Generation]]
    ) -> None:
        # Create tasks in Label Studio
        tasks = []
        prompts = self.payload[run_id]["prompts"]
        model_version = (
            self.payload[run_id]["kwargs"]
            .get("invocation_params", {})
            .get("model_name")
        )
        for prompt, generation in zip(prompts, generations):
            tasks.append(
                {
                    "data": {
                        self.value: prompt,
                        "run_id": run_id,
                    },
                    "predictions": [
                        {
                            "result": [
                                {
                                    "from_name": self.from_name,
                                    "to_name": self.to_name,
                                    "type": "textarea",
                                    "value": {"text": [g.text for g in generation]},
                                }
                            ],
                            "model_version": model_version,
                        }
                    ],
                }
            )
        self.ls_project.import_tasks(tasks)

    def on_llm_start(
        self,
        serialized: Dict[str, Any],
        prompts: List[str],
        **kwargs: Any,
    ) -> None:
        """Save the prompts in memory when an LLM starts."""
        if self.input_type != "Text":
            raise ValueError(
                f'\nLabel Studio project "{self.project_name}" '
                f"has an input type <{self.input_type}>. "
                f'To make it work with the mode="prompt", '
                f"the input type should be <Text>.\n"
                f"Read more here https://labelstud.io/tags/text"
            )
        run_id = str(kwargs["run_id"])
        self.payload[run_id] = {"prompts": prompts, "kwargs": kwargs}

    def _get_message_role(self, message: BaseMessage) -> str:
        """Get the role of the message."""
        if isinstance(message, ChatMessage):
            return message.role
        else:
            return message.__class__.__name__

    def on_chat_model_start(
        self,
        serialized: Dict[str, Any],
        messages: List[List[BaseMessage]],
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        tags: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> Any:
        """Save the prompts in memory when a chat model starts."""
        if self.input_type != "Paragraphs":
            raise ValueError(
                f'\nLabel Studio project "{self.project_name}" '
                f"has an input type <{self.input_type}>. "
                f'To make it work with the mode="chat", '
                f"the input type should be <Paragraphs>.\n"
                f"Read more here https://labelstud.io/tags/paragraphs"
            )

        prompts = []
        for message_list in messages:
            dialog = []
            for message in message_list:
                dialog.append(
                    {
                        "role": self._get_message_role(message),
                        "content": message.content,
                    }
                )
            prompts.append(dialog)
        self.payload[str(run_id)] = {
            "prompts": prompts,
            "tags": tags,
            "metadata": metadata,
            "run_id": run_id,
            "parent_run_id": parent_run_id,
            "kwargs": kwargs,
        }

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """Do nothing when a new token is generated."""
        pass

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        """Create a new Label Studio task for each prompt and generation."""
        run_id = str(kwargs["run_id"])

        # Submit results to Label Studio
        self.add_prompts_generations(run_id, response.generations)

        # Pop current run from `self.payload`
        self.payload.pop(run_id)

    def on_llm_error(self, error: BaseException, **kwargs: Any) -> None:
        """Do nothing when LLM outputs an error."""
        pass

    def on_chain_start(
        self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
    ) -> None:
        pass

    def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
        pass

    def on_chain_error(self, error: BaseException, **kwargs: Any) -> None:
        """Do nothing when LLM chain outputs an error."""
        pass

    def on_tool_start(
        self,
        serialized: Dict[str, Any],
        input_str: str,
        **kwargs: Any,
    ) -> None:
        """Do nothing when tool starts."""
        pass

    def on_agent_action(self, action: AgentAction, **kwargs: Any) -> Any:
        """Do nothing when agent takes a specific action."""
        pass

    def on_tool_end(
        self,
        output: str,
        observation_prefix: Optional[str] = None,
        llm_prefix: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Do nothing when tool ends."""
        pass

    def on_tool_error(self, error: BaseException, **kwargs: Any) -> None:
        """Do nothing when tool outputs an error."""
        pass

    def on_text(self, text: str, **kwargs: Any) -> None:
        """Do nothing."""
        pass

    def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> None:
        """Do nothing."""
        pass
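For completeness, a chat-mode counterpart to the prompt-mode example in the class docstring above might look like the sketch below. This is illustrative only: ChatOpenAI, the local URL, and the project name are assumptions, and the target Label Studio project needs a <Paragraphs> input tag, as checked in on_chat_model_start.

```python
from langchain_community.callbacks import LabelStudioCallbackHandler
from langchain_community.chat_models import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage

# mode="chat" expects a Label Studio project whose input tag is <Paragraphs>.
handler = LabelStudioCallbackHandler(
    api_key="<your_key_here>",
    url="http://localhost:8080",
    project_name="LangChain-%Y-%m-%d",
    mode="chat",
)

chat = ChatOpenAI(callbacks=[handler])
chat.invoke(
    [
        SystemMessage(content="You are a helpful storyteller."),
        HumanMessage(content="Tell me a story about a dog."),
    ]
)
```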