Mirror of https://github.com/hwchase17/langchain.git, synced 2025-09-01 19:12:42 +00:00
community[major], core[patch], langchain[patch], experimental[patch]: Create langchain-community (#14463)
Moved the following modules to the new package langchain-community in a backwards compatible fashion:

```
mv langchain/langchain/adapters community/langchain_community
mv langchain/langchain/callbacks community/langchain_community/callbacks
mv langchain/langchain/chat_loaders community/langchain_community
mv langchain/langchain/chat_models community/langchain_community
mv langchain/langchain/document_loaders community/langchain_community
mv langchain/langchain/docstore community/langchain_community
mv langchain/langchain/document_transformers community/langchain_community
mv langchain/langchain/embeddings community/langchain_community
mv langchain/langchain/graphs community/langchain_community
mv langchain/langchain/llms community/langchain_community
mv langchain/langchain/memory/chat_message_histories community/langchain_community
mv langchain/langchain/retrievers community/langchain_community
mv langchain/langchain/storage community/langchain_community
mv langchain/langchain/tools community/langchain_community
mv langchain/langchain/utilities community/langchain_community
mv langchain/langchain/vectorstores community/langchain_community
mv langchain/langchain/agents/agent_toolkits community/langchain_community
mv langchain/langchain/cache.py community/langchain_community
```

Moved the following to core:

```
mv langchain/langchain/utils/json_schema.py core/langchain_core/utils
mv langchain/langchain/utils/html.py core/langchain_core/utils
mv langchain/langchain/utils/strings.py core/langchain_core/utils
cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py
rm langchain/langchain/utils/env.py
```

See .scripts/community_split/script_integrations.sh for all changes
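For illustration, "backwards compatible" here means the old `langchain` import paths keep resolving while the code now lives in `langchain_community`. A minimal sketch of the two equivalent import styles, using the `OpenLLM` class from the file added below:

```python
# New canonical location after the split:
from langchain_community.llms import OpenLLM

# Old location still works for backwards compatibility,
# re-exporting from langchain_community under the hood:
from langchain.llms import OpenLLM
```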
libs/community/langchain_community/llms/openllm.py (new file, 331 lines)
@@ -0,0 +1,331 @@

```python
from __future__ import annotations

import copy
import json
import logging
from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    List,
    Literal,
    Optional,
    TypedDict,
    Union,
    overload,
)

from langchain_core.callbacks import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
)
from langchain_core.language_models.llms import LLM
from langchain_core.pydantic_v1 import PrivateAttr

if TYPE_CHECKING:
    import openllm


ServerType = Literal["http", "grpc"]


class IdentifyingParams(TypedDict):
    """Parameters for identifying a model as a typed dict."""

    model_name: str
    model_id: Optional[str]
    server_url: Optional[str]
    server_type: Optional[ServerType]
    embedded: bool
    llm_kwargs: Dict[str, Any]


logger = logging.getLogger(__name__)


class OpenLLM(LLM):
    """OpenLLM, supporting both in-process model
    instances and remote OpenLLM servers.

    To use, you should have the openllm library installed:

    .. code-block:: bash

        pip install openllm

    Learn more at: https://github.com/bentoml/openllm

    Example running an LLM model locally managed by OpenLLM:
        .. code-block:: python

            from langchain_community.llms import OpenLLM
            llm = OpenLLM(
                model_name='flan-t5',
                model_id='google/flan-t5-large',
            )
            llm("What is the difference between a duck and a goose?")

    For all available supported models, you can run 'openllm models'.

    If you have an OpenLLM server running, you can also use it remotely:
        .. code-block:: python

            from langchain_community.llms import OpenLLM
            llm = OpenLLM(server_url='http://localhost:3000')
            llm("What is the difference between a duck and a goose?")
    """

    model_name: Optional[str] = None
    """Model name to use. See 'openllm models' for all available models."""
    model_id: Optional[str] = None
    """Model ID to use. If not provided, will use the default model for the model name.
    See 'openllm models' for all available model variants."""
    server_url: Optional[str] = None
    """Optional server URL that currently runs an LLMServer with 'openllm start'."""
    server_type: ServerType = "http"
    """Optional server type. Either 'http' or 'grpc'."""
    embedded: bool = True
    """Initialize this LLM instance in the current process by default. Should
    only be set to False when used in conjunction with a BentoML Service."""
    llm_kwargs: Dict[str, Any]
    """Keyword arguments to be passed to openllm.LLM"""

    _runner: Optional[openllm.LLMRunner] = PrivateAttr(default=None)
    _client: Union[
        openllm.client.HTTPClient, openllm.client.GrpcClient, None
    ] = PrivateAttr(default=None)

    class Config:
        extra = "forbid"

    @overload
    def __init__(
        self,
        model_name: Optional[str] = ...,
        *,
        model_id: Optional[str] = ...,
        embedded: Literal[True, False] = ...,
        **llm_kwargs: Any,
    ) -> None:
        ...

    @overload
    def __init__(
        self,
        *,
        server_url: str = ...,
        server_type: Literal["grpc", "http"] = ...,
        **llm_kwargs: Any,
    ) -> None:
        ...

    def __init__(
        self,
        model_name: Optional[str] = None,
        *,
        model_id: Optional[str] = None,
        server_url: Optional[str] = None,
        server_type: Literal["grpc", "http"] = "http",
        embedded: bool = True,
        **llm_kwargs: Any,
    ):
        try:
            import openllm
        except ImportError as e:
            raise ImportError(
                "Could not import openllm. Make sure to install it with "
                "'pip install openllm'."
            ) from e

        llm_kwargs = llm_kwargs or {}

        if server_url is not None:
            logger.debug("'server_url' is provided, returning an openllm.Client")
            assert (
                model_id is None and model_name is None
            ), "'server_url' and {'model_id', 'model_name'} are mutually exclusive"
            client_cls = (
                openllm.client.HTTPClient
                if server_type == "http"
                else openllm.client.GrpcClient
            )
            client = client_cls(server_url)

            super().__init__(
                **{
                    "server_url": server_url,
                    "server_type": server_type,
                    "llm_kwargs": llm_kwargs,
                }
            )
            self._runner = None  # type: ignore
            self._client = client
        else:
            assert model_name is not None, "Must provide 'model_name' or 'server_url'"
            # Since the LLMs are relatively huge, we don't actually want to convert
            # the Runner with embedded when running the server. Instead, we only set
            # init_local here so that LangChain users can still use the LLM
            # in-process. For BentoML users, setting embedded=False is the expected
            # behaviour to invoke the runners remotely.
            # We also need to enable ensure_available to download and set up the
            # model.
            runner = openllm.Runner(
                model_name=model_name,
                model_id=model_id,
                init_local=embedded,
                ensure_available=True,
                **llm_kwargs,
            )
            super().__init__(
                **{
                    "model_name": model_name,
                    "model_id": model_id,
                    "embedded": embedded,
                    "llm_kwargs": llm_kwargs,
                }
            )
            self._client = None  # type: ignore
            self._runner = runner

    @property
    def runner(self) -> openllm.LLMRunner:
        """
        Get the underlying openllm.LLMRunner instance for integration with BentoML.

        Example:
        .. code-block:: python

            llm = OpenLLM(
                model_name='flan-t5',
                model_id='google/flan-t5-large',
                embedded=False,
            )
            tools = load_tools(["serpapi", "llm-math"], llm=llm)
            agent = initialize_agent(
                tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION
            )
            svc = bentoml.Service("langchain-openllm", runners=[llm.runner])

            @svc.api(input=Text(), output=Text())
            def chat(input_text: str):
                return agent.run(input_text)
        """
        if self._runner is None:
            raise ValueError("OpenLLM must be initialized locally with 'model_name'")
        return self._runner

    @property
    def _identifying_params(self) -> IdentifyingParams:
        """Get the identifying parameters."""
        if self._client is not None:
            self.llm_kwargs.update(self._client._config())
            model_name = self._client._metadata()["model_name"]
            model_id = self._client._metadata()["model_id"]
        else:
            if self._runner is None:
                raise ValueError("Runner must be initialized.")
            model_name = self.model_name
            model_id = self.model_id
            try:
                self.llm_kwargs.update(
                    json.loads(self._runner.identifying_params["configuration"])
                )
            except (TypeError, json.JSONDecodeError):
                pass
        return IdentifyingParams(
            server_url=self.server_url,
            server_type=self.server_type,
            embedded=self.embedded,
            llm_kwargs=self.llm_kwargs,
            model_name=model_name,
            model_id=model_id,
        )

    @property
    def _llm_type(self) -> str:
        return "openllm_client" if self._client else "openllm"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        try:
            import openllm
        except ImportError as e:
            raise ImportError(
                "Could not import openllm. Make sure to install it with "
                "'pip install openllm'."
            ) from e

        copied = copy.deepcopy(self.llm_kwargs)
        copied.update(kwargs)
        config = openllm.AutoConfig.for_model(
            self._identifying_params["model_name"], **copied
        )
        if self._client:
            res = self._client.generate(
                prompt, **config.model_dump(flatten=True)
            ).responses[0]
        else:
            assert self._runner is not None
            res = self._runner(prompt, **config.model_dump(flatten=True))
        if isinstance(res, dict) and "text" in res:
            return res["text"]
        elif isinstance(res, str):
            return res
        else:
            raise ValueError(
                "Expected result to be a dict with key 'text' or a string. "
                f"Received {res}"
            )

    async def _acall(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        try:
            import openllm
        except ImportError as e:
            raise ImportError(
                "Could not import openllm. Make sure to install it with "
                "'pip install openllm'."
            ) from e

        copied = copy.deepcopy(self.llm_kwargs)
        copied.update(kwargs)
        config = openllm.AutoConfig.for_model(
            self._identifying_params["model_name"], **copied
        )
        if self._client:
            async_client = openllm.client.AsyncHTTPClient(self.server_url)
            res = (
                await async_client.generate(prompt, **config.model_dump(flatten=True))
            ).responses[0]
        else:
            assert self._runner is not None
            (
                prompt,
                generate_kwargs,
                postprocess_kwargs,
            ) = self._runner.llm.sanitize_parameters(prompt, **kwargs)
            generated_result = await self._runner.generate.async_run(
                prompt, **generate_kwargs
            )
            res = self._runner.llm.postprocess_generate(
                prompt, generated_result, **postprocess_kwargs
            )

        if isinstance(res, dict) and "text" in res:
            return res["text"]
        elif isinstance(res, str):
            return res
        else:
            raise ValueError(
                "Expected result to be a dict with key 'text' or a string. "
                f"Received {res}"
            )
```
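As a usage note, a short sketch of how the two construction paths in `__init__` above are exercised; the model names are illustrative, and the remote case assumes a server was started separately with `openllm start`:

```python
from langchain_community.llms import OpenLLM

# Embedded mode: __init__ builds an openllm.Runner and, with the default
# embedded=True, loads the model into the current process.
local_llm = OpenLLM(model_name="flan-t5", model_id="google/flan-t5-large")

# Remote mode: __init__ attaches an openllm.client.HTTPClient instead;
# assumes a server is already running, e.g. `openllm start flan-t5`.
remote_llm = OpenLLM(server_url="http://localhost:3000", server_type="http")

# Both routes go through _call (or _acall for async use).
print(local_llm("What is the difference between a duck and a goose?"))
```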