From 751fb7de20fdfca11206ccb9ca6db76885fd2f6b Mon Sep 17 00:00:00 2001 From: Nuno Campos Date: Thu, 14 Mar 2024 10:40:34 -0700 Subject: [PATCH] Add new beta StructuredPrompt (#19080) Thank you for contributing to LangChain! - [ ] **PR title**: "package: description" - Where "package" is whichever of langchain, community, core, experimental, etc. is being modified. Use "docs: ..." for purely docs changes, "templates: ..." for template changes, "infra: ..." for CI changes. - Example: "community: add foobar LLM" - [ ] **PR message**: ***Delete this entire checklist*** and replace with - **Description:** a description of the change - **Issue:** the issue # it fixes, if applicable - **Dependencies:** any dependencies required for this change - **Twitter handle:** if your PR gets announced, and you'd like a mention, we'll gladly shout you out! - [ ] **Add tests and docs**: If you're adding a new integration, please include 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/docs/integrations` directory. - [ ] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified. See contribution guidelines for more: https://python.langchain.com/docs/contributing/ Additional guidelines: - Make sure optional dependencies are imported within a function. - Please do not add dependencies to pyproject.toml files (even optional ones) unless they are required for unit tests. - Most PRs should not touch more than one package. - Changes should be backwards compatible. - If you are adding something to community, do not re-import it in langchain. If no one reviews your PR within a few days, please @-mention one of baskaryan, efriis, eyurtsev, hwchase17. 
--- libs/core/langchain_core/load/mapping.py | 6 + .../core/langchain_core/prompts/structured.py | 133 ++++++++++++++++++ .../unit_tests/prompts/test_structured.py | 78 ++++++++++ 3 files changed, 217 insertions(+) create mode 100644 libs/core/langchain_core/prompts/structured.py create mode 100644 libs/core/tests/unit_tests/prompts/test_structured.py diff --git a/libs/core/langchain_core/load/mapping.py b/libs/core/langchain_core/load/mapping.py index 90d52756f06..5dac9ccd839 100644 --- a/libs/core/langchain_core/load/mapping.py +++ b/libs/core/langchain_core/load/mapping.py @@ -522,6 +522,12 @@ _OG_SERIALIZABLE_MAPPING: Dict[Tuple[str, ...], Tuple[str, ...]] = { "image", "ImagePromptTemplate", ), + ("langchain", "prompts", "chat", "StructuredPrompt"): ( + "langchain_core", + "prompts", + "structured", + "StructuredPrompt", + ), } # Needed for backwards compatibility for a few versions where we serialized diff --git a/libs/core/langchain_core/prompts/structured.py b/libs/core/langchain_core/prompts/structured.py new file mode 100644 index 00000000000..fd4b1b40b42 --- /dev/null +++ b/libs/core/langchain_core/prompts/structured.py @@ -0,0 +1,133 @@ +from typing import ( + Any, + Callable, + Dict, + Iterator, + Mapping, + Optional, + Sequence, + Set, + Type, + Union, +) + +from langchain_core._api.beta_decorator import beta +from langchain_core.language_models.base import BaseLanguageModel +from langchain_core.prompts.chat import ( + BaseChatPromptTemplate, + BaseMessagePromptTemplate, + ChatPromptTemplate, + MessageLikeRepresentation, + MessagesPlaceholder, + _convert_to_message, +) +from langchain_core.pydantic_v1 import BaseModel +from langchain_core.runnables.base import ( + Other, + Runnable, + RunnableSequence, + RunnableSerializable, +) + + +@beta() +class StructuredPrompt(ChatPromptTemplate): + schema_: Union[Dict, Type[BaseModel]] + + @classmethod + def from_messages_and_schema( + cls, + messages: Sequence[MessageLikeRepresentation], + schema: Union[Dict, 
Type[BaseModel]], + ) -> ChatPromptTemplate: + """Create a chat prompt template from a variety of message formats. + + Examples: + + Instantiation from a list of message templates: + + .. code-block:: python + + class OutputSchema(BaseModel): + name: str + value: int + + template = StructuredPrompt.from_messages_and_schema( + [ + ("human", "Hello, how are you?"), + ("ai", "I'm doing well, thanks!"), + ("human", "That's good to hear."), + ], + OutputSchema, + ) + + Args: + messages: sequence of message representations. + A message can be represented using the following formats: + (1) BaseMessagePromptTemplate, (2) BaseMessage, (3) 2-tuple of + (message type, template); e.g., ("human", "{user_input}"), + (4) 2-tuple of (message class, template), (5) a string which is + shorthand for ("human", template); e.g., "{user_input}" + schema: a dictionary representation of function call, or a Pydantic model. + + Returns: + a structured prompt template + """ + _messages = [_convert_to_message(message) for message in messages] + + # Automatically infer input variables from messages + input_vars: Set[str] = set() + partial_vars: Dict[str, Any] = {} + for _message in _messages: + if isinstance(_message, MessagesPlaceholder) and _message.optional: + partial_vars[_message.variable_name] = [] + elif isinstance( + _message, (BaseChatPromptTemplate, BaseMessagePromptTemplate) + ): + input_vars.update(_message.input_variables) + + return cls( + input_variables=sorted(input_vars), + messages=_messages, + partial_variables=partial_vars, + schema_=schema, + ) + + def __or__( + self, + other: Union[ + Runnable[Any, Other], + Callable[[Any], Other], + Callable[[Iterator[Any]], Iterator[Other]], + Mapping[str, Union[Runnable[Any, Other], Callable[[Any], Other], Any]], + ], + ) -> RunnableSerializable[Dict, Other]: + if isinstance(other, BaseLanguageModel) or hasattr( + other, "with_structured_output" + ): + return RunnableSequence(self, other.with_structured_output(self.schema_)) + else: + raise 
NotImplementedError( + "Structured prompts need to be piped to a language model." + ) + + def pipe( + self, + *others: Union[Runnable[Any, Other], Callable[[Any], Other]], + name: Optional[str] = None, + ) -> RunnableSerializable[Dict, Other]: + if ( + others + and isinstance(others[0], BaseLanguageModel) + or hasattr(others[0], "with_structured_output") + ): + return RunnableSequence( + self, + others[0].with_structured_output(self.schema_), + *others[1:], + name=name, + ) + else: + raise NotImplementedError( + "Structured prompts need to be piped to a language model." + ) diff --git a/libs/core/tests/unit_tests/prompts/test_structured.py b/libs/core/tests/unit_tests/prompts/test_structured.py new file mode 100644 index 00000000000..8a5b97a4373 --- /dev/null +++ b/libs/core/tests/unit_tests/prompts/test_structured.py @@ -0,0 +1,78 @@ +from functools import partial +from inspect import isclass +from typing import Any, Dict, Type, Union, cast + +from langchain_core.load.dump import dumps +from langchain_core.load.load import loads +from langchain_core.prompts.structured import StructuredPrompt +from langchain_core.pydantic_v1 import BaseModel +from langchain_core.runnables.base import Runnable, RunnableLambda +from tests.unit_tests.fake.chat_model import FakeListChatModel + + +def _fake_runnable( + schema: Union[Dict, Type[BaseModel]], _: Any +) -> Union[BaseModel, Dict]: + if isclass(schema) and issubclass(schema, BaseModel): + return schema(name="yo", value=42) + else: + params = cast(Dict, schema)["parameters"] + return {k: 1 for k, v in params.items()} + + +class FakeStructuredChatModel(FakeListChatModel): + """Fake ChatModel for testing purposes.""" + + def with_structured_output(self, schema: Union[Dict, Type[BaseModel]]) -> Runnable: + return RunnableLambda(partial(_fake_runnable, schema)) + + @property + def _llm_type(self) -> str: + return "fake-messages-list-chat-model" + + +def test_structured_prompt_pydantic() -> None: + class OutputSchema(BaseModel): + 
name: str + value: int + + prompt = StructuredPrompt.from_messages_and_schema( + [ + ("human", "I'm very structured, how about you?"), + ], + OutputSchema, + ) + + model = FakeStructuredChatModel(responses=[]) + + chain = prompt | model + + assert chain.invoke({"hello": "there"}) == OutputSchema(name="yo", value=42) + + +def test_structured_prompt_dict() -> None: + prompt = StructuredPrompt.from_messages_and_schema( + [ + ("human", "I'm very structured, how about you?"), + ], + { + "name": "yo", + "description": "a structured output", + "parameters": { + "name": {"type": "string"}, + "value": {"type": "integer"}, + }, + }, + ) + + model = FakeStructuredChatModel(responses=[]) + + chain = prompt | model + + assert chain.invoke({"hello": "there"}) == {"name": 1, "value": 1} + + assert loads(dumps(prompt)) == prompt + + chain = loads(dumps(prompt)) | model + + assert chain.invoke({"hello": "there"}) == {"name": 1, "value": 1}