From 663747b730f526d44774f3b8d115736882948032 Mon Sep 17 00:00:00 2001 From: Nuno Campos Date: Thu, 2 May 2024 09:55:42 -0700 Subject: [PATCH] core[patch]: Fixes for convert_messages (#21207) - support two-tuples of any sequence type (eg. json.loads never produces tuples) - support type alias for role key - if id is passed in in dict form use it - if tool_calls passed in in dict form use them --------- Co-authored-by: Bagatur --- .github/workflows/_lint.yml | 34 ++++++++++----------- libs/core/langchain_core/messages/utils.py | 22 +++++++++---- libs/core/tests/unit_tests/test_messages.py | 21 ++++++++++--- 3 files changed, 49 insertions(+), 28 deletions(-) diff --git a/.github/workflows/_lint.yml b/.github/workflows/_lint.yml index e2e25fb8ebc..c207afd7ca5 100644 --- a/.github/workflows/_lint.yml +++ b/.github/workflows/_lint.yml @@ -44,7 +44,7 @@ jobs: python-version: ${{ matrix.python-version }} poetry-version: ${{ env.POETRY_VERSION }} working-directory: ${{ inputs.working-directory }} - cache-key: lint-with-extras +# cache-key: lint-with-extras - name: Check Poetry File shell: bash @@ -79,14 +79,14 @@ jobs: run: | poetry run pip install -e "$LANGCHAIN_LOCATION" - - name: Get .mypy_cache to speed up mypy - uses: actions/cache@v4 - env: - SEGMENT_DOWNLOAD_TIMEOUT_MIN: "2" - with: - path: | - ${{ env.WORKDIR }}/.mypy_cache - key: mypy-lint-${{ runner.os }}-${{ runner.arch }}-py${{ matrix.python-version }}-${{ inputs.working-directory }}-${{ hashFiles(format('{0}/poetry.lock', inputs.working-directory)) }} +# - name: Get .mypy_cache to speed up mypy +# uses: actions/cache@v4 +# env: +# SEGMENT_DOWNLOAD_TIMEOUT_MIN: "2" +# with: +# path: | +# ${{ env.WORKDIR }}/.mypy_cache +# key: mypy-lint-${{ runner.os }}-${{ runner.arch }}-py${{ matrix.python-version }}-${{ inputs.working-directory }}-${{ hashFiles(format('{0}/poetry.lock', inputs.working-directory)) }} - name: Analysing the code with our lint @@ -113,14 +113,14 @@ jobs: run: | poetry install --with test,test_integration - - name: Get .mypy_cache_test to speed up mypy - uses: actions/cache@v4 - env: - SEGMENT_DOWNLOAD_TIMEOUT_MIN: "2" - with: - path: | - ${{ env.WORKDIR }}/.mypy_cache_test - key: mypy-test-${{ runner.os }}-${{ runner.arch }}-py${{ matrix.python-version }}-${{ inputs.working-directory }}-${{ hashFiles(format('{0}/poetry.lock', inputs.working-directory)) }} +# - name: Get .mypy_cache_test to speed up mypy +# uses: actions/cache@v4 +# env: +# SEGMENT_DOWNLOAD_TIMEOUT_MIN: "2" +# with: +# path: | +# ${{ env.WORKDIR }}/.mypy_cache_test +# key: mypy-test-${{ runner.os }}-${{ runner.arch }}-py${{ matrix.python-version }}-${{ inputs.working-directory }}-${{ hashFiles(format('{0}/poetry.lock', inputs.working-directory)) }} - name: Analysing the code with our lint working-directory: ${{ inputs.working-directory }} diff --git a/libs/core/langchain_core/messages/utils.py b/libs/core/langchain_core/messages/utils.py index 8f8957a9baf..f161a4c32c0 100644 --- a/libs/core/langchain_core/messages/utils.py +++ b/libs/core/langchain_core/messages/utils.py @@ -130,7 +130,9 @@ def message_chunk_to_message(chunk: BaseMessageChunk) -> BaseMessage: ) -MessageLikeRepresentation = Union[BaseMessage, Tuple[str, str], str, Dict[str, Any]] +MessageLikeRepresentation = Union[ + BaseMessage, List[str], Tuple[str, str], str, Dict[str, Any] +] def _create_message_from_message_type( @@ -138,6 +140,8 @@ def _create_message_from_message_type( content: str, name: Optional[str] = None, tool_call_id: Optional[str] = None, + tool_calls: Optional[List[Dict[str, Any]]] = None, + id: Optional[str] = None, **additional_kwargs: Any, ) -> BaseMessage: """Create a message from a message type and content string. @@ -156,6 +160,10 @@ def _create_message_from_message_type( kwargs["tool_call_id"] = tool_call_id if additional_kwargs: kwargs["additional_kwargs"] = additional_kwargs # type: ignore[assignment] + if id is not None: + kwargs["id"] = id + if tool_calls is not None: + kwargs["tool_calls"] = tool_calls if message_type in ("human", "user"): message: BaseMessage = HumanMessage(content=content, **kwargs) elif message_type in ("ai", "assistant"): @@ -197,15 +205,17 @@ def _convert_to_message( _message = message elif isinstance(message, str): _message = _create_message_from_message_type("human", message) - elif isinstance(message, tuple): - if len(message) != 2: - raise ValueError(f"Expected 2-tuple of (role, template), got {message}") - message_type_str, template = message + elif isinstance(message, Sequence) and len(message) == 2: + # mypy doesn't realise this can't be a string given the previous branch + message_type_str, template = message # type: ignore[misc] _message = _create_message_from_message_type(message_type_str, template) elif isinstance(message, dict): msg_kwargs = message.copy() try: - msg_type = msg_kwargs.pop("role") + try: + msg_type = msg_kwargs.pop("role") + except KeyError: + msg_type = msg_kwargs.pop("type") msg_content = msg_kwargs.pop("content") except KeyError: raise ValueError( diff --git a/libs/core/tests/unit_tests/test_messages.py b/libs/core/tests/unit_tests/test_messages.py index 8085d7c479c..1ae7320e15b 100644 --- a/libs/core/tests/unit_tests/test_messages.py +++ b/libs/core/tests/unit_tests/test_messages.py @@ -545,8 +545,8 @@ def test_convert_to_messages() -> None: [ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello!"}, - {"role": "ai", "content": "Hi!"}, - {"role": "human", "content": "Hello!", "name": "Jane"}, + {"role": "ai", "content": "Hi!", "id": "ai1"}, + {"type": "human", "content": "Hello!", "name": "Jane", "id": "human1"}, { "role": "assistant", "content": "Hi!", @@ -554,13 +554,20 @@ def test_convert_to_messages() -> None: "function_call": {"name": "greet", "arguments": '{"name": "Jane"}'}, }, {"role": "function", "name": "greet", "content": "Hi!"}, + { + "role": "assistant", + "content": "", + "tool_calls": [ + {"name": "greet", "args": {"name": "Jane"}, "id": "tool_id"} + ], + }, {"role": "tool", "tool_call_id": "tool_id", "content": "Hi!"}, ] ) == [ SystemMessage(content="You are a helpful assistant."), HumanMessage(content="Hello!"), - AIMessage(content="Hi!"), - HumanMessage(content="Hello!", name="Jane"), + AIMessage(content="Hi!", id="ai1"), + HumanMessage(content="Hello!", name="Jane", id="human1"), AIMessage( content="Hi!", name="JaneBot", @@ -569,6 +576,10 @@ def test_convert_to_messages() -> None: }, ), FunctionMessage(name="greet", content="Hi!"), + AIMessage( + content="", + tool_calls=[ToolCall(name="greet", args={"name": "Jane"}, id="tool_id")], + ), ToolMessage(tool_call_id="tool_id", content="Hi!"), ] @@ -579,7 +590,7 @@ def test_convert_to_messages() -> None: "hello!", ("ai", "Hi!"), ("human", "Hello!"), - ("assistant", "Hi!"), + ["assistant", "Hi!"], ] ) == [ SystemMessage(content="You are a helpful assistant."),