diff --git a/libs/partners/anthropic/langchain_anthropic/chat_models.py b/libs/partners/anthropic/langchain_anthropic/chat_models.py
index 03e808c4c75..67fe22c4824 100644
--- a/libs/partners/anthropic/langchain_anthropic/chat_models.py
+++ b/libs/partners/anthropic/langchain_anthropic/chat_models.py
@@ -396,7 +396,8 @@ def _format_messages(
                                 {
                                     k: v
                                     for k, v in block.items()
-                                    if k in ("type", "text", "cache_control")
+                                    if k
+                                    in ("type", "text", "cache_control", "citations")
                                 }
                             )
                     elif block["type"] == "thinking":
diff --git a/libs/partners/anthropic/tests/cassettes/test_code_execution.yaml.gz b/libs/partners/anthropic/tests/cassettes/test_code_execution.yaml.gz
new file mode 100644
index 00000000000..f022211c387
Binary files /dev/null and b/libs/partners/anthropic/tests/cassettes/test_code_execution.yaml.gz differ
diff --git a/libs/partners/anthropic/tests/cassettes/test_redacted_thinking.yaml.gz b/libs/partners/anthropic/tests/cassettes/test_redacted_thinking.yaml.gz
new file mode 100644
index 00000000000..f4295b91e62
Binary files /dev/null and b/libs/partners/anthropic/tests/cassettes/test_redacted_thinking.yaml.gz differ
diff --git a/libs/partners/anthropic/tests/cassettes/test_remote_mcp.yaml.gz b/libs/partners/anthropic/tests/cassettes/test_remote_mcp.yaml.gz
new file mode 100644
index 00000000000..e29dfe5a082
Binary files /dev/null and b/libs/partners/anthropic/tests/cassettes/test_remote_mcp.yaml.gz differ
diff --git a/libs/partners/anthropic/tests/cassettes/test_thinking.yaml.gz b/libs/partners/anthropic/tests/cassettes/test_thinking.yaml.gz
new file mode 100644
index 00000000000..5c92ad877ce
Binary files /dev/null and b/libs/partners/anthropic/tests/cassettes/test_thinking.yaml.gz differ
diff --git a/libs/partners/anthropic/tests/cassettes/test_web_search.yaml.gz b/libs/partners/anthropic/tests/cassettes/test_web_search.yaml.gz
new file mode 100644
index 00000000000..546cdf8c505
Binary files /dev/null and b/libs/partners/anthropic/tests/cassettes/test_web_search.yaml.gz differ
diff --git a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py
index 33bb78de48b..a9e6a954232 100644
--- a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py
+++ b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py
@@ -713,14 +713,24 @@ def test_citations() -> None:
     assert any("citations" in block for block in full.content)
     assert not any("citation" in block for block in full.content)
 
+    # Test pass back in
+    next_message = {
+        "role": "user",
+        "content": "Can you comment on the citations you just made?",
+    }
+    _ = llm.invoke(messages + [full, next_message])
 
+
+@pytest.mark.vcr
 def test_thinking() -> None:
     llm = ChatAnthropic(
         model="claude-3-7-sonnet-latest",
         max_tokens=5_000,
         thinking={"type": "enabled", "budget_tokens": 2_000},
     )
-    response = llm.invoke("Hello")
+
+    input_message = {"role": "user", "content": "Hello"}
+    response = llm.invoke([input_message])
     assert any("thinking" in block for block in response.content)
     for block in response.content:
         assert isinstance(block, dict)
@@ -731,7 +741,7 @@ def test_thinking() -> None:
 
     # Test streaming
     full: Optional[BaseMessageChunk] = None
-    for chunk in llm.stream("Hello"):
+    for chunk in llm.stream([input_message]):
         if full is None:
             full = cast(BaseMessageChunk, chunk)
         else:
@@ -746,8 +756,12 @@ def test_thinking() -> None:
             assert block["thinking"] and isinstance(block["thinking"], str)
             assert block["signature"] and isinstance(block["signature"], str)
 
+    # Test pass back in
+    next_message = {"role": "user", "content": "How are you?"}
+    _ = llm.invoke([input_message, full, next_message])
 
-@pytest.mark.flaky(retries=3, delay=1)
+
+@pytest.mark.vcr
 def test_redacted_thinking() -> None:
     llm = ChatAnthropic(
         model="claude-3-7-sonnet-latest",
@@ -755,8 +769,9 @@ def test_redacted_thinking() -> None:
         thinking={"type": "enabled", "budget_tokens": 2_000},
     )
     query = "ANTHROPIC_MAGIC_STRING_TRIGGER_REDACTED_THINKING_46C9A13E193C177646C7398A98432ECCCE4C1253D5E2D82641AC0E52CC2876CB"  # noqa: E501
+    input_message = {"role": "user", "content": query}
 
-    response = llm.invoke(query)
+    response = llm.invoke([input_message])
     has_reasoning = False
     for block in response.content:
         assert isinstance(block, dict)
@@ -768,7 +783,7 @@ def test_redacted_thinking() -> None:
 
     # Test streaming
     full: Optional[BaseMessageChunk] = None
-    for chunk in llm.stream(query):
+    for chunk in llm.stream([input_message]):
         if full is None:
             full = cast(BaseMessageChunk, chunk)
         else:
@@ -784,6 +799,10 @@ def test_redacted_thinking() -> None:
             assert block["data"] and isinstance(block["data"], str)
     assert stream_has_reasoning
 
+    # Test pass back in
+    next_message = {"role": "user", "content": "What?"}
+    _ = llm.invoke([input_message, full, next_message])
+
 
 def test_structured_output_thinking_enabled() -> None:
     llm = ChatAnthropic(
@@ -882,9 +901,8 @@ def test_image_tool_calling() -> None:
     llm.bind_tools([color_picker]).invoke(messages)
 
 
-# TODO: set up VCR
+@pytest.mark.vcr
 def test_web_search() -> None:
-    pytest.skip()
     llm = ChatAnthropic(model="claude-3-5-sonnet-latest")
 
     tool = {"type": "web_search_20250305", "name": "web_search", "max_uses": 1}
@@ -900,7 +918,8 @@ def test_web_search() -> None:
         ],
     }
     response = llm_with_tools.invoke([input_message])
-    block_types = {block["type"] for block in response.content}
+    assert all(isinstance(block, dict) for block in response.content)
+    block_types = {block["type"] for block in response.content}  # type: ignore[index]
     assert block_types == {"text", "server_tool_use", "web_search_tool_result"}
 
     # Test streaming
@@ -923,11 +942,12 @@ def test_web_search() -> None:
     )
 
 
+@pytest.mark.vcr
 def test_code_execution() -> None:
-    pytest.skip()
     llm = ChatAnthropic(
         model="claude-sonnet-4-20250514",
         betas=["code-execution-2025-05-22"],
+        max_tokens=10_000,
     )
 
     tool = {"type": "code_execution_20250522", "name": "code_execution"}
@@ -946,7 +966,8 @@ def test_code_execution() -> None:
         ],
     }
     response = llm_with_tools.invoke([input_message])
-    block_types = {block["type"] for block in response.content}
+    assert all(isinstance(block, dict) for block in response.content)
+    block_types = {block["type"] for block in response.content}  # type: ignore[index]
     assert block_types == {"text", "server_tool_use", "code_execution_tool_result"}
 
     # Test streaming
@@ -969,8 +990,8 @@ def test_code_execution() -> None:
     )
 
 
+@pytest.mark.vcr
 def test_remote_mcp() -> None:
-    pytest.skip()
     mcp_servers = [
         {
             "type": "url",
@@ -985,6 +1006,7 @@ def test_remote_mcp() -> None:
         model="claude-sonnet-4-20250514",
         betas=["mcp-client-2025-04-04"],
         mcp_servers=mcp_servers,
+        max_tokens=10_000,
     )
 
     input_message = {
@@ -1000,7 +1022,8 @@ def test_remote_mcp() -> None:
         ],
     }
     response = llm.invoke([input_message])
-    block_types = {block["type"] for block in response.content}
+    assert all(isinstance(block, dict) for block in response.content)
+    block_types = {block["type"] for block in response.content}  # type: ignore[index]
     assert block_types == {"text", "mcp_tool_use", "mcp_tool_result"}
 
     # Test streaming
@@ -1010,7 +1033,8 @@ def test_remote_mcp() -> None:
         full = chunk if full is None else full + chunk
     assert isinstance(full, AIMessageChunk)
     assert isinstance(full.content, list)
-    block_types = {block["type"] for block in full.content}
+    assert all(isinstance(block, dict) for block in full.content)
+    block_types = {block["type"] for block in full.content}  # type: ignore[index]
     assert block_types == {"text", "mcp_tool_use", "mcp_tool_result"}
 
     # Test we can pass back in