From 8d0fb2d04b5d8f5a085f4aadf9f290c559a79d58 Mon Sep 17 00:00:00 2001
From: Mason Daugherty
Date: Mon, 18 Aug 2025 13:51:47 -0400
Subject: [PATCH] fix(anthropic): correct `input_token` count for streaming
 (#32591)

* Create usage metadata on the
  [`message_delta`](https://docs.anthropic.com/en/docs/build-with-claude/streaming#event-types)
  event instead of at the beginning of the stream. Token counts are therefore
  reported once, at the end of the stream, rather than piecemeal during it.
  This allows accurate reporting of server-side tool usage (important for
  billing)
* Add some clarifying comments
* Fix some outstanding Pylance warnings
* Remove unnecessary `text` popping in thinking blocks
* Correctly report `input_cache_read`/`input_cache_creation` as a result
---
 .../langchain_anthropic/chat_models.py        | 108 ++++++++++++------
 libs/partners/anthropic/pyproject.toml        |   2 +-
 ...AnthropicStandard.test_stream_time.yaml.gz | Bin 3355 -> 4239 bytes
 .../tests/integration_tests/test_llms.py      |  12 +-
 .../tests/integration_tests/test_standard.py  |  10 +-
 .../tests/unit_tests/test_chat_models.py      |  73 ++++++++++++
 libs/partners/anthropic/uv.lock               |   4 +-
 7 files changed, 163 insertions(+), 46 deletions(-)

diff --git a/libs/partners/anthropic/langchain_anthropic/chat_models.py b/libs/partners/anthropic/langchain_anthropic/chat_models.py
index ed035e03843..09d1868905f 100644
--- a/libs/partners/anthropic/langchain_anthropic/chat_models.py
+++ b/libs/partners/anthropic/langchain_anthropic/chat_models.py
@@ -2192,47 +2192,65 @@ def _make_message_chunk_from_anthropic_event(
     coerce_content_to_string: bool,
     block_start_event: Optional[anthropic.types.RawMessageStreamEvent] = None,
 ) -> tuple[Optional[AIMessageChunk], Optional[anthropic.types.RawMessageStreamEvent]]:
-    """Convert Anthropic event to AIMessageChunk.
+    """Convert an Anthropic streaming event to an `AIMessageChunk`.
+
+    Args:
+        event: Raw streaming event from the Anthropic SDK.
+        stream_usage: Whether to include usage metadata in the output chunks.
+        coerce_content_to_string: Whether to convert structured content to plain
+            text strings. When True, only text content is preserved; when False,
+            structured content such as tool calls and citations is maintained.
+        block_start_event: Previous content block start event, used for tracking
+            tool use blocks and maintaining context across related events.
+
+    Returns:
+        Tuple containing:
+        - AIMessageChunk: Converted message chunk with appropriate content and
+          metadata, or None if the event doesn't produce a chunk.
+        - RawMessageStreamEvent: Updated `block_start_event` for tracking content
+          blocks across sequential events, or None if not applicable.
+
+    Note:
+        Not all Anthropic events result in message chunks. Events such as
+        internal state changes return None for the message chunk while
+        potentially updating the `block_start_event` for context tracking.
 
-    Note that not all events will result in a message chunk. In these cases
-    we return ``None``.
     """
     message_chunk: Optional[AIMessageChunk] = None
-    # See https://github.com/anthropics/anthropic-sdk-python/blob/main/src/anthropic/lib/streaming/_messages.py  # noqa: E501
+    # Reference: Anthropic SDK streaming implementation
+    # https://github.com/anthropics/anthropic-sdk-python/blob/main/src/anthropic/lib/streaming/_messages.py  # noqa: E501
+
     if event.type == "message_start" and stream_usage:
-        usage_metadata = _create_usage_metadata(event.message.usage)
-        # We pick up a cumulative count of output_tokens at the end of the stream,
-        # so here we zero out to avoid double counting.
- usage_metadata["total_tokens"] = ( - usage_metadata["total_tokens"] - usage_metadata["output_tokens"] - ) - usage_metadata["output_tokens"] = 0 + # Capture model name, but don't include usage_metadata yet + # as it will be properly reported in message_delta with complete info if hasattr(event.message, "model"): response_metadata = {"model_name": event.message.model} else: response_metadata = {} + message_chunk = AIMessageChunk( content="" if coerce_content_to_string else [], - usage_metadata=usage_metadata, response_metadata=response_metadata, ) + elif ( event.type == "content_block_start" and event.content_block is not None and event.content_block.type in ( - "tool_use", - "code_execution_tool_result", + "tool_use", # Standard tool usage + "code_execution_tool_result", # Built-in code execution results "document", "redacted_thinking", "mcp_tool_use", "mcp_tool_result", - "server_tool_use", - "web_search_tool_result", + "server_tool_use", # Server-side tool usage + "web_search_tool_result", # Built-in web search results ) ): if coerce_content_to_string: warnings.warn("Received unexpected tool content block.", stacklevel=2) + content_block = event.content_block.model_dump() content_block["index"] = event.index if event.content_block.type == "tool_use": @@ -2250,35 +2268,47 @@ def _make_message_chunk_from_anthropic_event( tool_call_chunks=tool_call_chunks, ) block_start_event = event + + # Process incremental content updates elif event.type == "content_block_delta": + # Text and citation deltas (incremental text content) if event.delta.type in ("text_delta", "citations_delta"): if coerce_content_to_string and hasattr(event.delta, "text"): - text = event.delta.text + text = getattr(event.delta, "text", "") message_chunk = AIMessageChunk(content=text) else: content_block = event.delta.model_dump() content_block["index"] = event.index + + # All citation deltas are part of a text block content_block["type"] = "text" if "citation" in content_block: + # Assign citations to a list if present content_block["citations"] = [content_block.pop("citation")] message_chunk = AIMessageChunk(content=[content_block]) + + # Reasoning elif ( event.delta.type == "thinking_delta" or event.delta.type == "signature_delta" ): content_block = event.delta.model_dump() - if "text" in content_block and content_block["text"] is None: - content_block.pop("text") content_block["index"] = event.index content_block["type"] = "thinking" message_chunk = AIMessageChunk(content=[content_block]) + + # Tool input JSON (streaming tool arguments) elif event.delta.type == "input_json_delta": content_block = event.delta.model_dump() content_block["index"] = event.index + start_event_block = ( + getattr(block_start_event, "content_block", None) + if block_start_event + else None + ) if ( - (block_start_event is not None) - and hasattr(block_start_event, "content_block") - and (block_start_event.content_block.type == "tool_use") + start_event_block is not None + and getattr(start_event_block, "type", None) == "tool_use" ): tool_call_chunk = create_tool_call_chunk( index=event.index, @@ -2293,12 +2323,10 @@ def _make_message_chunk_from_anthropic_event( content=[content_block], tool_call_chunks=tool_call_chunks, ) + + # Process final usage metadata and completion info elif event.type == "message_delta" and stream_usage: - usage_metadata = UsageMetadata( - input_tokens=0, - output_tokens=event.usage.output_tokens, - total_tokens=event.usage.output_tokens, - ) + usage_metadata = _create_usage_metadata(event.usage) message_chunk = 
AIMessageChunk(
             content="",
             usage_metadata=usage_metadata,
             response_metadata={
                 "stop_reason": event.delta.stop_reason,
                 "stop_sequence": event.delta.stop_sequence,
             },
         )
+
+    # Unhandled event types (e.g., `content_block_stop`, `ping` events)
+    # https://docs.anthropic.com/en/docs/build-with-claude/streaming#other-events
     else:
         pass
 
@@ -2319,26 +2349,38 @@ class ChatAnthropicMessages(ChatAnthropic):
 
 
 def _create_usage_metadata(anthropic_usage: BaseModel) -> UsageMetadata:
+    """Create LangChain `UsageMetadata` from Anthropic `Usage` data.
+
+    Note: Anthropic's `input_tokens` excludes cached tokens, so we manually add
+    `cache_read` and `cache_creation` tokens to get the true total.
+
+    """
     input_token_details: dict = {
         "cache_read": getattr(anthropic_usage, "cache_read_input_tokens", None),
         "cache_creation": getattr(anthropic_usage, "cache_creation_input_tokens", None),
     }
-    # Add (beta) cache TTL information if available
+
+    # Add cache TTL information if provided (5-minute and 1-hour ephemeral cache)
     cache_creation = getattr(anthropic_usage, "cache_creation", None)
-    cache_creation_keys = ("ephemeral_1h_input_tokens", "ephemeral_5m_input_tokens")
+
+    # Currently we just copy over the 5m and 1h keys; if more are added in the
+    # future, this tuple will need to be expanded
+    cache_creation_keys = ("ephemeral_5m_input_tokens", "ephemeral_1h_input_tokens")
     if cache_creation:
         if isinstance(cache_creation, BaseModel):
             cache_creation = cache_creation.model_dump()
         for k in cache_creation_keys:
             input_token_details[k] = cache_creation.get(k)
 
-    # Anthropic input_tokens exclude cached token counts.
+    # Calculate total input tokens: Anthropic's `input_tokens` excludes cached
+    # tokens, so we need to add them back to get the true total input token count
     input_tokens = (
-        (getattr(anthropic_usage, "input_tokens", 0) or 0)
-        + (input_token_details["cache_read"] or 0)
-        + (input_token_details["cache_creation"] or 0)
+        (getattr(anthropic_usage, "input_tokens", 0) or 0)  # Base input tokens
+        + (input_token_details["cache_read"] or 0)  # Tokens read from cache
+        + (input_token_details["cache_creation"] or 0)  # Tokens used to create cache
     )
     output_tokens = getattr(anthropic_usage, "output_tokens", 0) or 0
+
     return UsageMetadata(
         input_tokens=input_tokens,
         output_tokens=output_tokens,
diff --git a/libs/partners/anthropic/pyproject.toml b/libs/partners/anthropic/pyproject.toml
index 3ccc65bb7c5..222da0493a4 100644
--- a/libs/partners/anthropic/pyproject.toml
+++ b/libs/partners/anthropic/pyproject.toml
@@ -7,7 +7,7 @@ authors = []
 license = { text = "MIT" }
 requires-python = ">=3.9"
 dependencies = [
-    "anthropic<1,>=0.60.0",
+    "anthropic<1,>=0.64.0",
     "langchain-core<1.0.0,>=0.3.72",
     "pydantic<3.0.0,>=2.7.4",
 ]
diff --git a/libs/partners/anthropic/tests/cassettes/TestAnthropicStandard.test_stream_time.yaml.gz b/libs/partners/anthropic/tests/cassettes/TestAnthropicStandard.test_stream_time.yaml.gz
index 10a3e4007191e4e50aca514fe50afa09e25ea236..14e5546a74ccf40724716e09508ed48cb696b814 100644
GIT binary patch
[binary payload omitted: re-recorded VCR cassette fixture, 3355 -> 4239 bytes]
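For reference, the input-token arithmetic that the revised
`_create_usage_metadata` performs can be sketched standalone, using the same
numbers as the new unit test below. The `SimpleNamespace` stand-in for
Anthropic's `Usage` object is an assumption of this sketch, not part of the
patch:

    from types import SimpleNamespace

    # Stand-in for anthropic.types.Usage with the fields the helper reads.
    usage = SimpleNamespace(
        input_tokens=100,  # Anthropic semantics: excludes cached tokens
        output_tokens=50,
        cache_read_input_tokens=25,
        cache_creation_input_tokens=10,
    )

    # True input total = base input + cache reads + cache creation.
    input_tokens = (
        (getattr(usage, "input_tokens", 0) or 0)
        + (getattr(usage, "cache_read_input_tokens", None) or 0)
        + (getattr(usage, "cache_creation_input_tokens", None) or 0)
    )

    assert input_tokens == 135  # 100 + 25 + 10
    assert input_tokens + usage.output_tokens == 185  # reported total_tokens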
z&T{1}4}SH$G{Sh|ENhSKF7=^NY7Nj8C2{T9I6Po`%{p36jnLZ|M2(tfzcYFXk$o@l zlA0GT(xjF|b0>^z^wD)wmrh`K;W0(dm>xUlzd1^itEJwZd%4*A^DmKmS^Q7Ig(8>J zE|<14^%6m^vh39+eb<)Oq-N1DjuRtE*`bM+kgiyPtJZ=Qa9`5%X54jVZ#007M7kI(=B diff --git a/libs/partners/anthropic/tests/integration_tests/test_llms.py b/libs/partners/anthropic/tests/integration_tests/test_llms.py index 3da6663d4c7..c5c67eb645d 100644 --- a/libs/partners/anthropic/tests/integration_tests/test_llms.py +++ b/libs/partners/anthropic/tests/integration_tests/test_llms.py @@ -9,6 +9,8 @@ from langchain_core.outputs import LLMResult from langchain_anthropic import Anthropic from tests.unit_tests._utils import FakeCallbackHandler +MODEL = "claude-3-7-sonnet-latest" + @pytest.mark.requires("anthropic") def test_anthropic_model_name_param() -> None: @@ -24,14 +26,14 @@ def test_anthropic_model_param() -> None: def test_anthropic_call() -> None: """Test valid call to anthropic.""" - llm = Anthropic(model="claude-3-7-sonnet-20250219") # type: ignore[call-arg] + llm = Anthropic(model=MODEL) # type: ignore[call-arg] output = llm.invoke("Say foo:") assert isinstance(output, str) def test_anthropic_streaming() -> None: """Test streaming tokens from anthropic.""" - llm = Anthropic(model="claude-3-7-sonnet-20250219") # type: ignore[call-arg] + llm = Anthropic(model=MODEL) # type: ignore[call-arg] generator = llm.stream("I'm Pickle Rick") assert isinstance(generator, Generator) @@ -45,7 +47,7 @@ def test_anthropic_streaming_callback() -> None: callback_handler = FakeCallbackHandler() callback_manager = CallbackManager([callback_handler]) llm = Anthropic( - model="claude-3-7-sonnet-20250219", # type: ignore[call-arg] + model=MODEL, # type: ignore[call-arg] streaming=True, callback_manager=callback_manager, verbose=True, @@ -56,7 +58,7 @@ def test_anthropic_streaming_callback() -> None: async def test_anthropic_async_generate() -> None: """Test async generate.""" - llm = Anthropic(model="claude-3-7-sonnet-20250219") # type: ignore[call-arg] + llm = Anthropic(model=MODEL) # type: ignore[call-arg] output = await llm.agenerate(["How many toes do dogs have?"]) assert isinstance(output, LLMResult) @@ -66,7 +68,7 @@ async def test_anthropic_async_streaming_callback() -> None: callback_handler = FakeCallbackHandler() callback_manager = CallbackManager([callback_handler]) llm = Anthropic( - model="claude-3-7-sonnet-20250219", # type: ignore[call-arg] + model=MODEL, # type: ignore[call-arg] streaming=True, callback_manager=callback_manager, verbose=True, diff --git a/libs/partners/anthropic/tests/integration_tests/test_standard.py b/libs/partners/anthropic/tests/integration_tests/test_standard.py index 686a97c1bd4..d06eb84cdc9 100644 --- a/libs/partners/anthropic/tests/integration_tests/test_standard.py +++ b/libs/partners/anthropic/tests/integration_tests/test_standard.py @@ -11,6 +11,8 @@ from langchain_anthropic import ChatAnthropic REPO_ROOT_DIR = Path(__file__).parents[5] +MODEL = "claude-3-5-haiku-latest" + class TestAnthropicStandard(ChatModelIntegrationTests): @property @@ -19,7 +21,7 @@ class TestAnthropicStandard(ChatModelIntegrationTests): @property def chat_model_params(self) -> dict: - return {"model": "claude-3-5-sonnet-latest"} + return {"model": MODEL} @property def supports_image_inputs(self) -> bool: @@ -67,8 +69,7 @@ class TestAnthropicStandard(ChatModelIntegrationTests): def invoke_with_cache_creation_input(self, *, stream: bool = False) -> AIMessage: llm = ChatAnthropic( - model="claude-3-5-sonnet-20240620", # type: ignore[call-arg] - 
-            extra_headers={"anthropic-beta": "prompt-caching-2024-07-31"},  # type: ignore[call-arg]
+            model=MODEL,  # type: ignore[call-arg]
         )
         with open(REPO_ROOT_DIR / "README.md") as f:
             readme = f.read()
@@ -96,8 +97,7 @@ class TestAnthropicStandard(ChatModelIntegrationTests):
 
     def invoke_with_cache_read_input(self, *, stream: bool = False) -> AIMessage:
         llm = ChatAnthropic(
-            model="claude-3-5-sonnet-20240620",  # type: ignore[call-arg]
-            extra_headers={"anthropic-beta": "prompt-caching-2024-07-31"},  # type: ignore[call-arg]
+            model=MODEL,  # type: ignore[call-arg]
         )
         with open(REPO_ROOT_DIR / "README.md") as f:
             readme = f.read()
diff --git a/libs/partners/anthropic/tests/unit_tests/test_chat_models.py b/libs/partners/anthropic/tests/unit_tests/test_chat_models.py
index 8a30427d1c1..6fc598c401c 100644
--- a/libs/partners/anthropic/tests/unit_tests/test_chat_models.py
+++ b/libs/partners/anthropic/tests/unit_tests/test_chat_models.py
@@ -1213,3 +1213,76 @@ def test_cache_control_kwarg() -> None:
         ],
     },
 ]
+
+
+def test_streaming_cache_token_reporting() -> None:
+    """Test that cache tokens are properly reported in streaming events."""
+    from unittest.mock import MagicMock
+
+    from anthropic.types import MessageDeltaUsage
+
+    from langchain_anthropic.chat_models import _make_message_chunk_from_anthropic_event
+
+    # Create a mock message_start event
+    mock_message = MagicMock()
+    mock_message.model = "claude-3-sonnet-20240229"
+    mock_message.usage.input_tokens = 100
+    mock_message.usage.output_tokens = 0
+    mock_message.usage.cache_read_input_tokens = 25
+    mock_message.usage.cache_creation_input_tokens = 10
+
+    message_start_event = MagicMock()
+    message_start_event.type = "message_start"
+    message_start_event.message = mock_message
+
+    # Create a mock message_delta event with complete usage info
+    mock_delta_usage = MessageDeltaUsage(
+        output_tokens=50,
+        input_tokens=100,
+        cache_read_input_tokens=25,
+        cache_creation_input_tokens=10,
+    )
+
+    mock_delta = MagicMock()
+    mock_delta.stop_reason = "end_turn"
+    mock_delta.stop_sequence = None
+
+    message_delta_event = MagicMock()
+    message_delta_event.type = "message_delta"
+    message_delta_event.usage = mock_delta_usage
+    message_delta_event.delta = mock_delta
+
+    # Test message_start event
+    start_chunk, _ = _make_message_chunk_from_anthropic_event(
+        message_start_event,
+        stream_usage=True,
+        coerce_content_to_string=True,
+        block_start_event=None,
+    )
+
+    # Test message_delta event - should contain complete usage metadata (w/ cache)
+    delta_chunk, _ = _make_message_chunk_from_anthropic_event(
+        message_delta_event,
+        stream_usage=True,
+        coerce_content_to_string=True,
+        block_start_event=None,
+    )
+
+    # Verify message_delta has complete usage_metadata including cache tokens
+    assert start_chunk is not None, "message_start should produce a chunk"
+    assert getattr(start_chunk, "usage_metadata", None) is None, (
+        "message_start should not have usage_metadata"
+    )
+    assert delta_chunk is not None, "message_delta should produce a chunk"
+    assert delta_chunk.usage_metadata is not None, (
+        "message_delta should have usage_metadata"
+    )
+    assert "input_token_details" in delta_chunk.usage_metadata
+    input_details = delta_chunk.usage_metadata["input_token_details"]
+    assert input_details.get("cache_read") == 25
+    assert input_details.get("cache_creation") == 10
+
+    # Verify totals are correct: 100 base + 25 cache_read + 10 cache_creation = 135
+    assert delta_chunk.usage_metadata["input_tokens"] == 135
+    assert delta_chunk.usage_metadata["output_tokens"] == 50
+    assert delta_chunk.usage_metadata["total_tokens"] == 185
diff --git a/libs/partners/anthropic/uv.lock b/libs/partners/anthropic/uv.lock
index 3f31dfc2c20..0cc8cadcbff 100644
--- a/libs/partners/anthropic/uv.lock
+++ b/libs/partners/anthropic/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 2
+revision = 3
 requires-python = ">=3.9"
 resolution-markers = [
     "python_full_version >= '3.13' and platform_python_implementation == 'PyPy'",
@@ -469,7 +469,7 @@ typing = [
 
 [package.metadata]
 requires-dist = [
-    { name = "anthropic", specifier = ">=0.60.0,<1" },
+    { name = "anthropic", specifier = ">=0.64.0,<1" },
     { name = "langchain-core", editable = "../../core" },
     { name = "pydantic", specifier = ">=2.7.4,<3.0.0" },
 ]
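For downstream consumers, a minimal sketch of how the corrected behavior
surfaces when streaming. The model name is illustrative, an ANTHROPIC_API_KEY
is assumed to be configured, and `stream_usage` is the existing `ChatAnthropic`
option (it defaults to on):

    from langchain_anthropic import ChatAnthropic

    llm = ChatAnthropic(model="claude-3-5-haiku-latest", stream_usage=True)

    usage = None
    for chunk in llm.stream("Hello!"):
        # With this patch, usage_metadata arrives once, on the final chunk
        # built from the message_delta event, instead of partially on
        # message_start -- so summing across chunks cannot double count.
        if chunk.usage_metadata is not None:
            usage = chunk.usage_metadata

    if usage is not None:
        # input_tokens now includes cache_read and cache_creation tokens.
        print(usage["input_tokens"], usage["output_tokens"], usage["total_tokens"])
        print(usage.get("input_token_details"))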