From 74385e0ebd6a4f9f06d083c26f5aab2af7ed55e3 Mon Sep 17 00:00:00 2001 From: ccurme Date: Thu, 13 Nov 2025 10:18:15 -0500 Subject: [PATCH] fix(langchain, openai): fix create_agent / response_format for Responses API (#33939) --- libs/langchain_v1/langchain/agents/factory.py | 5 +- ..._inference_to_native_output[False].yaml.gz | Bin 0 -> 2605 bytes ...t_inference_to_native_output[True].yaml.gz | Bin 0 -> 4065 bytes ...st_inference_to_tool_output[False].yaml.gz | Bin 0 -> 2541 bytes ...est_inference_to_tool_output[True].yaml.gz | Bin 0 -> 3363 bytes .../agents/test_response_format.py | 79 ---------- .../tests/unit_tests/agents/model.py | 3 +- .../test_response_format_integration.py | 142 ++++++++++++++++++ .../langchain_v1/tests/unit_tests/conftest.py | 44 ++++++ .../langchain_openai/chat_models/base.py | 10 ++ .../chat_models/test_base.py | 35 +++-- .../chat_models/test_responses_api.py | 15 +- 12 files changed, 234 insertions(+), 99 deletions(-) create mode 100644 libs/langchain_v1/tests/cassettes/test_inference_to_native_output[False].yaml.gz create mode 100644 libs/langchain_v1/tests/cassettes/test_inference_to_native_output[True].yaml.gz create mode 100644 libs/langchain_v1/tests/cassettes/test_inference_to_tool_output[False].yaml.gz create mode 100644 libs/langchain_v1/tests/cassettes/test_inference_to_tool_output[True].yaml.gz delete mode 100644 libs/langchain_v1/tests/integration_tests/agents/test_response_format.py create mode 100644 libs/langchain_v1/tests/unit_tests/agents/test_response_format_integration.py diff --git a/libs/langchain_v1/langchain/agents/factory.py b/libs/langchain_v1/langchain/agents/factory.py index af02e587d67..87e02e60265 100644 --- a/libs/langchain_v1/langchain/agents/factory.py +++ b/libs/langchain_v1/langchain/agents/factory.py @@ -1009,8 +1009,9 @@ def create_agent( # noqa: PLR0915 # Bind model based on effective response format if isinstance(effective_response_format, ProviderStrategy): - # Use provider-specific structured output - kwargs = effective_response_format.to_model_kwargs() + kwargs: dict[str, Any] = { + "response_format": effective_response_format.schema_spec.json_schema + } return ( request.model.bind_tools( final_tools, strict=True, **kwargs, **request.model_settings diff --git a/libs/langchain_v1/tests/cassettes/test_inference_to_native_output[False].yaml.gz b/libs/langchain_v1/tests/cassettes/test_inference_to_native_output[False].yaml.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a7b90f2633b19bc255226b0ee49b8343d273a71 GIT binary patch literal 2605 zcmV+|3exo-iwFSQ{uF5f|LvJuZ{kQ6fZyj=WS&N9$&zit^g7Z$fD^z@a2gYQd$Y?1 z8_Ji#24gfoeos}|U}fxB%w6du zMy`x}Y+HKjSjxf%eb>@>YS1>j{7clIctayZA}EQ2+>`sfJDe>X(crDlQPCetTX?qh zVhz7g8Dpc2jJ-2u^3K4v1%! zGQ=C@N!_B3zV(*>phl@dVwouVmWENJO4hWDJX2aZW*NOkYJx_SLYpeNl0XC<2shEN zB|2La3n9rdIKT&1pW2`X-9+1ZSlZiSh&}U(tn>$hqF5bm37!=8lqzj(Q^3sQIz=lL za&BmzsAErt1!J9sj6|(+El8>)-ljt>eLT^M0qaGZ2O>*WT8bKZ) zC;kxAwjm)nrcgST8u}xna+rrJFup`FEX&@c2axFdcM4AR3 zKIbF~Wu(Kgv&aI;BM>)u$`D;SPk~-#_q7NNcFsUP%t?k z`vd9N+(dS0?Cp-4p^=`|jeSo%8adWvLGskz^iC|nj(CFojroAJPH97NJk#KzNPC*| zq+fX*2tTO}I@Xwa!IVxrAGTEmdpA)M8aDLhKyV>v4(t#UwkQ;&+fEfV*v=Oyh0d@p zQas3KXFck`cd$F8lO|8}$~O&ZZU_<>3)^J+VU5HSS73?#2_uH2*`qN`ietUZS z^5wUd02d@E^`m5QO>$F&={nlGMU*C2<)^?g9*55!!Ki;YJA#@|<(mD7;lPuM%{lY#87SJc$EHrGKLicCVJDEt=*x3T7@^ zK{~iCG|KftwYAo6$*qGtOOrgfRV66a6^GKz&6!_=XcPxOCjH$&>Wtrgd!4?)ubXxB za4z%-7ymZ6cp07M(1Uz=av7mmV)H5bD^i9wl_bH3K$Ns(D z(YDi-uY;#P7%plEBa^oDWnLf?7Pw#!ZMK0iG$#YpgX-69leG(SPJ2+4Y!6}2fnZ>l z2q7aH=tYytTTL5Hp!#gm&V((@^Zb0S*CYL)pp1@8`a;P@;{n)~q_x<CxtJAj$XI9MvYiAK8}FASk?N_7$LF zeaQMpVvfCzLQZYEZ&~Y*lN|68&P$YxYe0jOj^YkL8)8}8_X2p2ftF$R!zRG;5RgDK)>WO$s-+-gON57N%U)j$djnZqb zYanpq;bSjvJwA{$@rWhn@T{Sj#X0>E{o3R~oCHjUbuuBIY1s^Fn)=Qsz38oH5{OJ^HCFS7p$&^z@`SWFy)a>C3L{<~1 z`usT4E}13?8&pomC;$`1ID+Pdr zBOu5nDMQWrZ8H(P5^cPW9@5|w)?BqoYtoqNOD(gJ1H1ySRH@ZbVft$Jt-uEH7>4A;u4)i90CFA% z_8fk$A@#HmwYORK5XrkGwRy84hX$x(dPCMUL*4ozxyp1-y)4_wu`Oc-{BT&dDGxaY zYBAuv+9bg^IE`RAf*cmSySbvVL4pJjrH8J`c28@Y5Izi>CIX}+7hVT7;_c(OIboXK z6M$&8r!pt!0z{-W#|EG+39cm6_1iaOS7u&{5&ydKAG;BWd8iCnWz!2blSUy9)y(2Jr!0A2v;nJLv zkEHa$V~Rd+1br?EJi5i@2h(5;^X`L31t&9Dy7(A|8Gkq5<(*eBxGZ8KY3N8cKED2M;bSifgz|97!R|K<1g1LLbcd*HbE|?OO87hL z7@KAeB$$%Ye-3%bHDP&p@f=Y&OOyK(g6sM9b`6ZlU+okph#{3oDJ!#K?z|1U+IPnP*)nNODa Pn_1>xwa&2+r6m9Wf|=-d literal 0 HcmV?d00001 diff --git a/libs/langchain_v1/tests/cassettes/test_inference_to_native_output[True].yaml.gz b/libs/langchain_v1/tests/cassettes/test_inference_to_native_output[True].yaml.gz new file mode 100644 index 0000000000000000000000000000000000000000..cdfb2a66ad2653b8f9b820c9cc43029ba968e66f GIT binary patch literal 4065 zcmV<74<7IziwFSi{uF5f|Ls~?ucKI!e(ztA{nS!PmH=O8l~d{mFBco%1h+S~H#Qq= zz%GN0LG$A$&cTr zL2O;~Z~yhTZ}5+oFD`aG=G}?(UF#=-Aq;QsOueDMOJR6-H)G$4b_ZiKKAYMjGh;Q8 z=5A={>$9m#2SZm1OIJ8U8*o_R&tSZ=XR7ZAut*RSdot#%+3sKpDyR3Wh3;&4hi7*? zQo^5DM!iXMwJVnGJIn1Z`mm7Od~m%nj{G%|T%Ze?P;*$kGNr zD9ue>$!uW&%3gw>lSOU|k5D?E1FX>LhXrggm0~V|?H@sqGr@c$pbEb4PTYbj?32pi!Hn~7D#jOX&(X*LjVG1VR%H5uPiKI`W;-A zIngk~3Qc{%S(;iS| z7k3ukZR`g0XOb=}@Vhw+Nl@@M>Tf3gx|#W6xQD1ibRVu3=B5S_4U+(+m@~uq$-@Z> zn03f7tPoj((xuwUFt8#O0ssni4q0-C978^~!TFpAE-+!`3lF z0w*{PN3b-bTl_Aa+|Z@R(uHG&PgpRH?7oD~`4KD}(6fyEfeL__nsCgVSjPTz=5P{V zYoIW6a-14?Q7dl?cMioRassp)VqpxC($x$2>V&o>MaspUpWbl;N_6pr0)TvPXpa@j zKbCszq(tzbFtNX?1co>Brp~+j)j$6Ug1%?D-W94K4y5CFmz4au_w66Q{kDFcjwajJ z>2JUNqbAVvV(07z@!=!MTk4;$L1CrAIsQP-ImfZ*d@huFaoP*L_>lTv5KQxn_o3MN z6nrG$pOcT=a7(;v&pN=HJ`j5JlGF;~&`Xk@b#?r&iY9sNd{H*EK8!(K^qhQA@M1Y9 z*jn!nS@CgH>Z`6+*1LMi^t(PGrCf*T!k;I0Ujcs(99V=q7!=z?7s3-#ii0N(m5nH#EpDjG zsL$+FgDBJw+)m#Ol-J9Pdos#S^-h!O=x&2O$qtG3j)=U~jFi1RHIL?T;Tzx&GkPtQ z!pY?0C{vE7hcYumb35uQGx=pXeI1zF6Zlj-D%%yWOjr4GddjH}u~iUh1S|0NA<}#X zxyhj>zTZ6R-7Ipo!4bF6a7f)SA%!M1Uw^(+FX56M3G-^*Kgk3oJlc{qMq6U z04BW7&D;scBc(L4*MLvNv}~KG@6Oa0WHF=!Ab>?`=me2g{rFO#U;vzh^UeE6nGJw| z;6Mz|SR@w{IPBq;!rxBl5mx6LaI9Co zkrsn|SYZqI?J($RE;v})%K;$o87#U%ur3&S)Nr6m<%z-y&y`7Q$AFQc(x9US!KwpF zI-xpwKWX@ zY*TFIUu63BTnVxADt9?2R>Qr@4}XErU*Pi>`1~AvvYk~-6;_s{S9A#4m=sWHc0Ndx z1!<4C_$irQj08h4pjwD0MKFE2aQgC8%H*jrkhkl|)YgGHeSI;C!(*wb{t{4xQXDv{ zQa-K(^QcA$K)|tolP068Bp%^)aYdN0=g?hHArDP)=;O9OeeEkpX+Tvjr?Q|-8E7VzRVp-beeGKsA|Qx(O%9Fr zJcVhE=gdmvJ578kF_Jh07@3APVi1Dy%?12KHw}Qp-(##V5?qBHG#l{tENgC@l3OSg z6xjUSLUfP2RGuE-KZp78?&|BzMK~rH+r&Yoqj1(xl`MJw9T?t5;4F!t$JXA&nhH)H z07uY!Q8Q}D01o8A35x-e(|GO;|9;M~@N{i}S{@mnTONXAI7oSHIm^o}oD59{JiOzS zsr0hkN&_=^$MVSJVFbg?TNe-z&8VMA;!%~*{1C_}$P(D7K28v41eVs-mQx2AI7WfT zXQT_O8Fb_7Gs3T6KY{0fS+TR zCRGL@Laz>OoXbNN9^w;nN-*uTA*aNdFG4qh`B+!>71UL!<&7hN@JtFIbx&2orV#DA zw*+<2K75FK1K)D!wT9q_I#eJ$jx%UkJ2@!nn93^mzDx6j`V(+s1c)mpzJ#y<&VjMF z^mj8iu^C`7JCLlZ#EK7j1`&j5g;)Y(wwga#E~q&;Dssr$H8qwY{uB9y{+C*nzkE;oqIC+OMne z{x#-5sMpL=^m!PYynh?eD8UUi)$PQjHKL8qE!j~ujzC_Qz9iguQ}uXQ|9fkr&FQ6V zZKFfem(Mp3ZHt-{YV8%Q*tJyFn?Xs=zjaz!fquFtct)ucXrb3^+1Nmch7vx&UkAwuDzM`77bvRz9?7$t0h}w;6=|HD7X<>G6Hv} zehrPVvA7CjJ>qk&o)p7o0uiQxkqnZUJRL@S7j!ru+Gkwkl7?0FvtG^Hjg)WC-mFp7 zM{rz?9)-C&cUfE;%V~qAa(nbs?;y7Z;Wp*J+erM^#;?wN0-3VO1mFyyeo{y-H%|nbwKCUiedP@roD~lz-*l6ozhrr?_GPZj2h~a z(tCANd&>*jQqn1Eg=sIjJDty*eb2ER-$TQGdh1@_qKvvk&KEDXg5LRhuwq73eIR$a zo*geqU~j!B`9$yt(fUY)aT47eh8N$@dGB^f(yM1hy##_3I9?BPAKfdi5a2L~0@yG9 z|CN1N^EL7GeDdPYg|A*@1u=5Km-SamzWk2FOaF>~B>u5(qN^9U9Rf6iXv(*=6R|Jjqi+`p7dA=6C6(oP>SfJUV?q7RCQ4>2vr+`ux8qeL{J= zdXbNWIx(Wx%+OYlOyi@m4UfvvC`q5-KS}zej^KWN1B$4sse^Ao*~YhFU=*uN(d36b z4MSOgf44>hw$@NGbX27V84{^|E6@OnPxv$_=c3Yr8tTATlu3Ju zp$%>};++ssCrM`OwP~a|#57Tzn7U7M5&IsG(o)SiN_8MqeX3H~Je?L z9OCQp#w1r^Q@4RlqmoCWoA#Z)azSL89j^k0lh99{@k;nU?2*7(e?CE!D``MlL`;|$ zKzRk?idZqDm-ruZOK^DcXa)z9m5q{G8s{kLE`GamhSHHEy3dZw;k$m`YiO{lV70Pg zY9_jTCM7rGLvt4Lc15KEhF4Q4EjhCW%y_E+6umaV2m&IN~)<%txR8U@`R17t4OKoZZsaXsZ(>oS-W7i-AUkD#=Jfh&|1e?VI@Lne&N> zsC@ZVX6DoTdKPhdZT|^VvuodncgZufQHF$lVk$K!-8uJMk?QyOwXy7(?dtd_wZSGH zKjV~2lAL`hFe<-SrC$-eeMGL6Uk}vCwVIVx!Rpv(<15aJZmHK%M)xr*DeKoo+p$g7 zU!k{{uX?9PhVY)+6>Lpw-$eu zdPX6wSlGccll4r8euW36D!)pHR)-NYi~lCS*-ZHlK161iy(E2hpHrixPj_82aZpqD z_G}mC7 zWw|*gQ$|lEfkSxGZd&}1w@$x7N#jpBrSB|>>gXaur$;=&A@i|CeeSL6BlKTX)GsRP T7Zvq?l8X9oC&SrqG&cYM3Hind literal 0 HcmV?d00001 diff --git a/libs/langchain_v1/tests/cassettes/test_inference_to_tool_output[False].yaml.gz b/libs/langchain_v1/tests/cassettes/test_inference_to_tool_output[False].yaml.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f37da79af1f6b5b1f584773f46724086cebd7d7 GIT binary patch literal 2541 zcmV`niwFRx{}gEg|LvJuZ==W($KUr;WS>?V2?_9cW*uoCY!cfrF+SQTH$vjLZrn9_+@z$FS0u;@c)@+d2#;&wu zJ(I^CwoEOtO?hd-ddF0G>d-Q}{7X2Px+6V6A}kVznJW!=XEa~hqRyM0!$NP=+rqQ0 z8>#q(+USKm)@ysl?5RUt)B|r6V1yNnM!m`dLs#rU1lm>}dDxBg3LIkiJ%NRhV{`*> zT0vH&D|W%z*YHJBDp%ZL>QG)KA?n~7Qq1L;&p?6|81gnmPM+yv5O|VTHx4_tF(i>y ze^zoP$I8|n&BNDF5s;`?%wa5eutV;O8g_|hkym;WFUVmD8)+3d$mLZUS;h{cDMOm} zu(xtXz0?w^Gl+I5IHtlPv?a2WwuO*uSm;88yl)OLi4Mdstd5qtB26pujPA-C;jJ6W z-@vZ041w7-TC+o8f|CVn&}D@pT(*nkwTHdtfV@`ga1gm2nVk5_F}3bXNCF~NFw9wm zAUKLA;cJ|QXsm1z=mQRE2@yTXB8dDuWYI-Tui6_csT-kiVe&*rj7&X6k-lyYC3WRU zO}+6tG>JxIT-r0DWiNChedS3?k1V`L3hMA&k9`@f6{t&HZ4N7wBgjNlE5`&0j9zhK z8?rKD4iB|dTBZ`XhR)ljQnNG%mD?GBBwA{lOcqjJL>ACqfZIbqvqySub(EAkL}L~@ zXfvncpDtKB#@D4C4HM9)fci^4Ru!$Dg=YvMIn)Kn#ZDRStv$j`8W7d()JV~<;hEHw{>?r7_T`KA{M47;o}a#a`R&Mn^JA2FVZ6L$S>!>o z32P@0llX=mO5)f@KR3$#IOnh*FZ19I!@NrU8)iY0-O94a{0-+U!O;z)9hc=!7-K)n zIA?=`drh+{Mt9m`=cdHRrb%|EVd|hYq{eOG&~J-=ylUf=+{Vw+B+mSMm-zXn;!slD zt>Oku!^r=b^-mLtd?-Zk@!`+c*&F;?Y{JKLqtChgFV5w2Cn+?|UDobOoVFSfdC0tO zXEV&|tks>&BG$g>K(YnUg*8-I;22Y~D_vmLc<97h=sgYfH(eXswfBNzT&guR<+-)f z=)7U;n+Dr^^vJ+>jo#WeN-6;wPji*blvw3q`UNg*2$L{_x^1~=VTr-E z(NJYmE7EtKfq}NB`>2jj@Y3NO69k#hP0GWZ83Py%d;2=03z`9bG`q*PIbC{Zy4q2% zl1IWJX<>KchXvGjgbR6O2hLc5P849robDNC-)-Dj&n;+Eq*0vm3ytbMDj#Ka3|q`$ zctowVaB*Q*K&`X#q&8pidp3l~PH2oeypF+wx0wF%$Nwupp(I&{pM>>ESa%Xuo7v$d z2z8d_8*&NG!3)rM`rXOQL8wRm!w0JfE!kl{(|gSBWpxs*ONRToJG6UsYjm;TwuL>ERK{g6WK@b|7}q`VbRclunIYa;5*I!YHkp)NG?(PTai+pW)| z1Xufxd-`tWNR)lVC+}Y5arWb~OX_w%Nu~FM_ue%T>_B~uOA3FI^4*l!q&MNXTKL;@ z-|}3XLGRQ(`~56R7&SkRqlG$*JQI$BRK%z&xTEqk%4k>sGV&I!-!46_#-$uvI?MpRV`vKmxCKh)K3W7Us@3>O8ZNntH z2@9KG87xKQZRAlml4AbHIZ;VNPU~Bbmu8i8AfGFbDayPNl(}T^=y1e4B^RD7bCyTDwZJ@Gn z7=@q<{d?2?7tnp?d9wE7pBr!d2vCg(b+`Y+lDqfF{QM*SiTQ2c%oi7W7Yv$8Yf8(5 za}aqpdn2d`pz6nm*B}1~LW)|P>@-~~CL><7`L6UP?j|0etFJ~<&WxSpO8+oIE_rqK ziIDdrWDmQ%YC2f$Xkt=NabUxooz9|kk_TS-U8|4kk{3u=?>hdaOeiSd;#BWgM{{miq^5Z8ze)8kL%#Z&8Ai^2uk|h8D D6u$Ez literal 0 HcmV?d00001 diff --git a/libs/langchain_v1/tests/cassettes/test_inference_to_tool_output[True].yaml.gz b/libs/langchain_v1/tests/cassettes/test_inference_to_tool_output[True].yaml.gz new file mode 100644 index 0000000000000000000000000000000000000000..95cc9005e8408e6f072b4be9978537f6ad246008 GIT binary patch literal 3363 zcmV+;4czh{iwFSC{}gEg|Ls~^ucFuzexF~_^E62(CV<7kb8n~Lzx?{E>4nBI z{qmn*zQ8Xh6%J|mAz15cey{=pmJ(MGUY9mcu z>~)pjYO0hOs>EnYhan1oc>TFKkzI>}L)83Uanp_twSe*HZjm$t!K_P4i}zz7q$YPkyubH|LaU83a53gXCIa_;5M*>P9}CEw>)Hc5D-^bMWsEIL|4R9Y z4~4TiS$i{2U@cBg`H;19mj^;@atmbQm<4uX4;YsB63=QcQcw`{{vptm3s{qQ#O-m{ zHzwVK#t~&;?U*%G0qFxiA`1C%aR!q!IB0@}jHC96`4Ie*)?OS;j<|Kw6gK~PHMaQK z4np8t0ZNJ(qO<9Nbvql`MM7){Fo~_#56m`?Gwd7cqVvPUuHvkZV;y2^g`zhLBhQ)& zXA43>6(TF;zv1}dLtyFeV0FjJ2;d;nQi$j?=ppm{LP^fi0dCvp)(NC14uw6<`t zrg_ZD2ly#i)rR^Dq5gi-WhfUm(N)2pY8irDXFywdH zhN8rl;ijNZ5MwCa6OWj(!m}0}kk+1qLe}2Q|NIjUx{hHxN9Z|NhhbUHA zae8px72EH;_XOO1^j;WFk#lSrTX@quLYGz)8(xT zzx*{@uHfV8=zYHz`kcrA{5)o&YYcT9uTaR;tjB39!I!c*BL(A~mLXSPjS_LFb;Q*o z(1X#7q=>0F++MWR*p*g2M#^+o3nW+a=k2_)PDV*S(B;@v#;F5cT45`F5D)qcEYF)O z9$vvX)j=M6g2x6Q#ulD@c?_q>0{6D+K(N4U91|Z9LZotNhutQL8vl_l8EFmio!hMb zy5Bspoh*TuLL|w^B3xYv=N88D+XfjvNW7?nzHa` z${2~OULyIsi!{*#jS~WWW&5+?*cBDQlQQc?AI=#q>ye9B2cliOvYXa|b zmmgrfPk8qU?>^z(zXR{6XE)}BD2w7Fz6AHE4=_uz-|~Y3w+sY=n4Z8cZ5Z5vM6k=K zGaJrdB%fyrmC|bA%~l#GW$PEX=CAyq=b4=Fx2{MSTWv|>E?Ojg4m}4?7^IK8E}6K) zQ5#ft3YD{=_yTW>23`-21IknvECDy9kKG3Rj~?;H(-i5bw}msbcMLPOdN7Uvh?4V# zN*FV{SS_-FQ zO?5XE2VgWq0yKf9KS`>N7n2O9qJR}YR(vA*>ln}|{I+Nu&$hZqY;}%bW#$3828vWo zp^_Gl#r}~~R3l7(LM4*--XHzxN_4BPgDAA`LA7d;mjN z8Y6KFw@@hpUManaVii(6uxr2|LMJ_~$N&msAqYYT%K=P&B|o1sG(B|=6qX0t$CmqA zpd*PBiRFb#EdcbHxjSjN0H-WgQq~=D`M~l(XJN@9(@y0u7EFpb=Y?0O1aybTd6pjm zN7a6Waf4nB2rOeZjB{AfrXs0Ths6UGF^-(zbXhz~sLD;%qB7#&)}hi`K$>6F?X51i zH#)5M2LBha?&7}zi@g>%E5)yd8&#=O>Nqaf=_0k2b#c$}{-z(yww$T$Tg#x9mv0gV zQxGTy$5rE8RH#(prr@2ad<(Ezd^v6Mc4-e6G{x~XLyCARx9YlI(K~+EE_6i)m@DLs zEnpGg8P>eTyRy0!X1wCSVD;Bn3Yn(p2!PtgP;yK308pWKZ@C2MQ3q%*ZwPAiNbT;x z1Q#sEV6WUZppee6v14l)E0j^;&>N5mt@b1qafEgFg|y#k^jcI)Kpk)64l^%1e|Iq# z$eGTsnZK5Hr~~K&tcz=NNi2?O+fV>ZDhxTYSyBt`d1~+^kT14^(o%;f+drw8aa5_x ztKs;~6i)W4D^BUsy~VpdZvnN8_kL<+Y5nIX`WY4*7oad+GfwnYd~_Y?E6eF2w}C|v z@-#2~lI2B7m-mCa=j0;e6^W{Z6y&qKEYiE7|G^EU-_m!xr1Ue+L-~$VYf34&4TIZr zEzfSr!oFRKbE(uz{gAF@GsF?y+ldco;)~Tx*hV_k;QUf{-d4|0}H!2H)lv$JR39LJP&@W}WCGVh!H?JC*QeD>F2pKGeI9AnQ`eUNI1}wa0;37kv zd$k#VQ-{!AS?@L^Zgp5bo(HV5aDJ=1<4>lhvTBx6dSB|LmVs#&KxiQWex+A!=B3Zq zf}-aII(u%*FMyGcQc^Cm;%>iL&nbY(f2+&Nk#c;myK^<30)Uz#mT-d#mFxR10~s}^ z61u*s;$YrFuxdI~%5$C$-CHM5RPJ)t+j!vSC48GR@T}d#alZWUd&PLe7q#PS>aFl- z3!WgDkU~@%0IbE0L*nplROJMA1tIq?;;MHY#*ogWPzlHugOH;R7+i(RXHUillR&9T zYsevja+4)q6mg|3=hoZ`zULtwzs_ND5MY^+wOh*8bW;GBg z1=fIOD+5=R-Xe_v&m~y=U~wVsqRqV4kuZaCYthrb*R8jM8(H$;F=0PenEj{|7#eZ? zgQatXHM#x~EV&lwJQ^V^!N<6@bS&7BYeXrcIw`ePHnlZ7V3(p=Q3J)T z=xo(Kvo({2g^vC$c{{q2aam$E^`k!s;El!*OppQ3SrVg6IQ5AF}nH z2<0Z~xeO;f@6*=l5XDEw2wD*&$+Mgm;Xvw793jET3q0_b{CTARW8^vUc)xSPkA;s; zV0a-8z|;C~N1lF1>7chf{K!{A-)`wQ!fGJ+StMmX0QJy`6Kz7 z&tGOa`SYUtF34`VYpWh5x<6`b{_gS*l0UQI{FD6opC*5vqEPgq1O+uG6v}Ma_e5^= zGFyp^m8#p|7bzPj=G5ia4AKjgi;0-6SmC^^GB)%Zxe}xm1{w_R zl>!B(B(-!4#i0*amnL+}U8&!*E=}kbddDBIE=}kbYUKmgr3u|aLE{6~r3u|qy5n`M zOB1@K3>s@#mnL*eq4E*y(u8iI)ae7(r3u|a@Aw1Or3u~gaMz??O#eLHQo;0ZS(hes z%NwSq%DOb6TWH&Oz`8V{Tdr0&b!kGkytzG9)};yE@>XSqb!kGkT>E7`>(Yd7 zDf3A+tV str: # noqa: ARG001 - """Get the weather for a city.""" - return "The weather is sunny and 75°F." - - -@pytest.mark.requires("langchain_openai") -def test_inference_to_native_output() -> None: - """Test that native output is inferred when a model supports it.""" - from langchain_openai import ChatOpenAI - - model = ChatOpenAI(model="gpt-5") - agent = create_agent( - model, - system_prompt=( - "You are a helpful weather assistant. Please call the get_weather tool, " - "then use the WeatherReport tool to generate the final response." - ), - tools=[get_weather], - response_format=WeatherBaseModel, - ) - response = agent.invoke({"messages": [HumanMessage("What's the weather?")]}) - - assert isinstance(response["structured_response"], WeatherBaseModel) - assert response["structured_response"].temperature == 75.0 - assert response["structured_response"].condition.lower() == "sunny" - assert len(response["messages"]) == 4 - - assert [m.type for m in response["messages"]] == [ - "human", # "What's the weather?" - "ai", # "What's the weather?" - "tool", # "The weather is sunny and 75°F." - "ai", # structured response - ] - - -@pytest.mark.requires("langchain_openai") -def test_inference_to_tool_output() -> None: - """Test that tool output is inferred when a model supports it.""" - from langchain_openai import ChatOpenAI - - model = ChatOpenAI(model="gpt-4") - agent = create_agent( - model, - system_prompt=( - "You are a helpful weather assistant. Please call the get_weather tool, " - "then use the WeatherReport tool to generate the final response." - ), - tools=[get_weather], - response_format=ToolStrategy(WeatherBaseModel), - ) - response = agent.invoke({"messages": [HumanMessage("What's the weather?")]}) - - assert isinstance(response["structured_response"], WeatherBaseModel) - assert response["structured_response"].temperature == 75.0 - assert response["structured_response"].condition.lower() == "sunny" - assert len(response["messages"]) == 5 - - assert [m.type for m in response["messages"]] == [ - "human", # "What's the weather?" - "ai", # "What's the weather?" - "tool", # "The weather is sunny and 75°F." - "ai", # structured response - "tool", # artificial tool message - ] diff --git a/libs/langchain_v1/tests/unit_tests/agents/model.py b/libs/langchain_v1/tests/unit_tests/agents/model.py index 07ed23995eb..8b948f658a9 100644 --- a/libs/langchain_v1/tests/unit_tests/agents/model.py +++ b/libs/langchain_v1/tests/unit_tests/agents/model.py @@ -38,8 +38,7 @@ class FakeToolCallingModel(BaseChatModel, Generic[StructuredResponseT]): **kwargs: Any, ) -> ChatResult: """Top Level call""" - rf = kwargs.get("response_format") - is_native = isinstance(rf, dict) and rf.get("type") == "json_schema" + is_native = kwargs.get("response_format") if self.tool_calls: if is_native: diff --git a/libs/langchain_v1/tests/unit_tests/agents/test_response_format_integration.py b/libs/langchain_v1/tests/unit_tests/agents/test_response_format_integration.py new file mode 100644 index 00000000000..c08497f8b4b --- /dev/null +++ b/libs/langchain_v1/tests/unit_tests/agents/test_response_format_integration.py @@ -0,0 +1,142 @@ +"""Test response_format for langchain-openai. + +If tests fail, cassettes may need to be re-recorded. + +To re-record cassettes: + +1. Delete existing cassettes (`rm tests/cassettes/test_inference_to_*.yaml.gz`) +2. Re run the tests with a valid OPENAI_API_KEY in your environment: +```bash +OPENAI_API_KEY=... uv run python -m pytest tests/unit_tests/agents/test_response_format_integration.py +``` + +The cassettes are compressed. To read them: +```bash +gunzip -c "tests/cassettes/test_inference_to_native_output[True].yaml.gz" | \ + yq -o json . | \ + jq '.requests[].body |= (gsub("\n";"") | @base64d | fromjson) | + .responses[].body.string |= (gsub("\n";"") | @base64d | fromjson)' +``` + +Or, in Python: +```python +import json + +from langchain_tests.conftest import CustomPersister, CustomSerializer + +def bytes_encoder(obj): + return obj.decode("utf-8", errors="replace") + +path = "tests/cassettes/test_inference_to_native_output[True].yaml.gz" + +requests, responses = CustomPersister().load_cassette(path, CustomSerializer()) +assert len(requests) == len(responses) +for request, response in list(zip(requests, responses)): + print("------ REQUEST ------") + req = request._to_dict() + req["body"] = json.loads(req["body"]) + print(json.dumps(req, indent=2, default=bytes_encoder)) + print("\n\n ------ RESPONSE ------") + resp = response + print(json.dumps(resp, indent=2, default=bytes_encoder)) +print("\n\n") +``` +""" + +import os + +import pytest +from langchain_core.messages import HumanMessage +from pydantic import BaseModel, Field + +from langchain.agents import create_agent +from langchain.agents.structured_output import ToolStrategy + + +class WeatherBaseModel(BaseModel): + """Weather response.""" + + temperature: float = Field(description="The temperature in fahrenheit") + condition: str = Field(description="Weather condition") + + +def get_weather(city: str) -> str: # noqa: ARG001 + """Get the weather for a city.""" + return f"The weather in {city} is sunny and 75°F." + + +@pytest.mark.requires("langchain_openai") +@pytest.mark.vcr +@pytest.mark.parametrize("use_responses_api", [False, True]) +def test_inference_to_native_output(use_responses_api: bool) -> None: + """Test that native output is inferred when a model supports it.""" + from langchain_openai import ChatOpenAI + + model_kwargs = {"model": "gpt-5", "use_responses_api": use_responses_api} + + if "OPENAI_API_KEY" not in os.environ: + model_kwargs["api_key"] = "foo" + + model = ChatOpenAI(**model_kwargs) + + agent = create_agent( + model, + system_prompt=( + "You are a helpful weather assistant. Please call the get_weather tool " + "once, then use the WeatherReport tool to generate the final response." + ), + tools=[get_weather], + response_format=WeatherBaseModel, + ) + response = agent.invoke({"messages": [HumanMessage("What's the weather in Boston?")]}) + + assert isinstance(response["structured_response"], WeatherBaseModel) + assert response["structured_response"].temperature == 75.0 + assert response["structured_response"].condition.lower() == "sunny" + assert len(response["messages"]) == 4 + + assert [m.type for m in response["messages"]] == [ + "human", # "What's the weather?" + "ai", # "What's the weather?" + "tool", # "The weather is sunny and 75°F." + "ai", # structured response + ] + + +@pytest.mark.requires("langchain_openai") +@pytest.mark.vcr +@pytest.mark.parametrize("use_responses_api", [False, True]) +def test_inference_to_tool_output(use_responses_api: bool) -> None: + """Test that tool output is inferred when a model supports it.""" + from langchain_openai import ChatOpenAI + + model_kwargs = {"model": "gpt-5", "use_responses_api": use_responses_api} + + if "OPENAI_API_KEY" not in os.environ: + model_kwargs["api_key"] = "foo" + + model = ChatOpenAI(**model_kwargs) + + agent = create_agent( + model, + system_prompt=( + "You are a helpful weather assistant. Please call the get_weather tool " + "once, then use the WeatherReport tool to generate the final response." + ), + tools=[get_weather], + response_format=ToolStrategy(WeatherBaseModel), + ) + response = agent.invoke({"messages": [HumanMessage("What's the weather?")]}) + + assert isinstance(response["structured_response"], WeatherBaseModel) + assert response["structured_response"].temperature == 75.0 + assert response["structured_response"].condition.lower() == "sunny" + assert len(response["messages"]) == 5 + + assert [m.type for m in response["messages"]] == [ + "human", # "What's the weather?" + "ai", # "What's the weather?" + "tool", # "The weather is sunny and 75°F." + "ai", # structured response + "tool", # artificial tool message + ] diff --git a/libs/langchain_v1/tests/unit_tests/conftest.py b/libs/langchain_v1/tests/unit_tests/conftest.py index 96f36ceae77..da921507200 100644 --- a/libs/langchain_v1/tests/unit_tests/conftest.py +++ b/libs/langchain_v1/tests/unit_tests/conftest.py @@ -2,8 +2,52 @@ from collections.abc import Sequence from importlib import util +from typing import Any import pytest +from langchain_tests.conftest import CustomPersister, CustomSerializer +from langchain_tests.conftest import ( + _base_vcr_config as _base_vcr_config, +) +from vcr import VCR + +_EXTRA_HEADERS = [ + ("openai-organization", "PLACEHOLDER"), + ("user-agent", "PLACEHOLDER"), + ("x-openai-client-user-agent", "PLACEHOLDER"), +] + + +def remove_request_headers(request: Any) -> Any: + """Remove sensitive headers from the request.""" + for k in request.headers: + request.headers[k] = "**REDACTED**" + request.uri = "**REDACTED**" + return request + + +def remove_response_headers(response: dict) -> dict: + """Remove sensitive headers from the response.""" + for k in response["headers"]: + response["headers"][k] = "**REDACTED**" + return response + + +@pytest.fixture(scope="session") +def vcr_config(_base_vcr_config: dict) -> dict: # noqa: F811 + """Extend the default configuration coming from langchain_tests.""" + config = _base_vcr_config.copy() + config.setdefault("filter_headers", []).extend(_EXTRA_HEADERS) + config["before_record_request"] = remove_request_headers + config["before_record_response"] = remove_response_headers + config["serializer"] = "yaml.gz" + config["path_transformer"] = VCR.ensure_suffix(".yaml.gz") + return config + + +def pytest_recording_configure(config: dict, vcr: VCR) -> None: # noqa: ARG001 + vcr.register_persister(CustomPersister()) + vcr.register_serializer("yaml.gz", CustomSerializer()) def pytest_addoption(parser: pytest.Parser) -> None: diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index c343bf50f25..e567893cb32 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -1771,6 +1771,7 @@ class BaseChatOpenAI(BaseChatModel): tool_choice: dict | str | bool | None = None, strict: bool | None = None, parallel_tool_calls: bool | None = None, + response_format: _DictOrPydanticClass | None = None, **kwargs: Any, ) -> Runnable[LanguageModelInput, AIMessage]: """Bind tool-like objects to this chat model. @@ -1796,6 +1797,9 @@ class BaseChatOpenAI(BaseChatModel): be validated. If `None`, `strict` argument will not be passed to the model. parallel_tool_calls: Set to `False` to disable parallel tool use. Defaults to `None` (no specification, which allows parallel tool use). + response_format: Optional schema to format model response. If provided + and the model does not call a tool, the model will generate a + [structured response](https://platform.openai.com/docs/guides/structured-outputs). kwargs: Any additional parameters are passed directly to `bind`. """ # noqa: E501 if parallel_tool_calls is not None: @@ -1838,6 +1842,11 @@ class BaseChatOpenAI(BaseChatModel): ) raise ValueError(msg) kwargs["tool_choice"] = tool_choice + + if response_format: + kwargs["response_format"] = _convert_to_openai_response_format( + response_format + ) return super().bind(tools=formatted_tools, **kwargs) def with_structured_output( @@ -3479,6 +3488,7 @@ def _convert_to_openai_response_format( strict is not None and strict is not response_format["json_schema"].get("strict") and isinstance(schema, dict) + and "strict" in schema.get("json_schema", {}) ): msg = ( f"Output schema already has 'strict' value set to " diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_base.py b/libs/partners/openai/tests/integration_tests/chat_models/test_base.py index 6fcccdad56d..e2d0133879a 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_base.py @@ -28,6 +28,7 @@ from langchain_tests.integration_tests.chat_models import ( magic_function, ) from pydantic import BaseModel, Field, field_validator +from typing_extensions import TypedDict from langchain_openai import ChatOpenAI from tests.unit_tests.fake.callbacks import FakeCallbackHandler @@ -1146,17 +1147,33 @@ def test_multi_party_conversation() -> None: assert "Bob" in response.content -def test_structured_output_and_tools() -> None: - class ResponseFormat(BaseModel): - response: str - explanation: str +class ResponseFormat(BaseModel): + response: str + explanation: str - llm = ChatOpenAI(model="gpt-5-nano").bind_tools( - [GenerateUsername], strict=True, response_format=ResponseFormat + +class ResponseFormatDict(TypedDict): + response: str + explanation: str + + +@pytest.mark.parametrize( + "schema", [ResponseFormat, ResponseFormat.model_json_schema(), ResponseFormatDict] +) +def test_structured_output_and_tools(schema: Any) -> None: + llm = ChatOpenAI(model="gpt-5-nano", verbosity="low").bind_tools( + [GenerateUsername], strict=True, response_format=schema ) response = llm.invoke("What weighs more, a pound of feathers or a pound of gold?") - assert isinstance(response.additional_kwargs["parsed"], ResponseFormat) + if schema == ResponseFormat: + parsed = response.additional_kwargs["parsed"] + assert isinstance(parsed, ResponseFormat) + else: + parsed = json.loads(response.text) + assert isinstance(parsed, dict) + assert parsed["response"] + assert parsed["explanation"] # Test streaming tool calls full: BaseMessageChunk | None = None @@ -1172,10 +1189,6 @@ def test_structured_output_and_tools() -> None: def test_tools_and_structured_output() -> None: - class ResponseFormat(BaseModel): - response: str - explanation: str - llm = ChatOpenAI(model="gpt-5-nano").with_structured_output( ResponseFormat, strict=True, include_raw=True, tools=[GenerateUsername] ) diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py index db18dcf046f..7a08864387e 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py @@ -318,18 +318,23 @@ async def test_parsed_dict_schema_async(schema: Any) -> None: assert isinstance(parsed["response"], str) -def test_function_calling_and_structured_output() -> None: +@pytest.mark.parametrize("schema", [Foo, Foo.model_json_schema(), FooDict]) +def test_function_calling_and_structured_output(schema: Any) -> None: def multiply(x: int, y: int) -> int: """return x * y""" return x * y llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) - bound_llm = llm.bind_tools([multiply], response_format=Foo, strict=True) + bound_llm = llm.bind_tools([multiply], response_format=schema, strict=True) # Test structured output - response = llm.invoke("how are ya", response_format=Foo) - parsed = Foo(**json.loads(response.text)) + response = llm.invoke("how are ya", response_format=schema) + if schema == Foo: + parsed = schema(**json.loads(response.text)) + assert parsed.response + else: + parsed = json.loads(response.text) + assert parsed["response"] assert parsed == response.additional_kwargs["parsed"] - assert parsed.response # Test function calling ai_msg = cast(AIMessage, bound_llm.invoke("whats 5 * 4"))